如果您想像 StandardScaler 那样对每个特征分别进行缩放(标准化),可以使用下面这个类:
import numpy as np
from sklearn.base import TransformerMixin
from sklearn.preprocessing import StandardScaler
class NDStandardScaler(TransformerMixin):
    """Apply ``StandardScaler`` to samples that have more than one feature axis.

    Each sample is flattened into a single feature vector before it is handed
    to the wrapped ``StandardScaler``; the scaled result is then reshaped back
    to the per-sample shape recorded by ``fit``.

    Keyword arguments given to the constructor are forwarded to
    ``StandardScaler``. ``copy=True`` is always passed by this class, so
    ``copy`` must not be supplied by the caller.
    """

    def __init__(self, **kwargs):
        self._scaler = StandardScaler(copy=True, **kwargs)
        # Per-sample shape (everything after axis 0) seen by the last fit().
        self._orig_shape = None

    def fit(self, X, y=None, **kwargs):
        """Record the per-sample shape of ``X`` and fit the wrapped scaler.

        ``y`` is accepted (and forwarded) so that
        ``TransformerMixin.fit_transform(X, y)``, which calls
        ``fit(X, y, **fit_params)`` whenever ``y`` is not ``None``, works.
        Remaining keyword arguments are forwarded to ``StandardScaler.fit``.

        Returns ``self``.
        """
        X = np.array(X)
        # Save the original shape to reshape the flattened X later
        # back to its original shape
        if X.ndim > 1:
            self._orig_shape = X.shape[1:]
        self._scaler.fit(self._flatten(X), y, **kwargs)
        return self

    def transform(self, X, **kwargs):
        """Scale ``X`` and return it in the per-sample shape seen by ``fit``.

        Keyword arguments are forwarded to ``StandardScaler.transform``.
        """
        # np.array() copies, so the caller's data is never modified even if
        # a keyword argument asks the wrapped scaler to work in place.
        X = self._flatten(np.array(X))
        X = self._scaler.transform(X, **kwargs)
        return self._reshape(X)

    def inverse_transform(self, X, **kwargs):
        """Undo the scaling of ``X``, keeping the per-sample shape from ``fit``.

        Keyword arguments are forwarded to
        ``StandardScaler.inverse_transform``.
        """
        X = self._flatten(np.array(X))
        X = self._scaler.inverse_transform(X, **kwargs)
        return self._reshape(X)

    def _flatten(self, X):
        """Collapse all per-sample axes of ``X`` into one feature axis."""
        # Arrays with at most 2 dimensions are passed through unchanged.
        if X.ndim > 2:
            n_features = int(np.prod(self._orig_shape))
            X = X.reshape(-1, n_features)
        return X

    def _reshape(self, X):
        """Reshape flattened ``X`` back to the per-sample shape from ``fit``."""
        if X.ndim >= 2:
            X = X.reshape(-1, *self._orig_shape)
        return X
它只是在把输入特征交给 sklearn 的 StandardScaler 之前先将其展平,缩放完成后再恢复为原来的形状。用法与 StandardScaler 相同:
# Two samples, each a 2x2 grid of features.
samples = [[[0, 1], [2, 3]], [[1, 5], [2, 9]]]
nd_scaler = NDStandardScaler()
# Fit on the samples and scale them in one call.
scaled = nd_scaler.fit_transform(samples)
print(scaled)
打印
[[[-1. -1.]
[ 0. -1.]]
[[ 1. 1.]
[ 0. 1.]]]
参数 with_mean 和 with_std 会直接传递给 StandardScaler,因此可以按预期工作。copy=False 不受支持(构造函数中固定传入 copy=True),因为重塑操作不是就地进行的。对于二维输入,NDStandardScaler 的行为与 StandardScaler 相同:
# Plain 2-D input: four samples with two features each.
train = [[0, 0], [0, 0], [1, 1], [1, 1]]
# fit() returns the scaler itself, so construction and fitting can be chained.
nd_scaler = NDStandardScaler().fit(train)
# Scale the training data first, then a previously unseen sample.
for batch in (train, [[2, 2]]):
    print(nd_scaler.transform(batch))
打印
[[-1. -1.]
[-1. -1.]
[ 1. 1.]
[ 1. 1.]]
[[3. 3.]]
这与 sklearn 文档中 StandardScaler 示例的输出一致。