【发布时间】:2017-11-25 13:49:47
【问题描述】:
我一直收到一个
IndexError: only integers, slices (:), ellipsis (...), numpy.newaxis (None) and integer or boolean arrays are valid indices
在尝试使我的数据框适合以下管道时。训练和测试是具有相同列的两个数据框。有不同的列,但我只想通过 ItemSelector 关注其中的三个。
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn import preprocessing
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.pipeline import Pipeline
class ItemSelector(BaseEstimator, TransformerMixin):
def __init__(self, column):
self.column = column
def fit(self, X, y=None):
return self
def transform(self, X):
return X[self.column]
def predictCases(train, test):
target_names = sorted(list(set(train['TARGET'].values)))
y_train = np.array([target_names.index(x) for x in train['TARGET'].values])
y_test = np.array([target_names.index(x) for x in test['TARGET'].values])
# train and predict
classifier = Pipeline([
('union', FeatureUnion([
('text', Pipeline([
('selector', ItemSelector(column='TEXT')),
('tfidf_vec', TfidfVectorizer())
])),
('feature1', Pipeline([
('selector', ItemSelector(column='CATEG_FEAT1')),
('lbe', LabelEncoder())
])),
('feature2', Pipeline([
('selector', ItemSelector(column='CATEG_FEAT2')),
('lbe', LabelEncoder())
]))
])),
('clf', OneVsRestClassifier(LinearSVC()))])
classifier.fit(train.values, y_train)
predicted = classifier.predict(test.values)
return(metrics.precision_recall_fscore_support(y_test, predicted))
完全错误:
IndexError Traceback (most recent call last)
<ipython-input-19-95d9d0c337f4> in <module>()
----> 1 tt = predictCases(train_resampled, validate)
<ipython-input-17-efc951f4192e> in predictCases(train, test)
24 ])),
25 ('clf', OneVsRestClassifier(LinearSVC()))])
---> 26 classifier.fit(train.values, y_train)
27 predicted = classifier.predict(test.values)
28 return(metrics.precision_recall_fscore_support(y_test, predicted))
C:\\Anaconda3\lib\site-packages\sklearn\pipeline.py in fit(self, X, y, **fit_params)
266 This estimator
267 """
--> 268 Xt, fit_params = self._fit(X, y, **fit_params)
269 if self._final_estimator is not None:
270 self._final_estimator.fit(Xt, y, **fit_params)
C:\\Anaconda3\lib\site-packages\sklearn\pipeline.py in _fit(self, X, y, **fit_params)
232 pass
233 elif hasattr(transform, "fit_transform"):
--> 234 Xt = transform.fit_transform(Xt, y, **fit_params_steps[name])
235 else:
236 Xt = transform.fit(Xt, y, **fit_params_steps[name]) \
C:\\Anaconda3\lib\site-packages\sklearn\pipeline.py in fit_transform(self, X, y, **fit_params)
732 delayed(_fit_transform_one)(trans, name, weight, X, y,
733 **fit_params)
--> 734 for name, trans, weight in self._iter())
735
736 if not result:
C:\\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
756 # was dispatched. In particular this covers the edge
757 # case of Parallel used with an exhausted iterator.
--> 758 while self.dispatch_one_batch(iterator):
759 self._iterating = True
760 else:
C:\\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
606 return False
607 else:
--> 608 self._dispatch(tasks)
609 return True
610
C:\\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
569 dispatch_timestamp = time.time()
570 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 571 job = self._backend.apply_async(batch, callback=cb)
572 self._jobs.append(job)
573
C:\\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
107 def apply_async(self, func, callback=None):
108 """Schedule a func to be run"""
--> 109 result = ImmediateResult(func)
110 if callback:
111 callback(result)
C:\\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
324 # Don't delay the application, to avoid keeping the input
325 # arguments in memory
--> 326 self.results = batch()
327
328 def get(self):
C:\\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
C:\\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
C:\\Anaconda3\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, name, weight, X, y, **fit_params)
575 **fit_params):
576 if hasattr(transformer, 'fit_transform'):
--> 577 res = transformer.fit_transform(X, y, **fit_params)
578 else:
579 res = transformer.fit(X, y, **fit_params).transform(X)
C:\\Anaconda3\lib\site-packages\sklearn\pipeline.py in fit_transform(self, X, y, **fit_params)
299 """
300 last_step = self._final_estimator
--> 301 Xt, fit_params = self._fit(X, y, **fit_params)
302 if hasattr(last_step, 'fit_transform'):
303 return last_step.fit_transform(Xt, y, **fit_params)
C:\\Anaconda3\lib\site-packages\sklearn\pipeline.py in _fit(self, X, y, **fit_params)
232 pass
233 elif hasattr(transform, "fit_transform"):
--> 234 Xt = transform.fit_transform(Xt, y, **fit_params_steps[name])
235 else:
236 Xt = transform.fit(Xt, y, **fit_params_steps[name]) \
C:\\Anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
495 else:
496 # fit method of arity 2 (supervised transformation)
--> 497 return self.fit(X, y, **fit_params).transform(X)
498
499
<ipython-input-2-fdc42fd9d831> in transform(self, X)
10
11 def transform(self, X):
---> 12 return X[self.column]
IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
编辑:
如果我在 fit 中使用 train 而不是 train.values,我会收到以下错误:
TypeError: fit_transform() takes 2 positional arguments but 3 were given
【问题讨论】:
标签: python python-3.x scikit-learn pipeline