【问题标题】:Stacking Classifier doesn't recognize Keras(堆叠分类器无法识别 Keras)
【发布时间】:2020-08-28 17:20:36
【问题描述】:

我在 5 个 scikit-learn 分类器和一个 Keras 分类器上使用 StackingClassifier。然而,它似乎没有将 Keras 识别为分类器。

相关代码:

from tensorflow.keras import layers
from tensorflow import keras
from keras.constraints import maxnorm

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation,  Flatten, Input
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras import metrics
import joblib
from joblib import parallel_backend
np.random.seed(42)
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import sklearn
from sklearn.ensemble import StackingClassifier
def create_model():
    """Build and compile the binary-classification network.

    The function takes no arguments; it is handed to ``KerasClassifier`` as
    ``build_fn``. All hyperparameters (``best_neurons``, ``best_init_mode``,
    ``best_weight_constraint``, ``best_dropout_rate``, ``best_learn_rate``)
    and the input width (``X_train.shape[1]``) are read from module-level
    names defined elsewhere.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(
        Dense(
            best_neurons,
            input_shape=(X_train.shape[1],),
            kernel_initializer=best_init_mode,
            activation='relu',
            kernel_constraint=maxnorm(best_weight_constraint),
        )
    )
    model.add(Dropout(best_dropout_rate))
    model.add(Flatten())
    # Single sigmoid unit: the model is trained with binary cross-entropy.
    model.add(Dense(units=1, kernel_initializer=best_init_mode, activation='sigmoid'))

    # Use the imported `keras` name (the snippet never imports `tf`) and the
    # `learning_rate` argument instead of the deprecated `lr` alias.
    optimizer = keras.optimizers.RMSprop(learning_rate=best_learn_rate)

    # Compile model
    model.compile(
        loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=[keras.metrics.AUC(), 'accuracy'],
    )
    return model


# --- Base learners, each configured with previously tuned hyperparameters ---

# Keras network wrapped so it exposes the scikit-learn estimator API.
NN_clf = KerasClassifier(
    build_fn=create_model,
    epochs=best_epochs,
    batch_size=best_batch_size,
)

RF_clf = RandomForestClassifier(
    max_depth=best_max_depth_rf,
    n_estimators=best_n_estimators_rf,
    min_samples_leaf=best_min_samples_leaf_rf,
    max_features=best_max_features_rf,
    class_weight=best_class_weight_rf,
    max_samples=best_max_samples_rf,
    random_state=42,
    oob_score=True,
)

KN_clf = KNeighborsClassifier(
    n_neighbors=best_n_neighbors,
    p=best_p,
    leaf_size=best_leaf_size,
)

# Decision tree is currently left out of the ensemble:
# DT_clf = DecisionTreeClassifier(max_depth=best_max_depth_dt, min_samples_leaf=best_min_samples_leaf_dt)

SV_clf = SVC(
    gamma=best_gamma_sv,
    C=best_c_sv,
    kernel=best_kernel_sv,
    random_state=42,
    probability=True,
)

GBC_clf = xgb.XGBClassifier(
    learning_rate=best_learning_rate_gbc,
    random_state=42,
    colsample_bytree=best_colsample_bytree_gbc,
    max_depth=best_max_depth_gbc,
    n_estimators=best_n_estimators_gbc,
    gamma=best_gamma_gbc,
    subsample=best_subsample_gbc,
)

EX_clf = ExtraTreesClassifier(
    max_depth=best_max_depth_ex,
    n_estimators=best_n_estimators_ex,
    min_samples_leaf=best_min_samples_leaf_ex,
    max_features=best_max_features_ex,
    warm_start=False,
    oob_score=True,
    bootstrap=True,
    random_state=42,
)

LR_clf = LogisticRegression(
    random_state=42,
    solver=best_solver,
    penalty=best_penalty,
    class_weight=best_class_weight,
    C=best_log_C,
)

# --- Stack the base learners under a logistic-regression meta-learner ---
estimators = [
    ('RF', RF_clf),
    ('GBC', GBC_clf),
    ('EX', EX_clf),
    ('LR', LR_clf),
    ('KN', KN_clf),
    ('SV', SV_clf),
    ('NN', NN_clf),
]
clf = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
    n_jobs=-1,
)

# Targets are flattened to 1-D with .values.ravel() before fitting/scoring.
clf.fit(X_train, y_train.values.ravel())
print("Stacking model score: %.3f" % clf.score(X_test, y_test.values.ravel()))

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-41-272df6aa838e> in <module>
      2             ('SV', SV_clf), ('NN', NN_clf) ]
      3 clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(), n_jobs=-1)
----> 4 clf.fit(X_train, y_train.values.ravel())
      5 print("Stacking model score: %.3f" % clf.score(X_test, y_test.values.ravel()))

~\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py in fit(self, X, y, sample_weight)
    411         self._le = LabelEncoder().fit(y)
    412         self.classes_ = self._le.classes_
--> 413         return super().fit(X, self._le.transform(y), sample_weight)
    414 
    415     @if_delegate_has_method(delegate='final_estimator_')

~\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py in fit(self, X, y, sample_weight)
    129         # all_estimators contains all estimators, the one to be fitted and the
    130         # 'drop' string.
--> 131         names, all_estimators = self._validate_estimators()
    132         self._validate_final_estimator()
    133 

~\Anaconda3\lib\site-packages\sklearn\ensemble\_base.py in _validate_estimators(self)
    247                 raise ValueError(
    248                     "The estimator {} should be a {}.".format(
--> 249                         est.__class__.__name__, is_estimator_type.__name__[3:]
    250                     )
    251                 )

ValueError: The estimator KerasClassifier should be a classifier.

我正在使用 scikit-learn 2.2 版(原文如此,疑为 0.22 版的笔误)和 TensorFlow 2.x。我在 here 看到过类似的错误,但不想重写代码改用 mlxtend 库。

【问题讨论】:

    标签: tensorflow keras scikit-learn ensemble-learning


    【解决方案1】:

    这个问题与 here 报告的 VotingClassifier 的问题类似。

    解决方案是给 KerasClassifier 实例加上属性 _estimator_type = "classifier"(见下方代码中的 NN_clf._estimator_type = "classifier")。

    注意:请仅提供重现问题的最少代码。

    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
    from sklearn.linear_model import LogisticRegression
    import numpy as np
    from tensorflow.keras import layers
    from tensorflow import keras
    from keras.constraints import maxnorm
    
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, Dropout, Activation,  Flatten, Input
    from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
    from tensorflow.keras import metrics
    import joblib
    from joblib import parallel_backend
    np.random.seed(42)
    from sklearn.model_selection import GridSearchCV
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import chi2
    import sklearn
    from sklearn.ensemble import StackingClassifier
    from sklearn.neighbors import KNeighborsClassifier
    
    def create_model():
        """Build and compile a small binary classifier for 20-feature input.

        The function takes no arguments; it is handed to ``KerasClassifier``
        as ``build_fn``.

        Returns:
            The compiled ``Sequential`` model.
        """
        model = Sequential()
        model.add(Dense(20, input_dim=20, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Flatten())
        # Single sigmoid unit: the model is trained with binary cross-entropy.
        model.add(Dense(units=1, activation='sigmoid'))

        # `learning_rate` replaces the deprecated `lr` alias.
        optimizer = keras.optimizers.RMSprop(learning_rate=0.001)

        # Compile model
        model.compile(
            loss='binary_crossentropy',
            optimizer=optimizer,
            metrics=[keras.metrics.AUC(), 'accuracy'],
        )
        return model
    
    
    # SVC is used below but was missing from the example's imports, which
    # made the snippet fail with NameError before reaching the stacking step.
    from sklearn.svm import SVC

    NN_clf = KerasClassifier(build_fn=create_model, epochs=15, batch_size=32)
    # The fix: StackingClassifier rejects estimators that do not identify as
    # classifiers, so tag the Keras wrapper explicitly.
    NN_clf._estimator_type = "classifier"

    RF_clf = RandomForestClassifier(random_state=42, oob_score=True)
    KN_clf = KNeighborsClassifier()
    SV_clf = SVC(random_state=42, probability=True)
    EX_clf = ExtraTreesClassifier(random_state=42)
    LR_clf = LogisticRegression(random_state=42)

    estimators = [
        ('RF', RF_clf),
        ('EX', EX_clf),
        ('LR', LR_clf),
        ('KN', KN_clf),
        ('SV', SV_clf),
        ('NN', NN_clf),
    ]
    clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())

    # Synthetic data; make_classification() defaults to 20 features, which
    # matches input_dim=20 in create_model.
    X, y = make_classification()

    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    clf.fit(X_train, y_train)
    print("Stacking model score: %.3f" % clf.score(X_test, y_test))
    
    # Stacking model score: 0.967
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2021-03-29
      • 2020-08-02
      • 2021-06-30
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多