【发布时间】:2021-04-25 17:24:29
【问题描述】:
import os
from pylab import rcParams
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns; sns.set()
from numpy import *
from scipy import stats
from pandas.plotting import scatter_matrix
import sklearn
import warnings
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV
from imblearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
# --- Data loading -----------------------------------------------------------
# Read the attrition workbook from the current working directory.
data = pd.read_excel(r'Attrition Data Exercise.xlsx')
# Features: every column from position 3 up to (but excluding) the last one.
X = data.iloc[:, 3:-1].values
# Target: the last column.
y = data.iloc[:, -1].values
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
# --- Categorical encoding ---------------------------------------------------
# One-hot encode the columns at positions 2, 5, 11, 13 and 28 of X and
# ordinal-encode column 0; all remaining columns are passed through unchanged.
# NOTE(review): the positions are relative to X (i.e. after the iloc slice),
# not to the original sheet — confirm they point at the intended columns.
ct = ColumnTransformer(transformers=
[('one_encoder', OneHotEncoder(), [2, 5, 11, 13, 28]),
('ord_encoder', OrdinalEncoder(), [0])],
remainder='passthrough')
# The encoders are fitted on the full X, i.e. before the train/test split below.
X = np.array(ct.fit_transform(X))
from sklearn.model_selection import train_test_split
# --- Train/test split -------------------------------------------------------
# Hold out 25% of the rows for testing; random_state=0 fixes the shuffle seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
from sklearn.preprocessing import StandardScaler
# --- Feature scaling --------------------------------------------------------
# The scaler is fitted on the training split only and then reused, unchanged,
# to transform the test split.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# --- Network definition -----------------------------------------------------
# `ann` is a concrete Sequential model object (not a function that builds one).
# Layer stack: Dropout(0.3) -> Dense(6, relu) -> Dropout(0.3) -> Dense(3, relu)
# -> Dense(1, sigmoid). Both hidden Dense layers use an L1 kernel regularizer
# and an L2 bias regularizer.
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dropout(rate=0.3))
ann.add(tf.keras.layers.Dense(units=6, activation='relu', kernel_regularizer='l1', bias_regularizer='l2'))
ann.add(tf.keras.layers.Dropout(rate=0.3))
ann.add(tf.keras.layers.Dense(units=3, activation='relu', kernel_regularizer='l1', bias_regularizer='l2'))
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))
# Adam optimizer with explicitly spelled-out hyperparameters.
opt = tf.keras.optimizers.Adam(
learning_rate=0.001,
beta_1=0.9,
beta_2=0.999,
epsilon=1e-08)
# Compile for binary classification: binary cross-entropy loss, reporting
# accuracy and recall as metrics.
ann.compile(optimizer = opt, loss = 'binary_crossentropy', metrics = ['accuracy', tf.keras.metrics.Recall()])
以上代码运行成功。当我在单元格中运行以下代码时,它会导致错误。
# Two-step pipeline: SMOTE oversampling ('smt') followed by the Keras model
# wrapped as a scikit-learn style classifier ('model', trained for 170 epochs).
# NOTE(review): `build_fn` is given `ann`, an already-constructed model object,
# whereas the wrapper expects a callable that returns a model. This is the
# likely source of the "first argument to `Layer.call`" ValueError — confirm by
# passing a function that builds and compiles a fresh model instead.
pipe = Pipeline([('smt', SMOTE()), ('model', KerasClassifier(build_fn = ann, verbose = 0, epochs=170))])
# linspace(0.5, 0.5, 1) produces a single candidate value: 0.5.
weights = np.linspace(0.5, 0.5, 1)
# Grid search over SMOTE's `sampling_strategy` (addressed through the 'smt'
# step prefix), scored with F1 and using 4-fold cross-validation.
gsc = GridSearchCV(
estimator = pipe,
param_grid = {
'smt__sampling_strategy' : weights
},
scoring = 'f1',
cv = 4)
# Run the search on the scaled training split.
grid_result = gsc.fit(X_train, y_train)
上面的代码导致如下错误:
ValueError: The first argument to `Layer.call` must always be passed
知道我可能做错了什么或可以改进的地方吗? 我也尝试用 KerasRegressor 替换 KerasClassifier,只是为了看看是否有什么变化,但没有任何变化。本质上出了什么问题?
我正在尝试将 imblearn 的 Pipeline 类与 GridSearchCV 结合使用,为不平衡数据集的分类寻找最佳参数。我希望只对训练集进行重采样,而不对验证集进行重采样,imblearn 的 Pipeline 似乎正是这样做的。但是,我在实现已被采纳的解决方案时遇到了错误。
还附上了指向错误跟踪的屏幕截图的链接。Error Trace Complete
【问题讨论】:
-
KerasClassifier的build_fn参数应该是可调用的,所以不妨试试build_fn = lambda: ann -
请不要重复标题中的标签(已编辑)。错误究竟是在哪里弹出的?请发布完整的错误跟踪(不在此处,更新您的帖子)
标签: python machine-learning keras tensorflow2.0