同一组不会出现在两个不同的折叠中(不同组的数量必须至少等于折叠的数量)
在 GroupKFold 中,groups 数组的长度必须与样本数量相同(每个样本对应一个组标签,形状为 (n_samples,))
以下面的 X、y 和 groups 数据为例:
import numpy as np
import pandas as pd
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier
import datetime
# Toy data set: four samples (rows) with four feature values each.
X = np.array(
    [
        [1, 2, 1, 1],
        [3, 4, 7, 8],
        [5, 6, 1, 3],
        [7, 8, 4, 7],
    ]
)
# One class label per row of X.
y = np.array([0, 2, 1, 2])
# One group id per row of X; rows 1 and 3 share group 1, giving three
# distinct groups in total.
groups = np.array([2, 1, 0, 1])
# Use one fold per distinct group. A NumPy array has no `.unique`
# attribute, so the distinct group ids are counted with np.unique().
group_kfold = GroupKFold(n_splits=len(np.unique(groups)))
# Reports the number of splitting iterations; the return value is not
# stored, so this line is only informative when run interactively.
group_kfold.get_n_splits(X, y, groups)
# Candidate values for each hyper-parameter handed to the grid search.
# Every combination is tried: 2 * 2 * 2 * 2 * 1 * 2 * 2 = 64 settings.
param_grid = {
    "min_child_weight": [50, 100],
    "subsample": [0.1, 0.2],
    "colsample_bytree": [0.1, 0.2],
    "max_depth": [2, 3],
    "learning_rate": [0.01],
    "n_estimators": [100, 500],
    "reg_lambda": [0.1, 0.2],
}
# Base estimator with default settings; the grid search overrides the
# parameters listed in param_grid for each candidate.
xgb = XGBClassifier()
# Pass the splitter itself as `cv` (rather than a one-shot generator from
# split()) so the search object can be fitted more than once; n_jobs=-1
# uses all available CPU cores.
grid_search = GridSearchCV(xgb, param_grid, cv=group_kfold, n_jobs=-1)
# The label array is named `y` (lower case). `groups` is forwarded to the
# GroupKFold splitter so that no group appears in more than one fold.
# NOTE(review): in the toy data every group holds a single class, so each
# training fold lacks one label; XGBoost may reject that -- confirm with
# real data.
# fit() returns the fitted GridSearchCV instance itself.
result = grid_search.fit(X, y, groups=groups)