【发布时间】:2021-01-04 22:30:02
【问题描述】:
我使用以下代码为四个分类器创建我想要的图:
我有一个包含两列的 DataFrame(df):mean 列是 MDA(置换)重要性的均值,std 列是该特征重要性的标准差。
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
# Function to use to calculate importances
def featImpMDA(clf, X, y, n_splits=5):
    """Compute permutation (MDA) feature importance from out-of-sample log-loss.

    For every K-fold split the classifier is fitted on the training part and
    scored on the held-out part with negative log-loss. Each feature column of
    the held-out part is then shuffled in turn and the score is recomputed.
    The importance of a feature in a fold is the relative score change
    ``(score_before - score_after) / (-score_after)``.

    Parameters
    ----------
    clf : classifier exposing ``fit``, ``predict_proba`` and ``classes_``.
        It is refitted in place on every fold.
    X : pandas.DataFrame
        Feature matrix; rows are selected positionally with ``iloc``.
    y : pandas.Series or pandas.DataFrame
        Targets aligned with ``X``; rows are selected positionally with ``iloc``.
    n_splits : int, default 5
        Number of folds passed to ``KFold``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``X.columns`` with two columns: ``'mean'`` (importance
        averaged over folds) and ``'std'`` (standard deviation over folds
        multiplied by ``n_folds ** -0.5``).
    """
    from sklearn.metrics import log_loss
    # Public import path; the original reached into the private ``_split`` module.
    from sklearn.model_selection import KFold

    cvGen = KFold(n_splits=n_splits)
    n_features = X.shape[1]
    base_scores = []  # one score per fold, before shuffling
    perm_scores = []  # one row per fold, one entry per shuffled feature

    for train, test in cvGen.split(X=X):
        X0, y0 = X.iloc[train, :], y.iloc[train]
        X1, y1 = X.iloc[test, :], y.iloc[test]
        fit = clf.fit(X=X0, y=y0)  # the fit occurs here
        prob = fit.predict_proba(X1)  # prediction before shuffling
        base_scores.append(-log_loss(y1, prob, labels=clf.classes_))

        fold_scores = []
        for k in range(n_features):
            X1_ = X1.copy(deep=True)
            # Assign a shuffled copy back into the frame instead of shuffling
            # ``.values`` in place: ``.values`` may be a copy (shuffle would be
            # lost) or a read-only view under pandas copy-on-write (shuffle
            # would raise). Positional access also tolerates duplicate or
            # MultiIndex column labels.
            X1_.iloc[:, k] = np.random.permutation(X1_.iloc[:, k].to_numpy())
            prob = fit.predict_proba(X1_)  # prediction after shuffling
            fold_scores.append(-log_loss(y1, prob, labels=clf.classes_))
        perm_scores.append(fold_scores)

    # Build float-typed containers in one go rather than growing object-dtype
    # frames cell by cell.
    scr0 = pd.Series(base_scores, dtype=float)
    scr1 = pd.DataFrame(perm_scores, columns=X.columns, dtype=float)

    imp = (-1 * scr1).add(scr0, axis=0)  # score_before - score_after, per fold
    imp = imp / (-1 * scr1)  # relative to the (positive) log-loss after shuffling
    imp = pd.concat({'mean': imp.mean(),
                     'std': imp.std() * imp.shape[0] ** -.5}, axis=1)  # CLT
    return imp
# Import data
data = load_breast_cancer()
X, y = data.data, data.target
# Pass the feature names directly: wrapping them in a list makes pandas build
# a one-level MultiIndex, so every column label becomes a 1-tuple.
X = pd.DataFrame(X, columns=data.feature_names)
# Use a 1-D Series for the target; a single-column DataFrame is a column
# vector, which scikit-learn estimators warn about when fitting.
X_train, X_test, y_train, y_test = train_test_split(X, pd.Series(y), random_state=42)
# Calculate importances
# Each entry pairs a short key with the classifier to evaluate; the loop runs
# featImpMDA on the training split for each one, in the listed order.
_estimators = [
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('et', ExtraTreesClassifier(n_estimators=100, random_state=42)),
    ('xgb', XGBClassifier(n_estimators=100, random_state=42)),
    ('dt', DecisionTreeClassifier(random_state=42)),
]
_mda_by_model = {}
for _key, clf in _estimators:
    _mda_by_model[_key] = featImpMDA(clf, X_train, y_train, n_splits=5)

# Expose the results under the names the plotting code expects.
bc_rf = _mda_by_model['rf']
bc_et = _mda_by_model['et']
bc_xgb = _mda_by_model['xgb']
bc_dt = _mda_by_model['dt']
def _plot_top_importances(imp, model_label, top_n=30):
    """Draw a horizontal bar chart of the ``top_n`` largest mean importances.

    Parameters
    ----------
    imp : pandas.DataFrame
        Frame with a ``'mean'`` column (bar length) and a ``'std'`` column
        (error-bar length).
    model_label : str
        Short model name inserted into the figure title.
    top_n : int, default 30
        Number of rows, ranked by ``'mean'`` descending, to keep.

    Returns
    -------
    tuple
        ``(top, ax)``: the kept rows sorted by ``'mean'`` ascending, and the
        axes returned by the pandas plot call.
    """
    # Non-inplace sorts: sorting an ``iloc`` slice in place can trigger
    # pandas' SettingWithCopy warning.
    top = imp.sort_values(by='mean', ascending=False).iloc[:top_n, :]
    # Ascending order so the largest bar ends up at the top of a barh plot.
    top = top.sort_values(by='mean', ascending=True)

    plt.figure(figsize=(3, 10))
    ax = top['mean'].plot(kind='barh', color='b', alpha=.40, xerr=top['std'],
                          error_kw={'ecolor': 'r'})
    plt.title('{} full {} feature importances '.format('US', model_label), fontsize=10)
    plt.xlabel("Permutation Importance (MDA)")
    return top, ax


# Plot 1 RF
bc_rf, ax = _plot_top_importances(bc_rf, 'RF')
# Plot 2 ET
bc_et, ax = _plot_top_importances(bc_et, 'ET')
# Plot 3 XGB
bc_xgb, ax = _plot_top_importances(bc_xgb, 'XGB')
# Plot 4 DT
bc_dt, ax = _plot_top_importances(bc_dt, 'DT')
但是,我希望把这 4 个分类器的图放在同一张图中,作为 4 个子图显示。我不确定该怎么做。任何帮助将不胜感激!
【问题讨论】:
-
如果没有看到数据、您正在生成的当前绘图以及您想要的绘图的更详细描述,我们无法真正提供明确的帮助。
-
感谢您的建议,我已经用数据集和所需的图表更新了问题。只是不知道如何使它们成为子图。
标签: python pandas matplotlib plot subplot