预测概率可用于可视化模型性能。真实标签可以用颜色来表示。
试试这个例子:
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

# Generate a synthetic classification dataset: 1000 samples, 4 features,
# 2 of them informative and none redundant. random_state pins the data so
# the plot is reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=4,
    n_informative=2,
    n_redundant=0,
    random_state=1,
    shuffle=False,
)

# Fit a logistic regression on the full dataset, then keep column 1 of
# predict_proba, i.e. the predicted probability of the positive class (1).
lr = LogisticRegression(random_state=0, solver='lbfgs', max_iter=10)
lr.fit(X, y)
prediction = lr.predict_proba(X)[:, 1]

plt.figure(figsize=(15, 7))

# The values are probabilities, so they lie in [0, 1]. Passing the same
# range to both calls gives both classes identical bucket edges; without it
# each call derives its edges from its own subset's min/max and the bar
# heights of the two classes cannot be compared bucket by bucket.
plt.hist(prediction[y == 0], bins=50, range=(0, 1), label='Negatives')
# The positives are drawn semi-transparent in red on top, so the overlap
# with the negatives stays visible.
plt.hist(prediction[y == 1], bins=50, range=(0, 1), label='Positives',
         alpha=0.7, color='r')

plt.xlabel('Probability of being Positive Class', fontsize=25)
plt.ylabel('Number of records in each bucket', fontsize=25)
plt.legend(fontsize=15)
plt.tick_params(axis='both', labelsize=25, pad=5)
plt.show()