【发布时间】:2017-11-15 14:05:14
【问题描述】:
我正在尝试将两个图合并为一个:
- http://scikit-learn.org/stable/auto_examples/linear_model/plot_sgd_iris.html
- http://scikit-learn.org/stable/auto_examples/ensemble/plot_voting_decision_regions.html#sphx-glr-auto-examples-ensemble-plot-voting-decision-regions-py
在左图中,我想显示决策边界与对应于 OVA 分类器的超平面,在右图中,我想显示决策概率。
这是目前为止的代码:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
from sklearn import datasets
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
def plot_hyperplane(c, color, fitted_model):
"""
Plot the one-against-all classifiers for the given model.
Parameters
--------------
c : index of the hyperplane to be plot
color : color to be used when drawing the line
fitted_model : the fitted model
"""
xmin, xmax = plt.xlim()
ymin, ymax = plt.ylim()
try:
coef = fitted_model.coef_
intercept = fitted_model.intercept_
except:
return
def line(x0):
return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]
plt.plot([xmin, xmax], [line(xmin), line(xmax)], ls="--", color=color, zorder=3)
def plot_decision_boundary(X, y, fitted_model, features, targets):
"""
This function plots a model decision boundary as well as it tries to plot
the decision probabilities, if available.
Requires a model fitted with two features only.
Parameters
--------------
X : the data to learn
y : the classification labels
fitted_model : the fitted model
"""
cmap = plt.get_cmap('Set3')
prob = cmap
colors = [cmap(i) for i in np.linspace(0, 1, len(fitted_model.classes_))]
plt.figure(figsize=(9.5, 5))
for i, plot_type in enumerate(['Decision Boundary', 'Decision Probabilities']):
plt.subplot(1, 2, i+1)
mesh_step_size = 0.01 # step size in the mesh
x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
xx, yy = np.meshgrid(np.arange(x_min, x_max, mesh_step_size), np.arange(y_min, y_max, mesh_step_size))
# First plot, predicted results using the given model
if i == 0:
Z = fitted_model.predict(np.c_[xx.ravel(), yy.ravel()])
for h, color in zip(fitted_model.classes_, colors):
plot_hyperplane(h, color, fitted_model)
# Second plot, predicted probabilities using the given model
else:
prob = 'RdYlBu_r'
try:
Z = fitted_model.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
except:
plt.text(0.4, 0.5, 'Probabilities Unavailable', horizontalalignment='center',
verticalalignment='center', transform=plt.gca().transAxes, fontsize=12)
plt.axis('off')
break
Z = Z.reshape(xx.shape)
# Display Z
plt.imshow(Z, interpolation='nearest', cmap=prob, alpha=0.5,
extent=(x_min, x_max, y_min, y_max), origin='lower', zorder=1)
# Plot the data points
for i, color in zip(fitted_model.classes_, colors):
idx = np.where(y == i)
plt.scatter(X[idx, 0], X[idx, 1], facecolor=color, edgecolor='k', lw=1,
label=iris.target_names[i], cmap=cmap, alpha=0.8, zorder=2)
plt.title(plot_type + '\n' +
str(fitted_model).split('(')[0]+ ' Test Accuracy: ' + str(np.round(fitted_model.score(X, y), 5)))
plt.xlabel(features[0])
plt.ylabel(features[1])
plt.gca().set_aspect('equal')
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.subplots_adjust(top=0.9, bottom=0.08, wspace=0.02)
plt.show()
if __name__ == '__main__':
iris = datasets.load_iris()
X = iris.data[:, [0, 2]]
y = iris.target
scaler = preprocessing.StandardScaler().fit_transform(X)
clf1 = DecisionTreeClassifier(max_depth=4)
clf2 = KNeighborsClassifier(n_neighbors=7)
clf3 = SVC(kernel='rbf', probability=True)
clf4 = SGDClassifier(alpha=0.001, n_iter=100).fit(X, y)
clf1.fit(X, y)
clf2.fit(X, y)
clf3.fit(X, y)
clf4.fit(X, y)
plot_decision_boundary(X, y, clf1, iris.feature_names, iris.target_names[[0, 2]])
plot_decision_boundary(X, y, clf2, iris.feature_names, iris.target_names[[0, 2]])
plot_decision_boundary(X, y, clf3, iris.feature_names, iris.target_names[[0, 2]])
plot_decision_boundary(X, y, clf4, iris.feature_names, iris.target_names[[0, 2]])
结果:
可以看出,对于最后一个示例(给定代码中的clf4),到目前为止,我无法将超平面绘制在错误的位置。我想知道如何纠正这个问题。应将它们转换为适合模型所用特征的正确范围。
谢谢。
【问题讨论】:
标签: python matplotlib plot scikit-learn