import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import matplotlib
from sklearn import datasets
import scipy.cluster.hierarchy as sch
# Select the SimHei font for sans-serif text (presumably so CJK characters in
# figure labels render; the font must be installed on the machine).
matplotlib.rcParams['font.sans-serif'] = ['SimHei']

# Load the customer table and keep columns 3 and 4 as the two clustering
# features (presumably annual income and spending score -- TODO confirm
# against the CSV header).
data = pd.read_csv('023-Mall_Customers.csv')
X = data.iloc[:, [3, 4]].values

# Draw the hierarchical-clustering dendrogram.  Ward linkage merges, at each
# step, the pair of clusters whose union gives the smallest increase in
# within-cluster variance.
plt.figure()
dendrogram = sch.dendrogram(sch.linkage(y=X, method='ward'))
# Build the agglomerative (bottom-up) hierarchical clustering model.
from sklearn.cluster import AgglomerativeClustering

# Ward linkage only supports Euclidean distance, which is also the default,
# so no distance argument is passed: the old ``affinity`` keyword was
# deprecated in scikit-learn 1.2 and removed in 1.4, while its replacement
# ``metric`` does not exist in older releases.  Omitting it works everywhere.
agg = AgglomerativeClustering(n_clusters=5, linkage='ward')
agg.fit(X)
labels = agg.labels_  # cluster index assigned to each row of X
print(agg.n_leaves_)  # number of leaves in the hierarchical tree
# Visualise the clusters: one scatter series per distinct cluster id, drawn on
# a fresh figure so the points do not land on top of an earlier plot.
plt.figure()
# Iterate over the unique cluster ids, not over every sample's label;
# otherwise each cluster is redrawn once per member with a cycling colour.
for cluster_id in np.unique(labels):
    in_cluster = labels == cluster_id
    plt.scatter(X[in_cluster, 0], X[in_cluster, 1], marker='o',
                label='cluster {}'.format(cluster_id))
plt.legend()
# Without an explicit show() the figures are built but never displayed when
# the file is run as a plain script.
plt.show()
# Evaluate the clustering with the mean silhouette coefficient.
from sklearn.metrics import silhouette_score

# ``sample_size`` is intentionally not passed: asking for len(X) samples only
# forces a random permutation of the full data set and yields the same mean
# as scoring every sample directly.
si_score = silhouette_score(X, agg.labels_, metric='euclidean')
print('si_score:{:.4f}'.format(si_score))