Note
Go to the end to download the full example code, or to run this example in your browser via Binder.
K-means 聚类#
该图显示了:
左上角:K-means 算法使用 8 个聚类的结果。
右上角:使用 3 个聚类的结果。
左下角:初始化不佳对分类过程的影响:将 n_init 设置为 1(默认值为 10)后,算法以不同质心种子运行的次数会减少。
右下角:真实情况。
# 代码来源:Gaël Varoquaux
# 由Jaques Grobler修改用于文档
# SPDX-License-Identifier: BSD-3-Clause
import matplotlib.pyplot as plt
# 尽管以下导入没有被直接使用,但它对于在matplotlib < 3.2版本中实现3D投影是必需的。
import mpl_toolkits.mplot3d # noqa: F401
import numpy as np
from sklearn import datasets
from sklearn.cluster import KMeans
# Seed NumPy's global random number generator before anything is fitted
# (presumably so the KMeans runs below are repeatable -- confirm against the
# scikit-learn random_state documentation).
np.random.seed(5)

# Iris data set: X holds the feature matrix, y the target labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# (name, estimator) pairs, one per clustering panel:
#   * 8 clusters,
#   * 3 clusters,
#   * 3 clusters with a single random initialization (n_init=1, init="random").
estimators = [
    ("k_means_iris_8", KMeans(n_clusters=8)),
    ("k_means_iris_3", KMeans(n_clusters=3)),
    (
        "k_means_iris_bad_init",
        KMeans(n_clusters=3, init="random", n_init=1),
    ),
]
# One figure holding a 2x2 grid of 3D axes. The first three cells show the
# clustering produced by each estimator.
fig = plt.figure(figsize=(10, 8))
titles = ["8 clusters", "3 clusters", "3 clusters, bad initialization"]
for idx, ((_name, est), title) in enumerate(zip(estimators, titles)):
    # Subplot positions are 1-based, hence idx + 1.
    ax = fig.add_subplot(2, 2, idx + 1, projection="3d", elev=48, azim=134)

    # Fit on the full data set and read back the cluster label per sample.
    est.fit(X)
    labels = est.labels_

    # Columns 3, 0 and 2 of X go on the x, y and z axes (labelled below as
    # petal width, sepal length and petal length); points are coloured by
    # their assigned cluster.
    ax.scatter(
        X[:, 3], X[:, 0], X[:, 2], c=labels.astype(float), edgecolor="k"
    )

    # Hide the tick labels on all three axes; only the axis names are shown.
    ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels([])
    ax.zaxis.set_ticklabels([])
    ax.set_xlabel("Petal width")
    ax.set_ylabel("Sepal length")
    ax.set_zlabel("Petal length")
    ax.set_title(title)
# Plot the ground truth in the fourth (bottom-right) cell of the 2x2 grid,
# using the same viewing angle as the clustering panels.
ax = fig.add_subplot(2, 2, 4, projection="3d", elev=48, azim=134)

# Annotate each class with its name at the mean position of its samples.
for name, label in [("Setosa", 0), ("Versicolour", 1), ("Virginica", 2)]:
    ax.text3D(
        X[y == label, 3].mean(),
        X[y == label, 0].mean(),
        # Offset by +2 along z so the text is placed above the class mean.
        X[y == label, 2].mean() + 2,
        name,
        horizontalalignment="center",
        bbox=dict(alpha=0.2, edgecolor="w", facecolor="w"),
    )

# Same three feature columns as the clustering panels, coloured by the true
# target labels instead of fitted cluster labels.
ax.scatter(X[:, 3], X[:, 0], X[:, 2], c=y, edgecolor="k")

# Hide the tick labels on all three axes; only the axis names are shown.
ax.xaxis.set_ticklabels([])
ax.yaxis.set_ticklabels([])
ax.zaxis.set_ticklabels([])
ax.set_xlabel("Petal width")
ax.set_ylabel("Sepal length")
ax.set_zlabel("Petal length")
ax.set_title("Ground Truth")

# Add spacing between the four subplots, then display the figure.
plt.subplots_adjust(wspace=0.25, hspace=0.25)
plt.show()
Total running time of the script: (0 minutes 0.127 seconds)
Related examples
使用鸢尾花数据集的PCA示例
鸢尾花数据集
稀疏性示例:仅拟合特征1和特征2
手写数字数据上的K-Means聚类演示