如何绘制聚类中心?
How to plot the cluster centers?
我从头实现了下面这个 k-means 聚类。如何在图中所示的散点图上绘制 k=3 时初始随机选取的聚类中心?(图片:Photo)
使用的是 Iris 数据集。
import numpy as np
from scipy.spatial.distance import cdist
def kmeans(x, k, no_of_iterations):
    """Cluster the rows of ``x`` into ``k`` groups with k-means.

    Args:
        x: 2-D array-like; each row is one data point.
        k: Number of clusters. Initial centroids are ``k`` distinct rows of
            ``x`` drawn with ``np.random.choice``.
        no_of_iterations: Maximum number of centroid-update passes. The loop
            stops earlier once the assignment no longer changes.

    Returns:
        1-D integer array with one cluster index in ``range(k)`` per row
        of ``x``.

    Raises:
        ValueError: If ``k`` exceeds the number of rows, because the initial
            centroids are sampled without replacement.
    """
    x = np.asarray(x, dtype=float)

    # Randomly choose k distinct rows as the starting centroids.
    seed_rows = np.random.choice(len(x), k, replace=False)
    centroids = x[seed_rows, :]

    # Assign every point to its nearest centroid (ties go to the lowest index).
    points = np.argmin(cdist(x, centroids, 'euclidean'), axis=1)

    for _ in range(no_of_iterations):
        updated = centroids.copy()
        for cluster in range(k):
            members = x[points == cluster]
            # The mean of an empty cluster is NaN and would poison every
            # later distance, so an empty cluster keeps its old centroid.
            if len(members):
                updated[cluster] = members.mean(axis=0)
        centroids = updated

        new_points = np.argmin(cdist(x, centroids, 'euclidean'), axis=1)
        # Unchanged labels mean a fixed point: further passes are no-ops.
        if np.array_equal(new_points, points):
            break
        points = new_points
    return points
您可以通过 matplotlib 的 scatter 函数绘制数据点和聚类中心,并根据 kmeans 计算出的分组为各点分配颜色。
下面是一个示例(kmeans 函数现在还会返回质心)。
import matplotlib.pyplot as plt
import seaborn as sns # for the iris dataset
import numpy as np
from scipy.spatial.distance import cdist
def kmeans(x, k, no_of_iterations=100):
    """Cluster the rows of ``x`` into ``k`` groups with k-means.

    Args:
        x: 2-D array-like; each row is one data point.
        k: Number of clusters. Initial centroids are ``k`` distinct rows of
            ``x`` drawn with ``np.random.choice``.
        no_of_iterations: Maximum number of centroid-update passes. The loop
            stops earlier once the assignment no longer changes.

    Returns:
        Tuple ``(points, centroids)``: ``points`` is a 1-D integer array with
        one cluster index in ``range(k)`` per row of ``x``; ``centroids`` is
        a ``(k, n_features)`` float array of the final cluster centers.

    Raises:
        ValueError: If ``k`` exceeds the number of rows, because the initial
            centroids are sampled without replacement.
    """
    x = np.asarray(x, dtype=float)

    # Randomly choose k distinct rows as the starting centroids.
    seed_rows = np.random.choice(len(x), k, replace=False)
    centroids = x[seed_rows, :]

    # Assign every point to its nearest centroid (ties go to the lowest index).
    points = np.argmin(cdist(x, centroids, 'euclidean'), axis=1)

    for _ in range(no_of_iterations):
        updated = centroids.copy()
        for cluster in range(k):
            members = x[points == cluster]
            # The mean of an empty cluster is NaN and would poison every
            # later distance, so an empty cluster keeps its old centroid.
            if len(members):
                updated[cluster] = members.mean(axis=0)
        centroids = updated

        new_points = np.argmin(cdist(x, centroids, 'euclidean'), axis=1)
        # Unchanged labels mean a fixed point: further passes are no-ops.
        if np.array_equal(new_points, points):
            break
        points = new_points
    return points, centroids
# Load the iris table and keep only the two sepal columns as features.
iris = sns.load_dataset('iris')
x = iris[['sepal_length', 'sepal_width']].to_numpy()

# Cluster into three groups; kmeans yields per-row labels and the centroids.
k = 3
points, centroids = kmeans(x, k)

colors = plt.cm.Set2.colors
ring_style = dict(facecolor='none', lw=3, s=100)

# First pass: one hollow ring per centroid, in that cluster's color.
for cluster, shade in enumerate(colors[:k]):
    plt.scatter(centroids[cluster, 0], centroids[cluster, 1],
                edgecolor=shade, label=f'centroid {cluster}', **ring_style)

# Second pass: the data points, colored by the cluster they were assigned to.
for cluster, shade in enumerate(colors[:k]):
    in_cluster = points == cluster
    plt.scatter(x[in_cluster, 0], x[in_cluster, 1], color=shade, label=f'set {cluster}')

plt.legend(ncol=2)
plt.show()
下面的示例同时显示数据集给定的目标类别名称和 kmeans 得到的近似分组。请注意,kmeans 簇编号的顺序是随机的。较大的背景圆圈表示给定的目标类别;较小的圆圈来自 kmeans,它们很好地聚集在各自的质心周围。
from sklearn.datasets import load_iris
# Use the first two feature columns of scikit-learn's iris data.
iris_data = load_iris()
x = iris_data.data[:, :2]

# Background layer: large translucent circles colored by the given target.
color_givens = ['magenta', 'gold', 'cyan']
given_pairs = zip(iris_data.target_names, color_givens)
for target_id, (name, shade) in enumerate(given_pairs):
    in_target = iris_data.target == target_id
    plt.scatter(x[in_target, 0], x[in_target, 1],
                color=shade, s=150, alpha=0.6, label=f'given {name}')

# Foreground layer: the k-means result on the same two features.
k = 3
points, centroids = kmeans(x, k)
colors_kmeans = plt.cm.Set1.colors
ring_style = dict(facecolor='none', lw=3, s=150)

# Hollow rings mark the centroids.
for cluster, shade in enumerate(colors_kmeans[:k]):
    plt.scatter(centroids[cluster, 0], centroids[cluster, 1],
                edgecolor=shade, label=f'centroid {cluster}', **ring_style)

# Default-size dots mark the points, colored by assigned cluster.
for cluster, shade in enumerate(colors_kmeans[:k]):
    in_cluster = points == cluster
    plt.scatter(x[in_cluster, 0], x[in_cluster, 1], color=shade, label=f'set {cluster}')

plt.xlabel(iris_data.feature_names[0])
plt.ylabel(iris_data.feature_names[1])
plt.legend(ncol=3)
plt.show()
我从头实现了下面这个 k-means 聚类。如何在图中所示的散点图上绘制 k=3 时初始随机选取的聚类中心?(图片:Photo)使用的是 Iris 数据集。
import numpy as np
from scipy.spatial.distance import cdist
def kmeans(x, k, no_of_iterations):
    """Cluster the rows of ``x`` into ``k`` groups with k-means.

    Args:
        x: 2-D array-like; each row is one data point.
        k: Number of clusters. Initial centroids are ``k`` distinct rows of
            ``x`` drawn with ``np.random.choice``.
        no_of_iterations: Maximum number of centroid-update passes. The loop
            stops earlier once the assignment no longer changes.

    Returns:
        1-D integer array with one cluster index in ``range(k)`` per row
        of ``x``.

    Raises:
        ValueError: If ``k`` exceeds the number of rows, because the initial
            centroids are sampled without replacement.
    """
    x = np.asarray(x, dtype=float)

    # Randomly choose k distinct rows as the starting centroids.
    seed_rows = np.random.choice(len(x), k, replace=False)
    centroids = x[seed_rows, :]

    # Assign every point to its nearest centroid (ties go to the lowest index).
    points = np.argmin(cdist(x, centroids, 'euclidean'), axis=1)

    for _ in range(no_of_iterations):
        updated = centroids.copy()
        for cluster in range(k):
            members = x[points == cluster]
            # The mean of an empty cluster is NaN and would poison every
            # later distance, so an empty cluster keeps its old centroid.
            if len(members):
                updated[cluster] = members.mean(axis=0)
        centroids = updated

        new_points = np.argmin(cdist(x, centroids, 'euclidean'), axis=1)
        # Unchanged labels mean a fixed point: further passes are no-ops.
        if np.array_equal(new_points, points):
            break
        points = new_points
    return points
您可以通过 matplotlib 的 scatter 函数绘制数据点和聚类中心,并根据 kmeans 计算出的分组为各点分配颜色。
下面是一个示例(kmeans 函数现在还会返回质心)。
import matplotlib.pyplot as plt
import seaborn as sns # for the iris dataset
import numpy as np
from scipy.spatial.distance import cdist
def kmeans(x, k, no_of_iterations=100):
    """Cluster the rows of ``x`` into ``k`` groups with k-means.

    Args:
        x: 2-D array-like; each row is one data point.
        k: Number of clusters. Initial centroids are ``k`` distinct rows of
            ``x`` drawn with ``np.random.choice``.
        no_of_iterations: Maximum number of centroid-update passes. The loop
            stops earlier once the assignment no longer changes.

    Returns:
        Tuple ``(points, centroids)``: ``points`` is a 1-D integer array with
        one cluster index in ``range(k)`` per row of ``x``; ``centroids`` is
        a ``(k, n_features)`` float array of the final cluster centers.

    Raises:
        ValueError: If ``k`` exceeds the number of rows, because the initial
            centroids are sampled without replacement.
    """
    x = np.asarray(x, dtype=float)

    # Randomly choose k distinct rows as the starting centroids.
    seed_rows = np.random.choice(len(x), k, replace=False)
    centroids = x[seed_rows, :]

    # Assign every point to its nearest centroid (ties go to the lowest index).
    points = np.argmin(cdist(x, centroids, 'euclidean'), axis=1)

    for _ in range(no_of_iterations):
        updated = centroids.copy()
        for cluster in range(k):
            members = x[points == cluster]
            # The mean of an empty cluster is NaN and would poison every
            # later distance, so an empty cluster keeps its old centroid.
            if len(members):
                updated[cluster] = members.mean(axis=0)
        centroids = updated

        new_points = np.argmin(cdist(x, centroids, 'euclidean'), axis=1)
        # Unchanged labels mean a fixed point: further passes are no-ops.
        if np.array_equal(new_points, points):
            break
        points = new_points
    return points, centroids
# Load the iris table and keep only the two sepal columns as features.
iris = sns.load_dataset('iris')
x = iris[['sepal_length', 'sepal_width']].to_numpy()

# Cluster into three groups; kmeans yields per-row labels and the centroids.
k = 3
points, centroids = kmeans(x, k)

colors = plt.cm.Set2.colors
ring_style = dict(facecolor='none', lw=3, s=100)

# First pass: one hollow ring per centroid, in that cluster's color.
for cluster, shade in enumerate(colors[:k]):
    plt.scatter(centroids[cluster, 0], centroids[cluster, 1],
                edgecolor=shade, label=f'centroid {cluster}', **ring_style)

# Second pass: the data points, colored by the cluster they were assigned to.
for cluster, shade in enumerate(colors[:k]):
    in_cluster = points == cluster
    plt.scatter(x[in_cluster, 0], x[in_cluster, 1], color=shade, label=f'set {cluster}')

plt.legend(ncol=2)
plt.show()
下面的示例同时显示数据集给定的目标类别名称和 kmeans 得到的近似分组。请注意,kmeans 簇编号的顺序是随机的。较大的背景圆圈表示给定的目标类别;较小的圆圈来自 kmeans,它们很好地聚集在各自的质心周围。
from sklearn.datasets import load_iris
# Use the first two feature columns of scikit-learn's iris data.
iris_data = load_iris()
x = iris_data.data[:, :2]

# Background layer: large translucent circles colored by the given target.
color_givens = ['magenta', 'gold', 'cyan']
given_pairs = zip(iris_data.target_names, color_givens)
for target_id, (name, shade) in enumerate(given_pairs):
    in_target = iris_data.target == target_id
    plt.scatter(x[in_target, 0], x[in_target, 1],
                color=shade, s=150, alpha=0.6, label=f'given {name}')

# Foreground layer: the k-means result on the same two features.
k = 3
points, centroids = kmeans(x, k)
colors_kmeans = plt.cm.Set1.colors
ring_style = dict(facecolor='none', lw=3, s=150)

# Hollow rings mark the centroids.
for cluster, shade in enumerate(colors_kmeans[:k]):
    plt.scatter(centroids[cluster, 0], centroids[cluster, 1],
                edgecolor=shade, label=f'centroid {cluster}', **ring_style)

# Default-size dots mark the points, colored by assigned cluster.
for cluster, shade in enumerate(colors_kmeans[:k]):
    in_cluster = points == cluster
    plt.scatter(x[in_cluster, 0], x[in_cluster, 1], color=shade, label=f'set {cluster}')

plt.xlabel(iris_data.feature_names[0])
plt.ylabel(iris_data.feature_names[1])
plt.legend(ncol=3)
plt.show()