绘制簇矩阵
Plot cluster matrix
我想使用以下 pandas 数据框从 scikit-learn 的 K-means 绘制一个聚类矩阵:
import pandas as pd
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset shipped with scikit-learn (toy dataset).
cancer = load_breast_cancer()
# Pass the feature names directly: wrapping them in an extra list makes
# pandas build a one-level MultiIndex for the columns instead of a flat Index.
data = pd.DataFrame(cancer.data, columns=cancer.feature_names)
# Keep the four feature columns at positions 4..7 and give them short names.
df = data.iloc[:, 4:8]
df.columns = ['smoothness', 'compactness', 'concavity', 'concave points']
df
+----+--------------+---------------+-------------+------------------+
| | smoothness | compactness | concavity | concave points |
|----+--------------+---------------+-------------+------------------|
| 0 | 0.1184 | 0.2776 | 0.3001 | 0.1471 |
| 1 | 0.08474 | 0.07864 | 0.0869 | 0.07017 |
| 2 | 0.1096 | 0.1599 | 0.1974 | 0.1279 |
| 3 | 0.1425 | 0.2839 | 0.2414 | 0.1052 |
| 4 | 0.1003 | 0.1328 | 0.198 | 0.1043 |
+----+--------------+---------------+-------------+------------------+
您可以使用:
def kmeans_scatterplot(df, n_clusters, random_state=None):
    """Draw a grid of pairwise scatter plots coloured by K-means cluster.

    For every ordered pair of distinct columns a separate K-means model is
    fitted on just those two columns, and the points are coloured by the
    resulting labels. Cells where both columns are the same get no scatter.

    Relies on ``plt`` (matplotlib.pyplot) and ``KMeans`` (sklearn.cluster)
    being available in the enclosing scope.

    Args:
        df: DataFrame whose columns are the features to compare.
        n_clusters: Number of clusters requested from each K-means fit.
        random_state: Optional seed forwarded to ``KMeans``. The default
            ``None`` keeps the original, unseeded behaviour.
    """
    axs_length = len(df.columns)
    # squeeze=False keeps `axs` two-dimensional even for a 1x1 grid, so the
    # axs[i][j] indexing below also works for a single-column frame.
    fig, axs = plt.subplots(
        axs_length, axs_length, figsize=(20, 20), squeeze=False
    )
    for i, column_i in enumerate(df):
        for j, column_j in enumerate(df):
            ax = axs[i][j]
            # create plot
            if column_i != column_j:
                df_temp = df[[column_i, column_j]]
                km = KMeans(
                    init='k-means++',
                    n_clusters=n_clusters,
                    random_state=random_state,
                )
                km_clustering = km.fit(df_temp)
                # Grid row i is the y variable and grid column j is the x
                # variable, so the data agrees with the axis labels set below.
                ax.scatter(
                    df_temp[column_j],
                    df_temp[column_i],
                    c=km_clustering.labels_,
                    cmap='rainbow',
                    alpha=0.7,
                    edgecolors='b',
                )
            # only show left and bottom labels
            if i == axs_length - 1:
                ax.set_xlabel(column_j)
            if j == 0:
                ax.set_ylabel(column_i)
kmeans_scatterplot(df, 2)  # draw the scatter-plot grid using 2 clusters
结果:
IIUC,您可以使用 seaborn.pairplot
简化并传入 KMeans.labels_
作为 hue
参数。例如:
import seaborn as sns
from sklearn.cluster import KMeans
def kmeans_scatterplot(df, n_clusters):
    """Show a seaborn pair plot of ``df`` coloured by K-means cluster.

    One K-means model with ``n_clusters`` clusters is fitted on the whole
    frame. Its labels are added as a ``hue`` column on a new frame returned
    by ``DataFrame.assign``, and seaborn colours the points by that column.
    """
    model = KMeans(n_clusters=n_clusters, init='k-means++')
    cluster_labels = model.fit(df).labels_
    labelled = df.assign(hue=cluster_labels)
    sns.pairplot(labelled, hue='hue')
kmeans_scatterplot(df, 2)  # draw the pair plot using 2 clusters
[输出]
我想使用以下 pandas 数据框从 scikit-learn 的 K-means 绘制一个聚类矩阵:
import pandas as pd
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset shipped with scikit-learn (toy dataset).
cancer = load_breast_cancer()
# Pass the feature names directly: wrapping them in an extra list makes
# pandas build a one-level MultiIndex for the columns instead of a flat Index.
data = pd.DataFrame(cancer.data, columns=cancer.feature_names)
# Keep the four feature columns at positions 4..7 and give them short names.
df = data.iloc[:, 4:8]
df.columns = ['smoothness', 'compactness', 'concavity', 'concave points']
df
+----+--------------+---------------+-------------+------------------+
| | smoothness | compactness | concavity | concave points |
|----+--------------+---------------+-------------+------------------|
| 0 | 0.1184 | 0.2776 | 0.3001 | 0.1471 |
| 1 | 0.08474 | 0.07864 | 0.0869 | 0.07017 |
| 2 | 0.1096 | 0.1599 | 0.1974 | 0.1279 |
| 3 | 0.1425 | 0.2839 | 0.2414 | 0.1052 |
| 4 | 0.1003 | 0.1328 | 0.198 | 0.1043 |
+----+--------------+---------------+-------------+------------------+
您可以使用:
def kmeans_scatterplot(df, n_clusters, random_state=None):
    """Draw a grid of pairwise scatter plots coloured by K-means cluster.

    For every ordered pair of distinct columns a separate K-means model is
    fitted on just those two columns, and the points are coloured by the
    resulting labels. Cells where both columns are the same get no scatter.

    Relies on ``plt`` (matplotlib.pyplot) and ``KMeans`` (sklearn.cluster)
    being available in the enclosing scope.

    Args:
        df: DataFrame whose columns are the features to compare.
        n_clusters: Number of clusters requested from each K-means fit.
        random_state: Optional seed forwarded to ``KMeans``. The default
            ``None`` keeps the original, unseeded behaviour.
    """
    axs_length = len(df.columns)
    # squeeze=False keeps `axs` two-dimensional even for a 1x1 grid, so the
    # axs[i][j] indexing below also works for a single-column frame.
    fig, axs = plt.subplots(
        axs_length, axs_length, figsize=(20, 20), squeeze=False
    )
    for i, column_i in enumerate(df):
        for j, column_j in enumerate(df):
            ax = axs[i][j]
            # create plot
            if column_i != column_j:
                df_temp = df[[column_i, column_j]]
                km = KMeans(
                    init='k-means++',
                    n_clusters=n_clusters,
                    random_state=random_state,
                )
                km_clustering = km.fit(df_temp)
                # Grid row i is the y variable and grid column j is the x
                # variable, so the data agrees with the axis labels set below.
                ax.scatter(
                    df_temp[column_j],
                    df_temp[column_i],
                    c=km_clustering.labels_,
                    cmap='rainbow',
                    alpha=0.7,
                    edgecolors='b',
                )
            # only show left and bottom labels
            if i == axs_length - 1:
                ax.set_xlabel(column_j)
            if j == 0:
                ax.set_ylabel(column_i)
kmeans_scatterplot(df, 2)  # draw the scatter-plot grid using 2 clusters
结果:
IIUC,您可以使用 seaborn.pairplot
简化并传入 KMeans.labels_
作为 hue
参数。例如:
import seaborn as sns
from sklearn.cluster import KMeans
def kmeans_scatterplot(df, n_clusters):
    """Show a seaborn pair plot of ``df`` coloured by K-means cluster.

    One K-means model with ``n_clusters`` clusters is fitted on the whole
    frame. Its labels are added as a ``hue`` column on a new frame returned
    by ``DataFrame.assign``, and seaborn colours the points by that column.
    """
    model = KMeans(n_clusters=n_clusters, init='k-means++')
    cluster_labels = model.fit(df).labels_
    labelled = df.assign(hue=cluster_labels)
    sns.pairplot(labelled, hue='hue')
kmeans_scatterplot(df, 2)  # draw the pair plot using 2 clusters
[输出]