如何在 Matplotlib 中用散点图绘制一个由数组列表组成的字典?(截图见正文)
How to scatter plot a dict of lists containing arrays in Matplotlib? (Screenshot in details)
我想把一个 dict 中的数据绘制出来,最好使用 Matplotlib。
下面是截图,因为我认为查看数据结构更容易理解。但这里也有一个描述。
- 一个字典包含7个列表。
- 每个列表代表一个集群。
- 每个列表包含许多数组,每个数组有两个元素。
- 每个数组代表一个二维点。
我想重现这篇博客文章中的结果。不幸的是,作者没有提供他用于绘图的代码。
https://datasciencelab.wordpress.com/2013/12/12/clustering-with-k-means-in-python/
截图如下:
如果有帮助,这里是我用来生成集群的完整代码:
import numpy as np
import random
import matplotlib.pyplot as plt # Whosebug
# k-Means Algorithm (Lloyd's Algorithm)
def cluster_points(X, mu):
    """Assign every point in X to its nearest center.

    Args:
        X: Iterable of points. Each point must support ``x - center``
            with the entries of ``mu`` (for example NumPy arrays).
        mu: Iterable of cluster centers.

    Returns:
        A dict mapping the position of a center in ``mu`` to the list of
        points closest to it. Centers that attract no point get no key.

    Raises:
        ValueError: If ``mu`` is empty while X contains at least one point.
    """
    clusters = {}
    for x in X:
        # min() keeps the first minimum, so ties go to the lowest index.
        bestmukey = min(
            ((i, np.linalg.norm(x - m)) for i, m in enumerate(mu)),
            key=lambda t: t[1],
        )[0]
        clusters.setdefault(bestmukey, []).append(x)
    return clusters
def reevaluate_centers(mu, clusters):
    """Compute the mean of every cluster as its new center.

    Args:
        mu: Previous centers. Unused; kept so callers that pass it keep
            working.
        clusters: Dict mapping a cluster key to the list of its points.

    Returns:
        A list with one mean point per cluster, ordered by ascending
        cluster key. Only keys present in ``clusters`` produce a center.
    """
    return [np.mean(clusters[k], axis=0) for k in sorted(clusters)]
def has_converged(mu, oldmu):
    """Return True when both iterables hold the same set of centers.

    Each center is converted to a tuple so it can be hashed. Order and
    duplicates are ignored, and coordinates are compared exactly.
    """
    return {tuple(a) for a in mu} == {tuple(a) for a in oldmu}
def find_centers(X, K):
    """Run k-means (Lloyd's algorithm) on X starting from K random points.

    Args:
        X: Points to cluster, as a NumPy array of rows or a sequence of
            points.
        K: Number of points drawn from X as initial centers.

    Returns:
        A ``(mu, clusters)`` tuple: the final list of centers and the
        dict returned by ``cluster_points(X, mu)`` for those centers.

    Raises:
        ValueError: If K is negative or larger than the number of points
            (raised by ``random.sample``).
    """
    # Initialize to K random centers.
    # random.sample() rejects NumPy arrays on Python 3 because they are
    # not collections.abc.Sequence instances, so sample from a list of rows.
    points = list(X)
    oldmu = random.sample(points, K)
    mu = random.sample(points, K)
    while True:
        # Assign all points in X to clusters. Doing this before the
        # convergence check guarantees `clusters` is bound even when the
        # two initial draws already contain the same centers.
        clusters = cluster_points(X, mu)
        if has_converged(mu, oldmu):
            break
        oldmu = mu
        # Reevaluate centers
        mu = reevaluate_centers(oldmu, clusters)
    return (mu, clusters)
# initialization
def init_board_gauss(N, k):
    """Generate N random 2-D points grouped around k Gaussian blobs.

    Each blob gets a center drawn uniformly from [-1, 1] x [-1, 1] and a
    standard deviation drawn uniformly from [0.05, 0.5]. Samples whose
    coordinates are not strictly inside (-1, 1) are rejected and redrawn.

    Args:
        N: Total number of points to return.
        k: Number of blobs.

    Returns:
        A NumPy array with the first N generated points, blob after blob.
        For positive N and k its shape is (N, 2).

    Raises:
        ZeroDivisionError: If k is 0.
    """
    n = float(N) / k
    X = []
    for _ in range(k):
        c = (random.uniform(-1, 1), random.uniform(-1, 1))
        s = random.uniform(0.05, 0.5)
        x = []
        while len(x) < n:
            a = np.random.normal(c[0], s)
            b = np.random.normal(c[1], s)
            # Continue drawing points from the distribution in the range [-1,1]
            if abs(a) < 1 and abs(b) < 1:
                x.append([a, b])
        X.extend(x)
    # Every blob holds ceil(N / k) points, so trim the overshoot to N.
    return np.array(X)[:N]
# Draw 200 sample points arranged around 3 Gaussian blobs.
X = init_board_gauss(200,3)
# generating clusters
# Run k-means with 7 centers, then assign every point to the nearest of
# the returned centers.
mu, clusters = find_centers(X, 7)
clusters = cluster_points(X, mu)
我想出了一个解决办法。
下面是我修改/添加的代码:
# Initialize points
# The same cluster count is used for generating the blobs and for k-means.
n_points = 200
n_clusters = 7
X = init_board_gauss(n_points, n_clusters)
# Cluster points
mu, clusters = find_centers(X, n_clusters)
# Assign every point to the nearest of the centers returned above.
clusters = cluster_points(X, mu)
# Generate random colors
def generate_random_color():
    """Return a random RGB color as an uppercase ``#RRGGBB`` hex string."""
    # Three independent channel values in 0..255, drawn in R, G, B order.
    r, g, b = (random.randint(0, 255) for _ in range(3))
    return '#%02X%02X%02X' % (r, g, b)
# Plot each cluster in its own random color.
# Iterate over the keys that are actually present: a center that attracted
# no points has no entry in `clusters`, so indexing with range(n_clusters)
# could raise KeyError.
for i in sorted(clusters):
    colx = tuple(x[0] for x in clusters[i])
    coly = tuple(x[1] for x in clusters[i])
    cluster_color = generate_random_color()
    plt.scatter(colx, coly, color=cluster_color)
这里是完整代码:
import matplotlib.pyplot as plt
import numpy as np
import random
# k-Means Algorithm (Lloyd's Algorithm)
def cluster_points(X, mu):
    """Assign every point in X to its nearest center.

    Args:
        X: Iterable of points. Each point must support ``x - center``
            with the entries of ``mu`` (for example NumPy arrays).
        mu: Iterable of cluster centers.

    Returns:
        A dict mapping the position of a center in ``mu`` to the list of
        points closest to it. Centers that attract no point get no key.

    Raises:
        ValueError: If ``mu`` is empty while X contains at least one point.
    """
    clusters = {}
    for x in X:
        # min() keeps the first minimum, so ties go to the lowest index.
        bestmukey = min(
            ((i, np.linalg.norm(x - m)) for i, m in enumerate(mu)),
            key=lambda t: t[1],
        )[0]
        clusters.setdefault(bestmukey, []).append(x)
    return clusters
def reevaluate_centers(mu, clusters):
    """Compute the mean of every cluster as its new center.

    Args:
        mu: Previous centers. Unused; kept so callers that pass it keep
            working.
        clusters: Dict mapping a cluster key to the list of its points.

    Returns:
        A list with one mean point per cluster, ordered by ascending
        cluster key. Only keys present in ``clusters`` produce a center.
    """
    return [np.mean(clusters[k], axis=0) for k in sorted(clusters)]
def has_converged(mu, oldmu):
    """Return True when both iterables hold the same set of centers.

    Each center is converted to a tuple so it can be hashed. Order and
    duplicates are ignored, and coordinates are compared exactly.
    """
    return {tuple(a) for a in mu} == {tuple(a) for a in oldmu}
def find_centers(X, K):
    """Run k-means (Lloyd's algorithm) on X starting from K random points.

    Args:
        X: Points to cluster, as a NumPy array of rows or a sequence of
            points.
        K: Number of points drawn from X as initial centers.

    Returns:
        A ``(mu, clusters)`` tuple: the final list of centers and the
        dict returned by ``cluster_points(X, mu)`` for those centers.

    Raises:
        ValueError: If K is negative or larger than the number of points
            (raised by ``random.sample``).
    """
    # Initialize to K random centers.
    # random.sample() rejects NumPy arrays on Python 3 because they are
    # not collections.abc.Sequence instances, so sample from a list of rows.
    points = list(X)
    oldmu = random.sample(points, K)
    mu = random.sample(points, K)
    while True:
        # Assign all points in X to clusters. Doing this before the
        # convergence check guarantees `clusters` is bound even when the
        # two initial draws already contain the same centers.
        clusters = cluster_points(X, mu)
        if has_converged(mu, oldmu):
            break
        oldmu = mu
        # Reevaluate centers
        mu = reevaluate_centers(oldmu, clusters)
    return (mu, clusters)
# Initialization
def init_board(N):
    """Return N points drawn uniformly from the square [-1, 1] x [-1, 1].

    Args:
        N: Number of points to generate.

    Returns:
        A NumPy array built from N ``(x, y)`` tuples.
    """
    return np.array(
        [(random.uniform(-1, 1), random.uniform(-1, 1)) for _ in range(N)]
    )
def init_board_gauss(N, k):
    """Generate N random 2-D points grouped around k Gaussian blobs.

    Each blob gets a center drawn uniformly from [-1, 1] x [-1, 1] and a
    standard deviation drawn uniformly from [0.05, 0.5]. Samples whose
    coordinates are not strictly inside (-1, 1) are rejected and redrawn.

    Args:
        N: Total number of points to return.
        k: Number of blobs.

    Returns:
        A NumPy array with the first N generated points, blob after blob.
        For positive N and k its shape is (N, 2).

    Raises:
        ZeroDivisionError: If k is 0.
    """
    n = float(N) / k
    X = []
    for _ in range(k):
        c = (random.uniform(-1, 1), random.uniform(-1, 1))
        s = random.uniform(0.05, 0.5)
        x = []
        while len(x) < n:
            a = np.random.normal(c[0], s)
            b = np.random.normal(c[1], s)
            # Continue drawing points from the distribution in the range [-1,1]
            if abs(a) < 1 and abs(b) < 1:
                x.append([a, b])
        X.extend(x)
    # Every blob holds ceil(N / k) points, so trim the overshoot to N.
    return np.array(X)[:N]
# Initialize points
# The same cluster count is used for generating the blobs and for k-means.
n_points = 200
n_clusters = 7
X = init_board_gauss(n_points, n_clusters)
# Cluster points
mu, clusters = find_centers(X, n_clusters)
# Assign every point to the nearest of the centers returned above.
clusters = cluster_points(X, mu)
# Generate random colors
def generate_random_color():
    """Return a random RGB color as an uppercase ``#RRGGBB`` hex string."""
    # Three independent channel values in 0..255, drawn in R, G, B order.
    r, g, b = (random.randint(0, 255) for _ in range(3))
    return '#%02X%02X%02X' % (r, g, b)
# Plot each cluster in its own random color.
# Iterate over the keys that are actually present: a center that attracted
# no points has no entry in `clusters`, so indexing with range(n_clusters)
# could raise KeyError.
for i in sorted(clusters):
    colx = tuple(x[0] for x in clusters[i])
    coly = tuple(x[1] for x in clusters[i])
    cluster_color = generate_random_color()
    plt.scatter(colx, coly, color=cluster_color)
我想把一个 dict 中的数据绘制出来,最好使用 Matplotlib。 下面是截图,因为我认为直接查看数据结构更容易理解。不过这里也给出一段文字描述。
- 一个字典包含7个列表。
- 每个列表代表一个集群。
- 每个列表包含许多数组,每个数组有两个元素。
- 每个数组代表一个二维点。
我想重现这篇博客文章中的结果。不幸的是,作者没有提供他用于绘图的代码。 https://datasciencelab.wordpress.com/2013/12/12/clustering-with-k-means-in-python/
截图如下:
如果有帮助,这里是我用来生成集群的完整代码:
import numpy as np
import random
import matplotlib.pyplot as plt # Whosebug
# k-Means Algorithm (Lloyd's Algorithm)
def cluster_points(X, mu):
    """Assign every point in X to its nearest center.

    Args:
        X: Iterable of points. Each point must support ``x - center``
            with the entries of ``mu`` (for example NumPy arrays).
        mu: Iterable of cluster centers.

    Returns:
        A dict mapping the position of a center in ``mu`` to the list of
        points closest to it. Centers that attract no point get no key.

    Raises:
        ValueError: If ``mu`` is empty while X contains at least one point.
    """
    clusters = {}
    for x in X:
        # min() keeps the first minimum, so ties go to the lowest index.
        bestmukey = min(
            ((i, np.linalg.norm(x - m)) for i, m in enumerate(mu)),
            key=lambda t: t[1],
        )[0]
        clusters.setdefault(bestmukey, []).append(x)
    return clusters
def reevaluate_centers(mu, clusters):
    """Compute the mean of every cluster as its new center.

    Args:
        mu: Previous centers. Unused; kept so callers that pass it keep
            working.
        clusters: Dict mapping a cluster key to the list of its points.

    Returns:
        A list with one mean point per cluster, ordered by ascending
        cluster key. Only keys present in ``clusters`` produce a center.
    """
    return [np.mean(clusters[k], axis=0) for k in sorted(clusters)]
def has_converged(mu, oldmu):
    """Return True when both iterables hold the same set of centers.

    Each center is converted to a tuple so it can be hashed. Order and
    duplicates are ignored, and coordinates are compared exactly.
    """
    return {tuple(a) for a in mu} == {tuple(a) for a in oldmu}
def find_centers(X, K):
    """Run k-means (Lloyd's algorithm) on X starting from K random points.

    Args:
        X: Points to cluster, as a NumPy array of rows or a sequence of
            points.
        K: Number of points drawn from X as initial centers.

    Returns:
        A ``(mu, clusters)`` tuple: the final list of centers and the
        dict returned by ``cluster_points(X, mu)`` for those centers.

    Raises:
        ValueError: If K is negative or larger than the number of points
            (raised by ``random.sample``).
    """
    # Initialize to K random centers.
    # random.sample() rejects NumPy arrays on Python 3 because they are
    # not collections.abc.Sequence instances, so sample from a list of rows.
    points = list(X)
    oldmu = random.sample(points, K)
    mu = random.sample(points, K)
    while True:
        # Assign all points in X to clusters. Doing this before the
        # convergence check guarantees `clusters` is bound even when the
        # two initial draws already contain the same centers.
        clusters = cluster_points(X, mu)
        if has_converged(mu, oldmu):
            break
        oldmu = mu
        # Reevaluate centers
        mu = reevaluate_centers(oldmu, clusters)
    return (mu, clusters)
# initialization
def init_board_gauss(N, k):
    """Generate N random 2-D points grouped around k Gaussian blobs.

    Each blob gets a center drawn uniformly from [-1, 1] x [-1, 1] and a
    standard deviation drawn uniformly from [0.05, 0.5]. Samples whose
    coordinates are not strictly inside (-1, 1) are rejected and redrawn.

    Args:
        N: Total number of points to return.
        k: Number of blobs.

    Returns:
        A NumPy array with the first N generated points, blob after blob.
        For positive N and k its shape is (N, 2).

    Raises:
        ZeroDivisionError: If k is 0.
    """
    n = float(N) / k
    X = []
    for _ in range(k):
        c = (random.uniform(-1, 1), random.uniform(-1, 1))
        s = random.uniform(0.05, 0.5)
        x = []
        while len(x) < n:
            a = np.random.normal(c[0], s)
            b = np.random.normal(c[1], s)
            # Continue drawing points from the distribution in the range [-1,1]
            if abs(a) < 1 and abs(b) < 1:
                x.append([a, b])
        X.extend(x)
    # Every blob holds ceil(N / k) points, so trim the overshoot to N.
    return np.array(X)[:N]
# Draw 200 sample points arranged around 3 Gaussian blobs.
X = init_board_gauss(200,3)
# generating clusters
# Run k-means with 7 centers, then assign every point to the nearest of
# the returned centers.
mu, clusters = find_centers(X, 7)
clusters = cluster_points(X, mu)
我想出了一个解决办法。
下面是我修改/添加的代码:
# Initialize points
# The same cluster count is used for generating the blobs and for k-means.
n_points = 200
n_clusters = 7
X = init_board_gauss(n_points, n_clusters)
# Cluster points
mu, clusters = find_centers(X, n_clusters)
# Assign every point to the nearest of the centers returned above.
clusters = cluster_points(X, mu)
# Generate random colors
def generate_random_color():
    """Return a random RGB color as an uppercase ``#RRGGBB`` hex string."""
    # Three independent channel values in 0..255, drawn in R, G, B order.
    r, g, b = (random.randint(0, 255) for _ in range(3))
    return '#%02X%02X%02X' % (r, g, b)
# Plot each cluster in its own random color.
# Iterate over the keys that are actually present: a center that attracted
# no points has no entry in `clusters`, so indexing with range(n_clusters)
# could raise KeyError.
for i in sorted(clusters):
    colx = tuple(x[0] for x in clusters[i])
    coly = tuple(x[1] for x in clusters[i])
    cluster_color = generate_random_color()
    plt.scatter(colx, coly, color=cluster_color)
这里是完整代码:
import matplotlib.pyplot as plt
import numpy as np
import random
# k-Means Algorithm (Lloyd's Algorithm)
def cluster_points(X, mu):
    """Assign every point in X to its nearest center.

    Args:
        X: Iterable of points. Each point must support ``x - center``
            with the entries of ``mu`` (for example NumPy arrays).
        mu: Iterable of cluster centers.

    Returns:
        A dict mapping the position of a center in ``mu`` to the list of
        points closest to it. Centers that attract no point get no key.

    Raises:
        ValueError: If ``mu`` is empty while X contains at least one point.
    """
    clusters = {}
    for x in X:
        # min() keeps the first minimum, so ties go to the lowest index.
        bestmukey = min(
            ((i, np.linalg.norm(x - m)) for i, m in enumerate(mu)),
            key=lambda t: t[1],
        )[0]
        clusters.setdefault(bestmukey, []).append(x)
    return clusters
def reevaluate_centers(mu, clusters):
    """Compute the mean of every cluster as its new center.

    Args:
        mu: Previous centers. Unused; kept so callers that pass it keep
            working.
        clusters: Dict mapping a cluster key to the list of its points.

    Returns:
        A list with one mean point per cluster, ordered by ascending
        cluster key. Only keys present in ``clusters`` produce a center.
    """
    return [np.mean(clusters[k], axis=0) for k in sorted(clusters)]
def has_converged(mu, oldmu):
    """Return True when both iterables hold the same set of centers.

    Each center is converted to a tuple so it can be hashed. Order and
    duplicates are ignored, and coordinates are compared exactly.
    """
    return {tuple(a) for a in mu} == {tuple(a) for a in oldmu}
def find_centers(X, K):
    """Run k-means (Lloyd's algorithm) on X starting from K random points.

    Args:
        X: Points to cluster, as a NumPy array of rows or a sequence of
            points.
        K: Number of points drawn from X as initial centers.

    Returns:
        A ``(mu, clusters)`` tuple: the final list of centers and the
        dict returned by ``cluster_points(X, mu)`` for those centers.

    Raises:
        ValueError: If K is negative or larger than the number of points
            (raised by ``random.sample``).
    """
    # Initialize to K random centers.
    # random.sample() rejects NumPy arrays on Python 3 because they are
    # not collections.abc.Sequence instances, so sample from a list of rows.
    points = list(X)
    oldmu = random.sample(points, K)
    mu = random.sample(points, K)
    while True:
        # Assign all points in X to clusters. Doing this before the
        # convergence check guarantees `clusters` is bound even when the
        # two initial draws already contain the same centers.
        clusters = cluster_points(X, mu)
        if has_converged(mu, oldmu):
            break
        oldmu = mu
        # Reevaluate centers
        mu = reevaluate_centers(oldmu, clusters)
    return (mu, clusters)
# Initialization
def init_board(N):
    """Return N points drawn uniformly from the square [-1, 1] x [-1, 1].

    Args:
        N: Number of points to generate.

    Returns:
        A NumPy array built from N ``(x, y)`` tuples.
    """
    return np.array(
        [(random.uniform(-1, 1), random.uniform(-1, 1)) for _ in range(N)]
    )
def init_board_gauss(N, k):
    """Generate N random 2-D points grouped around k Gaussian blobs.

    Each blob gets a center drawn uniformly from [-1, 1] x [-1, 1] and a
    standard deviation drawn uniformly from [0.05, 0.5]. Samples whose
    coordinates are not strictly inside (-1, 1) are rejected and redrawn.

    Args:
        N: Total number of points to return.
        k: Number of blobs.

    Returns:
        A NumPy array with the first N generated points, blob after blob.
        For positive N and k its shape is (N, 2).

    Raises:
        ZeroDivisionError: If k is 0.
    """
    n = float(N) / k
    X = []
    for _ in range(k):
        c = (random.uniform(-1, 1), random.uniform(-1, 1))
        s = random.uniform(0.05, 0.5)
        x = []
        while len(x) < n:
            a = np.random.normal(c[0], s)
            b = np.random.normal(c[1], s)
            # Continue drawing points from the distribution in the range [-1,1]
            if abs(a) < 1 and abs(b) < 1:
                x.append([a, b])
        X.extend(x)
    # Every blob holds ceil(N / k) points, so trim the overshoot to N.
    return np.array(X)[:N]
# Initialize points
# The same cluster count is used for generating the blobs and for k-means.
n_points = 200
n_clusters = 7
X = init_board_gauss(n_points, n_clusters)
# Cluster points
mu, clusters = find_centers(X, n_clusters)
# Assign every point to the nearest of the centers returned above.
clusters = cluster_points(X, mu)
# Generate random colors
def generate_random_color():
    """Return a random RGB color as an uppercase ``#RRGGBB`` hex string."""
    # Three independent channel values in 0..255, drawn in R, G, B order.
    r, g, b = (random.randint(0, 255) for _ in range(3))
    return '#%02X%02X%02X' % (r, g, b)
# Plot each cluster in its own random color.
# Iterate over the keys that are actually present: a center that attracted
# no points has no entry in `clusters`, so indexing with range(n_clusters)
# could raise KeyError.
for i in sorted(clusters):
    colx = tuple(x[0] for x in clusters[i])
    coly = tuple(x[1] for x in clusters[i])
    cluster_color = generate_random_color()
    plt.scatter(colx, coly, color=cluster_color)