Python (numpy) - 关联两个分箱图
Python (numpy) - correlate two binned plots
我的问题是:如何对我的两个分箱图做相关分析,并输出 Pearson 相关系数?
我不确定如何正确提取 np.corrcoef 函数所需的分箱数组。这是我的脚本:
import numpy as np
import matplotlib.pyplot as plt
# Question script (as posted): load two data files, draw a hexbin plot for each,
# then try to correlate the binned counts.
# Columns 1 and 2 of each file are used as the x and y coordinates.
A = np.genfromtxt('data1.txt')
x1 = A[:,1]
y1 = A[:,2]
B=np.genfromtxt('data2.txt')
x2 = B[:,1]
y2 = B[:,2]
# Two side-by-side subplots, each with its own colorbar; both hexbin calls use
# the same gridsize, colour limits and mincnt.
fig = plt.figure()
plt.subplots_adjust(hspace=0.5)
plt.subplot(121)
AA = plt.hexbin(x1,y1,cmap='jet',gridsize=500,vmin=0,vmax=450,mincnt=1)
plt.axis([-180,180,-180,180])
cb = plt.colorbar()
plt.title('Data1')
plt.subplot(122)
BB = plt.hexbin(x2,y2,cmap='jet',gridsize=500,vmin=0,vmax=450,mincnt=1)
plt.axis([-180,180,-180,180])
cb = plt.colorbar()
plt.title('Data 2')
# This is the step the question is about: AA and BB are the objects returned by
# plt.hexbin (a PolyCollection according to the quoted documentation), not
# arrays of counts, yet they are passed to np.ndarray.flatten as if they were.
array1 = np.ndarray.flatten(AA)
array2 = np.ndarray.flatten(BB)
# Python 2 print statement, kept exactly as posted.
print np.corrcoef(array1,array2)
plt.show()
答案可以在文档(documentation)中找到:
Returns: object
a PolyCollection instance; use get_array() on this PolyCollection to get the counts in each hexagon.
这是您的代码的修订版:
# Revised script: draw two hexbin plots on an identical hexagon grid and print
# the Pearson correlation matrix of their per-hexagon counts.
import numpy as np
import matplotlib.pyplot as plt

# Columns 1 and 2 of each file are used as the x and y coordinates.
A = np.genfromtxt('data1.txt')
x1 = A[:, 1]
y1 = A[:, 2]
B = np.genfromtxt('data2.txt')
x2 = B[:, 1]
y2 = B[:, 2]

# make figure and axes
fig, (ax1, ax2) = plt.subplots(1, 2)

# define common keyword arguments
# A fixed `extent` puts both datasets on the same hexagon grid; without it the
# grid follows each dataset's own data range and the bins do not line up.
# `mincnt` is deliberately not passed: it removes empty hexagons from the
# returned collection, so the two count arrays would differ in length and
# np.corrcoef could not pair them bin by bin.
hex_params = dict(cmap='jet', gridsize=500, vmin=0, vmax=450,
                  extent=(-180, 180, -180, 180))

# plot and set titles
hex1 = ax1.hexbin(x1, y1, **hex_params)
hex2 = ax2.hexbin(x2, y2, **hex_params)
ax1.set_title('Data 1')
ax2.set_title('Data 2')

# get binned data: one count per hexagon, same length and order for both plots
binned1 = np.array(hex1.get_array(), dtype=float)
binned2 = np.array(hex2.get_array(), dtype=float)

# Hide empty hexagons in the figure (the visual effect mincnt=1 had): masked
# cells are drawn with the colormap's "bad" colour, transparent by default.
hex1.set_array(np.ma.masked_less(binned1, 1))
hex2.set_array(np.ma.masked_less(binned2, 1))

# set axes lims
for ax in (ax1, ax2):
    ax.set_xlim(-180, 180)
    ax.set_ylim(-180, 180)

# add single colorbar (both plots share vmin/vmax, so one scale serves both)
fig.subplots_adjust(right=0.8, hspace=0.5)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(hex2, cax=cbar_ax)

# corr coeff; print(...) with a single argument works on Python 2 and 3
print(np.corrcoef(binned1, binned2))
plt.show()
但有两点意见:您确定要用皮尔逊相关系数吗?您到底想展示什么?如果您想说明两个分布是相同还是不同,您可能需要使用 Kolmogorov-Smirnov 检验。
另外,也不要使用 jet 作为颜色图:Jet is bad。
我的问题是:如何对我的两个分箱图做相关分析,并输出 Pearson 相关系数?
我不确定如何正确提取 np.corrcoef 函数所需的分箱数组。这是我的脚本:
import numpy as np
import matplotlib.pyplot as plt
# Question script (as posted): load two data files, draw a hexbin plot for each,
# then try to correlate the binned counts.
# Columns 1 and 2 of each file are used as the x and y coordinates.
A = np.genfromtxt('data1.txt')
x1 = A[:,1]
y1 = A[:,2]
B=np.genfromtxt('data2.txt')
x2 = B[:,1]
y2 = B[:,2]
# Two side-by-side subplots, each with its own colorbar; both hexbin calls use
# the same gridsize, colour limits and mincnt.
fig = plt.figure()
plt.subplots_adjust(hspace=0.5)
plt.subplot(121)
AA = plt.hexbin(x1,y1,cmap='jet',gridsize=500,vmin=0,vmax=450,mincnt=1)
plt.axis([-180,180,-180,180])
cb = plt.colorbar()
plt.title('Data1')
plt.subplot(122)
BB = plt.hexbin(x2,y2,cmap='jet',gridsize=500,vmin=0,vmax=450,mincnt=1)
plt.axis([-180,180,-180,180])
cb = plt.colorbar()
plt.title('Data 2')
# This is the step the question is about: AA and BB are the objects returned by
# plt.hexbin (a PolyCollection according to the quoted documentation), not
# arrays of counts, yet they are passed to np.ndarray.flatten as if they were.
array1 = np.ndarray.flatten(AA)
array2 = np.ndarray.flatten(BB)
# Python 2 print statement, kept exactly as posted.
print np.corrcoef(array1,array2)
plt.show()
答案可以在文档(documentation)中找到:
Returns: object
a PolyCollection instance; use get_array() on this PolyCollection to get the counts in each hexagon.
这是您的代码的修订版:
# Revised script: draw two hexbin plots on an identical hexagon grid and print
# the Pearson correlation matrix of their per-hexagon counts.
import numpy as np
import matplotlib.pyplot as plt

# Columns 1 and 2 of each file are used as the x and y coordinates.
A = np.genfromtxt('data1.txt')
x1 = A[:, 1]
y1 = A[:, 2]
B = np.genfromtxt('data2.txt')
x2 = B[:, 1]
y2 = B[:, 2]

# make figure and axes
fig, (ax1, ax2) = plt.subplots(1, 2)

# define common keyword arguments
# A fixed `extent` puts both datasets on the same hexagon grid; without it the
# grid follows each dataset's own data range and the bins do not line up.
# `mincnt` is deliberately not passed: it removes empty hexagons from the
# returned collection, so the two count arrays would differ in length and
# np.corrcoef could not pair them bin by bin.
hex_params = dict(cmap='jet', gridsize=500, vmin=0, vmax=450,
                  extent=(-180, 180, -180, 180))

# plot and set titles
hex1 = ax1.hexbin(x1, y1, **hex_params)
hex2 = ax2.hexbin(x2, y2, **hex_params)
ax1.set_title('Data 1')
ax2.set_title('Data 2')

# get binned data: one count per hexagon, same length and order for both plots
binned1 = np.array(hex1.get_array(), dtype=float)
binned2 = np.array(hex2.get_array(), dtype=float)

# Hide empty hexagons in the figure (the visual effect mincnt=1 had): masked
# cells are drawn with the colormap's "bad" colour, transparent by default.
hex1.set_array(np.ma.masked_less(binned1, 1))
hex2.set_array(np.ma.masked_less(binned2, 1))

# set axes lims
for ax in (ax1, ax2):
    ax.set_xlim(-180, 180)
    ax.set_ylim(-180, 180)

# add single colorbar (both plots share vmin/vmax, so one scale serves both)
fig.subplots_adjust(right=0.8, hspace=0.5)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(hex2, cax=cbar_ax)

# corr coeff; print(...) with a single argument works on Python 2 and 3
print(np.corrcoef(binned1, binned2))
plt.show()
但有两点意见:您确定要用皮尔逊相关系数吗?您到底想展示什么?如果您想说明两个分布是相同还是不同,您可能需要使用 Kolmogorov-Smirnov 检验。
另外,也不要使用 jet 作为颜色图:Jet is bad。