使用循环绘制多个箱线图
Multiple boxplots using a loop
我有一个大数据集,并已按 0.1 的步长(范围从 0 到 100)把它划分成许多小子集。我想为这些子集绘制一张从 0 到 100 的箱线图(布局与折线图相同,只是每个位置换成箱线图)。
为了生成/缩减数据(原始数据约有十亿个点),我运行了如下循环:
# Flatten both masked fields and pair them column-wise; `low` selects the
# cells with tree cover below 25.
a = np.array(Treecover.where(low)).flatten()
b = np.array(RZSC.where(low)).flatten()
Low_TC_data = pd.DataFrame({'Treecover': a, 'RZSC': b})
# Drop every row in which either column is NaN.
Low_TC_data_NaN = Low_TC_data.dropna()

# Summary statistics of RZSC, one entry per 0.1-wide Treecover bin in [0, 1).
Tree = []
Mean = []
Max = []
Min = []
Median = []
for i in np.arange(0, 1, 0.1):
    Tree.append(i)
    # Half-open bin [i, i + 0.1). A dedicated name is used for the per-bin
    # values so the flattened Treecover array in `a` is not overwritten.
    in_bin = (Low_TC_data_NaN['Treecover'] >= i) & (Low_TC_data_NaN['Treecover'] < i + 0.1)
    rzsc_in_bin = Low_TC_data_NaN['RZSC'].where(in_bin).dropna()
    Mean.append(rzsc_in_bin.mean())
    Min.append(rzsc_in_bin.min())
    Max.append(rzsc_in_bin.max())
    Median.append(rzsc_in_bin.median())
然后我绘制了一个散点图来可视化数据。
# Summary view of the binned statistics: mean (default colour) and median
# (red) as large markers, with the min-max range shaded.
# The draw order is kept as mean, median, band.
fig = plt.figure(figsize=(10, 7))
marker_size = 500
for values, style in ((Mean, {}), (Median, {'color': 'red'})):
    plt.scatter(Tree, values, s=marker_size, **style)
plt.fill_between(Tree, Min, Max, alpha=0.3)
我尝试通过如下方式绘制前 11 个箱线图:
# Hand-written version for the first 11 bins: each element of the tuple passed
# to plt.boxplot is the RZSC values whose Treecover lies in one 0.1-wide
# half-open interval, from [i, i+0.1) up to [i+1, i+1.1).
fig = plt.figure(figsize=(20, 10))
i = 0  # lower edge of the first bin; every other edge is written as an offset from it
# Marker style for the outliers (fliers): a diamond with a white face, despite the name.
green_diamond = dict(markerfacecolor='w', marker='D')
plt.boxplot((Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i)) & (Low_TC_data_NaN['Treecover']<(i+0.1))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.1)) & (Low_TC_data_NaN['Treecover']<(i+0.2))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.2)) & (Low_TC_data_NaN['Treecover']<(i+0.3))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.3)) & (Low_TC_data_NaN['Treecover']<(i+0.4))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.4)) & (Low_TC_data_NaN['Treecover']<(i+0.5))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.5)) & (Low_TC_data_NaN['Treecover']<(i+0.6))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.6)) & (Low_TC_data_NaN['Treecover']<(i+0.7))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.7)) & (Low_TC_data_NaN['Treecover']<(i+0.8))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.8)) & (Low_TC_data_NaN['Treecover']<(i+0.9))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.9)) & (Low_TC_data_NaN['Treecover']<(i+1))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+1)) & (Low_TC_data_NaN['Treecover']<(i+1.1))).dropna()),
flierprops=green_diamond);
# Continuing this pattern by hand, the argument list would run on to
# ....Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+24.9)) & (Low_TC_data_NaN['Treecover']<(i+25))).dropna()
# for the first 250 boxplots.
上图的 X 轴对应 0, 0.1, 0.2, 0.3, …, 1。
如果可能的话,我想要一张包含 100 个(甚至 1000 个)这样的子集的箱线图。
我用下面的代码尝试过一次,但失败了:
# Failed attempt: collect the per-bin RZSC series for the ten 0.1-wide bins in
# [0, 1), then call plt.boxplot once per bin for the first five bins.
fig = plt.figure(figsize=(5, 10))
data = [(Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i)) & (Low_TC_data_NaN['Treecover']<(i+0.1))).dropna()) for i in np.arange(0,1,0.1)]
# NOTE(review): each call receives a single 1-D array and no `positions`
# argument, so all five boxes presumably land on the same x position and
# overdraw one another - confirm against the matplotlib boxplot defaults.
[plt.boxplot(np.array(data[i]).ravel()) for i in range(5)];
图中的数值看起来也有些奇怪,而且与第二张图不一致。
我想为每一个数据区间都画出箱线图。简而言之,我希望把第二张图的代码写得更简洁,得到与图 2 相同的结果,并且在 x 轴上有大约 100 个取值时仍然适用。
我只是稍微调整了一下:不再循环调用箱线图,而是循环生成数据本身。
代码看起来有点像这样:
# One RZSC sample per 0.1-wide half-open Treecover bin [i, i + 0.1), for the
# bin starts generated by np.arange(0, 25, 0.1).
# `data` is kept as a flat list of Series and handed to boxplot as-is: the
# bins generally hold different numbers of points, so stacking them with
# np.array(...) would build a ragged array, which recent NumPy rejects, and
# equal-length bins would instead be read column-wise as the wrong datasets.
cover = Low_TC_data_NaN['Treecover']
data = [
    Low_TC_data_NaN['RZSC'].where((cover >= i) & (cover < i + 0.1)).dropna()
    for i in np.arange(0, 25, 0.1)
]
fig = plt.figure(figsize=(20, 10))
# Marker style for the outliers (fliers): a diamond with a white face.
green_diamond = dict(markerfacecolor='w', marker='D')
plt.boxplot(data, flierprops=green_diamond)
plt.ylim(0, 4000)
结果是这样的:
X 轴对应 0–25 之间的取值(共 250 个箱线图)。
我有一个大数据集,并已按 0.1 的步长(范围从 0 到 100)把它划分成许多小子集。我想为这些子集绘制一张从 0 到 100 的箱线图(布局与折线图相同,只是每个位置换成箱线图)。
为了生成/缩减数据(原始数据约有十亿个点),我运行了如下循环:
# Flatten both masked fields and pair them column-wise; `low` selects the
# cells with tree cover below 25.
a = np.array(Treecover.where(low)).flatten()
b = np.array(RZSC.where(low)).flatten()
Low_TC_data = pd.DataFrame({'Treecover': a, 'RZSC': b})
# Drop every row in which either column is NaN.
Low_TC_data_NaN = Low_TC_data.dropna()

# Summary statistics of RZSC, one entry per 0.1-wide Treecover bin in [0, 1).
Tree = []
Mean = []
Max = []
Min = []
Median = []
for i in np.arange(0, 1, 0.1):
    Tree.append(i)
    # Half-open bin [i, i + 0.1). A dedicated name is used for the per-bin
    # values so the flattened Treecover array in `a` is not overwritten.
    in_bin = (Low_TC_data_NaN['Treecover'] >= i) & (Low_TC_data_NaN['Treecover'] < i + 0.1)
    rzsc_in_bin = Low_TC_data_NaN['RZSC'].where(in_bin).dropna()
    Mean.append(rzsc_in_bin.mean())
    Min.append(rzsc_in_bin.min())
    Max.append(rzsc_in_bin.max())
    Median.append(rzsc_in_bin.median())
然后我绘制了一个散点图来可视化数据。
# Summary view of the binned statistics: mean (default colour) and median
# (red) as large markers, with the min-max range shaded.
# The draw order is kept as mean, median, band.
fig = plt.figure(figsize=(10, 7))
marker_size = 500
for values, style in ((Mean, {}), (Median, {'color': 'red'})):
    plt.scatter(Tree, values, s=marker_size, **style)
plt.fill_between(Tree, Min, Max, alpha=0.3)
我尝试通过如下方式绘制前 11 个箱线图:
# Hand-written version for the first 11 bins: each element of the tuple passed
# to plt.boxplot is the RZSC values whose Treecover lies in one 0.1-wide
# half-open interval, from [i, i+0.1) up to [i+1, i+1.1).
fig = plt.figure(figsize=(20, 10))
i = 0  # lower edge of the first bin; every other edge is written as an offset from it
# Marker style for the outliers (fliers): a diamond with a white face, despite the name.
green_diamond = dict(markerfacecolor='w', marker='D')
plt.boxplot((Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i)) & (Low_TC_data_NaN['Treecover']<(i+0.1))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.1)) & (Low_TC_data_NaN['Treecover']<(i+0.2))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.2)) & (Low_TC_data_NaN['Treecover']<(i+0.3))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.3)) & (Low_TC_data_NaN['Treecover']<(i+0.4))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.4)) & (Low_TC_data_NaN['Treecover']<(i+0.5))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.5)) & (Low_TC_data_NaN['Treecover']<(i+0.6))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.6)) & (Low_TC_data_NaN['Treecover']<(i+0.7))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.7)) & (Low_TC_data_NaN['Treecover']<(i+0.8))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.8)) & (Low_TC_data_NaN['Treecover']<(i+0.9))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+0.9)) & (Low_TC_data_NaN['Treecover']<(i+1))).dropna(),
Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+1)) & (Low_TC_data_NaN['Treecover']<(i+1.1))).dropna()),
flierprops=green_diamond);
# Continuing this pattern by hand, the argument list would run on to
# ....Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i+24.9)) & (Low_TC_data_NaN['Treecover']<(i+25))).dropna()
# for the first 250 boxplots.
上图的 X 轴对应 0, 0.1, 0.2, 0.3, …, 1。
如果可能的话,我想要一张包含 100 个(甚至 1000 个)这样的子集的箱线图。
我用下面的代码尝试过一次,但失败了:
# Failed attempt: collect the per-bin RZSC series for the ten 0.1-wide bins in
# [0, 1), then call plt.boxplot once per bin for the first five bins.
fig = plt.figure(figsize=(5, 10))
data = [(Low_TC_data_NaN['RZSC'].where((Low_TC_data_NaN['Treecover']>=(i)) & (Low_TC_data_NaN['Treecover']<(i+0.1))).dropna()) for i in np.arange(0,1,0.1)]
# NOTE(review): each call receives a single 1-D array and no `positions`
# argument, so all five boxes presumably land on the same x position and
# overdraw one another - confirm against the matplotlib boxplot defaults.
[plt.boxplot(np.array(data[i]).ravel()) for i in range(5)];
图中的数值看起来也有些奇怪,而且与第二张图不一致。
我想为每一个数据区间都画出箱线图。简而言之,我希望把第二张图的代码写得更简洁,得到与图 2 相同的结果,并且在 x 轴上有大约 100 个取值时仍然适用。
我只是稍微调整了一下:不再循环调用箱线图,而是循环生成数据本身。
代码看起来有点像这样:
# One RZSC sample per 0.1-wide half-open Treecover bin [i, i + 0.1), for the
# bin starts generated by np.arange(0, 25, 0.1).
# `data` is kept as a flat list of Series and handed to boxplot as-is: the
# bins generally hold different numbers of points, so stacking them with
# np.array(...) would build a ragged array, which recent NumPy rejects, and
# equal-length bins would instead be read column-wise as the wrong datasets.
cover = Low_TC_data_NaN['Treecover']
data = [
    Low_TC_data_NaN['RZSC'].where((cover >= i) & (cover < i + 0.1)).dropna()
    for i in np.arange(0, 25, 0.1)
]
fig = plt.figure(figsize=(20, 10))
# Marker style for the outliers (fliers): a diamond with a white face.
green_diamond = dict(markerfacecolor='w', marker='D')
plt.boxplot(data, flierprops=green_diamond)
plt.ylim(0, 4000)
结果是这样的:
X 轴对应 0–25 之间的取值(共 250 个箱线图)。