如何使用 seaborn 防止 barplot 重叠?

How to prevent an overlap in barplot using seaborn?

month   cate_cnt1_members   cate_cnt2_members   cate_cnt3_members   cate_cnt4_members   cate_cnt5_members   cate_cnt6_members   cate_cnt7_members   cate_cnt8_members   cate_cnt9_members   cate_cnt10_members  cate_cnt11_members  cate_cnt12_members  cate_cnt13_members  cate_cnt14_members
201501  93.525692   5.989799    0.455098    0.027863    0.001548    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201502  90.515995   8.396707    0.971026    0.107892    0.008380    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201503  82.525162   14.066414   2.836065    0.505229    0.061750    0.005380    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201504  75.546295   18.279664   4.884050    1.102780    0.172282    0.013621    0.001199    0.000109    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201505  71.142107   20.954861   6.278794    1.401423    0.206386    0.015837    0.000593    0.000000    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201506  63.783161   23.386509   9.241094    2.914457    0.601408    0.067921    0.005178    0.000273    0.000000    0.000000    0.000000    0.000000    0.000000    0.0
201507  62.361179   23.364693   9.888232    3.445630    0.812055    0.116408    0.010563    0.001240    0.000000    0.000000    0.000000    0.000000    0.000000    0.0

上面是一个 pandas 数据框,记录了各个类别每个月所占的百分比。我想用 seaborn 画一张条形图,其中每个月的条形由第 2 列到最后一列这 14 个类别成员的百分比组成。这是我的代码:

# Draw one seaborn bar series per category column on a single shared Axes.
# NOTE: every series is drawn from zero at the same x positions, so the 14
# series are painted over one another rather than stacked.
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
f, ax = plt.subplots(figsize=(6, 15))

# One colour per category, in column order: ten "Set2" colours, three picks
# from "Paired", then the first "flatui" colour.
set2 = sns.color_palette("Set2", 10)
paired = sns.color_palette("Paired")
colors = list(set2) + [paired[0], paired[1], paired[4], sns.color_palette(flatui)[0]]

# Categories are numbered 1..14; the colour list above has exactly 14 entries.
for i, color in enumerate(colors, start=1):
    column = 'cate_cnt{}_members'.format(i)
    sns.barplot(x=df['month'], y=df[column], label=column, color=color)

plt.ylabel("percentage of category scope count")
plt.xlabel(" Month")
# "upper center" is a location string matplotlib accepts; the previous value
# "topper middle" is not a valid legend location.
ax.legend(ncol=7, loc="upper center", frameon=True)
sns.despine(left=True, bottom=True)

结果如下。但我不希望这些条形相互重叠,而是希望 14 个类别堆叠在一起,每个月的总和为 100,并且正好填满 100%。那么我该如何实现呢?

可以考虑先将数据从宽格式转换为长格式,然后用数据透视表(pivot table)作为堆叠条形图的数据源:

from io import StringIO
import pandas as pd
from matplotlib import rc, pyplot as plt
import seaborn

data = """month,cate_cnt1_members,cate_cnt2_members,cate_cnt3_members,cate_cnt4_members,cate_cnt5_members,cate_cnt6_members,cate_cnt7_members,cate_cnt8_members,cate_cnt9_members,cate_cnt10_members,cate_cnt11_members,cate_cnt12_members,cate_cnt13_members,cate_cnt14_members
201501,93.525692,5.989799,0.455098,0.027863,0.001548,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201502,90.515995,8.396707,0.971026,0.107892,0.008380,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201503,82.525162,14.066414,2.836065,0.505229,0.061750,0.005380,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201504,75.546295,18.279664,4.884050,1.102780,0.172282,0.013621,0.001199,0.000109,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201505,71.142107,20.954861,6.278794,1.401423,0.206386,0.015837,0.000593,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201506,63.783161,23.386509,9.241094,2.914457,0.601408,0.067921,0.005178,0.000273,0.000000,0.000000,0.000000,0.000000,0.000000,0.0"""

# Parse the inline CSV sample into a wide frame: one row per month, one
# column per category.
df = pd.read_csv(StringIO(data))

# Wide -> long: one row per (month, category) pair. pd.melt names the two
# generated columns "variable" (category name) and "value" (percentage).
dfm = pd.melt(df, id_vars="month")

seaborn.set()

# Long -> wide again through a pivot table indexed by month.
pivoted = dfm.pivot_table(values="value", columns="variable", index="month", aggfunc='sum')

# pivot_table sorts the column labels as strings, which would place
# cate_cnt10..14_members before cate_cnt1_members. Restore the order of the
# source columns so the stack and the legend follow category 1..14.
category_order = [col for col in df.columns if col != "month"]
pivoted = pivoted.reindex(columns=category_order)

pivoted.plot.bar(stacked=True)
locs, labels = plt.xticks()
plt.legend(loc='upper center', ncol=7, frameon=True, shadow=False, prop={'size':8})
# Keep the month tick labels horizontal and centred under their bars.
plt.setp(labels, rotation=0, rotation_mode="anchor", ha="center")
plt.show()

在 pandas 中有一个非常简单的方法可以做到这一点:首先将 month 列设置为索引,然后绘制堆叠条形图:

# Index the frame by month so that every month becomes one bar on the x-axis.
df = df.set_index(keys='month')
# stacked=True piles the columns of each row on top of one another.
df.plot(kind='bar', stacked=True)

在 seaborn 中实现这一点要麻烦一些:必须先计算每一行的累计和,然后再绘图:

# Set the index if you haven't: only move 'month' into the index while it is
# still a regular column, so running this after an earlier set_index('month')
# does not raise KeyError.
if 'month' in df.columns:
    df = df.set_index('month')
# Running total across each row (axis=1): every column becomes the sum of
# itself and all columns to its left.
df = df.cumsum(axis=1)

接着对您的原始代码稍作调整:按相反的顺序绘制,先画累计到 100% 的条形,这样后面较矮的条形会依次覆盖在它上面。

# Draw the category series from cate_cnt14_members down to cate_cnt1_members
# on one shared Axes, so later series are painted on top of earlier ones.
# Assumes `df` is indexed by month and already holds row-wise cumulative
# sums - confirm this before running.
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
f, ax = plt.subplots(figsize=(10, 15))

# Colours in drawing order: ten "Set2" colours, three picks from "Paired",
# then the first "flatui" colour.
set2 = sns.color_palette("Set2", 10)
paired = sns.color_palette("Paired")
colors = list(set2) + [paired[0], paired[1], paired[4], sns.color_palette(flatui)[0]]

# Category numbers count down 14 -> 1 while the colour list is walked forwards.
for i, color in zip(range(14, 0, -1), colors):
    column = 'cate_cnt{}_members'.format(i)
    sns.barplot(x=df.index, y=df[column], label=column, color=color)

plt.ylabel("percentage of category scope count")
plt.xlabel(" Month")
ax.legend(ncol=7, loc="upper center", frameon=True)
sns.despine(left=True, bottom=True)