如何使用 seaborn 防止 barplot 重叠?
How to prevent an overlap in barplot using seaborn?
month cate_cnt1_members cate_cnt2_members cate_cnt3_members cate_cnt4_members cate_cnt5_members cate_cnt6_members cate_cnt7_members cate_cnt8_members cate_cnt9_members cate_cnt10_members cate_cnt11_members cate_cnt12_members cate_cnt13_members cate_cnt14_members
201501 93.525692 5.989799 0.455098 0.027863 0.001548 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
201502 90.515995 8.396707 0.971026 0.107892 0.008380 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
201503 82.525162 14.066414 2.836065 0.505229 0.061750 0.005380 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
201504 75.546295 18.279664 4.884050 1.102780 0.172282 0.013621 0.001199 0.000109 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
201505 71.142107 20.954861 6.278794 1.401423 0.206386 0.015837 0.000593 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
201506 63.783161 23.386509 9.241094 2.914457 0.601408 0.067921 0.005178 0.000273 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
201507 62.361179 23.364693 9.888232 3.445630 0.812055 0.116408 0.010563 0.001240 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
上面显示了 pandas 数据框,它显示了不同类别的百分比,每个月都有所不同。我想使用 seaborn 来获得一个条形图,其条形图包含从第 2 列到末尾的 14 个类别成员的百分比。
这是我的代码:
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
f, ax = plt.subplots(figsize=(6, 15))
sns.barplot(x = df['month'], y = df['cate_cnt1_members'], label='cate_cnt1_members', color=sns.color_palette("Set2", 10)[0])
sns.barplot(x = df['month'], y = df['cate_cnt2_members'], label='cate_cnt2_members', color=sns.color_palette("Set2", 10)[1])
sns.barplot(x = df['month'], y = df['cate_cnt3_members'], label='cate_cnt3_members', color=sns.color_palette("Set2", 10)[2])
sns.barplot(x = df['month'], y = df['cate_cnt4_members'], label='cate_cnt4_members', color=sns.color_palette("Set2", 10)[3])
sns.barplot(x = df['month'], y = df['cate_cnt5_members'], label='cate_cnt5_members', color=sns.color_palette("Set2", 10)[4])
sns.barplot(x = df['month'], y = df['cate_cnt6_members'], label='cate_cnt6_members', color=sns.color_palette("Set2", 10)[5])
sns.barplot(x = df['month'], y = df['cate_cnt7_members'], label='cate_cnt7_members', color=sns.color_palette("Set2", 10)[6])
sns.barplot(x = df['month'], y = df['cate_cnt8_members'], label='cate_cnt8_members', color=sns.color_palette("Set2", 10)[7])
sns.barplot(x = df['month'], y = df['cate_cnt9_members'], label='cate_cnt9_members', color=sns.color_palette("Set2", 10)[8])
sns.barplot(x = df['month'], y = df['cate_cnt10_members'], label='cate_cnt10_members', color=sns.color_palette("Set2", 10)[9])
sns.barplot(x = df['month'], y = df['cate_cnt11_members'], label='cate_cnt11_members', color=sns.color_palette("Paired")[0])
sns.barplot(x = df['month'], y = df['cate_cnt12_members'], label='cate_cnt12_members', color=sns.color_palette("Paired")[1])
sns.barplot(x = df['month'], y = df['cate_cnt13_members'], label='cate_cnt13_members', color=sns.color_palette("Paired")[4])
sns.barplot(x = df['month'], y = df['cate_cnt14_members'], label='cate_cnt14_members', color=sns.color_palette(flatui)[0])
plt.ylabel("percentage of category scope count")
plt.xlabel(" Month")
ax.legend(ncol=7, loc="topper middle", frameon=True)
sns.despine(left=True, bottom=True)
结果如下。但我不希望它们相互重叠。我希望 14 个组件的总和为 100 并完全填充 100。那么我该如何实现呢?
考虑将数据从宽变长,然后 运行 一个枢轴 table 作为堆叠条形图的来源:
from io import StringIO
import pandas as pd
from matplotlib import rc, pyplot as plt
import seaborn
data = """month,cate_cnt1_members,cate_cnt2_members,cate_cnt3_members,cate_cnt4_members,cate_cnt5_members,cate_cnt6_members,cate_cnt7_members,cate_cnt8_members,cate_cnt9_members,cate_cnt10_members,cate_cnt11_members,cate_cnt12_members,cate_cnt13_members,cate_cnt14_members
201501,93.525692,5.989799,0.455098,0.027863,0.001548,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201502,90.515995,8.396707,0.971026,0.107892,0.008380,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201503,82.525162,14.066414,2.836065,0.505229,0.061750,0.005380,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201504,75.546295,18.279664,4.884050,1.102780,0.172282,0.013621,0.001199,0.000109,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201505,71.142107,20.954861,6.278794,1.401423,0.206386,0.015837,0.000593,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201506,63.783161,23.386509,9.241094,2.914457,0.601408,0.067921,0.005178,0.000273,0.000000,0.000000,0.000000,0.000000,0.000000,0.0"""
df = pd.read_csv(StringIO(data))
dfm = pd.melt(df, id_vars="month")
seaborn.set()
dfm.pivot_table(values="value", columns="variable", index="month", aggfunc='sum').plot.bar(stacked=True)
locs, labels = plt.xticks()
plt.legend(loc='upper center', ncol=7, frameon=True, shadow=False, prop={'size':8})
plt.setp(labels, rotation=0, rotation_mode="anchor", ha="center")
plt.show()
在 pandas 中有一个非常简单的方法可以做到这一点。首先,您必须将索引设置为月份,然后创建一个堆积条形图
df = df.set_index('month')
df.plot.bar(stacked=True)
在 seaborn 中执行此操作有点棘手。您必须计算每一行的累计总和,然后绘制
# set the index if you haven't
df = df.set_index('month')
df = df.cumsum(axis=1)
然后对您的原始代码进行一些小的调整。然后以相反的顺序绘制,因此 100% 条形图首先绘制。
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
f, ax = plt.subplots(figsize=(10, 15))
sns.barplot(x = df.index, y = df['cate_cnt14_members'], label='cate_cnt14_members', color=sns.color_palette("Set2", 10)[0])
sns.barplot(x = df.index, y = df['cate_cnt13_members'], label='cate_cnt13_members', color=sns.color_palette("Set2", 10)[1])
sns.barplot(x = df.index, y = df['cate_cnt12_members'], label='cate_cnt12_members', color=sns.color_palette("Set2", 10)[2])
sns.barplot(x = df.index, y = df['cate_cnt11_members'], label='cate_cnt11_members', color=sns.color_palette("Set2", 10)[3])
sns.barplot(x = df.index, y = df['cate_cnt10_members'], label='cate_cnt10_members', color=sns.color_palette("Set2", 10)[4])
sns.barplot(x = df.index, y = df['cate_cnt9_members'], label='cate_cnt9_members', color=sns.color_palette("Set2", 10)[5])
sns.barplot(x = df.index, y = df['cate_cnt8_members'], label='cate_cnt8_members', color=sns.color_palette("Set2", 10)[6])
sns.barplot(x = df.index, y = df['cate_cnt7_members'], label='cate_cnt7_members', color=sns.color_palette("Set2", 10)[7])
sns.barplot(x = df.index, y = df['cate_cnt6_members'], label='cate_cnt6_members', color=sns.color_palette("Set2", 10)[8])
sns.barplot(x = df.index, y = df['cate_cnt5_members'], label='cate_cnt5_members', color=sns.color_palette("Set2", 10)[9])
sns.barplot(x = df.index, y = df['cate_cnt4_members'], label='cate_cnt4_members', color=sns.color_palette("Paired")[0])
sns.barplot(x = df.index, y = df['cate_cnt3_members'], label='cate_cnt3_members', color=sns.color_palette("Paired")[1])
sns.barplot(x = df.index, y = df['cate_cnt2_members'], label='cate_cnt2_members', color=sns.color_palette("Paired")[4])
sns.barplot(x = df.index, y = df['cate_cnt1_members'], label='cate_cnt1_members', color=sns.color_palette(flatui)[0])
plt.ylabel("percentage of category scope count")
plt.xlabel(" Month")
ax.legend(ncol=7, loc="upper center", frameon=True)
sns.despine(left=True, bottom=True)
month cate_cnt1_members cate_cnt2_members cate_cnt3_members cate_cnt4_members cate_cnt5_members cate_cnt6_members cate_cnt7_members cate_cnt8_members cate_cnt9_members cate_cnt10_members cate_cnt11_members cate_cnt12_members cate_cnt13_members cate_cnt14_members
201501 93.525692 5.989799 0.455098 0.027863 0.001548 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
201502 90.515995 8.396707 0.971026 0.107892 0.008380 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
201503 82.525162 14.066414 2.836065 0.505229 0.061750 0.005380 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
201504 75.546295 18.279664 4.884050 1.102780 0.172282 0.013621 0.001199 0.000109 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
201505 71.142107 20.954861 6.278794 1.401423 0.206386 0.015837 0.000593 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
201506 63.783161 23.386509 9.241094 2.914457 0.601408 0.067921 0.005178 0.000273 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
201507 62.361179 23.364693 9.888232 3.445630 0.812055 0.116408 0.010563 0.001240 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
上面显示了 pandas 数据框,它显示了不同类别的百分比,每个月都有所不同。我想使用 seaborn 来获得一个条形图,其条形图包含从第 2 列到末尾的 14 个类别成员的百分比。 这是我的代码:
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
f, ax = plt.subplots(figsize=(6, 15))
sns.barplot(x = df['month'], y = df['cate_cnt1_members'], label='cate_cnt1_members', color=sns.color_palette("Set2", 10)[0])
sns.barplot(x = df['month'], y = df['cate_cnt2_members'], label='cate_cnt2_members', color=sns.color_palette("Set2", 10)[1])
sns.barplot(x = df['month'], y = df['cate_cnt3_members'], label='cate_cnt3_members', color=sns.color_palette("Set2", 10)[2])
sns.barplot(x = df['month'], y = df['cate_cnt4_members'], label='cate_cnt4_members', color=sns.color_palette("Set2", 10)[3])
sns.barplot(x = df['month'], y = df['cate_cnt5_members'], label='cate_cnt5_members', color=sns.color_palette("Set2", 10)[4])
sns.barplot(x = df['month'], y = df['cate_cnt6_members'], label='cate_cnt6_members', color=sns.color_palette("Set2", 10)[5])
sns.barplot(x = df['month'], y = df['cate_cnt7_members'], label='cate_cnt7_members', color=sns.color_palette("Set2", 10)[6])
sns.barplot(x = df['month'], y = df['cate_cnt8_members'], label='cate_cnt8_members', color=sns.color_palette("Set2", 10)[7])
sns.barplot(x = df['month'], y = df['cate_cnt9_members'], label='cate_cnt9_members', color=sns.color_palette("Set2", 10)[8])
sns.barplot(x = df['month'], y = df['cate_cnt10_members'], label='cate_cnt10_members', color=sns.color_palette("Set2", 10)[9])
sns.barplot(x = df['month'], y = df['cate_cnt11_members'], label='cate_cnt11_members', color=sns.color_palette("Paired")[0])
sns.barplot(x = df['month'], y = df['cate_cnt12_members'], label='cate_cnt12_members', color=sns.color_palette("Paired")[1])
sns.barplot(x = df['month'], y = df['cate_cnt13_members'], label='cate_cnt13_members', color=sns.color_palette("Paired")[4])
sns.barplot(x = df['month'], y = df['cate_cnt14_members'], label='cate_cnt14_members', color=sns.color_palette(flatui)[0])
plt.ylabel("percentage of category scope count")
plt.xlabel(" Month")
ax.legend(ncol=7, loc="topper middle", frameon=True)
sns.despine(left=True, bottom=True)
结果如下。但我不希望它们相互重叠。我希望 14 个组件的总和为 100 并完全填充 100。那么我该如何实现呢?
考虑将数据从宽变长,然后 运行 一个枢轴 table 作为堆叠条形图的来源:
from io import StringIO
import pandas as pd
from matplotlib import rc, pyplot as plt
import seaborn
data = """month,cate_cnt1_members,cate_cnt2_members,cate_cnt3_members,cate_cnt4_members,cate_cnt5_members,cate_cnt6_members,cate_cnt7_members,cate_cnt8_members,cate_cnt9_members,cate_cnt10_members,cate_cnt11_members,cate_cnt12_members,cate_cnt13_members,cate_cnt14_members
201501,93.525692,5.989799,0.455098,0.027863,0.001548,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201502,90.515995,8.396707,0.971026,0.107892,0.008380,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201503,82.525162,14.066414,2.836065,0.505229,0.061750,0.005380,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201504,75.546295,18.279664,4.884050,1.102780,0.172282,0.013621,0.001199,0.000109,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201505,71.142107,20.954861,6.278794,1.401423,0.206386,0.015837,0.000593,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
201506,63.783161,23.386509,9.241094,2.914457,0.601408,0.067921,0.005178,0.000273,0.000000,0.000000,0.000000,0.000000,0.000000,0.0"""
df = pd.read_csv(StringIO(data))
dfm = pd.melt(df, id_vars="month")
seaborn.set()
dfm.pivot_table(values="value", columns="variable", index="month", aggfunc='sum').plot.bar(stacked=True)
locs, labels = plt.xticks()
plt.legend(loc='upper center', ncol=7, frameon=True, shadow=False, prop={'size':8})
plt.setp(labels, rotation=0, rotation_mode="anchor", ha="center")
plt.show()
在 pandas 中有一个非常简单的方法可以做到这一点。首先,您必须将索引设置为月份,然后创建一个堆积条形图
df = df.set_index('month')
df.plot.bar(stacked=True)
在 seaborn 中执行此操作有点棘手。您必须计算每一行的累计总和,然后绘制
# set the index if you haven't
df = df.set_index('month')
df = df.cumsum(axis=1)
然后对您的原始代码进行一些小的调整。然后以相反的顺序绘制,因此 100% 条形图首先绘制。
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
f, ax = plt.subplots(figsize=(10, 15))
sns.barplot(x = df.index, y = df['cate_cnt14_members'], label='cate_cnt14_members', color=sns.color_palette("Set2", 10)[0])
sns.barplot(x = df.index, y = df['cate_cnt13_members'], label='cate_cnt13_members', color=sns.color_palette("Set2", 10)[1])
sns.barplot(x = df.index, y = df['cate_cnt12_members'], label='cate_cnt12_members', color=sns.color_palette("Set2", 10)[2])
sns.barplot(x = df.index, y = df['cate_cnt11_members'], label='cate_cnt11_members', color=sns.color_palette("Set2", 10)[3])
sns.barplot(x = df.index, y = df['cate_cnt10_members'], label='cate_cnt10_members', color=sns.color_palette("Set2", 10)[4])
sns.barplot(x = df.index, y = df['cate_cnt9_members'], label='cate_cnt9_members', color=sns.color_palette("Set2", 10)[5])
sns.barplot(x = df.index, y = df['cate_cnt8_members'], label='cate_cnt8_members', color=sns.color_palette("Set2", 10)[6])
sns.barplot(x = df.index, y = df['cate_cnt7_members'], label='cate_cnt7_members', color=sns.color_palette("Set2", 10)[7])
sns.barplot(x = df.index, y = df['cate_cnt6_members'], label='cate_cnt6_members', color=sns.color_palette("Set2", 10)[8])
sns.barplot(x = df.index, y = df['cate_cnt5_members'], label='cate_cnt5_members', color=sns.color_palette("Set2", 10)[9])
sns.barplot(x = df.index, y = df['cate_cnt4_members'], label='cate_cnt4_members', color=sns.color_palette("Paired")[0])
sns.barplot(x = df.index, y = df['cate_cnt3_members'], label='cate_cnt3_members', color=sns.color_palette("Paired")[1])
sns.barplot(x = df.index, y = df['cate_cnt2_members'], label='cate_cnt2_members', color=sns.color_palette("Paired")[4])
sns.barplot(x = df.index, y = df['cate_cnt1_members'], label='cate_cnt1_members', color=sns.color_palette(flatui)[0])
plt.ylabel("percentage of category scope count")
plt.xlabel(" Month")
ax.legend(ncol=7, loc="upper center", frameon=True)
sns.despine(left=True, bottom=True)