Python seaborn 的两侧分组条形图

Two sided grouped barplots with Python seaborn

我正在尝试绘制一个类似于解释的人口金字塔的双面图 and 。问题是我有要组合在一起的分类变量 (male/female):

import pandas as pd
import seaborn as sns

# data
data = {'species': ['X', 'X', 'Y', 'Y', 'Z', 'Z', 'X', 'X', 'Y', 'Y', 'Z', 'Z'],
        'sex': ['male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female'], 
        'mass (g)': [4000, 3500, 3800, 3200, 5500, 4900, 2500, 2100, 2400, 2000, 4200, 3800],
        'age': ['adult', 'adult', 'adult', 'adult', 'adult', 'adult', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile']}
df = pd.DataFrame(data)

# convert juvenile mass to negative
df.loc[df.age.eq('juvenile'), 'mass (g)'] = df['mass (g)'].mul(-1)

# plot
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(10,5))
sns.barplot(data=df, x='mass (g)', y='species', hue='sex', ci=False, orient='horizontal', dodge=True)
ax.yaxis.tick_right()
ax.yaxis.set_label_position("right")
plt.show()

下图就是我的目标。不同的颜色条代表 male/female 性别。不同的物种 X、Y、Z 属于不同的分类组。图右侧的条形图显示每个物种的成虫质量。

我用红色勾勒出左侧的条形图,以显示每个物种的幼体数量。我如何绘制这个?我在 seaborn 文档或 SO 上找不到任何有用的东西。

尝试这样的事情:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# data
data = {'species': ['X', 'X', 'Y', 'Y', 'Z', 'Z', 'X', 'X', 'Y', 'Y', 'Z', 'Z'],
        'sex': ['male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female'], 
        'mass (g)': [4000, 3500, 3800, 3200, 5500, 4900, 2500, 2100, 2400, 2000, 4200, 3800],
        'age': ['adult', 'adult', 'adult', 'adult', 'adult', 'adult', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile']}
df = pd.DataFrame(data)

# convert juvenile mass to negative
df.loc[df.age.eq('juvenile'), 'mass (g)'] = df['mass (g)'].mul(-1)

# plot
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(10,5))
df_reshape = df.set_index(['species','sex','age']).unstack(['age','sex'])['mass (g)']
df_reshape.loc[:, 'adult'].plot.barh(ax=ax)
df_reshape.loc[:, 'juvenile'].plot.barh(legend=False, ax=ax)
plt.show()

输出:


import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# data
data = {'species': ['X', 'X', 'Y', 'Y', 'Z', 'Z', 'X', 'X', 'Y', 'Y', 'Z', 'Z'],
        'sex': ['male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female'], 
        'mass (g)': [4000, 3500, 3800, 3200, 5500, 4900, 2500, 2100, 2400, 2000, 4200, 3800],
        'age': ['adult', 'adult', 'adult', 'adult', 'adult', 'adult', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile']}
df = pd.DataFrame(data)

# convert juvenile mass to negative
df.loc[df.age.eq('juvenile'), 'mass (g)'] = df['mass (g)'].mul(-1)

# plot
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(10,5))
df_reshape = df.set_index(['species','sex','age']).unstack(['age','sex'])['mass (g)']
df_reshape.loc[:, ['adult']].plot.barh(ax=ax, edgecolor='k')
df_reshape.loc[:, ['juvenile']].plot.barh(ax=ax, label='Juvenile', color=['navy','red'], alpha=.6, edgecolor='k', hatch='/')
plt.show()

输出:

如果您混合使用正值和负值,默认情况下,seaborn 的条形图会将它们平均化。

您可以背对背绘制两个条形图,然后反转左边的一个:

from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd

data = {'species': ['X', 'X', 'Y', 'Y', 'Z', 'Z', 'X', 'X', 'Y', 'Y', 'Z', 'Z'],
        'sex': ['male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female'],
        'mass (g)': [4000, 3500, 3800, 3200, 5500, 4900, 2500, 2100, 2400, 2000, 4200, 3800],
        'age': ['adult', 'adult', 'adult', 'adult', 'adult', 'adult', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile']}
df = pd.DataFrame(data)
df['sex'] = pd.Categorical(df['sex'])  # make hue column categorical, forcing a fixed order

sns.set_theme(style='whitegrid')
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 5), sharey=True, gridspec_kw={'wspace': 0})


# draw adult subplot at the right
sns.barplot(data=df[df['age'] == 'adult'], x='mass (g)', y='species', hue='sex',
            ci=False, orient='horizontal', dodge=True, ax=ax2)
ax2.yaxis.set_label_position('right')
ax2.tick_params(axis='y', labelright=True, right=True)
ax2.set_title('  '+'adult', loc='left')
ax2.legend_.remove()  # remove the legend; the legend will be in ax1

# draw juvenile subplot at the left
sns.barplot(data=df[df['age'] == 'juvenile'], x='mass (g)', y='species', hue='sex',
            ci=False, orient='horizontal', dodge=True, ax=ax1)

# optionally use the same scale left and right
xmax = max(ax1.get_xlim()[1], ax2.get_xlim()[1])
ax1.set_xlim(xmax=xmax)
ax2.set_xlim(xmax=xmax)

ax1.invert_xaxis()  # reverse the direction
ax1.tick_params(labelleft=False, left=False)
ax1.set_ylabel('')
ax1.set_title('juvenile'+'  ', loc='right')

plt.tight_layout()
plt.show()

seaborn 的条形图的一个有趣的特点是它还会对给定的数据框的值进行平均,每个人都有一行(并计算置信区间)。

我刚刚使用 `pivot 正确调整了数据

import pandas as pd
import seaborn as sns

# data
data = {'species': ['X', 'X', 'Y', 'Y', 'Z', 'Z', 'X', 'X', 'Y', 'Y', 'Z', 'Z'],
        'sex': ['male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female', 'male', 'female'], 
        'mass (g)': [4000, 3500, 3800, 3200, 5500, 4900, 2500, 2100, 2400, 2000, 4200, 3800],
        'age': ['adult', 'adult', 'adult', 'adult', 'adult', 'adult', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile', 'juvenile']}
df = pd.DataFrame(data)

# convert juvenile mass to negative
df.loc[df.age.eq('juvenile'), 'mass (g)'] = df['mass (g)'].mul(-1)

# pivot data
df=df.pivot(columns=['age'], index=['species', 'sex'], values=['mass (g)']).reset_index()
df = df.set_index(['species', 'sex'])['mass (g)'].reset_index()

# plot
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(10,5))
sns.barplot(data=df, x='adult', y='species', hue='sex', ci=False, orient='horizontal', dodge=True)
sns.barplot(data=df, x='juvenile', y='species', hue='sex', ci=False, orient='horizontal', dodge=True)