从数据框创建多个箱线图
Create multiple boxplots from dataframe
我想根据数据框中的数据创建多个(在本例中为两个)箱线图
我有以下数据框:
Country Fund R^2 Style
0 Austria BG EMCore Convertibles Global CHF R T 0.739131 Allocation
1 Austria BG EMCore Convertibles Global R T 0.740917 Allocation
2 Austria BG Trend A T 0.738376 Fixed Income
3 Austria Banken Euro Bond-Mix A 0.71161 Fixed Income
4 Austria Banken KMU-Fonds T 0.778276 Allocation
5 Brazil Banken Nachhaltigkeitsfonds T 0.912808 Allocation
6 Brazil Banken Portfolio-Mix A 0.857019 Allocation
7 Brazil Banken Portfolio-Mix T 0.868856 Fixed Income
8 Brazil Banken Sachwerte-Fonds T 0.730626 Fixed Income
9 Brazil Banken Strategie Wachstum T 0.918684 Fixed Income
我想为每个国家/地区创建一个箱线图,按样式汇总并显示 R^2 的分布。
我考虑过使用 groupby 操作,但不知为何始终无法画出两张图表(每个国家/地区各一张)。
提前致谢
给你。具体说明见代码中的注释。
=^..^=
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from io import StringIO
data = StringIO("""
Country R^2 Style
Austria 0.739131 Allocation
Austria 0.740917 Allocation
Austria 0.738376 Fixed_Income
Austria 0.71161 Fixed_Income
Austria 0.778276 Allocation
Brazil 0.912808 Allocation
Brazil 0.857019 Allocation
Brazil 0.868856 New_Style
Brazil 0.730626 Fixed_Income
Brazil 0.918684 Fixed_Income
Brazil 0.618684 New_Style
""")
# load the space-separated sample table into a data frame
df = pd.read_csv(data, sep=' ')
# group data by Country; iterating the groupby yields (country, sub-frame) pairs
grouped_data = df.groupby('Country')
# draw one box-plot figure per Country
for country, country_df in grouped_data:
    # number the rows within each Style (0, 1, 2, ...) so that the pivot below
    # has a unique (index, column) pair for every R^2 value
    occurrence = country_df.groupby('Style').cumcount()
    # pivot rows into columns: one column of R^2 values per Style
    # (keyword arguments are required by pandas >= 2.0)
    wide = country_df.assign(g=occurrence).pivot(
        index='g', columns='Style', values='R^2')
    # plot one box per Style column, in the pivot's column order
    wide.boxplot(column=list(wide.columns))
    plt.title(country)
    plt.show()
输出:
使用 seaborn 来完成这类任务。这里有几个选项:
使用 seaborn 的箱线图
import seaborn as sns
# apply seaborn's default plot theme to the figures drawn below
sns.set()
# Note - the data is stored in a data frame df
# grouped box plot of R^2: Country on the x axis, one box per Style (hue) within each Country
sns.boxplot(x='Country', y='R^2', hue='Style', data=df)
或者,您可以使用 seaborn 的 FacetGrid。
# build a grid of facets from df: one column per Country, one row per Style
g = sns.FacetGrid(df, col="Country", row="Style")
# draw a box plot of the R^2 column in every facet
# NOTE(review): the seaborn docs suggest figure-level functions such as
# catplot(kind='box') over FacetGrid.map for categorical plots - confirm
# against the seaborn version in use
g = g.map(sns.boxplot, 'R^2', orient='v')
我自己想出了一些解决办法。
# `df` is the table from the original question; it must already be loaded as a
# pandas DataFrame with (at least) the columns Country, R^2 and Style
uniquenames = df.Country.unique()
# create dictionary of the data with countries set as keys and the
# matching rows of df as values
diction = {name: df[df.Country == name] for name in uniquenames}
# plot the data: one figure per country, one box per Style
for country, frame in diction.items():
    frame.boxplot(column="R^2", by="Style",
                  figsize=(15, 6), patch_artist=True, fontsize=12)
    # rotate the Style tick labels so long names stay readable
    plt.xticks(rotation=90)
    plt.title(country, fontsize=12)
我想根据数据框中的数据创建多个(在本例中为两个)箱线图
我有以下数据框:
Country Fund R^2 Style
0 Austria BG EMCore Convertibles Global CHF R T 0.739131 Allocation
1 Austria BG EMCore Convertibles Global R T 0.740917 Allocation
2 Austria BG Trend A T 0.738376 Fixed Income
3 Austria Banken Euro Bond-Mix A 0.71161 Fixed Income
4 Austria Banken KMU-Fonds T 0.778276 Allocation
5 Brazil Banken Nachhaltigkeitsfonds T 0.912808 Allocation
6 Brazil Banken Portfolio-Mix A 0.857019 Allocation
7 Brazil Banken Portfolio-Mix T 0.868856 Fixed Income
8 Brazil Banken Sachwerte-Fonds T 0.730626 Fixed Income
9 Brazil Banken Strategie Wachstum T 0.918684 Fixed Income
我想为每个国家/地区创建一个箱线图,按样式汇总并显示 R^2 的分布。 我考虑过使用 groupby 操作,但不知为何始终无法画出两张图表(每个国家/地区各一张)。
提前致谢
给你。具体说明见代码中的注释。
=^..^=
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from io import StringIO
data = StringIO("""
Country R^2 Style
Austria 0.739131 Allocation
Austria 0.740917 Allocation
Austria 0.738376 Fixed_Income
Austria 0.71161 Fixed_Income
Austria 0.778276 Allocation
Brazil 0.912808 Allocation
Brazil 0.857019 Allocation
Brazil 0.868856 New_Style
Brazil 0.730626 Fixed_Income
Brazil 0.918684 Fixed_Income
Brazil 0.618684 New_Style
""")
# load the space-separated sample table into a data frame
df = pd.read_csv(data, sep=' ')
# group data by Country; iterating the groupby yields (country, sub-frame) pairs
grouped_data = df.groupby('Country')
# draw one box-plot figure per Country
for country, country_df in grouped_data:
    # number the rows within each Style (0, 1, 2, ...) so that the pivot below
    # has a unique (index, column) pair for every R^2 value
    occurrence = country_df.groupby('Style').cumcount()
    # pivot rows into columns: one column of R^2 values per Style
    # (keyword arguments are required by pandas >= 2.0)
    wide = country_df.assign(g=occurrence).pivot(
        index='g', columns='Style', values='R^2')
    # plot one box per Style column, in the pivot's column order
    wide.boxplot(column=list(wide.columns))
    plt.title(country)
    plt.show()
输出:
使用 seaborn 来完成这类任务。这里有几个选项:
使用 seaborn 的箱线图
import seaborn as sns
# apply seaborn's default plot theme to the figures drawn below
sns.set()
# Note - the data is stored in a data frame df
# grouped box plot of R^2: Country on the x axis, one box per Style (hue) within each Country
sns.boxplot(x='Country', y='R^2', hue='Style', data=df)
或者,您可以使用 seaborn 的 FacetGrid。
# build a grid of facets from df: one column per Country, one row per Style
g = sns.FacetGrid(df, col="Country", row="Style")
# draw a box plot of the R^2 column in every facet
# NOTE(review): the seaborn docs suggest figure-level functions such as
# catplot(kind='box') over FacetGrid.map for categorical plots - confirm
# against the seaborn version in use
g = g.map(sns.boxplot, 'R^2', orient='v')
我自己想出了一些解决办法。
# `df` is the table from the original question; it must already be loaded as a
# pandas DataFrame with (at least) the columns Country, R^2 and Style
uniquenames = df.Country.unique()
# create dictionary of the data with countries set as keys and the
# matching rows of df as values
diction = {name: df[df.Country == name] for name in uniquenames}
# plot the data: one figure per country, one box per Style
for country, frame in diction.items():
    frame.boxplot(column="R^2", by="Style",
                  figsize=(15, 6), patch_artist=True, fontsize=12)
    # rotate the Style tick labels so long names stay readable
    plt.xticks(rotation=90)
    plt.title(country, fontsize=12)