Python plotly: 设置箱形图中箱子的宽度与该类别中的行数成比例
Python plotly: Setting width of the box in a box plot proportional to the number of rows in that category
我在 Python 中有这样一个数据框:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.figure_factory as ff
# Fix the seed so the random sample (and every count below) is reproducible.
np.random.seed(1234)
samples = np.random.randn(10, 4)
df = pd.DataFrame(samples, columns=['Col1', 'Col2', 'Col3', 'Col4'])
# Number the rows of the wide frame 1..N.
df['id'] = range(1, len(df.index) + 1)
df  # bare expression: shows the frame in a notebook, no effect in a script

# Reshape to long form (one row per id/type/value) and order the rows by
# value, which mixes the categories in an effectively random order.
melted = df.melt(id_vars=['id'], var_name='type', value_name='value')
long_df = melted.sort_values(by='value')
# Overwrite the id with the 1-based position in that sorted order.
long_df['id'] = range(1, len(long_df.index) + 1)
long_df.head()
# Drop the rows whose new id is below 10 so the categories end up with
# different numbers of rows.
low_id_labels = long_df[long_df['id'] < 10].index
long_df = long_df.drop(low_id_labels)
long_df.head()
# Rows remaining per category, listed in category-name order.
long_df['type'].value_counts().sort_index()
我使用这些命令创建了一个箱线图:
# Keyword options for the facet grid: box traces with the 'type' column on
# the x axis and 'value' on the y axis, coloured by the same categorical
# 'type' column, without the ggplot2 theme or a legend.
grid_options = {
    'x': 'type',
    'y': 'value',
    'trace_type': 'box',
    'color_name': 'type',
    'color_is_cat': True,
    'width': 1000,
    'ggplot2': False,
    'showlegend': False,
}
box_plot = ff.create_facet_grid(long_df, **grid_options)
box_plot.show()
有没有办法让每个箱体的宽度与该类别中的行数成比例?(类似于 R 中 varwidth 的效果。)我希望箱体宽度按以下顺序从窄到宽排列:Col2 (n=5) --> Col4 (n=7) --> Col1 (n=9) --> Col3 (n=10)
可以用 matplotlib 来完成:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Fix the seed so the random sample (and every count below) is reproducible.
np.random.seed(1234)
samples = np.random.randn(10, 4)
df = pd.DataFrame(samples, columns=['Col1', 'Col2', 'Col3', 'Col4'])
# Number the rows of the wide frame 1..N.
df['id'] = range(1, len(df.index) + 1)

# Reshape to long form (one row per id/type/value) and order the rows by
# value, which mixes the categories in an effectively random order.
melted = df.melt(id_vars=['id'], var_name='type', value_name='value')
long_df = melted.sort_values(by='value')
# Overwrite the id with the 1-based position in that sorted order.
long_df['id'] = range(1, len(long_df.index) + 1)
# Drop the rows whose new id is below 10 so the categories end up with
# different numbers of rows.
low_id_labels = long_df[long_df['id'] < 10].index
long_df = long_df.drop(low_id_labels)
# Rows remaining per category, listed in category-name order.
long_df['type'].value_counts().sort_index()
# Base colour names; each one is expanded below into its xkcd
# 'dark ...' (lines) and 'light ...' (box fill) variant.
cols = ['red', 'green', 'blue', 'orange']
plt.style.use('ggplot')
fig, ax = plt.subplots()

# Count the rows per category once instead of re-filtering the frame for
# every comparison made by the sort.
group_sizes = long_df['type'].value_counts()
# Plot the categories from fewest to most rows, i.e. thinnest box first.
ordered_types = sorted(long_df['type'].unique(),
                       key=lambda c: group_sizes[c])

for i, col in enumerate(ordered_types):
    col_df = long_df[long_df['type'] == col]
    # Box centres are 120 x-units apart and each box is 10 x-units wide per
    # row, so the width is directly proportional to the group size.
    # NOTE(review): newer Matplotlib releases rename `labels` to
    # `tick_labels` -- confirm against the installed version.
    bp = plt.boxplot(col_df['value'],
                     positions=[i*120],
                     widths=len(col_df['value'])*10,
                     patch_artist=True,
                     labels=[col]
                     )
    # Colour every line element of this box with the dark variant ...
    for element in ['boxes', 'whiskers', 'fliers', 'means', 'medians', 'caps']:
        plt.setp(bp[element], color=f'xkcd:dark {cols[i]}')
    # ... and fill the box itself with the light variant.
    for patch in bp['boxes']:
        patch.set(facecolor=f'xkcd:light {cols[i]}')

plt.xlabel('type')
plt.show()
或者,如果您更喜欢接近 R 的东西:
from plotnine import ggplot, aes, geom_boxplot
import numpy as np
import pandas as pd
# Fix the seed so the random sample (and every count below) is reproducible.
np.random.seed(1234)
samples = np.random.randn(10, 4)
df = pd.DataFrame(samples, columns=['Col1', 'Col2', 'Col3', 'Col4'])
# Number the rows of the wide frame 1..N.
df['id'] = range(1, len(df.index) + 1)

# Reshape to long form (one row per id/type/value) and order the rows by
# value, which mixes the categories in an effectively random order.
melted = df.melt(id_vars=['id'], var_name='type', value_name='value')
long_df = melted.sort_values(by='value')
# Overwrite the id with the 1-based position in that sorted order.
long_df['id'] = range(1, len(long_df.index) + 1)
# Drop the rows whose new id is below 10 so the categories end up with
# different numbers of rows.
low_id_labels = long_df[long_df['id'] < 10].index
long_df = long_df.drop(low_id_labels)

# Category names ordered from the fewest to the most rows; using them as the
# categorical's category order fixes the order of the 'type' values.
rows_per_type = long_df['type'].value_counts(ascending=True)
type_list = rows_per_type.index.tolist()
long_df['type'] = pd.Categorical(long_df['type'], categories=type_list)
# Build the plot from its parts: 'type' on x (also used as the fill colour)
# and 'value' on y, drawn as semi-transparent boxes without a legend.
# varwidth=True is the option this answer relies on to make the box width
# depend on the number of rows in each group.
mapping = aes(x='type', y='value', fill='type')
box_layer = geom_boxplot(varwidth = True, alpha=0.8, show_legend=False)
p = ggplot(long_df) + mapping + box_layer
print(p)
我在 Python 中有这样一个数据框:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.figure_factory as ff
# Fix the seed so the random sample (and every count below) is reproducible.
np.random.seed(1234)
samples = np.random.randn(10, 4)
df = pd.DataFrame(samples, columns=['Col1', 'Col2', 'Col3', 'Col4'])
# Number the rows of the wide frame 1..N.
df['id'] = range(1, len(df.index) + 1)
df  # bare expression: shows the frame in a notebook, no effect in a script

# Reshape to long form (one row per id/type/value) and order the rows by
# value, which mixes the categories in an effectively random order.
melted = df.melt(id_vars=['id'], var_name='type', value_name='value')
long_df = melted.sort_values(by='value')
# Overwrite the id with the 1-based position in that sorted order.
long_df['id'] = range(1, len(long_df.index) + 1)
long_df.head()
# Drop the rows whose new id is below 10 so the categories end up with
# different numbers of rows.
low_id_labels = long_df[long_df['id'] < 10].index
long_df = long_df.drop(low_id_labels)
long_df.head()
# Rows remaining per category, listed in category-name order.
long_df['type'].value_counts().sort_index()
我使用这些命令创建了一个箱线图:
# Keyword options for the facet grid: box traces with the 'type' column on
# the x axis and 'value' on the y axis, coloured by the same categorical
# 'type' column, without the ggplot2 theme or a legend.
grid_options = {
    'x': 'type',
    'y': 'value',
    'trace_type': 'box',
    'color_name': 'type',
    'color_is_cat': True,
    'width': 1000,
    'ggplot2': False,
    'showlegend': False,
}
box_plot = ff.create_facet_grid(long_df, **grid_options)
box_plot.show()
可以用 matplotlib 来完成:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Fix the seed so the random sample (and every count below) is reproducible.
np.random.seed(1234)
samples = np.random.randn(10, 4)
df = pd.DataFrame(samples, columns=['Col1', 'Col2', 'Col3', 'Col4'])
# Number the rows of the wide frame 1..N.
df['id'] = range(1, len(df.index) + 1)

# Reshape to long form (one row per id/type/value) and order the rows by
# value, which mixes the categories in an effectively random order.
melted = df.melt(id_vars=['id'], var_name='type', value_name='value')
long_df = melted.sort_values(by='value')
# Overwrite the id with the 1-based position in that sorted order.
long_df['id'] = range(1, len(long_df.index) + 1)
# Drop the rows whose new id is below 10 so the categories end up with
# different numbers of rows.
low_id_labels = long_df[long_df['id'] < 10].index
long_df = long_df.drop(low_id_labels)
# Rows remaining per category, listed in category-name order.
long_df['type'].value_counts().sort_index()
# Base colour names; each one is expanded below into its xkcd
# 'dark ...' (lines) and 'light ...' (box fill) variant.
cols = ['red', 'green', 'blue', 'orange']
plt.style.use('ggplot')
fig, ax = plt.subplots()

# Count the rows per category once instead of re-filtering the frame for
# every comparison made by the sort.
group_sizes = long_df['type'].value_counts()
# Plot the categories from fewest to most rows, i.e. thinnest box first.
ordered_types = sorted(long_df['type'].unique(),
                       key=lambda c: group_sizes[c])

for i, col in enumerate(ordered_types):
    col_df = long_df[long_df['type'] == col]
    # Box centres are 120 x-units apart and each box is 10 x-units wide per
    # row, so the width is directly proportional to the group size.
    # NOTE(review): newer Matplotlib releases rename `labels` to
    # `tick_labels` -- confirm against the installed version.
    bp = plt.boxplot(col_df['value'],
                     positions=[i*120],
                     widths=len(col_df['value'])*10,
                     patch_artist=True,
                     labels=[col]
                     )
    # Colour every line element of this box with the dark variant ...
    for element in ['boxes', 'whiskers', 'fliers', 'means', 'medians', 'caps']:
        plt.setp(bp[element], color=f'xkcd:dark {cols[i]}')
    # ... and fill the box itself with the light variant.
    for patch in bp['boxes']:
        patch.set(facecolor=f'xkcd:light {cols[i]}')

plt.xlabel('type')
plt.show()
或者,如果您更喜欢接近 R 的东西:
from plotnine import ggplot, aes, geom_boxplot
import numpy as np
import pandas as pd
# Fix the seed so the random sample (and every count below) is reproducible.
np.random.seed(1234)
samples = np.random.randn(10, 4)
df = pd.DataFrame(samples, columns=['Col1', 'Col2', 'Col3', 'Col4'])
# Number the rows of the wide frame 1..N.
df['id'] = range(1, len(df.index) + 1)

# Reshape to long form (one row per id/type/value) and order the rows by
# value, which mixes the categories in an effectively random order.
melted = df.melt(id_vars=['id'], var_name='type', value_name='value')
long_df = melted.sort_values(by='value')
# Overwrite the id with the 1-based position in that sorted order.
long_df['id'] = range(1, len(long_df.index) + 1)
# Drop the rows whose new id is below 10 so the categories end up with
# different numbers of rows.
low_id_labels = long_df[long_df['id'] < 10].index
long_df = long_df.drop(low_id_labels)

# Category names ordered from the fewest to the most rows; using them as the
# categorical's category order fixes the order of the 'type' values.
rows_per_type = long_df['type'].value_counts(ascending=True)
type_list = rows_per_type.index.tolist()
long_df['type'] = pd.Categorical(long_df['type'], categories=type_list)
# Build the plot from its parts: 'type' on x (also used as the fill colour)
# and 'value' on y, drawn as semi-transparent boxes without a legend.
# varwidth=True is the option this answer relies on to make the box width
# depend on the number of rows in each group.
mapping = aes(x='type', y='value', fill='type')
box_layer = geom_boxplot(varwidth = True, alpha=0.8, show_legend=False)
p = ggplot(long_df) + mapping + box_layer
print(p)