Plotly:如何处理箱形图中各类别之间不均匀的间隙?
Plotly: How to handle uneven gaps between categories in a box plot?
我正在尝试使用较大数据集的一个子集生成箱形图。显示图表时,各个箱体之间出现了奇怪的间隙。有没有办法让每个箱形图都居中对齐到对应的类别标签上?另外,能否去掉图例中重复的标签?
# Draw grouped box plots, adding one Box trace per (model, alpha) pair.
# `go`, `melted_data`, `combos` and `colors` are defined outside this snippet.
fig = go.Figure()
melted_data = melted_data.sort_values(['model', 'alpha'])
for model, alpha in zip(combos['model'].to_list(), combos['alpha'].to_list()):
    # Rows of the long-format frame that belong to this model/alpha pair.
    data = melted_data[(melted_data.model == model) & (melted_data.alpha == alpha)]
    # NOTE(review): every pair becomes its own trace named after `alpha`.
    # If an alpha occurs under several models, those traces share a name,
    # which is presumably where the repeated legend labels come from; with
    # boxmode='group' the per-pair traces are also the likely cause of the
    # uneven gaps -- confirm against the Plotly box documentation.
    fig.add_trace(go.Box(
        y=data['value'],
        x=data['model'],
        marker_color=colors[alpha],
        name=alpha,
        boxmean=True,
    ))
fig.update_layout(
    showlegend=True,
    boxmode='group',  # group together boxes of the different traces for each value of x
    boxgap=.1,
)
fig.show()
更新
这是重现问题的代码:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly
# Reproduction script: build a small random data set and draw it as grouped
# box plots with one Box trace per (model, sample) pair.

# One colour per sample label, taken from plotly.colors.qualitative.Plotly.
colors = {label: plotly.colors.qualitative.Plotly[i]
          for i, label in enumerate('ABCDE')}

# Row i of the frame is the pair (models[i], samples[i]).
models = ['modelA', 'modelA', 'modelA', 'modelA', 'modelA',
          'modelB', 'modelB', 'modelC', 'modelC', 'modelB']
samples = ['A', 'B', 'C', 'D', 'E', 'A', 'B', 'B', 'D', 'C']
score_cols = ['score_{}'.format(x) for x in range(10)]

# Ten lists of ten normal draws: the means are themselves drawn around 0.90,
# and every list uses a standard deviation of 0.06. The means are drawn first,
# then one list per mean, so the RNG is consumed in the original order.
scores = [np.random.normal(mu, .06, 10).tolist()
          for mu in np.random.normal(.90, .06, 10)]

data = dict(zip(score_cols, scores))
data['model'] = models
data['sample'] = samples
df = pd.DataFrame(data)

# Wide -> long: one row per (model, sample, score column) with the score in 'value'.
melted_data = pd.melt(df, id_vars=['model', 'sample'], value_vars=score_cols)

fig = go.Figure()
for model, sample in zip(models, samples):
    # Use a separate name for the per-pair rows so the `data` dict that built
    # `df` is not overwritten inside the loop.
    subset = melted_data[(melted_data['model'] == model) & (melted_data['sample'] == sample)]
    # NOTE(review): this adds ten traces (one per pair), each named after its
    # sample, so sample names repeat across traces -- presumably the source of
    # the repeated legend labels and, with boxmode='group', of the uneven
    # gaps; confirm against the Plotly box documentation.
    fig.add_trace(go.Box(
        y=subset['value'],
        x=subset['model'],
        marker_color=colors[sample],
        name=sample,
        boxmean=True,
    ))
fig.update_layout(
    showlegend=True,
    boxmode='group',  # group together boxes of the different traces for each value of x
    boxgap=.1,
)
fig.show()
我不太明白为什么你的 go.Figure 会变成这样。但是,如果将数据从宽格式转换为长格式并改用 px.box,你将获得更短、更清晰的代码,并且可以说视觉效果也更好。更多细节可以稍后再讨论,完整的代码片段见下图之后:
完整代码:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly
import plotly.express as px
# Answer script: reshape the same random data to long format and let
# plotly.express draw the grouped box plot.

# One colour per sample label, taken from plotly.colors.qualitative.Plotly.
colors = {label: plotly.colors.qualitative.Plotly[i]
          for i, label in enumerate('ABCDE')}

# Row i of the frame is the pair (models[i], samples[i]).
models = ['modelA', 'modelA', 'modelA', 'modelA', 'modelA',
          'modelB', 'modelB', 'modelC', 'modelC', 'modelB']
samples = ['A', 'B', 'C', 'D', 'E', 'A', 'B', 'B', 'D', 'C']
score_cols = ['score_{}'.format(x) for x in range(10)]

# Ten lists of ten normal draws: the means are themselves drawn around 0.90,
# and every list uses a standard deviation of 0.06. The means are drawn first,
# then one list per mean, so the RNG is consumed in the original order.
scores = [np.random.normal(mu, .06, 10).tolist()
          for mu in np.random.normal(.90, .06, 10)]

data = dict(zip(score_cols, scores))
data['model'] = models
data['sample'] = samples
df = pd.DataFrame(data)

# Wide -> long: the score_<suffix> columns collapse into a single 'score'
# column, with the suffix stored in 'type'. The suffix pattern is a raw
# string so that `\w` reaches the regex engine without an invalid-escape
# warning from the Python parser.
df_long = pd.wide_to_long(df, stubnames='score',
                          i=['model', 'sample'], j='type',
                          sep='_', suffix=r'\w+').reset_index()
df_long  # bare expression: shows the frame in a notebook, does nothing in a script

# `color='sample'` splits the boxes by sample; the explicit colour map puts
# the `colors` dict defined above to use.
fig = px.box(df_long, x='model', y="score", color='sample',
             color_discrete_map=colors)
fig.show()
我正在尝试使用较大数据集的一个子集生成箱形图。显示图表时,各个箱体之间出现了奇怪的间隙。有没有办法让每个箱形图都居中对齐到对应的类别标签上?另外,能否去掉图例中重复的标签?
# Draw grouped box plots, adding one Box trace per (model, alpha) pair.
# `go`, `melted_data`, `combos` and `colors` are defined outside this snippet.
fig = go.Figure()
melted_data = melted_data.sort_values(['model', 'alpha'])
for model, alpha in zip(combos['model'].to_list(), combos['alpha'].to_list()):
    # Rows of the long-format frame that belong to this model/alpha pair.
    data = melted_data[(melted_data.model == model) & (melted_data.alpha == alpha)]
    # NOTE(review): every pair becomes its own trace named after `alpha`.
    # If an alpha occurs under several models, those traces share a name,
    # which is presumably where the repeated legend labels come from; with
    # boxmode='group' the per-pair traces are also the likely cause of the
    # uneven gaps -- confirm against the Plotly box documentation.
    fig.add_trace(go.Box(
        y=data['value'],
        x=data['model'],
        marker_color=colors[alpha],
        name=alpha,
        boxmean=True,
    ))
fig.update_layout(
    showlegend=True,
    boxmode='group',  # group together boxes of the different traces for each value of x
    boxgap=.1,
)
fig.show()
更新
这是重现问题的代码:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly
# Reproduction script: build a small random data set and draw it as grouped
# box plots with one Box trace per (model, sample) pair.

# One colour per sample label, taken from plotly.colors.qualitative.Plotly.
colors = {label: plotly.colors.qualitative.Plotly[i]
          for i, label in enumerate('ABCDE')}

# Row i of the frame is the pair (models[i], samples[i]).
models = ['modelA', 'modelA', 'modelA', 'modelA', 'modelA',
          'modelB', 'modelB', 'modelC', 'modelC', 'modelB']
samples = ['A', 'B', 'C', 'D', 'E', 'A', 'B', 'B', 'D', 'C']
score_cols = ['score_{}'.format(x) for x in range(10)]

# Ten lists of ten normal draws: the means are themselves drawn around 0.90,
# and every list uses a standard deviation of 0.06. The means are drawn first,
# then one list per mean, so the RNG is consumed in the original order.
scores = [np.random.normal(mu, .06, 10).tolist()
          for mu in np.random.normal(.90, .06, 10)]

data = dict(zip(score_cols, scores))
data['model'] = models
data['sample'] = samples
df = pd.DataFrame(data)

# Wide -> long: one row per (model, sample, score column) with the score in 'value'.
melted_data = pd.melt(df, id_vars=['model', 'sample'], value_vars=score_cols)

fig = go.Figure()
for model, sample in zip(models, samples):
    # Use a separate name for the per-pair rows so the `data` dict that built
    # `df` is not overwritten inside the loop.
    subset = melted_data[(melted_data['model'] == model) & (melted_data['sample'] == sample)]
    # NOTE(review): this adds ten traces (one per pair), each named after its
    # sample, so sample names repeat across traces -- presumably the source of
    # the repeated legend labels and, with boxmode='group', of the uneven
    # gaps; confirm against the Plotly box documentation.
    fig.add_trace(go.Box(
        y=subset['value'],
        x=subset['model'],
        marker_color=colors[sample],
        name=sample,
        boxmean=True,
    ))
fig.update_layout(
    showlegend=True,
    boxmode='group',  # group together boxes of the different traces for each value of x
    boxgap=.1,
)
fig.show()
我不太明白为什么你的 go.Figure 会变成这样。但是,如果将数据从宽格式转换为长格式并改用 px.box,你将获得更短、更清晰的代码,并且可以说视觉效果也更好。更多细节可以稍后再讨论,完整的代码片段见下图之后:
完整代码:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly
import plotly.express as px
# Answer script: reshape the same random data to long format and let
# plotly.express draw the grouped box plot.

# One colour per sample label, taken from plotly.colors.qualitative.Plotly.
colors = {label: plotly.colors.qualitative.Plotly[i]
          for i, label in enumerate('ABCDE')}

# Row i of the frame is the pair (models[i], samples[i]).
models = ['modelA', 'modelA', 'modelA', 'modelA', 'modelA',
          'modelB', 'modelB', 'modelC', 'modelC', 'modelB']
samples = ['A', 'B', 'C', 'D', 'E', 'A', 'B', 'B', 'D', 'C']
score_cols = ['score_{}'.format(x) for x in range(10)]

# Ten lists of ten normal draws: the means are themselves drawn around 0.90,
# and every list uses a standard deviation of 0.06. The means are drawn first,
# then one list per mean, so the RNG is consumed in the original order.
scores = [np.random.normal(mu, .06, 10).tolist()
          for mu in np.random.normal(.90, .06, 10)]

data = dict(zip(score_cols, scores))
data['model'] = models
data['sample'] = samples
df = pd.DataFrame(data)

# Wide -> long: the score_<suffix> columns collapse into a single 'score'
# column, with the suffix stored in 'type'. The suffix pattern is a raw
# string so that `\w` reaches the regex engine without an invalid-escape
# warning from the Python parser.
df_long = pd.wide_to_long(df, stubnames='score',
                          i=['model', 'sample'], j='type',
                          sep='_', suffix=r'\w+').reset_index()
df_long  # bare expression: shows the frame in a notebook, does nothing in a script

# `color='sample'` splits the boxes by sample; the explicit colour map puts
# the `colors` dict defined above to use.
fig = px.box(df_long, x='model', y="score", color='sample',
             color_discrete_map=colors)
fig.show()