如何在 plotly 中创建按组着色的注释条
How to create annotated bars colored by group in plotly
Matplotlib
是 Python 的跨平台数据可视化和图形绘图库,可高度自定义。
Matplotlib
具有定制化的巨大优势。下面是使用 matplotlib
绘制的自定义直方图:
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.ticker as ticker
from matplotlib.patches import Rectangle
from matplotlib.gridspec import GridSpec
from matplotlib.patches import Polygon
from matplotlib.patches import ConnectionPatch
def customized_Histogram(df, j):
    """
    Generate Histogram

    Draws a histogram of ``df[j]`` whose bars are coloured by percentile
    band and annotated with their count and share of the total, then shows
    the figure with ``plt.show()``.

    Parameters:
    ****************
    df:
        pandas dataframe
    j(str):
        column name in str
    ****************
    Returns nothing; the figure is displayed as a side effect.
    """
    # Imported locally: this function needs numpy, but the surrounding file
    # only imports it after the function has already been called.
    import numpy as np

    # Colours for the different percentile bands
    perc_25_colour = 'gold'
    perc_50_colour = 'mediumaquamarine'
    perc_75_colour = 'deepskyblue'
    perc_95_colour = 'peachpuff'

    # Plot the histogram; every bar starts out in the "middle band" colour.
    fig, ax = plt.subplots(figsize=(14, 8))
    # counts  = numpy.ndarray with the number of data points in each bin
    # bins    = numpy.ndarray of bin edges (one more entry than counts)
    # patches = the drawn bars, one Rectangle per bin,
    #           e.g. Rectangle(xy=(-2.51953, 0), width=0.501013, height=3, angle=0)
    counts, bins, patches = ax.hist(df[j], facecolor=perc_50_colour, edgecolor='gray')

    # Set the ticks to be at the edges of the bins.
    ax.set_xticks(bins.round(2))
    plt.xticks(rotation=70)

    # Set the graph title and axes titles
    plt.title(f'Distribution of {j}', fontsize=20)
    plt.ylabel('Count', fontsize=15)
    plt.xlabel(j, fontsize=15)

    # Recolour the bars that lie entirely below the 25th percentile or
    # entirely above the 75th / 95th percentile; all others keep the default.
    twentyfifth, seventyfifth, ninetyfifth = np.percentile(df[j], [25, 75, 95])
    for patch, leftside, rightside in zip(patches, bins[:-1], bins[1:]):
        if rightside < twentyfifth:
            patch.set_facecolor(perc_25_colour)
        elif leftside > ninetyfifth:
            patch.set_facecolor(perc_95_colour)
        elif leftside > seventyfifth:
            patch.set_facecolor(perc_75_colour)

    # Horizontal centre of each bar, and one shared height for all labels
    # (a quarter of the second y tick).
    bin_x_centers = 0.5 * np.diff(bins) + bins[:-1]
    bin_y_centers = ax.get_yticks()[1] * 0.25

    # Display the count of data points and its % of the total on each bar.
    # ax.hist returns float counts, so cast to int to print "7", not "7.0".
    total = counts.sum()
    for x_center, count in zip(bin_x_centers, counts):
        bin_label = f"{int(count):,} ({count / total * 100:,.2f}%)"
        plt.text(x_center,
                 bin_y_centers,
                 bin_label,
                 rotation=90,
                 rotation_mode='anchor')

    # Annotation explaining the per-bar labels
    ax.annotate('Each bar shows count and percentage of total',
                xy=(.80, .30),
                xycoords='figure fraction',
                horizontalalignment='center',
                verticalalignment='bottom',
                fontsize=10,
                bbox=dict(boxstyle="round",
                          fc="white"),
                rotation=-90)

    # Create the legend. The labels follow the colouring rules above: the
    # default colour covers the bars between the 25th and 75th percentile,
    # and perc_75_colour the bars between the 75th and 95th.
    band_colours = [
        perc_25_colour,
        perc_50_colour,
        perc_75_colour,
        perc_95_colour,
    ]
    handles = [Rectangle((0, 0), 1, 1, color=c, ec="k") for c in band_colours]
    labels = ["0-25 Percentile", "25-75 Percentile", "75-95 Percentile", ">95 Percentile"]
    plt.legend(handles, labels, bbox_to_anchor=(0.5, 0., 0.80, 0.99))
    # To save instead of only showing:
    # fig.savefig("filename.jpg", dpi=150, bbox_inches='tight')
    plt.show()
import seaborn as sns
# Demo: load seaborn's "tips" sample dataset and chart its "total_bill" column.
tips = sns.load_dataset("tips")
customized_Histogram(df=tips, j="total_bill")
自定义直方图
如何使用Plotly
绘制上面的图?
- plotly 可以用同样的方法构建自定义图形。从根本上说,关键在于理解其 API 和功能(与 matplotlib 解决方案一样)
- matplotlib
hist()
和 numpy histogram()
基本相同,二者都返回 counts 和 bins,参见 https://numpy.org/doc/stable/reference/generated/numpy.histogram.html
- 然后使用矢量化逻辑而不是过程逻辑来定义条形的颜色
- plotly 没有直接把刻度放在条形边缘的概念,因此将每个 bar 的 x 值移到 bin 的中点,再用 bin edges
定义 xaxis 的刻度
def plotly_histogram(df_in, col):
    """
    Build a Plotly bar chart that imitates an annotated, colour-banded histogram.

    Parameters:
    ****************
    df_in:
        pandas dataframe
    col(str):
        name of the column to bin
    ****************
    Returns the plotly figure. Each bar is coloured by percentile band and
    annotated with its count and its percentage of ``len(df_in)``.
    """
    # np.histogram returns (counts, edges) with one more edge than counts.
    # Used as two rows and transposed, this gives one row per edge; the final
    # row carries the last edge and a missing count.
    df = pd.DataFrame(np.histogram(df_in[col])).T.rename(columns={0: "count", 1: "edge"})
    # center bars between edges: shift each left edge by half the
    # (most common) bin width
    df["x"] = df["edge"] + df["edge"].diff().mode()[0] / 2
    cmap = {
        "0-25 Percentile": "gold",
        "25-50 Percentile": "mediumaquamarine",
        "50-75 Percentile": "deepskyblue",
        ">95 Percentile": "peachpuff",
    }
    band_names = list(cmap.keys())
    # Vectorised colour choice; np.select takes the first condition that holds:
    #   next edge (shift -1) below the 25th percentile -> first band
    #   previous edge (shift 1) below the 50th         -> second band
    #   this edge below the 95th                       -> third band
    # and anything else falls through to the last band.
    df["color"] = np.select(
        [
            df["edge"].shift(s) < np.percentile(df_in[col], p)
            for p, s in zip([25, 50, 95], [-1, 1, 0])
        ],
        band_names[0:3],
        band_names[3],
    )
    # Bar label: count plus its percentage of all rows in df_in.
    df["text"] = df["count"].apply(lambda c: f"{c} ({c/len(df_in)*100:.2f}%)")
    fig = px.bar(
        df,
        x="x",
        y="count",
        color="color",
        color_discrete_map=cmap,
        hover_data={"x": False, "edge": ":.2f"},
    ).update_layout(
        # Bars sit at bin centres, but ticks are placed on the bin edges.
        xaxis={
            "tickmode": "array",
            "tickvals": df["edge"],
            "title": col,
            "tickangle": 285,
            "tickformat": ".2f",
        }
    )
    # dropna() removes the trailing edge-only row, which has no count.
    for _, row in df.dropna().loc[:, ["x", "text"]].iterrows():
        fig.add_annotation(
            x=row["x"],
            y=0,
            text=row["text"],
            showarrow=False,
            textangle=270,
            yanchor="bottom",
            yshift=10,
        )
    return fig
import seaborn as sns
import pandas as pd
import numpy as np
import plotly.express as px
# Demo: build the plotly version of the chart for the "tips" sample dataset.
plotly_histogram(df_in=sns.load_dataset("tips"), col="total_bill")
Matplotlib
是 Python 的跨平台数据可视化和图形绘图库,可高度自定义。
Matplotlib
具有定制化的巨大优势。下面是使用 matplotlib 绘制的自定义直方图:
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.ticker as ticker
from matplotlib.patches import Rectangle
from matplotlib.gridspec import GridSpec
from matplotlib.patches import Polygon
from matplotlib.patches import ConnectionPatch
def customized_Histogram(df, j):
    """
    Generate Histogram

    Draws a histogram of ``df[j]`` whose bars are coloured by percentile
    band and annotated with their count and share of the total, then shows
    the figure with ``plt.show()``.

    Parameters:
    ****************
    df:
        pandas dataframe
    j(str):
        column name in str
    ****************
    Returns nothing; the figure is displayed as a side effect.
    """
    # Imported locally: this function needs numpy, but the surrounding file
    # only imports it after the function has already been called.
    import numpy as np

    # Colours for the different percentile bands
    perc_25_colour = 'gold'
    perc_50_colour = 'mediumaquamarine'
    perc_75_colour = 'deepskyblue'
    perc_95_colour = 'peachpuff'

    # Plot the histogram; every bar starts out in the "middle band" colour.
    fig, ax = plt.subplots(figsize=(14, 8))
    # counts  = numpy.ndarray with the number of data points in each bin
    # bins    = numpy.ndarray of bin edges (one more entry than counts)
    # patches = the drawn bars, one Rectangle per bin,
    #           e.g. Rectangle(xy=(-2.51953, 0), width=0.501013, height=3, angle=0)
    counts, bins, patches = ax.hist(df[j], facecolor=perc_50_colour, edgecolor='gray')

    # Set the ticks to be at the edges of the bins.
    ax.set_xticks(bins.round(2))
    plt.xticks(rotation=70)

    # Set the graph title and axes titles
    plt.title(f'Distribution of {j}', fontsize=20)
    plt.ylabel('Count', fontsize=15)
    plt.xlabel(j, fontsize=15)

    # Recolour the bars that lie entirely below the 25th percentile or
    # entirely above the 75th / 95th percentile; all others keep the default.
    twentyfifth, seventyfifth, ninetyfifth = np.percentile(df[j], [25, 75, 95])
    for patch, leftside, rightside in zip(patches, bins[:-1], bins[1:]):
        if rightside < twentyfifth:
            patch.set_facecolor(perc_25_colour)
        elif leftside > ninetyfifth:
            patch.set_facecolor(perc_95_colour)
        elif leftside > seventyfifth:
            patch.set_facecolor(perc_75_colour)

    # Horizontal centre of each bar, and one shared height for all labels
    # (a quarter of the second y tick).
    bin_x_centers = 0.5 * np.diff(bins) + bins[:-1]
    bin_y_centers = ax.get_yticks()[1] * 0.25

    # Display the count of data points and its % of the total on each bar.
    # ax.hist returns float counts, so cast to int to print "7", not "7.0".
    total = counts.sum()
    for x_center, count in zip(bin_x_centers, counts):
        bin_label = f"{int(count):,} ({count / total * 100:,.2f}%)"
        plt.text(x_center,
                 bin_y_centers,
                 bin_label,
                 rotation=90,
                 rotation_mode='anchor')

    # Annotation explaining the per-bar labels
    ax.annotate('Each bar shows count and percentage of total',
                xy=(.80, .30),
                xycoords='figure fraction',
                horizontalalignment='center',
                verticalalignment='bottom',
                fontsize=10,
                bbox=dict(boxstyle="round",
                          fc="white"),
                rotation=-90)

    # Create the legend. The labels follow the colouring rules above: the
    # default colour covers the bars between the 25th and 75th percentile,
    # and perc_75_colour the bars between the 75th and 95th.
    band_colours = [
        perc_25_colour,
        perc_50_colour,
        perc_75_colour,
        perc_95_colour,
    ]
    handles = [Rectangle((0, 0), 1, 1, color=c, ec="k") for c in band_colours]
    labels = ["0-25 Percentile", "25-75 Percentile", "75-95 Percentile", ">95 Percentile"]
    plt.legend(handles, labels, bbox_to_anchor=(0.5, 0., 0.80, 0.99))
    # To save instead of only showing:
    # fig.savefig("filename.jpg", dpi=150, bbox_inches='tight')
    plt.show()
import seaborn as sns
# Demo: load seaborn's "tips" sample dataset and chart its "total_bill" column.
tips = sns.load_dataset("tips")
customized_Histogram(df=tips, j="total_bill")
自定义直方图
如何使用Plotly
绘制上面的图?
- plotly 可以用同样的方法构建自定义图形。从根本上说,关键在于理解其 API 和功能(与 matplotlib 解决方案一样)
- matplotlib
hist()
和 numpy histogram()
基本相同,二者都返回 counts 和 bins,参见 https://numpy.org/doc/stable/reference/generated/numpy.histogram.html
- 然后使用矢量化逻辑而不是过程逻辑来定义条形的颜色
- plotly 没有直接把刻度放在条形边缘的概念,因此将每个 bar 的 x 值移到 bin 的中点,再用 bin edges 定义 xaxis 的刻度
def plotly_histogram(df_in, col):
    """
    Build a Plotly bar chart that imitates an annotated, colour-banded histogram.

    Parameters:
    ****************
    df_in:
        pandas dataframe
    col(str):
        name of the column to bin
    ****************
    Returns the plotly figure. Each bar is coloured by percentile band and
    annotated with its count and its percentage of ``len(df_in)``.
    """
    # np.histogram returns (counts, edges) with one more edge than counts.
    # Used as two rows and transposed, this gives one row per edge; the final
    # row carries the last edge and a missing count.
    df = pd.DataFrame(np.histogram(df_in[col])).T.rename(columns={0: "count", 1: "edge"})
    # center bars between edges: shift each left edge by half the
    # (most common) bin width
    df["x"] = df["edge"] + df["edge"].diff().mode()[0] / 2
    cmap = {
        "0-25 Percentile": "gold",
        "25-50 Percentile": "mediumaquamarine",
        "50-75 Percentile": "deepskyblue",
        ">95 Percentile": "peachpuff",
    }
    band_names = list(cmap.keys())
    # Vectorised colour choice; np.select takes the first condition that holds:
    #   next edge (shift -1) below the 25th percentile -> first band
    #   previous edge (shift 1) below the 50th         -> second band
    #   this edge below the 95th                       -> third band
    # and anything else falls through to the last band.
    df["color"] = np.select(
        [
            df["edge"].shift(s) < np.percentile(df_in[col], p)
            for p, s in zip([25, 50, 95], [-1, 1, 0])
        ],
        band_names[0:3],
        band_names[3],
    )
    # Bar label: count plus its percentage of all rows in df_in.
    df["text"] = df["count"].apply(lambda c: f"{c} ({c/len(df_in)*100:.2f}%)")
    fig = px.bar(
        df,
        x="x",
        y="count",
        color="color",
        color_discrete_map=cmap,
        hover_data={"x": False, "edge": ":.2f"},
    ).update_layout(
        # Bars sit at bin centres, but ticks are placed on the bin edges.
        xaxis={
            "tickmode": "array",
            "tickvals": df["edge"],
            "title": col,
            "tickangle": 285,
            "tickformat": ".2f",
        }
    )
    # dropna() removes the trailing edge-only row, which has no count.
    for _, row in df.dropna().loc[:, ["x", "text"]].iterrows():
        fig.add_annotation(
            x=row["x"],
            y=0,
            text=row["text"],
            showarrow=False,
            textangle=270,
            yanchor="bottom",
            yshift=10,
        )
    return fig
import seaborn as sns
import pandas as pd
import numpy as np
import plotly.express as px
# Demo: build the plotly version of the chart for the "tips" sample dataset.
plotly_histogram(df_in=sns.load_dataset("tips"), col="total_bill")