有没有办法让 matplotlib 自动格式化聚集柱形图

Is there a way to have matplotlib auto format clustered column graphs

使用我现有的代码,每当构建(build)或工作负载(workload)的数量增加时,我都必须手动修改 bar_width,才能让所有柱形都放得下。

我一直在寻找一种自动设置绘图格式的方法,但似乎找不到任何可行的方法。希望网上有人知道更好的方法。

import matplotlib.pyplot as plt
import numpy as np

# Tick labels for the x axis: "Workload 1" through "Workload 6".
workloads = tuple(f"Workload {number}" for number in range(1, 7))

# One entry per build, laid out as
#    (label, values, stdevs)
# where values and stdevs each hold one number per workload, in workload order.
data = [("build 1", (133_289, 163_421, 180_473, 24_978, 47_761, 47_167), (9_093, 6_740, 4_085, 7, 44, 96)),
        ("build 2", (163_156, 201_082, 224_287, 25_000, 47_869, 47_285), (2_306, 1_119, 780, 2, 19, 30)),
        ("build 3", (121_760, 148_433, 163_835, 24_975, 47_594, 47_030), (3_232, 6_995, 5_660, 3, 96, 347)),
        ("build 4", (136_456, 163_810, 180_590, 24_991, 47_714, 47_262), (1_623, 2_147, 2_770, 5, 22, 6)),
        ("build 5", (150_551, 182_301, 199_579, 24_992, 47_648, 47_169), (2_216, 3_590, 5_386, 4, 43, 151)),
        ("build 6", (121_390, 146_764, 161_634, 24_981, 47_599, 47_056), (4_454, 6_107, 7_102, 4, 35, 155)),
        ("build 7", (136_456, 163_810, 180_590, 24_991, 47_714, 47_262), (1_623, 2_147, 2_770, 5, 22, 6)),
        ("build 8", (150_551, 182_301, 199_579, 24_992, 47_648, 47_169), (2_216, 3_590, 5_386, 4, 43, 151)),
        ("build 9", (121_390, 146_764, 161_634, 24_981, 47_599, 47_056), (4_454, 6_107, 7_102, 4, 35, 155)),
        ("build 10", (121_390, 146_764, 161_634, 24_981, 47_599, 47_056), (4_454, 6_107, 7_102, 4, 35, 155))]

# One integer x position per workload; every group of bars starts at its position.
ind = np.arange(len(workloads))

# Width of a single bar. This is a fixed value, so it has to be adjusted by hand
# whenever the number of builds changes.
bar_width = 0.10

fig, ax = plt.subplots()

# Draw one bar series per build; series k is shifted k bar widths to the right
# of the group's x position. The returned bar containers are collected in `bars`.
bars = [
    ax.bar(
        ind + bar_width * series_index,
        values,
        bar_width,
        yerr=stdevs,
        label=label,
    )
    for series_index, (label, values, stdevs) in enumerate(data)
]

# Title, axis label, custom x-axis tick labels and legend.
ax.set_title("Builds")
ax.set_ylabel("Data value")
ax.set_xticks(ind)
ax.set_xticklabels(workloads)
ax.legend()

# autofmt_xdate() is meant for date axes, but it also rotates ordinary
# x-axis labels; 22.5 degrees keeps them readable.
fig.autofmt_xdate(rotation=22.5)

plt.show()

您可以引入一个变量 bar_total_width,用来表示属于同一个标签的一组柱形的总宽度。这样,单个柱形的宽度就等于总宽度除以每组中的柱形数量。再将各柱形的位置减去总宽度的一半,就可以让标签位于每组的中心。(另请注意,ax.set_xticklabels() 有一个 rotation 参数;或者您也可以使用 ax.tick_params(axis='x', rotation=...)。)

以下示例使用 0.9 作为总条宽,因此在组之间留有 0.1 的间隙。

import matplotlib.pyplot as plt
import numpy as np

# x-axis group labels, one per workload
workloads = (
    "Workload 1",
    "Workload 2",
    "Workload 3",
    "Workload 4",
    "Workload 5",
    "Workload 6",
)

# One tuple per build:
#    (label, values in workload order, standard deviations in workload order)
data = [
    ("build 1", (133_289, 163_421, 180_473, 24_978, 47_761, 47_167), (9_093, 6_740, 4_085, 7, 44, 96)),
    ("build 2", (163_156, 201_082, 224_287, 25_000, 47_869, 47_285), (2_306, 1_119, 780, 2, 19, 30)),
    ("build 3", (121_760, 148_433, 163_835, 24_975, 47_594, 47_030), (3_232, 6_995, 5_660, 3, 96, 347)),
    ("build 4", (136_456, 163_810, 180_590, 24_991, 47_714, 47_262), (1_623, 2_147, 2_770, 5, 22, 6)),
    ("build 5", (150_551, 182_301, 199_579, 24_992, 47_648, 47_169), (2_216, 3_590, 5_386, 4, 43, 151)),
    ("build 6", (121_390, 146_764, 161_634, 24_981, 47_599, 47_056), (4_454, 6_107, 7_102, 4, 35, 155)),
    ("build 7", (136_456, 163_810, 180_590, 24_991, 47_714, 47_262), (1_623, 2_147, 2_770, 5, 22, 6)),
    ("build 8", (150_551, 182_301, 199_579, 24_992, 47_648, 47_169), (2_216, 3_590, 5_386, 4, 43, 151)),
    ("build 9", (121_390, 146_764, 161_634, 24_981, 47_599, 47_056), (4_454, 6_107, 7_102, 4, 35, 155)),
    ("build 10", (121_390, 146_764, 161_634, 24_981, 47_599, 47_056), (4_454, 6_107, 7_102, 4, 35, 155)),
]

# the x locations for the groups, specified by the number of items in workloads
ind = np.arange(len(workloads))

# Total width reserved for one group of bars; what is left of the unit spacing
# between two groups (0.1 here) becomes the gap between them.
bar_total_width = 0.9
# Width of a single bar, derived from the number of builds so a group always fits.
bar_width = bar_total_width / len(data)

fig, ax = plt.subplots()

for i, (my_label, my_data, my_data_stdev) in enumerate(data):
    # ax.bar() centers every bar on its x value by default. The left edge of
    # bar i is ind - bar_total_width / 2 + bar_width * i, so its center lies
    # half a bar width further right. Without that extra half bar width the
    # whole group ends up bar_width / 2 to the left of its tick.
    bar_centers = ind - bar_total_width / 2 + bar_width * (i + 0.5)
    ax.bar(bar_centers, my_data, bar_width, yerr=my_data_stdev, label=my_label)

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel("Data value")
ax.set_title("Builds")
ax.set_xticks(ind)
ax.set_xticklabels(workloads, rotation=20)
ax.margins(x=0.01)  # less white space left and right
ax.legend()
plt.tight_layout()
plt.show()

通过创建数据框(DataFrame)并使用 pandas 绘图,以上这些都可以大大简化。柱形组的总宽度和标签的旋转角度都是绘图函数的参数,柱形位置和图例则会自动生成。

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd  # needed for pd.DataFrame below; without it the script raises NameError

workloads = ("Workload 1", "Workload 2", "Workload 3", "Workload 4", "Workload 5", "Workload 6",)
# tuple key: (label, data values positional to workloads, data stdev positional to workloads)
data = [("build 1", (133_289, 163_421, 180_473, 24_978, 47_761, 47_167), (9_093, 6_740, 4_085, 7, 44, 96)),
        ("build 2", (163_156, 201_082, 224_287, 25_000, 47_869, 47_285), (2_306, 1_119, 780, 2, 19, 30)),
        ("build 3", (121_760, 148_433, 163_835, 24_975, 47_594, 47_030), (3_232, 6_995, 5_660, 3, 96, 347)),
        ("build 4", (136_456, 163_810, 180_590, 24_991, 47_714, 47_262), (1_623, 2_147, 2_770, 5, 22, 6)),
        ("build 5", (150_551, 182_301, 199_579, 24_992, 47_648, 47_169), (2_216, 3_590, 5_386, 4, 43, 151)),
        ("build 6", (121_390, 146_764, 161_634, 24_981, 47_599, 47_056), (4_454, 6_107, 7_102, 4, 35, 155)),
        ("build 7", (136_456, 163_810, 180_590, 24_991, 47_714, 47_262), (1_623, 2_147, 2_770, 5, 22, 6)),
        ("build 8", (150_551, 182_301, 199_579, 24_992, 47_648, 47_169), (2_216, 3_590, 5_386, 4, 43, 151)),
        ("build 9", (121_390, 146_764, 161_634, 24_981, 47_599, 47_056), (4_454, 6_107, 7_102, 4, 35, 155)),
        ("build 10", (121_390, 146_764, 161_634, 24_981, 47_599, 47_056), (4_454, 6_107, 7_102, 4, 35, 155))]

fig, ax = plt.subplots()

# Two frames with the same layout: one row per workload, one column per build.
df_means = pd.DataFrame(index=workloads)
df_stdevs = pd.DataFrame(index=workloads)

for my_label, means, stdevs in data:
    df_means[my_label] = means
    df_stdevs[my_label] = stdevs
# Transpose so that each row of yerr holds the error values of one build,
# i.e. of one column of df_means.
yerr = df_stdevs.values.T
# width=0.9 is the total width of one group of bars; rot=20 rotates the tick labels.
df_means.plot.bar(yerr=yerr, rot=20, width=0.9, capsize=1, alpha=0.7, ax=ax)

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel("Data value")
ax.set_title("Builds")
ax.margins(x=0.01)  # less white space left and right
plt.tight_layout()
plt.show()

使用 Seaborn 可以实现更高程度的自动化。Seaborn 会根据原始数据计算均值和标准差,并一次性创建带有误差线和图例的完整柱形图。Seaborn 需要一个包含 3 列的长格式数据框:一列用于工作负载 ID,一列用于构建 ID,另外只用一列存放数值。

对于名为 'Workload'、'Build' 和 'value' 的三列,命令将是(假设有多行具有相同的工作负载 ID 和构建 ID):

import seaborn as sns

# NOTE: df is not defined in this snippet. It is assumed to be a long-format
# DataFrame with the columns 'Workload', 'Build' and 'value', holding one row
# per raw measurement -- confirm against your own data.
sns.barplot(data=df, x='Workload', y='value', hue='Build')