使用 pandas 的堆叠条形图
Stacking bar plot using pandas
我想以条形图的形式表示我的数据,如我的预期输出所示。
time,date,category
0,2002-05-01,2
1,2002-05-02,0
2,2002-05-03,0
3,2002-05-04,0
4,2002-05-05,0
5,2002-05-06,0
6,2002-05-07,0
7,2002-05-08,2
8,2002-05-09,2
9,2002-05-10,0
10,2002-05-11,2
11,2002-05-12,0
12,2002-05-13,0
13,2002-05-14,2
14,2002-05-15,2
15,2002-05-16,2
16,2002-05-17,2
17,2002-05-18,2
18,2002-05-19,0
19,2002-05-20,0
20,2002-05-21,1
21,2002-05-22,2
22,2002-05-23,0
23,2002-05-24,1
24,2002-05-25,0
25,2002-05-26,0
26,2002-05-27,0
27,2002-05-28,0
28,2002-05-29,1
29,2002-05-30,0
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
# Question's first attempt: count rows per weekday and draw them as bars.
# Presumably df.csv holds the time/date/category sample listed above.
df = pd.read_csv('df.csv')
daily_category = df[['date','category']]
# Turn every date into its weekday name.
# NOTE(review): daily_category is a column subset of df, so this assignment
# may emit pandas' SettingWithCopyWarning -- confirm.
daily_category['weekday'] = pd.to_datetime(daily_category['date']).dt.day_name()
daily_category_plot = daily_category[['weekday','category']]
# NOTE: selecting [['category']] first leaves a frame that no longer has a
# 'weekday' column, so groupby('weekday') has nothing to group by. This is the
# line the KeyError: 'weekday' traceback below points at.
daily_category_plot[['category']].groupby('weekday').count().plot(kind='bar', legend=None)
plt.show()
但是,我收到以下错误
Traceback (most recent call last):
  File "day_plot.py", line 10, in <module>
    daily_category_plot[['category']].groupby('weekday').count().plot(kind='bar', legend=None)
  File "/home/..../.local/lib/python3.6/site-packages/pandas/core/frame.py", line 6525, in groupby
    dropna=dropna,
  File "/home/..../.local/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 533, in __init__
    dropna=self.dropna,
  File "/home/..../.local/lib/python3.6/site-packages/pandas/core/groupby/grouper.py", line 786, in get_grouper
    raise KeyError(gpr)
KeyError: 'weekday'
********** 下面是一个使用手动提取数据的进一步示例,它返回的结果几乎就是预期输出,只是日期显示为数字而不是星期名称。 ***********
Day,category1,category2,category3
Sunday,0,0,4
Monday,0,0,4
Tuesday,1,1,2
Wednesday,1,4,0
Thursday,0,2,3
Friday,1,1,2
Saturday,0,2,2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Plot the hand-made per-weekday table as a stacked bar chart.
# Presumably df.csv is the Day/category1..3 table listed above.
df = pd.read_csv('df.csv')
# Stacked bars with one fixed colour per plotted column.
ax = df.plot.bar(stacked=True, color=['green', 'red', 'blue'])
# NOTE: read_csv is called without index_col, so df.index is the default
# integer index rather than the 'Day' column. Using it for the tick labels is
# why the x axis shows numbers instead of weekday names.
ax.set_xticklabels(labels=df.index, rotation=70, rotation_mode="anchor", ha="right")
ax.set_xlabel('')
ax.set_ylabel('Number of days')
plt.show()
测试输出
更新后的代码生成了奇怪的图
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
# Question's updated attempt: count rows per (weekday name, category) and plot
# the result as stacked bars.
df = pd.read_csv('df.csv')
daily_category = df[['time','date','category']]
# Turn every date into its weekday name.
daily_category['weekday'] = pd.to_datetime(daily_category['date']).dt.day_name()
# size() counts the rows of each (weekday, category) pair; reset_index names
# that count column 'sum'; pivot then puts weekdays on the rows and categories
# on the columns.
ans = (daily_category.groupby(['weekday', 'category'])
.size()
.reset_index(name='sum')
.pivot(index='weekday', columns='category', values='sum')
)
# NOTE(review): the index holds weekday-name strings, so the bars presumably
# come out in alphabetical rather than calendar order -- confirm this is the
# "weird plot" the question describes.
ans.plot.bar(stacked=True)
plt.show()
更新输出
import pandas as pd
import matplotlib.pyplot as plt
# Sample data: space-separated "time,date,category" records.
d = """0,2002-05-01,2 1,2002-05-02,0 2,2002-05-03,0 3,2002-05-04,0 4,2002-05-05,0 5,2002-05-06,0 6,2002-05-07,0 7,2002-05-08,2 8,2002-05-09,2 9,2002-05-10,0 10,2002-05-11,2 11,2002-05-12,0 12,2002-05-13,0 13,2002-05-14,2 14,2002-05-15,2 15,2002-05-16,2 16,2002-05-17,2 17,2002-05-18,2 18,2002-05-19,0 19,2002-05-20,0 20,2002-05-21,1 21,2002-05-22,2 22,2002-05-23,0 23,2002-05-24,1 24,2002-05-25,0 25,2002-05-26,0 26,2002-05-27,0 27,2002-05-28,0 28,2002-05-29,1 29,2002-05-30,0"""
df = pd.DataFrame([v.split(',') for v in d.split(' ')], columns=['time', 'date', 'category'])
# Every field was parsed as text; make the numeric columns real integers.
df = df.astype({'time': int, 'category': int})

# Working frame with just the two columns the count needs.
data = df[['category']].copy()
data['weekday'] = pd.to_datetime(df['date']).dt.day_name()

# Calendar order for the x axis (plain string sorting would be alphabetical).
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
categories = sorted(set(df['category']))

# crosstab counts every (weekday, category) pair in one vectorised pass,
# replacing the per-row `counts.loc[...] += 1` loop. reindex then fixes the
# row/column order and fills pairs that never occur with 0; rename_axis drops
# the axis names so the plot gets no extra axis label or legend title.
counts = (
    pd.crosstab(data['weekday'], data['category'])
    .reindex(index=weekdays, columns=categories, fill_value=0)
    .rename_axis(index=None, columns=None)
)

# Only draw when run as a script/notebook cell, so the table can be imported
# and inspected without a plotting backend.
if __name__ == "__main__":
    counts.plot.bar(stacked=True)
此解决方案对列使用 groupby,并使用 pivot 转换返回的 DataFrame。结果可以用 plot.bar() 绘制,但标签不正确,因此需要更改索引。
我复制并粘贴了你的代码,并通过以下方式得到了一个 DataFrame:
import pandas as pd
from io import StringIO
# Inline CSV standing in for the data file; the "..." line marks the rows
# elided from this listing.
t = """time,date,category
0,2002-05-01,2
..."""
df = pd.read_csv(StringIO(t))
# Convert the whole column once and read the weekday number (Monday=0 ...
# Sunday=6) through the .dt accessor, instead of calling pd.to_datetime on
# every single row via apply.
df['weekday'] = pd.to_datetime(df['date']).dt.weekday
为了检查周三柱的预期输出,我使用了过滤器选项。
>>>df[df['weekday']==2]
time date category weekday
0 0 2002-05-01 2 2
7 7 2002-05-08 2 2
14 14 2002-05-15 2 2
21 21 2002-05-22 2 2
28 28 2002-05-29 1 2
所以我想在星期三只看类别 1 (1/5) 和类别 2 (4/5)。
# Count rows per (weekday number, category) and spread the categories into
# columns; pairs that never occur become 0 instead of NaN.
ans = df.groupby(["weekday", "category"]).size().unstack(fill_value=0)
# Force exactly one row for each weekday number 0..6, in calendar order.
# Without this, the positional relabelling below raises a length-mismatch
# error (or mislabels rows) whenever some weekday has no data at all.
ans = ans.reindex(range(7), fill_value=0)
# Row i is now guaranteed to be weekday number i (Monday=0), so positional
# names are safe.
ans.index = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
ans.plot.bar(stacked=True)
我想以条形图的形式表示我的数据,如我的预期输出所示。
time,date,category
0,2002-05-01,2
1,2002-05-02,0
2,2002-05-03,0
3,2002-05-04,0
4,2002-05-05,0
5,2002-05-06,0
6,2002-05-07,0
7,2002-05-08,2
8,2002-05-09,2
9,2002-05-10,0
10,2002-05-11,2
11,2002-05-12,0
12,2002-05-13,0
13,2002-05-14,2
14,2002-05-15,2
15,2002-05-16,2
16,2002-05-17,2
17,2002-05-18,2
18,2002-05-19,0
19,2002-05-20,0
20,2002-05-21,1
21,2002-05-22,2
22,2002-05-23,0
23,2002-05-24,1
24,2002-05-25,0
25,2002-05-26,0
26,2002-05-27,0
27,2002-05-28,0
28,2002-05-29,1
29,2002-05-30,0
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
# Question's first attempt: count rows per weekday and draw them as bars.
# Presumably df.csv holds the time/date/category sample listed above.
df = pd.read_csv('df.csv')
daily_category = df[['date','category']]
# Turn every date into its weekday name.
# NOTE(review): daily_category is a column subset of df, so this assignment
# may emit pandas' SettingWithCopyWarning -- confirm.
daily_category['weekday'] = pd.to_datetime(daily_category['date']).dt.day_name()
daily_category_plot = daily_category[['weekday','category']]
# NOTE: selecting [['category']] first leaves a frame that no longer has a
# 'weekday' column, so groupby('weekday') has nothing to group by. This is the
# line the KeyError: 'weekday' traceback below points at.
daily_category_plot[['category']].groupby('weekday').count().plot(kind='bar', legend=None)
plt.show()
但是,我收到以下错误
Traceback (most recent call last):
  File "day_plot.py", line 10, in <module>
    daily_category_plot[['category']].groupby('weekday').count().plot(kind='bar', legend=None)
  File "/home/..../.local/lib/python3.6/site-packages/pandas/core/frame.py", line 6525, in groupby
    dropna=dropna,
  File "/home/..../.local/lib/python3.6/site-packages/pandas/core/groupby/groupby.py", line 533, in __init__
    dropna=self.dropna,
  File "/home/..../.local/lib/python3.6/site-packages/pandas/core/groupby/grouper.py", line 786, in get_grouper
    raise KeyError(gpr)
KeyError: 'weekday'
********** 下面是一个使用手动提取数据的进一步示例,它返回的结果几乎就是预期输出,只是日期显示为数字而不是星期名称。 ***********
Day,category1,category2,category3
Sunday,0,0,4
Monday,0,0,4
Tuesday,1,1,2
Wednesday,1,4,0
Thursday,0,2,3
Friday,1,1,2
Saturday,0,2,2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Plot the hand-made per-weekday table as a stacked bar chart.
# Presumably df.csv is the Day/category1..3 table listed above.
df = pd.read_csv('df.csv')
# Stacked bars with one fixed colour per plotted column.
ax = df.plot.bar(stacked=True, color=['green', 'red', 'blue'])
# NOTE: read_csv is called without index_col, so df.index is the default
# integer index rather than the 'Day' column. Using it for the tick labels is
# why the x axis shows numbers instead of weekday names.
ax.set_xticklabels(labels=df.index, rotation=70, rotation_mode="anchor", ha="right")
ax.set_xlabel('')
ax.set_ylabel('Number of days')
plt.show()
测试输出
更新后的代码生成了奇怪的图
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
# Question's updated attempt: count rows per (weekday name, category) and plot
# the result as stacked bars.
df = pd.read_csv('df.csv')
daily_category = df[['time','date','category']]
# Turn every date into its weekday name.
daily_category['weekday'] = pd.to_datetime(daily_category['date']).dt.day_name()
# size() counts the rows of each (weekday, category) pair; reset_index names
# that count column 'sum'; pivot then puts weekdays on the rows and categories
# on the columns.
ans = (daily_category.groupby(['weekday', 'category'])
.size()
.reset_index(name='sum')
.pivot(index='weekday', columns='category', values='sum')
)
# NOTE(review): the index holds weekday-name strings, so the bars presumably
# come out in alphabetical rather than calendar order -- confirm this is the
# "weird plot" the question describes.
ans.plot.bar(stacked=True)
plt.show()
更新输出
import pandas as pd
import matplotlib.pyplot as plt
# Sample data: space-separated "time,date,category" records.
d = """0,2002-05-01,2 1,2002-05-02,0 2,2002-05-03,0 3,2002-05-04,0 4,2002-05-05,0 5,2002-05-06,0 6,2002-05-07,0 7,2002-05-08,2 8,2002-05-09,2 9,2002-05-10,0 10,2002-05-11,2 11,2002-05-12,0 12,2002-05-13,0 13,2002-05-14,2 14,2002-05-15,2 15,2002-05-16,2 16,2002-05-17,2 17,2002-05-18,2 18,2002-05-19,0 19,2002-05-20,0 20,2002-05-21,1 21,2002-05-22,2 22,2002-05-23,0 23,2002-05-24,1 24,2002-05-25,0 25,2002-05-26,0 26,2002-05-27,0 27,2002-05-28,0 28,2002-05-29,1 29,2002-05-30,0"""
df = pd.DataFrame([v.split(',') for v in d.split(' ')], columns=['time', 'date', 'category'])
# Every field was parsed as text; make the numeric columns real integers.
df = df.astype({'time': int, 'category': int})

# Working frame with just the two columns the count needs.
data = df[['category']].copy()
data['weekday'] = pd.to_datetime(df['date']).dt.day_name()

# Calendar order for the x axis (plain string sorting would be alphabetical).
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
categories = sorted(set(df['category']))

# crosstab counts every (weekday, category) pair in one vectorised pass,
# replacing the per-row `counts.loc[...] += 1` loop. reindex then fixes the
# row/column order and fills pairs that never occur with 0; rename_axis drops
# the axis names so the plot gets no extra axis label or legend title.
counts = (
    pd.crosstab(data['weekday'], data['category'])
    .reindex(index=weekdays, columns=categories, fill_value=0)
    .rename_axis(index=None, columns=None)
)

# Only draw when run as a script/notebook cell, so the table can be imported
# and inspected without a plotting backend.
if __name__ == "__main__":
    counts.plot.bar(stacked=True)
此解决方案对列使用 groupby,并使用 pivot 转换返回的 DataFrame。结果可以用 plot.bar() 绘制,但标签不正确,因此需要更改索引。
我复制并粘贴了你的代码,并通过以下方式得到了一个 DataFrame:
import pandas as pd
from io import StringIO
# Inline CSV standing in for the data file; the "..." line marks the rows
# elided from this listing.
t = """time,date,category
0,2002-05-01,2
..."""
df = pd.read_csv(StringIO(t))
# Convert the whole column once and read the weekday number (Monday=0 ...
# Sunday=6) through the .dt accessor, instead of calling pd.to_datetime on
# every single row via apply.
df['weekday'] = pd.to_datetime(df['date']).dt.weekday
为了检查周三柱的预期输出,我使用了过滤器选项。
>>>df[df['weekday']==2]
time date category weekday
0 0 2002-05-01 2 2
7 7 2002-05-08 2 2
14 14 2002-05-15 2 2
21 21 2002-05-22 2 2
28 28 2002-05-29 1 2
所以我想在星期三只看类别 1 (1/5) 和类别 2 (4/5)。
# Count rows per (weekday number, category) and spread the categories into
# columns; pairs that never occur become 0 instead of NaN.
ans = df.groupby(["weekday", "category"]).size().unstack(fill_value=0)
# Force exactly one row for each weekday number 0..6, in calendar order.
# Without this, the positional relabelling below raises a length-mismatch
# error (or mislabels rows) whenever some weekday has no data at all.
ans = ans.reindex(range(7), fill_value=0)
# Row i is now guaranteed to be weekday number i (Monday=0), so positional
# names are safe.
ans.index = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
ans.plot.bar(stacked=True)