Python / Matplotlib -- 按一年中的日期统计的直方图
Python / Matplotlib -- Histogram of Dates by Day of Year
我有一个跨越数年(上百年)的日期列表。我想制作一个有 366 个桶的直方图,一年中的每一天对应一个桶,并且 x 轴的标注要清晰易读,让我可以看出每个桶对应哪个日期(例如,我预计 2 月 29 日会出现一个低谷)。
我制作了以下直方图,但易于阅读的 X 轴日期标签会很棒。下面的代码看起来很麻烦,但得到了我想要的(没有 X 轴标签):
from datetime import date, datetime, timedelta
from collections import Counter
import pylab
def plot_data(data):
    """Save a bar chart with one bucket per day of the year (366 buckets).

    data is a list of dicts that contain a field "date" with a datetime.
    Records are grouped by month and day only; the year is ignored.
    The figure is written to "Figure 1.png".
    """

    def get_day(d):
        return d.strftime("%B %d")  # e.g. January 01

    n = 366
    start_date = date(2020, 1, 1)  # pick a leap year so February 29 gets a bucket
    days = [get_day(start_date + timedelta(days=i)) for i in range(n)]

    counts = Counter(get_day(d["date"]) for d in data)
    # Index the Counter instead of calling .get(): a missing day yields 0,
    # whereas counts.get(day) would yield None, which is not a valid bar height.
    Y = [counts[day] for day in days]
    X = list(range(n))

    pylab.bar(X, Y)
    pylab.xlim([0, n])
    pylab.title("Dates day of year")
    pylab.xlabel("Day of Year (0-366)")
    pylab.ylabel("Count")
    pylab.savefig("Figure 1.png")
如果有任何办法能缩短这段代码,并让 x 轴的日期标签更灵活、更清晰,我将不胜感激!
更新
我已将下面的想法合并到以下 gist 中,它产生的输出如下所示:
尝试检查此代码:
# import section
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as md
import numpy as np
from datetime import date
from itertools import product
# Generate a sample dataframe: one random non-negative value for each of the
# first 28 days of every month, labelled like "January 01".
# The list is named date_labels so it does not shadow the imported `date` class.
date_labels = [
    date(2020, month, day).strftime("%B %d")
    for month, day in product(range(1, 13), range(1, 29))
]
values = np.abs(np.random.randn(len(date_labels)))
data = pd.DataFrame({"date": date_labels, "value": values})
data.set_index("date", inplace=True)

# Convert index from str to date.
# NOTE(review): the format carries no year, so the parsed dates land in a
# default year (presumably 1900, which is not a leap year); "February 29"
# would then fail to parse -- confirm before feeding real 366-day data.
data.index = pd.to_datetime(data.index, format="%B %d")

# Plot one bar per day.
fig, ax = plt.subplots(1, 1, figsize=(16, 8))
ax.bar(data.index, data["value"])

# Format the x axis: DayLocator with interval 5, "Month DD" labels, rotated 90 degrees.
ax.xaxis.set_major_locator(md.DayLocator(interval=5))
ax.xaxis.set_major_formatter(md.DateFormatter("%B %d"))
plt.setp(ax.xaxis.get_majorticklabels(), rotation=90)
ax.set_xlim([data.index[0], data.index[-1]])
plt.show()
这给了我这张图:
我将数据帧的索引从字符串转换为日期,然后通过 ax.xaxis.set_major_locator 和 ax.xaxis.set_major_formatter 方法应用了我想要的 x 轴格式。
为了画图我用了 matplotlib,但是把这个方法改写成 pylab 应该不难。
编辑
如果您想让天和月分别使用单独的刻度,您可以添加辅助轴(参见此示例),如以下代码所示:
# import section
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as md
import numpy as np
from datetime import date
from itertools import product
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
# Generate a sample dataframe: one random non-negative value for each of the
# first 28 days of every month, labelled like "January 01".
# The list is named date_labels so it does not shadow the imported `date` class.
date_labels = [
    date(2020, month, day).strftime("%B %d")
    for month, day in product(range(1, 13), range(1, 29))
]
values = np.abs(np.random.randn(len(date_labels)))
data = pd.DataFrame({"date": date_labels, "value": values})
data.set_index("date", inplace=True)

# Convert index from str to date.
# NOTE(review): the format carries no year, so the parsed dates land in a
# default year (presumably 1900, which is not a leap year); "February 29"
# would then fail to parse -- confirm before feeding real 366-day data.
data.index = pd.to_datetime(data.index, format="%B %d")

# Prepare the days axes (host) and a twin months axes sharing the same y axis.
fig = plt.figure(figsize=(16, 8))
days = host_subplot(111, axes_class=AA.Axes, figure=fig)
plt.subplots_adjust(bottom=0.1)
months = days.twiny()

# Position the months axis: give it its own bottom axis line, shifted
# vertically by `offset` so it sits below the days axis.
offset = -20
new_fixed_axis = months.get_grid_helper().new_fixed_axis
months.axis["bottom"] = new_fixed_axis(
    loc="bottom",
    axes=months,
    offset=(0, offset),
)
months.axis["bottom"].toggle(all=True)

# Plot one bar per day on the host axes.
days.bar(data.index, data["value"])

# Format the days axis: DayLocator with interval 10, day-of-month labels.
days.xaxis.set_major_locator(md.DayLocator(interval=10))
days.xaxis.set_major_formatter(md.DateFormatter("%d"))
plt.setp(days.xaxis.get_majorticklabels(), rotation=0)
days.set_xlim([data.index[0], data.index[-1]])

# Format the months axis: one tick per month, abbreviated month names,
# and the same x range as the days axis so both scales line up.
months.xaxis.set_major_locator(md.MonthLocator())
months.xaxis.set_major_formatter(md.DateFormatter("%b"))
months.set_xlim([data.index[0], data.index[-1]])
plt.show()
生成如下图表:
稍微修改已接受的答案即可得到:
# Variation on the accepted answer; relies on `ax` and `data` created there.
# Ticks fall on the 1st and 15th of every month and are labelled by
# ConciseDateFormatter, which is built from the same locator.
x_first, x_last = data.index[0], data.index[-1]
locator = md.MonthLocator(bymonthday=(1, 15))
ax.xaxis.set_major_formatter(md.ConciseDateFormatter(locator))
ax.xaxis.set_major_locator(locator)
# The 90-degree tick-label rotation used in the accepted answer is
# deliberately not applied here.
ax.set_xlim([x_first, x_last])
plt.show()
我有一个跨越数年(上百年)的日期列表。我想制作一个有 366 个桶的直方图,一年中的每一天对应一个桶,并且 x 轴的标注要清晰易读,让我可以看出每个桶对应哪个日期(例如,我预计 2 月 29 日会出现一个低谷)。
我制作了以下直方图,但易于阅读的 X 轴日期标签会很棒。下面的代码看起来很麻烦,但得到了我想要的(没有 X 轴标签):
from datetime import date, datetime, timedelta
from collections import Counter
import pylab
def plot_data(data):
    """Save a bar chart with one bucket per day of the year (366 buckets).

    data is a list of dicts that contain a field "date" with a datetime.
    Records are grouped by month and day only; the year is ignored.
    The figure is written to "Figure 1.png".
    """

    def get_day(d):
        return d.strftime("%B %d")  # e.g. January 01

    n = 366
    start_date = date(2020, 1, 1)  # pick a leap year so February 29 gets a bucket
    days = [get_day(start_date + timedelta(days=i)) for i in range(n)]

    counts = Counter(get_day(d["date"]) for d in data)
    # Index the Counter instead of calling .get(): a missing day yields 0,
    # whereas counts.get(day) would yield None, which is not a valid bar height.
    Y = [counts[day] for day in days]
    X = list(range(n))

    pylab.bar(X, Y)
    pylab.xlim([0, n])
    pylab.title("Dates day of year")
    pylab.xlabel("Day of Year (0-366)")
    pylab.ylabel("Count")
    pylab.savefig("Figure 1.png")
如果有任何办法能缩短这段代码,并让 x 轴的日期标签更灵活、更清晰,我将不胜感激!
更新
我已将下面的想法合并到以下 gist 中,它产生的输出如下所示:
尝试检查此代码:
# import section
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as md
import numpy as np
from datetime import date
from itertools import product
# Generate a sample dataframe: one random non-negative value for each of the
# first 28 days of every month, labelled like "January 01".
# The list is named date_labels so it does not shadow the imported `date` class.
date_labels = [
    date(2020, month, day).strftime("%B %d")
    for month, day in product(range(1, 13), range(1, 29))
]
values = np.abs(np.random.randn(len(date_labels)))
data = pd.DataFrame({"date": date_labels, "value": values})
data.set_index("date", inplace=True)

# Convert index from str to date.
# NOTE(review): the format carries no year, so the parsed dates land in a
# default year (presumably 1900, which is not a leap year); "February 29"
# would then fail to parse -- confirm before feeding real 366-day data.
data.index = pd.to_datetime(data.index, format="%B %d")

# Plot one bar per day.
fig, ax = plt.subplots(1, 1, figsize=(16, 8))
ax.bar(data.index, data["value"])

# Format the x axis: DayLocator with interval 5, "Month DD" labels, rotated 90 degrees.
ax.xaxis.set_major_locator(md.DayLocator(interval=5))
ax.xaxis.set_major_formatter(md.DateFormatter("%B %d"))
plt.setp(ax.xaxis.get_majorticklabels(), rotation=90)
ax.set_xlim([data.index[0], data.index[-1]])
plt.show()
这给了我这张图:
我将数据帧的索引从字符串转换为日期,然后通过 ax.xaxis.set_major_locator 和 ax.xaxis.set_major_formatter 方法应用了我想要的 x 轴格式。
为了画图我用了 matplotlib,但是把这个方法改写成 pylab 应该不难。
编辑
如果您想让天和月分别使用单独的刻度,您可以添加辅助轴(参见此示例),如以下代码所示:
# import section
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as md
import numpy as np
from datetime import date
from itertools import product
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
# Generate a sample dataframe: one random non-negative value for each of the
# first 28 days of every month, labelled like "January 01".
# The list is named date_labels so it does not shadow the imported `date` class.
date_labels = [
    date(2020, month, day).strftime("%B %d")
    for month, day in product(range(1, 13), range(1, 29))
]
values = np.abs(np.random.randn(len(date_labels)))
data = pd.DataFrame({"date": date_labels, "value": values})
data.set_index("date", inplace=True)

# Convert index from str to date.
# NOTE(review): the format carries no year, so the parsed dates land in a
# default year (presumably 1900, which is not a leap year); "February 29"
# would then fail to parse -- confirm before feeding real 366-day data.
data.index = pd.to_datetime(data.index, format="%B %d")

# Prepare the days axes (host) and a twin months axes sharing the same y axis.
fig = plt.figure(figsize=(16, 8))
days = host_subplot(111, axes_class=AA.Axes, figure=fig)
plt.subplots_adjust(bottom=0.1)
months = days.twiny()

# Position the months axis: give it its own bottom axis line, shifted
# vertically by `offset` so it sits below the days axis.
offset = -20
new_fixed_axis = months.get_grid_helper().new_fixed_axis
months.axis["bottom"] = new_fixed_axis(
    loc="bottom",
    axes=months,
    offset=(0, offset),
)
months.axis["bottom"].toggle(all=True)

# Plot one bar per day on the host axes.
days.bar(data.index, data["value"])

# Format the days axis: DayLocator with interval 10, day-of-month labels.
days.xaxis.set_major_locator(md.DayLocator(interval=10))
days.xaxis.set_major_formatter(md.DateFormatter("%d"))
plt.setp(days.xaxis.get_majorticklabels(), rotation=0)
days.set_xlim([data.index[0], data.index[-1]])

# Format the months axis: one tick per month, abbreviated month names,
# and the same x range as the days axis so both scales line up.
months.xaxis.set_major_locator(md.MonthLocator())
months.xaxis.set_major_formatter(md.DateFormatter("%b"))
months.set_xlim([data.index[0], data.index[-1]])
plt.show()
生成如下图表:
稍微修改已接受的答案即可得到:
# Variation on the accepted answer; relies on `ax` and `data` created there.
# Ticks fall on the 1st and 15th of every month and are labelled by
# ConciseDateFormatter, which is built from the same locator.
x_first, x_last = data.index[0], data.index[-1]
locator = md.MonthLocator(bymonthday=(1, 15))
ax.xaxis.set_major_formatter(md.ConciseDateFormatter(locator))
ax.xaxis.set_major_locator(locator)
# The 90-degree tick-label rotation used in the accepted answer is
# deliberately not applied here.
ax.set_xlim([x_first, x_last])
plt.show()