在 Seaborn 的 Regplot 中使用日期时间

Using Datetimes with Seaborn's Regplot

我在 Jupyter/IPython 工作以绘制每天的单词量,但在 Seaborn 中使用带有 Regplot 的日期时间时遇到问题。 Regplot 本身显然 does not support regression against date data,虽然我想要完成的事情并不一定需要 Regplot 的解决方法——也许只是一种格式化 x 轴标签的方法。

一个最小的工作示例,使用简单的时间戳:

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt 
import matplotlib.dates as dates
import seaborn as sns
import time
import datetime
import radar
sns.set(style="whitegrid", color_codes=True)

data = pd.DataFrame([])

for i in np.arange(1, 10):
    date =  radar.random_datetime(start='2016-05-20', stop='2016-05-25')
    data = data.append(pd.DataFrame({'Date': time.mktime(date.timetuple()), 'Words': i + 100}, index=[0]), ignore_index=True)

points = plt.scatter(x = data['Date'], y = data["Words"], c=data["Words"], s=75, cmap="BrBG")
plt.colorbar(points)
sns.regplot(x = data['Date'], y = data["Words"], data=data, scatter=False, color='r')

它呈现了一个带有重叠趋势线的散点图:

但是日期是日期时间:

points = plt.scatter(x = pd.to_datetime(data['Date'], unit='s').dt.to_pydatetime(), y = data["Words"], c=data["Words"], s=75, cmap="BrBG")
plt.colorbar(points)
sns.regplot(x = pd.to_datetime(data['Date'], unit='s').dt.to_pydatetime(), y = data["Words"], data=data, scatter=False, color='r')

它 returns 有以下错误:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-7-d6488afe3dcb> in <module>()
      1 points = plt.scatter(x = pd.to_datetime(data['Date'], unit='s').dt.to_pydatetime(), y = data["Words"], c=data["Words"], s=75, cmap="BrBG")
      2 plt.colorbar(points)
----> 3 sns.regplot(x = pd.to_datetime(data['Date'], unit='s').dt.to_pydatetime(), y = data["Words"], data=data, scatter=False, color='r')

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\seaborn\linearmodels.py in regplot(x, y, data, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, dropna, x_jitter, y_jitter, label, color, marker, scatter_kws, line_kws, ax)
    777     scatter_kws["marker"] = marker
    778     line_kws = {} if line_kws is None else copy.copy(line_kws)
--> 779     plotter.plot(ax, scatter_kws, line_kws)
    780     return ax
    781 

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\seaborn\linearmodels.py in plot(self, ax, scatter_kws, line_kws)
    330             self.scatterplot(ax, scatter_kws)
    331         if self.fit_reg:
--> 332             self.lineplot(ax, line_kws)
    333 
    334         # Label the axes

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\seaborn\linearmodels.py in lineplot(self, ax, kws)
    375 
    376         # Fit the regression model
--> 377         grid, yhat, err_bands = self.fit_regression(ax)
    378 
    379         # Get set default aesthetics

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\seaborn\linearmodels.py in fit_regression(self, ax, x_range, grid)
    207             yhat, yhat_boots = self.fit_logx(grid)
    208         else:
--> 209             yhat, yhat_boots = self.fit_fast(grid)
    210 
    211         # Compute the confidence interval at each grid point

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\seaborn\linearmodels.py in fit_fast(self, grid)
    222         grid = np.c_[np.ones(len(grid)), grid]
    223         reg_func = lambda _x, _y: np.linalg.pinv(_x).dot(_y)
--> 224         yhat = grid.dot(reg_func(X, y))
    225         if self.ci is None:
    226             return yhat, None

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\seaborn\linearmodels.py in <lambda>(_x, _y)
    221         X, y = np.c_[np.ones(len(self.x)), self.x], self.y
    222         grid = np.c_[np.ones(len(grid)), grid]
--> 223         reg_func = lambda _x, _y: np.linalg.pinv(_x).dot(_y)
    224         yhat = grid.dot(reg_func(X, y))
    225         if self.ci is None:

C:\Python\WinPython-64bit-3.5.2.2Qt5\python-3.5.2.amd64\lib\site-packages\numpy\linalg\linalg.py in pinv(a, rcond)
   1614     a, wrap = _makearray(a)
   1615     _assertNoEmpty2d(a)
-> 1616     a = a.conjugate()
   1617     u, s, vt = svd(a, 0)
   1618     m = u.shape[0]

AttributeError: 'datetime.datetime' object has no attribute 'conjugate'

尽管散点图确实以格式良好的日期时间呈现:

有没有办法在 Regplot 中使用日期时间,或者使用时间戳但将 x 轴上的标签格式化为日期?

您可以在 xticks 的位置获取时间戳的值,然后将它们转换为您想要的格式。

ax = plt.gca()
xticks = ax.get_xticks()
xticks_dates = [datetime.datetime.fromtimestamp(x).strftime('%Y-%m-%d %H:%M:%S') for x in xticks]
ax.set_xticklabels(xticks_dates)