索引转换为 pandas 日期时间后,Plotly 图表是一堆乱七八糟的线条
Plotly chart is a mess of lines after index converted to pandas datetime
我的图表只是一团乱七八糟的锯齿状线条 (see chart here)。只有在我使用 df['Date'] = pd.to_datetime(df.index)
将索引转换为日期时间格式后才会发生这种情况。
完整代码:
#IMPORTS
import yfinance as yf
import time
import pandas as pd
import datetime
import numpy as np
import xlsxwriter
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# SETTING UP DF
df = ((pd.read_csv('Book1.csv')).set_index('Date'))[:-1]
df['SMA30'] = df.Total.rolling(30).sum()
df['SMA365'] = df.Total.rolling(365).sum()
df['Monthly Avg'] = df.SMA30.mean()
df['Date'] = pd.to_datetime(df.index)
# PLOTTING FIGURE
fig = go.Figure()
fig.update_layout(title = 'EQ Footfall')
fig.add_trace(go.Scatter(x=df['Date'], y=df.Total, name = 'Footfall Daily'))
fig.add_trace(go.Scatter(x=df.index, y=df.SMA30, name = 'SMA30'))
fig.add_trace(go.Scatter(x=df.index, y=df.SMA365, name = 'SMA365'))
fig.update_xaxes(rangeslider_visible=True)
fig.update_xaxes(tickangle=-45)
- 数据框索引中的日期顺序很重要
- 模拟日期 none 格式为 YYYYMMDD
- 没有这条线
df = df.reindex(df.sort_index().index)
,生成的绘图是在 x 和 y 坐标之间绘制线,其中 x 不是连续的
- 当日期是一个字符串时,它是一个分类变量,所以它的行为与连续变量不同
import yfinance as yf
import time
import pandas as pd
import datetime
import numpy as np
import xlsxwriter
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# SETTING UP DF
# df = ((pd.read_csv('Book1.csv')).set_index('Date'))[:-1]
df = pd.DataFrame({"Date":pd.Series(pd.date_range("1-jan-2018", periods=int(365*2.5))).dt.strftime("%Y%m%d"),
"Total":np.random.randint(1,5, int(365*2.5))}).set_index("Date")
# simulate dates in none sequential order
np.random.shuffle(df.index.values)
# reindex with sequential dates, NB dates or format YYYYMMDD are sortable in this way
df = df.reindex(df.sort_index().index)
df['SMA30'] = df.Total.rolling(30).sum()
df['SMA365'] = df.Total.rolling(365).sum()
df['Monthly Avg'] = df.SMA30.mean()
df['Date'] = pd.to_datetime(df.index)
# df["Date"] = df.index
# PLOTTING FIGURE
fig = go.Figure()
fig.update_layout(title = 'EQ Footfall')
fig.add_trace(go.Scatter(x=df['Date'], y=df.Total, name = 'Footfall Daily'))
fig.add_trace(go.Scatter(x=df.index, y=df.SMA30, name = 'SMA30'))
fig.add_trace(go.Scatter(x=df.index, y=df.SMA365, name = 'SMA365'))
fig.update_xaxes(rangeslider_visible=True)
fig.update_xaxes(tickangle=-45)
@Oddaspa 的建议也对我有用:
对索引进行排序会有所帮助。 df.sort_index()
我发现的另一个解决方案是将 csv 文件中的日期结构修改为 yyyy-mm-dd。这似乎主要是如何 plotly 读取日期的问题。希望这有帮助。
我的图表只是一团乱七八糟的锯齿状线条 (see chart here)。只有在我使用 df['Date'] = pd.to_datetime(df.index)
将索引转换为日期时间格式后才会发生这种情况。
完整代码:
#IMPORTS
import yfinance as yf
import time
import pandas as pd
import datetime
import numpy as np
import xlsxwriter
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# SETTING UP DF
df = ((pd.read_csv('Book1.csv')).set_index('Date'))[:-1]
df['SMA30'] = df.Total.rolling(30).sum()
df['SMA365'] = df.Total.rolling(365).sum()
df['Monthly Avg'] = df.SMA30.mean()
df['Date'] = pd.to_datetime(df.index)
# PLOTTING FIGURE
fig = go.Figure()
fig.update_layout(title = 'EQ Footfall')
fig.add_trace(go.Scatter(x=df['Date'], y=df.Total, name = 'Footfall Daily'))
fig.add_trace(go.Scatter(x=df.index, y=df.SMA30, name = 'SMA30'))
fig.add_trace(go.Scatter(x=df.index, y=df.SMA365, name = 'SMA365'))
fig.update_xaxes(rangeslider_visible=True)
fig.update_xaxes(tickangle=-45)
- 数据框索引中的日期顺序很重要
- 模拟日期 none 格式为 YYYYMMDD
- 没有这条线
df = df.reindex(df.sort_index().index)
,生成的绘图是在 x 和 y 坐标之间绘制线,其中 x 不是连续的 - 当日期是一个字符串时,它是一个分类变量,所以它的行为与连续变量不同
import yfinance as yf
import time
import pandas as pd
import datetime
import numpy as np
import xlsxwriter
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# SETTING UP DF
# df = ((pd.read_csv('Book1.csv')).set_index('Date'))[:-1]
df = pd.DataFrame({"Date":pd.Series(pd.date_range("1-jan-2018", periods=int(365*2.5))).dt.strftime("%Y%m%d"),
"Total":np.random.randint(1,5, int(365*2.5))}).set_index("Date")
# simulate dates in none sequential order
np.random.shuffle(df.index.values)
# reindex with sequential dates, NB dates or format YYYYMMDD are sortable in this way
df = df.reindex(df.sort_index().index)
df['SMA30'] = df.Total.rolling(30).sum()
df['SMA365'] = df.Total.rolling(365).sum()
df['Monthly Avg'] = df.SMA30.mean()
df['Date'] = pd.to_datetime(df.index)
# df["Date"] = df.index
# PLOTTING FIGURE
fig = go.Figure()
fig.update_layout(title = 'EQ Footfall')
fig.add_trace(go.Scatter(x=df['Date'], y=df.Total, name = 'Footfall Daily'))
fig.add_trace(go.Scatter(x=df.index, y=df.SMA30, name = 'SMA30'))
fig.add_trace(go.Scatter(x=df.index, y=df.SMA365, name = 'SMA365'))
fig.update_xaxes(rangeslider_visible=True)
fig.update_xaxes(tickangle=-45)
@Oddaspa 的建议也对我有用:
对索引进行排序会有所帮助。 df.sort_index()
我发现的另一个解决方案是将 csv 文件中的日期结构修改为 yyyy-mm-dd。这似乎主要是如何 plotly 读取日期的问题。希望这有帮助。