使用 Pandas 和 Bokeh 绘制按类别分组的 JSON 时间序列数据
Plotting JSON time series data grouped by category with Pandas and Bokeh
我有一些每日时间序列 JSON 数据,同一个文件中包含多个站点的记录(文末附有 JSON 中单个条目的示例)。
我想用 Bokeh 绘制这些数据:按 "system_name" 分组,把每个站点的时间序列画成同一张图上不同颜色的线。怎样才能为每个站点各画出一条线?我目前尝试的是 multi_line——还是说应该在 for 循环中直接调用 p.line?
如能给予指导或提示,非常感谢。
import json
from datetime import datetime
from pandas.io.json import json_normalize
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
# Asker's non-working attempt, kept as posted (the original indentation was lost).
# It loads the local JSON copy, groups the rows by site name and tries to plot
# each group with Bokeh.
output_file('wyndham.html')
with open('wyndham_data.txt', 'r') as f:
a = json.load(f)
# The flattened columns are addressed below by dotted names such as
# 'properties.system_name'.
res = json_normalize(a['features'])
gby = res.groupby('properties.system_name')
for key, item in gby:
g = item.sort_values(by='properties.date_stamp') **<<<works to here**
# NOTE(review): the double brackets select one-column DataFrames, not Series —
# presumably not what ColumnDataSource expects for a column; confirm.
source = ColumnDataSource(dict(x = g[['properties.date_stamp']],
y = g[['properties.energy_prod(KWh)']]))
# NOTE(review): with the indentation lost it is unclear whether figure() was
# meant to run once or once per group — confirm against the original post.
p = figure()
# NOTE(review): x and y are not defined anywhere in this snippet; when a
# source is given, the column names 'x' and 'y' were presumably intended.
p.multi_line(x, y, source=source)
show(p)
样本JSON:
{
"type" : "FeatureCollection",
"name" : "wyndham-solar-energy-production.json",
"features" : [
{
"type" : "Feature",
"geometry" : null,
"properties" : {
"system_id" : "9386741",
"system_name" : "Yerambooee Community Centre ",
"date_stamp" : "2018-08-01",
"energy_prod(KWh)" : 51.5,
"energy_life(MWh)" : null,
"C02 (Kg)" : 47.41,
"KWp" : 18.2,
"performance" : 2.8,
"lat" : -37.8587717,
"lon" : 144.7100923,
"date_installed" : "2017-07-27"
}
}, ...
你可以像这样画一条线:
import numpy as np
import pandas as pd
from datetime import datetime
import json
import matplotlib.pyplot as plt
import bokeh
# Load the feature list from a local JSON file, index it by date and plot
# every numeric column with pandas' matplotlib backend.

# Read-only access is enough; 'r+' would needlessly require write permission.
with open('1.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Flatten the features; columns are addressed by dotted names such as
# 'properties.date_stamp'.
df = pd.json_normalize(data['features'])
# Use real datetimes for the index so the x axis is laid out as a time axis
# instead of one tick per date string.
df.index = pd.to_datetime(df['properties.date_stamp'])
print(df)

# DataFrame.plot() creates its own figure, so a preceding plt.figure() would
# only leave an empty extra window behind.
df.plot()
# Needed when run as a plain script; without it no window is displayed.
plt.show()
经过大量试验和错误后,我找到了使它起作用的方法。
(虽然代码和输出本身并不漂亮 - 它实现了练习的目的)。
需要绘制的线条数量超过了 Bokeh 默认调色板中的颜色数量。bokeh.palettes 中的 linear_palette 函数让我可以为 30 条线中的每一条指定不同的色调。
在我发布的问题中,我使用的是已下载 JSON 的本地副本(保存为文本文件)。这里我添加了 import requests 和目标 URL,方便您自己运行这段代码。请注意,在我的机器上运行大约需要 15 秒。Imgur 截图链接:Wyndham Wind Farm Scheme Daily Output Plot.
我还遇到了一个 SettingWithCopyWarning:
SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
site_data['properties.date_stamp'] = pd.to_datetime(site_data['properties.date_stamp'])
import requests
import pandas as pd
import json
from datetime import datetime
from pandas import json_normalize
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
from bokeh.palettes import linear_palette, Viridis256
# Download the Wyndham energy-production GeoJSON and draw one coloured line
# per site (grouped by "system_name") on a single Bokeh figure.
URL = "https://data.gov.au/data/dataset/aa75879c-1d3e-4ad2-b331-826032c6b84b/resource/6e309687-023b-436b-9079-582b7e2fb074/download/wyndham-solar-energy-production.json"

r = requests.get(URL)
# Fail loudly on an HTTP error instead of trying to parse an error page.
r.raise_for_status()
a = r.json()

res = json_normalize(a['features'])
# Parse the date strings once on the full frame. Assigning into a per-group
# column subset is what raises SettingWithCopyWarning.
res['properties.date_stamp'] = pd.to_datetime(res['properties.date_stamp'])

gby = res.groupby('properties.system_name')
sites = res['properties.system_name'].unique()
num_sites = len(sites)

output_file('wyndham.html')

# One distinct Viridis shade per site; the default palettes have too few
# colours for this many lines.
plot_colors = linear_palette(Viridis256, num_sites)

# NOTE(review): the dataset file is named "solar-energy-production" while the
# title says "Wind Farm" — confirm the intended wording.
p = figure(width=1800, height=900, x_axis_type="datetime",
           title="Wyndham Wind Farm Scheme Daily Power Output")
p.yaxis.axis_label = "Daily Power Output (kW.h)"

# Iterating the groupby yields one group per unique site name, so zipping it
# with the palette pairs every site with its own colour.
for line_col, (key, grp) in zip(plot_colors, gby):
    # Sort chronologically so the line is drawn left to right.
    g = grp.sort_values(by='properties.date_stamp')
    p.line(x=g['properties.date_stamp'],
           y=g['properties.energy_prod(KWh)'],
           legend_label=key, line_width=2, line_color=line_col)

show(p)
Wyndham Wind Farm Scheme Daily Output Plot
我有一些每日时间序列 JSON 数据,同一个文件中包含多个站点的记录(文末附有 JSON 中单个条目的示例)。
我想用 Bokeh 绘制这些数据:按 "system_name" 分组,把每个站点的时间序列画成同一张图上不同颜色的线。怎样才能为每个站点各画出一条线?我目前尝试的是 multi_line——还是说应该在 for 循环中直接调用 p.line?
如能给予指导或提示,非常感谢。
import json
from datetime import datetime
from pandas.io.json import json_normalize
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
# Asker's non-working attempt, kept as posted (the original indentation was lost).
# It loads the local JSON copy, groups the rows by site name and tries to plot
# each group with Bokeh.
output_file('wyndham.html')
with open('wyndham_data.txt', 'r') as f:
a = json.load(f)
# The flattened columns are addressed below by dotted names such as
# 'properties.system_name'.
res = json_normalize(a['features'])
gby = res.groupby('properties.system_name')
for key, item in gby:
g = item.sort_values(by='properties.date_stamp') **<<<works to here**
# NOTE(review): the double brackets select one-column DataFrames, not Series —
# presumably not what ColumnDataSource expects for a column; confirm.
source = ColumnDataSource(dict(x = g[['properties.date_stamp']],
y = g[['properties.energy_prod(KWh)']]))
# NOTE(review): with the indentation lost it is unclear whether figure() was
# meant to run once or once per group — confirm against the original post.
p = figure()
# NOTE(review): x and y are not defined anywhere in this snippet; when a
# source is given, the column names 'x' and 'y' were presumably intended.
p.multi_line(x, y, source=source)
show(p)
样本JSON:
{
"type" : "FeatureCollection",
"name" : "wyndham-solar-energy-production.json",
"features" : [
{
"type" : "Feature",
"geometry" : null,
"properties" : {
"system_id" : "9386741",
"system_name" : "Yerambooee Community Centre ",
"date_stamp" : "2018-08-01",
"energy_prod(KWh)" : 51.5,
"energy_life(MWh)" : null,
"C02 (Kg)" : 47.41,
"KWp" : 18.2,
"performance" : 2.8,
"lat" : -37.8587717,
"lon" : 144.7100923,
"date_installed" : "2017-07-27"
}
}, ...
你可以像这样画一条线:
import numpy as np
import pandas as pd
from datetime import datetime
import json
import matplotlib.pyplot as plt
import bokeh
# Load the feature list from a local JSON file, index it by date and plot
# every numeric column with pandas' matplotlib backend.

# Read-only access is enough; 'r+' would needlessly require write permission.
with open('1.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Flatten the features; columns are addressed by dotted names such as
# 'properties.date_stamp'.
df = pd.json_normalize(data['features'])
# Use real datetimes for the index so the x axis is laid out as a time axis
# instead of one tick per date string.
df.index = pd.to_datetime(df['properties.date_stamp'])
print(df)

# DataFrame.plot() creates its own figure, so a preceding plt.figure() would
# only leave an empty extra window behind.
df.plot()
# Needed when run as a plain script; without it no window is displayed.
plt.show()
经过大量试验和错误后,我找到了使它起作用的方法。 (虽然代码和输出本身并不漂亮 - 它实现了练习的目的)。
需要绘制的线条数量超过了 Bokeh 默认调色板中的颜色数量。bokeh.palettes 中的 linear_palette 函数让我可以为 30 条线中的每一条指定不同的色调。
在我发布的问题中,我使用的是已下载 JSON 的本地副本(保存为文本文件)。这里我添加了 import requests 和目标 URL,方便您自己运行这段代码。请注意,在我的机器上运行大约需要 15 秒。Imgur 截图链接:Wyndham Wind Farm Scheme Daily Output Plot.
我还遇到了一个 SettingWithCopyWarning:
SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy site_data['properties.date_stamp'] = pd.to_datetime(site_data['properties.date_stamp'])
import requests
import pandas as pd
import json
from datetime import datetime
from pandas import json_normalize
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource
from bokeh.palettes import linear_palette, Viridis256
# Download the Wyndham energy-production GeoJSON and draw one coloured line
# per site (grouped by "system_name") on a single Bokeh figure.
URL = "https://data.gov.au/data/dataset/aa75879c-1d3e-4ad2-b331-826032c6b84b/resource/6e309687-023b-436b-9079-582b7e2fb074/download/wyndham-solar-energy-production.json"

r = requests.get(URL)
# Fail loudly on an HTTP error instead of trying to parse an error page.
r.raise_for_status()
a = r.json()

res = json_normalize(a['features'])
# Parse the date strings once on the full frame. Assigning into a per-group
# column subset is what raises SettingWithCopyWarning.
res['properties.date_stamp'] = pd.to_datetime(res['properties.date_stamp'])

gby = res.groupby('properties.system_name')
sites = res['properties.system_name'].unique()
num_sites = len(sites)

output_file('wyndham.html')

# One distinct Viridis shade per site; the default palettes have too few
# colours for this many lines.
plot_colors = linear_palette(Viridis256, num_sites)

# NOTE(review): the dataset file is named "solar-energy-production" while the
# title says "Wind Farm" — confirm the intended wording.
p = figure(width=1800, height=900, x_axis_type="datetime",
           title="Wyndham Wind Farm Scheme Daily Power Output")
p.yaxis.axis_label = "Daily Power Output (kW.h)"

# Iterating the groupby yields one group per unique site name, so zipping it
# with the palette pairs every site with its own colour.
for line_col, (key, grp) in zip(plot_colors, gby):
    # Sort chronologically so the line is drawn left to right.
    g = grp.sort_values(by='properties.date_stamp')
    p.line(x=g['properties.date_stamp'],
           y=g['properties.energy_prod(KWh)'],
           legend_label=key, line_width=2, line_color=line_col)

show(p)
Wyndham Wind Farm Scheme Daily Output Plot