使用 ggplot() 和 bsts() 包从贝叶斯时间序列分析和 MCMC 生成 BSTS 平均绝对百分比误差 (MAPE) 图
Production of a BSTS Mean Absolute Percentage Error (MAPE) Plot from a Bayesian Time Series Analysis with MCMC using ggplot() and bsts() packages
问题:
我有一个名为 FID 的数据框(见下文),其中包含年份和月份的两列,以及 Sighting_Frequency(鸟类数量)。
数据框包含 3 年 2015-2017 之间的观测值,表明我有 36 个月的数据。我使用 bsts 包中的 bsts() 函数 运行 使用 MCMC 进行了 贝叶斯时间序列分析 bsts 包(请参阅下面的 R 代码)按照下面的教程进行操作。
我想制作一个坚持平均绝对百分比误差 (MAPE) 图,如下图所示,它说明了坚持期间的实际值与预测值以及可信区间使用包 ggplot().
我在尝试生成 d2 数据框 时卡住了(请参阅下面的教程和 R 代码),因为我一直遇到此错误消息:-
Error in data.frame(c(10^as.numeric(-colMeans(bsts.model$one.step.prediction.errors[-(1:burn), :
arguments imply differing number of rows: 48, 32
我一直在努力找出问题所在。如果有人能帮我解决这个问题,我将不胜感激。
非常感谢。
教程
图
R-代码:
################################################################################
##Time Series Model using the bsts() function
##################################################################################
##Open packages for the time series analysis
library(lubridate)
library(bsts)
library(dplyr)
library(ggplot2)
##Create a time series object
myts2 <- ts(BSTS_Dataframe$Sightings_Frequency, start=c(2015, 1), end=c(2017, 12), frequency=12)
##Upload the data into the windows() function
x <- window(myts2, start=c(2015, 01), end=c(2017, 12))
y <- log(x)
### Run the bsts model
ss <- AddLocalLinearTrend(list(), y)
ss <- AddSeasonal(ss, y, nseasons = 3)
# bsts.model <- bsts(y, state.specification = ss, family = "poisson", niter = 2, ping=0, seed=1234)
bsts.model <- bsts(y, state.specification = ss, family = "logit", niter = 100, ping = 0, seed = 123)
##Open plotting window
dev.new()
##Plot the bsts.model
plot(bsts.model)
##Get a suggested number of burns
burn<-bsts::SuggestBurn(0.1, bsts.model)
##Predict
p<-predict.bsts(bsts.model, horizon = 12, burn=burn, quantiles=c(.25, .975))
##Actual vs predicted
d2 <- data.frame(
# fitted values and predictions
c(10^as.numeric(-colMeans(bsts.model$one.step.prediction.errors[-(1:burn),])+y),
10^as.numeric(p$mean)),
# actual data and dates
as.numeric(BSTS_Dataframe$Sightings_Frequency),
as.Date(time(BSTS_Dataframe$Sightings_Frequency)))
######################################
Error message
######################################
Error in data.frame(c(10^as.numeric(-colMeans(bsts.model$one.step.prediction.errors[-(1:burn), :
arguments imply differing number of rows: 48, 32
names(d2) <- c("Fitted", "Actual", "Date")
### MAPE (mean absolute percentage error)
MAPE <- dplyr::filter(d2, year(Date)>2017) %>% dplyr::summarise(MAPE=mean(abs(Actual-Fitted)/Actual))
### 95% forecast credible interval
posterior.interval <- cbind.data.frame(
10^as.numeric(p$interval[1,]),
10^as.numeric(p$interval[2,]),
subset(d2, year(Date)>2017)$Date)
names(posterior.interval) <- c("LL", "UL", "Date")
### Join intervals to the forecast
d3 <- left_join(d2, posterior.interval, by="Date")
### Plot actual versus predicted with credible intervals for the holdout period
ggplot(data=d3, aes(x=Date)) +
geom_line(aes(y=Actual, colour = "Actual"), size=1.2) +
geom_line(aes(y=Fitted, colour = "Fitted"), size=1.2, linetype=2) +
theme_bw() + theme(legend.title = element_blank()) + ylab("") + xlab("") +
geom_vline(xintercept=as.numeric(as.Date("2017-12-01")), linetype=2) +
geom_ribbon(aes(ymin=LL, ymax=UL), fill="grey", alpha=0.5) +
ggtitle(paste0("BSTS -- Holdout MAPE = ", round(100*MAPE,2), "%")) +
theme(axis.text.x=element_text(angle = -90, hjust = 0))
FID 数据帧
structure(list(Year = structure(1:32, .Label = c("2015-01", "2015-02",
"2015-03", "2015-04", "2015-05", "2015-08", "2015-09", "2015-10",
"2015-11", "2015-12", "2016-01", "2016-02", "2016-03", "2016-04",
"2016-05", "2016-07", "2016-08", "2016-09", "2016-10", "2016-11",
"2016-12", "2017-01", "2017-02", "2017-03", "2017-04", "2017-05",
"2017-07", "2017-08", "2017-09", "2017-10", "2017-11", "2017-12"
), class = "factor"), Sightings_Frequency = c(36L, 28L, 39L,
46L, 5L, 22L, 10L, 15L, 8L, 33L, 33L, 29L, 31L, 23L, 8L, 9L,
40L, 41L, 40L, 30L, 30L, 44L, 37L, 41L, 42L, 20L, 7L, 27L, 35L,
27L, 43L, 38L)), class = "data.frame", row.names = c(NA, -32L
))
#######################################################################################
##A Bayesian Structural Time Series Model with mcmc
#######################################################################################
##Open packages for the time series analysis
library(lubridate)
library(bsts)
library(dplyr)
library(ggplot2)
library(ggfortify)
###################################################################################
##Time Series Model using the bsts() function
##################################################################################
BSTS_Dataframe$Year <- lubridate::ymd(paste0(FID$Year,"-01"))
allDates <- seq.Date(
min(FID$Year),
max(FID$Year),
"month")
FID <- FID %>% right_join(data.frame(Year = allDates), by = c("Year")) %>% dplyr::arrange(Year) %>%
tidyr::fill(Sightings_Frequency, .direction = "down")
##Create a time series object
myts2 <- ts(FID$Sightings_Frequency, start=c(2015, 1), end=c(2017, 12), frequency=12)
##Upload the data into the windows() function
x <- window(myts2, start=c(2015, 01), end=c(2016, 12))
y <- log(x)
##Produce a list for the object ss
ss <- list()
#ss <- AddLocalLinearTrend(list(), y)
ss <- AddSeasonal(ss, y, nseasons = 12)
ss <- AddLocalLevel(ss, y)
# bsts.model <- bsts(y, state.specification = ss, family = "poisson", niter = 2, ping=0, seed=1234)
# If these are poisson distributed, no need to use logit because it bounds reponse
# between 0-1
bsts.model <- bsts(y, state.specification = ss, niter = 100, ping = 0, seed = 123)
##Open plotting window
dev.new()
##Plot the bsts.model
plot(bsts.model)
##Get a suggested number of burns
burn<-bsts::SuggestBurn(0.1, bsts.model)
##Predict
p<-predict.bsts(bsts.model, horizon = 12, burn=burn, quantiles=c(.25, .975))
p$mean
##Actual vs predicted
d2 <- data.frame(
# fitted values and predictions
c(exp(as.numeric(-colMeans(bsts.model$one.step.prediction.errors[-(1:burn),])+y)),
exp(as.numeric(p$mean))),
# actual data and dates
as.numeric(FID$Sightings_Frequency),
as.Date(FID$Year))
names(d2) <- c("Fitted", "Actual", "Date")
### MAPE (mean absolute percentage error)
MAPE <- dplyr::filter(d2, lubridate::year(Date)>=2017) %>%
dplyr::summarise(MAPE=mean(abs(Actual-Fitted)/Actual))
### 95% forecast credible interval
posterior.interval <- cbind.data.frame(
exp(as.numeric(p$interval[1,])),
exp(as.numeric(p$interval[2,])),
tail(d2,12)$Date)
names(posterior.interval) <- c("LL", "UL", "Date")
### Join intervals to the forecast
d3 <- left_join(d2, posterior.interval, by="Date")
##Open plotting window
dev.new()
### Plot actual versus predicted with credible intervals for the holdout period
ggplot(data=d3, aes(x=Date)) +
geom_line(aes(y=Actual, colour = "Actual"), size=1.2) +
geom_line(aes(y=Fitted, colour = "Fitted"), size=1.2, linetype=2) +
theme_bw() + theme(legend.title = element_blank()) + ylab("") + xlab("") +
geom_vline(xintercept=as.numeric(as.Date("2017-12-01")), linetype=2) +
geom_ribbon(aes(ymin=LL, ymax=UL), fill="grey", alpha=0.5) +
ggtitle(paste0("BSTS -- Holdout MAPE = ", round(100*MAPE,2), "%")) +
theme(axis.text.x=element_text(angle = -90, hjust = 0))
情节
问题:
我有一个名为 FID 的数据框(见下文),其中包含年份和月份的两列,以及 Sighting_Frequency(鸟类数量)。
数据框包含 3 年 2015-2017 之间的观测值,表明我有 36 个月的数据。我使用 bsts 包中的 bsts() 函数 运行 使用 MCMC 进行了 贝叶斯时间序列分析 bsts 包(请参阅下面的 R 代码)按照下面的教程进行操作。
我想制作一个坚持平均绝对百分比误差 (MAPE) 图,如下图所示,它说明了坚持期间的实际值与预测值以及可信区间使用包 ggplot().
我在尝试生成 d2 数据框 时卡住了(请参阅下面的教程和 R 代码),因为我一直遇到此错误消息:-
Error in data.frame(c(10^as.numeric(-colMeans(bsts.model$one.step.prediction.errors[-(1:burn), :
arguments imply differing number of rows: 48, 32
我一直在努力找出问题所在。如果有人能帮我解决这个问题,我将不胜感激。
非常感谢。
教程
图
R-代码:
################################################################################
##Time Series Model using the bsts() function
##################################################################################
##Open packages for the time series analysis
library(lubridate)
library(bsts)
library(dplyr)
library(ggplot2)
##Create a time series object
myts2 <- ts(BSTS_Dataframe$Sightings_Frequency, start=c(2015, 1), end=c(2017, 12), frequency=12)
##Upload the data into the windows() function
x <- window(myts2, start=c(2015, 01), end=c(2017, 12))
y <- log(x)
### Run the bsts model
ss <- AddLocalLinearTrend(list(), y)
ss <- AddSeasonal(ss, y, nseasons = 3)
# bsts.model <- bsts(y, state.specification = ss, family = "poisson", niter = 2, ping=0, seed=1234)
bsts.model <- bsts(y, state.specification = ss, family = "logit", niter = 100, ping = 0, seed = 123)
##Open plotting window
dev.new()
##Plot the bsts.model
plot(bsts.model)
##Get a suggested number of burns
burn<-bsts::SuggestBurn(0.1, bsts.model)
##Predict
p<-predict.bsts(bsts.model, horizon = 12, burn=burn, quantiles=c(.25, .975))
##Actual vs predicted
d2 <- data.frame(
# fitted values and predictions
c(10^as.numeric(-colMeans(bsts.model$one.step.prediction.errors[-(1:burn),])+y),
10^as.numeric(p$mean)),
# actual data and dates
as.numeric(BSTS_Dataframe$Sightings_Frequency),
as.Date(time(BSTS_Dataframe$Sightings_Frequency)))
######################################
Error message
######################################
Error in data.frame(c(10^as.numeric(-colMeans(bsts.model$one.step.prediction.errors[-(1:burn), :
arguments imply differing number of rows: 48, 32
names(d2) <- c("Fitted", "Actual", "Date")
### MAPE (mean absolute percentage error)
MAPE <- dplyr::filter(d2, year(Date)>2017) %>% dplyr::summarise(MAPE=mean(abs(Actual-Fitted)/Actual))
### 95% forecast credible interval
posterior.interval <- cbind.data.frame(
10^as.numeric(p$interval[1,]),
10^as.numeric(p$interval[2,]),
subset(d2, year(Date)>2017)$Date)
names(posterior.interval) <- c("LL", "UL", "Date")
### Join intervals to the forecast
d3 <- left_join(d2, posterior.interval, by="Date")
### Plot actual versus predicted with credible intervals for the holdout period
ggplot(data=d3, aes(x=Date)) +
geom_line(aes(y=Actual, colour = "Actual"), size=1.2) +
geom_line(aes(y=Fitted, colour = "Fitted"), size=1.2, linetype=2) +
theme_bw() + theme(legend.title = element_blank()) + ylab("") + xlab("") +
geom_vline(xintercept=as.numeric(as.Date("2017-12-01")), linetype=2) +
geom_ribbon(aes(ymin=LL, ymax=UL), fill="grey", alpha=0.5) +
ggtitle(paste0("BSTS -- Holdout MAPE = ", round(100*MAPE,2), "%")) +
theme(axis.text.x=element_text(angle = -90, hjust = 0))
FID 数据帧
structure(list(Year = structure(1:32, .Label = c("2015-01", "2015-02",
"2015-03", "2015-04", "2015-05", "2015-08", "2015-09", "2015-10",
"2015-11", "2015-12", "2016-01", "2016-02", "2016-03", "2016-04",
"2016-05", "2016-07", "2016-08", "2016-09", "2016-10", "2016-11",
"2016-12", "2017-01", "2017-02", "2017-03", "2017-04", "2017-05",
"2017-07", "2017-08", "2017-09", "2017-10", "2017-11", "2017-12"
), class = "factor"), Sightings_Frequency = c(36L, 28L, 39L,
46L, 5L, 22L, 10L, 15L, 8L, 33L, 33L, 29L, 31L, 23L, 8L, 9L,
40L, 41L, 40L, 30L, 30L, 44L, 37L, 41L, 42L, 20L, 7L, 27L, 35L,
27L, 43L, 38L)), class = "data.frame", row.names = c(NA, -32L
))
#######################################################################################
##A Bayesian Structural Time Series Model with mcmc
#######################################################################################
##Open packages for the time series analysis
library(lubridate)
library(bsts)
library(dplyr)
library(ggplot2)
library(ggfortify)
###################################################################################
##Time Series Model using the bsts() function
##################################################################################
BSTS_Dataframe$Year <- lubridate::ymd(paste0(FID$Year,"-01"))
allDates <- seq.Date(
min(FID$Year),
max(FID$Year),
"month")
FID <- FID %>% right_join(data.frame(Year = allDates), by = c("Year")) %>% dplyr::arrange(Year) %>%
tidyr::fill(Sightings_Frequency, .direction = "down")
##Create a time series object
myts2 <- ts(FID$Sightings_Frequency, start=c(2015, 1), end=c(2017, 12), frequency=12)
##Upload the data into the windows() function
x <- window(myts2, start=c(2015, 01), end=c(2016, 12))
y <- log(x)
##Produce a list for the object ss
ss <- list()
#ss <- AddLocalLinearTrend(list(), y)
ss <- AddSeasonal(ss, y, nseasons = 12)
ss <- AddLocalLevel(ss, y)
# bsts.model <- bsts(y, state.specification = ss, family = "poisson", niter = 2, ping=0, seed=1234)
# If these are poisson distributed, no need to use logit because it bounds reponse
# between 0-1
bsts.model <- bsts(y, state.specification = ss, niter = 100, ping = 0, seed = 123)
##Open plotting window
dev.new()
##Plot the bsts.model
plot(bsts.model)
##Get a suggested number of burns
burn<-bsts::SuggestBurn(0.1, bsts.model)
##Predict
p<-predict.bsts(bsts.model, horizon = 12, burn=burn, quantiles=c(.25, .975))
p$mean
##Actual vs predicted
d2 <- data.frame(
# fitted values and predictions
c(exp(as.numeric(-colMeans(bsts.model$one.step.prediction.errors[-(1:burn),])+y)),
exp(as.numeric(p$mean))),
# actual data and dates
as.numeric(FID$Sightings_Frequency),
as.Date(FID$Year))
names(d2) <- c("Fitted", "Actual", "Date")
### MAPE (mean absolute percentage error)
MAPE <- dplyr::filter(d2, lubridate::year(Date)>=2017) %>%
dplyr::summarise(MAPE=mean(abs(Actual-Fitted)/Actual))
### 95% forecast credible interval
posterior.interval <- cbind.data.frame(
exp(as.numeric(p$interval[1,])),
exp(as.numeric(p$interval[2,])),
tail(d2,12)$Date)
names(posterior.interval) <- c("LL", "UL", "Date")
### Join intervals to the forecast
d3 <- left_join(d2, posterior.interval, by="Date")
##Open plotting window
dev.new()
### Plot actual versus predicted with credible intervals for the holdout period
ggplot(data=d3, aes(x=Date)) +
geom_line(aes(y=Actual, colour = "Actual"), size=1.2) +
geom_line(aes(y=Fitted, colour = "Fitted"), size=1.2, linetype=2) +
theme_bw() + theme(legend.title = element_blank()) + ylab("") + xlab("") +
geom_vline(xintercept=as.numeric(as.Date("2017-12-01")), linetype=2) +
geom_ribbon(aes(ymin=LL, ymax=UL), fill="grey", alpha=0.5) +
ggtitle(paste0("BSTS -- Holdout MAPE = ", round(100*MAPE,2), "%")) +
theme(axis.text.x=element_text(angle = -90, hjust = 0))
情节