绘制 2x2x2 时间序列的原始值和预测值
Plot raw and predict values for 2x2x2 time-series
这是我的数据样本
library(tidyr)
library(dplyr)
library(ggplot2)
# Build a small wide-format example data set: 16 experimental units crossing
# two resource levels with two fertilizer levels, measured at t0 through t10.
n_units <- 16

# Treatment labels. The patterns repeat so that every resource x fertilizer
# combination occurs four times.
resource <- rep(c("good", "good", "bad", "bad"), times = 4)
fertilizer <- rep(c("none", "nitrogen"), times = 8)

# Random measurements, one draw of `n_units` distinct values per time point.
# t0-t5 sample from 1:20 and t6-t10 from 10:100; the draws are made in
# t0..t10 order.
early_values <- 1:20
late_values <- 10:100
t0 <- sample(early_values, n_units)
t1 <- sample(early_values, n_units)
t2 <- sample(early_values, n_units)
t3 <- sample(early_values, n_units)
t4 <- sample(early_values, n_units)
t5 <- sample(early_values, n_units)
t6 <- sample(late_values, n_units)
t7 <- sample(late_values, n_units)
t8 <- sample(late_values, n_units)
t9 <- sample(late_values, n_units)
t10 <- sample(late_values, n_units)

# One numeric id per experimental unit.
replicates <- as.numeric(seq_len(n_units))

data <- data.frame(
  resource, fertilizer, replicates,
  t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10
)

# Store both treatment columns as factors.
treatment_cols <- c("resource", "fertilizer")
data[treatment_cols] <- lapply(data[treatment_cols], as.factor)
# Reshape to long format: every t* column becomes (time, value) rows, while
# the replicates, resource and fertilizer columns are carried along unchanged.
# NOTE(review): gather() is superseded by pivot_longer() in current tidyr;
# kept here so the row order and result class stay the same.
data.melt <- data %>%
  ungroup() %>%
  gather(key = "time", value = "value", -replicates, -resource, -fertilizer)

# Placeholder for model output: 176 distinct random integers from 1:200,
# one per long-format row (16 units x 11 time points).
data.melt[["predict"]] <- sample(seq_len(200), size = 176)
其中,resource(资源)和 fertilizer(肥料)是两个各有 2 个水平的因子,因此实际上共有 4 种处理,每种处理 4 个重复,即 4 x 4 = 16 个重复。时间(time)是一个有 11 个水平(t0 到 t10)的因子。我运行了一个模型,预测值保存在 `predict` 列中。
现在我想为每种资源与肥料的组合(4 种处理,即 4 个面板)分别绘制原始值和预测值。我还想为每个时间点的藻类生长添加置信区间。下面是我尝试的代码。
# Asker's original attempt, kept exactly as posted.
# It refers to a data frame `df` and a column `predicted`, whereas the sample
# data above defines `data.melt` with a column named `predict`.
# NOTE(review): recent ggplot2 releases rename stat_summary()'s `fun.y`
# argument to `fun` -- confirm against the installed version.
ggplot(df, aes(x=time, y=predicted)) + geom_point(size=3)+ stat_summary(geom = "point", fun.y = "mean") + facet_grid(resource + fertilizer ~.)
使用这段简单的代码,我仍然只得到 2 个图,而不是 4 个。此外,预测值的均值也没有绘制出来。我不知道如何将 `value` 和 `predicted` 以及相应的置信区间绘制在一起。
如果有人能展示如何把全部四种处理绘制在同一张图上,以及是否可以像上面那样对其进行分面,那将非常有帮助。
我建议的解决方案是创建第二个 data.frame,其中包含所有汇总统计信息,例如平均预测值。我展示了一种使用 `dplyr` 包中的 `group_by` 和 `summarize` 来执行此操作的方法。汇总数据需要包含与主数据相匹配的 `resource`、`fertilizer` 和 `time` 列。此外,汇总数据还包含若干用于存放其他 y 值的列。
然后,需要把主数据和汇总数据分别传给各自对应的 ggplot 图层函数(各个 geom),而不是在主 `ggplot()` 调用中提供。`facet_grid` 可用于将数据拆分成四个图。
# Make `time` a factor with explicitly ordered levels t0, t1, ..., t10, so the
# x axis does not fall back to alphabetical order (t0, t1, t10, t2, ...).
time_levels <- paste0("t", 0:10)
data.melt$time <- factor(data.melt$time, levels = time_levels)

# Summary statistics of the predictions for every resource x fertilizer x time
# cell, kept in a separate data frame. The interval is mean +/- one standard
# deviation, a stand-in for a real confidence interval that the reader is
# expected to compute.
summary_dat <- data.melt %>%
  group_by(resource, fertilizer, time) %>%
  summarise(
    mean_predicted = mean(predict),
    upper_ci = mean_predicted + sd(predict),
    lower_ci = mean_predicted - sd(predict)
  )

# Each layer receives its own data set; nothing is passed to ggplot() itself.
interval_layer <- geom_errorbar(
  data = summary_dat,
  mapping = aes(x = time, ymax = upper_ci, ymin = lower_ci),
  width = 0.3, size = 0.7, colour = "tomato"
)
observed_layer <- geom_point(
  data = data.melt,
  mapping = aes(x = time, y = value),
  size = 1.6, colour = "grey20", alpha = 0.5
)
mean_layer <- geom_point(
  data = summary_dat,
  mapping = aes(x = time, y = mean_predicted),
  size = 3, shape = 21, fill = "tomato", colour = "grey20"
)

# Layers are added in this order: intervals, raw values, then mean predictions.
# The facet formula crosses resource with fertilizer, one panel per treatment.
p <- ggplot() +
  theme_bw() +
  interval_layer +
  observed_layer +
  mean_layer +
  facet_grid(resource ~ fertilizer)

ggsave(
  filename = "plot.png", plot = p,
  width = 6.5, height = 4, units = "in", dpi = 150
)
这是我的数据样本
library(tidyr)
library(dplyr)
library(ggplot2)
# Build a small wide-format example data set: 16 experimental units crossing
# two resource levels with two fertilizer levels, measured at t0 through t10.
n_units <- 16

# Treatment labels. The patterns repeat so that every resource x fertilizer
# combination occurs four times.
resource <- rep(c("good", "good", "bad", "bad"), times = 4)
fertilizer <- rep(c("none", "nitrogen"), times = 8)

# Random measurements, one draw of `n_units` distinct values per time point.
# t0-t5 sample from 1:20 and t6-t10 from 10:100; the draws are made in
# t0..t10 order.
early_values <- 1:20
late_values <- 10:100
t0 <- sample(early_values, n_units)
t1 <- sample(early_values, n_units)
t2 <- sample(early_values, n_units)
t3 <- sample(early_values, n_units)
t4 <- sample(early_values, n_units)
t5 <- sample(early_values, n_units)
t6 <- sample(late_values, n_units)
t7 <- sample(late_values, n_units)
t8 <- sample(late_values, n_units)
t9 <- sample(late_values, n_units)
t10 <- sample(late_values, n_units)

# One numeric id per experimental unit.
replicates <- as.numeric(seq_len(n_units))

data <- data.frame(
  resource, fertilizer, replicates,
  t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10
)

# Store both treatment columns as factors.
treatment_cols <- c("resource", "fertilizer")
data[treatment_cols] <- lapply(data[treatment_cols], as.factor)
# Reshape to long format: every t* column becomes (time, value) rows, while
# the replicates, resource and fertilizer columns are carried along unchanged.
# NOTE(review): gather() is superseded by pivot_longer() in current tidyr;
# kept here so the row order and result class stay the same.
data.melt <- data %>%
  ungroup() %>%
  gather(key = "time", value = "value", -replicates, -resource, -fertilizer)

# Placeholder for model output: 176 distinct random integers from 1:200,
# one per long-format row (16 units x 11 time points).
data.melt[["predict"]] <- sample(seq_len(200), size = 176)
其中,resource(资源)和 fertilizer(肥料)是两个各有 2 个水平的因子,因此实际上共有 4 种处理,每种处理 4 个重复,即 4 x 4 = 16 个重复。时间(time)是一个有 11 个水平(t0 到 t10)的因子。我运行了一个模型,预测值保存在 `predict` 列中。
现在我想为每种资源与肥料的组合(4 种处理,即 4 个面板)分别绘制原始值和预测值。我还想为每个时间点的藻类生长添加置信区间。下面是我尝试的代码。
# Asker's original attempt, kept exactly as posted.
# It refers to a data frame `df` and a column `predicted`, whereas the sample
# data above defines `data.melt` with a column named `predict`.
# NOTE(review): recent ggplot2 releases rename stat_summary()'s `fun.y`
# argument to `fun` -- confirm against the installed version.
ggplot(df, aes(x=time, y=predicted)) + geom_point(size=3)+ stat_summary(geom = "point", fun.y = "mean") + facet_grid(resource + fertilizer ~.)
使用这段简单的代码,我仍然只得到 2 个图,而不是 4 个。此外,预测值的均值也没有绘制出来。我不知道如何将 `value` 和 `predicted` 以及相应的置信区间绘制在一起。
如果有人能展示如何把全部四种处理绘制在同一张图上,以及是否可以像上面那样对其进行分面,那将非常有帮助。
我建议的解决方案是创建第二个 data.frame,其中包含所有汇总统计信息,例如平均预测值。我展示了一种使用 `dplyr` 包中的 `group_by` 和 `summarize` 来执行此操作的方法。汇总数据需要包含与主数据相匹配的 `resource`、`fertilizer` 和 `time` 列。此外,汇总数据还包含若干用于存放其他 y 值的列。
然后,需要把主数据和汇总数据分别传给各自对应的 ggplot 图层函数(各个 geom),而不是在主 `ggplot()` 调用中提供。`facet_grid` 可用于将数据拆分成四个图。
# Make `time` a factor with explicitly ordered levels t0, t1, ..., t10, so the
# x axis does not fall back to alphabetical order (t0, t1, t10, t2, ...).
time_levels <- paste0("t", 0:10)
data.melt$time <- factor(data.melt$time, levels = time_levels)

# Summary statistics of the predictions for every resource x fertilizer x time
# cell, kept in a separate data frame. The interval is mean +/- one standard
# deviation, a stand-in for a real confidence interval that the reader is
# expected to compute.
summary_dat <- data.melt %>%
  group_by(resource, fertilizer, time) %>%
  summarise(
    mean_predicted = mean(predict),
    upper_ci = mean_predicted + sd(predict),
    lower_ci = mean_predicted - sd(predict)
  )

# Each layer receives its own data set; nothing is passed to ggplot() itself.
interval_layer <- geom_errorbar(
  data = summary_dat,
  mapping = aes(x = time, ymax = upper_ci, ymin = lower_ci),
  width = 0.3, size = 0.7, colour = "tomato"
)
observed_layer <- geom_point(
  data = data.melt,
  mapping = aes(x = time, y = value),
  size = 1.6, colour = "grey20", alpha = 0.5
)
mean_layer <- geom_point(
  data = summary_dat,
  mapping = aes(x = time, y = mean_predicted),
  size = 3, shape = 21, fill = "tomato", colour = "grey20"
)

# Layers are added in this order: intervals, raw values, then mean predictions.
# The facet formula crosses resource with fertilizer, one panel per treatment.
p <- ggplot() +
  theme_bw() +
  interval_layer +
  observed_layer +
  mean_layer +
  facet_grid(resource ~ fertilizer)

ggsave(
  filename = "plot.png", plot = p,
  width = 6.5, height = 4, units = "in", dpi = 150
)