在 ggplot2 中对多个数据帧使用 geom_point 和 geom_errorbar
geom_point and geom_errorbar with multiple dataframes using ggplot2
我想使用 ggplot2 绘制两个每周平均时间序列(来自代表不同工具的两个不同数据帧)。这应该很简单,但我一定遗漏了一些东西。我查看了以下帖子:
using-both-geom-point-and-geom-line-for-multiple-x-in-ggplot2
object-not-found-error-with-ggplot2-when-adding-shape-aesthetic
以及经典的 Cookbook for R,但我还是接连不断地遇到错误。我使用的数据帧来自 ddply 的汇总结果,为了便于复现,现附在这里:
# Weekly summary for the first instrument (looks like dput() output of the
# ddply summary mentioned in the text): 36 rows with week_DOY,
# Leaf.age.ordered (factor levels: young, mature, old, old1), week_N_Cond,
# week_mean_Cond and week_sd_Cond.
# Note: week_mean_Cond contains NaN and week_sd_Cond contains NA entries.
mean_TS_Cond_use<-
structure(list(week_DOY = c(207, 207, 230, 230, 237, 237, 237,
239, 239, 239, 246, 246, 246, 253, 253, 253, 260, 267, 267, 281,
281, 281, 288, 288, 288, 295, 295, 316, 316, 323, 323, 330, 330,
330, 337, 337), Leaf.age.ordered = structure(c(1L, 4L, 1L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 3L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 1L, 2L, 3L, 2L, 3L
), .Label = c("young", "mature", "old", "old1"), class = "factor"),
week_N_Cond = c(7L, 2L, 7L, 2L, 4L, 6L, 3L, 6L, 2L, 10L,
3L, 6L, 7L, 2L, 5L, 4L, 1L, 3L, 1L, 3L, 3L, 6L, 4L, 11L,
2L, 5L, 4L, 4L, 6L, 2L, 3L, 6L, 20L, 7L, 6L, 2L), week_mean_Cond = c(46.675,
28, 38.125, 59.1, 23.5333333333333, 101.5, 58.1333333333333,
16.8, 35.5, 62.4, 31.4, 144, 49.3, 49.7, 55.6333333333333,
57.65, 7.3, 4.74, NaN, 69.4, 112.3, 80.35, 47.85, 21.6416666666667,
6.41, 70.3333333333333, 59.1, 41.6, 24.9666666666667, 64.3,
NaN, 39.1, 95.8909090909091, 44.7333333333333, 20.9733333333333,
40), week_sd_Cond = c(17.6941374471885, NA, 24.1760728820874,
17.1119841047145, 18.1934970067146, 86.4448379025607, 43.4743985965687,
NA, NA, NA, NA, 1.4142135623731, 9.61665222413704, NA, 30.8034630087809,
28.0721392131059, NA, 1.40007142674936, NA, 31.5912962697006,
23.0774781984514, 20.545478010177, 5.30330085889911, 13.7910353732657,
NA, 9.97513575513302, 1.69705627484771, 5.23259018078045,
6.02522475376092, NA, NA, 9.33380951166242, 59.2789584008602,
7.7693843599949, 20.8945957925329, 33.799704140717)), .Names = c("week_DOY",
"Leaf.age.ordered", "week_N_Cond", "week_mean_Cond", "week_sd_Cond"
), row.names = c(NA, -36L), class = "data.frame")
# Weekly summary for the second instrument: 21 rows with week_DOY,
# Leaf.age.ordered (factor levels: young, mature, old), week_N_GS,
# week_mean_GS and week_sd_GS. The value columns are named differently from
# those of mean_TS_Cond_use (*_GS instead of *_Cond).
mean_TS_Gs_use<-structure(list(week_DOY = c(232, 232, 239, 239, 246, 246, 246,
267, 267, 267, 281, 316, 316, 316, 323, 323, 330, 330, 330, 337,
337), Leaf.age.ordered = structure(c(2L, 3L, 1L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 3L, 1L, 2L, 3L, 2L, 3L, 1L, 2L, 3L, 2L, 3L), .Label = c("young",
"mature", "old"), class = "factor"), week_N_GS = c(56L, 49L,
30L, 30L, 55L, 21L, 54L, 7L, 21L, 19L, 6L, 3L, 8L, 4L, 30L, 15L,
36L, 99L, 70L, 52L, 23L), week_mean_GS = c(73.2017857142857,
170.422448979592, 88.1133333333333, 66.4866666666667, 125.794545454545,
103.247619047619, 70.0981481481481, 154.414285714286, 258.757142857143,
114.073684210526, 254.15, 167.5, 175.8125, 136.25, 87.9866666666667,
46.46, 112.455555555556, 111.778787878788, 88.4242857142857,
169.346153846154, 160.895652173913), week_sd_GS = c(27.4044421818562,
112.736252423718, 30.7610561377961, 26.4143473727146, 98.1052296302704,
59.4644819959581, 43.7727299045695, 77.6537062556456, 84.1063943551771,
67.674177268777, 79.52214157076, 47.4155037935906, 45.4656365527071,
9.46449505608548, 58.2085118395473, 17.0402800111132, 33.7885563420893,
97.9779549056591, 76.6287028293478, 130.657736481864, 93.5849467220259
)), .Names = c("week_DOY", "Leaf.age.ordered", "week_N_GS", "week_mean_GS",
"week_sd_GS"), row.names = c(NA, -21L), class = "data.frame")
对第一个数据帧使用 geom_point 和 geom_errorbar 时一切正常:
# Points plus mean +/- SD error bars for the first data frame. Every
# aesthetic, including ymin/ymax, is declared at the top level, so any layer
# added to this plot later inherits all of them by default.
mGts <- ggplot(
  mean_TS_Cond_use,
  aes(
    x = week_DOY,
    y = week_mean_Cond,
    colour = Leaf.age.ordered,
    ymin = week_mean_Cond - week_sd_Cond,
    ymax = week_mean_Cond + week_sd_Cond
  )
) +
  geom_point(size = 4) +
  geom_errorbar()
mGts
我尝试像这样从新数据帧添加新时间序列:
# Attempt 1 (fails with "object 'week_mean_Cond' not found"): both new layers
# inherit whichever top-level aesthetics of mGts they do not override, and
# those refer to week_mean_Cond / week_sd_Cond, which are not columns of
# mean_TS_Gs_use.
mGts_situ<-mGts +
geom_point(aes(x = week_DOY, y = week_mean_GS, color=Leaf.age.ordered), data=mean_TS_Gs_use, size=4, shape=18) +
geom_errorbar(aes(ymax = week_mean_GS + week_sd_GS, ymin=week_mean_GS - week_sd_GS), data=mean_TS_Gs_use)
mGts_situ
但我收到一条错误消息:“未找到对象 'week_mean_Cond'”。由于 ggplot 是在第一个数据帧中查找该对象,我尝试去掉继承的 aes,并把 'data=' 的定义移到 aes 调用之前(我还在 ggplot 调用之外定义了误差线的上下限,并做了其他一些小改动)。这是新的尝试:
# Attempt 2: the error-bar limits for the second data frame are precomputed
# as free-standing vectors outside the ggplot call.
Gs_upper<-mean_TS_Gs_use$week_mean_GS + mean_TS_Gs_use$week_sd_GS
Gs_lower<-mean_TS_Gs_use$week_mean_GS - mean_TS_Gs_use$week_sd_GS
# NOTE: ymax/ymin are handed to geom_point(), which only draws points, while
# the geom_errorbar() call below has no data or mapping of its own and so
# falls back to the plot defaults (mean_TS_Cond_use). As a result no error
# bars are drawn for mean_TS_Gs_use.
mGts_situ<-mGts +
geom_point(data=mean_TS_Gs_use, inherit.aes = FALSE, aes(x = week_DOY, y = week_mean_GS, color=Leaf.age.ordered, ymax = Gs_upper, ymin = Gs_lower), size=4, shape=18) +
geom_errorbar()+
scale_x_continuous("DOY", limits = c(200, 350)) +
scale_y_continuous("Weekly Mean", limits = c(0, 345))+
theme_bw()
mGts_situ
这样不会再报“未找到对象”的错误,但仍然不会显示新数据集('mean_TS_Gs_use')的误差线。您可以看到,先绘制的数据帧(圆圈)有误差线,而后绘制的数据帧(三角形)没有:
对于 inherit.aes,鱼与熊掌不可兼得:你要么继承全部美学映射,要么自己指定全部美学映射。
在您的情况下,新数据中用于 ymin 和 ymax 的列名与原数据不同,因此我们确实需要在新的 geom_errorbar 层中设置 inherit.aes = FALSE,但这样一来就必须在该层中指定所有美学映射。
如果原图中的 ymin 和 ymax 只在 geom_errorbar 层中设置,而不是在顶层的 ggplot() 中设置,我们就可以省去一些麻烦:
# Base plot for the first data frame. Only x, y and colour live at the top
# level; the error-bar limits are mapped inside geom_errorbar() so that
# layers added later do not inherit them.
mGts <- ggplot(mean_TS_Cond_use,
               aes(week_DOY, week_mean_Cond, colour = Leaf.age.ordered)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = week_mean_Cond - week_sd_Cond,
                    ymax = week_mean_Cond + week_sd_Cond))
这样修改之后,新的 geom_point 层可以直接使用;但对于 geom_errorbar 层,我们要设置 inherit.aes = FALSE 并重新指定它的全部美学映射:
# Overlay the second data frame. The point layer overrides x, y and colour
# explicitly. The error-bar layer opts out of inheritance (otherwise it would
# pick up y = week_mean_Cond from the base plot) and therefore restates every
# aesthetic it needs.
mGts_situ <- mGts +
  geom_point(
    data = mean_TS_Gs_use,
    mapping = aes(x = week_DOY, y = week_mean_GS, colour = Leaf.age.ordered),
    size = 4, shape = 18
  ) +
  geom_errorbar(
    data = mean_TS_Gs_use,
    mapping = aes(
      x = week_DOY,
      ymin = week_mean_GS - week_sd_GS,
      ymax = week_mean_GS + week_sd_GS,
      colour = Leaf.age.ordered
    ),
    inherit.aes = FALSE
  )
mGts_situ
我认为如果我们结合两个数据框,这个图会更容易创建:
library(dplyr)
library(ggplot2)
重命名列,以便我们在两个数据框中使用通用名称。添加一个新列以区分源数据来自哪个数据框。然后合并两个数据框:
# Give both data frames the same value-column names and tag every row with
# the data frame it came from, then stack them into one long data frame.
# The week_N_* columns keep their original names.
# NOTE: this overwrites mean_TS_Cond_use and mean_TS_Gs_use in place.
mean_TS_Cond_use <- mean_TS_Cond_use %>%
  rename(week_mean = week_mean_Cond, week_sd = week_sd_Cond) %>%
  mutate(Source = "Cond")
mean_TS_Gs_use <- mean_TS_Gs_use %>%
  rename(week_mean = week_mean_GS, week_sd = week_sd_GS) %>%
  mutate(Source = "Gs")
df <- bind_rows(mean_TS_Cond_use, mean_TS_Gs_use)
重置 Leaf.age.ordered 的因子水平顺序:
# Re-establish the intended level order after the two data frames were
# combined.
df$Leaf.age.ordered <- factor(
  df$Leaf.age.ordered,
  levels = c("young", "mature", "old", "old1")
)
将 week_DOY 转换为因子(这样 position_dodge 才能正常工作):
# Discrete copy of week_DOY for the x axis; the levels span every integer
# from the smallest to the largest observed day of year.
df$week_DOY_f <- factor(
  df$week_DOY,
  levels = seq(min(df$week_DOY), max(df$week_DOY))
)
使用 position_dodge 绘图以避免重叠。设置 group 美学映射是为了让各组能够正确地错开:
# One shared dodge object so that points and their error bars are shifted by
# the same amount.
pd <- position_dodge(width = 0.5)

# Combined plot: colour/fill distinguish the source data frame, point size
# distinguishes leaf age. The group aesthetic (leaf age x source) is shared
# by both layers so each point is dodged together with its own error bar.
ggplot(
  df,
  aes(
    x = week_DOY_f,
    y = week_mean,
    ymin = week_mean - week_sd,
    ymax = week_mean + week_sd,
    colour = Source,
    fill = Source,
    group = interaction(Leaf.age.ordered, Source)
  )
) +
  geom_errorbar(position = pd, width = 0.1, alpha = 0.5) +
  geom_point(
    aes(size = Leaf.age.ordered),
    position = pd,
    shape = 21, colour = "black", stroke = 0.2
  ) +
  scale_size_discrete(range = c(1, 3)) +
  # Fill the size-legend keys so the symbols are visible in the legend.
  guides(size = guide_legend(override.aes = list(fill = "grey30"))) +
  theme_bw()
图中的内容仍然比较拥挤,但希望更容易阅读:
或者文本标签可能更适合区分年龄:
# Variant that marks leaf age with a text label instead of point size. Each
# label is the upper-cased first letter of Leaf.age.ordered, so the levels
# "old" and "old1" both appear as "O". Uses the dodge object `pd` defined
# for the previous plot.
ggplot(
  df,
  aes(
    x = week_DOY_f,
    y = week_mean,
    ymin = week_mean - week_sd,
    ymax = week_mean + week_sd,
    colour = Source,
    group = interaction(Leaf.age.ordered, Source)
  )
) +
  geom_errorbar(position = pd, width = 0.1, alpha = 0.5) +
  geom_label(
    aes(label = toupper(substr(Leaf.age.ordered, 1, 1))),
    position = pd,
    fontface = "bold", fill = "white",
    label.size = 0, label.padding = unit(0.05, "lines"),
    size = 2.5, show.legend = FALSE
  ) +
  # Draw the colour legend keys fully opaque and with a thicker line.
  guides(colour = guide_legend(override.aes = list(alpha = 1, lwd = 1))) +
  theme_bw()
我想使用 ggplot2 绘制两个每周平均时间序列(来自代表不同工具的两个不同数据帧)。这应该很简单,但我一定遗漏了一些东西。我查看了以下帖子:
using-both-geom-point-and-geom-line-for-multiple-x-in-ggplot2 object-not-found-error-with-ggplot2-when-adding-shape-aesthetic
以及经典的 Cookbook for R,但我还是接连不断地遇到错误。我使用的数据帧来自 ddply 的汇总结果,为了便于复现,现附在这里:
# Weekly summary for the first instrument (looks like dput() output of the
# ddply summary mentioned in the text): 36 rows with week_DOY,
# Leaf.age.ordered (factor levels: young, mature, old, old1), week_N_Cond,
# week_mean_Cond and week_sd_Cond.
# Note: week_mean_Cond contains NaN and week_sd_Cond contains NA entries.
mean_TS_Cond_use<-
structure(list(week_DOY = c(207, 207, 230, 230, 237, 237, 237,
239, 239, 239, 246, 246, 246, 253, 253, 253, 260, 267, 267, 281,
281, 281, 288, 288, 288, 295, 295, 316, 316, 323, 323, 330, 330,
330, 337, 337), Leaf.age.ordered = structure(c(1L, 4L, 1L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 3L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 1L, 2L, 3L, 2L, 3L
), .Label = c("young", "mature", "old", "old1"), class = "factor"),
week_N_Cond = c(7L, 2L, 7L, 2L, 4L, 6L, 3L, 6L, 2L, 10L,
3L, 6L, 7L, 2L, 5L, 4L, 1L, 3L, 1L, 3L, 3L, 6L, 4L, 11L,
2L, 5L, 4L, 4L, 6L, 2L, 3L, 6L, 20L, 7L, 6L, 2L), week_mean_Cond = c(46.675,
28, 38.125, 59.1, 23.5333333333333, 101.5, 58.1333333333333,
16.8, 35.5, 62.4, 31.4, 144, 49.3, 49.7, 55.6333333333333,
57.65, 7.3, 4.74, NaN, 69.4, 112.3, 80.35, 47.85, 21.6416666666667,
6.41, 70.3333333333333, 59.1, 41.6, 24.9666666666667, 64.3,
NaN, 39.1, 95.8909090909091, 44.7333333333333, 20.9733333333333,
40), week_sd_Cond = c(17.6941374471885, NA, 24.1760728820874,
17.1119841047145, 18.1934970067146, 86.4448379025607, 43.4743985965687,
NA, NA, NA, NA, 1.4142135623731, 9.61665222413704, NA, 30.8034630087809,
28.0721392131059, NA, 1.40007142674936, NA, 31.5912962697006,
23.0774781984514, 20.545478010177, 5.30330085889911, 13.7910353732657,
NA, 9.97513575513302, 1.69705627484771, 5.23259018078045,
6.02522475376092, NA, NA, 9.33380951166242, 59.2789584008602,
7.7693843599949, 20.8945957925329, 33.799704140717)), .Names = c("week_DOY",
"Leaf.age.ordered", "week_N_Cond", "week_mean_Cond", "week_sd_Cond"
), row.names = c(NA, -36L), class = "data.frame")
# Weekly summary for the second instrument: 21 rows with week_DOY,
# Leaf.age.ordered (factor levels: young, mature, old), week_N_GS,
# week_mean_GS and week_sd_GS. The value columns are named differently from
# those of mean_TS_Cond_use (*_GS instead of *_Cond).
mean_TS_Gs_use<-structure(list(week_DOY = c(232, 232, 239, 239, 246, 246, 246,
267, 267, 267, 281, 316, 316, 316, 323, 323, 330, 330, 330, 337,
337), Leaf.age.ordered = structure(c(2L, 3L, 1L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 3L, 1L, 2L, 3L, 2L, 3L, 1L, 2L, 3L, 2L, 3L), .Label = c("young",
"mature", "old"), class = "factor"), week_N_GS = c(56L, 49L,
30L, 30L, 55L, 21L, 54L, 7L, 21L, 19L, 6L, 3L, 8L, 4L, 30L, 15L,
36L, 99L, 70L, 52L, 23L), week_mean_GS = c(73.2017857142857,
170.422448979592, 88.1133333333333, 66.4866666666667, 125.794545454545,
103.247619047619, 70.0981481481481, 154.414285714286, 258.757142857143,
114.073684210526, 254.15, 167.5, 175.8125, 136.25, 87.9866666666667,
46.46, 112.455555555556, 111.778787878788, 88.4242857142857,
169.346153846154, 160.895652173913), week_sd_GS = c(27.4044421818562,
112.736252423718, 30.7610561377961, 26.4143473727146, 98.1052296302704,
59.4644819959581, 43.7727299045695, 77.6537062556456, 84.1063943551771,
67.674177268777, 79.52214157076, 47.4155037935906, 45.4656365527071,
9.46449505608548, 58.2085118395473, 17.0402800111132, 33.7885563420893,
97.9779549056591, 76.6287028293478, 130.657736481864, 93.5849467220259
)), .Names = c("week_DOY", "Leaf.age.ordered", "week_N_GS", "week_mean_GS",
"week_sd_GS"), row.names = c(NA, -21L), class = "data.frame")
对第一个数据帧使用 geom_point 和 geom_errorbar 时一切正常:
# Points plus mean +/- SD error bars for the first data frame. Every
# aesthetic, including ymin/ymax, is declared at the top level, so any layer
# added to this plot later inherits all of them by default.
mGts <- ggplot(
  mean_TS_Cond_use,
  aes(
    x = week_DOY,
    y = week_mean_Cond,
    colour = Leaf.age.ordered,
    ymin = week_mean_Cond - week_sd_Cond,
    ymax = week_mean_Cond + week_sd_Cond
  )
) +
  geom_point(size = 4) +
  geom_errorbar()
mGts
我尝试像这样从新数据帧添加新时间序列:
# Attempt 1 (fails with "object 'week_mean_Cond' not found"): both new layers
# inherit whichever top-level aesthetics of mGts they do not override, and
# those refer to week_mean_Cond / week_sd_Cond, which are not columns of
# mean_TS_Gs_use.
mGts_situ<-mGts +
geom_point(aes(x = week_DOY, y = week_mean_GS, color=Leaf.age.ordered), data=mean_TS_Gs_use, size=4, shape=18) +
geom_errorbar(aes(ymax = week_mean_GS + week_sd_GS, ymin=week_mean_GS - week_sd_GS), data=mean_TS_Gs_use)
mGts_situ
但我收到一条错误消息:“未找到对象 'week_mean_Cond'”。由于 ggplot 是在第一个数据帧中查找该对象,我尝试去掉继承的 aes,并把 'data=' 的定义移到 aes 调用之前(我还在 ggplot 调用之外定义了误差线的上下限,并做了其他一些小改动)。这是新的尝试:
# Attempt 2: the error-bar limits for the second data frame are precomputed
# as free-standing vectors outside the ggplot call.
Gs_upper<-mean_TS_Gs_use$week_mean_GS + mean_TS_Gs_use$week_sd_GS
Gs_lower<-mean_TS_Gs_use$week_mean_GS - mean_TS_Gs_use$week_sd_GS
# NOTE: ymax/ymin are handed to geom_point(), which only draws points, while
# the geom_errorbar() call below has no data or mapping of its own and so
# falls back to the plot defaults (mean_TS_Cond_use). As a result no error
# bars are drawn for mean_TS_Gs_use.
mGts_situ<-mGts +
geom_point(data=mean_TS_Gs_use, inherit.aes = FALSE, aes(x = week_DOY, y = week_mean_GS, color=Leaf.age.ordered, ymax = Gs_upper, ymin = Gs_lower), size=4, shape=18) +
geom_errorbar()+
scale_x_continuous("DOY", limits = c(200, 350)) +
scale_y_continuous("Weekly Mean", limits = c(0, 345))+
theme_bw()
mGts_situ
这样不会再报“未找到对象”的错误,但仍然不会显示新数据集('mean_TS_Gs_use')的误差线。您可以看到,先绘制的数据帧(圆圈)有误差线,而后绘制的数据帧(三角形)没有:
对于 inherit.aes,鱼与熊掌不可兼得:你要么继承全部美学映射,要么自己指定全部美学映射。
在您的情况下,新数据中用于 ymin 和 ymax 的列名与原数据不同,因此我们确实需要在新的 geom_errorbar 层中设置 inherit.aes = FALSE,但这样一来就必须在该层中指定所有美学映射。
如果原图中的 ymin 和 ymax 只在 geom_errorbar 层中设置,而不是在顶层的 ggplot() 中设置,我们就可以省去一些麻烦:
# Base plot for the first data frame. Only x, y and colour live at the top
# level; the error-bar limits are mapped inside geom_errorbar() so that
# layers added later do not inherit them.
mGts <- ggplot(mean_TS_Cond_use,
               aes(week_DOY, week_mean_Cond, colour = Leaf.age.ordered)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = week_mean_Cond - week_sd_Cond,
                    ymax = week_mean_Cond + week_sd_Cond))
这样修改之后,新的 geom_point 层可以直接使用;但对于 geom_errorbar 层,我们要设置 inherit.aes = FALSE 并重新指定它的全部美学映射:
# Overlay the second data frame. The point layer overrides x, y and colour
# explicitly. The error-bar layer opts out of inheritance (otherwise it would
# pick up y = week_mean_Cond from the base plot) and therefore restates every
# aesthetic it needs.
mGts_situ <- mGts +
  geom_point(
    data = mean_TS_Gs_use,
    mapping = aes(x = week_DOY, y = week_mean_GS, colour = Leaf.age.ordered),
    size = 4, shape = 18
  ) +
  geom_errorbar(
    data = mean_TS_Gs_use,
    mapping = aes(
      x = week_DOY,
      ymin = week_mean_GS - week_sd_GS,
      ymax = week_mean_GS + week_sd_GS,
      colour = Leaf.age.ordered
    ),
    inherit.aes = FALSE
  )
mGts_situ
我认为如果我们结合两个数据框,这个图会更容易创建:
library(dplyr)
library(ggplot2)
重命名列,以便我们在两个数据框中使用通用名称。添加一个新列以区分源数据来自哪个数据框。然后合并两个数据框:
# Give both data frames the same value-column names and tag every row with
# the data frame it came from, then stack them into one long data frame.
# The week_N_* columns keep their original names.
# NOTE: this overwrites mean_TS_Cond_use and mean_TS_Gs_use in place.
mean_TS_Cond_use <- mean_TS_Cond_use %>%
  rename(week_mean = week_mean_Cond, week_sd = week_sd_Cond) %>%
  mutate(Source = "Cond")
mean_TS_Gs_use <- mean_TS_Gs_use %>%
  rename(week_mean = week_mean_GS, week_sd = week_sd_GS) %>%
  mutate(Source = "Gs")
df <- bind_rows(mean_TS_Cond_use, mean_TS_Gs_use)
重置 Leaf.age.ordered 的因子水平顺序:
# Re-establish the intended level order after the two data frames were
# combined.
df$Leaf.age.ordered <- factor(
  df$Leaf.age.ordered,
  levels = c("young", "mature", "old", "old1")
)
将 week_DOY 转换为因子(这样 position_dodge 才能正常工作):
# Discrete copy of week_DOY for the x axis; the levels span every integer
# from the smallest to the largest observed day of year.
df$week_DOY_f <- factor(
  df$week_DOY,
  levels = seq(min(df$week_DOY), max(df$week_DOY))
)
使用 position_dodge 绘图以避免重叠。设置 group 美学映射是为了让各组能够正确地错开:
# One shared dodge object so that points and their error bars are shifted by
# the same amount.
pd <- position_dodge(width = 0.5)

# Combined plot: colour/fill distinguish the source data frame, point size
# distinguishes leaf age. The group aesthetic (leaf age x source) is shared
# by both layers so each point is dodged together with its own error bar.
ggplot(
  df,
  aes(
    x = week_DOY_f,
    y = week_mean,
    ymin = week_mean - week_sd,
    ymax = week_mean + week_sd,
    colour = Source,
    fill = Source,
    group = interaction(Leaf.age.ordered, Source)
  )
) +
  geom_errorbar(position = pd, width = 0.1, alpha = 0.5) +
  geom_point(
    aes(size = Leaf.age.ordered),
    position = pd,
    shape = 21, colour = "black", stroke = 0.2
  ) +
  scale_size_discrete(range = c(1, 3)) +
  # Fill the size-legend keys so the symbols are visible in the legend.
  guides(size = guide_legend(override.aes = list(fill = "grey30"))) +
  theme_bw()
图中的内容仍然比较拥挤,但希望更容易阅读:
或者文本标签可能更适合区分年龄:
# Variant that marks leaf age with a text label instead of point size. Each
# label is the upper-cased first letter of Leaf.age.ordered, so the levels
# "old" and "old1" both appear as "O". Uses the dodge object `pd` defined
# for the previous plot.
ggplot(
  df,
  aes(
    x = week_DOY_f,
    y = week_mean,
    ymin = week_mean - week_sd,
    ymax = week_mean + week_sd,
    colour = Source,
    group = interaction(Leaf.age.ordered, Source)
  )
) +
  geom_errorbar(position = pd, width = 0.1, alpha = 0.5) +
  geom_label(
    aes(label = toupper(substr(Leaf.age.ordered, 1, 1))),
    position = pd,
    fontface = "bold", fill = "white",
    label.size = 0, label.padding = unit(0.05, "lines"),
    size = 2.5, show.legend = FALSE
  ) +
  # Draw the colour legend keys fully opaque and with a thicker line.
  guides(colour = guide_legend(override.aes = list(alpha = 1, lwd = 1))) +
  theme_bw()