根据数据集的条件设置颜色和框(箱线图)
Set color and boxes on conditions from data set (boxplot)
所以我有一个名为 Carbon 的数据集(Carbon$Graphite 确实包含一些在头部看不到的值):
Mesa TIC TOC Graphite TC
Kx V17 Ja 5.26 15.64 NA 20.90
Mu V17 Nej 4.08 11.32 NA 15.40
Ob V17 Nej 5.22 12.68 NA 17.90
Vä V17 Ja 6.45 6.35 NA 12.80
Ös V17 Nej 3.90 2.54 NA 6.44
Ig V17 Ja 8.20 3.20 NA 11.40
我想要一个显示 8 个框的箱线图:一个框只包含满足 Carbon$TIC[Carbon$Mesa=="Ja"] 的值,一个对应 Carbon$TIC[Carbon$Mesa=="Nej"] 的值,一个对应 Carbon$TOC[Carbon$Mesa=="Ja"] 的值,依此类推。框的颜色(填充)由 Carbon$Mesa 决定:"Ja" 为红色框,"Nej" 为蓝色框。我已经设法在不使用 ggplot 的情况下做到了这一点,但我需要用 ggplot 来实现(这样我所有的图表看起来才一致,因为其余的图表都是用 ggplot 完成的)。
我以前不用 ggplot 写的代码如下(这正是我想要的效果,只是希望图例像 ggplot 中通常那样放在图的旁边):
# Split the samples by treatment: "Ja" rows and "Nej" rows.
MesaJa <- Carbon[Carbon$Mesa == "Ja", ]
MesaNej <- Carbon[Carbon$Mesa == "Nej", ]

# Three red boxes (the "Ja" group) followed by three blue boxes ("Nej").
col.box <- rep(c("red", "blue"), each = 3)

# One box per measurement and group, in the same order as the colours.
boxplot(
  list(
    MesaJa[["TIC"]], MesaJa[["TOC"]], MesaJa[["TC"]],
    MesaNej[["TIC"]], MesaNej[["TOC"]], MesaNej[["TC"]]
  ),
  names = rep(c("TIC", "TOC", "TC"), times = 2),
  col = col.box
)

# Base graphics draws the legend inside the plotting region.
legend(
  "topleft",
  legend = c("Lime mud", "No lime mud"),
  pch = 19,
  col = c("red", "blue"),
  cex = 0.7
)
我尝试了几种不同的方法,但仍然无法正常工作。我得到的最接近的是:
# Asker's attempt: split every measurement into a "Ja" column and a "Nej"
# column, reshape to long format, and draw one box per new column.
# For each measurement, keep the value only on rows of the given Mesa group;
# every other row becomes NA.
Carbon$TIC_Ja <- ifelse(Carbon$Mesa=="Ja",Carbon$TIC, NA)
Carbon$TIC_Nej <- ifelse(Carbon$Mesa=="Nej",Carbon$TIC, NA)
Carbon$TOC_Ja <- ifelse(Carbon$Mesa=="Ja",Carbon$TOC, NA)
Carbon$TOC_Nej <- ifelse(Carbon$Mesa=="Nej",Carbon$TOC, NA)
Carbon$TC_Ja <- ifelse(Carbon$Mesa=="Ja",Carbon$TC, NA)
Carbon$TC_Nej <- ifelse(Carbon$Mesa=="Nej",Carbon$TC, NA)
# Keep column 1 plus columns 6-11.
# NOTE(review): presumably column 1 is Mesa and 6:11 are the six columns
# created above; this relies on Carbon having exactly five columns before
# this snippet runs -- confirm.
Carbon.plot<-Carbon[ , c(1, 6:11)]
# Character vector holding all column names of Carbon.
# NOTE(review): the gather() call below appears to use `Carbon.key` as the
# NAME of the new key column rather than this vector's contents -- verify
# against the tidyr::gather documentation.
Carbon.key <- colnames(Carbon)
ggplot(
# Stack every column except Mesa into key/value pairs.
gather(Carbon.plot, key=Carbon.key, value="value", -"Mesa"),
aes(x=factor(Carbon.key), y=as.numeric(value), fill= Carbon.key)
) +
geom_boxplot() +
# NOTE(review): six fill colours are supplied but only two labels; fill is
# mapped to the key column, not to Mesa.
scale_fill_manual(values=c("red", "blue", "red", "blue", "red", "blue"),
labels=c("Lime mud added", "No lime mud")
)
但结果仍然不理想,因为我只想要图例中的前两项,并且想去掉 "NA"。而且我认为一定有一种更简单的方法,不需要用 ifelse 来重新整理数据框。我已经搜索过了,但到目前为止还没有看到任何类似的例子。所以,请帮帮忙!
编辑:添加了有关数据框(df)和会话信息的内容。不过在发布这个问题之后,我开始更新一些软件包,结果并不顺利,所以目前我的 ggplot 甚至无法正常工作。
> str(Carbon)
'data.frame': 70 obs. of 5 variables:
$ Mesa : chr "Ja" "Nej" "Nej" "Ja" ...
$ TIC : num 5.26 4.08 5.22 6.45 3.9 ...
$ TOC : num 15.64 11.32 12.68 6.35 2.54 ...
$ Graphite: num NA NA NA NA NA NA NA NA NA NA ...
$ TC : num 20.9 15.4 17.9 12.8 6.44 11.4 12.9 21.6 11.8 15.3 ...
> dput(Carbon)
structure(list(Mesa = c("Ja", "Nej", "Nej", "Ja", "Nej", "Ja",
"Ja", "Ja", "Ja", "Ja", "Nej", "Nej", "Nej", "Ja", "Ja", "Nej",
"Nej", "Nej", "Nej", "Ja", "Ja", "Ja", "Ja", "Nej", "Nej", "Nej",
"Ja", "Ja", "Ja", "Nej", "Nej", "Nej", "Nej", "Ja", "Ja", "Ja",
"Ja", "Nej", "Nej", "Nej", "Ja", "Ja", "Ja", "Nej", "Nej", "Nej",
"Ja", "Nej", "Ja", "Ja", "Ja", "Ja", "Nej", "Nej", "Nej", "Ja",
"Ja", "Ja", "Nej", "Nej", "Nej", "Nej", "Ja", "Ja", "Ja", "Ja",
"Nej", "Nej", "Nej", "Ja"), TIC = c(5.26, 4.08, 5.22, 6.45, 3.9,
8.2, 10.67, 7.43, 9.55, 8.19, 7.83, 4.04, 2.66, 4.93, 7.41, 3.25,
4.47, 4.385, 3.48, 8.01, 9.49, 8.93, 6.03, 7.32, 3.84, 2.42,
5.01, 3.87, 7, 4.8, 5.64, 5.76, 5.69, 8.7, 10.2, 9.78, 6.1, 8.07,
4.33, 3.98, 7.39, 9.68, 9.67, 3.75, 5.07, 4.7, 4.86, 2.98, 8.05,
10.29, 9.99, 6.65, 8.85, 4.82, 3.84, 8.49, 3.86, 6.63, 3.49,
3.01, 4.83, 3.78, 8.95, 10.1, 8.15, 6.16, 8.15, 4.27, 3.96, 4.96
), TOC = c(15.64, 11.32, 12.68, 6.35, 2.54, 3.2, 2.23, 14.17,
2.25, 7.11, 2.37, 17.16, 36.14, 13.47, 5.29, 17.95, 14.63, 3.85,
6.31, 3.19, 2.81, 7.27, 23.07, 1.94, 26.19, 36.63, 23.19, 11.37,
5.1, 2.39, 18.46, 2.17, 2.45, 1.7, 4.2, 4.92, 20.2, 0.86, 20.67,
33.32, 5.11, 2.01, 0.53, 6.48, 29.51, 2.5, 9.41, 3.42, 3.04,
4, 4.1, 11.15, 1.94, 20.66, 31.73, 1.81, 16.39, 20.75, 6.61,
33.98, 2.48, 3.15, 1.65, 2.4, 12.63, 17.33, 0.99, 25.62, 38.63,
17.93), Graphite = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 0.13, NA, NA, NA, NA, NA, 0.07,
0.05, NA, 0.06, NA, 0.03, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 0.07, 0.02, 0.08, 0.03, 0.33, NA, NA, NA, NA,
NA, 0.02, 0.13, NA, 0.05, 0.02, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), TC = c(20.9, 15.4, 17.9, 12.8, 6.44, 11.4, 12.9,
21.6, 11.8, 15.3, 10.2, 21.2, 38.8, 18.4, 12.7, 21.2, 19.1, 8.235,
9.92, 11.2, 12.3, 16.2, 29.1, 9.26, 30.1, 39.1, 28.2, 15.3, 12.1,
7.22, 24.1, 7.93, 8.14, 10.4, 14.4, 14.7, 26.3, 8.93, 25, 37.3,
12.5, 11.7, 10.2, 10.3, 34.6, 7.28, 14.3, 6.73, 11.1, 14.3, 14.1,
17.8, 10.8, 25.5, 35.7, 10.3, 20.3, 27.4, 10.1, 37, 7.33, 6.93,
10.6, 12.5, 20.8, 23.5, 9.14, 29.9, 42.6, 22.9)), class = "data.frame", row.names = c("Kx V17",
"Mu V17", "Ob V17", "Vä V17", "Ös V17", "Ig V17", "Va V17", "Gä V17",
"Sk V17", "Fr V17", "Gr V17", "Bi V17", "As V17", "Kx H17", "Pi H17",
"Mu H17", "Ob H17", "Do H17", "Ös H17", "Ig H17", "Va H17", "Gä H17",
"Fr H17", "Gr H17", "Bi H17", "As H17", "So H17", "Kx V18", "Pi V18",
"Mu V18", "Ob V18", "Do V18", "Ös V18", "Ig V18", "Va V18", "Gä V18",
"Fr V18", "Gr V18", "Bi V18", "As V18", "So V18", "Kx H18", "Pi H18",
"Mu H18", "Ob H18", "Do H18", "Vä H18", "Ös H18", "Ig H18", "Va H18",
"Gä H18", "Fr H18", "Gr H18", "Bi H18", "As H18", "So H18", "Kx V19",
"Pi V19", "Mu V19", "Ob V19", "Do V19", "Ös V19", "Ig V19", "Va V19",
"Gä V19", "Fr V19", "Gr V19", "Bi V19", "As V19", "So V19"))
> sessionInfo()
R version 3.5.3 (2019-03-11)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18362)
Matrix products: default
locale:
[1] LC_COLLATE=Swedish_Sweden.1252 LC_CTYPE=Swedish_Sweden.1252
[3] LC_MONETARY=Swedish_Sweden.1252 LC_NUMERIC=C
[5] LC_TIME=Swedish_Sweden.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
loaded via a namespace (and not attached):
[1] crayon_1.3.4 grid_3.5.3 R6_2.4.1 lifecycle_0.2.0 gtable_0.3.0
[6] magrittr_1.5 scales_1.1.0 rlang_0.4.5 rstudioapi_0.11 ellipsis_0.3.0
[11] tools_3.5.3 glue_1.4.0 munsell_0.5.0 compiler_3.5.3 colorspace_1.4-1
这里一种可能的方法是使用 tidyr 中的 pivot_longer 函数将数据框转换为更长的格式(pivot_longer 在 tidyr 1.0.0 及以上版本中可用):
library(tidyr)
library(dplyr)
# Reshape to long format: the measurement name goes into `var` and its value
# into `val`; the remaining columns are carried along unchanged.
Carbon %>%
  pivot_longer(
    cols = c(TIC, TOC, TC),
    names_to = "var",
    values_to = "val"
  ) %>%
  # Fix the level order explicitly so the variables appear as TIC, TOC, TC
  # rather than in alphabetical order.
  mutate(var = factor(var, levels = c("TIC", "TOC", "TC")))
# A tibble: 210 x 4
Mesa Graphite var val
<chr> <dbl> <fct> <dbl>
1 Ja NA TIC 5.26
2 Ja NA TOC 15.6
3 Ja NA TC 20.9
4 Nej NA TIC 4.08
5 Nej NA TOC 11.3
6 Nej NA TC 15.4
7 Nej NA TIC 5.22
8 Nej NA TOC 12.7
9 Nej NA TC 17.9
10 Ja NA TIC 6.45
# … with 200 more rows
然后,你可以使用 interaction 将 "Mesa" 和 "var" 组合为 x 值,并在 ggplot2 中得到如下箱线图:
library(tidyr)
library(dplyr)
# The plotting package is called ggplot2; `library(ggplot)` stops with
# "there is no package called 'ggplot'".
library(ggplot2)

# Reshape to long format, then draw one box per (variable, Mesa) combination,
# filled according to Mesa.
Carbon %>%
  pivot_longer(
    cols = c(TIC, TOC, TC),
    names_to = "var",
    values_to = "val"
  ) %>%
  mutate(var = factor(var, levels = c("TIC", "TOC", "TC"))) %>%
  ggplot(aes(x = interaction(var, Mesa), y = val, fill = Mesa)) +
  geom_boxplot() +
  # interaction() varies its first factor fastest, so the x levels run
  # TIC/TOC/TC for "Ja" and then TIC/TOC/TC for "Nej"; relabel them with the
  # bare variable names.
  scale_x_discrete(labels = rep(c("TIC", "TOC", "TC"), 2)) +
  # Named values and explicit breaks tie each colour and legend label to its
  # Mesa level instead of relying on the alphabetical order of the levels.
  scale_fill_manual(
    values = c(Ja = "red", Nej = "blue"),
    breaks = c("Ja", "Nej"),
    labels = c("Lime mud", "No lime mud")
  )
它是否回答了您的问题?
所以我有一个名为 Carbon 的数据集(Carbon$Graphite 确实包含一些在头部看不到的值):
Mesa TIC TOC Graphite TC
Kx V17 Ja 5.26 15.64 NA 20.90
Mu V17 Nej 4.08 11.32 NA 15.40
Ob V17 Nej 5.22 12.68 NA 17.90
Vä V17 Ja 6.45 6.35 NA 12.80
Ös V17 Nej 3.90 2.54 NA 6.44
Ig V17 Ja 8.20 3.20 NA 11.40
我想要一个显示 8 个框的箱线图:一个框只包含满足 Carbon$TIC[Carbon$Mesa=="Ja"] 的值,一个对应 Carbon$TIC[Carbon$Mesa=="Nej"] 的值,一个对应 Carbon$TOC[Carbon$Mesa=="Ja"] 的值,依此类推。框的颜色(填充)由 Carbon$Mesa 决定:"Ja" 为红色框,"Nej" 为蓝色框。我已经设法在不使用 ggplot 的情况下做到了这一点,但我需要用 ggplot 来实现(这样我所有的图表看起来才一致,因为其余的图表都是用 ggplot 完成的)。
我以前不用 ggplot 写的代码如下(这正是我想要的效果,只是希望图例像 ggplot 中通常那样放在图的旁边):
# Split the samples by treatment: "Ja" rows and "Nej" rows.
MesaJa <- Carbon[Carbon$Mesa == "Ja", ]
MesaNej <- Carbon[Carbon$Mesa == "Nej", ]

# Three red boxes (the "Ja" group) followed by three blue boxes ("Nej").
col.box <- rep(c("red", "blue"), each = 3)

# One box per measurement and group, in the same order as the colours.
boxplot(
  list(
    MesaJa[["TIC"]], MesaJa[["TOC"]], MesaJa[["TC"]],
    MesaNej[["TIC"]], MesaNej[["TOC"]], MesaNej[["TC"]]
  ),
  names = rep(c("TIC", "TOC", "TC"), times = 2),
  col = col.box
)

# Base graphics draws the legend inside the plotting region.
legend(
  "topleft",
  legend = c("Lime mud", "No lime mud"),
  pch = 19,
  col = c("red", "blue"),
  cex = 0.7
)
我尝试了几种不同的方法,但仍然无法正常工作。我得到的最接近的是:
# Asker's attempt: split every measurement into a "Ja" column and a "Nej"
# column, reshape to long format, and draw one box per new column.
# For each measurement, keep the value only on rows of the given Mesa group;
# every other row becomes NA.
Carbon$TIC_Ja <- ifelse(Carbon$Mesa=="Ja",Carbon$TIC, NA)
Carbon$TIC_Nej <- ifelse(Carbon$Mesa=="Nej",Carbon$TIC, NA)
Carbon$TOC_Ja <- ifelse(Carbon$Mesa=="Ja",Carbon$TOC, NA)
Carbon$TOC_Nej <- ifelse(Carbon$Mesa=="Nej",Carbon$TOC, NA)
Carbon$TC_Ja <- ifelse(Carbon$Mesa=="Ja",Carbon$TC, NA)
Carbon$TC_Nej <- ifelse(Carbon$Mesa=="Nej",Carbon$TC, NA)
# Keep column 1 plus columns 6-11.
# NOTE(review): presumably column 1 is Mesa and 6:11 are the six columns
# created above; this relies on Carbon having exactly five columns before
# this snippet runs -- confirm.
Carbon.plot<-Carbon[ , c(1, 6:11)]
# Character vector holding all column names of Carbon.
# NOTE(review): the gather() call below appears to use `Carbon.key` as the
# NAME of the new key column rather than this vector's contents -- verify
# against the tidyr::gather documentation.
Carbon.key <- colnames(Carbon)
ggplot(
# Stack every column except Mesa into key/value pairs.
gather(Carbon.plot, key=Carbon.key, value="value", -"Mesa"),
aes(x=factor(Carbon.key), y=as.numeric(value), fill= Carbon.key)
) +
geom_boxplot() +
# NOTE(review): six fill colours are supplied but only two labels; fill is
# mapped to the key column, not to Mesa.
scale_fill_manual(values=c("red", "blue", "red", "blue", "red", "blue"),
labels=c("Lime mud added", "No lime mud")
)
但结果仍然不理想,因为我只想要图例中的前两项,并且想去掉 "NA"。而且我认为一定有一种更简单的方法,不需要用 ifelse 来重新整理数据框。我已经搜索过了,但到目前为止还没有看到任何类似的例子。所以,请帮帮忙!
编辑:添加了有关数据框(df)和会话信息的内容。不过在发布这个问题之后,我开始更新一些软件包,结果并不顺利,所以目前我的 ggplot 甚至无法正常工作。
> str(Carbon)
'data.frame': 70 obs. of 5 variables:
$ Mesa : chr "Ja" "Nej" "Nej" "Ja" ...
$ TIC : num 5.26 4.08 5.22 6.45 3.9 ...
$ TOC : num 15.64 11.32 12.68 6.35 2.54 ...
$ Graphite: num NA NA NA NA NA NA NA NA NA NA ...
$ TC : num 20.9 15.4 17.9 12.8 6.44 11.4 12.9 21.6 11.8 15.3 ...
> dput(Carbon)
structure(list(Mesa = c("Ja", "Nej", "Nej", "Ja", "Nej", "Ja",
"Ja", "Ja", "Ja", "Ja", "Nej", "Nej", "Nej", "Ja", "Ja", "Nej",
"Nej", "Nej", "Nej", "Ja", "Ja", "Ja", "Ja", "Nej", "Nej", "Nej",
"Ja", "Ja", "Ja", "Nej", "Nej", "Nej", "Nej", "Ja", "Ja", "Ja",
"Ja", "Nej", "Nej", "Nej", "Ja", "Ja", "Ja", "Nej", "Nej", "Nej",
"Ja", "Nej", "Ja", "Ja", "Ja", "Ja", "Nej", "Nej", "Nej", "Ja",
"Ja", "Ja", "Nej", "Nej", "Nej", "Nej", "Ja", "Ja", "Ja", "Ja",
"Nej", "Nej", "Nej", "Ja"), TIC = c(5.26, 4.08, 5.22, 6.45, 3.9,
8.2, 10.67, 7.43, 9.55, 8.19, 7.83, 4.04, 2.66, 4.93, 7.41, 3.25,
4.47, 4.385, 3.48, 8.01, 9.49, 8.93, 6.03, 7.32, 3.84, 2.42,
5.01, 3.87, 7, 4.8, 5.64, 5.76, 5.69, 8.7, 10.2, 9.78, 6.1, 8.07,
4.33, 3.98, 7.39, 9.68, 9.67, 3.75, 5.07, 4.7, 4.86, 2.98, 8.05,
10.29, 9.99, 6.65, 8.85, 4.82, 3.84, 8.49, 3.86, 6.63, 3.49,
3.01, 4.83, 3.78, 8.95, 10.1, 8.15, 6.16, 8.15, 4.27, 3.96, 4.96
), TOC = c(15.64, 11.32, 12.68, 6.35, 2.54, 3.2, 2.23, 14.17,
2.25, 7.11, 2.37, 17.16, 36.14, 13.47, 5.29, 17.95, 14.63, 3.85,
6.31, 3.19, 2.81, 7.27, 23.07, 1.94, 26.19, 36.63, 23.19, 11.37,
5.1, 2.39, 18.46, 2.17, 2.45, 1.7, 4.2, 4.92, 20.2, 0.86, 20.67,
33.32, 5.11, 2.01, 0.53, 6.48, 29.51, 2.5, 9.41, 3.42, 3.04,
4, 4.1, 11.15, 1.94, 20.66, 31.73, 1.81, 16.39, 20.75, 6.61,
33.98, 2.48, 3.15, 1.65, 2.4, 12.63, 17.33, 0.99, 25.62, 38.63,
17.93), Graphite = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 0.13, NA, NA, NA, NA, NA, 0.07,
0.05, NA, 0.06, NA, 0.03, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 0.07, 0.02, 0.08, 0.03, 0.33, NA, NA, NA, NA,
NA, 0.02, 0.13, NA, 0.05, 0.02, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), TC = c(20.9, 15.4, 17.9, 12.8, 6.44, 11.4, 12.9,
21.6, 11.8, 15.3, 10.2, 21.2, 38.8, 18.4, 12.7, 21.2, 19.1, 8.235,
9.92, 11.2, 12.3, 16.2, 29.1, 9.26, 30.1, 39.1, 28.2, 15.3, 12.1,
7.22, 24.1, 7.93, 8.14, 10.4, 14.4, 14.7, 26.3, 8.93, 25, 37.3,
12.5, 11.7, 10.2, 10.3, 34.6, 7.28, 14.3, 6.73, 11.1, 14.3, 14.1,
17.8, 10.8, 25.5, 35.7, 10.3, 20.3, 27.4, 10.1, 37, 7.33, 6.93,
10.6, 12.5, 20.8, 23.5, 9.14, 29.9, 42.6, 22.9)), class = "data.frame", row.names = c("Kx V17",
"Mu V17", "Ob V17", "Vä V17", "Ös V17", "Ig V17", "Va V17", "Gä V17",
"Sk V17", "Fr V17", "Gr V17", "Bi V17", "As V17", "Kx H17", "Pi H17",
"Mu H17", "Ob H17", "Do H17", "Ös H17", "Ig H17", "Va H17", "Gä H17",
"Fr H17", "Gr H17", "Bi H17", "As H17", "So H17", "Kx V18", "Pi V18",
"Mu V18", "Ob V18", "Do V18", "Ös V18", "Ig V18", "Va V18", "Gä V18",
"Fr V18", "Gr V18", "Bi V18", "As V18", "So V18", "Kx H18", "Pi H18",
"Mu H18", "Ob H18", "Do H18", "Vä H18", "Ös H18", "Ig H18", "Va H18",
"Gä H18", "Fr H18", "Gr H18", "Bi H18", "As H18", "So H18", "Kx V19",
"Pi V19", "Mu V19", "Ob V19", "Do V19", "Ös V19", "Ig V19", "Va V19",
"Gä V19", "Fr V19", "Gr V19", "Bi V19", "As V19", "So V19"))
> sessionInfo()
R version 3.5.3 (2019-03-11)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18362)
Matrix products: default
locale:
[1] LC_COLLATE=Swedish_Sweden.1252 LC_CTYPE=Swedish_Sweden.1252
[3] LC_MONETARY=Swedish_Sweden.1252 LC_NUMERIC=C
[5] LC_TIME=Swedish_Sweden.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
loaded via a namespace (and not attached):
[1] crayon_1.3.4 grid_3.5.3 R6_2.4.1 lifecycle_0.2.0 gtable_0.3.0
[6] magrittr_1.5 scales_1.1.0 rlang_0.4.5 rstudioapi_0.11 ellipsis_0.3.0
[11] tools_3.5.3 glue_1.4.0 munsell_0.5.0 compiler_3.5.3 colorspace_1.4-1
这里一种可能的方法是使用 tidyr 中的 pivot_longer 函数将数据框转换为更长的格式(pivot_longer 在 tidyr 1.0.0 及以上版本中可用):
library(tidyr)
library(dplyr)
# Reshape to long format: the measurement name goes into `var` and its value
# into `val`; the remaining columns are carried along unchanged.
Carbon %>%
  pivot_longer(
    cols = c(TIC, TOC, TC),
    names_to = "var",
    values_to = "val"
  ) %>%
  # Fix the level order explicitly so the variables appear as TIC, TOC, TC
  # rather than in alphabetical order.
  mutate(var = factor(var, levels = c("TIC", "TOC", "TC")))
# A tibble: 210 x 4
Mesa Graphite var val
<chr> <dbl> <fct> <dbl>
1 Ja NA TIC 5.26
2 Ja NA TOC 15.6
3 Ja NA TC 20.9
4 Nej NA TIC 4.08
5 Nej NA TOC 11.3
6 Nej NA TC 15.4
7 Nej NA TIC 5.22
8 Nej NA TOC 12.7
9 Nej NA TC 17.9
10 Ja NA TIC 6.45
# … with 200 more rows
然后,你可以使用 interaction 将 "Mesa" 和 "var" 组合为 x 值,并在 ggplot2 中得到如下箱线图:
library(tidyr)
library(dplyr)
# The plotting package is called ggplot2; `library(ggplot)` stops with
# "there is no package called 'ggplot'".
library(ggplot2)

# Reshape to long format, then draw one box per (variable, Mesa) combination,
# filled according to Mesa.
Carbon %>%
  pivot_longer(
    cols = c(TIC, TOC, TC),
    names_to = "var",
    values_to = "val"
  ) %>%
  mutate(var = factor(var, levels = c("TIC", "TOC", "TC"))) %>%
  ggplot(aes(x = interaction(var, Mesa), y = val, fill = Mesa)) +
  geom_boxplot() +
  # interaction() varies its first factor fastest, so the x levels run
  # TIC/TOC/TC for "Ja" and then TIC/TOC/TC for "Nej"; relabel them with the
  # bare variable names.
  scale_x_discrete(labels = rep(c("TIC", "TOC", "TC"), 2)) +
  # Named values and explicit breaks tie each colour and legend label to its
  # Mesa level instead of relying on the alphabetical order of the levels.
  scale_fill_manual(
    values = c(Ja = "red", Nej = "blue"),
    breaks = c("Ja", "Nej"),
    labels = c("Lime mud", "No lime mud")
  )
它是否回答了您的问题?