如何在 R 中使用 facet_wrap 并排绘制箱线图?

How to draw side by side boxplot using facet_wrap in R?

我正在寻找一种在 R 中使用 facet_wrap 并排绘制箱线图(boxplot)的解决方案。虽然已有很多不错的解决方案,但我没有找到符合我需求的。因此,我针对我的两个 data.frame 画了一张我希望得到的图的示意图。data.frame C 包含四个模型(M1–M4)在四种评价指标(即 KGE、NSE、PBIAS 和 R-Sq)上的校准(calibration)数据,而 data.frame V 包含相应的验证(validation)数据。我想使用 ggplot2 的 facet_wrap 功能为每个指标绘制一张单独的图。以下是我目前所做的尝试,但它并没有让我更接近目标。

# Close every open graphics device before plotting.
graphics.off()
# Remove all objects from the current workspace.
# NOTE(review): this also wipes the caller's objects when the script is
# sourced into an existing session; consider dropping it in shared code.
rm(list = ls())

library(tidyverse)

# Calibration table: three random draws for each metric (KGE, NSE, R_Sq,
# PBIAS) of each model (M1..M4), plus a constant label column `Cal`.
C <- data.frame(
  KGE_M1   = runif(n = 3, min = 0, max = 0.5),
  NSE_M1   = runif(n = 3, min = 0, max = 0.5),
  R_Sq_M1  = runif(n = 3, min = -1, max = 0.3),
  PBIAS_M1 = runif(n = 3, min = -0.25, max = 0.25),
  KGE_M2   = runif(n = 3, min = 0.2, max = 0.7),
  NSE_M2   = runif(n = 3, min = 0.2, max = 0.7),
  R_Sq_M2  = runif(n = 3, min = -0.5, max = 0.7),
  PBIAS_M2 = runif(n = 3, min = -0.15, max = 0.15),
  KGE_M3   = runif(n = 3, min = 0.3, max = 0.8),
  NSE_M3   = runif(n = 3, min = 0.3, max = 0.8),
  R_Sq_M3  = runif(n = 3, min = 0.3, max = 0.8),
  PBIAS_M3 = runif(n = 3, min = -0.10, max = 0.10),
  KGE_M4   = runif(n = 3, min = 0.5, max = 1),
  NSE_M4   = runif(n = 3, min = 0.5, max = 1),
  R_Sq_M4  = runif(n = 3, min = 0.5, max = 1),
  PBIAS_M4 = runif(n = 3, min = -0.05, max = 0.05),
  Cal      = rep("Calibration", 3)
)

# Validation table: same layout as the calibration table, with the label
# column named `Val` instead.
V <- data.frame(
  KGE_M1   = runif(n = 3, min = 0, max = 0.5),
  NSE_M1   = runif(n = 3, min = 0, max = 0.5),
  R_Sq_M1  = runif(n = 3, min = -1, max = 0.3),
  PBIAS_M1 = runif(n = 3, min = -0.25, max = 0.25),
  KGE_M2   = runif(n = 3, min = 0.2, max = 0.7),
  NSE_M2   = runif(n = 3, min = 0.2, max = 0.7),
  R_Sq_M2  = runif(n = 3, min = -0.5, max = 0.7),
  PBIAS_M2 = runif(n = 3, min = -0.15, max = 0.15),
  KGE_M3   = runif(n = 3, min = 0.3, max = 0.8),
  NSE_M3   = runif(n = 3, min = 0.3, max = 0.8),
  R_Sq_M3  = runif(n = 3, min = 0.3, max = 0.8),
  PBIAS_M3 = runif(n = 3, min = -0.10, max = 0.10),
  KGE_M4   = runif(n = 3, min = 0.5, max = 1),
  NSE_M4   = runif(n = 3, min = 0.5, max = 1),
  R_Sq_M4  = runif(n = 3, min = 0.5, max = 1),
  PBIAS_M4 = runif(n = 3, min = -0.05, max = 0.05),
  Val      = rep("Validation", 3)
)

# Reshape both tables from wide to long: every metric/model column becomes a
# (Variable, Value) pair, while the label column (Cal / Val) is kept as is.
C <- C %>% gather(key = "Variable", value = "Value", -Cal)
V <- V %>% gather(key = "Variable", value = "Value", -Val)

# Box plot of Value for each Variable, with one facet per Variable.
# The `+` has to end the previous line: a line that begins with `+` is parsed
# as a separate expression, so the facet would never be added to the plot.
ggplot(data = C) +
  geom_boxplot(aes(x = Variable, y = Value)) +
  facet_wrap(~Variable)

我希望得到类似下面这样的图:

我认为您需要在绘图之前拆分 Variable,以便用一个变量表示模型(M1、M2、M3、M4),并用另一个变量表示您的条件(KGE、NSE 等指标):

library(tidyverse)
# Reshape the calibration table to long format and split each column name
# (e.g. "KGE_M1") into the metric ("KGE") and a model label ("Cal_M1").
C2 <- C %>%
  rename(Type = Cal) %>%
  pivot_longer(cols = -Type, names_to = "Variable", values_to = "Value") %>%
  # Each group holds a single column name, so the length-one result of
  # unlist(strsplit(...))[i] is recycled correctly within the group.
  group_by(Variable) %>%
  mutate(
    Variable2 = paste0("Cal_M", unlist(strsplit(Variable, "_M"))[2]),
    Variable1 = unlist(strsplit(Variable, "_M"))[1]
  )

# A tibble: 6 x 5
# Groups:   Variable [6]
  Type        Variable  Value Variable2 Variable1
  <fct>       <chr>     <dbl> <chr>     <chr>    
1 Calibration KGE_M1    0.246 Cal_M1    KGE      
2 Calibration NSE_M1    0.476 Cal_M1    NSE      
3 Calibration R_Sq_M1  -0.978 Cal_M1    R_Sq     
4 Calibration PBIAS_M1  0.117 Cal_M1    PBIAS    
5 Calibration KGE_M2    0.544 Cal_M2    KGE      
6 Calibration NSE_M2    0.270 Cal_M2    NSE   

现在,我们对数据集 V 做同样的处理:

# Reshape the validation table to long format and split each column name
# (e.g. "KGE_M1") into the metric ("KGE") and a model label ("Val_M1").
V2 <- V %>%
  rename(Type = Val) %>%
  pivot_longer(cols = -Type, names_to = "Variable", values_to = "Value") %>%
  # Each group holds a single column name, so the length-one result of
  # unlist(strsplit(...))[i] is recycled correctly within the group.
  group_by(Variable) %>%
  mutate(
    Variable2 = paste0("Val_M", unlist(strsplit(Variable, "_M"))[2]),
    Variable1 = unlist(strsplit(Variable, "_M"))[1]
  )

# A tibble: 6 x 5
# Groups:   Variable [6]
  Type       Variable   Value Variable2 Variable1
  <fct>      <chr>      <dbl> <chr>     <chr>    
1 Validation KGE_M1    0.459  Val_M1    KGE      
2 Validation NSE_M1    0.105  Val_M1    NSE      
3 Validation R_Sq_M1  -0.435  Val_M1    R_Sq     
4 Validation PBIAS_M1  0.0281 Val_M1    PBIAS    
5 Validation KGE_M2    0.625  Val_M2    KGE      
6 Validation NSE_M2    0.332  Val_M2    NSE    

我们现在可以将它们绑定在一起:

# Stack the calibration (C2) and validation (V2) long tables row-wise; both
# have the columns Type, Variable, Value, Variable2 and Variable1.
DF <- rbind(C2,V2)

然后,我们可以绘制:

# One panel per metric (Variable1), each with its own axis ranges. The x-axis
# labels are rotated 45 degrees and right-aligned.
ggplot(data = DF) +
  geom_boxplot(mapping = aes(x = Variable2, y = Value)) +
  facet_wrap(~ Variable1, scales = "free") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

编辑:重命名 x 轴,添加空列以分隔校准值和验证值

要在校准值和验证值之间留出空白间隔,您只需为 Variable1 的每个取值各添加一行空行,如下所示:

# Append one placeholder row per metric (one per distinct Variable1 value).
# Its x value is "Empty" and its Value is missing, which reserves an empty
# slot on the x axis between the "Cal_*" and "Val_*" boxes.
# The number of rows follows the data instead of a hard-coded 4: the
# length-one values are recycled to the length of unique(DF$Variable1).
# NOTE(review): as.data.frame() is presumably here to drop the grouping left
# by group_by() before add_row() is called - confirm.
DF <- as.data.frame(DF) %>%
  add_row(
    Type = "Empty",
    Variable = "Empty",
    Value = NA_real_,
    Variable2 = "Empty",
    Variable1 = unique(DF$Variable1)
  )

此外,如果要重命名 x 轴标签,可以使用 scale_x_discrete

# Faceted box plot filled by Type. The x-axis labels are replaced by
# position: four model labels, one blank label, then four model labels again.
ggplot(data = DF, mapping = aes(x = Variable2, y = Value, fill = Type)) +
  geom_boxplot() +
  facet_wrap(~ Variable1, scales = "free") +
  scale_x_discrete(
    labels = c(
      "M1", "M2", "M3", "M4",
      "",
      "M1", "M2", "M3", "M4"
    )
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

是否符合您的预期?

所以这里有一种方法可以用来完成所需的工作;

首先我们创建您拥有的数据;

library(tidyverse)

# Calibration data: three uniform random draws per metric/model column,
# labelled through the character column `Cal`.
C <- data.frame(
  KGE_M1   = runif(n = 3, min = 0, max = 0.5),
  NSE_M1   = runif(n = 3, min = 0, max = 0.5),
  R_Sq_M1  = runif(n = 3, min = -1, max = 0.3),
  PBIAS_M1 = runif(n = 3, min = -0.25, max = 0.25),
  KGE_M2   = runif(n = 3, min = 0.2, max = 0.7),
  NSE_M2   = runif(n = 3, min = 0.2, max = 0.7),
  R_Sq_M2  = runif(n = 3, min = -0.5, max = 0.7),
  PBIAS_M2 = runif(n = 3, min = -0.15, max = 0.15),
  KGE_M3   = runif(n = 3, min = 0.3, max = 0.8),
  NSE_M3   = runif(n = 3, min = 0.3, max = 0.8),
  R_Sq_M3  = runif(n = 3, min = 0.3, max = 0.8),
  PBIAS_M3 = runif(n = 3, min = -0.10, max = 0.10),
  KGE_M4   = runif(n = 3, min = 0.5, max = 1),
  NSE_M4   = runif(n = 3, min = 0.5, max = 1),
  R_Sq_M4  = runif(n = 3, min = 0.5, max = 1),
  PBIAS_M4 = runif(n = 3, min = -0.05, max = 0.05),
  Cal      = rep("Calibration", 3),
  stringsAsFactors = FALSE
)

# Validation data: same columns and sampling ranges as the calibration data,
# labelled through the character column `Val`.
V <- data.frame(
  KGE_M1   = runif(n = 3, min = 0, max = 0.5),
  NSE_M1   = runif(n = 3, min = 0, max = 0.5),
  R_Sq_M1  = runif(n = 3, min = -1, max = 0.3),
  PBIAS_M1 = runif(n = 3, min = -0.25, max = 0.25),
  KGE_M2   = runif(n = 3, min = 0.2, max = 0.7),
  NSE_M2   = runif(n = 3, min = 0.2, max = 0.7),
  R_Sq_M2  = runif(n = 3, min = -0.5, max = 0.7),
  PBIAS_M2 = runif(n = 3, min = -0.15, max = 0.15),
  KGE_M3   = runif(n = 3, min = 0.3, max = 0.8),
  NSE_M3   = runif(n = 3, min = 0.3, max = 0.8),
  R_Sq_M3  = runif(n = 3, min = 0.3, max = 0.8),
  PBIAS_M3 = runif(n = 3, min = -0.10, max = 0.10),
  KGE_M4   = runif(n = 3, min = 0.5, max = 1),
  NSE_M4   = runif(n = 3, min = 0.5, max = 1),
  R_Sq_M4  = runif(n = 3, min = 0.5, max = 1),
  PBIAS_M4 = runif(n = 3, min = -0.05, max = 0.05),
  Val      = rep("Validation", 3),
  stringsAsFactors = FALSE
)

现在我们更改数据格式并将其可视化;

# Give the label column the same name in both tables so they can be stacked
C <- C %>% rename(Identifier = Cal)
V <- V %>% rename(Identifier = Val)

# Stack both tables, reshape them from wide to long, then split names such as
# "R_Sq_M1" at the last underscore into the metric ("R_Sq") and model ("M1").
data <- bind_rows(C, V) %>%
  gather(key = "Variable", value = "Value", -Identifier) %>%
  separate(
    col = Variable,
    into = c("Variable", "Number"),
    sep = "_(?=[^_]+$)"
  )

# One panel per metric; within a panel there is one box for every
# Identifier/Number combination, filled by Identifier.
ggplot(data = data) +
  geom_boxplot(
    mapping = aes(
      x = Number,
      y = Value,
      group = interaction(Identifier, Number),
      fill = Identifier
    )
  ) +
  facet_wrap(~ Variable)