使用 ggplot2 绘制箱形图时重新排序 x 轴的问题
Issue for reordering x axes when plotting boxplot with ggplot2
我正在绘制箱线图,用来展示天气预报模型的降雨预报质量。x 轴是预报时效(天),y 轴是预报结果的集合分布。蓝色箱体是后报(hindcast,即对过去 20 年的重新预报),红色箱体是预报(forecast)数据。
# library
library(ggplot2)
library(readr)
library(forcats)
# Model identifier; it prefixes the input CSV file name read below.
model_name <- "ecmwf"

# Ensemble sizes and first/last year used to size the hindcast group.
# NOTE(review): ye - ys + 1 evaluates to 19, while the observation vector
# below holds 20 values -- confirm the intended year range.
hens <- 11
fens <- 51
ys <- 1999
ye <- 2017

# Number of values labelled "hindcast" within each lead-time slice.
n_hindcast <- hens * (ye - ys + 1)

# Observation
# Twenty observed values, all assigned to the single x category "clim_obs".
pcp_obs <- c(
  80.9737, 229.319, 111.603, 24.0906, 53.037,
  165.04, 28.6957, 120.151, 387.85, 155.383,
  434.328, 184.369, 169.443, 176.654, 14.1557,
  223.796, 105.595, 56.6908, 89.8277, 74.0017
)
clim_obs <- factor(rep("clim_obs", times = 20))
obs <- data.frame(clim_obs = clim_obs, pcp_obs = pcp_obs)

# create a data frame for forecast/hindcast results
# One ordered level per lead day (1..40); every lead day is repeated once for
# each hindcast value and once for each forecast value.
lead_time <- factor(
  rep(seq_len(40), each = n_hindcast + fens),
  levels = seq_len(40),
  ordered = TRUE
)

# Group labels for a single lead-time slice; data.frame() recycles them across
# all lead times.
Groups <- factor(
  rep(c("hindcast", "forecast"), times = c(n_hindcast, fens)),
  levels = c("hindcast", "forecast"),
  ordered = TRUE
)

# Read the table and flatten it row by row into one vector.
# NOTE(review): presumably one row per lead time with hindcast values first
# and forecast values last -- verify against the CSV layout.
pcp <- read_csv(paste0(model_name, "_hind_fcst.csv"))
pcp <- as.vector(t(pcp))

data <- data.frame(lead_time = lead_time, Groups = Groups, pcp = pcp)
str(data)
# grouped boxplot
# Box plots of `pcp` per lead time, filled by hindcast/forecast group.
# The stray "varwidth = FALSE) +" fragment that followed ggplot() was removed:
# it was a leftover piece of the commented-out layer and broke parsing.
p <- ggplot() +
  # geom_boxplot(data = obs, aes(x = clim_obs, y = pcp_obs), alpha = 0.7,
  #              outlier.shape = NA, varwidth = FALSE) +
  geom_boxplot(
    data = data,
    # fct_relevel() without level arguments leaves the factor unchanged, so
    # the factor is mapped directly.
    aes(x = lead_time, y = pcp, fill = Groups),
    alpha = 0.7, outlier.shape = NA, varwidth = FALSE
  ) +
  labs(
    x = "Lead time (day)",
    y = "15-day accumulative rainfall",
    title = "(c) ECMWF"
  ) +
  theme_classic() +
  theme(
    legend.position = "bottom",
    aspect.ratio = 0.35,
    axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5)
  ) +
  scale_y_continuous(
    breaks = seq(0, 600, 50),
    minor_breaks = seq(0, 600, by = 10),
    limits = c(0, 600)
  ) +
  scale_fill_manual(values = c("deepskyblue", "coral1")) +
  # Vertical guides every 7 lead days, placed between boxes.
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
  # lines -- confirm the installed version before switching.
  geom_vline(
    xintercept = seq(0.5, 40.5, by = 7), # linetype = "dotted",
    color = "gray", size = 0.25
  )

# ggsave() is not a plot component: chained with `+` it runs before `p` exists
# (saving whatever last_plot() holds) and its return value cannot be added to
# a plot. Call it separately and pass the plot explicitly.
ggsave(paste0(model_name, "_hind_fcst.pdf"), plot = p)
结果图在这里:
图的最后还有一个白色箱体,表示用于对比的观测数据。为此,我添加了下面这一行代码:
geom_boxplot(data=obs, aes(x=clim_obs, y=pcp_obs), alpha = 0.7, outlier.shape = NA, varwidth = FALSE) +
但是这样一来,预报时效的顺序就错了。修改后的图中 x 轴按字母顺序排列(即 1, 10, 11, 12, ..., 2, 21, 22, ..., clim_obs),而我希望它按数字顺序排列(即 1, 2, 3, 4, 5, ..., clim_obs)。
我该如何解决这个问题?
生成数据的文件在这里:link
感谢您抽出宝贵时间!
你可以用 `scale_x_discrete` 的 `limits` 参数来指定 x 轴的排列顺序:
library(ggplot2)
# Draw the observation box and the hindcast/forecast boxes on one discrete
# x axis, then state the axis order explicitly with scale_x_discrete().
ggplot() +
  # Observations: a single box at the "clim_obs" category.
  geom_boxplot(
    mapping = aes(x = clim_obs, y = pcp_obs),
    data = obs,
    varwidth = FALSE, outlier.shape = NA, alpha = 0.7
  ) +
  # Hindcast/forecast boxes per lead time, filled by group.
  geom_boxplot(
    mapping = aes(x = lead_time, y = pcp, fill = Groups),
    data = data,
    varwidth = FALSE, outlier.shape = NA, alpha = 0.7
  ) +
  labs(
    title = "(c) ECMWF",
    x = "Lead time (day)",
    y = "15-day accumulative rainfall"
  ) +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5),
    aspect.ratio = 0.35,
    legend.position = "bottom"
  ) +
  scale_y_continuous(
    limits = c(0, 600),
    breaks = seq(0, 600, by = 50),
    minor_breaks = seq(0, 600, by = 10)
  ) +
  scale_fill_manual(values = c("deepskyblue", "coral1")) +
  # Vertical guides every 7 lead days, placed between boxes.
  geom_vline(
    xintercept = seq(0.5, 40.5, by = 7), # linetype = "dotted",
    color = "gray", size = 0.25
  ) +
  # Explicit limits: lead days "1".."40" in numeric order, observations last.
  scale_x_discrete(limits = c(as.character(1:40), "clim_obs"))
我正在绘制箱线图,用来展示天气预报模型的降雨预报质量。x 轴是预报时效(天),y 轴是预报结果的集合分布。蓝色箱体是后报(hindcast,即对过去 20 年的重新预报),红色箱体是预报(forecast)数据。
# library
library(ggplot2)
library(readr)
library(forcats)
# Model identifier; it prefixes the input CSV file name read below.
model_name <- "ecmwf"

# Ensemble sizes and first/last year used to size the hindcast group.
# NOTE(review): ye - ys + 1 evaluates to 19, while the observation vector
# below holds 20 values -- confirm the intended year range.
hens <- 11
fens <- 51
ys <- 1999
ye <- 2017

# Number of values labelled "hindcast" within each lead-time slice.
n_hindcast <- hens * (ye - ys + 1)

# Observation
# Twenty observed values, all assigned to the single x category "clim_obs".
pcp_obs <- c(
  80.9737, 229.319, 111.603, 24.0906, 53.037,
  165.04, 28.6957, 120.151, 387.85, 155.383,
  434.328, 184.369, 169.443, 176.654, 14.1557,
  223.796, 105.595, 56.6908, 89.8277, 74.0017
)
clim_obs <- factor(rep("clim_obs", times = 20))
obs <- data.frame(clim_obs = clim_obs, pcp_obs = pcp_obs)

# create a data frame for forecast/hindcast results
# One ordered level per lead day (1..40); every lead day is repeated once for
# each hindcast value and once for each forecast value.
lead_time <- factor(
  rep(seq_len(40), each = n_hindcast + fens),
  levels = seq_len(40),
  ordered = TRUE
)

# Group labels for a single lead-time slice; data.frame() recycles them across
# all lead times.
Groups <- factor(
  rep(c("hindcast", "forecast"), times = c(n_hindcast, fens)),
  levels = c("hindcast", "forecast"),
  ordered = TRUE
)

# Read the table and flatten it row by row into one vector.
# NOTE(review): presumably one row per lead time with hindcast values first
# and forecast values last -- verify against the CSV layout.
pcp <- read_csv(paste0(model_name, "_hind_fcst.csv"))
pcp <- as.vector(t(pcp))

data <- data.frame(lead_time = lead_time, Groups = Groups, pcp = pcp)
str(data)
# grouped boxplot
# Box plots of `pcp` per lead time, filled by hindcast/forecast group.
# The stray "varwidth = FALSE) +" fragment that followed ggplot() was removed:
# it was a leftover piece of the commented-out layer and broke parsing.
p <- ggplot() +
  # geom_boxplot(data = obs, aes(x = clim_obs, y = pcp_obs), alpha = 0.7,
  #              outlier.shape = NA, varwidth = FALSE) +
  geom_boxplot(
    data = data,
    # fct_relevel() without level arguments leaves the factor unchanged, so
    # the factor is mapped directly.
    aes(x = lead_time, y = pcp, fill = Groups),
    alpha = 0.7, outlier.shape = NA, varwidth = FALSE
  ) +
  labs(
    x = "Lead time (day)",
    y = "15-day accumulative rainfall",
    title = "(c) ECMWF"
  ) +
  theme_classic() +
  theme(
    legend.position = "bottom",
    aspect.ratio = 0.35,
    axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5)
  ) +
  scale_y_continuous(
    breaks = seq(0, 600, 50),
    minor_breaks = seq(0, 600, by = 10),
    limits = c(0, 600)
  ) +
  scale_fill_manual(values = c("deepskyblue", "coral1")) +
  # Vertical guides every 7 lead days, placed between boxes.
  # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size` for
  # lines -- confirm the installed version before switching.
  geom_vline(
    xintercept = seq(0.5, 40.5, by = 7), # linetype = "dotted",
    color = "gray", size = 0.25
  )

# ggsave() is not a plot component: chained with `+` it runs before `p` exists
# (saving whatever last_plot() holds) and its return value cannot be added to
# a plot. Call it separately and pass the plot explicitly.
ggsave(paste0(model_name, "_hind_fcst.pdf"), plot = p)
结果图在这里:
图的最后还有一个白色箱体,表示用于对比的观测数据。为此,我添加了下面这一行代码:
geom_boxplot(data=obs, aes(x=clim_obs, y=pcp_obs), alpha = 0.7, outlier.shape = NA, varwidth = FALSE) +
但是这样一来,预报时效的顺序就错了。修改后的图中 x 轴按字母顺序排列(即 1, 10, 11, 12, ..., 2, 21, 22, ..., clim_obs),而我希望它按数字顺序排列(即 1, 2, 3, 4, 5, ..., clim_obs)。
我该如何解决这个问题?
生成数据的文件在这里:link
感谢您抽出宝贵时间!
你可以用 `scale_x_discrete` 的 `limits` 参数来指定 x 轴的排列顺序:
library(ggplot2)
# Draw the observation box and the hindcast/forecast boxes on one discrete
# x axis, then state the axis order explicitly with scale_x_discrete().
ggplot() +
  # Observations: a single box at the "clim_obs" category.
  geom_boxplot(
    mapping = aes(x = clim_obs, y = pcp_obs),
    data = obs,
    varwidth = FALSE, outlier.shape = NA, alpha = 0.7
  ) +
  # Hindcast/forecast boxes per lead time, filled by group.
  geom_boxplot(
    mapping = aes(x = lead_time, y = pcp, fill = Groups),
    data = data,
    varwidth = FALSE, outlier.shape = NA, alpha = 0.7
  ) +
  labs(
    title = "(c) ECMWF",
    x = "Lead time (day)",
    y = "15-day accumulative rainfall"
  ) +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5),
    aspect.ratio = 0.35,
    legend.position = "bottom"
  ) +
  scale_y_continuous(
    limits = c(0, 600),
    breaks = seq(0, 600, by = 50),
    minor_breaks = seq(0, 600, by = 10)
  ) +
  scale_fill_manual(values = c("deepskyblue", "coral1")) +
  # Vertical guides every 7 lead days, placed between boxes.
  geom_vline(
    xintercept = seq(0.5, 40.5, by = 7), # linetype = "dotted",
    color = "gray", size = 0.25
  ) +
  # Explicit limits: lead days "1".."40" in numeric order, observations last.
  scale_x_discrete(limits = c(as.character(1:40), "clim_obs"))