箱线图和点图可以来自 ggplot 中的两个不同数据吗?
Can the boxplot and the dotplot come from two different data in ggplot?
我希望我的箱形图包含单独的数据点,但随后我想从单独的数据集中提取这些点。
例如,如果我的数据框 ("df") 如下所示:
# Example data: one entry per subject, ten subjects in total.
ID <- as.numeric(seq_len(10))
Happiness <- c(
  2, 3, 10, 7, 6,
  8, 3, 9, 5, 1
)
Smoke <- c(
  "yes", "yes", "no", "yes", "no",
  "no", "no", "no", "yes", "no"
)
Exercise <- c(
  "no", "yes", "no", "yes", "yes",
  "yes", "yes", "no", "no", "yes"
)
其中 ID = 受试者 ID,Happiness = 以 1-10 分衡量的幸福感(连续变量),Smoke 和 Exercise = 表示是否吸烟/锻炼(smoke/exercise)的分类变量。
我希望我的箱形图基于 'Smoke',但点图基于 'Exercise'。
所以,在 x 轴上,我有两个组:'smoker' 和 'non-smoker'(基于 'Smoke');在 y 轴上是幸福感(Happiness)。但是,箱形图上的点将通过形状或颜色表明此人是 'exerciser' 还是 'non-exerciser'(基于 'Exercise')。
我希望这是有道理的。
这是我的尝试,但输出结果与我想象的不太一样。
# Questioner's attempt, kept behaviourally as posted: a box plot of Happiness
# per Smoke group plus a dot plot layer with `shape` mapped to Exercise.
ggplot(
  data = df,
  mapping = aes(x = Smoke, y = Happiness, fill = Smoke)
) +
  geom_boxplot(position = position_dodge()) +
  geom_dotplot(
    mapping = aes(shape = Exercise),
    binaxis = "y",
    stackdir = "center",
    dotsize = 0.5,
    position = position_dodge()
  ) +
  scale_shape_manual(values = c(3, 16)) +
  labs(title = "Happiness by Smoking/Exercise", y = "Happiness") +
  theme_classic()
通常当我看到点覆盖箱线图时,我假设它们代表相同的东西(即箱线图显示分布,点显示每个单独的值)。如果您对吸烟和锻炼之间的相互作用感兴趣,那么绘制它可能更有意义,例如
library(tidyverse)

# Example data: one row per subject.
ID <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
Happiness <- c(2, 3, 10, 7, 6, 8, 3, 9, 5, 1)
Smoke <- c("yes", "yes", "no", "yes", "no", "no", "no", "no", "yes", "no")
Exercise <- c("no", "yes", "no", "yes", "yes", "yes", "yes", "no", "no", "yes")
df <- tibble(
  "ID" = ID, "Happiness" = Happiness,
  "Smoke" = Smoke, "Exercise" = Exercise
)

# One box per Smoke x Exercise combination; box fill encodes Smoke and the
# point shape encodes Exercise.
df %>%
  mutate(
    Smoke = ifelse(Smoke == "yes", "Smoker", "Non-Smoker"),
    Exercise = ifelse(Exercise == "yes", "Exercises", "Doesn't Exercise"),
    # interaction() joins the two labels with "."; replace that separator with
    # a newline so each axis label is printed on two lines. The dot has to be
    # written as "\\." -- a single backslash ('\.') is not a valid escape in
    # an R string literal and stops the script from parsing.
    Interaction = factor(
      str_replace(interaction(Smoke, Exercise), "\\.", "\n"),
      ordered = TRUE
    )
  ) %>%
  ggplot(aes(x = Interaction, y = Happiness)) +
  geom_boxplot(aes(fill = Smoke)) +
  geom_point(aes(shape = Exercise), size = 4) +
  labs(
    title = "Happiness by Smoking/Exercise",
    y = "Happiness"
  ) +
  theme_classic(base_size = 16) +
  theme(axis.title.x = element_blank())
编辑
在回答下面的评论时,这是使用类似数据制作雨云图的一种方法(需要比上面的 MRE 更多的数据点,否则图看起来很奇怪):
# Load libraries
library(tidyverse)
# Get data: 50 simulated subjects.
# Fix the RNG seed so the simulated example is reproducible between runs.
set.seed(2024)
n_subjects <- 50
ID <- seq_len(n_subjects)
Happiness <- sample(1:100, n_subjects, replace = TRUE)
Smoke <- sample(c("yes", "no"), n_subjects, replace = TRUE)
Exercise <- sample(c("yes", "no"), n_subjects, replace = TRUE)
df <- tibble(
  "ID" = ID, "Happiness" = Happiness,
  "Smoke" = Smoke, "Exercise" = Exercise
)

# Source Ben Marwick's code for Violin Plots (the plot further down calls
# geom_flat_violin()).
# NOTE(review): this executes code fetched from a remote URL; inspect the
# gist before running it.
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")
# Raincloud plot theme: size-14 text with a bold size-16 title, legend on the
# right, no panel border or grid lines, and solid black axis lines.
raincloud_theme <- theme(
  text = element_text(size = 14),
  axis.title.x = element_text(size = 14),
  axis.title.y = element_blank(),
  axis.text = element_text(size = 14),
  axis.text.y = element_text(vjust = 0.3),
  legend.title = element_text(size = 14),
  legend.text = element_text(size = 14),
  legend.position = "right",
  plot.title = element_text(lineheight = .8, face = "bold", size = 16),
  panel.border = element_blank(),
  panel.grid.minor = element_blank(),
  panel.grid.major = element_blank(),
  axis.line.x = element_line(colour = "black", size = 0.5, linetype = "solid"),
  axis.line.y = element_line(colour = "black", size = 0.5, linetype = "solid")
)
# Plot the thing: for every Smoke x Exercise combination draw a flat violin,
# the jittered individual points and a narrow box plot, with the axes flipped.
df %>%
  mutate(
    Smoke = ifelse(Smoke == "yes", "Smoker", "Non-Smoker"),
    Exercise = ifelse(Exercise == "yes", "Exercises", "Doesn't Exercise"),
    # interaction() joins the two labels with "."; replace that separator with
    # a newline so each axis label is printed on two lines. The dot has to be
    # written as "\\." -- a single backslash ('\.') is not a valid escape in
    # an R string literal and stops the script from parsing.
    Interaction = factor(
      str_replace(interaction(Smoke, Exercise), "\\.", "\n"),
      ordered = TRUE
    )
  ) %>%
  ggplot(aes(x = Interaction, y = Happiness, fill = Smoke)) +
  # Nudge the violin sideways so it sits next to the points and the box.
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8
  ) +
  geom_point(
    aes(shape = Exercise),
    position = position_jitter(width = .05),
    size = 2, alpha = 0.8
  ) +
  # The individual observations are already drawn by geom_point() above.
  geom_boxplot(width = .1, outlier.shape = NA, alpha = 0.5) +
  coord_flip(xlim = c(1.25, 4.25)) +
  labs(
    title = "Happiness by Smoking/Exercise",
    y = "Happiness"
  ) +
  scale_fill_discrete(
    guide = guide_legend(override.aes = list(shape = c(".", ".")))
  ) +
  scale_shape_discrete(guide = guide_legend(override.aes = list(size = 3))) +
  theme_classic(base_size = 16) +
  theme(axis.title.x = element_blank()) +
  raincloud_theme
您可以使用 geom_point() 而不是 geom_dotplot()。
# Example data frame: ten subjects with a happiness score and yes/no answers
# for smoking and exercising.
df <- data.frame(
  ID = as.numeric(seq_len(10)),
  Happiness = c(2, 3, 10, 7, 6, 8, 3, 9, 5, 1),
  Smoke = c(
    "yes", "yes", "no", "yes", "no",
    "no", "no", "no", "yes", "no"
  ),
  Exercise = c(
    "no", "yes", "no", "yes", "yes",
    "yes", "yes", "no", "no", "yes"
  )
)
# Box plot of Happiness per Smoke group, overlaid with the individual points;
# point shape and colour both encode Exercise, and the points are dodged by
# a width of 0.5.
ggplot(
  data = df,
  mapping = aes(x = Smoke, y = Happiness, fill = Smoke)
) +
  geom_boxplot(position = position_dodge()) +
  geom_point(
    mapping = aes(shape = Exercise, colour = Exercise),
    position = position_dodge(width = 0.5)
  ) +
  scale_shape_manual(values = c(17, 16)) +
  scale_color_manual(values = c("black", "blue")) +
  labs(title = "Happiness by Smoking/Exercise", y = "Happiness") +
  theme_classic()
您可以通过更改 position_dodge(width = ) 中的数值来调整代表 Exercise 的点是排成一列还是左右分开;设为 0 会使它们排成一列。该图容易令人困惑,因为它在同一个箱形图中叠加了另一种信息(Exercise)。我同意另一个回答的观点:最好为每种组合单独绘制一个箱线图。
我希望我的箱形图包含单独的数据点,但随后我想从单独的数据集中提取这些点。
例如,如果我的数据框 ("df") 如下所示:
# Example data: one entry per subject, ten subjects in total.
ID <- as.numeric(seq_len(10))
Happiness <- c(
  2, 3, 10, 7, 6,
  8, 3, 9, 5, 1
)
Smoke <- c(
  "yes", "yes", "no", "yes", "no",
  "no", "no", "no", "yes", "no"
)
Exercise <- c(
  "no", "yes", "no", "yes", "yes",
  "yes", "yes", "no", "no", "yes"
)
其中 ID = 受试者 ID,Happiness = 以 1-10 分衡量的幸福感(连续变量),Smoke 和 Exercise = 表示是否吸烟/锻炼(smoke/exercise)的分类变量。
我希望我的箱形图基于 'Smoke',但点图基于 'Exercise'。
所以,在 x 轴上,我有两个组:'smoker' 和 'non-smoker'(基于 'Smoke');在 y 轴上是幸福感(Happiness)。但是,箱形图上的点将通过形状或颜色表明此人是 'exerciser' 还是 'non-exerciser'(基于 'Exercise')。
我希望这是有道理的。
这是我的尝试,但输出结果与我想象的不太一样。
# Questioner's attempt, kept behaviourally as posted: a box plot of Happiness
# per Smoke group plus a dot plot layer with `shape` mapped to Exercise.
ggplot(
  data = df,
  mapping = aes(x = Smoke, y = Happiness, fill = Smoke)
) +
  geom_boxplot(position = position_dodge()) +
  geom_dotplot(
    mapping = aes(shape = Exercise),
    binaxis = "y",
    stackdir = "center",
    dotsize = 0.5,
    position = position_dodge()
  ) +
  scale_shape_manual(values = c(3, 16)) +
  labs(title = "Happiness by Smoking/Exercise", y = "Happiness") +
  theme_classic()
通常当我看到点覆盖箱线图时,我假设它们代表相同的东西(即箱线图显示分布,点显示每个单独的值)。如果您对吸烟和锻炼之间的相互作用感兴趣,那么绘制它可能更有意义,例如
library(tidyverse)

# Example data: one row per subject.
ID <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
Happiness <- c(2, 3, 10, 7, 6, 8, 3, 9, 5, 1)
Smoke <- c("yes", "yes", "no", "yes", "no", "no", "no", "no", "yes", "no")
Exercise <- c("no", "yes", "no", "yes", "yes", "yes", "yes", "no", "no", "yes")
df <- tibble(
  "ID" = ID, "Happiness" = Happiness,
  "Smoke" = Smoke, "Exercise" = Exercise
)

# One box per Smoke x Exercise combination; box fill encodes Smoke and the
# point shape encodes Exercise.
df %>%
  mutate(
    Smoke = ifelse(Smoke == "yes", "Smoker", "Non-Smoker"),
    Exercise = ifelse(Exercise == "yes", "Exercises", "Doesn't Exercise"),
    # interaction() joins the two labels with "."; replace that separator with
    # a newline so each axis label is printed on two lines. The dot has to be
    # written as "\\." -- a single backslash ('\.') is not a valid escape in
    # an R string literal and stops the script from parsing.
    Interaction = factor(
      str_replace(interaction(Smoke, Exercise), "\\.", "\n"),
      ordered = TRUE
    )
  ) %>%
  ggplot(aes(x = Interaction, y = Happiness)) +
  geom_boxplot(aes(fill = Smoke)) +
  geom_point(aes(shape = Exercise), size = 4) +
  labs(
    title = "Happiness by Smoking/Exercise",
    y = "Happiness"
  ) +
  theme_classic(base_size = 16) +
  theme(axis.title.x = element_blank())
编辑
在回答下面的评论时,这是使用类似数据制作雨云图的一种方法(需要比上面的 MRE 更多的数据点,否则图看起来很奇怪):
# Load libraries
library(tidyverse)
# Get data: 50 simulated subjects.
# Fix the RNG seed so the simulated example is reproducible between runs.
set.seed(2024)
n_subjects <- 50
ID <- seq_len(n_subjects)
Happiness <- sample(1:100, n_subjects, replace = TRUE)
Smoke <- sample(c("yes", "no"), n_subjects, replace = TRUE)
Exercise <- sample(c("yes", "no"), n_subjects, replace = TRUE)
df <- tibble(
  "ID" = ID, "Happiness" = Happiness,
  "Smoke" = Smoke, "Exercise" = Exercise
)

# Source Ben Marwick's code for Violin Plots (the plot further down calls
# geom_flat_violin()).
# NOTE(review): this executes code fetched from a remote URL; inspect the
# gist before running it.
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")
# Raincloud plot theme: size-14 text with a bold size-16 title, legend on the
# right, no panel border or grid lines, and solid black axis lines.
raincloud_theme <- theme(
  text = element_text(size = 14),
  axis.title.x = element_text(size = 14),
  axis.title.y = element_blank(),
  axis.text = element_text(size = 14),
  axis.text.y = element_text(vjust = 0.3),
  legend.title = element_text(size = 14),
  legend.text = element_text(size = 14),
  legend.position = "right",
  plot.title = element_text(lineheight = .8, face = "bold", size = 16),
  panel.border = element_blank(),
  panel.grid.minor = element_blank(),
  panel.grid.major = element_blank(),
  axis.line.x = element_line(colour = "black", size = 0.5, linetype = "solid"),
  axis.line.y = element_line(colour = "black", size = 0.5, linetype = "solid")
)
# Plot the thing: for every Smoke x Exercise combination draw a flat violin,
# the jittered individual points and a narrow box plot, with the axes flipped.
df %>%
  mutate(
    Smoke = ifelse(Smoke == "yes", "Smoker", "Non-Smoker"),
    Exercise = ifelse(Exercise == "yes", "Exercises", "Doesn't Exercise"),
    # interaction() joins the two labels with "."; replace that separator with
    # a newline so each axis label is printed on two lines. The dot has to be
    # written as "\\." -- a single backslash ('\.') is not a valid escape in
    # an R string literal and stops the script from parsing.
    Interaction = factor(
      str_replace(interaction(Smoke, Exercise), "\\.", "\n"),
      ordered = TRUE
    )
  ) %>%
  ggplot(aes(x = Interaction, y = Happiness, fill = Smoke)) +
  # Nudge the violin sideways so it sits next to the points and the box.
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8
  ) +
  geom_point(
    aes(shape = Exercise),
    position = position_jitter(width = .05),
    size = 2, alpha = 0.8
  ) +
  # The individual observations are already drawn by geom_point() above.
  geom_boxplot(width = .1, outlier.shape = NA, alpha = 0.5) +
  coord_flip(xlim = c(1.25, 4.25)) +
  labs(
    title = "Happiness by Smoking/Exercise",
    y = "Happiness"
  ) +
  scale_fill_discrete(
    guide = guide_legend(override.aes = list(shape = c(".", ".")))
  ) +
  scale_shape_discrete(guide = guide_legend(override.aes = list(size = 3))) +
  theme_classic(base_size = 16) +
  theme(axis.title.x = element_blank()) +
  raincloud_theme
您可以使用 geom_point() 而不是 geom_dotplot()。
# Example data frame: ten subjects with a happiness score and yes/no answers
# for smoking and exercising.
df <- data.frame(
  ID = as.numeric(seq_len(10)),
  Happiness = c(2, 3, 10, 7, 6, 8, 3, 9, 5, 1),
  Smoke = c(
    "yes", "yes", "no", "yes", "no",
    "no", "no", "no", "yes", "no"
  ),
  Exercise = c(
    "no", "yes", "no", "yes", "yes",
    "yes", "yes", "no", "no", "yes"
  )
)
# Box plot of Happiness per Smoke group, overlaid with the individual points;
# point shape and colour both encode Exercise, and the points are dodged by
# a width of 0.5.
ggplot(
  data = df,
  mapping = aes(x = Smoke, y = Happiness, fill = Smoke)
) +
  geom_boxplot(position = position_dodge()) +
  geom_point(
    mapping = aes(shape = Exercise, colour = Exercise),
    position = position_dodge(width = 0.5)
  ) +
  scale_shape_manual(values = c(17, 16)) +
  scale_color_manual(values = c("black", "blue")) +
  labs(title = "Happiness by Smoking/Exercise", y = "Happiness") +
  theme_classic()
您可以通过更改 position_dodge(width = ) 中的数值来调整代表 Exercise 的点是排成一列还是左右分开;设为 0 会使它们排成一列。该图容易令人困惑,因为它在同一个箱形图中叠加了另一种信息(Exercise)。我同意另一个回答的观点:最好为每种组合单独绘制一个箱线图。