在 R 中重现此图
Reproducing this plot in R
我有两组病人。我想为两组患者画图
类似这样的图
我有这样一个数据
> dput(df)
structure(list(gene = c("18q", "4q", "21p", "21q", "5q", "22q",
"17p", "3p", "9p", "4p", "9q", "19q", "10q", "15q", "16p", "19p",
"1p", "18p", "16q", "8p", "21q", "4q", "18q", "21p", "1p", "3p",
"4p", "17p", "5q", "16q", "18p", "14q", "19p", "20q"), CNV = c("Deletion",
"Deletion", "Deletion", "Deletion", "Deletion", "Deletion", "Deletion",
"Deletion", "Deletion", "Deletion", "Deletion", "Deletion", "Deletion",
"Deletion", "Deletion", "Deletion", "Deletion", "Deletion", "Deletion",
"Deletion", "Deletion", "Deletion", "Deletion", "Deletion", "Deletion",
"Deletion", "Deletion", "Deletion", "Deletion", "Deletion", "Deletion",
"Deletion", "Deletion", "Amplification"), log10_pvalue = c(5.974694135,
5.73754891, 4.995678626, 4.970616222, 4.793174124, 4.793174124,
4.109020403, 3.524328812, 3.524328812, 2.823908741, 2.567030709,
2.186419011, 1.769551079, 1.59345982, 1.59345982, 1.59345982,
1.416801226, 1.195860568, 1.094743951, 1.087777943, 4.083019953,
3.826813732, 3.826813732, 3.826813732, 2.675717545, 2.675717545,
2.675717545, 2.342944147, 2.084072788, 1.850780887, 1.659555885,
1.197226275, 1.197226275, 1.88941029), Percentage_altered = c(0.61,
0.53, 0.61, 0.56, 0.44, 0.5, 0.5, 0.44, 0.5, 0.47, 0.39, 0.28,
0.33, 0.31, 0.33, 0.31, 0.22, 0.36, 0.33, 0.33, 0.63, 0.52, 0.59,
0.67, 0.26, 0.44, 0.52, 0.48, 0.33, 0.44, 0.44, 0.3, 0.33, 0.5
), group = c("Non-responders", "Non-responders", "Non-responders",
"Non-responders", "Non-responders", "Non-responders", "Non-responders",
"Non-responders", "Non-responders", "Non-responders", "Non-responders",
"Non-responders", "Non-responders", "Non-responders", "Non-responders",
"Non-responders", "Non-responders", "Non-responders", "Non-responders",
"Non-responders", "Responders", "Responders", "Responders", "Responders",
"Responders", "Responders", "Responders", "Responders", "Responders",
"Responders", "Responders", "Responders", "Responders", "Responders"
)), class = "data.frame", row.names = c(NA, -34L))
>
我试过下面这段代码,但没有得到你生成的那种结果
# OP's attempt: sign the altered fraction and the log10 p-value by CNV type,
# then draw one facet per value of the synthetic `grp` column.
# NOTE(review): `crossing()` pairs every row of `df` with both `grp` labels, so
# each facet receives the same rows; the data's own `group` column is not used
# for faceting here.
df %>%
  mutate(
    net_frequency = ifelse(
      CNV == "Deletion",
      -Percentage_altered/100,
      Percentage_altered/100
    )
  ) %>%
  crossing(., tibble(grp = c("Responders", "Non-Responders"))) %>%
  mutate(
    log10_pvalue = if_else(CNV == "Deletion", -log10_pvalue, log10_pvalue)
  ) %>%
  ggplot(aes(x = log10_pvalue, y = net_frequency, color = log10_pvalue)) +
  geom_point(aes(size = Percentage_altered)) +
  # Only rows whose signed log10_pvalue exceeds -log10(0.05) get a gene label.
  geom_text_repel(
    aes(label = ifelse(log10_pvalue > -log10(0.05), gene, "")),
    force = 10
  ) +
  geom_hline(yintercept = 0, lty = 2) +
  scale_color_distiller(type = "div") +
  theme_classic() +
  facet_wrap(~grp)
我得到了这样的图,但它没有意义
如果你看,对于这两个组,只绘制了响应者的信息
你能帮忙编辑一下代码吗
正如 @andrew_reece 所建议的,facet_* 会有所帮助。
由于数据中没有任何“响应者”的概念,我将使用 tidyr::crossing 盲目地复制数据。
此外,为了演示,我删除了 theme_classic 以突出显示各个分面面板。(使用它没有问题,我只是想让区别更清楚。)
library(dplyr)
library(ggplot2)
library(ggrepel) # geom_text_repel
library(tidyr) # crossing
# Build one panel per response label from data that has no response column.
# `crossing()` pairs every row of `df` with each `resp` label, so both facets
# show the same points (a blind duplication, for demonstration only).
# Fix: the `.` placeholder must be followed by a comma in both `crossing()`
# and `ggplot()`; without it the snippet does not parse.
df %>%
  mutate(
    # Deletions are plotted below zero, amplifications above.
    net_frequency = ifelse(
      CNV == "Deletion",
      -Percentage_altered/100,
      Percentage_altered/100
    )
  ) %>%
  crossing(., tibble(resp = c("Responder", "Non-Responder"))) %>%
  ggplot(., aes(x = log10_pvalue, y = net_frequency)) +
  geom_point(aes(size = Percentage_altered, color = log10_pvalue)) +
  # Only rows with log10_pvalue above -log10(0.05) get a gene label.
  geom_text_repel(
    aes(label = ifelse(log10_pvalue > -log10(0.05), gene, "")),
    force = 10
  ) +
  geom_hline(yintercept = 0, lty = 2) +
  facet_wrap(. ~ resp)
至于“两组不同颜色”,不太清楚你需要什么。如果您希望(例如)响应者的色标为“蓝色”,无响应者的色标为“红色”,请查看 ggplot-extension 包,例如 ggnewscale
或 ggrelayer
。 (它们不是内置的。)
已更新 包括 OP 的更新数据,现在有两组。
编辑 2:根据评论,删除了 OP 原来在 geom_text_repel 中使用的标签过滤条件。
下面的方法将所有 CNV == 'Deletion' 行的 log10_pvalue 取为负值,从而得到发散色标;再与 facet_wrap() 搭配使用,即可实现您的目标。
# Diverging colour scale: Deletion rows get a negated log10_pvalue and a
# negated net_frequency; one facet is drawn per value of the `group` column.
df %>%
  mutate(
    net_frequency = ifelse(
      CNV == "Deletion",
      -Percentage_altered/100,
      Percentage_altered/100
    ),
    log10_pvalue = if_else(CNV == "Deletion", -log10_pvalue, log10_pvalue)
  ) %>%
  ggplot(aes(x = log10_pvalue, y = net_frequency, color = log10_pvalue)) +
  geom_point(aes(size = Percentage_altered)) +
  # Every point is labelled with its gene (no significance filter).
  geom_text_repel(aes(label = gene), force = 10) +
  geom_hline(yintercept = 0, lty = 2) +
  scale_color_distiller(type = "div") +
  theme_classic() +
  facet_wrap(~group)
我有两组病人。我想为两组患者画图
类似这样的图
我有这样一个数据
> dput(df)
structure(list(gene = c("18q", "4q", "21p", "21q", "5q", "22q",
"17p", "3p", "9p", "4p", "9q", "19q", "10q", "15q", "16p", "19p",
"1p", "18p", "16q", "8p", "21q", "4q", "18q", "21p", "1p", "3p",
"4p", "17p", "5q", "16q", "18p", "14q", "19p", "20q"), CNV = c("Deletion",
"Deletion", "Deletion", "Deletion", "Deletion", "Deletion", "Deletion",
"Deletion", "Deletion", "Deletion", "Deletion", "Deletion", "Deletion",
"Deletion", "Deletion", "Deletion", "Deletion", "Deletion", "Deletion",
"Deletion", "Deletion", "Deletion", "Deletion", "Deletion", "Deletion",
"Deletion", "Deletion", "Deletion", "Deletion", "Deletion", "Deletion",
"Deletion", "Deletion", "Amplification"), log10_pvalue = c(5.974694135,
5.73754891, 4.995678626, 4.970616222, 4.793174124, 4.793174124,
4.109020403, 3.524328812, 3.524328812, 2.823908741, 2.567030709,
2.186419011, 1.769551079, 1.59345982, 1.59345982, 1.59345982,
1.416801226, 1.195860568, 1.094743951, 1.087777943, 4.083019953,
3.826813732, 3.826813732, 3.826813732, 2.675717545, 2.675717545,
2.675717545, 2.342944147, 2.084072788, 1.850780887, 1.659555885,
1.197226275, 1.197226275, 1.88941029), Percentage_altered = c(0.61,
0.53, 0.61, 0.56, 0.44, 0.5, 0.5, 0.44, 0.5, 0.47, 0.39, 0.28,
0.33, 0.31, 0.33, 0.31, 0.22, 0.36, 0.33, 0.33, 0.63, 0.52, 0.59,
0.67, 0.26, 0.44, 0.52, 0.48, 0.33, 0.44, 0.44, 0.3, 0.33, 0.5
), group = c("Non-responders", "Non-responders", "Non-responders",
"Non-responders", "Non-responders", "Non-responders", "Non-responders",
"Non-responders", "Non-responders", "Non-responders", "Non-responders",
"Non-responders", "Non-responders", "Non-responders", "Non-responders",
"Non-responders", "Non-responders", "Non-responders", "Non-responders",
"Non-responders", "Responders", "Responders", "Responders", "Responders",
"Responders", "Responders", "Responders", "Responders", "Responders",
"Responders", "Responders", "Responders", "Responders", "Responders"
)), class = "data.frame", row.names = c(NA, -34L))
>
我试过下面这段代码,但没有得到你生成的那种结果
# OP's attempt: sign the altered fraction and the log10 p-value by CNV type,
# then draw one facet per value of the synthetic `grp` column.
# NOTE(review): `crossing()` pairs every row of `df` with both `grp` labels, so
# each facet receives the same rows; the data's own `group` column is not used
# for faceting here.
df %>%
  mutate(
    net_frequency = ifelse(
      CNV == "Deletion",
      -Percentage_altered/100,
      Percentage_altered/100
    )
  ) %>%
  crossing(., tibble(grp = c("Responders", "Non-Responders"))) %>%
  mutate(
    log10_pvalue = if_else(CNV == "Deletion", -log10_pvalue, log10_pvalue)
  ) %>%
  ggplot(aes(x = log10_pvalue, y = net_frequency, color = log10_pvalue)) +
  geom_point(aes(size = Percentage_altered)) +
  # Only rows whose signed log10_pvalue exceeds -log10(0.05) get a gene label.
  geom_text_repel(
    aes(label = ifelse(log10_pvalue > -log10(0.05), gene, "")),
    force = 10
  ) +
  geom_hline(yintercept = 0, lty = 2) +
  scale_color_distiller(type = "div") +
  theme_classic() +
  facet_wrap(~grp)
我得到了这样的图,但它没有意义
如果你看,对于这两个组,只绘制了响应者的信息
你能帮忙编辑一下代码吗
正如 @andrew_reece 所建议的,facet_* 会有所帮助。
由于数据中没有任何“响应者”的概念,我将使用 tidyr::crossing 盲目地复制数据。
此外,为了演示,我删除了 theme_classic 以突出显示各个分面面板。(使用它没有问题,我只是想让区别更清楚。)
library(dplyr)
library(ggplot2)
library(ggrepel) # geom_text_repel
library(tidyr) # crossing
# Build one panel per response label from data that has no response column.
# `crossing()` pairs every row of `df` with each `resp` label, so both facets
# show the same points (a blind duplication, for demonstration only).
# Fix: the `.` placeholder must be followed by a comma in both `crossing()`
# and `ggplot()`; without it the snippet does not parse.
df %>%
  mutate(
    # Deletions are plotted below zero, amplifications above.
    net_frequency = ifelse(
      CNV == "Deletion",
      -Percentage_altered/100,
      Percentage_altered/100
    )
  ) %>%
  crossing(., tibble(resp = c("Responder", "Non-Responder"))) %>%
  ggplot(., aes(x = log10_pvalue, y = net_frequency)) +
  geom_point(aes(size = Percentage_altered, color = log10_pvalue)) +
  # Only rows with log10_pvalue above -log10(0.05) get a gene label.
  geom_text_repel(
    aes(label = ifelse(log10_pvalue > -log10(0.05), gene, "")),
    force = 10
  ) +
  geom_hline(yintercept = 0, lty = 2) +
  facet_wrap(. ~ resp)
至于“两组不同颜色”,不太清楚你需要什么。如果您希望(例如)响应者的色标为“蓝色”,无响应者的色标为“红色”,请查看 ggplot-extension 包,例如 ggnewscale
或 ggrelayer
。 (它们不是内置的。)
已更新 包括 OP 的更新数据,现在有两组。
编辑 2:根据评论,删除了 OP 原来在 geom_text_repel 中使用的标签过滤条件。
下面的方法将所有 CNV == 'Deletion' 行的 log10_pvalue 取为负值,从而得到发散色标;再与 facet_wrap() 搭配使用,即可实现您的目标。
# Diverging colour scale: Deletion rows get a negated log10_pvalue and a
# negated net_frequency; one facet is drawn per value of the `group` column.
df %>%
  mutate(
    net_frequency = ifelse(
      CNV == "Deletion",
      -Percentage_altered/100,
      Percentage_altered/100
    ),
    log10_pvalue = if_else(CNV == "Deletion", -log10_pvalue, log10_pvalue)
  ) %>%
  ggplot(aes(x = log10_pvalue, y = net_frequency, color = log10_pvalue)) +
  geom_point(aes(size = Percentage_altered)) +
  # Every point is labelled with its gene (no significance filter).
  geom_text_repel(aes(label = gene), force = 10) +
  geom_hline(yintercept = 0, lty = 2) +
  scale_color_distiller(type = "div") +
  theme_classic() +
  facet_wrap(~group)