如何制作带有与相对丰度图相对应的颜色标签的复合图例?
How to make a compound legend with color tag that correspond to the relative abundance plot?
我想从下面的paper中复制一个数字。
在分离X1列时卡住了。我想使用正则表达式,但不知道如何使用。
我有一个计划,通过下划线分隔符(我得到了一个列表)来分隔每个单词,然后将 [-tes & -ria] 和 [-ceae] 后缀的单词分别提取到 Phylum 和 Family 中。之后, family 之后的词应该被收集到 Genus 中。为了准确起见,可能将“未分类”和少于 5 个字符的词的条件分组到前面的词。
此外,是否可以为对应于相对丰度图的每个家族添加小颜色标签?
library(tidyverse)
james <- read_csv("tableS2a.csv")
james <- james %>% mutate(
Cecum = rowSums(select(james, contains("Caecum"))),
Transverse = rowSums(select(james, contains("Transv"))),
Sigmoid = rowSums(select(james, contains("Sigmoi")))
)
james2 <- james %>%
select(X1, Cecum, Transverse, Sigmoid)
james.tab <- james2 %>%
mutate(meanAbundance =
rowMeans(
column_to_rownames(james2, var = "X1")
)
) %>%
arrange(desc(meanAbundance)) %>%
top_n(30, meanAbundance) # extract top30
write.csv2(james.tab, "jamestab.csv")
james.tab2 <-
as.data.frame(
apply(
select(
james.tab,
Cecum,
Transverse,
Sigmoid), 2,
function(x) x / sum(x) * 100)
)
james.tab3 <-
bind_cols(
as.data.frame(
select(james.tab, X1)),
as.data.frame(james.tab2)
)
james.X1 <- select(james.tab3, X1)
# Separate X1 to Phylum(-tes/-ria), Family (-ceae), and genus
james.list <- strsplit(pull(james.X1, X1), "_")
james.class <-
if_else(grepl("(ceae)", james.X1) == T,
mutate(james.X1, Family =
grep(
"[[:alpha:]]ceae(_)",
strsplit(pull(james.X1, X1), "_"),
value = T
)))
我是R的新手,上面的代码大部分是从我以前的工作中粘贴过来的。如果代码效率低下,请原谅我。数据集:Original table -> Top30 csv (pastebin)
追加
这是最近的结果
我没有成功实现ggtext包,可能主题地址不对?
library(tidyverse)
library(patchwork)
library(ggtext)
library(glue)
james <- read_csv("tableS2a.csv")
james2 <- james %>%
mutate(
Cecum = rowSums(select(james, contains("Caecum"))),
Transverse = rowSums(select(james, contains("Transv"))),
Sigmoid = rowSums(select(james, contains("Sigmoi")))
) %>%
select(X1, Cecum, Transverse, Sigmoid) %>%
filter(grepl("(ceae)", james$X1)) # Filter rows with -ceae suffix only
# extract family value with selecting -ceae/les suffix word
family.naming0 <-
regmatches(james2$X1,
regexpr("(?<=_)(.*?(ceae|les)(?=_))", james2$X1, perl = T))
#in between "_" to fail-safe double -ceae. E.g. Bacteria_Bacteriaceae_Aceae
family.naming1 <-
regmatches(james2$X1, regexpr("(?<=ceae_|les_)\d", james2$X1, perl = T))
family.naming2 <-
regmatches(james2$X1, regexpr("(?<=ceae_|les_)unclassified", james2$X1, perl = T))
family.naming3 <-
ifelse(
grepl("(?<=[(ceae_)|(les_)])\d", james2$X1, perl = T),
paste0(family.naming0, " ", family.naming1),
ifelse(
grepl("(?<=[(ceae_)|(les_)])unclassified", james2$X1, perl = T),
paste0(family.naming0, " ", family.naming2),
paste0(family.naming0)
))
james3 <- james2 %>%
gather("Cecum", "Transverse", "Sigmoid", key = "location", value = "abundance") %>%
mutate(relativeAbundance=abundance/sum(abundance) * 100) %>%
mutate(phylum=gsub("(_.*)","", X1)) %>% # extract phylum value with selecting first word
mutate(family=
ifelse(
grepl("(?<=[(ceae_)|(les_)])\d", X1, perl = T),
paste0(family.naming0, " ", family.naming1),
ifelse(
grepl("(?<=[(ceae_)|(les_)])unclassified", X1, perl = T),
paste0(family.naming0, " ", family.naming2),
paste0(family.naming0)
))) %>%
mutate(genus=gsub("_", " ", sub("(.*ceae)+?_((unclassified|\d)*(_)*)", "", X1)))
# change it into percentage
james4 <-
bind_cols(select(james2, X1), as.data.frame(
apply(
select(
james2,
Cecum,
Transverse,
Sigmoid), 2,
function(x) x / sum(x) * 100)))
jamesReg <- james4 %>%
mutate(james4,
meanAbundance=rowMeans(select(james4, Cecum, Transverse, Sigmoid))) %>%
arrange(desc(meanAbundance)) %>%
top_n(30, meanAbundance) %>%
pull(X1)
# collect top 30 from james4X reference
james5 <- james3 %>%
filter(X1 %in% jamesReg)
# change order
james5$location_f <-
factor(james5$location, labels = c("Cecum", "Transverse", "Sigmoid"))
james6 <-
select(james5, location_f, relativeAbundance, genus)
# First plot
james.plot <-
ggplot(james6,
aes(x = location_f, y = relativeAbundance, fill = genus)) +
geom_bar(position = "fill", stat = "identity", show.legend = F) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + # y axis percentage
#theme_minimal() +
theme(axis.title.x = element_blank(),
panel.background = element_blank()) +
ylab("Relative abundances (%)") +
scale_fill_hue(l=60, c=80)
james.table <- data.frame("relativeAbundance"=james5$relativeAbundance[1:30]+
james5$relativeAbundance[31:60]+
james5$relativeAbundance[61:90],
"phylum"=james5$phylum[1:30],
"family"=james5$family[1:30],
"genus"=james5$genus[1:30])
# get colour pattern
ggplotColours <- function(n = 6, h = c(0, 360) + 15) {
if ((diff(h) %% 360) < 1)
h[2] <- h[2] - 360 / n
hcl(h = (seq(h[1], h[2], length = n)), c = 100, l = 65)
}
family <- pull(select(james.table, family))
genus <- pull(select(james.table, genus))
james.table2 <- james.table %>%
mutate(color=ggplotColours(nrow(james.table))) %>% # just in case
mutate(asv=glue("{family}: <i>{genus}</i>"))
# color for long vertical tile (phylum tile)
james.phyl.col <- c("#fddb47", "#58b9b2", "#6585c3", "#e25a4b")
# legend making or second plot
james.legend <-
ggplot(james.table2, aes(y = asv)) +
geom_tile(aes(x = 1, fill = asv), width = 0.9, height = 0.9) +
geom_tile(aes(x = 0.2),
fill = james.phyl.col[as.numeric(as.factor(james.table2$phylum))],
width = 0.4) +
scale_y_discrete(position = "right", expand = c(0,0),
name = "") +
scale_x_continuous(expand = c(0,0), breaks = NULL, name = "") +
scale_fill_discrete(guide = "none") +
facet_grid(phylum ~ ., scales = "free_y", space = "free_y",
switch = "y") +
theme(axis.ticks = element_blank(),
strip.background = element_blank(),
aspect.ratio = 1,
axis.text.y = element_markdown())
# patchwork
james.plot + james.legend
最终图片final
这里是一个示例,说明如何开始将图例作为单独的图,稍后您可以在主图旁边拼凑。
基本上,您是为每个项目制作图块,然后按组对它们进行分面。要使方块完全 1:1 带有刻面有点棘手,因此您必须稍微调整一下 width = ...
和 height = ...
才能使其看起来正确。
library(ggplot2)
# Example of item-group relations
df <- data.frame(
group = c("Actinobacteria", "Actinobacteria", "Bacteroidetes", "Bacteroidetes",
"Firmicutes", "Firmicutes", "Firmicutes"),
item = c("Bifidobacteriaceae", "Coriobacteriaceae",
"Bacteroidaceae", "Porphyromonadacea",
"Acidaminococcacaea", "Clostridiacea", "Clostridiales")
)
group_colours <- c("blue", "green", "red")
ggplot(df, aes(y = item)) +
geom_tile(aes(x = 1, fill = item), width = 0.9, height = 0.9) +
geom_tile(aes(x = 0.2),
fill = group_colours[as.numeric(as.factor(df$group))],
width = 0.4) +
scale_y_discrete(position = "right", expand = c(0,0),
name = "") +
scale_x_continuous(expand = c(0,0), breaks = NULL, name = "") +
scale_fill_discrete(guide = "none") +
facet_grid(group ~ ., scales = "free_y", space = "free_y",
switch = "y") +
theme(axis.ticks = element_blank(),
strip.background = element_blank(),
aspect.ratio = 1)
由 reprex package (v0.3.0)
于 2020-08-18 创建
我想从下面的paper中复制一个数字。
在分离X1列时卡住了。我想使用正则表达式,但不知道如何使用。 我有一个计划,通过下划线分隔符(我得到了一个列表)来分隔每个单词,然后将 [-tes & -ria] 和 [-ceae] 后缀的单词分别提取到 Phylum 和 Family 中。之后, family 之后的词应该被收集到 Genus 中。为了准确起见,可能将“未分类”和少于 5 个字符的词的条件分组到前面的词。
此外,是否可以为对应于相对丰度图的每个家族添加小颜色标签?
library(tidyverse)
james <- read_csv("tableS2a.csv")
james <- james %>% mutate(
Cecum = rowSums(select(james, contains("Caecum"))),
Transverse = rowSums(select(james, contains("Transv"))),
Sigmoid = rowSums(select(james, contains("Sigmoi")))
)
james2 <- james %>%
select(X1, Cecum, Transverse, Sigmoid)
james.tab <- james2 %>%
mutate(meanAbundance =
rowMeans(
column_to_rownames(james2, var = "X1")
)
) %>%
arrange(desc(meanAbundance)) %>%
top_n(30, meanAbundance) # extract top30
write.csv2(james.tab, "jamestab.csv")
james.tab2 <-
as.data.frame(
apply(
select(
james.tab,
Cecum,
Transverse,
Sigmoid), 2,
function(x) x / sum(x) * 100)
)
james.tab3 <-
bind_cols(
as.data.frame(
select(james.tab, X1)),
as.data.frame(james.tab2)
)
james.X1 <- select(james.tab3, X1)
# Separate X1 to Phylum(-tes/-ria), Family (-ceae), and genus
james.list <- strsplit(pull(james.X1, X1), "_")
james.class <-
if_else(grepl("(ceae)", james.X1) == T,
mutate(james.X1, Family =
grep(
"[[:alpha:]]ceae(_)",
strsplit(pull(james.X1, X1), "_"),
value = T
)))
我是R的新手,上面的代码大部分是从我以前的工作中粘贴过来的。如果代码效率低下,请原谅我。数据集:Original table -> Top30 csv (pastebin)
追加
这是最近的结果 我没有成功实现ggtext包,可能主题地址不对?
library(tidyverse)
library(patchwork)
library(ggtext)
library(glue)
james <- read_csv("tableS2a.csv")
james2 <- james %>%
mutate(
Cecum = rowSums(select(james, contains("Caecum"))),
Transverse = rowSums(select(james, contains("Transv"))),
Sigmoid = rowSums(select(james, contains("Sigmoi")))
) %>%
select(X1, Cecum, Transverse, Sigmoid) %>%
filter(grepl("(ceae)", james$X1)) # Filter rows with -ceae suffix only
# extract family value with selecting -ceae/les suffix word
family.naming0 <-
regmatches(james2$X1,
regexpr("(?<=_)(.*?(ceae|les)(?=_))", james2$X1, perl = T))
#in between "_" to fail-safe double -ceae. E.g. Bacteria_Bacteriaceae_Aceae
family.naming1 <-
regmatches(james2$X1, regexpr("(?<=ceae_|les_)\d", james2$X1, perl = T))
family.naming2 <-
regmatches(james2$X1, regexpr("(?<=ceae_|les_)unclassified", james2$X1, perl = T))
family.naming3 <-
ifelse(
grepl("(?<=[(ceae_)|(les_)])\d", james2$X1, perl = T),
paste0(family.naming0, " ", family.naming1),
ifelse(
grepl("(?<=[(ceae_)|(les_)])unclassified", james2$X1, perl = T),
paste0(family.naming0, " ", family.naming2),
paste0(family.naming0)
))
james3 <- james2 %>%
gather("Cecum", "Transverse", "Sigmoid", key = "location", value = "abundance") %>%
mutate(relativeAbundance=abundance/sum(abundance) * 100) %>%
mutate(phylum=gsub("(_.*)","", X1)) %>% # extract phylum value with selecting first word
mutate(family=
ifelse(
grepl("(?<=[(ceae_)|(les_)])\d", X1, perl = T),
paste0(family.naming0, " ", family.naming1),
ifelse(
grepl("(?<=[(ceae_)|(les_)])unclassified", X1, perl = T),
paste0(family.naming0, " ", family.naming2),
paste0(family.naming0)
))) %>%
mutate(genus=gsub("_", " ", sub("(.*ceae)+?_((unclassified|\d)*(_)*)", "", X1)))
# change it into percentage
james4 <-
bind_cols(select(james2, X1), as.data.frame(
apply(
select(
james2,
Cecum,
Transverse,
Sigmoid), 2,
function(x) x / sum(x) * 100)))
jamesReg <- james4 %>%
mutate(james4,
meanAbundance=rowMeans(select(james4, Cecum, Transverse, Sigmoid))) %>%
arrange(desc(meanAbundance)) %>%
top_n(30, meanAbundance) %>%
pull(X1)
# collect top 30 from james4X reference
james5 <- james3 %>%
filter(X1 %in% jamesReg)
# change order
james5$location_f <-
factor(james5$location, labels = c("Cecum", "Transverse", "Sigmoid"))
james6 <-
select(james5, location_f, relativeAbundance, genus)
# First plot
james.plot <-
ggplot(james6,
aes(x = location_f, y = relativeAbundance, fill = genus)) +
geom_bar(position = "fill", stat = "identity", show.legend = F) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + # y axis percentage
#theme_minimal() +
theme(axis.title.x = element_blank(),
panel.background = element_blank()) +
ylab("Relative abundances (%)") +
scale_fill_hue(l=60, c=80)
james.table <- data.frame("relativeAbundance"=james5$relativeAbundance[1:30]+
james5$relativeAbundance[31:60]+
james5$relativeAbundance[61:90],
"phylum"=james5$phylum[1:30],
"family"=james5$family[1:30],
"genus"=james5$genus[1:30])
# get colour pattern
ggplotColours <- function(n = 6, h = c(0, 360) + 15) {
if ((diff(h) %% 360) < 1)
h[2] <- h[2] - 360 / n
hcl(h = (seq(h[1], h[2], length = n)), c = 100, l = 65)
}
family <- pull(select(james.table, family))
genus <- pull(select(james.table, genus))
james.table2 <- james.table %>%
mutate(color=ggplotColours(nrow(james.table))) %>% # just in case
mutate(asv=glue("{family}: <i>{genus}</i>"))
# color for long vertical tile (phylum tile)
james.phyl.col <- c("#fddb47", "#58b9b2", "#6585c3", "#e25a4b")
# legend making or second plot
james.legend <-
ggplot(james.table2, aes(y = asv)) +
geom_tile(aes(x = 1, fill = asv), width = 0.9, height = 0.9) +
geom_tile(aes(x = 0.2),
fill = james.phyl.col[as.numeric(as.factor(james.table2$phylum))],
width = 0.4) +
scale_y_discrete(position = "right", expand = c(0,0),
name = "") +
scale_x_continuous(expand = c(0,0), breaks = NULL, name = "") +
scale_fill_discrete(guide = "none") +
facet_grid(phylum ~ ., scales = "free_y", space = "free_y",
switch = "y") +
theme(axis.ticks = element_blank(),
strip.background = element_blank(),
aspect.ratio = 1,
axis.text.y = element_markdown())
# patchwork
james.plot + james.legend
最终图片final
这里是一个示例,说明如何开始将图例作为单独的图,稍后您可以在主图旁边拼凑。
基本上,您是为每个项目制作图块,然后按组对它们进行分面。要使方块完全 1:1 带有刻面有点棘手,因此您必须稍微调整一下 width = ...
和 height = ...
才能使其看起来正确。
library(ggplot2)
# Example of item-group relations
df <- data.frame(
group = c("Actinobacteria", "Actinobacteria", "Bacteroidetes", "Bacteroidetes",
"Firmicutes", "Firmicutes", "Firmicutes"),
item = c("Bifidobacteriaceae", "Coriobacteriaceae",
"Bacteroidaceae", "Porphyromonadacea",
"Acidaminococcacaea", "Clostridiacea", "Clostridiales")
)
group_colours <- c("blue", "green", "red")
ggplot(df, aes(y = item)) +
geom_tile(aes(x = 1, fill = item), width = 0.9, height = 0.9) +
geom_tile(aes(x = 0.2),
fill = group_colours[as.numeric(as.factor(df$group))],
width = 0.4) +
scale_y_discrete(position = "right", expand = c(0,0),
name = "") +
scale_x_continuous(expand = c(0,0), breaks = NULL, name = "") +
scale_fill_discrete(guide = "none") +
facet_grid(group ~ ., scales = "free_y", space = "free_y",
switch = "y") +
theme(axis.ticks = element_blank(),
strip.background = element_blank(),
aspect.ratio = 1)
由 reprex package (v0.3.0)
于 2020-08-18 创建