R 在循环中使用 gsub
R using gsub in a loop
我有以下列名向量:
# Column names that need human-readable labels. Each entry starts with a
# short name from the glossary followed by a descriptive suffix.
plot_variables <- c(
  "Ser predicted (g/L)", "Ser initial (g/L)", "Ser experimental (g/L)",
  "Glu predicted (g/L)", "Glu initial (g/L)", "Glu experimental (g/L)",
  "Pro predicted (g/L)" # ... further entries omitted for brevity
)
我有这些简称的词汇表:
# Glossary mapping each short name (`short`) to its full name (`full`).
# The `...` entries mark rows elided from this listing; the two vectors are
# paired by position, so they must stay the same length.
df_glossary <- data.frame(
short = c("Cys", "Pro", "Phe", "Ser", "Glu", "Glc", ...),
full = c("Cysteine", "Proline", "Phenylalanine", "Serine", "Glutamate", "Glucose", ...),
# Keep both columns as character vectors rather than factors.
stringsAsFactors = FALSE
)
我想匹配这两个并得到类似的东西:
# Desired result: one row per plot variable. `variable` holds the original
# column name and `label` holds the same text with the leading short name
# replaced by its full name (e.g. "Ser ..." becomes "Serine ...").
# The `...` entries mark rows elided from this listing.
names_matching <- data.frame(
variable = c("Ser predicted (g/L)", "Ser initial (g/L)", "Ser experimental (g/L)", ...),
label = c("Serine predicted (g/L)", "Serine initial (g/L)", "Serine experimental (g/L)", ...)
)
有没有比这更优雅的方法:
# Current approach: apply every glossary replacement to the variable names,
# one glossary row at a time, then pair the originals with the final result.
# NOTE(review): `%>%` is not base R; this snippet presumably relies on
# magrittr/dplyr being attached elsewhere.

# Working copy of the names that the loop below rewrites in place.
pl<-unlist(plot_variables)
# Each iteration replaces one short name (literal match, fixed = TRUE) and
# writes the result back to the outer `pl` with `<<-`, so replacements
# accumulate from one iteration to the next. The assigned value is also the
# function's return value, so sapply collects every intermediate state, one
# column per glossary row.
pl<-sapply(1:nrow(df_glossary) , function(x){
pl<<- gsub(df_glossary$short[x], df_glossary$full[x], pl, fixed = TRUE)
})
# Only the last column has had all replacements applied; keep that one.
pl <- pl[,nrow(df_glossary)] %>% data.frame()
# Put the original names and the relabelled names side by side.
names_matching <- cbind(plot_variables %>% data.frame, pl)
我不确定我是否理解了这个问题,这行得通吗?
library(dplyr)

# Glossary: `short` is the abbreviation used in the plot variable names,
# `full` is the name to display in labels. The column is called `short`
# (not `shortnames`) so that `.$short` below is an exact match instead of
# depending on `$` partial matching.
df_glossary <- data.frame(
  short = c("Cys", "Pro", "Phe", "Ser", "Glu", "Glc"),
  full = c("Cysteine", "Proline", "Phenylalanine", "Serine", "Glutamate", "Glucose"),
  stringsAsFactors = FALSE
)

# Kept for reference; the pipeline below builds its table from the glossary
# and the suffixes alone and does not read `plot_variables`.
plot_variables <- c(
  "Ser predicted (g/L)", "Ser initial (g/L)", "Ser experimental (g/L)",
  "Glu predicted (g/L)", "Glu initial (g/L)", "Glu experimental (g/L)",
  "Pro predicted (g/L)"
)
suffixes <- c("predicted (g/L)", "initial (g/L)", "experimental (g/L)")

# For every glossary row emit one row per suffix, then build the label by
# joining the full name and the suffix with a space.
df_glossary %>%
  rowwise() %>%
  do(data.frame(short = .$short, full = .$full, suffix = suffixes)) %>%
  mutate(label = paste(full, suffix))
short full suffix label
Cys Cysteine predicted (g/L) Cysteine predicted (g/L)
Cys Cysteine initial (g/L) Cysteine initial (g/L)
Cys Cysteine experimental (g/L) Cysteine experimental (g/L)
Pro Proline predicted (g/L) Proline predicted (g/L)
Pro Proline initial (g/L) Proline initial (g/L)
Pro Proline experimental (g/L) Proline experimental (g/L)
Phe Phenylalanine predicted (g/L) Phenylalanine predicted (g/L)
Phe Phenylalanine initial (g/L) Phenylalanine initial (g/L)
Phe Phenylalanine experimental (g/L) Phenylalanine experimental (g/L)
Ser Serine predicted (g/L) Serine predicted (g/L)
Ser Serine initial (g/L) Serine initial (g/L)
Ser Serine experimental (g/L) Serine experimental (g/L)
Glu Glutamate predicted (g/L) Glutamate predicted (g/L)
Glu Glutamate initial (g/L) Glutamate initial (g/L)
Glu Glutamate experimental (g/L) Glutamate experimental (g/L)
Glc Glucose predicted (g/L) Glucose predicted (g/L)
Glc Glucose initial (g/L) Glucose initial (g/L)
Glc Glucose experimental (g/L) Glucose experimental (g/L)
我认为您要查找的是 gsubfn 包中的 gsubfn 函数。如果你想从另一个数据框中读取键和值,会稍微麻烦一些,但一般来说它是这样工作的:
> library(gsubfn)
> # The pattern is a plain alternation of the short names. A bracket
> # expression such as '[Ser|Glu|Pro]*' would instead match runs of the
> # single characters S, e, r, |, G, l, u, P, o (including empty matches).
> # Each match is looked up by name in the list and replaced by its value.
> gsubfn('Ser|Glu|Pro',
list('Ser'='Serine','Glu'='Glutamate','Pro'='Proline'), plot_variables)
[1] "Serine predicted (g/L)" "Serine initial (g/L)"
[3] "Serine experimental (g/L)" "Glutamate predicted (g/L)"
[5] "Glutamate initial (g/L)" "Glutamate experimental (g/L)"
[7] "Proline predicted (g/L)"
我有以下列名向量:
# Column names that need human-readable labels. Each entry starts with a
# short name from the glossary followed by a descriptive suffix.
plot_variables <- c(
  "Ser predicted (g/L)", "Ser initial (g/L)", "Ser experimental (g/L)",
  "Glu predicted (g/L)", "Glu initial (g/L)", "Glu experimental (g/L)",
  "Pro predicted (g/L)" # ... further entries omitted for brevity
)
我有这些简称的词汇表:
# Glossary mapping each short name (`short`) to its full name (`full`).
# The `...` entries mark rows elided from this listing; the two vectors are
# paired by position, so they must stay the same length.
df_glossary <- data.frame(
short = c("Cys", "Pro", "Phe", "Ser", "Glu", "Glc", ...),
full = c("Cysteine", "Proline", "Phenylalanine", "Serine", "Glutamate", "Glucose", ...),
# Keep both columns as character vectors rather than factors.
stringsAsFactors = FALSE
)
我想匹配这两个并得到类似的东西:
# Desired result: one row per plot variable. `variable` holds the original
# column name and `label` holds the same text with the leading short name
# replaced by its full name (e.g. "Ser ..." becomes "Serine ...").
# The `...` entries mark rows elided from this listing.
names_matching <- data.frame(
variable = c("Ser predicted (g/L)", "Ser initial (g/L)", "Ser experimental (g/L)", ...),
label = c("Serine predicted (g/L)", "Serine initial (g/L)", "Serine experimental (g/L)", ...)
)
有没有比这更优雅的方法:
# Current approach: apply every glossary replacement to the variable names,
# one glossary row at a time, then pair the originals with the final result.
# NOTE(review): `%>%` is not base R; this snippet presumably relies on
# magrittr/dplyr being attached elsewhere.

# Working copy of the names that the loop below rewrites in place.
pl<-unlist(plot_variables)
# Each iteration replaces one short name (literal match, fixed = TRUE) and
# writes the result back to the outer `pl` with `<<-`, so replacements
# accumulate from one iteration to the next. The assigned value is also the
# function's return value, so sapply collects every intermediate state, one
# column per glossary row.
pl<-sapply(1:nrow(df_glossary) , function(x){
pl<<- gsub(df_glossary$short[x], df_glossary$full[x], pl, fixed = TRUE)
})
# Only the last column has had all replacements applied; keep that one.
pl <- pl[,nrow(df_glossary)] %>% data.frame()
# Put the original names and the relabelled names side by side.
names_matching <- cbind(plot_variables %>% data.frame, pl)
我不确定我是否理解了这个问题,这行得通吗?
library(dplyr)

# Glossary: `short` is the abbreviation used in the plot variable names,
# `full` is the name to display in labels. The column is called `short`
# (not `shortnames`) so that `.$short` below is an exact match instead of
# depending on `$` partial matching.
df_glossary <- data.frame(
  short = c("Cys", "Pro", "Phe", "Ser", "Glu", "Glc"),
  full = c("Cysteine", "Proline", "Phenylalanine", "Serine", "Glutamate", "Glucose"),
  stringsAsFactors = FALSE
)

# Kept for reference; the pipeline below builds its table from the glossary
# and the suffixes alone and does not read `plot_variables`.
plot_variables <- c(
  "Ser predicted (g/L)", "Ser initial (g/L)", "Ser experimental (g/L)",
  "Glu predicted (g/L)", "Glu initial (g/L)", "Glu experimental (g/L)",
  "Pro predicted (g/L)"
)
suffixes <- c("predicted (g/L)", "initial (g/L)", "experimental (g/L)")

# For every glossary row emit one row per suffix, then build the label by
# joining the full name and the suffix with a space.
df_glossary %>%
  rowwise() %>%
  do(data.frame(short = .$short, full = .$full, suffix = suffixes)) %>%
  mutate(label = paste(full, suffix))
short full suffix label
Cys Cysteine predicted (g/L) Cysteine predicted (g/L)
Cys Cysteine initial (g/L) Cysteine initial (g/L)
Cys Cysteine experimental (g/L) Cysteine experimental (g/L)
Pro Proline predicted (g/L) Proline predicted (g/L)
Pro Proline initial (g/L) Proline initial (g/L)
Pro Proline experimental (g/L) Proline experimental (g/L)
Phe Phenylalanine predicted (g/L) Phenylalanine predicted (g/L)
Phe Phenylalanine initial (g/L) Phenylalanine initial (g/L)
Phe Phenylalanine experimental (g/L) Phenylalanine experimental (g/L)
Ser Serine predicted (g/L) Serine predicted (g/L)
Ser Serine initial (g/L) Serine initial (g/L)
Ser Serine experimental (g/L) Serine experimental (g/L)
Glu Glutamate predicted (g/L) Glutamate predicted (g/L)
Glu Glutamate initial (g/L) Glutamate initial (g/L)
Glu Glutamate experimental (g/L) Glutamate experimental (g/L)
Glc Glucose predicted (g/L) Glucose predicted (g/L)
Glc Glucose initial (g/L) Glucose initial (g/L)
Glc Glucose experimental (g/L) Glucose experimental (g/L)
我认为您要查找的是 gsubfn 包中的 gsubfn 函数。如果你想从另一个数据框中读取键和值,会稍微麻烦一些,但一般来说它是这样工作的:
> library(gsubfn)
> # The pattern is a plain alternation of the short names. A bracket
> # expression such as '[Ser|Glu|Pro]*' would instead match runs of the
> # single characters S, e, r, |, G, l, u, P, o (including empty matches).
> # Each match is looked up by name in the list and replaced by its value.
> gsubfn('Ser|Glu|Pro',
list('Ser'='Serine','Glu'='Glutamate','Pro'='Proline'), plot_variables)
[1] "Serine predicted (g/L)" "Serine initial (g/L)"
[3] "Serine experimental (g/L)" "Glutamate predicted (g/L)"
[5] "Glutamate initial (g/L)" "Glutamate experimental (g/L)"
[7] "Proline predicted (g/L)"