R:使用转换 table 更新 data.frame 中的行名
R: update rownames in data.frame using conversion table
我有以下形式的 data.frame:
> set.seed(1)
> myp <- paste0('P', sort(sample(1:15, 10)))
> mydf <- data.frame(param=c(paste0(myp, 'B'), paste0(myp, 'R'), paste0(myp, 'max'), paste0(myp, 'min')),
+ value1=NA, value2=NA, value3=NA)
> rownames(mydf) <- mydf$param
> mydf$param <- NULL
> mydf
value1 value2 value3
P1B NA NA NA
P3B NA NA NA
P4B NA NA NA
P5B NA NA NA
P6B NA NA NA
P8B NA NA NA
P9B NA NA NA
P10B NA NA NA
P11B NA NA NA
P14B NA NA NA
P1R NA NA NA
P3R NA NA NA
P4R NA NA NA
P5R NA NA NA
P6R NA NA NA
P8R NA NA NA
P9R NA NA NA
P10R NA NA NA
P11R NA NA NA
P14R NA NA NA
P1max NA NA NA
P3max NA NA NA
P4max NA NA NA
P5max NA NA NA
P6max NA NA NA
P8max NA NA NA
P9max NA NA NA
P10max NA NA NA
P11max NA NA NA
P14max NA NA NA
P1min NA NA NA
P3min NA NA NA
P4min NA NA NA
P5min NA NA NA
P6min NA NA NA
P8min NA NA NA
P9min NA NA NA
P10min NA NA NA
P11min NA NA NA
P14min NA NA NA
我想更新行的名称,为此我有一个转换 table,如下所示:
> conv.df <- data.frame(name=myp, new.name=paste0('P', 1:10))
> conv.df
name new.name
1 P1 P1
2 P3 P2
3 P4 P3
4 P5 P4
5 P6 P5
6 P8 P6
7 P9 P7
8 P10 P8
9 P11 P9
10 P14 P10
我唯一想做的就是更新 mydf 中的行名,使其反映转换 table conv.df 中的 new.name。
这看起来很简单,但我一时想不明白……我会尝试 regular expression 方法,唯一的问题是我不能 100% 确定只会遇到 PnB、PnR、Pnmax、Pnmin 这几种形式的 rownames……我想要一个适用于任何 PnX 实例的解决方案(它总是 Pn 后跟 [:alpha:])。
你可以把行名做成一列,把根"Pn"和字母“[:alpha:]”分开,然后重命名如下,
# Build the example data: 10 randomly chosen "Pn" roots, each combined with
# the suffixes B, R, max and min, used as row names of an all-NA data frame.
set.seed(1)
myp <- paste0("P", sort(sample(1:15, 10)))
row_labels <- c(
  paste0(myp, "B"), paste0(myp, "R"), paste0(myp, "max"), paste0(myp, "min")
)
mydf <- data.frame(
  value1 = rep(NA, length(row_labels)),
  value2 = NA,
  value3 = NA,
  row.names = row_labels
)
mydf
library(tidyverse)
# Move the row names into a column, split each one into its "Pn" root and
# its trailing suffix, recode the root, then rebuild the row names.
mydf %>%
  rownames_to_column() %>%
  mutate(
    # Backslashes must be doubled inside an R string literal: "\\d" is the
    # regex digit class and "\\1" is the first capture group.
    root = gsub("^(P\\d\\d?).*$", "\\1", rowname),
    letter = gsub("^P\\d\\d?(.*)$", "\\1", rowname)
  ) %>%
  mutate(root = recode(root,
    P1 = "P1",
    P3 = "P2",
    P4 = "P3",
    P5 = "P4",
    P6 = "P5",
    P8 = "P6",
    P9 = "P7",
    P10 = "P8",
    P11 = "P9",
    P14 = "P10"
  )) %>%
  mutate(rowname = paste0(root, letter)) %>%
  column_to_rownames() %>%
  select(-root, -letter)
@teofil 的回答很好而且有效。这是另一种方法,仍然使用重新编码和一些元编程,
library(tidyverse)
# Recode the values of one column of a data frame using a lookup table.
#
# data:      the input data frame
# colname:   the column to be modified, given as a bare (unquoted) name
# df_rename: lookup table; must have columns `name` (old value) and
#            `new.name` (replacement)
#
# Returns `data` with `colname` recoded. If the lookup table is empty,
# `data` is returned unchanged.
rename_col_df <- function(data, colname, df_rename) {
  colname <- enexpr(colname) # capture the user-supplied column name as a symbol
  # Coerce to character: if the lookup columns are factors, `[[<-` below would
  # index the call by integer code instead of adding a named argument.
  old_name <- as.character(df_rename$name)
  new_name <- as.character(df_rename$new.name)
  # recode() needs at least one replacement, so there is nothing to do here.
  if (length(old_name) == 0) {
    return(data)
  }
  # Start constructing the call recode(<colname>):
  # recode_expr[[1]] is `recode`, recode_expr[[2]] is its first argument.
  recode_expr <- expr(recode(!!colname))
  # Append one named argument per mapping, i.e. `old = "new"`.
  for (i in seq_along(old_name)) {
    recode_expr[[old_name[i]]] <- new_name[i]
  }
  data %>% mutate(!!colname := !!recode_expr)
}
# Lookup table mapping each original root to its new name.
conv.df <- data.frame(
  name = myp, new.name = paste0("P", 1:10),
  stringsAsFactors = FALSE
)
# Split each row name into root and suffix, recode the root through the
# lookup table, then rebuild the row names.
mydf %>%
  rownames_to_column() %>%
  mutate(
    # Backslashes must be doubled inside an R string literal: "\\d" is the
    # regex digit class and "\\1" is the first capture group.
    root = gsub("^(P\\d\\d?).*$", "\\1", rowname),
    letter = gsub("^P\\d\\d?(.*)$", "\\1", rowname)
  ) %>%
  rename_col_df(root, conv.df) %>%
  mutate(rowname = paste0(root, letter)) %>%
  column_to_rownames() %>%
  select(-root, -letter)
如果我们想将重命名应用于向量而不是数据帧,
# Recode the values of a vector using a lookup table.
#
# vec:       the vector to be modified
# df_rename: lookup table; must have columns `name` (old value) and
#            `new.name` (replacement)
#
# Returns the recoded vector. If the lookup table is empty, `vec` is
# returned unchanged.
rename_vec_df <- function(vec, df_rename) {
  # Coerce to character: if the lookup columns are factors, `[[<-` below would
  # index the call by integer code instead of adding a named argument.
  old_name <- as.character(df_rename$name)
  new_name <- as.character(df_rename$new.name)
  # recode() needs at least one replacement, so there is nothing to do here.
  if (length(old_name) == 0) {
    return(vec)
  }
  # Start constructing the call recode(<vec>):
  # recode_expr[[1]] is `recode`, recode_expr[[2]] is its first argument.
  recode_expr <- expr(recode(!!vec))
  # Append one named argument per mapping, i.e. `old = "new"`.
  for (i in seq_along(old_name)) {
    recode_expr[[old_name[i]]] <- new_name[i]
  }
  eval(recode_expr)
}
# Seed the RNG so the sampled roots, and therefore the lookup table, are
# reproducible from run to run.
set.seed(1)
myp <- paste0("P", sort(sample(1:15, 10)))
conv.df <- data.frame(
  name = myp, new.name = paste0("P", 1:10),
  stringsAsFactors = FALSE
)
vec <- c("P1", "P14", "P10")
rename_vec_df(vec, conv.df)
要详细了解此处使用的技术,
https://rlang.r-lib.org/reference/quotation.html
https://adv-r.hadley.nz/metaprogramming.html
跟进@Sada93 代码。要避免 recode
步骤,请使用 join
:
library(tidyverse)
# Build the example data: 10 randomly chosen "Pn" roots, each combined with
# the suffixes B, R, max and min, used as row names of an all-NA data frame.
set.seed(1)
myp <- paste0("P", sort(sample(1:15, 10)))
mydf <- data.frame(value1 = rep(NA, 4 * length(myp)), value2 = NA, value3 = NA)
# `myp` is recycled against each suffix repeated once per root.
rownames(mydf) <- paste0(myp, rep(c("B", "R", "max", "min"), each = length(myp)))
mydf
# Lookup table mapping each original root to its new name; kept as character
# so the join key has the same type as the extracted `name` column.
conv.df <- data.frame(
  name = myp, new.name = paste0("P", 1:10),
  stringsAsFactors = FALSE
)
# Split each row name into root (`name`) and suffix (`letter`), look up the
# new root with a join, then rebuild the row names. The `new.name` column
# from the lookup table stays in the result.
mydf %>%
  rownames_to_column() %>%
  mutate(
    # Backslashes must be doubled inside an R string literal: "\\d" is the
    # regex digit class and "\\1" is the first capture group.
    name = gsub("^(P\\d\\d?).*$", "\\1", rowname),
    letter = gsub("^P\\d\\d?(.*)$", "\\1", rowname)
  ) %>%
  left_join(conv.df, by = "name") %>%
  mutate(rowname = paste0(new.name, letter)) %>%
  column_to_rownames() %>%
  select(-name, -letter)
value1 value2 value3 new.name
P1B NA NA NA P1
P2B NA NA NA P2
P3B NA NA NA P3
P4B NA NA NA P4
P5B NA NA NA P5
P6B NA NA NA P6
P7B NA NA NA P7
P8B NA NA NA P8
P9B NA NA NA P9
P10B NA NA NA P10
P1R NA NA NA P1
数据框中不能有重复的行名。这是在 base R 中将新名称添加为一列的一种方法:提取原始行名中的公共部分(即 "P" 后跟数字),用 match 将它与 conv.df$name 匹配,并取得相应的 conv.df$new.name。
# Extract the leading "P<digits>" root of every row name and look it up in
# conv.df$name; match() yields NA for roots that are not in the table.
# Backslashes must be doubled inside an R string literal.
mydf$new_name <- conv.df$new.name[
  match(sub("(P\\d+).*", "\\1", rownames(mydf)), conv.df$name)
]
mydf
# value1 value2 value3 new_name
#P1B NA NA NA P1
#P2B NA NA NA P2
#P3B NA NA NA P3
#P4B NA NA NA P4
#P7B NA NA NA P5
#P8B NA NA NA P6
#P9B NA NA NA P7
#P11B NA NA NA P8
#P12B NA NA NA P9
#P13B NA NA NA P10
#P1R NA NA NA P1
#P2R NA NA NA P2
#...
其中
# Backslashes must be doubled inside an R string literal.
sub("(P\\d+).*", "\\1", rownames(mydf)) # returns
#[1] "P1" "P2" "P3" "P4" "P7" "P8" "P9" "P11" "P12" "P13" "P1" "P2" "P3"
# "P4" "P7" "P8" "P9" "P11" "P12" "P13" "P1" "P2" "P3" "P4" "P7" "P8"
# "P9" "P11" "P12" "P13" "P1" "P2" "P3" "P4" "P7" "P8" "P9" "P11" "P12"
# "P13"
我有以下形式的 data.frame:
> set.seed(1)
> myp <- paste0('P', sort(sample(1:15, 10)))
> mydf <- data.frame(param=c(paste0(myp, 'B'), paste0(myp, 'R'), paste0(myp, 'max'), paste0(myp, 'min')),
+ value1=NA, value2=NA, value3=NA)
> rownames(mydf) <- mydf$param
> mydf$param <- NULL
> mydf
value1 value2 value3
P1B NA NA NA
P3B NA NA NA
P4B NA NA NA
P5B NA NA NA
P6B NA NA NA
P8B NA NA NA
P9B NA NA NA
P10B NA NA NA
P11B NA NA NA
P14B NA NA NA
P1R NA NA NA
P3R NA NA NA
P4R NA NA NA
P5R NA NA NA
P6R NA NA NA
P8R NA NA NA
P9R NA NA NA
P10R NA NA NA
P11R NA NA NA
P14R NA NA NA
P1max NA NA NA
P3max NA NA NA
P4max NA NA NA
P5max NA NA NA
P6max NA NA NA
P8max NA NA NA
P9max NA NA NA
P10max NA NA NA
P11max NA NA NA
P14max NA NA NA
P1min NA NA NA
P3min NA NA NA
P4min NA NA NA
P5min NA NA NA
P6min NA NA NA
P8min NA NA NA
P9min NA NA NA
P10min NA NA NA
P11min NA NA NA
P14min NA NA NA
我想更新行的名称,为此我有一个转换 table,如下所示:
> conv.df <- data.frame(name=myp, new.name=paste0('P', 1:10))
> conv.df
name new.name
1 P1 P1
2 P3 P2
3 P4 P3
4 P5 P4
5 P6 P5
6 P8 P6
7 P9 P7
8 P10 P8
9 P11 P9
10 P14 P10
我唯一想做的就是更新 mydf 中的行名,使其反映转换 table conv.df 中的 new.name。
这看起来很简单,但我一时想不明白……我会尝试 regular expression 方法,唯一的问题是我不能 100% 确定只会遇到 PnB、PnR、Pnmax、Pnmin 这几种形式的 rownames……我想要一个适用于任何 PnX 实例的解决方案(它总是 Pn 后跟 [:alpha:])。
你可以把行名做成一列,把根"Pn"和字母“[:alpha:]”分开,然后重命名如下,
# Build the example data: 10 randomly chosen "Pn" roots, each combined with
# the suffixes B, R, max and min, used as row names of an all-NA data frame.
set.seed(1)
myp <- paste0("P", sort(sample(1:15, 10)))
row_labels <- c(
  paste0(myp, "B"), paste0(myp, "R"), paste0(myp, "max"), paste0(myp, "min")
)
mydf <- data.frame(
  value1 = rep(NA, length(row_labels)),
  value2 = NA,
  value3 = NA,
  row.names = row_labels
)
mydf
library(tidyverse)
# Move the row names into a column, split each one into its "Pn" root and
# its trailing suffix, recode the root, then rebuild the row names.
mydf %>%
  rownames_to_column() %>%
  mutate(
    # Backslashes must be doubled inside an R string literal: "\\d" is the
    # regex digit class and "\\1" is the first capture group.
    root = gsub("^(P\\d\\d?).*$", "\\1", rowname),
    letter = gsub("^P\\d\\d?(.*)$", "\\1", rowname)
  ) %>%
  mutate(root = recode(root,
    P1 = "P1",
    P3 = "P2",
    P4 = "P3",
    P5 = "P4",
    P6 = "P5",
    P8 = "P6",
    P9 = "P7",
    P10 = "P8",
    P11 = "P9",
    P14 = "P10"
  )) %>%
  mutate(rowname = paste0(root, letter)) %>%
  column_to_rownames() %>%
  select(-root, -letter)
@teofil 的回答很好而且有效。这是另一种方法,仍然使用重新编码和一些元编程,
library(tidyverse)
# Recode the values of one column of a data frame using a lookup table.
#
# data:      the input data frame
# colname:   the column to be modified, given as a bare (unquoted) name
# df_rename: lookup table; must have columns `name` (old value) and
#            `new.name` (replacement)
#
# Returns `data` with `colname` recoded. If the lookup table is empty,
# `data` is returned unchanged.
rename_col_df <- function(data, colname, df_rename) {
  colname <- enexpr(colname) # capture the user-supplied column name as a symbol
  # Coerce to character: if the lookup columns are factors, `[[<-` below would
  # index the call by integer code instead of adding a named argument.
  old_name <- as.character(df_rename$name)
  new_name <- as.character(df_rename$new.name)
  # recode() needs at least one replacement, so there is nothing to do here.
  if (length(old_name) == 0) {
    return(data)
  }
  # Start constructing the call recode(<colname>):
  # recode_expr[[1]] is `recode`, recode_expr[[2]] is its first argument.
  recode_expr <- expr(recode(!!colname))
  # Append one named argument per mapping, i.e. `old = "new"`.
  for (i in seq_along(old_name)) {
    recode_expr[[old_name[i]]] <- new_name[i]
  }
  data %>% mutate(!!colname := !!recode_expr)
}
# Lookup table mapping each original root to its new name.
conv.df <- data.frame(
  name = myp, new.name = paste0("P", 1:10),
  stringsAsFactors = FALSE
)
# Split each row name into root and suffix, recode the root through the
# lookup table, then rebuild the row names.
mydf %>%
  rownames_to_column() %>%
  mutate(
    # Backslashes must be doubled inside an R string literal: "\\d" is the
    # regex digit class and "\\1" is the first capture group.
    root = gsub("^(P\\d\\d?).*$", "\\1", rowname),
    letter = gsub("^P\\d\\d?(.*)$", "\\1", rowname)
  ) %>%
  rename_col_df(root, conv.df) %>%
  mutate(rowname = paste0(root, letter)) %>%
  column_to_rownames() %>%
  select(-root, -letter)
如果我们想将重命名应用于向量而不是数据帧,
# Recode the values of a vector using a lookup table.
#
# vec:       the vector to be modified
# df_rename: lookup table; must have columns `name` (old value) and
#            `new.name` (replacement)
#
# Returns the recoded vector. If the lookup table is empty, `vec` is
# returned unchanged.
rename_vec_df <- function(vec, df_rename) {
  # Coerce to character: if the lookup columns are factors, `[[<-` below would
  # index the call by integer code instead of adding a named argument.
  old_name <- as.character(df_rename$name)
  new_name <- as.character(df_rename$new.name)
  # recode() needs at least one replacement, so there is nothing to do here.
  if (length(old_name) == 0) {
    return(vec)
  }
  # Start constructing the call recode(<vec>):
  # recode_expr[[1]] is `recode`, recode_expr[[2]] is its first argument.
  recode_expr <- expr(recode(!!vec))
  # Append one named argument per mapping, i.e. `old = "new"`.
  for (i in seq_along(old_name)) {
    recode_expr[[old_name[i]]] <- new_name[i]
  }
  eval(recode_expr)
}
# Seed the RNG so the sampled roots, and therefore the lookup table, are
# reproducible from run to run.
set.seed(1)
myp <- paste0("P", sort(sample(1:15, 10)))
conv.df <- data.frame(
  name = myp, new.name = paste0("P", 1:10),
  stringsAsFactors = FALSE
)
vec <- c("P1", "P14", "P10")
rename_vec_df(vec, conv.df)
要详细了解此处使用的技术,
https://rlang.r-lib.org/reference/quotation.html https://adv-r.hadley.nz/metaprogramming.html
跟进@Sada93 代码。要避免 recode
步骤,请使用 join
:
library(tidyverse)
# Build the example data: 10 randomly chosen "Pn" roots, each combined with
# the suffixes B, R, max and min, used as row names of an all-NA data frame.
set.seed(1)
myp <- paste0("P", sort(sample(1:15, 10)))
mydf <- data.frame(value1 = rep(NA, 4 * length(myp)), value2 = NA, value3 = NA)
# `myp` is recycled against each suffix repeated once per root.
rownames(mydf) <- paste0(myp, rep(c("B", "R", "max", "min"), each = length(myp)))
mydf
# Lookup table mapping each original root to its new name; kept as character
# so the join key has the same type as the extracted `name` column.
conv.df <- data.frame(
  name = myp, new.name = paste0("P", 1:10),
  stringsAsFactors = FALSE
)
# Split each row name into root (`name`) and suffix (`letter`), look up the
# new root with a join, then rebuild the row names. The `new.name` column
# from the lookup table stays in the result.
mydf %>%
  rownames_to_column() %>%
  mutate(
    # Backslashes must be doubled inside an R string literal: "\\d" is the
    # regex digit class and "\\1" is the first capture group.
    name = gsub("^(P\\d\\d?).*$", "\\1", rowname),
    letter = gsub("^P\\d\\d?(.*)$", "\\1", rowname)
  ) %>%
  left_join(conv.df, by = "name") %>%
  mutate(rowname = paste0(new.name, letter)) %>%
  column_to_rownames() %>%
  select(-name, -letter)
value1 value2 value3 new.name
P1B NA NA NA P1
P2B NA NA NA P2
P3B NA NA NA P3
P4B NA NA NA P4
P5B NA NA NA P5
P6B NA NA NA P6
P7B NA NA NA P7
P8B NA NA NA P8
P9B NA NA NA P9
P10B NA NA NA P10
P1R NA NA NA P1
数据框中不能有重复的行名。这是在 base R 中将新名称添加为一列的一种方法:提取原始行名中的公共部分(即 "P" 后跟数字),用 match 将它与 conv.df$name 匹配,并取得相应的 conv.df$new.name。
# Extract the leading "P<digits>" root of every row name and look it up in
# conv.df$name; match() yields NA for roots that are not in the table.
# Backslashes must be doubled inside an R string literal.
mydf$new_name <- conv.df$new.name[
  match(sub("(P\\d+).*", "\\1", rownames(mydf)), conv.df$name)
]
mydf
# value1 value2 value3 new_name
#P1B NA NA NA P1
#P2B NA NA NA P2
#P3B NA NA NA P3
#P4B NA NA NA P4
#P7B NA NA NA P5
#P8B NA NA NA P6
#P9B NA NA NA P7
#P11B NA NA NA P8
#P12B NA NA NA P9
#P13B NA NA NA P10
#P1R NA NA NA P1
#P2R NA NA NA P2
#...
其中
# Backslashes must be doubled inside an R string literal.
sub("(P\\d+).*", "\\1", rownames(mydf)) # returns
#[1] "P1" "P2" "P3" "P4" "P7" "P8" "P9" "P11" "P12" "P13" "P1" "P2" "P3"
# "P4" "P7" "P8" "P9" "P11" "P12" "P13" "P1" "P2" "P3" "P4" "P7" "P8"
# "P9" "P11" "P12" "P13" "P1" "P2" "P3" "P4" "P7" "P8" "P9" "P11" "P12"
# "P13"