使用 R 在宽数据的列名中插入点
Insert dots in column names in wide data using R
以下数据集为宽格式,重复测量“ql”、“st”和“xy”,前缀为“a”、“b”和“c”;
# Wide-format example data: `id` and `ex` describe each row; every other
# column is a repeated measure named <prefix letter><measure>, e.g. "aql".
df <- data.frame(
  id  = c(1, 2, 3, 4),
  ex  = c(1, 0, 0, 1),
  aql = c(5, 4, NA, 6),
  bql = c(5, 7, NA, 9),
  cql = c(5, 7, NA, 9),
  bst = c(3, 7, 8, 9),
  cst = c(8, 7, 5, 3),
  axy = c(1, 9, 4, 4),
  cxy = c(5, 3, 1, 4)
)
我正在寻找一种在前缀字母“a”、“b”和“c”之后插入点的方法,同时保持其他列(即 id、ex)不变。我一直在使用 gsub 函数解决这个问题,例如
# Asker's attempt, kept for illustration: the empty pattern matches at every
# character position, so a dot is inserted throughout each name rather than
# only after the prefix letter. The backslash must be doubled inside an R
# string literal ("\." on its own is not a valid escape and does not parse).
names(df) <- gsub("", "\\.", names(df))
但得到了不希望的结果。预期的输出看起来像
id ex a.ql b.ql c.ql b.st c.st a.xy c.xy
1 1 1 5 5 5 3 8 1 5
2 2 0 4 7 7 7 7 9 3
3 3 0 NA NA NA 8 5 4 1
4 4 1 6 9 9 9 3 4 4
您也可以尝试使用 tidyverse 方法重塑您的数据,如下所示:
library(tidyverse)

# Example data: `id` and `ex` describe each row; every other column is a
# repeated measure named <prefix letter><measure>, e.g. "aql".
df <- data.frame(
  id  = c(1, 2, 3, 4),
  ex  = c(1, 0, 0, 1),
  aql = c(5, 4, NA, 6),
  bql = c(5, 7, NA, 9),
  cql = c(5, 7, NA, 9),
  bst = c(3, 7, 8, 9),
  cst = c(8, 7, 5, 3),
  axy = c(1, 9, 4, 4),
  cxy = c(5, 3, 1, 4)
)

# Reshape: stack every column except the first two, rebuild each stacked
# column name as <first character>.<remaining characters>, then spread the
# values back out under the new names.
df %>%
  pivot_longer(-c(1, 2)) %>%
  mutate(name = paste0(substr(name, 1, 1), ".", substring(name, 2))) %>%
  pivot_wider(names_from = name, values_from = value)
输出:
# A tibble: 4 x 9
id ex a.ql b.ql c.ql b.st c.st a.xy c.xy
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 5 5 5 3 8 1 5
2 2 0 4 7 7 7 7 9 3
3 3 0 NA NA NA 8 5 4 1
4 4 1 6 9 9 9 3 4 4
你可以这样做:
# Put a dot in front of the trailing measure suffix ("ql", "st" or "xy").
# Only one of the three alternatives can match a given name, so the other two
# backreferences contribute nothing; names without such a suffix ("id", "ex")
# are returned unchanged. Backreferences need a doubled backslash inside an R
# string literal, and the dot needs no escaping in the replacement.
setNames(df, sub("(ql$)|(st$)|(xy$)", ".\\1\\2\\3", names(df)))
#> id ex a.ql b.ql c.ql b.st c.st a.xy c.xy
#> 1 1 1 5 5 5 3 8 1 5
#> 2 2 0 4 7 7 7 7 9 3
#> 3 3 0 NA NA NA 8 5 4 1
#> 4 4 1 6 9 9 9 3 4 4
尝试
# Capture the leading prefix letter (a, b or c) and the rest of the name, then
# rejoin the two groups with a dot between them. Names that do not start with
# a-c ("id", "ex") do not match and are returned unchanged. Backreferences
# need a doubled backslash: a single "\1" in an R string literal is the
# control character 0x01, not a reference to the first group.
sub("(^[a-c])(.+)", "\\1.\\2", names(df))
# [1] "id" "ex" "a.ql" "b.ql" "c.ql" "b.st" "c.st" "a.xy" "c.xy"
或
# Perl-style lookbehind: match the zero-width position directly after a
# leading a, b or c and place a dot there; all other names are left as is.
sub(pattern = "(?<=^[a-c])", replacement = ".", x = names(df), perl = TRUE)
# [1] "id" "ex" "a.ql" "b.ql" "c.ql" "b.st" "c.st" "a.xy" "c.xy"
您可以尝试另一种方法
library(dplyr)
library(stringr) # str_replace() is provided by stringr, not dplyr

# Rename only the measurement columns (aql through cxy). The lookbehind
# matches the empty position right after a word character; str_replace()
# substitutes only the first such match, i.e. the spot after the first
# character of each name, with a dot. The backslash in "\\w" must be doubled
# inside an R string literal.
df %>%
  rename_with(~ str_replace(.x, "(?<=\\w{1})", "."), .cols = aql:cxy)
# id ex a.ql b.ql c.ql b.st c.st a.xy c.xy
# 1 1 1 5 5 5 3 8 1 5
# 2 2 0 4 7 7 7 7 9 3
# 3 3 0 NA NA NA 8 5 4 1
# 4 4 1 6 9 9 9 3 4 4
以下数据集为宽格式,重复测量“ql”、“st”和“xy”,前缀为“a”、“b”和“c”;
# Wide-format example data: `id` and `ex` describe each row; every other
# column is a repeated measure named <prefix letter><measure>, e.g. "aql".
df <- data.frame(
  id  = c(1, 2, 3, 4),
  ex  = c(1, 0, 0, 1),
  aql = c(5, 4, NA, 6),
  bql = c(5, 7, NA, 9),
  cql = c(5, 7, NA, 9),
  bst = c(3, 7, 8, 9),
  cst = c(8, 7, 5, 3),
  axy = c(1, 9, 4, 4),
  cxy = c(5, 3, 1, 4)
)
我正在寻找一种在前缀字母“a”、“b”和“c”之后插入点的方法,同时保持其他列(即 id、ex)不变。我一直在使用 gsub 函数解决这个问题,例如
# Asker's attempt, kept for illustration: the empty pattern matches at every
# character position, so a dot is inserted throughout each name rather than
# only after the prefix letter. The backslash must be doubled inside an R
# string literal ("\." on its own is not a valid escape and does not parse).
names(df) <- gsub("", "\\.", names(df))
但得到了不希望的结果。预期的输出看起来像
id ex a.ql b.ql c.ql b.st c.st a.xy c.xy
1 1 1 5 5 5 3 8 1 5
2 2 0 4 7 7 7 7 9 3
3 3 0 NA NA NA 8 5 4 1
4 4 1 6 9 9 9 3 4 4
您也可以尝试使用 tidyverse 方法重塑您的数据,如下所示:
library(tidyverse)

# Example data: `id` and `ex` describe each row; every other column is a
# repeated measure named <prefix letter><measure>, e.g. "aql".
df <- data.frame(
  id  = c(1, 2, 3, 4),
  ex  = c(1, 0, 0, 1),
  aql = c(5, 4, NA, 6),
  bql = c(5, 7, NA, 9),
  cql = c(5, 7, NA, 9),
  bst = c(3, 7, 8, 9),
  cst = c(8, 7, 5, 3),
  axy = c(1, 9, 4, 4),
  cxy = c(5, 3, 1, 4)
)

# Reshape: stack every column except the first two, rebuild each stacked
# column name as <first character>.<remaining characters>, then spread the
# values back out under the new names.
df %>%
  pivot_longer(-c(1, 2)) %>%
  mutate(name = paste0(substr(name, 1, 1), ".", substring(name, 2))) %>%
  pivot_wider(names_from = name, values_from = value)
输出:
# A tibble: 4 x 9
id ex a.ql b.ql c.ql b.st c.st a.xy c.xy
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 5 5 5 3 8 1 5
2 2 0 4 7 7 7 7 9 3
3 3 0 NA NA NA 8 5 4 1
4 4 1 6 9 9 9 3 4 4
你可以这样做:
# Put a dot in front of the trailing measure suffix ("ql", "st" or "xy").
# Only one of the three alternatives can match a given name, so the other two
# backreferences contribute nothing; names without such a suffix ("id", "ex")
# are returned unchanged. Backreferences need a doubled backslash inside an R
# string literal, and the dot needs no escaping in the replacement.
setNames(df, sub("(ql$)|(st$)|(xy$)", ".\\1\\2\\3", names(df)))
#> id ex a.ql b.ql c.ql b.st c.st a.xy c.xy
#> 1 1 1 5 5 5 3 8 1 5
#> 2 2 0 4 7 7 7 7 9 3
#> 3 3 0 NA NA NA 8 5 4 1
#> 4 4 1 6 9 9 9 3 4 4
尝试
# Capture the leading prefix letter (a, b or c) and the rest of the name, then
# rejoin the two groups with a dot between them. Names that do not start with
# a-c ("id", "ex") do not match and are returned unchanged. Backreferences
# need a doubled backslash: a single "\1" in an R string literal is the
# control character 0x01, not a reference to the first group.
sub("(^[a-c])(.+)", "\\1.\\2", names(df))
# [1] "id" "ex" "a.ql" "b.ql" "c.ql" "b.st" "c.st" "a.xy" "c.xy"
或
# Perl-style lookbehind: match the zero-width position directly after a
# leading a, b or c and place a dot there; all other names are left as is.
sub(pattern = "(?<=^[a-c])", replacement = ".", x = names(df), perl = TRUE)
# [1] "id" "ex" "a.ql" "b.ql" "c.ql" "b.st" "c.st" "a.xy" "c.xy"
您可以尝试另一种方法
library(dplyr)
library(stringr) # str_replace() is provided by stringr, not dplyr

# Rename only the measurement columns (aql through cxy). The lookbehind
# matches the empty position right after a word character; str_replace()
# substitutes only the first such match, i.e. the spot after the first
# character of each name, with a dot. The backslash in "\\w" must be doubled
# inside an R string literal.
df %>%
  rename_with(~ str_replace(.x, "(?<=\\w{1})", "."), .cols = aql:cxy)
# id ex a.ql b.ql c.ql b.st c.st a.xy c.xy
# 1 1 1 5 5 5 3 8 1 5
# 2 2 0 4 7 7 7 7 9 3
# 3 3 0 NA NA NA 8 5 4 1
# 4 4 1 6 9 9 9 3 4 4