将 600 列拆分为每列 2 个新列,向量中包含新旧列名
Split 600 columns into 2 new columns for each one, with old and new column names in vectors
我想在分隔符(本例中为 `/`)处,将 600 列(列名保存在一个向量中)各拆分为 2 个新列(新列名同样保存在向量中)。我已经写出了拆分的基本逻辑,如下所示,但有没有比写几百行代码更好的方法来完成这项工作?有什么想法吗?
# Example data: three columns of codes, each value written as "left/right".
df1 <- data.frame(
  codes1 = c(
    "H5394/6N938", "49J62/P82", "142X394/652876",
    "1057C83/25394", "45N564/3558", "49405/A8174"
  ),
  codes2 = c(
    "W34142/83X652", "5L622/482S3", "44N574/358866",
    "1P47484/724A94", "454N64/3458", "49A05/28774"
  ),
  codes3 = c(
    "19K4/139D54", "4T3962/78D43", "18V94/682876",
    "P083/28394", "434/8558", "43405/2387N"
  )
)

# Name vectors: the source columns and the left/right columns to create.
originalvar <- c("codes1", "codes2", "codes3")
newleftvar <- c("leftcode_0hr", "leftcode_2hr", "leftcode_4hr")
newrightvar <- c("rightcode_0hr", "rightcode_2hr", "rightcode_4hr")

df1
codes1 codes2 codes3
1 H5394/6N938 W34142/83X652 19K4/139D54
2 49J62/P82 5L622/482S3 4T3962/78D43
3 142X394/652876 44N574/358866 18V94/682876
4 1057C83/25394 1P47484/724A94 P083/28394
5 45N564/3558 454N64/3458 434/8558
6 49405/A8174 49A05/28774 43405/2387N
# My manual approach: one pair of statements per source column.
# str_locate() returns a two-column (start, end) matrix, so the "start"
# column is selected explicitly instead of handing the whole matrix to
# substr().
df1$leftcode_0hr <- substr(
  df1$codes1, 1, stringr::str_locate(df1$codes1, "/")[, "start"] - 1
)
df1$rightcode_0hr <- substr(
  df1$codes1,
  stringr::str_locate(df1$codes1, "/")[, "start"] + 1,
  nchar(df1$codes1)
)
df1$leftcode_2hr <- substr(
  df1$codes2, 1, stringr::str_locate(df1$codes2, "/")[, "start"] - 1
)
df1$rightcode_2hr <- substr(
  df1$codes2,
  stringr::str_locate(df1$codes2, "/")[, "start"] + 1,
  nchar(df1$codes2)
)
df1$leftcode_4hr <- substr(
  df1$codes3, 1, stringr::str_locate(df1$codes3, "/")[, "start"] - 1
)
df1$rightcode_4hr <- substr(
  df1$codes3,
  stringr::str_locate(df1$codes3, "/")[, "start"] + 1,
  nchar(df1$codes3)
)
df1
codes1 codes2 codes3 leftcode_0hr rightcode_0hr leftcode_2hr rightcode_2hr leftcode_4hr rightcode_4hr
1 H5394/6N938 W34142/83X652 19K4/139D54 H5394 6N938 W34142 83X652 19K4 139D54
2 49J62/P82 5L622/482S3 4T3962/78D43 49J62 P82 5L622 482S3 4T3962 78D43
3 142X394/652876 44N574/358866 18V94/682876 142X394 652876 44N574 358866 18V94 682876
4 1057C83/25394 1P47484/724A94 P083/28394 1057C83 25394 1P47484 724A94 P083 28394
5 45N564/3558 454N64/3458 434/8558 45N564 3558 454N64 3458 434 8558
6 49405/A8174 49A05/28774 43405/2387N 49405 A8174 49A05 28774 43405 2387N
你可以这样做:
# Assumes dplyr, tidyr and stringr are attached (e.g. library(tidyverse)).
# Reshape to long form, split every value at "/", then widen again so that
# each original column yields a left/right pair.
df1 %>%
  pivot_longer(everything()) %>%
  separate(value, into = c("left", "right"), sep = "/") %>%
  pivot_wider(
    names_from = "name", values_from = left:right,
    values_fn = list, names_sep = ""
  ) %>%
  unnest(everything()) %>%
  # Columns are now named e.g. "leftcodes1"; map the index k in the name to
  # the hour suffix 2 * k - 2 ("leftcodes1" -> "leftcode_0hr").
  rename_with(~ paste0(
    str_extract(.x, "^(left|right)code"),
    "_", as.numeric(str_extract(.x, "\\d+")) * 2 - 2, "hr"
  )) %>%
  # Interleave the columns: left_i followed by right_i for each column i.
  relocate(unlist(lapply(seq_len(ncol(df1)), \(i) c(i, i + ncol(df1)))))
输出:
leftcode_0hr rightcode_0hr leftcode_2hr rightcode_2hr leftcode_4hr rightcode_4hr
<chr> <chr> <chr> <chr> <chr> <chr>
1 H5394 6N938 W34142 83X652 19K4 139D54
2 49J62 P82 5L622 482S3 4T3962 78D43
3 142X394 652876 44N574 358866 18V94 682876
4 1057C83 25394 1P47484 724A94 P083 28394
5 45N564 3558 454N64 3458 434 8558
6 49405 A8174 49A05 28774 43405 2387N
这是另一种方法:它直接沿用您最初的思路,将其包装成一个函数,然后应用于 `df1` 的每个列名:
# Split one column of `df1` at the first "/" into a named left/right pair.
#
# `code` is a column name of `df1` containing an index k (e.g. "codes2").
# Returns a list of two character vectors named "leftcode_<h>hr" and
# "rightcode_<h>hr", where h = 2 * k - 2.
f <- function(code) {
  index <- as.numeric(stringr::str_extract(code, "\\d+"))
  suffix <- paste0("_", index * 2 - 2, "hr")
  values <- df1[[code]]
  # str_locate() returns a (start, end) matrix; only the start is needed.
  slash <- stringr::str_locate(values, "/")[, "start"]
  setNames(
    list(
      substr(values, 1, slash - 1),
      substr(values, slash + 1, nchar(values))
    ),
    paste0(c("leftcode", "rightcode"), suffix)
  )
}
as.data.frame(lapply(colnames(df1), f))
输出:
leftcode_0hr rightcode_0hr leftcode_2hr rightcode_2hr leftcode_4hr rightcode_4hr
1 H5394 6N938 W34142 83X652 19K4 139D54
2 49J62 P82 5L622 482S3 4T3962 78D43
3 142X394 652876 44N574 358866 18V94 682876
4 1057C83 25394 1P47484 724A94 P083 28394
5 45N564 3558 454N64 3458 434 8558
6 49405 A8174 49A05 28774 43405 2387N
最后,再给出一个更简单的函数版本:
# Split the character vector `d` at the first "/" and name the two parts
# for the `n`-th column: "leftcode_<h>hr" and "rightcode_<h>hr", where
# h = 2 * (n - 1). Returns a list of two character vectors.
f <- function(d, n) {
  # str_split_fixed() always yields exactly two columns, so the two names
  # below line up regardless of how many "/" a value contains.
  pieces <- stringr::str_split_fixed(d, "/", n = 2)
  setNames(
    list(pieces[, 1], pieces[, 2]),
    paste0(c("left", "right"), "code_", 2 * (n - 1), "hr")
  )
}
as.data.frame(lapply(seq_along(df1), \(i) f(df1[[i]], i)))
使用基础 R(base R):
# Split once; with exactly one "/" per value the pieces alternate
# left, right, left, right, ...
parts <- unlist(strsplit(df1$codes1, "/", fixed = TRUE))
how_many <- seq_along(parts)
# Odd positions hold the left-hand pieces.
df1$left_0hr <- parts[how_many %% 2 == 1]
df1
codes1 codes2 codes3 left_0hr
1 H5394/6N938 W34142/83X652 19K4/139D54 H5394
2 49J62/P82 5L622/482S3 4T3962/78D43 49J62
3 142X394/652876 44N574/358866 18V94/682876 142X394
4 1057C83/25394 1P47484/724A94 P083/28394 1057C83
5 45N564/3558 454N64/3458 434/8558 45N564
6 49405/A8174 49A05/28774 43405/2387N 49405
等等。
我想在分隔符(本例中为 `/`)处,将 600 列(列名保存在一个向量中)各拆分为 2 个新列(新列名同样保存在向量中)。我已经写出了拆分的基本逻辑,如下所示,但有没有比写几百行代码更好的方法来完成这项工作?有什么想法吗?
# Example data: three columns of codes, each value written as "left/right".
df1 <- data.frame(
  codes1 = c(
    "H5394/6N938", "49J62/P82", "142X394/652876",
    "1057C83/25394", "45N564/3558", "49405/A8174"
  ),
  codes2 = c(
    "W34142/83X652", "5L622/482S3", "44N574/358866",
    "1P47484/724A94", "454N64/3458", "49A05/28774"
  ),
  codes3 = c(
    "19K4/139D54", "4T3962/78D43", "18V94/682876",
    "P083/28394", "434/8558", "43405/2387N"
  )
)

# Name vectors: the source columns and the left/right columns to create.
originalvar <- c("codes1", "codes2", "codes3")
newleftvar <- c("leftcode_0hr", "leftcode_2hr", "leftcode_4hr")
newrightvar <- c("rightcode_0hr", "rightcode_2hr", "rightcode_4hr")

df1
codes1 codes2 codes3
1 H5394/6N938 W34142/83X652 19K4/139D54
2 49J62/P82 5L622/482S3 4T3962/78D43
3 142X394/652876 44N574/358866 18V94/682876
4 1057C83/25394 1P47484/724A94 P083/28394
5 45N564/3558 454N64/3458 434/8558
6 49405/A8174 49A05/28774 43405/2387N
# My manual approach: one pair of statements per source column.
# str_locate() returns a two-column (start, end) matrix, so the "start"
# column is selected explicitly instead of handing the whole matrix to
# substr().
df1$leftcode_0hr <- substr(
  df1$codes1, 1, stringr::str_locate(df1$codes1, "/")[, "start"] - 1
)
df1$rightcode_0hr <- substr(
  df1$codes1,
  stringr::str_locate(df1$codes1, "/")[, "start"] + 1,
  nchar(df1$codes1)
)
df1$leftcode_2hr <- substr(
  df1$codes2, 1, stringr::str_locate(df1$codes2, "/")[, "start"] - 1
)
df1$rightcode_2hr <- substr(
  df1$codes2,
  stringr::str_locate(df1$codes2, "/")[, "start"] + 1,
  nchar(df1$codes2)
)
df1$leftcode_4hr <- substr(
  df1$codes3, 1, stringr::str_locate(df1$codes3, "/")[, "start"] - 1
)
df1$rightcode_4hr <- substr(
  df1$codes3,
  stringr::str_locate(df1$codes3, "/")[, "start"] + 1,
  nchar(df1$codes3)
)
df1
codes1 codes2 codes3 leftcode_0hr rightcode_0hr leftcode_2hr rightcode_2hr leftcode_4hr rightcode_4hr
1 H5394/6N938 W34142/83X652 19K4/139D54 H5394 6N938 W34142 83X652 19K4 139D54
2 49J62/P82 5L622/482S3 4T3962/78D43 49J62 P82 5L622 482S3 4T3962 78D43
3 142X394/652876 44N574/358866 18V94/682876 142X394 652876 44N574 358866 18V94 682876
4 1057C83/25394 1P47484/724A94 P083/28394 1057C83 25394 1P47484 724A94 P083 28394
5 45N564/3558 454N64/3458 434/8558 45N564 3558 454N64 3458 434 8558
6 49405/A8174 49A05/28774 43405/2387N 49405 A8174 49A05 28774 43405 2387N
你可以这样做:
# Assumes dplyr, tidyr and stringr are attached (e.g. library(tidyverse)).
# Reshape to long form, split every value at "/", then widen again so that
# each original column yields a left/right pair.
df1 %>%
  pivot_longer(everything()) %>%
  separate(value, into = c("left", "right"), sep = "/") %>%
  pivot_wider(
    names_from = "name", values_from = left:right,
    values_fn = list, names_sep = ""
  ) %>%
  unnest(everything()) %>%
  # Columns are now named e.g. "leftcodes1"; map the index k in the name to
  # the hour suffix 2 * k - 2 ("leftcodes1" -> "leftcode_0hr").
  rename_with(~ paste0(
    str_extract(.x, "^(left|right)code"),
    "_", as.numeric(str_extract(.x, "\\d+")) * 2 - 2, "hr"
  )) %>%
  # Interleave the columns: left_i followed by right_i for each column i.
  relocate(unlist(lapply(seq_len(ncol(df1)), \(i) c(i, i + ncol(df1)))))
输出:
leftcode_0hr rightcode_0hr leftcode_2hr rightcode_2hr leftcode_4hr rightcode_4hr
<chr> <chr> <chr> <chr> <chr> <chr>
1 H5394 6N938 W34142 83X652 19K4 139D54
2 49J62 P82 5L622 482S3 4T3962 78D43
3 142X394 652876 44N574 358866 18V94 682876
4 1057C83 25394 1P47484 724A94 P083 28394
5 45N564 3558 454N64 3458 434 8558
6 49405 A8174 49A05 28774 43405 2387N
这是另一种方法:它直接沿用您最初的思路,将其包装成一个函数,然后应用于 `df1` 的每个列名:
# Split one column of `df1` at the first "/" into a named left/right pair.
#
# `code` is a column name of `df1` containing an index k (e.g. "codes2").
# Returns a list of two character vectors named "leftcode_<h>hr" and
# "rightcode_<h>hr", where h = 2 * k - 2.
f <- function(code) {
  index <- as.numeric(stringr::str_extract(code, "\\d+"))
  suffix <- paste0("_", index * 2 - 2, "hr")
  values <- df1[[code]]
  # str_locate() returns a (start, end) matrix; only the start is needed.
  slash <- stringr::str_locate(values, "/")[, "start"]
  setNames(
    list(
      substr(values, 1, slash - 1),
      substr(values, slash + 1, nchar(values))
    ),
    paste0(c("leftcode", "rightcode"), suffix)
  )
}
as.data.frame(lapply(colnames(df1), f))
输出:
leftcode_0hr rightcode_0hr leftcode_2hr rightcode_2hr leftcode_4hr rightcode_4hr
1 H5394 6N938 W34142 83X652 19K4 139D54
2 49J62 P82 5L622 482S3 4T3962 78D43
3 142X394 652876 44N574 358866 18V94 682876
4 1057C83 25394 1P47484 724A94 P083 28394
5 45N564 3558 454N64 3458 434 8558
6 49405 A8174 49A05 28774 43405 2387N
最后,再给出一个更简单的函数版本:
# Split the character vector `d` at the first "/" and name the two parts
# for the `n`-th column: "leftcode_<h>hr" and "rightcode_<h>hr", where
# h = 2 * (n - 1). Returns a list of two character vectors.
f <- function(d, n) {
  # str_split_fixed() always yields exactly two columns, so the two names
  # below line up regardless of how many "/" a value contains.
  pieces <- stringr::str_split_fixed(d, "/", n = 2)
  setNames(
    list(pieces[, 1], pieces[, 2]),
    paste0(c("left", "right"), "code_", 2 * (n - 1), "hr")
  )
}
as.data.frame(lapply(seq_along(df1), \(i) f(df1[[i]], i)))
使用基础 R(base R):
# Split once; with exactly one "/" per value the pieces alternate
# left, right, left, right, ...
parts <- unlist(strsplit(df1$codes1, "/", fixed = TRUE))
how_many <- seq_along(parts)
# Odd positions hold the left-hand pieces.
df1$left_0hr <- parts[how_many %% 2 == 1]
df1
codes1 codes2 codes3 left_0hr
1 H5394/6N938 W34142/83X652 19K4/139D54 H5394
2 49J62/P82 5L622/482S3 4T3962/78D43 49J62
3 142X394/652876 44N574/358866 18V94/682876 142X394
4 1057C83/25394 1P47484/724A94 P083/28394 1057C83
5 45N564/3558 454N64/3458 434/8558 45N564
6 49405/A8174 49A05/28774 43405/2387N 49405
等等。