如何使用 dplyr 重命名存储为 tibble/dataframe 的文件名
How to rename filenames stored as tibble/dataframe using dplyr
我有以下包含文件列表的数据框。
library(tidyverse)
# Example input: a one-column tibble-classed data frame of image paths,
# written out as a literal structure() call. The column carries the
# "fs_path" class on top of "character".
dat <- structure(
  list(
    source_file = structure(
      c(
        "data/monroe_20180214/180131 WT PB d5/PB x10_01.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_02.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_03.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_04.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_05.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_06.tif"
      ),
      class = c("fs_path", "character")
    )
  ),
  names = "source_file",
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)
dat
#> # A tibble: 6 x 1
#> source_file
#> <chr>
#> 1 data/monroe_20180214/180131 WT PB d5/PB x10_01.tif
#> 2 data/monroe_20180214/180131 WT PB d5/PB x10_02.tif
#> 3 data/monroe_20180214/180131 WT PB d5/PB x10_03.tif
#> 4 data/monroe_20180214/180131 WT PB d5/PB x10_04.tif
#> 5 data/monroe_20180214/180131 WT PB d5/PB x10_05.tif
#> 6 data/monroe_20180214/180131 WT PB d5/PB x10_06.tif
我想创建第二列 new_filename:用新路径 pooled/ 替换前两级目录,并将空格替换为 . ,将斜杠(/)替换为 __ 。我怎样才能做到这一点?
想要的结果是
source_file new_filename
1 data/monroe_20180214/180131 WT PB d5/PB x10_01.tif pooled/180131.WT.PB.d5__PB.x10_01.tif
2 data/monroe_20180214/180131 WT PB d5/PB x10_02.tif ...
3 data/monroe_20180214/180131 WT PB d5/PB x10_03.tif .etc.
4 data/monroe_20180214/180131 WT PB d5/PB x10_04.tif
5 data/monroe_20180214/180131 WT PB d5/PB x10_05.tif
6 data/monroe_20180214/180131 WT PB d5/PB x10_06.tif
你也可以使用 base R 中的 gsub() 来做到这一点:
# Build the new name step by step with base R's gsub(). fixed = TRUE treats
# every pattern as a literal string rather than a regular expression.
# (The leading "+" console continuation prompts have been removed so the
# snippet can be pasted and run as-is.)
dat %>%
  mutate(
    # Swap the two leading directories for the new top-level folder.
    new_var = gsub(
      "data/monroe_20180214", "pooled", source_file,
      fixed = TRUE
    ),
    # Spaces become dots.
    new_var = gsub(" ", ".", new_var, fixed = TRUE),
    # Every slash becomes an underscore, including the one after "pooled" ...
    new_var = gsub("/", "_", new_var, fixed = TRUE),
    # ... so restore that directory separator.
    new_var = gsub("pooled_", "pooled/", new_var, fixed = TRUE)
  )
# A tibble: 6 x 2
source_file new_var
<chr> <chr>
1 data/monroe_20180214/180131 WT PB d5/PB x10_01.tif pooled/180131.WT.PB.d5_PB.x10_01.tif
2 data/monroe_20180214/180131 WT PB d5/PB x10_02.tif pooled/180131.WT.PB.d5_PB.x10_02.tif
3 data/monroe_20180214/180131 WT PB d5/PB x10_03.tif pooled/180131.WT.PB.d5_PB.x10_03.tif
4 data/monroe_20180214/180131 WT PB d5/PB x10_04.tif pooled/180131.WT.PB.d5_PB.x10_04.tif
5 data/monroe_20180214/180131 WT PB d5/PB x10_05.tif pooled/180131.WT.PB.d5_PB.x10_05.tif
6 data/monroe_20180214/180131 WT PB d5/PB x10_06.tif pooled/180131.WT.PB.d5_PB.x10_06.tif
一行代码的写法:
# Drop the first two directory levels, map " " -> "." and "/" -> "_" in a
# single chartr() pass, then prepend the new top-level folder.
# A slash needs no escaping in a regex, and "\/" is not a valid escape in an
# R string literal, so the pattern uses a plain "/". perl = TRUE is set for
# the non-capturing group "(?:...)".
paste0(
  "pooled/",
  chartr(" /", "._", sub("^(?:[^/]*/){2}", "", dat$source_file, perl = TRUE))
)
#[1] "pooled/180131.WT.PB.d5_PB.x10_01.tif"
#[2] "pooled/180131.WT.PB.d5_PB.x10_02.tif"
#[3] "pooled/180131.WT.PB.d5_PB.x10_03.tif"
#[4] "pooled/180131.WT.PB.d5_PB.x10_04.tif"
#[5] "pooled/180131.WT.PB.d5_PB.x10_05.tif"
#[6] "pooled/180131.WT.PB.d5_PB.x10_06.tif"
这里我们首先用 sub 将前两个 / 及其之前的部分替换为空字符串(""),然后使用 base R 中的 chartr 函数将空格替换为点(.)、将正斜杠(/)替换为下划线(_),最后用 paste0 在字符串前面加上 pooled/ 。
sub
部分的正则表达式取自 。
在 dplyr
调用中添加:
# Same transformation inside a dplyr pipeline: strip the first two directory
# levels, map " " -> "." and "/" -> "_" with chartr(), prepend "pooled/",
# then keep only the new column.
# "\/" is not a valid escape in an R string literal, so the pattern uses a
# plain "/"; perl = TRUE is set for the non-capturing group "(?:...)".
dat %>%
  mutate(
    new_filename = paste0(
      "pooled/",
      chartr(" /", "._", sub("^(?:[^/]*/){2}", "", source_file, perl = TRUE))
    )
  ) %>%
  select(new_filename)
#new_filename
# <chr>
#1 pooled/180131.WT.PB.d5_PB.x10_01.tif
#2 pooled/180131.WT.PB.d5_PB.x10_02.tif
#3 pooled/180131.WT.PB.d5_PB.x10_03.tif
#4 pooled/180131.WT.PB.d5_PB.x10_04.tif
#5 pooled/180131.WT.PB.d5_PB.x10_05.tif
#6 pooled/180131.WT.PB.d5_PB.x10_06.tif
我有以下包含文件列表的数据框。
library(tidyverse)
# Example input: a one-column tibble-classed data frame of image paths,
# written out as a literal structure() call. The column carries the
# "fs_path" class on top of "character".
dat <- structure(
  list(
    source_file = structure(
      c(
        "data/monroe_20180214/180131 WT PB d5/PB x10_01.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_02.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_03.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_04.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_05.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_06.tif"
      ),
      class = c("fs_path", "character")
    )
  ),
  names = "source_file",
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)
dat
#> # A tibble: 6 x 1
#> source_file
#> <chr>
#> 1 data/monroe_20180214/180131 WT PB d5/PB x10_01.tif
#> 2 data/monroe_20180214/180131 WT PB d5/PB x10_02.tif
#> 3 data/monroe_20180214/180131 WT PB d5/PB x10_03.tif
#> 4 data/monroe_20180214/180131 WT PB d5/PB x10_04.tif
#> 5 data/monroe_20180214/180131 WT PB d5/PB x10_05.tif
#> 6 data/monroe_20180214/180131 WT PB d5/PB x10_06.tif
我想创建第二列 new_filename:用新路径 pooled/ 替换前两级目录,并将空格替换为 . ,将斜杠(/)替换为 __ 。我怎样才能做到这一点?
想要的结果是
source_file new_filename
1 data/monroe_20180214/180131 WT PB d5/PB x10_01.tif pooled/180131.WT.PB.d5__PB.x10_01.tif
2 data/monroe_20180214/180131 WT PB d5/PB x10_02.tif ...
3 data/monroe_20180214/180131 WT PB d5/PB x10_03.tif .etc.
4 data/monroe_20180214/180131 WT PB d5/PB x10_04.tif
5 data/monroe_20180214/180131 WT PB d5/PB x10_05.tif
6 data/monroe_20180214/180131 WT PB d5/PB x10_06.tif
你也可以使用 base R 中的 gsub() 来做到这一点:
# Build the new name step by step with base R's gsub(). fixed = TRUE treats
# every pattern as a literal string rather than a regular expression.
# (The leading "+" console continuation prompts have been removed so the
# snippet can be pasted and run as-is.)
dat %>%
  mutate(
    # Swap the two leading directories for the new top-level folder.
    new_var = gsub(
      "data/monroe_20180214", "pooled", source_file,
      fixed = TRUE
    ),
    # Spaces become dots.
    new_var = gsub(" ", ".", new_var, fixed = TRUE),
    # Every slash becomes an underscore, including the one after "pooled" ...
    new_var = gsub("/", "_", new_var, fixed = TRUE),
    # ... so restore that directory separator.
    new_var = gsub("pooled_", "pooled/", new_var, fixed = TRUE)
  )
# A tibble: 6 x 2
source_file new_var
<chr> <chr>
1 data/monroe_20180214/180131 WT PB d5/PB x10_01.tif pooled/180131.WT.PB.d5_PB.x10_01.tif
2 data/monroe_20180214/180131 WT PB d5/PB x10_02.tif pooled/180131.WT.PB.d5_PB.x10_02.tif
3 data/monroe_20180214/180131 WT PB d5/PB x10_03.tif pooled/180131.WT.PB.d5_PB.x10_03.tif
4 data/monroe_20180214/180131 WT PB d5/PB x10_04.tif pooled/180131.WT.PB.d5_PB.x10_04.tif
5 data/monroe_20180214/180131 WT PB d5/PB x10_05.tif pooled/180131.WT.PB.d5_PB.x10_05.tif
6 data/monroe_20180214/180131 WT PB d5/PB x10_06.tif pooled/180131.WT.PB.d5_PB.x10_06.tif
一行代码的写法:
# Drop the first two directory levels, map " " -> "." and "/" -> "_" in a
# single chartr() pass, then prepend the new top-level folder.
# A slash needs no escaping in a regex, and "\/" is not a valid escape in an
# R string literal, so the pattern uses a plain "/". perl = TRUE is set for
# the non-capturing group "(?:...)".
paste0(
  "pooled/",
  chartr(" /", "._", sub("^(?:[^/]*/){2}", "", dat$source_file, perl = TRUE))
)
#[1] "pooled/180131.WT.PB.d5_PB.x10_01.tif"
#[2] "pooled/180131.WT.PB.d5_PB.x10_02.tif"
#[3] "pooled/180131.WT.PB.d5_PB.x10_03.tif"
#[4] "pooled/180131.WT.PB.d5_PB.x10_04.tif"
#[5] "pooled/180131.WT.PB.d5_PB.x10_05.tif"
#[6] "pooled/180131.WT.PB.d5_PB.x10_06.tif"
这里我们首先用 sub 将前两个 / 及其之前的部分替换为空字符串(""),然后使用 base R 中的 chartr 函数将空格替换为点(.)、将正斜杠(/)替换为下划线(_),最后用 paste0 在字符串前面加上 pooled/ 。
sub
部分的正则表达式取自
在 dplyr
调用中添加:
# Same transformation inside a dplyr pipeline: strip the first two directory
# levels, map " " -> "." and "/" -> "_" with chartr(), prepend "pooled/",
# then keep only the new column.
# "\/" is not a valid escape in an R string literal, so the pattern uses a
# plain "/"; perl = TRUE is set for the non-capturing group "(?:...)".
dat %>%
  mutate(
    new_filename = paste0(
      "pooled/",
      chartr(" /", "._", sub("^(?:[^/]*/){2}", "", source_file, perl = TRUE))
    )
  ) %>%
  select(new_filename)
#new_filename
# <chr>
#1 pooled/180131.WT.PB.d5_PB.x10_01.tif
#2 pooled/180131.WT.PB.d5_PB.x10_02.tif
#3 pooled/180131.WT.PB.d5_PB.x10_03.tif
#4 pooled/180131.WT.PB.d5_PB.x10_04.tif
#5 pooled/180131.WT.PB.d5_PB.x10_05.tif
#6 pooled/180131.WT.PB.d5_PB.x10_06.tif