如何使用 dplyr 重命名存储为 tibble/dataframe 的文件名

How to rename filenames stored as tibble/dataframe using dplyr

我有以下包含文件列表的数据框。

library(tidyverse)
# Example input: a one-column tibble (`source_file`) holding six image paths.
# The column carries the classes "fs_path" and "character".
dat <- structure(
  list(
    source_file = structure(
      c(
        "data/monroe_20180214/180131 WT PB d5/PB x10_01.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_02.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_03.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_04.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_05.tif",
        "data/monroe_20180214/180131 WT PB d5/PB x10_06.tif"
      ),
      class = c("fs_path", "character")
    )
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)


dat
#> # A tibble: 6 x 1
#>   source_file                                       
#>   <chr>                                             
#> 1 data/monroe_20180214/180131 WT PB d5/PB x10_01.tif
#> 2 data/monroe_20180214/180131 WT PB d5/PB x10_02.tif
#> 3 data/monroe_20180214/180131 WT PB d5/PB x10_03.tif
#> 4 data/monroe_20180214/180131 WT PB d5/PB x10_04.tif
#> 5 data/monroe_20180214/180131 WT PB d5/PB x10_05.tif
#> 6 data/monroe_20180214/180131 WT PB d5/PB x10_06.tif

我想做的是创建第二列 new_filename：用新路径 pooled/ 替换前两级目录，将空格替换为 .，并将正斜杠 (/) 替换为 __。我怎样才能做到这一点？

想要的结果是

  source_file                                         new_filename                                   
1 data/monroe_20180214/180131 WT PB d5/PB x10_01.tif  pooled/180131.WT.PB.d5__PB.x10_01.tif 
2 data/monroe_20180214/180131 WT PB d5/PB x10_02.tif  ...
3 data/monroe_20180214/180131 WT PB d5/PB x10_03.tif  .etc.
4 data/monroe_20180214/180131 WT PB d5/PB x10_04.tif  
5 data/monroe_20180214/180131 WT PB d5/PB x10_05.tif  
6 data/monroe_20180214/180131 WT PB d5/PB x10_06.tif  

使用 base R 中的 gsub() 也可以这样做：

     # Build the new file name step by step with base R's gsub().
     # (The "+" console continuation prompts were removed so the snippet can
     # be copied and run as-is.)
     dat %>%
       mutate(
         # 1. Swap the leading two directories for "pooled".
         new_var = gsub("data/monroe_20180214", "pooled", source_file),
         # 2. Replace every space with a dot.
         new_var = gsub(" ", ".", new_var),
         # 3. Replace every forward slash with an underscore ...
         new_var = gsub("/", "_", new_var),
         # 4. ... then restore the slash that follows the "pooled" prefix.
         new_var = gsub("pooled_", "pooled/", new_var)
       )
# A tibble: 6 x 2
                                         source_file                              new_var
                                               <chr>                                <chr>
1 data/monroe_20180214/180131 WT PB d5/PB x10_01.tif pooled/180131.WT.PB.d5_PB.x10_01.tif
2 data/monroe_20180214/180131 WT PB d5/PB x10_02.tif pooled/180131.WT.PB.d5_PB.x10_02.tif
3 data/monroe_20180214/180131 WT PB d5/PB x10_03.tif pooled/180131.WT.PB.d5_PB.x10_03.tif
4 data/monroe_20180214/180131 WT PB d5/PB x10_04.tif pooled/180131.WT.PB.d5_PB.x10_04.tif
5 data/monroe_20180214/180131 WT PB d5/PB x10_05.tif pooled/180131.WT.PB.d5_PB.x10_05.tif
6 data/monroe_20180214/180131 WT PB d5/PB x10_06.tif pooled/180131.WT.PB.d5_PB.x10_06.tif

一行代码即可:

# Drop the first two "dir/" components, map " " -> "." and "/" -> "_", then
# prepend "pooled/". "/" needs no escaping in a regex, and "\/" is not a
# valid escape sequence in an R string literal, so it is written plainly.
paste0(
  "pooled/",
  chartr(" /", "._", sub("^([^/]*/){2}", "", dat$source_file))
)


#[1] "pooled/180131.WT.PB.d5_PB.x10_01.tif"
#[2] "pooled/180131.WT.PB.d5_PB.x10_02.tif"
#[3] "pooled/180131.WT.PB.d5_PB.x10_03.tif"
#[4] "pooled/180131.WT.PB.d5_PB.x10_04.tif"
#[5] "pooled/180131.WT.PB.d5_PB.x10_05.tif"
#[6] "pooled/180131.WT.PB.d5_PB.x10_06.tif"

这里我们首先用 sub 把前两个 / 及其之前的部分替换为空字符串 ("")，然后使用 base R 中的 chartr 函数将空格替换为点 (.)、将正斜杠 (/) 替换为下划线 (_)，最后用 paste0 在结果前面拼接上 pooled/。

sub 部分的正则表达式取自

将其放入 dplyr 调用中:

# Same transformation inside a dplyr pipeline: strip the first two "dir/"
# components, map " " -> "." and "/" -> "_", and prepend "pooled/".
# "/" is written unescaped because "\/" is not a valid escape in an R string.
dat %>%
  mutate(
    new_filename = paste0(
      "pooled/",
      chartr(" /", "._", sub("^([^/]*/){2}", "", source_file))
    )
  ) %>%
  # Keep only the newly built column.
  select(new_filename)


#new_filename                        
#  <chr>                               
#1 pooled/180131.WT.PB.d5_PB.x10_01.tif
#2 pooled/180131.WT.PB.d5_PB.x10_02.tif
#3 pooled/180131.WT.PB.d5_PB.x10_03.tif
#4 pooled/180131.WT.PB.d5_PB.x10_04.tif
#5 pooled/180131.WT.PB.d5_PB.x10_05.tif
#6 pooled/180131.WT.PB.d5_PB.x10_06.tif