使用 purrr::map 改写向量,实现与 for 循环相同的效果

Use purrr::map to rewrite a vector as in a for loop

如何只使用 purrr::map 得到与下面示例中 for 循环相同的结果:

# Strings containing misspelled words that should be corrected.
vct_string <- c("old ccar", "new carr", "house", "oold house")

# Lookup table: every match of `pattern` is replaced by `replacement`.
df_correction <- data.frame(
  pattern = c("ccar", "carr", "oold"),
  replacement = c("car", "car", "old"),
  stringsAsFactors = FALSE
)

# Apply the corrections one row at a time, feeding each result into the next
# substitution. seq_len() makes the loop a no-op for an empty table (1:nrow()
# would iterate over c(1, 0)), and calling gsub() directly avoids recomputing
# every substitution on each pass just to keep one of them.
for (i in seq_len(nrow(df_correction))) {
  vct_string <- gsub(
    pattern = df_correction$pattern[i],
    replacement = df_correction$replacement[i],
    x = vct_string
  )
}

> vct_string
[1] "old car"   "new car"   "house"     "old house"

首先为替换写一个函数

#' Correct known misspelled strings by exact, whole-string lookup.
#'
#' Each element of `string` is compared against a fixed table of misspelled
#' phrases. Unlike the question's table, which lists single words, the
#' patterns here are complete strings.
#'
#' @param string A character vector of strings to correct.
#' @return A character vector the same length as `string`: the corrected
#'   phrase where an element is listed in the table, the element itself
#'   otherwise.
word_correct <- function(string) {
  df_correction <- data.frame(
    pattern = c("old ccar", "new carr", "oold house", "house"),
    replacement = c("old car", "new car", "old house", "house"),
    stringsAsFactors = FALSE
  )
  hit <- match(string, df_correction$pattern)
  corrected <- df_correction$replacement[hit]
  # Strings without a table entry pass through unchanged; returning
  # character(0) for them would make map_chr() fail.
  unmatched <- is.na(hit)
  corrected[unmatched] <- string[unmatched]
  corrected
}

# Testing
word_correct("oold house") # listed in the table: "old house"
word_correct("ccar") # not a whole-string entry: returned unchanged

然后您可以将该函数作为参数传递给 purrr::map

# map_chr() returns a character vector, whereas map() would return a list.
map_chr(.x = vct_string, .f = word_correct)

由于您使用的是映射表(mapping table)来替换单个单词,因此实际上可以在第二个函数中再次使用 map 来得到想要的结果。

# Input strings; some of the words are misspelled.
vct_string <- c("old ccar", "new carr", "house", "oold house")

#' Correct one word using a fixed lookup of known misspellings.
#'
#' @param string A single word (length-one character vector).
#' @return The replacement for `string` when it exactly equals a known
#'   misspelling; otherwise `string` itself.
single_word_correct <- function(string) {
  misspelled <- c("ccar", "carr", "oold")
  corrected <- c("car", "car", "old")

  # Position of the word among the known misspellings; NA means "not listed".
  hit <- match(string, misspelled)
  if (is.na(hit)) {
    return(string)
  }
  corrected[hit]
}
#' Correct every word of a space-separated string.
#'
#' @param string A single string whose words are separated by spaces.
#' @return One string: the words of `string`, each passed through
#'   `single_word_correct()`, joined back together with single spaces.
multi_word_correct <- function(string) {
  # Break the input into its space-separated words.
  words <- strsplit(string, " ")[[1]]
  # Correct each word independently, then stitch the phrase back together.
  fixed_words <- map_chr(words, single_word_correct)
  paste(fixed_words, collapse = " ")
}

# Correct each element of the vector word by word.
map_chr(.x = vct_string, .f = multi_word_correct)

你需要反复(递归地)修改同一个向量,所以在我看来这是使用 reduce 系列函数的经典场景。为此,必须把该向量传给 purrr::reduce 的 .init 参数,才能得到所需的输出:

# Fold over the row indices of the correction table: the accumulator starts as
# `vct_string` (.init) and each step applies one pattern/replacement pair.
# seq_len() gives an empty sequence for a zero-row table, whereas
# seq(nrow(...)) would give c(1, 0) and index rows that do not exist.
purrr::reduce(
  seq_len(nrow(df_correction)),
  function(strings, row) {
    gsub(df_correction$pattern[row], df_correction$replacement[row], strings)
  },
  .init = vct_string
)

#> [1] "old car"   "new car"   "house"     "old house"

这种方法甚至可以对向量中的同一个元素进行多次替换。请看下面的例子:

# Modified example: the last element now needs two corrections.
vct_string <- c("old ccar", "new carr", "house", "oold carr")

# Each step applies one pattern/replacement pair to the running result, so an
# element can be corrected more than once. seq_len() keeps the fold a no-op
# for a zero-row table (seq(nrow(...)) would give c(1, 0)).
purrr::reduce(
  seq_len(nrow(df_correction)),
  function(strings, row) {
    gsub(df_correction$pattern[row], df_correction$replacement[row], strings)
  },
  .init = vct_string
)
[1] "old car" "new car" "house"   "old car"

这里是你如何使用 base::Reduce 来做到这一点:

# Base R fold: start from `vct_string` and apply one pattern/replacement pair
# per row of the correction table. Columns are addressed by name rather than
# position, and seq_len() avoids the c(1, 0) sequence that 1:nrow() produces
# for a zero-row table.
Reduce(
  function(strings, row) {
    gsub(df_correction$pattern[row], df_correction$replacement[row], strings)
  },
  seq_len(nrow(df_correction)),
  init = vct_string
)

[1] "old car"   "new car"   "house"     "old house"

实际上,您不需要任何 Reduce 或 map 函数。只需使用 str_replace_all,它本身就是向量化的:

library(stringr)
# A named character vector (names = patterns, values = replacements) lets
# str_replace_all() perform all the substitutions in one call.
# setNames() is base R; set_names() comes from purrr/rlang, which this snippet
# does not load.
str_replace_all(
  vct_string,
  setNames(df_correction$replacement, df_correction$pattern)
)

[1] "old car"   "new car"   "house"     "old house"