R - 使用 map 将列表函数应用于数据框列并使用列表元素创建新列

R - using map to apply a list function to dataframe column and create new columns with elements of the list

我有一个带有 id 列和一个 eats 列的数据框,以及一个单独的食物列表。我想处理数据框,以便为食物列表中的每种食物添加一列,如果食物存在于 eats 中,则填充 1,否则填充 0。

# Example data: each row is one id together with a comma-separated string of
# the foods that id eats.
txt <- tibble(
  id = c(1, 2, 3),
  eats = c(
    "apple, oats, banana, milk, sugar",
    "oats, banana, sugar",
    "chocolate, milk, sugar"
  )
)

# Foods that should each get their own indicator column.
food_list <- c("apple", "oats", "chocolate")

# Loop version: on every pass, print the current food and add a column named
# after it holding 1 where `eats` contains that food and 0 otherwise.
for (food in food_list) {
  print(food)
  txt <- mutate(
    txt,
    !!food := if_else(stringr::str_detect(eats, food), 1, 0)
  )
}

我可以使用 for 循环来做到这一点,但不知道如何在不使用循环的情况下实现。如果有人能指出如何改用 purrr 库的 map 系列函数(而不是 for 循环)来完成此操作,我将不胜感激。

谢谢!

我们可以像下面这样使用 map 系列函数:

library(purrr)
library(dplyr)
library(stringr)
# For every food, build a one-column tibble of 0/1 flags (named after the
# food) from txt$eats, bind those columns side by side, and append them to
# the right of the original txt.
txt <- bind_cols(
  txt,
  map_dfc(food_list, function(food) {
    transmute(txt, !!food := +(stringr::str_detect(eats, food)))
  })
)

-输出

txt
# A tibble: 3 x 5
     id eats                             apple  oats chocolate
  <dbl> <chr>                            <int> <int>     <int>
1     1 apple, oats, banana, milk, sugar     1     1         0
2     2 oats, banana, sugar                  0     1         0
3     3 chocolate, milk, sugar               0     0         1

在 base R 中,可以用一行代码完成

# Add one 0/1 integer column per food in a single statement. lapply() always
# returns one vector per food, so the assignment also works when txt has a
# single row or no rows; sapply() would simplify to a plain vector in the
# single-row case, which no longer lines up with the columns being assigned.
txt[food_list] <- lapply(food_list, function(food) +grepl(food, txt$eats))

您也可以使用 cbind、str_detect 以及 map_dfc:

library(dplyr)
library(purrr)
library(stringr)

# Compute a 0/1 vector per food from txt$eats, column-bind those vectors,
# label the columns with the food names, and attach them to txt.
cbind(
  txt,
  food_list %>%
    map_dfc(function(food) +str_detect(txt$eats, food)) %>%
    set_names(food_list)
)

  id                             eats apple oats chocolate
1  1 apple, oats, banana, milk, sugar     1    1         0
2  2              oats, banana, sugar     0    1         0
3  3           chocolate, milk, sugar     0    0         1

这是一个替代解决方案:

library(dplyr)
library(tidyr)

# Reshape approach: split eats into one row per food, keep only the foods
# listed in food_list, count them per id, then rejoin the original eats text.
txt %>%
  # One row per (id, food) pair; the separator is a comma followed by a space.
  separate_rows(eats, sep = ", ") %>%
  rowwise() %>%
  # ext is the position of the food in food_list, or NA if it is not listed.
  mutate(ext = match(eats, food_list)) %>%
  # Drop rows containing NA; here that removes the foods not in food_list.
  drop_na() %>%
  # One column per remaining food holding the number of matching rows;
  # id/food combinations that do not occur are filled with 0.
  pivot_wider(names_from = eats, values_from = ext, values_fn = length, values_fill = 0) %>%
  # Bring back the original eats column, which pivot_wider() consumed as names.
  # NOTE(review): an id with no food from food_list would presumably come back
  # from this join with NA (not 0) in the food columns -- confirm if that matters.
  right_join(txt, by = "id") %>%
  relocate(id, eats)

# A tibble: 3 x 5
     id eats                             apple  oats chocolate
  <dbl> <chr>                            <int> <int>     <int>
1     1 apple, oats, banana, milk, sugar     1     1         0
2     2 oats, banana, sugar                  0     1         0
3     3 chocolate, milk, sugar               0     0         1

您可以像这样使用 base R 的 Reduce

# Fold over food_list starting from txt: each step adds a column named after
# the current food with 1 where eats matches it and 0 otherwise, and hands
# the grown data frame to the next step.
Reduce(
  function(df, food) {
    df[[food]] <- +(grepl(food, df[["eats"]]))
    df
  },
  x = food_list,
  init = txt
)

# A tibble: 3 x 5
     id eats                             apple  oats chocolate
  <dbl> <chr>                            <int> <int>     <int>
1     1 apple, oats, banana, milk, sugar     1     1         0
2     2 oats, banana, sugar                  0     1         0
3     3 chocolate, milk, sugar               0     0         1

您也可以类似地使用 purrr::reduce,在这里您可以使用 (i) 海象运算符 `:=` 和 (ii) bang-bang 运算符 `!!`,而不是通过取子集来赋值

library(tidyverse)
# Example data: one row per id with a comma-separated string of foods.
txt <- tibble(
  id = c(1, 2, 3),
  eats = c(
    "apple, oats, banana, milk, sugar",
    "oats, banana, sugar",
    "chocolate, milk, sugar"
  )
)

# Foods that should each become a 0/1 indicator column.
food_list <- c("apple", "oats", "chocolate")

# Start from txt and, for each food in turn, add a column named after it
# (via !! and :=) flagging whether eats contains that food.
reduce(
  food_list,
  function(acc, food) mutate(acc, !!food := +str_detect(eats, food)),
  .init = txt
)
#> # A tibble: 3 x 5
#>      id eats                             apple  oats chocolate
#>   <dbl> <chr>                            <int> <int>     <int>
#> 1     1 apple, oats, banana, milk, sugar     1     1         0
#> 2     2 oats, banana, sugar                  0     1         0
#> 3     3 chocolate, milk, sugar               0     0         1

由 reprex 包 (v2.0.0) 于 2021-07-29 创建

将词边界 (\b) 添加到 food_list 中的值,以便词完全匹配。

例如,查看以下情况下输出的差异 -

library(stringr)
x <- c("apple", "pineapple")

# A bare pattern matches anywhere in the string, so "pineapple" is a hit too.
str_detect(x, "apple")
#[1] TRUE TRUE

# \b is the regex word-boundary anchor. Inside an R string literal the
# backslash itself must be escaped ("\\b"); a single "\b" is the backspace
# character and would not match here.
str_detect(x, "\\bapple\\b")
#[1]  TRUE FALSE

同样的方法也适用于 base R 中的 grepl -

food_list <- c("apple", "oats", "chocolate")
# Wrap every food in regex word boundaries so only whole words match.
# The backslash must be doubled in the R string: "\\b" is the regex \b,
# whereas a single "\b" would put a backspace character into the pattern.
food_pat <- sprintf("\\b%s\\b", food_list)
# Test each pattern against txt$eats and store the hits as 0/1 integer
# columns named after the corresponding food, then print the result.
txt[food_list] <- lapply(
  food_pat,
  function(pattern) +grepl(pattern, txt[["eats"]])
)
txt

# A tibble: 3 x 5
#     id eats                             apple  oats chocolate
#  <dbl> <chr>                            <int> <int>     <int>
#1     1 apple, oats, banana, milk, sugar     1     1         0
#2     2 oats, banana, sugar                  0     1         0
#3     3 chocolate, milk, sugar               0     0         1