R - 使用 map 将列表函数应用于数据框列并使用列表元素创建新列
R - using map to apply a list function to dataframe column and create new columns with elements of the list
我有一个带有 id 列和一个 eats 列的数据框,以及一个单独的食物列表。我想处理数据框,以便为食物列表中的每种食物添加一列,如果食物存在于 eats 中,则填充 1,否则填充 0。
# Example data: one row per person; `eats` holds a comma-separated food list.
txt <- tibble(
  id = c(1, 2, 3),
  eats = c(
    "apple, oats, banana, milk, sugar",
    "oats, banana, sugar",
    "chocolate, milk, sugar"
  )
)
# Foods that should each become a 0/1 indicator column.
food_list <- c("apple", "oats", "chocolate")

# Loop-based baseline: add one column per food, named after the food.
for (food in food_list) {
  print(food)
  # `!!food :=` injects the string held in `food` as the new column name.
  txt <- mutate(txt, !!food := if_else(stringr::str_detect(eats, food), 1, 0))
}
我可以使用 for 循环来做到这一点,但在没有循环的情况下很难做到这一点。如果有人能指出如何在不使用 for 循环而是使用 purrr 库映射函数的情况下完成此操作,我将不胜感激。
谢谢!
我们可以使用 map_dfc
,如下所示:
library(purrr)
library(dplyr)
library(stringr)
# For each food build a one-column tibble holding its 0/1 flag, column-bind
# those together, then attach them to the right of the original columns.
txt <- bind_cols(
  txt,
  map_dfc(
    food_list,
    function(food) {
      transmute(txt, !!food := as.integer(stringr::str_detect(eats, food)))
    }
  )
)
-输出
txt
# A tibble: 3 x 5
id eats apple oats chocolate
<dbl> <chr> <int> <int> <int>
1 1 apple, oats, banana, milk, sugar 1 1 0
2 2 oats, banana, sugar 0 1 0
3 3 chocolate, milk, sugar 0 0 1
在 base R
中,可以用一行代码完成
# One integer 0/1 column per food. lapply() always returns one vector per
# food, whereas sapply() changes shape with the number of rows (a plain vector
# for one row, a list for zero rows) and then breaks the assignment.
txt[food_list] <- lapply(food_list, function(f) as.integer(grepl(f, txt$eats)))
您可以使用 cbind
和 str_detect
以及 map_dfc
:
library(dplyr)
library(purrr)
library(stringr)
# Name the foods up front so map_dfc() returns columns already called
# apple/oats/chocolate, instead of binding unnamed vectors (which triggers
# "New names" repair messages) and renaming them afterwards.
cbind(txt, map_dfc(set_names(food_list), ~ +str_detect(txt$eats, .x)))
id eats apple oats chocolate
1 1 apple, oats, banana, milk, sugar 1 1 0
2 2 oats, banana, sugar 0 1 0
3 3 chocolate, milk, sugar 0 0 1
这是一个替代解决方案:
library(dplyr)
library(tidyr)
# Split `eats` into one row per food, keep only the foods of interest, count
# them per id, and spread the counts into one column per food.
txt %>%
  separate_rows(eats, sep = ", ") %>%
  # match() is vectorised, so rowwise() is not needed; unlisted foods get NA.
  mutate(ext = match(eats, food_list)) %>%
  drop_na() %>%
  pivot_wider(
    names_from = eats, values_from = ext,
    values_fn = length, values_fill = 0
  ) %>%
  # Restore the original `eats` strings and any id that ate none of the foods.
  right_join(txt, by = "id") %>%
  # Ids re-added by the join carry NA flags; they should read 0 like the rest.
  mutate(across(any_of(food_list), ~ replace_na(.x, 0L))) %>%
  relocate(id, eats)
# A tibble: 3 x 5
id eats apple oats chocolate
<dbl> <chr> <int> <int> <int>
1 1 apple, oats, banana, milk, sugar 1 1 0
2 2 oats, banana, sugar 0 1 0
3 3 chocolate, milk, sugar 0 0 1
您可以像这样使用 base R 的 Reduce
# Fold over the foods, threading the growing data frame through as the
# accumulator; each step appends one integer 0/1 column named after the food.
Reduce(
  function(acc, food) {
    acc[[food]] <- as.integer(grepl(food, acc$eats))
    acc
  },
  food_list,
  txt
)
# A tibble: 3 x 5
id eats apple oats chocolate
<dbl> <chr> <int> <int> <int>
1 1 apple, oats, banana, milk, sugar 1 1 0
2 2 oats, banana, sugar 0 1 0
3 3 chocolate, milk, sugar 0 0 1
您也可以类似地使用 purrr::reduce
,在这里您可以使用 (i) 海象运算符 `:=` 和 (ii) bang-bang 运算符 `!!`,而不是子集赋值
library(tidyverse)

# Example data: one row per person; `eats` holds a comma-separated food list.
txt <- tibble(
  id = c(1, 2, 3),
  eats = c(
    "apple, oats, banana, milk, sugar",
    "oats, banana, sugar",
    "chocolate, milk, sugar"
  )
)
food_list <- c("apple", "oats", "chocolate")

# Fold over the foods starting from `txt`; each step adds one integer 0/1
# column whose name is injected with `!!food :=`.
reduce(
  food_list,
  function(acc, food) {
    mutate(acc, !!food := as.integer(str_detect(eats, food)))
  },
  .init = txt
)
#> # A tibble: 3 x 5
#> id eats apple oats chocolate
#> <dbl> <chr> <int> <int> <int>
#> 1 1 apple, oats, banana, milk, sugar 1 1 0
#> 2 2 oats, banana, sugar 0 1 0
#> 3 3 chocolate, milk, sugar 0 0 1
由 reprex package (v2.0.0)
于 2021-07-29 创建
将词边界 (\b
) 添加到 food_list
中的值,以便按整词精确匹配。注意在 R 字符串字面量中必须写成 "\\b",因为单独的 "\b" 表示退格字符。
例如,查看以下情况下输出的差异 -
library(stringr)
x <- c("apple", "pineapple")
# Plain substring pattern: also matches inside "pineapple".
str_detect(x, "apple")
#[1] TRUE TRUE
# `\\b` is the regex word boundary. The backslash must be doubled in an R
# string literal; a single "\b" is the backspace character and never matches.
str_detect(x, "\\bapple\\b")
#[1] TRUE FALSE
同样适用于 base R
中的 grepl -
food_list <- c("apple", "oats", "chocolate")
# Wrap each food in regex word boundaries so that e.g. "apple" does not match
# "pineapple". The backslashes are doubled because "\b" in an R string literal
# is a backspace character, not the regex `\b`.
food_pat <- sprintf("\\b%s\\b", food_list)
# One integer 0/1 column per food, assigned under the food's own name.
txt[food_list] <- lapply(
  food_pat,
  function(pat) as.integer(grepl(pat, txt$eats))
)
txt
# A tibble: 3 x 5
# id eats apple oats chocolate
# <dbl> <chr> <int> <int> <int>
#1 1 apple, oats, banana, milk, sugar 1 1 0
#2 2 oats, banana, sugar 0 1 0
#3 3 chocolate, milk, sugar 0 0 1
我有一个带有 id 列和一个 eats 列的数据框,以及一个单独的食物列表。我想处理数据框,以便为食物列表中的每种食物添加一列,如果食物存在于 eats 中,则填充 1,否则填充 0。
# Example data: one row per person; `eats` holds a comma-separated food list.
txt <- tibble(
  id = c(1, 2, 3),
  eats = c(
    "apple, oats, banana, milk, sugar",
    "oats, banana, sugar",
    "chocolate, milk, sugar"
  )
)
# Foods that should each become a 0/1 indicator column.
food_list <- c("apple", "oats", "chocolate")

# Loop-based baseline: add one column per food, named after the food.
for (food in food_list) {
  print(food)
  # `!!food :=` injects the string held in `food` as the new column name.
  txt <- mutate(txt, !!food := if_else(stringr::str_detect(eats, food), 1, 0))
}
我可以使用 for 循环来做到这一点,但在没有循环的情况下很难做到这一点。如果有人能指出如何在不使用 for 循环而是使用 purrr 库映射函数的情况下完成此操作,我将不胜感激。
谢谢!
我们可以使用 map_dfc
,如下所示:
library(purrr)
library(dplyr)
library(stringr)
# For each food build a one-column tibble holding its 0/1 flag, column-bind
# those together, then attach them to the right of the original columns.
txt <- bind_cols(
  txt,
  map_dfc(
    food_list,
    function(food) {
      transmute(txt, !!food := as.integer(stringr::str_detect(eats, food)))
    }
  )
)
-输出
txt
# A tibble: 3 x 5
id eats apple oats chocolate
<dbl> <chr> <int> <int> <int>
1 1 apple, oats, banana, milk, sugar 1 1 0
2 2 oats, banana, sugar 0 1 0
3 3 chocolate, milk, sugar 0 0 1
在 base R
中,可以用一行代码完成
# One integer 0/1 column per food. lapply() always returns one vector per
# food, whereas sapply() changes shape with the number of rows (a plain vector
# for one row, a list for zero rows) and then breaks the assignment.
txt[food_list] <- lapply(food_list, function(f) as.integer(grepl(f, txt$eats)))
您可以使用 cbind
和 str_detect
以及 map_dfc
:
library(dplyr)
library(purrr)
library(stringr)
# Name the foods up front so map_dfc() returns columns already called
# apple/oats/chocolate, instead of binding unnamed vectors (which triggers
# "New names" repair messages) and renaming them afterwards.
cbind(txt, map_dfc(set_names(food_list), ~ +str_detect(txt$eats, .x)))
id eats apple oats chocolate
1 1 apple, oats, banana, milk, sugar 1 1 0
2 2 oats, banana, sugar 0 1 0
3 3 chocolate, milk, sugar 0 0 1
这是一个替代解决方案:
library(dplyr)
library(tidyr)
# Split `eats` into one row per food, keep only the foods of interest, count
# them per id, and spread the counts into one column per food.
txt %>%
  separate_rows(eats, sep = ", ") %>%
  # match() is vectorised, so rowwise() is not needed; unlisted foods get NA.
  mutate(ext = match(eats, food_list)) %>%
  drop_na() %>%
  pivot_wider(
    names_from = eats, values_from = ext,
    values_fn = length, values_fill = 0
  ) %>%
  # Restore the original `eats` strings and any id that ate none of the foods.
  right_join(txt, by = "id") %>%
  # Ids re-added by the join carry NA flags; they should read 0 like the rest.
  mutate(across(any_of(food_list), ~ replace_na(.x, 0L))) %>%
  relocate(id, eats)
# A tibble: 3 x 5
id eats apple oats chocolate
<dbl> <chr> <int> <int> <int>
1 1 apple, oats, banana, milk, sugar 1 1 0
2 2 oats, banana, sugar 0 1 0
3 3 chocolate, milk, sugar 0 0 1
您可以像这样使用 base R 的 Reduce
# Fold over the foods, threading the growing data frame through as the
# accumulator; each step appends one integer 0/1 column named after the food.
Reduce(
  function(acc, food) {
    acc[[food]] <- as.integer(grepl(food, acc$eats))
    acc
  },
  food_list,
  txt
)
# A tibble: 3 x 5
id eats apple oats chocolate
<dbl> <chr> <int> <int> <int>
1 1 apple, oats, banana, milk, sugar 1 1 0
2 2 oats, banana, sugar 0 1 0
3 3 chocolate, milk, sugar 0 0 1
您也可以类似地使用 purrr::reduce
,在这里您可以使用 (i) 海象运算符 `:=` 和 (ii) bang-bang 运算符 `!!`,而不是子集赋值
library(tidyverse)

# Example data: one row per person; `eats` holds a comma-separated food list.
txt <- tibble(
  id = c(1, 2, 3),
  eats = c(
    "apple, oats, banana, milk, sugar",
    "oats, banana, sugar",
    "chocolate, milk, sugar"
  )
)
food_list <- c("apple", "oats", "chocolate")

# Fold over the foods starting from `txt`; each step adds one integer 0/1
# column whose name is injected with `!!food :=`.
reduce(
  food_list,
  function(acc, food) {
    mutate(acc, !!food := as.integer(str_detect(eats, food)))
  },
  .init = txt
)
#> # A tibble: 3 x 5
#> id eats apple oats chocolate
#> <dbl> <chr> <int> <int> <int>
#> 1 1 apple, oats, banana, milk, sugar 1 1 0
#> 2 2 oats, banana, sugar 0 1 0
#> 3 3 chocolate, milk, sugar 0 0 1
由 reprex package (v2.0.0)
于 2021-07-29 创建
将词边界 (\b) 添加到 food_list
中的值,以便按整词精确匹配。注意在 R 字符串字面量中必须写成 "\\b",因为单独的 "\b" 表示退格字符。
例如,查看以下情况下输出的差异 -
library(stringr)
x <- c("apple", "pineapple")
# Plain substring pattern: also matches inside "pineapple".
str_detect(x, "apple")
#[1] TRUE TRUE
# `\\b` is the regex word boundary. The backslash must be doubled in an R
# string literal; a single "\b" is the backspace character and never matches.
str_detect(x, "\\bapple\\b")
#[1] TRUE FALSE
同样适用于 base R
中的 grepl -
food_list <- c("apple", "oats", "chocolate")
# Wrap each food in regex word boundaries so that e.g. "apple" does not match
# "pineapple". The backslashes are doubled because "\b" in an R string literal
# is a backspace character, not the regex `\b`.
food_pat <- sprintf("\\b%s\\b", food_list)
# One integer 0/1 column per food, assigned under the food's own name.
txt[food_list] <- lapply(
  food_pat,
  function(pat) as.integer(grepl(pat, txt$eats))
)
txt
# A tibble: 3 x 5
# id eats apple oats chocolate
# <dbl> <chr> <int> <int> <int>
#1 1 apple, oats, banana, milk, sugar 1 1 0
#2 2 oats, banana, sugar 0 1 0
#3 3 chocolate, milk, sugar 0 0 1