dplyr 使用 purrr::map 计算 tibbles 列表中的单个观察值
dplyr count single observations in list of tibbles using purrr::map
我想在一个 tibble 列表中统计各观察值出现的频率,其中同一条记录里的多个观察值用 “;” 分隔。当我在 purrr::map() 内部嵌套使用 purrr::map() 时遇到了错误。我怀疑自己遗漏了某个简单的东西,如能指点将不胜感激。
以不同客户购买水果为例,同时购买的水果用“;”分隔
# Fruit purchases across days with different numbers of customers.
# Each element of `fruits` is one customer's basket; fruits bought together
# are separated by ";".
# tibble() replaces the deprecated as_data_frame(setNames(list(...), "fruits"))
# construction and builds the same one-column tibble.
day_1 <- tibble(
  fruits = c("oranges;peaches;apples", "pears;apples", "bananas",
             "oranges;apples", "apples")
)
day_2 <- tibble(
  fruits = c("oranges;apples", "peaches", "apples;bananas;", "pears",
             "apples;peaches", "oranges")
)
day_3 <- tibble(fruits = c("peaches;pears", "apples", "bananas"))

# Create list of fruit purchases, one tibble per day.
fruit_list <- list(day_1, day_2, day_3)
这会返回一个包含三个 tibble 的列表,这也是我的数据的一般格式。我可以使用 dplyr/purrr 来计算每种水果每天的总观察次数:
# Per-day frequency table of every fruit: split each basket on ";",
# flatten the pieces into one vector, and tabulate it.
fruit_list %>%
  map(~ table(unlist(strsplit(.x$fruits, ";"))))
但是,当我尝试在 map() 内部嵌套使用 map(),以筛选并统计 tibble 列表中只购买了一种水果的记录时,遇到了如下错误:
"Error: .x is not a vector (closure)"
# NOTE: this is the failing attempt, kept verbatim to reproduce the error.
# The inner map() receives a function as its first argument (.x) and `length`
# as .f, which is what triggers ".x is not a vector (closure)".
# mutate() is also called without a data argument, and filter()/count() are
# piped the whole list returned by map() rather than each tibble inside it.
fruit_list %>%
map(mutate(fruit_count = map(function(x) strsplit(x$fruits, ";"), length))) %>%
filter(fruit_count==1) %>%
count(solo_fruits = fruits)
我可以在单个 tibble/数据框上完成这一操作,但无法在 tibble 列表上完成。我是否遗漏了 map() 函数的某些用法,或者更明显的东西?谢谢!
第一个 tibble 期望的结果格式:
# A tibble: 2 x 2
solo_fruits n
<chr> <int>
1 apples 1
2 bananas 1
我如何得出上述单个样本的答案:
# Work on the first day only, as a plain data frame.
day_1_df <- as.data.frame(fruit_list[[1]])

# Keep baskets that contain exactly one fruit, then tally them by fruit.
day_1_df %>%
  # lengths() yields an integer column, so the filter compares plain numbers
  # instead of a list-column built with map(..., length). The column is read
  # from the piped data rather than through day_1_df$fruits.
  mutate(fruit_count = lengths(strsplit(fruits, ";"))) %>%
  filter(fruit_count == 1) %>%
  count(solo_fruits = fruits)
不完全符合您的要求,但它可能会以不同的方式解决您的问题:
library(tidyverse)
# Rebuild the three days of purchases. tibble() replaces the deprecated
# as_data_frame(setNames(list(...), "fruits")) construction.
day_1 <- tibble(
  fruits = c("oranges;peaches;apples", "pears;apples", "bananas",
             "oranges;apples", "apples")
)
day_2 <- tibble(
  fruits = c("oranges;apples", "peaches", "apples;bananas;", "pears",
             "apples;peaches", "oranges")
)
day_3 <- tibble(fruits = c("peaches;pears", "apples", "bananas"))

# One row per purchased fruit, tagged with its day and a customer id.
df <- tibble(
  day = 1:3,
  fruits = list(day_1$fruits, day_2$fruits, day_3$fruits)
) %>%
  # Expand the list-column to one row per customer basket. The column is named
  # explicitly because calling unnest() without columns is deprecated.
  unnest(fruits) %>%
  # Number the customers before splitting so every fruit keeps its buyer's id.
  mutate(fruits = strsplit(fruits, ";"), customer = row_number()) %>%
  unnest(fruits)

# Keep customers who bought exactly one fruit and count each such purchase.
df %>%
  group_by(customer) %>%
  filter(n() == 1) %>%
  group_by(customer, day, fruits) %>%
  summarise(n = n())
# # A tibble: 7 x 4
# # Groups: customer, day [?]
# customer day fruits n
# <int> <int> <chr> <int>
# 1 3 1 bananas 1
# 2 5 1 apples 1
# 3 7 2 peaches 1
# 4 9 2 pears 1
# 5 11 2 oranges 1
# 6 13 3 apples 1
# 7 14 3 bananas 1
编辑:此前理解有误,现已修改。
您可以使用 str_detect 筛选出不含 “;” 的行。或者,也可以用 str_count 统计 “;” 的个数再加 1,得到每行的水果数量(注意:若字符串以 “;” 结尾,例如 "apples;bananas;",这种算法会多算 1)。
# For each day, keep the baskets without a ";" (a single fruit) and
# present them as `solo_fruits` with a constant count of 1.
fruit_list %>%
  map(function(day) {
    single <- filter(day, !str_detect(fruits, ";"))
    transmute(single, solo_fruits = fruits, count = 1)
  })
[[1]]
# A tibble: 2 x 2
solo_fruits count
<chr> <dbl>
1 bananas 1
2 apples 1
[[2]]
# A tibble: 3 x 2
solo_fruits count
<chr> <dbl>
1 peaches 1
2 pears 1
3 oranges 1
[[3]]
# A tibble: 2 x 2
solo_fruits count
<chr> <dbl>
1 apples 1
2 bananas 1
我的意思是使用 str_count:它可以直接给出每行的水果总数,而不必先拆分再使用 length。
# Add a per-basket fruit count to every day's tibble.
# Counting runs of non-";" characters, rather than separators plus one, keeps
# a trailing ";" (e.g. "apples;bananas;") from adding a phantom fruit.
fruit_list %>%
  map(~ mutate(.x, count = str_count(fruits, "[^;]+")))
我想在一个 tibble 列表中统计各观察值出现的频率,其中同一条记录里的多个观察值用 “;” 分隔。当我在 purrr::map() 内部嵌套使用 purrr::map() 时遇到了错误。我怀疑自己遗漏了某个简单的东西,如能指点将不胜感激。
以不同客户购买水果为例,同时购买的水果用“;”分隔
# Fruit purchases across days with different numbers of customers.
# Each element of `fruits` is one customer's basket; fruits bought together
# are separated by ";".
# tibble() replaces the deprecated as_data_frame(setNames(list(...), "fruits"))
# construction and builds the same one-column tibble.
day_1 <- tibble(
  fruits = c("oranges;peaches;apples", "pears;apples", "bananas",
             "oranges;apples", "apples")
)
day_2 <- tibble(
  fruits = c("oranges;apples", "peaches", "apples;bananas;", "pears",
             "apples;peaches", "oranges")
)
day_3 <- tibble(fruits = c("peaches;pears", "apples", "bananas"))

# Create list of fruit purchases, one tibble per day.
fruit_list <- list(day_1, day_2, day_3)
这会返回一个包含三个 tibble 的列表,这也是我的数据的一般格式。我可以使用 dplyr/purrr 来计算每种水果每天的总观察次数:
# Per-day frequency table of every fruit: split each basket on ";",
# flatten the pieces into one vector, and tabulate it.
fruit_list %>%
  map(~ table(unlist(strsplit(.x$fruits, ";"))))
但是,当我尝试在 map() 内部嵌套使用 map(),以筛选并统计 tibble 列表中只购买了一种水果的记录时,遇到了如下错误:
"Error: .x is not a vector (closure)"
# NOTE: this is the failing attempt, kept verbatim to reproduce the error.
# The inner map() receives a function as its first argument (.x) and `length`
# as .f, which is what triggers ".x is not a vector (closure)".
# mutate() is also called without a data argument, and filter()/count() are
# piped the whole list returned by map() rather than each tibble inside it.
fruit_list %>%
map(mutate(fruit_count = map(function(x) strsplit(x$fruits, ";"), length))) %>%
filter(fruit_count==1) %>%
count(solo_fruits = fruits)
我可以在单个 tibble/数据框上完成这一操作,但无法在 tibble 列表上完成。我是否遗漏了 map() 函数的某些用法,或者更明显的东西?谢谢!
第一个 tibble 期望的结果格式:
# A tibble: 2 x 2
solo_fruits n
<chr> <int>
1 apples 1
2 bananas 1
我如何得出上述单个样本的答案:
# Work on the first day only, as a plain data frame.
day_1_df <- as.data.frame(fruit_list[[1]])

# Keep baskets that contain exactly one fruit, then tally them by fruit.
day_1_df %>%
  # lengths() yields an integer column, so the filter compares plain numbers
  # instead of a list-column built with map(..., length). The column is read
  # from the piped data rather than through day_1_df$fruits.
  mutate(fruit_count = lengths(strsplit(fruits, ";"))) %>%
  filter(fruit_count == 1) %>%
  count(solo_fruits = fruits)
不完全符合您的要求,但它可能会以不同的方式解决您的问题:
library(tidyverse)
# Rebuild the three days of purchases. tibble() replaces the deprecated
# as_data_frame(setNames(list(...), "fruits")) construction.
day_1 <- tibble(
  fruits = c("oranges;peaches;apples", "pears;apples", "bananas",
             "oranges;apples", "apples")
)
day_2 <- tibble(
  fruits = c("oranges;apples", "peaches", "apples;bananas;", "pears",
             "apples;peaches", "oranges")
)
day_3 <- tibble(fruits = c("peaches;pears", "apples", "bananas"))

# One row per purchased fruit, tagged with its day and a customer id.
df <- tibble(
  day = 1:3,
  fruits = list(day_1$fruits, day_2$fruits, day_3$fruits)
) %>%
  # Expand the list-column to one row per customer basket. The column is named
  # explicitly because calling unnest() without columns is deprecated.
  unnest(fruits) %>%
  # Number the customers before splitting so every fruit keeps its buyer's id.
  mutate(fruits = strsplit(fruits, ";"), customer = row_number()) %>%
  unnest(fruits)

# Keep customers who bought exactly one fruit and count each such purchase.
df %>%
  group_by(customer) %>%
  filter(n() == 1) %>%
  group_by(customer, day, fruits) %>%
  summarise(n = n())
# # A tibble: 7 x 4
# # Groups: customer, day [?]
# customer day fruits n
# <int> <int> <chr> <int>
# 1 3 1 bananas 1
# 2 5 1 apples 1
# 3 7 2 peaches 1
# 4 9 2 pears 1
# 5 11 2 oranges 1
# 6 13 3 apples 1
# 7 14 3 bananas 1
编辑:此前理解有误,现已修改。
您可以使用 str_detect 筛选出不含 “;” 的行。或者,也可以用 str_count 统计 “;” 的个数再加 1,得到每行的水果数量(注意:若字符串以 “;” 结尾,例如 "apples;bananas;",这种算法会多算 1)。
# For each day, keep the baskets without a ";" (a single fruit) and
# present them as `solo_fruits` with a constant count of 1.
fruit_list %>%
  map(function(day) {
    single <- filter(day, !str_detect(fruits, ";"))
    transmute(single, solo_fruits = fruits, count = 1)
  })
[[1]]
# A tibble: 2 x 2
solo_fruits count
<chr> <dbl>
1 bananas 1
2 apples 1
[[2]]
# A tibble: 3 x 2
solo_fruits count
<chr> <dbl>
1 peaches 1
2 pears 1
3 oranges 1
[[3]]
# A tibble: 2 x 2
solo_fruits count
<chr> <dbl>
1 apples 1
2 bananas 1
我的意思是使用 str_count:它可以直接给出每行的水果总数,而不必先拆分再使用 length。
# Add a per-basket fruit count to every day's tibble.
# Counting runs of non-";" characters, rather than separators plus one, keeps
# a trailing ";" (e.g. "apples;bananas;") from adding a phantom fruit.
fruit_list %>%
  map(~ mutate(.x, count = str_count(fruits, "[^;]+")))