如何使用 dplyr::count() 和 purrr::map() 遍历分组数据框

Question

背景

我想遍历一个由因子变量组成的数据框，使用 dplyr 的 count 函数统计每个变量中各个取值出现的次数，我认为 purrr::map 函数最适合做这件事。

但是，我无法让它工作。我尝试改写已有的解答来满足我的需要，但这也不起作用。我还尝试改写一个基于另一篇帖子（this post）的函数，但无法使其与分组变量一起使用。

问题

是否可以按照我想要的方式遍历分组数据框？如果可以，应该怎么做？

提前感谢您的考虑。

可重现的例子



library(tidyverse)

# Example data (dput output): a 50-row tibble with six factor columns.
# `c` has levels "1", "2", "3" and is used as the grouping variable below;
# the other five columns have levels "0"/"1" and contain some NA entries.
vars_df <-
structure(list(c = structure(c(2L, 3L, 3L, 2L, 3L, 3L, 2L, 2L,
1L, 2L, 2L, 2L, 3L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
2L, 3L, 1L, 2L, 1L, 3L, 3L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 2L, 1L,
1L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L), .Label = c("1", "2",
"3"), class = "factor"), pastpsyc = structure(c(2L, 1L, NA, 2L,
1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
1L, 2L, 2L, 1L, 2L, NA, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), pastmed = structure(c(2L, 1L, NA, 2L,
1L, 1L, 1L, 1L, NA, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, NA, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), hxsuicide = structure(c(2L, 1L, NA,
2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, NA, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), hxdsh = structure(c(2L, 1L, NA, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, NA, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), hxtrauma = structure(c(2L, 1L, NA, 2L,
1L, 1L, 1L, 1L, NA, 1L, 1L, NA, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 1L, 1L, NA, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor")), row.names = c(NA, -50L), class = c("tbl_df",
"tbl", "data.frame"))


# Attempt 1: for each candidate value "1", "2", "3", count per level of `c`
# how many entries of every other column equal that value, then stack the
# three results with the candidate value stored in `var`.
map_dfr(
  set_names(c("1", "2", "3")),
  function(target) {
    vars_df %>%
      group_by(c) %>%
      summarise(
        across(everything(), ~ sum(.x == target, na.rm = TRUE)),
        .groups = "drop"
      )
  },
  .id = "var"
)
#> # A tibble: 9 x 7
#>   var   c     pastpsyc pastmed hxsuicide hxdsh hxtrauma
#>   <chr> <fct>    <int>   <int>     <int> <int>    <int>
#> 1 1     1            3       2         2     5        1
#> 2 1     2           16       9        18    16       10
#> 3 1     3           12       3         8    11        9
#> 4 2     1            0       0         0     0        0
#> 5 2     2            0       0         0     0        0
#> 6 2     3            0       0         0     0        0
#> 7 3     1            0       0         0     0        0
#> 8 3     2            0       0         0     0        0
#> 9 3     3            0       0         0     0        0

# Desired result for a single variable: counts of each pastpsyc value within
# each level of c. Because the data is grouped first, the result stays
# grouped by c.
vars_df %>%
  group_by(c) %>%
  count(pastpsyc)
#> # A tibble: 7 x 3
#> # Groups:   c [3]
#>   c     pastpsyc     n
#>   <fct> <fct>    <int>
#> 1 1     0            4
#> 2 1     1            3
#> 3 2     0            8
#> 4 2     1           16
#> 5 3     0            5
#> 6 3     1           12
#> 7 3     <NA>         2

# Attempt 2 (fails): map() iterates over the columns of the data frame, so
# count() is called on a single factor instead of a data frame, which
# produces the error shown below.
vars_df %>%
  group_by(c) %>%
  map(~ count(.))
#> Error in UseMethod("count"): no applicable method for 'count' applied to an object of class "factor"

# Tabulate one column of `vars_df`.
#
# `mygroup` is a column reference (a bare name, or a symbol unquoted with
# `!!` by the caller). Returns an ungrouped tibble holding that column and
# the number of rows `n` for each of its values (NA counted as its own value).
.get_count <- function(mygroup) {
  count(vars_df, {{ mygroup }})
}

# Names of all columns in the example data.
vars <- colnames(vars_df)

# Convert each name to a symbol and tabulate that column with .get_count();
# this gives one ungrouped count table per column (not yet split by `c`).
map(syms(vars), function(col_sym) .get_count(!!col_sym))
#> [[1]]
#> # A tibble: 3 x 2
#>   c         n
#>   <fct> <int>
#> 1 1         7
#> 2 2        24
#> 3 3        19
#> 
#> [[2]]
#> # A tibble: 3 x 2
#>   pastpsyc     n
#>   <fct>    <int>
#> 1 0           17
#> 2 1           31
#> 3 <NA>         2
#> 
#> [[3]]
#> # A tibble: 3 x 2
#>   pastmed     n
#>   <fct>   <int>
#> 1 0          33
#> 2 1          14
#> 3 <NA>        3
#> 
#> [[4]]
#> # A tibble: 3 x 2
#>   hxsuicide     n
#>   <fct>     <int>
#> 1 0            20
#> 2 1            28
#> 3 <NA>          2
#> 
#> [[5]]
#> # A tibble: 3 x 2
#>   hxdsh     n
#>   <fct> <int>
#> 1 0        16
#> 2 1        32
#> 3 <NA>      2
#> 
#> [[6]]
#> # A tibble: 3 x 2
#>   hxtrauma     n
#>   <fct>    <int>
#> 1 0           26
#> 2 1           20
#> 3 <NA>         4

# Attempt 3 (fails): syms() returns a plain list of symbols, and group_by()
# has no method for a list, which produces the error shown below.
vars %>%
  syms() %>%
  group_by(c) %>%
  map(function(var) .get_count(!!var))
#> Error in UseMethod("group_by"): no applicable method for 'group_by' applied to an object of class "list"

# Created on 2021-05-26 by the reprex package (v2.0.0)

Answer 1

您可以这样使用 map -

library(tidyverse)

# For every column name in `vars`, count the combinations of `c` and that
# column; `.data[[col_name]]` looks the column up by its string name.
map(
  vars,
  function(col_name) {
    vars_df %>%
      count(c, .data[[col_name]])
  }
)

#[[1]]
# A tibble: 3 x 2
#  c         n
#  <fct> <int>
#1 1         7
#2 2        24
#3 3        19

#[[2]]
# A tibble: 7 x 3
#  c     pastpsyc     n
#  <fct> <fct>    <int>
#1 1     0            4
#2 1     1            3
#3 2     0            8
#4 2     1           16
#5 3     0            5
#6 3     1           12
#7 3     NA           2
#...
#...

另一种方式：以长格式显示输出 -

# Long-format alternative: stack every column except `c` into name/value
# pairs, then count each (c, name, value) combination in a single table.
vars_df %>%
  pivot_longer(cols = -c, names_to = "name", values_to = "value") %>%
  count(c, name, value)

#   c    name      value     n
#   <fct> <chr>     <fct> <int>
# 1 1     hxdsh     0         2
# 2 1     hxdsh     1         5
# 3 1     hxsuicide 0         5
# 4 1     hxsuicide 1         2
# 5 1     hxtrauma  0         5
# 6 1     hxtrauma  1         1
# 7 1     hxtrauma  NA        1
# 8 1     pastmed   0         4
# 9 1     pastmed   1         2
#10 1     pastmed   NA        1
# … with 28 more rows

如何使用 dplyr::count() 和 purrr::map() 遍历分组数据框

How to loop over a grouped dataframe using dplyr::count() and purrr::map()

r

dplyr

purrr

tidyverse

背景

问题

可重现的例子