使用参考列汇总数据框

Summarizing a dataframe using a reference column

考虑以下数据框:

# Example input: ten waypoints, each with three point columns whose values
# are drawn with replacement from the labels in `x`.
# No seed is set in this snippet, so the sampled labels differ between runs.
df <- data.frame(waypoint = 1:10, pnt1 = NA, pnt2 = NA, pnt3 = NA)

x <- c("A", "B", "C", "D")

# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
df$pnt1 <- as.factor(sample(x, 10, replace = TRUE))
df$pnt2 <- as.factor(sample(x, 10, replace = TRUE))
df$pnt3 <- as.factor(sample(x, 10, replace = TRUE))

df

我想按以下方式汇总此数据框:对每个航点(waypoint),统计原始 df 的 pnt1、pnt2 和 pnt3 列中各个取值出现的次数,并分别放入新列 A、B、C、D。结果的框架如下所示:

# Empty template of the desired result: one row per waypoint and one
# placeholder (NA) column for each of the labels A to D.
df2 <- data.frame(
  waypoint = 1:10,
  A = NA,
  B = NA,
  C = NA,
  D = NA
)

我如何编写此代码以生成类似于 df2 且填充了正确值的结果?

library(tidyr)

# For every waypoint, count how often each label occurs across the pnt*
# columns and return one column per label.
df %>%
  # Stack the pnt* columns into long form; the labels land in `value`.
  pivot_longer(starts_with("pnt")) %>%
  # Spread the labels back out as columns. `values_fn = length` collapses the
  # repeated labels of a waypoint into their count, and labels that never
  # occur for a waypoint are filled with 0.
  pivot_wider(id_cols = waypoint,
              names_from = value,
              values_from = value,
              values_fn = length,
              values_fill = 0L,
              # `TRUE` rather than `T`, which is a reassignable variable.
              names_sort = TRUE)

输出

   waypoint     A     B     C     D
      <int> <int> <int> <int> <int>
 1        1     2     0     0     1
 2        2     0     1     1     1
 3        3     1     0     2     0
 4        4     3     0     0     0
 5        5     2     1     0     0
 6        6     1     2     0     0
 7        7     2     0     1     0
 8        8     1     2     0     0
 9        9     0     1     1     1
10       10     1     1     0     1

数据

使用 set.seed(1) 生成的数据:

# Literal (dput-style) form of the example data frame: 10 waypoints and three
# factor columns of label codes. Note that pnt3 only carries the levels
# A, B and C, while pnt1 and pnt2 carry A to D.
# The value is not assigned here; bind it to `df` to use it with the snippets.
structure(list(waypoint = 1:10, pnt1 = structure(c(1L, 3L, 1L, 
1L, 2L, 2L, 3L, 1L, 4L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"), 
    pnt2 = structure(c(4L, 4L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L
    ), .Label = c("A", "B", "C", "D"), class = "factor"), pnt3 = structure(c(1L, 
    2L, 3L, 1L, 1L, 1L, 1L, 2L, 3L, 1L), .Label = c("A", "B", 
    "C"), class = "factor")), row.names = c(NA, -10L), class = "data.frame")

您可以使用以下代码:

library(dplyr)
library(stringr)
library(purrr)

# Row-by-row count of each label across the pnt* columns.
df %>%
  rowwise() %>%
  # One output column per label, named directly after the label so no
  # temporary prefix has to be stripped afterwards. c_across() gathers only
  # the pnt* values of the current row, so the waypoint id is never compared
  # against a label; it also replaces the deprecated cur_data().
  mutate(map_dfc(list(A = "A",
                      B = "B",
                      C = "C",
                      D = "D"), ~ sum(c_across(starts_with("pnt")) == .x))) %>%
  # Drop the row-wise grouping so later verbs work on whole columns again.
  ungroup() %>%
  select(-starts_with("pnt"))


使用 set.seed(1) 时的输出:

   waypoint     A     B     C     D
      <int> <int> <int> <int> <int>
 1        1     2     0     0     1
 2        2     0     1     1     1
 3        3     1     0     2     0
 4        4     3     0     0     0
 5        5     2     1     0     0
 6        6     1     2     0     0
 7        7     2     0     1     0
 8        8     1     2     0     0
 9        9     0     1     1     1
10       10     1     1     0     1

你绝对可以在这里使用 Base R。

# Stack every point column into a single vector, pair it with the (recycled)
# first column, and cross-tabulate the two. The second dimension is
# deliberately named " " so that its header prints blank.
table(
  data.frame(
    df[1],
    " " = unlist(df[-1]),
    check.names = FALSE
  )
)
         
waypoint A B C D
      1  2 0 0 1
      2  0 1 1 1
      3  1 0 2 0
      4  3 0 0 0
      5  2 1 0 0
      6  1 2 0 0
      7  2 0 1 0
      8  1 2 0 0
      9  0 1 1 1
      10 1 1 0 1

甚至

# Reshape every column except the first into long form and cross-tabulate.
# With sep = "" the names pnt1, pnt2, pnt3 are split into the variable `pnt`
# and the time index 1, 2, 3; xtabs() then counts labels per waypoint.
# Arguments are spelled out in full rather than relying on partial matching
# (`dir`) and a positional negative index for `varying`.
xtabs(
  ~ waypoint + pnt,
  data = reshape(df, varying = names(df)[-1], direction = "long", sep = "")
)
           pnt
waypoint   A B C D
      1  2 0 0 1
      2  0 1 1 1
      3  1 0 2 0
      4  3 0 0 0
      5  2 1 0 0
      6  1 2 0 0
      7  2 0 1 0
      8  1 2 0 0
      9  0 1 1 1
      10 1 1 0 1

如果你需要的是数据框,可以用 as.data.frame.matrix() 函数包裹上面的整个表达式。

janitor 包在这里也很有用:

# The seed is kept from the original snippet; the literal below draws no
# random numbers itself.
set.seed(1)

# Example data: 10 waypoints with three factor columns of labels.
# pnt3 intentionally has only the levels A, B and C.
df <- data.frame(
  waypoint = 1:10,
  pnt1 = factor(
    c("A", "C", "A", "A", "B", "B", "C", "A", "D", "D"),
    levels = c("A", "B", "C", "D")
  ),
  pnt2 = factor(
    c("D", "D", "C", "A", "A", "B", "A", "B", "B", "B"),
    levels = c("A", "B", "C", "D")
  ),
  pnt3 = factor(
    c("A", "B", "C", "A", "A", "A", "A", "B", "C", "A"),
    levels = c("A", "B", "C")
  )
)



library(janitor) 
library(tidyverse)

# Stack every column except waypoint into long form, then let janitor
# cross-tabulate waypoint against the stacked labels.
df %>%
  pivot_longer(cols = -waypoint) %>%
  tabyl(waypoint, value)

waypoint A B C D
        1 2 0 0 1
        2 0 1 1 1
        3 1 0 2 0
        4 3 0 0 0
        5 2 1 0 0
        6 1 2 0 0
        7 2 0 1 0
        8 1 2 0 0
        9 0 1 1 1
       10 1 1 0 1