使用参考列汇总数据框
Summarizing a dataframe using a reference column
考虑以下数据框:
# Build a 10-row example data frame: a waypoint id plus three factor columns
# (pnt1..pnt3), each drawn at random (with replacement) from the labels in `x`.
x <- c("A", "B", "C", "D")
df <- data.frame(waypoint = 1:10)
# The columns are filled in the order pnt1, pnt2, pnt3 so the sequence of
# random draws stays the same. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
df$pnt1 <- as.factor(sample(x, 10, replace = TRUE))
df$pnt2 <- as.factor(sample(x, 10, replace = TRUE))
df$pnt3 <- as.factor(sample(x, 10, replace = TRUE))
df
我想按如下方式汇总此数据框:对每个航点(waypoint),统计原始 df 的 pnt1、pnt2、pnt3 三列中各个取值出现的次数,并将结果放入新列 A、B、C、D。结果的框架如下所示:
# Empty template of the desired result: one row per waypoint and one
# NA-filled column for each of the labels A-D.
df2 <- data.frame(waypoint = 1:10, A = NA, B = NA, C = NA, D = NA)
我如何编写此代码以生成类似于 df2 且填充了正确值的结果?
library(tidyr)
# Count, per waypoint, how often each label occurs across the pnt* columns.
df %>%
  # Stack pnt1..pnt3 into name/value pairs: one row per waypoint and column.
  pivot_longer(starts_with("pnt")) %>%
  # Spread the labels back out as columns. `values_fn = length` turns every
  # waypoint/label cell into a count, and `values_fill = 0L` fills in the
  # combinations that never occur.
  pivot_wider(
    id_cols = waypoint,
    names_from = value,
    values_from = value,
    values_fn = length,
    values_fill = 0L,
    names_sort = TRUE
  )
输出
waypoint A B C D
<int> <int> <int> <int> <int>
1 1 2 0 0 1
2 2 0 1 1 1
3 3 1 0 2 0
4 4 3 0 0 0
5 5 2 1 0 0
6 6 1 2 0 0
7 7 2 0 1 0
8 8 1 2 0 0
9 9 0 1 1 1
10 10 1 1 0 1
数据
使用 set.seed(1) 生成的数据:
# Literal definition of the example data frame (looks like dput() output).
# Note that pnt3 carries only the levels "A", "B" and "C", while pnt1 and
# pnt2 carry all four.
structure(list(waypoint = 1:10, pnt1 = structure(c(1L, 3L, 1L,
1L, 2L, 2L, 3L, 1L, 4L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"),
pnt2 = structure(c(4L, 4L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L
), .Label = c("A", "B", "C", "D"), class = "factor"), pnt3 = structure(c(1L,
2L, 3L, 1L, 1L, 1L, 1L, 2L, 3L, 1L), .Label = c("A", "B",
"C"), class = "factor")), row.names = c(NA, -10L), class = "data.frame")
您可以使用以下代码:
library(dplyr)
library(stringr)
library(purrr)
# Row-wise count of each label across the pnt* columns.
df %>%
  rowwise() %>%
  # c_across() gathers only the pnt* values of the current row, so the
  # waypoint id is never compared against a label. It is used instead of
  # cur_data(), which is deprecated as of dplyr 1.1.0. The list names become
  # the output column names directly.
  mutate(map_dfc(
    list(A = "A", B = "B", C = "C", D = "D"),
    ~ sum(c_across(starts_with("pnt")) == .x)
  )) %>%
  # Drop the row-wise grouping so later verbs work on whole columns again.
  ungroup() %>%
  select(-starts_with("pnt"))
使用 set.seed(1) 时的输出:
waypoint A B C D
<int> <int> <int> <int> <int>
1 1 2 0 0 1
2 2 0 1 1 1
3 3 1 0 2 0
4 4 3 0 0 0
5 5 2 1 0 0
6 6 1 2 0 0
7 7 2 0 1 0
8 8 1 2 0 0
9 9 0 1 1 1
10 10 1 1 0 1
你绝对可以在这里使用 Base R。
# Cross-tabulate waypoint against every value found in the remaining columns.
# The single waypoint column is recycled to line up with the flattened
# values; the blank column name keeps the second header of the table empty.
table(
  data.frame(
    df[1],
    " " = unlist(df[-1]),
    check.names = FALSE
  )
)
waypoint A B C D
1 2 0 0 1
2 0 1 1 1
3 1 0 2 0
4 3 0 0 0
5 2 1 0 0
6 1 2 0 0
7 2 0 1 0
8 1 2 0 0
9 0 1 1 1
10 1 1 0 1
甚至
# Reshape the pnt* columns to long format, then cross-tabulate by waypoint.
# `varying = -1` selects every column except the first, and `sep = ""`
# splits names such as "pnt1" into the variable "pnt" and the time 1.
# Argument names are written out in full rather than partially matched.
xtabs(
  ~ waypoint + pnt,
  data = reshape(df, varying = -1, direction = "long", sep = "")
)
pnt
waypoint A B C D
1 2 0 0 1
2 0 1 1 1
3 1 0 2 0
4 3 0 0 0
5 2 1 0 0
6 1 2 0 0
7 2 0 1 0
8 1 2 0 0
9 0 1 1 1
10 1 1 0 1
如果需要得到数据框,可以用 as.data.frame.matrix() 函数包裹整段代码。
janitor 包也很有帮助:
# NOTE(review): the data below is written out literally and no random draw
# is visible in this snippet, so this seed does not appear to affect the
# result - confirm before removing.
set.seed(1)
# Example data as a literal; pnt3 only has the levels "A", "B" and "C".
df <- structure(list(waypoint = 1:10, pnt1 = structure(c(1L, 3L, 1L, 1L, 2L, 2L, 3L, 1L, 4L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"), pnt2 = structure(c(4L, 4L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L ), .Label = c("A", "B", "C", "D"), class = "factor"), pnt3 = structure(c(1L, 2L, 3L, 1L, 1L, 1L, 1L, 2L, 3L, 1L), .Label = c("A", "B", "C"), class = "factor")), row.names = c(NA, -10L), class = "data.frame")
library(janitor)
library(tidyverse)
# Stack every column except waypoint into name/value pairs, then let
# tabyl() cross-tabulate waypoint against the stacked values.
df %>%
  pivot_longer(cols = -waypoint) %>%
  tabyl(waypoint, value)
waypoint A B C D
1 2 0 0 1
2 0 1 1 1
3 1 0 2 0
4 3 0 0 0
5 2 1 0 0
6 1 2 0 0
7 2 0 1 0
8 1 2 0 0
9 0 1 1 1
10 1 1 0 1
考虑以下数据框:
# Build a 10-row example data frame: a waypoint id plus three factor columns
# (pnt1..pnt3), each drawn at random (with replacement) from the labels in `x`.
x <- c("A", "B", "C", "D")
df <- data.frame(waypoint = 1:10)
# The columns are filled in the order pnt1, pnt2, pnt3 so the sequence of
# random draws stays the same. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
df$pnt1 <- as.factor(sample(x, 10, replace = TRUE))
df$pnt2 <- as.factor(sample(x, 10, replace = TRUE))
df$pnt3 <- as.factor(sample(x, 10, replace = TRUE))
df
我想按如下方式汇总此数据框:对每个航点(waypoint),统计原始 df 的 pnt1、pnt2、pnt3 三列中各个取值出现的次数,并将结果放入新列 A、B、C、D。结果的框架如下所示:
# Empty template of the desired result: one row per waypoint and one
# NA-filled column for each of the labels A-D.
df2 <- data.frame(waypoint = 1:10, A = NA, B = NA, C = NA, D = NA)
我如何编写此代码以生成类似于 df2 且填充了正确值的结果?
library(tidyr)
# Count, per waypoint, how often each label occurs across the pnt* columns.
df %>%
  # Stack pnt1..pnt3 into name/value pairs: one row per waypoint and column.
  pivot_longer(starts_with("pnt")) %>%
  # Spread the labels back out as columns. `values_fn = length` turns every
  # waypoint/label cell into a count, and `values_fill = 0L` fills in the
  # combinations that never occur.
  pivot_wider(
    id_cols = waypoint,
    names_from = value,
    values_from = value,
    values_fn = length,
    values_fill = 0L,
    names_sort = TRUE
  )
输出
waypoint A B C D
<int> <int> <int> <int> <int>
1 1 2 0 0 1
2 2 0 1 1 1
3 3 1 0 2 0
4 4 3 0 0 0
5 5 2 1 0 0
6 6 1 2 0 0
7 7 2 0 1 0
8 8 1 2 0 0
9 9 0 1 1 1
10 10 1 1 0 1
数据
使用 set.seed(1) 生成的数据:
# Literal definition of the example data frame (looks like dput() output).
# Note that pnt3 carries only the levels "A", "B" and "C", while pnt1 and
# pnt2 carry all four.
structure(list(waypoint = 1:10, pnt1 = structure(c(1L, 3L, 1L,
1L, 2L, 2L, 3L, 1L, 4L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"),
pnt2 = structure(c(4L, 4L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L
), .Label = c("A", "B", "C", "D"), class = "factor"), pnt3 = structure(c(1L,
2L, 3L, 1L, 1L, 1L, 1L, 2L, 3L, 1L), .Label = c("A", "B",
"C"), class = "factor")), row.names = c(NA, -10L), class = "data.frame")
您可以使用以下代码:
library(dplyr)
library(stringr)
library(purrr)
# Row-wise count of each label across the pnt* columns.
df %>%
  rowwise() %>%
  # c_across() gathers only the pnt* values of the current row, so the
  # waypoint id is never compared against a label. It is used instead of
  # cur_data(), which is deprecated as of dplyr 1.1.0. The list names become
  # the output column names directly.
  mutate(map_dfc(
    list(A = "A", B = "B", C = "C", D = "D"),
    ~ sum(c_across(starts_with("pnt")) == .x)
  )) %>%
  # Drop the row-wise grouping so later verbs work on whole columns again.
  ungroup() %>%
  select(-starts_with("pnt"))
使用 set.seed(1) 时的输出:
waypoint A B C D
<int> <int> <int> <int> <int>
1 1 2 0 0 1
2 2 0 1 1 1
3 3 1 0 2 0
4 4 3 0 0 0
5 5 2 1 0 0
6 6 1 2 0 0
7 7 2 0 1 0
8 8 1 2 0 0
9 9 0 1 1 1
10 10 1 1 0 1
你绝对可以在这里使用 Base R。
# Cross-tabulate waypoint against every value found in the remaining columns.
# The single waypoint column is recycled to line up with the flattened
# values; the blank column name keeps the second header of the table empty.
table(
  data.frame(
    df[1],
    " " = unlist(df[-1]),
    check.names = FALSE
  )
)
waypoint A B C D
1 2 0 0 1
2 0 1 1 1
3 1 0 2 0
4 3 0 0 0
5 2 1 0 0
6 1 2 0 0
7 2 0 1 0
8 1 2 0 0
9 0 1 1 1
10 1 1 0 1
甚至
# Reshape the pnt* columns to long format, then cross-tabulate by waypoint.
# `varying = -1` selects every column except the first, and `sep = ""`
# splits names such as "pnt1" into the variable "pnt" and the time 1.
# Argument names are written out in full rather than partially matched.
xtabs(
  ~ waypoint + pnt,
  data = reshape(df, varying = -1, direction = "long", sep = "")
)
pnt
waypoint A B C D
1 2 0 0 1
2 0 1 1 1
3 1 0 2 0
4 3 0 0 0
5 2 1 0 0
6 1 2 0 0
7 2 0 1 0
8 1 2 0 0
9 0 1 1 1
10 1 1 0 1
如果需要得到数据框,可以用 as.data.frame.matrix() 函数包裹整段代码。
janitor 包也很有帮助:
# NOTE(review): the data below is written out literally and no random draw
# is visible in this snippet, so this seed does not appear to affect the
# result - confirm before removing.
set.seed(1)
# Example data as a literal; pnt3 only has the levels "A", "B" and "C".
df <- structure(list(waypoint = 1:10, pnt1 = structure(c(1L, 3L, 1L, 1L, 2L, 2L, 3L, 1L, 4L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"), pnt2 = structure(c(4L, 4L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L ), .Label = c("A", "B", "C", "D"), class = "factor"), pnt3 = structure(c(1L, 2L, 3L, 1L, 1L, 1L, 1L, 2L, 3L, 1L), .Label = c("A", "B", "C"), class = "factor")), row.names = c(NA, -10L), class = "data.frame")
library(janitor)
library(tidyverse)
# Stack every column except waypoint into name/value pairs, then let
# tabyl() cross-tabulate waypoint against the stacked values.
df %>%
  pivot_longer(cols = -waypoint) %>%
  tabyl(waypoint, value)
waypoint A B C D
1 2 0 0 1
2 0 1 1 1
3 1 0 2 0
4 3 0 0 0
5 2 1 0 0
6 1 2 0 0
7 2 0 1 0
8 1 2 0 0
9 0 1 1 1
10 1 1 0 1