构建应急 table
Building contingency table
我有一个 table 这样的:
df <- data.frame(P1 = c(1,0,0,0,0,0,"A"),
P2 = c(0,-2,1,2,1,0,"A"),
P3 = c(-1,2,0,2,1,0,"B"),
P4 = c(2,0,-1,0,-1,0,"B"),
Names = c("G1","G2","G3","G1","G2","G3","Group"),
stringsAsFactors = FALSE)
变成
Names P1 P2 P3 P4
G1 1 0 -1 2
G2 0 -2 2 0
G3 0 1 0 -1
G1 0 2 2 0
G2 0 1 1 -1
G3 0 0 0 0
Group A A B B
这里,A
和B
是P1, P2, P3, P4
的分组变量。
我想为 Ids
(G1
, G2
...), Group
(A
,B
), 和 Var
(-2,-1,0,1,2
) table 例如:
Id Group Var Count
G1 A -2 0
G1 A -1 0
G1 A 0 1
G1 A 1 1
G1 A 2 0
G1 B -2 0
G1 B -1 1
G1 B 0 0
G1 B 1 0
G1 B 2 1
G2 A -2 1
G2 A -1 0
G2 A 0 1
...
有没有一种方法可以在不使用大量循环的情况下在 R 中做到这一点?
假设您要将 P1
和 P2
列分组为 A
,将 P3
和 P4
列分组为 B
,您可以使用 data.table
-package 按如下方式处理它:
library(data.table)
DT <- melt(melt(setDT(df),
measure.vars = list(c(2,3),c(4,5)),
value.name = c("A","B")),
id = 1, measure.vars = 3:4, variable.name = 'group'
)[order(Id,group)][, val2 := value]
DT[CJ(Id = Id, group = group, value = value, unique = TRUE)
, on = .(Id, group, value)
][, .(counts = sum(!is.na(val2))), by = .(Id, group, value)]
这导致:
Id group value counts
1: G1 A -2 0
2: G1 A -1 0
3: G1 A 0 2
4: G1 A 1 1
5: G1 A 2 1
6: G1 B -2 0
7: G1 B -1 1
8: G1 B 0 1
9: G1 B 1 0
10: G1 B 2 2
11: G2 A -2 1
12: G2 A -1 0
13: G2 A 0 2
14: G2 A 1 1
15: G2 A 2 0
16: G2 B -2 0
17: G2 B -1 1
18: G2 B 0 1
19: G2 B 1 1
20: G2 B 2 1
21: G3 A -2 0
22: G3 A -1 0
23: G3 A 0 3
24: G3 A 1 1
25: G3 A 2 0
26: G3 B -2 0
27: G3 B -1 1
28: G3 B 0 3
29: G3 B 1 0
30: G3 B 2 0
已用数据:
df <- read.table(text="Id P1 P2 P3 P4
G1 1 0 -1 2
G2 0 -2 2 0
G3 0 1 0 -1
G1 0 2 2 0
G2 0 1 1 -1
G3 0 0 0 0", header=TRUE, stringsAsFactors = FALSE)
请注意,我省略了 'Group' 行,因为您在评论中指出这些只是向哪些组指示 P1
- P4
列应该属于.
有
library(tidyverse)
df <- read.table(text="Id P1 P2 P3 P4
G1 1 0 -1 2
G2 0 -2 2 0
G3 0 1 0 -1
G1 0 2 2 0
G2 0 1 1 -1
G3 0 0 0 0", header=TRUE, stringsAsFactors = FALSE)
我们重塑 table 并重新编码 group
中的 P*
变量。
然后我们计算并完成缺失的案例。结果:
df %>%
gather(P1, P2, P3, P4, key = "p", value = "v") %>%
mutate(group = ifelse(p %in% c("P1", "P2"), "A", "B")) %>%
group_by(Id, group, v) %>%
summarise(Count = n()) %>%
ungroup() %>%
complete(Id, group, v, fill = list("Count" = 0))
如果您不需要输出中的所有组合,只需使用:
df %>%
gather(P1, P2, P3, P4, key = "p", value = "v") %>%
mutate(group = ifelse(p %in% c("P1", "P2"), "A", "B")) %>%
group_by(Id, group, v) %>%
summarise(Count = n())
# A tibble: 17 x 4
# Groups: Id, group [?]
Id group v Count
<chr> <chr> <int> <int>
1 G1 A 0 2
2 G1 A 1 1
3 G1 A 2 1
4 G1 B -1 1
5 G1 B 0 1
6 G1 B 2 2
7 G2 A -2 1
8 G2 A 0 2
9 G2 A 1 1
10 G2 B -1 1
11 G2 B 0 1
12 G2 B 1 1
13 G2 B 2 1
14 G3 A 0 3
15 G3 A 1 1
16 G3 B -1 1
17 G3 B 0 3
我有一个 table 这样的:
df <- data.frame(P1 = c(1,0,0,0,0,0,"A"),
P2 = c(0,-2,1,2,1,0,"A"),
P3 = c(-1,2,0,2,1,0,"B"),
P4 = c(2,0,-1,0,-1,0,"B"),
Names = c("G1","G2","G3","G1","G2","G3","Group"),
stringsAsFactors = FALSE)
变成
Names P1 P2 P3 P4
G1 1 0 -1 2
G2 0 -2 2 0
G3 0 1 0 -1
G1 0 2 2 0
G2 0 1 1 -1
G3 0 0 0 0
Group A A B B
这里,A
和B
是P1, P2, P3, P4
的分组变量。
我想为 Ids
(G1
, G2
...), Group
(A
,B
), 和 Var
(-2,-1,0,1,2
) table 例如:
Id Group Var Count
G1 A -2 0
G1 A -1 0
G1 A 0 1
G1 A 1 1
G1 A 2 0
G1 B -2 0
G1 B -1 1
G1 B 0 0
G1 B 1 0
G1 B 2 1
G2 A -2 1
G2 A -1 0
G2 A 0 1
...
有没有一种方法可以在不使用大量循环的情况下在 R 中做到这一点?
假设您要将 P1
和 P2
列分组为 A
,将 P3
和 P4
列分组为 B
,您可以使用 data.table
-package 按如下方式处理它:
library(data.table)
DT <- melt(melt(setDT(df),
measure.vars = list(c(2,3),c(4,5)),
value.name = c("A","B")),
id = 1, measure.vars = 3:4, variable.name = 'group'
)[order(Id,group)][, val2 := value]
DT[CJ(Id = Id, group = group, value = value, unique = TRUE)
, on = .(Id, group, value)
][, .(counts = sum(!is.na(val2))), by = .(Id, group, value)]
这导致:
Id group value counts 1: G1 A -2 0 2: G1 A -1 0 3: G1 A 0 2 4: G1 A 1 1 5: G1 A 2 1 6: G1 B -2 0 7: G1 B -1 1 8: G1 B 0 1 9: G1 B 1 0 10: G1 B 2 2 11: G2 A -2 1 12: G2 A -1 0 13: G2 A 0 2 14: G2 A 1 1 15: G2 A 2 0 16: G2 B -2 0 17: G2 B -1 1 18: G2 B 0 1 19: G2 B 1 1 20: G2 B 2 1 21: G3 A -2 0 22: G3 A -1 0 23: G3 A 0 3 24: G3 A 1 1 25: G3 A 2 0 26: G3 B -2 0 27: G3 B -1 1 28: G3 B 0 3 29: G3 B 1 0 30: G3 B 2 0
已用数据:
df <- read.table(text="Id P1 P2 P3 P4
G1 1 0 -1 2
G2 0 -2 2 0
G3 0 1 0 -1
G1 0 2 2 0
G2 0 1 1 -1
G3 0 0 0 0", header=TRUE, stringsAsFactors = FALSE)
请注意,我省略了 'Group' 行,因为您在评论中指出这些只是向哪些组指示 P1
- P4
列应该属于.
有
library(tidyverse)
df <- read.table(text="Id P1 P2 P3 P4
G1 1 0 -1 2
G2 0 -2 2 0
G3 0 1 0 -1
G1 0 2 2 0
G2 0 1 1 -1
G3 0 0 0 0", header=TRUE, stringsAsFactors = FALSE)
我们重塑 table 并重新编码 group
中的 P*
变量。
然后我们计算并完成缺失的案例。结果:
df %>%
gather(P1, P2, P3, P4, key = "p", value = "v") %>%
mutate(group = ifelse(p %in% c("P1", "P2"), "A", "B")) %>%
group_by(Id, group, v) %>%
summarise(Count = n()) %>%
ungroup() %>%
complete(Id, group, v, fill = list("Count" = 0))
如果您不需要输出中的所有组合,只需使用:
df %>%
gather(P1, P2, P3, P4, key = "p", value = "v") %>%
mutate(group = ifelse(p %in% c("P1", "P2"), "A", "B")) %>%
group_by(Id, group, v) %>%
summarise(Count = n())
# A tibble: 17 x 4
# Groups: Id, group [?]
Id group v Count
<chr> <chr> <int> <int>
1 G1 A 0 2
2 G1 A 1 1
3 G1 A 2 1
4 G1 B -1 1
5 G1 B 0 1
6 G1 B 2 2
7 G2 A -2 1
8 G2 A 0 2
9 G2 A 1 1
10 G2 B -1 1
11 G2 B 0 1
12 G2 B 1 1
13 G2 B 2 1
14 G3 A 0 3
15 G3 A 1 1
16 G3 B -1 1
17 G3 B 0 3