如何根据数据集列值获取计数结果
How to get count results based on a dataset column values
我有以下数据框:
Group User Manager Client Other data1 Other data2
AG TRUE TRUE FALSE a b
AG TRUE TRUE FALSE c d
JU FALSE FALSE TRUE d e
JU TRUE TRUE FALSE f g
我希望得到这样的结果:
USER MANAGER CLIENT COUNT-AG COUNT-JU
TRUE TRUE FALSE 2 1
FALSE FALSE TRUE 0 1
我只能使用这样的方法获得一组或另一组的结果:
# Count rows per User/Manager combination, restricted to the "AG" group.
# Straight ASCII quotes are required here: typographic quotes are a syntax
# error in R.
f <- filter(dataframe, Group == "AG")
result <- group_by(f, User, Manager) %>%
  summarize(count = n())
但我找不到一种方法来获得同一结果中两组的计数。
有什么想法吗?
在 `group_by` 中同时包含 'Group' 之后,我们可以使用 `tidyr` 中的 `spread` 将数据从 'long'(长)格式重塑为 'wide'(宽)格式。
library(dplyr)
library(tidyr)

# Count rows for every User/Manager/Client/Group combination, then reshape
# from long to wide so that each Group value becomes its own count column.
# Combinations that never occur are filled with 0.
# (In current tidyr, spread() is superseded by pivot_wider().)
df1 %>%
  group_by(User, Manager, Client, Group) %>%
  summarise(Count = n()) %>%
  # summarise() only peels off the last grouping level; drop the rest so the
  # result is not silently left grouped by User/Manager/Client.
  ungroup() %>%
  spread(Group, Count, fill = 0) %>%
  rename(COUNT_AG = AG, COUNT_JU = JU)
#   User Manager Client COUNT_AG COUNT_JU
#  (lgl)   (lgl)  (lgl)    (dbl)    (dbl)
#1 FALSE   FALSE   TRUE        0        1
#2  TRUE    TRUE  FALSE        2        1
如果使用 `data.table`,先将 'data.frame' 转换为 'data.table'(`setDT(df1)`),然后使用 `dcast`,并将 `fun.aggregate` 指定为 `length`。
library(data.table)

# setDT() converts df1 to a data.table by reference; dcast() then counts the
# rows per User/Manager/Client for each Group. value.var is given explicitly
# so dcast() does not have to guess a value column -- any column works here,
# because length() only counts rows.
dcast(setDT(df1), User + Manager + Client ~ Group,
      fun.aggregate = length, value.var = "Other_data1")
数据
# Example data: four rows in two groups (AG, JU), three logical flag columns
# and two additional character columns.
df1 <- data.frame(
  Group = c("AG", "AG", "JU", "JU"),
  User = c(TRUE, TRUE, FALSE, TRUE),
  Manager = c(TRUE, TRUE, FALSE, TRUE),
  Client = c(FALSE, FALSE, TRUE, FALSE),
  Other_data1 = c("a", "c", "d", "f"),
  Other_data2 = c("b", "d", "e", "g"),
  stringsAsFactors = FALSE
)
正如我在评论中提到的,您必须使用 data.table。转换在第一行代码中完成(同时通过 .N 完成计数);第二行代码则完成您所要求的重塑:
library(data.table)

# Count the rows (.N) for every User/Manager/Client/Group combination.
dt <- data.table(df1)[, .N, by = .(User, Manager, Client, Group)]
# Reshape to wide: one count column per Group, 0 for absent combinations.
dcast(dt, User + Manager + Client ~ Group, value.var = "N", fill = 0L)
#     User Manager Client AG JU
# 1: FALSE   FALSE   TRUE  0  1
# 2:  TRUE    TRUE  FALSE  2  1
我有以下数据框:
Group User Manager Client Other data1 Other data2
AG TRUE TRUE FALSE a b
AG TRUE TRUE FALSE c d
JU FALSE FALSE TRUE d e
JU TRUE TRUE FALSE f g
我希望得到这样的结果:
USER MANAGER CLIENT COUNT-AG COUNT-JU
TRUE TRUE FALSE 2 1
FALSE FALSE TRUE 0 1
我只能使用这样的方法获得一组或另一组的结果:
# Count rows per User/Manager combination, restricted to the "AG" group.
# Straight ASCII quotes are required here: typographic quotes are a syntax
# error in R.
f <- filter(dataframe, Group == "AG")
result <- group_by(f, User, Manager) %>%
  summarize(count = n())
但我找不到一种方法来获得同一结果中两组的计数。 有什么想法吗?
在 `group_by` 中同时包含 'Group' 之后,我们可以使用 `tidyr` 中的 `spread` 将数据从 'long'(长)格式重塑为 'wide'(宽)格式。
library(dplyr)
library(tidyr)

# Count rows for every User/Manager/Client/Group combination, then reshape
# from long to wide so that each Group value becomes its own count column.
# Combinations that never occur are filled with 0.
# (In current tidyr, spread() is superseded by pivot_wider().)
df1 %>%
  group_by(User, Manager, Client, Group) %>%
  summarise(Count = n()) %>%
  # summarise() only peels off the last grouping level; drop the rest so the
  # result is not silently left grouped by User/Manager/Client.
  ungroup() %>%
  spread(Group, Count, fill = 0) %>%
  rename(COUNT_AG = AG, COUNT_JU = JU)
#   User Manager Client COUNT_AG COUNT_JU
#  (lgl)   (lgl)  (lgl)    (dbl)    (dbl)
#1 FALSE   FALSE   TRUE        0        1
#2  TRUE    TRUE  FALSE        2        1
如果使用 `data.table`,先将 'data.frame' 转换为 'data.table'(`setDT(df1)`),然后使用 `dcast`,并将 `fun.aggregate` 指定为 `length`。
library(data.table)

# setDT() converts df1 to a data.table by reference; dcast() then counts the
# rows per User/Manager/Client for each Group. value.var is given explicitly
# so dcast() does not have to guess a value column -- any column works here,
# because length() only counts rows.
dcast(setDT(df1), User + Manager + Client ~ Group,
      fun.aggregate = length, value.var = "Other_data1")
数据
# Example data: four rows in two groups (AG, JU), three logical flag columns
# and two additional character columns.
df1 <- data.frame(
  Group = c("AG", "AG", "JU", "JU"),
  User = c(TRUE, TRUE, FALSE, TRUE),
  Manager = c(TRUE, TRUE, FALSE, TRUE),
  Client = c(FALSE, FALSE, TRUE, FALSE),
  Other_data1 = c("a", "c", "d", "f"),
  Other_data2 = c("b", "d", "e", "g"),
  stringsAsFactors = FALSE
)
正如我在评论中提到的,您必须使用 data.table。转换在第一行代码中完成(同时通过 .N 完成计数);第二行代码则完成您所要求的重塑:
library(data.table)

# Count the rows (.N) for every User/Manager/Client/Group combination.
dt <- data.table(df1)[, .N, by = .(User, Manager, Client, Group)]
# Reshape to wide: one count column per Group, 0 for absent combinations.
dcast(dt, User + Manager + Client ~ Group, value.var = "N", fill = 0L)
#     User Manager Client AG JU
# 1: FALSE   FALSE   TRUE  0  1
# 2:  TRUE    TRUE  FALSE  2  1