如何计算每个类别中的非零值
how to count non zero values in each category
这是我的数据
# Example data (dput() output): a 59-row data.frame with three columns --
#   team_3_F : character group label; note that "team " and "browingal "
#              carry a trailing space while "site" and "newyorkish" do not
#   name     : character, either "AAA_US" or "BBB_US"
#   value    : integer, mostly zeros
df <- structure(list(team_3_F = c("team ", "team ", "site", "site",
"team ", "team ", "newyorkish", "newyorkish", "team ", "team ",
"newyorkish", "newyorkish", "browingal ", "browingal ", "site",
"site", "browingal ", "browingal ", "browingal ", "browingal ",
"team ", "team ", "team ", "team ", "team ", "team ", "team ",
"team ", "team ", "team ", "site", "site", "browingal ", "browingal ",
"browingal ", "browingal ", "browingal ", "browingal ", "browingal ",
"browingal ", "browingal ", "browingal ", "team ", "team ", "team ",
"team ", "newyorkish", "newyorkish", "browingal ", "browingal ",
"newyorkish", "newyorkish", "browingal ", "browingal ", "team ",
"team ", "browingal ", "browingal ", "team "), name = c("AAA_US",
"BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US",
"AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US",
"BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US",
"AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US",
"BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US",
"AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US",
"BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US",
"AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US",
"BBB_US", "AAA_US"), value = c(0L, 0L, 0L, 8L, 1L, 0L, 11L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 45L,
0L, 0L, 0L, 18L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 0L, 3L,
0L, 2L, 0L, 2L, 1L, 0L, 4L, 0L, 88L, 0L, 0L, 1L, 5L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 19L)), row.names = c(NA, -59L), class = "data.frame")
我想统计每个组(team_3_F 与 name 的每种组合)中非零值的个数,因此应该得到如下输出:
browingal AAA_US 1
browingal BBB_US 7
newyorkish AAA_US 4
newyorkish BBB_US 0
site AAA_US 0
site BBB_US 1
team AAA_US 6
team BBB_US 0
我尝试过用 group_by 分组,但没有得到想要的结果:
# Asker's original attempt, kept exactly as posted. It does not yield the
# desired per-group counts because:
#   - it groups by `name` only, not by both `team_3_F` and `name`;
#   - min(x[x != 0]) returns the smallest non-zero value, not how many
#     non-zero values there are;
#   - summarise_each() is deprecated in favour of across().
df %>% group_by(name) %>% summarise_each(function(x) min(x[x != 0]),value)
您可以使用以下解决方案。根据文档:`mutate_each()` and `summarise_each()` are deprecated in favor of the new `across()` function that works with `summarize()` & `mutate()`.
library(dplyr)

# For every (team_3_F, name) pair, count how many entries of `value` are
# non-zero: `v != 0` gives a logical vector and sum() counts its TRUEs.
df %>%
  group_by(team_3_F, name) %>%
  summarise(
    across(.cols = value, .fns = function(v) sum(v != 0))
  )
# A tibble: 8 x 3
# Groups: team_3_F [4]
team_3_F name value
<chr> <chr> <int>
1 "browingal " AAA_US 1
2 "browingal " BBB_US 7
3 "newyorkish" AAA_US 4
4 "newyorkish" BBB_US 0
5 "site" AAA_US 0
6 "site" BBB_US 1
7 "team " AAA_US 6
8 "team " BBB_US 0
使用 `data.table` 的另一个选项:
library(data.table)

# Work on a data.table copy of the data frame.
dt <- as.data.table(df)

# Number of non-zero `value` entries per (team_3_F, name) pair. The result
# column is named V1, the name data.table assigns to an unnamed j result.
dt[, .(V1 = sum(value != 0)), by = .(team_3_F, name)]
team_3_F name V1
1: team AAA_US 6
2: team BBB_US 0
3: site AAA_US 0
4: site BBB_US 1
5: newyorkish AAA_US 4
6: newyorkish BBB_US 0
7: browingal AAA_US 1
8: browingal BBB_US 7
这是我的数据
# Example data (dput() output): a 59-row data.frame with three columns --
#   team_3_F : character group label; note that "team " and "browingal "
#              carry a trailing space while "site" and "newyorkish" do not
#   name     : character, either "AAA_US" or "BBB_US"
#   value    : integer, mostly zeros
df <- structure(list(team_3_F = c("team ", "team ", "site", "site",
"team ", "team ", "newyorkish", "newyorkish", "team ", "team ",
"newyorkish", "newyorkish", "browingal ", "browingal ", "site",
"site", "browingal ", "browingal ", "browingal ", "browingal ",
"team ", "team ", "team ", "team ", "team ", "team ", "team ",
"team ", "team ", "team ", "site", "site", "browingal ", "browingal ",
"browingal ", "browingal ", "browingal ", "browingal ", "browingal ",
"browingal ", "browingal ", "browingal ", "team ", "team ", "team ",
"team ", "newyorkish", "newyorkish", "browingal ", "browingal ",
"newyorkish", "newyorkish", "browingal ", "browingal ", "team ",
"team ", "browingal ", "browingal ", "team "), name = c("AAA_US",
"BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US",
"AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US",
"BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US",
"AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US",
"BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US",
"AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US",
"BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US",
"AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US", "BBB_US", "AAA_US",
"BBB_US", "AAA_US"), value = c(0L, 0L, 0L, 8L, 1L, 0L, 11L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 45L,
0L, 0L, 0L, 18L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 0L, 3L,
0L, 2L, 0L, 2L, 1L, 0L, 4L, 0L, 88L, 0L, 0L, 1L, 5L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 19L)), row.names = c(NA, -59L), class = "data.frame")
我想统计每个组(team_3_F 与 name 的每种组合)中非零值的个数,因此应该得到如下输出:
browingal AAA_US 1
browingal BBB_US 7
newyorkish AAA_US 4
newyorkish BBB_US 0
site AAA_US 0
site BBB_US 1
team AAA_US 6
team BBB_US 0
我尝试过用 group_by 分组,但没有得到想要的结果:
# Asker's original attempt, kept exactly as posted. It does not yield the
# desired per-group counts because:
#   - it groups by `name` only, not by both `team_3_F` and `name`;
#   - min(x[x != 0]) returns the smallest non-zero value, not how many
#     non-zero values there are;
#   - summarise_each() is deprecated in favour of across().
df %>% group_by(name) %>% summarise_each(function(x) min(x[x != 0]),value)
您可以使用以下解决方案。根据文档:`mutate_each()` and `summarise_each()` are deprecated in favor of the new `across()` function that works with `summarize()` & `mutate()`.
library(dplyr)

# For every (team_3_F, name) pair, count how many entries of `value` are
# non-zero: `v != 0` gives a logical vector and sum() counts its TRUEs.
df %>%
  group_by(team_3_F, name) %>%
  summarise(
    across(.cols = value, .fns = function(v) sum(v != 0))
  )
# A tibble: 8 x 3
# Groups: team_3_F [4]
team_3_F name value
<chr> <chr> <int>
1 "browingal " AAA_US 1
2 "browingal " BBB_US 7
3 "newyorkish" AAA_US 4
4 "newyorkish" BBB_US 0
5 "site" AAA_US 0
6 "site" BBB_US 1
7 "team " AAA_US 6
8 "team " BBB_US 0
使用 `data.table` 的另一个选项:
library(data.table)

# Work on a data.table copy of the data frame.
dt <- as.data.table(df)

# Number of non-zero `value` entries per (team_3_F, name) pair. The result
# column is named V1, the name data.table assigns to an unnamed j result.
dt[, .(V1 = sum(value != 0)), by = .(team_3_F, name)]
team_3_F name V1
1: team AAA_US 6
2: team BBB_US 0
3: site AAA_US 0
4: site BBB_US 1
5: newyorkish AAA_US 4
6: newyorkish BBB_US 0
7: browingal AAA_US 1
8: browingal BBB_US 7