按子组汇总 dplyr
summarise by subgroups dplyr
我是 dplyr 的新手,我正在努力使用 summarize 函数来获得像比例这样简单的东西。
我的数据集的一部分
# Example data: one row per city / month / species record, with a numeric
# `total` column.
city <- rep(c("A", "B"), each = 5)
month <- c("Jan", "Jan", "Jan", "Feb", "Feb", "Jan", "Jan", "Feb", "Feb", "Feb")
species <- c(
  "aegypti", "gambiae", "quinque", "aegypti", "quinque",
  "aegypti", "gambiae", "quinque", "aegypti", "quinque"
)
total <- c(20, 5, 25, 12, 23, 10, 10, 0, 20, 0)
# Column names are taken from the variable names.
df <- data.frame(city, month, species, total)
然后我要计算每个城市,每个月每个物种的比例。
我可以计算每个城市和每个月的总数
# Sum `total` within each city/month combination.
df %>%
  group_by(city, month) %>%
  summarise(total = sum(total))
# A tibble: 4 x 3
# Groups: city [2]
# city month total
# <fct> <fct> <dbl>
#1 A Feb 35
#2 A Jan 50
#3 B Feb 20
#4 B Jan 20
那我可以按物种分别计算
# Sum `total` within each city/month/species combination.
df %>%
  group_by(city, month, species) %>%
  summarise(total = sum(total))
# A tibble: 9 x 4
# Groups: city, month [4]
# city month species total
# <fct> <fct> <fct> <dbl>
#1 A Feb aegypti 12
#2 A Feb quinque 23
#3 A Jan aegypti 20
#4 A Jan gambiae 5
#5 A Jan quinque 25
#6 B Feb aegypti 20
#7 B Feb quinque 0
#8 B Jan aegypti 10
#9 B Jan gambiae 10
但我需要这样的东西来计算比例。我希望 table 看起来与此类似。
# A tibble: 9 x 6
# Groups: city, month [4]
# city month species total total1 prop
# <fct> <fct> <fct> <dbl> <dbl> <dbl>
#1 A Feb aegypti 12 35 34.29
#2 A Feb quinque 23 35 65.71
#3 A Jan aegypti 20 50 40.00
#4 A Jan gambiae 5 50 10.00
#5 A Jan quinque 25 50 50.00
#6 B Feb aegypti 20 20 100.00
#7 B Feb quinque 0 20 0
#8 B Jan aegypti 10 20 50.00
#9 B Jan gambiae 10 20 50.00
我一直在玩,但我找不到这样做的方法..
谢谢!
按 city 和 month 进行 group_by,然后在每个组内用 total 除以 sum(total),即可得到比例。
library(dplyr)
# Within each city/month group, replace every row's `total` with its share
# of the group's summed total, expressed as a percentage.
df %>%
  group_by(city, month) %>%
  mutate(total = total / sum(total) * 100)
# city month species total
# <fct> <fct> <fct> <dbl>
# 1 A Jan aegypti 40
# 2 A Jan gambiae 10
# 3 A Jan quinque 50
# 4 A Feb aegypti 34.3
# 5 A Feb quinque 65.7
# 6 B Jan aegypti 50
# 7 B Jan gambiae 50
# 8 B Feb quinque 0
# 9 B Feb aegypti 100
#10 B Feb quinque 0
或在基础 R 中:
# Base R equivalent: ave() returns the per-(city, month) sum aligned to each
# row. Multiply by 100 so the result is a percentage, matching the dplyr and
# data.table versions.
df$total <- with(df, total / ave(total, city, month, FUN = sum) * 100)
我们可以使用data.table
library(data.table)
# Convert `df` to a data.table by reference, then overwrite `total` in place
# with its percentage of the city/month group sum.
setDT(df)[
  ,
  total := total / sum(total) * 100,
  by = .(city, month)
]
我是 dplyr 的新手,我正在努力使用 summarize 函数来获得像比例这样简单的东西。
我的数据集的一部分
# Example data: one row per city / month / species record, with a numeric
# `total` column.
city <- rep(c("A", "B"), each = 5)
month <- c("Jan", "Jan", "Jan", "Feb", "Feb", "Jan", "Jan", "Feb", "Feb", "Feb")
species <- c(
  "aegypti", "gambiae", "quinque", "aegypti", "quinque",
  "aegypti", "gambiae", "quinque", "aegypti", "quinque"
)
total <- c(20, 5, 25, 12, 23, 10, 10, 0, 20, 0)
# Column names are taken from the variable names.
df <- data.frame(city, month, species, total)
然后我要计算每个城市,每个月每个物种的比例。 我可以计算每个城市和每个月的总数
# Sum `total` within each city/month combination.
df %>%
  group_by(city, month) %>%
  summarise(total = sum(total))
# A tibble: 4 x 3
# Groups: city [2]
# city month total
# <fct> <fct> <dbl>
#1 A Feb 35
#2 A Jan 50
#3 B Feb 20
#4 B Jan 20
那我可以按物种分别计算
# Sum `total` within each city/month/species combination.
df %>%
  group_by(city, month, species) %>%
  summarise(total = sum(total))
# A tibble: 9 x 4
# Groups: city, month [4]
# city month species total
# <fct> <fct> <fct> <dbl>
#1 A Feb aegypti 12
#2 A Feb quinque 23
#3 A Jan aegypti 20
#4 A Jan gambiae 5
#5 A Jan quinque 25
#6 B Feb aegypti 20
#7 B Feb quinque 0
#8 B Jan aegypti 10
#9 B Jan gambiae 10
但我需要这样的东西来计算比例。我希望 table 看起来与此类似。
# A tibble: 9 x 6
# Groups: city, month [4]
# city month species total total1 prop
# <fct> <fct> <fct> <dbl> <dbl> <dbl>
#1 A Feb aegypti 12 35 34.29
#2 A Feb quinque 23 35 65.71
#3 A Jan aegypti 20 50 40.00
#4 A Jan gambiae 5 50 10.00
#5 A Jan quinque 25 50 50.00
#6 B Feb aegypti 20 20 100.00
#7 B Feb quinque 0 20 0
#8 B Jan aegypti 10 20 50.00
#9 B Jan gambiae 10 20 50.00
我一直在玩,但我找不到这样做的方法.. 谢谢!
按 city 和 month 进行 group_by,然后在每个组内用 total 除以 sum(total),即可得到比例。
library(dplyr)
# Within each city/month group, replace every row's `total` with its share
# of the group's summed total, expressed as a percentage.
df %>%
  group_by(city, month) %>%
  mutate(total = total / sum(total) * 100)
# city month species total
# <fct> <fct> <fct> <dbl>
# 1 A Jan aegypti 40
# 2 A Jan gambiae 10
# 3 A Jan quinque 50
# 4 A Feb aegypti 34.3
# 5 A Feb quinque 65.7
# 6 B Jan aegypti 50
# 7 B Jan gambiae 50
# 8 B Feb quinque 0
# 9 B Feb aegypti 100
#10 B Feb quinque 0
或在基础 R 中:
# Base R equivalent: ave() returns the per-(city, month) sum aligned to each
# row. Multiply by 100 so the result is a percentage, matching the dplyr and
# data.table versions.
df$total <- with(df, total / ave(total, city, month, FUN = sum) * 100)
我们可以使用data.table
library(data.table)
# Convert `df` to a data.table by reference, then overwrite `total` in place
# with its percentage of the city/month group sum.
setDT(df)[
  ,
  total := total / sum(total) * 100,
  by = .(city, month)
]