多列动态分组
Dynamic grouping of multiple columns
我有一个数据框,其中包含一个 ID 列和多个月份列(每个月一列)。每次运行程序时,月份列的数量都会有所不同。我正在编写 R 代码,把月份列的名称提取到一个向量中,然后对向量中的每个元素按 ID 变量分组并计算每组的总和。以下是我使用的代码。一切运行正常,只是得到的结果是整体总和,而不是各组的总和。非常感谢任何帮助。
Current pattern of result:
newdf for a given month:
ID summ_mon_2017_12
1 20
2 20
3 20
Expected pattern of result:
newdf for a given month:
ID summ_mon_2017_12
1 8
2 5
3 7
library(dplyr)
# Sample data: 14 rows spread over three IDs (5, 3 and 6 rows respectively),
# plus six month columns filled with standard-normal random values.
ID <- rep(c(1, 2, 3), times = c(5, 3, 6))
mon_2017_12 <- rnorm(14)
mon_2018_01 <- rnorm(14)
mon_2018_02 <- rnorm(14)
mon_2018_03 <- rnorm(14)
mon_2018_04 <- rnorm(14)
mon_2018_05 <- rnorm(14)
# Column names are taken from the variable names passed in.
groupsum <- data.frame(
  ID,
  mon_2017_12,
  mon_2018_01,
  mon_2018_02,
  mon_2018_03,
  mon_2018_04,
  mon_2018_05
)
# Collect the month column names: keep the columns whose name contains
# "mon_", take the first 11 characters of each name, and drop duplicates.
month_vec <- groupsum %>%
  select(contains("mon_")) %>%
  names() %>%
  substring(1, 11) %>%
  unique() %>%
  as.vector()
# Sum one column of `df` within each `ID` group.
#
# Args:
#   df: data frame containing an `ID` column and the column named by `n1`.
#   n1: name of the column to sum, as a single string (e.g. "mon_2017_12").
#
# Side effects: prints the generated column name and the summary table.
# Returns: the summary (one row per ID, columns `ID` and `summ_<n1>`),
#   invisibly, because it is the value of the final print() call.
addsummvar <- function(df, n1) {
  newvar <- paste("summ", n1, sep = "_")
  # The name is printed twice so the console output stays as before; the
  # second print used to go through a redundant copy of the same string.
  print(newvar)
  print(newvar)
  newdf <- df %>%
    group_by(ID) %>%
    summarise(
      # `.data[[n1]]` reads the column from the current group. Indexing the
      # outer `df` here would sum the whole column for every group.
      !!newvar := sum(.data[[n1]])
    )
  print(newdf)
}
# Summarise each month column in turn. seq_along() yields no iterations when
# month_vec is empty, whereas 1:length(month_vec) would iterate over 1 and 0.
# Note: summ_del is overwritten on every pass, so after the loop it holds only
# the summary for the last month.
for (i in seq_along(month_vec)) {
  summ_del <- addsummvar(df = groupsum, n1 = month_vec[i])
}
这一行不能帮助您达到目的吗?
library(dplyr)
# Sample data: three IDs (5, 3 and 6 rows) and six random month columns.
ID <- rep(c(1, 2, 3), times = c(5, 3, 6))
mon_2017_12 <- rnorm(14)
mon_2018_01 <- rnorm(14)
mon_2018_02 <- rnorm(14)
mon_2018_03 <- rnorm(14)
mon_2018_04 <- rnorm(14)
mon_2018_05 <- rnorm(14)
groupsum <- data.frame(
  ID,
  mon_2017_12,
  mon_2018_01,
  mon_2018_02,
  mon_2018_03,
  mon_2018_04,
  mon_2018_05
)
# Sum every non-grouping column within each ID in a single call.
# NOTE(review): summarise_all() is superseded in recent dplyr releases by
# summarise(across(everything(), sum)).
groupsum %>%
  group_by(ID) %>%
  summarise_all(.funs = sum)
输出:
> groupsum %>% group_by(ID) %>% summarise_all(.funs = sum)
# A tibble: 3 x 7
ID mon_2017_12 mon_2018_01 mon_2018_02 mon_2018_03 mon_2018_04 mon_2018_05
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1.00 -0.00419 0.230 2.87 -1.38 -2.63 2.12
2 2.00 -1.12 1.45 -1.08 -0.907 -0.966 2.86
3 3.00 -2.11 3.49 0.282 -1.35 6.30 -3.60
更新代码,改用 summarise_if 以过滤掉非数字列:
library(dplyr)
# Fixed seed so the random month values are reproducible.
set.seed(123)
ID <- rep(c(1, 2, 3), times = c(5, 3, 6))
mon_2017_12 <- rnorm(14)
mon_2018_01 <- rnorm(14)
mon_2018_02 <- rnorm(14)
mon_2018_03 <- rnorm(14)
mon_2018_04 <- rnorm(14)
mon_2018_05 <- rnorm(14)
groupsum <- data.frame(
  ID,
  mon_2017_12,
  mon_2018_01,
  mon_2018_02,
  mon_2018_03,
  mon_2018_04,
  mon_2018_05
)
# Add a character column to show that non-numeric columns are left out.
groupsum$var2 <- letters[1:14]
head(groupsum)
# Sum only the numeric columns within each ID.
# NOTE(review): summarise_if() is superseded in recent dplyr releases by
# summarise(across(where(is.numeric), sum)).
groupsum %>%
  group_by(ID) %>%
  summarise_if(is.numeric, sum)
我有一个数据框,其中包含一个 ID 列和多个月份列(每个月一列)。每次运行程序时,月份列的数量都会有所不同。我正在编写 R 代码,把月份列的名称提取到一个向量中,然后对向量中的每个元素按 ID 变量分组并计算每组的总和。以下是我使用的代码。一切运行正常,只是得到的结果是整体总和,而不是各组的总和。非常感谢任何帮助。
Current pattern of result:
newdf for a given month:
ID summ_mon_2017_12
1 20
2 20
3 20
Expected pattern of result:
newdf for a given month:
ID summ_mon_2017_12
1 8
2 5
3 7
library(dplyr)
# Sample data: 14 rows spread over three IDs (5, 3 and 6 rows respectively),
# plus six month columns filled with standard-normal random values.
ID <- rep(c(1, 2, 3), times = c(5, 3, 6))
mon_2017_12 <- rnorm(14)
mon_2018_01 <- rnorm(14)
mon_2018_02 <- rnorm(14)
mon_2018_03 <- rnorm(14)
mon_2018_04 <- rnorm(14)
mon_2018_05 <- rnorm(14)
# Column names are taken from the variable names passed in.
groupsum <- data.frame(
  ID,
  mon_2017_12,
  mon_2018_01,
  mon_2018_02,
  mon_2018_03,
  mon_2018_04,
  mon_2018_05
)
# Collect the month column names: keep the columns whose name contains
# "mon_", take the first 11 characters of each name, and drop duplicates.
month_vec <- groupsum %>%
  select(contains("mon_")) %>%
  names() %>%
  substring(1, 11) %>%
  unique() %>%
  as.vector()
# Sum one column of `df` within each `ID` group.
#
# Args:
#   df: data frame containing an `ID` column and the column named by `n1`.
#   n1: name of the column to sum, as a single string (e.g. "mon_2017_12").
#
# Side effects: prints the generated column name and the summary table.
# Returns: the summary (one row per ID, columns `ID` and `summ_<n1>`),
#   invisibly, because it is the value of the final print() call.
addsummvar <- function(df, n1) {
  newvar <- paste("summ", n1, sep = "_")
  # The name is printed twice so the console output stays as before; the
  # second print used to go through a redundant copy of the same string.
  print(newvar)
  print(newvar)
  newdf <- df %>%
    group_by(ID) %>%
    summarise(
      # `.data[[n1]]` reads the column from the current group. Indexing the
      # outer `df` here would sum the whole column for every group.
      !!newvar := sum(.data[[n1]])
    )
  print(newdf)
}
# Summarise each month column in turn. seq_along() yields no iterations when
# month_vec is empty, whereas 1:length(month_vec) would iterate over 1 and 0.
# Note: summ_del is overwritten on every pass, so after the loop it holds only
# the summary for the last month.
for (i in seq_along(month_vec)) {
  summ_del <- addsummvar(df = groupsum, n1 = month_vec[i])
}
这一行不能帮助您达到目的吗?
library(dplyr)
# Sample data: three IDs (5, 3 and 6 rows) and six random month columns.
ID <- rep(c(1, 2, 3), times = c(5, 3, 6))
mon_2017_12 <- rnorm(14)
mon_2018_01 <- rnorm(14)
mon_2018_02 <- rnorm(14)
mon_2018_03 <- rnorm(14)
mon_2018_04 <- rnorm(14)
mon_2018_05 <- rnorm(14)
groupsum <- data.frame(
  ID,
  mon_2017_12,
  mon_2018_01,
  mon_2018_02,
  mon_2018_03,
  mon_2018_04,
  mon_2018_05
)
# Sum every non-grouping column within each ID in a single call.
# NOTE(review): summarise_all() is superseded in recent dplyr releases by
# summarise(across(everything(), sum)).
groupsum %>%
  group_by(ID) %>%
  summarise_all(.funs = sum)
输出:
> groupsum %>% group_by(ID) %>% summarise_all(.funs = sum)
# A tibble: 3 x 7
ID mon_2017_12 mon_2018_01 mon_2018_02 mon_2018_03 mon_2018_04 mon_2018_05
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1.00 -0.00419 0.230 2.87 -1.38 -2.63 2.12
2 2.00 -1.12 1.45 -1.08 -0.907 -0.966 2.86
3 3.00 -2.11 3.49 0.282 -1.35 6.30 -3.60
更新代码以使用 summarise_if:
library(dplyr)
# Fixed seed so the random month values are reproducible.
set.seed(123)
ID <- rep(c(1, 2, 3), times = c(5, 3, 6))
mon_2017_12 <- rnorm(14)
mon_2018_01 <- rnorm(14)
mon_2018_02 <- rnorm(14)
mon_2018_03 <- rnorm(14)
mon_2018_04 <- rnorm(14)
mon_2018_05 <- rnorm(14)
groupsum <- data.frame(
  ID,
  mon_2017_12,
  mon_2018_01,
  mon_2018_02,
  mon_2018_03,
  mon_2018_04,
  mon_2018_05
)
# Add a character column to show that non-numeric columns are left out.
groupsum$var2 <- letters[1:14]
head(groupsum)
# Sum only the numeric columns within each ID.
# NOTE(review): summarise_if() is superseded in recent dplyr releases by
# summarise(across(where(is.numeric), sum)).
groupsum %>%
  group_by(ID) %>%
  summarise_if(is.numeric, sum)