R 中按组进行均值相减
R Mean Subtraction For Groups
# Sample data: 50 observations with a SCORE, a binary TYPE flag (0/1),
# a GRADE (1-3) and a SCHOOL id (1-2).
data <- data.frame(
  SCORE = c(9,9,10,7,10,10,8,10,9,5,10,7,9,4,9,8,3,6,4,5,10,10,6,9,5,8,3,7,5,6,7,4,8,7,10,6,9,7,8,8,9,9,4,5,6,6,7,10,8,6),
  TYPE = c(0,0,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0),
  GRADE = c(2,2,2,1,1,1,1,2,1,3,2,2,1,1,2,1,2,1,2,1,1,2,2,2,3,2,3,2,2,1,1,2,2,3,3,1,2,3,3,3,2,2,3,2,2,3,3,3,2,1),
  SCHOOL = c(1,1,1,2,2,2,2,1,1,2,1,2,2,2,2,1,1,2,2,1,2,1,2,1,1,1,2,1,2,1,1,1,1,1,2,1,1,1,1,2,2,2,2,1,1,1,1,2,1,2)
)
如果您有此示例数据并希望将其转换为以下格式:
对于每个 'GRADE' 和 'SCHOOL' 的组合,我希望计算 'TYPE' 等于 1 时 'SCORE' 的平均值,然后从 'TYPE' 等于 0 时 'SCORE' 的平均值中减去它。
使用 data.table 的写法可以是:
data[ SCORESUBTRACT := sum(SCORE)/nrow(data), by = list(GRADE, SCHOOL)] 但是否可以从 'TYPE' 等于 0 的平均值中减去 'TYPE' 等于 1 的平均值?
试试这个:
library(tidyverse)

# For every GRADE x SCHOOL combination: compute the mean SCORE per TYPE,
# spread the per-TYPE means into columns `0` and `1`, then take their
# difference.
data %>%
  group_by(GRADE, SCHOOL, TYPE) %>%
  # Use TRUE rather than T: T is an ordinary variable that can be reassigned.
  # summarise() drops only the innermost grouping (TYPE), so the result is
  # still grouped by GRADE and SCHOOL.
  summarise(Val = mean(SCORE, na.rm = TRUE)) %>%
  pivot_wider(names_from = TYPE, values_from = Val) %>%
  # Diff = mean SCORE where TYPE == 1 minus mean SCORE where TYPE == 0.
  mutate(Diff = `1` - `0`)
# A tibble: 6 x 5
# Groups: GRADE, SCHOOL [6]
GRADE SCHOOL `0` `1` Diff
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 7 6.75 -0.25
2 1 2 7.75 7.8 0.0500
3 2 1 7.57 8 0.429
4 2 2 6.5 7.67 1.17
5 3 1 6.5 6.75 0.25
6 3 2 5.67 7.67 2
这里有一个 data.table 解决方案供您使用。希望效果好。
首先,我们直接将相同的数据创建为 data.table,并将键设置为 GRADE 和 SCHOOL
# data.table() and setkeyv() come from the data.table package, so it has to
# be attached before they are called.
library(data.table)

# Sample data built directly as a data.table.
data <- data.table(
  SCORE = c(9,9,10,7,10,10,8,10,9,5,10,7,9,4,9,8,3,6,4,5,10,10,6,9,5,8,3,7,5,6,7,4,8,7,10,6,9,7,8,8,9,9,4,5,6,6,7,10,8,6),
  TYPE = c(0,0,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0),
  GRADE = c(2,2,2,1,1,1,1,2,1,3,2,2,1,1,2,1,2,1,2,1,1,2,2,2,3,2,3,2,2,1,1,2,2,3,3,1,2,3,3,3,2,2,3,2,2,3,3,3,2,1),
  SCHOOL = c(1,1,1,2,2,2,2,1,1,2,1,2,2,2,2,1,1,2,2,1,2,1,2,1,1,1,2,1,2,1,1,1,1,1,2,1,1,1,1,2,2,2,2,1,1,1,1,2,1,2)
)

# Key the table on GRADE then SCHOOL (this also sorts the rows by those
# columns) so that later steps can group with by = key(data).
setkeyv(data, c("GRADE", "SCHOOL"))
现在我们像您一样计算平均分数,然后继续计算您想要的更新分数
# Add the per-group mean SCORE (groups = the table's key columns) by reference.
data[, MeanScore := mean(SCORE), by = key(data)]

# TYPE == 1 rows receive the group mean itself ...
data[TYPE == 1, updated_score := MeanScore]
# ... and TYPE == 0 rows receive their score's deviation from that mean.
data[TYPE == 0, updated_score := SCORE - MeanScore]
# NOTE(review): MeanScore pools both TYPE values within a group; it is not a
# per-TYPE mean. Confirm this is the calculation that was actually wanted.
# Sample data: 50 observations with a SCORE, a binary TYPE flag (0/1),
# a GRADE (1-3) and a SCHOOL id (1-2).
data <- data.frame(
  SCORE = c(9,9,10,7,10,10,8,10,9,5,10,7,9,4,9,8,3,6,4,5,10,10,6,9,5,8,3,7,5,6,7,4,8,7,10,6,9,7,8,8,9,9,4,5,6,6,7,10,8,6),
  TYPE = c(0,0,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0),
  GRADE = c(2,2,2,1,1,1,1,2,1,3,2,2,1,1,2,1,2,1,2,1,1,2,2,2,3,2,3,2,2,1,1,2,2,3,3,1,2,3,3,3,2,2,3,2,2,3,3,3,2,1),
  SCHOOL = c(1,1,1,2,2,2,2,1,1,2,1,2,2,2,2,1,1,2,2,1,2,1,2,1,1,1,2,1,2,1,1,1,1,1,2,1,1,1,1,2,2,2,2,1,1,1,1,2,1,2)
)
如果您有此示例数据并希望将其转换为以下格式:
对于每个 'GRADE' 和 'SCHOOL' 的组合,我希望计算 'TYPE' 等于 1 时 'SCORE' 的平均值,然后从 'TYPE' 等于 0 时 'SCORE' 的平均值中减去它。
使用 data.table 的写法可以是 data[ SCORESUBTRACT := sum(SCORE)/nrow(data), by = list(GRADE, SCHOOL)] 但是否可以从 'TYPE' 等于 0 的平均值中减去 'TYPE' 等于 1 的平均值?
试试这个:
library(tidyverse)

# For every GRADE x SCHOOL combination: compute the mean SCORE per TYPE,
# spread the per-TYPE means into columns `0` and `1`, then take their
# difference.
data %>%
  group_by(GRADE, SCHOOL, TYPE) %>%
  # Use TRUE rather than T: T is an ordinary variable that can be reassigned.
  # summarise() drops only the innermost grouping (TYPE), so the result is
  # still grouped by GRADE and SCHOOL.
  summarise(Val = mean(SCORE, na.rm = TRUE)) %>%
  pivot_wider(names_from = TYPE, values_from = Val) %>%
  # Diff = mean SCORE where TYPE == 1 minus mean SCORE where TYPE == 0.
  mutate(Diff = `1` - `0`)
# A tibble: 6 x 5
# Groups: GRADE, SCHOOL [6]
GRADE SCHOOL `0` `1` Diff
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 7 6.75 -0.25
2 1 2 7.75 7.8 0.0500
3 2 1 7.57 8 0.429
4 2 2 6.5 7.67 1.17
5 3 1 6.5 6.75 0.25
6 3 2 5.67 7.67 2
这里有一个 data.table 解决方案供您使用。希望效果好。
首先,我们直接将相同的数据创建为 data.table,并将键设置为 GRADE 和 SCHOOL
# data.table() and setkeyv() come from the data.table package, so it has to
# be attached before they are called.
library(data.table)

# Sample data built directly as a data.table.
data <- data.table(
  SCORE = c(9,9,10,7,10,10,8,10,9,5,10,7,9,4,9,8,3,6,4,5,10,10,6,9,5,8,3,7,5,6,7,4,8,7,10,6,9,7,8,8,9,9,4,5,6,6,7,10,8,6),
  TYPE = c(0,0,1,1,1,1,1,0,0,1,1,0,0,1,1,1,0,0,0,0,0,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,0),
  GRADE = c(2,2,2,1,1,1,1,2,1,3,2,2,1,1,2,1,2,1,2,1,1,2,2,2,3,2,3,2,2,1,1,2,2,3,3,1,2,3,3,3,2,2,3,2,2,3,3,3,2,1),
  SCHOOL = c(1,1,1,2,2,2,2,1,1,2,1,2,2,2,2,1,1,2,2,1,2,1,2,1,1,1,2,1,2,1,1,1,1,1,2,1,1,1,1,2,2,2,2,1,1,1,1,2,1,2)
)

# Key the table on GRADE then SCHOOL (this also sorts the rows by those
# columns) so that later steps can group with by = key(data).
setkeyv(data, c("GRADE", "SCHOOL"))
现在我们像您一样计算平均分数,然后继续计算您想要的更新分数
# Add the per-group mean SCORE (groups = the table's key columns) by reference.
data[, MeanScore := mean(SCORE), by = key(data)]

# TYPE == 1 rows receive the group mean itself ...
data[TYPE == 1, updated_score := MeanScore]
# ... and TYPE == 0 rows receive their score's deviation from that mean.
data[TYPE == 0, updated_score := SCORE - MeanScore]
# NOTE(review): MeanScore pools both TYPE values within a group; it is not a
# per-TYPE mean. Confirm this is the calculation that was actually wanted.