对列组重复计算
Repeat calculation over groups of columns
对于下面的数据框,我想先求出每一列的平均值,然后在每个字母类别内,计算该类别第一列的平均值与各列平均值之间的倍数差异(即比值),就像我在 Excel 中所做的那样:
# Example data: three letter groups (A, B, C) with five integer vectors each.

# Group A
A0 <- 1:4
A1 <- 2:5
A2 <- 3:6
A3 <- 4:7
A4 <- 5:8

# Group B
B0 <- 11:14
B1 <- 12:15
B2 <- 13:16
B3 <- 14:17
B4 <- 15:18

# Group C
C0 <- 21:24
C1 <- 22:25
C2 <- 23:26
C3 <- 24:27
C4 <- 25:28
到目前为止我能做的是:
## step 1
# Combine the fifteen vectors into one data frame, one column per vector.
df<-data.frame(A0, A1, A2, A3, A4, B0, B1, B2, B3, B4, C0, C1, C2, C3, C4)
## step 2
# Append one extra row: for every column, the mean of its group's first
# column divided by that column's own mean. The three column ranges
# (1:5, 6:10, 11:15) are written out by hand, one per letter group.
# The result is only printed, not assigned.
rbind(df, c(mean(df[,1])/sapply(df[,1:5], mean),
mean(df[,6])/sapply(df[,6:10], mean),
mean(df[,11])/sapply(df[,11:15], mean)))
## step 3
# Same expression as step 2, transposed and converted back to a data frame,
# so each original column becomes a row. Again only printed.
data.frame(t(rbind(df, c(mean(df[,1])/sapply(df[,1:5], mean),
mean(df[,6])/sapply(df[,6:10], mean),
mean(df[,11])/sapply(df[,11:15], mean)))))
## step 4
# Bar plot of column X5 of the transposed frame against positions 1:15;
# X5 is presumably the appended ratio row (row 5) after the transpose.
# ggplot(), aes() and geom_bar() come from ggplot2; no library(ggplot2) call
# is visible in this snippet, so it is assumed to be attached already.
ggplot(data.frame(t(rbind(df, c(mean(df[,1])/sapply(df[,1:5], mean),
mean(df[,6])/sapply(df[,6:10], mean),
mean(df[,11])/sapply(df[,11:15], mean))))),
aes(1:15, X5)) + geom_bar(stat="identity")
在这之后我可以做分面等其他处理,但是对于很大的数据框,第二步手写起来会非常困难。有什么办法可以让 R 自动完成这一步,这样我就不必自己逐段写出来了吗?
我不确定是否正确理解了你的问题。我尝试模拟了你的数据集,并使用 dplyr 库写了一个简短的脚本。
下面是分步说明的脚本。
1. 创建您的数据集。
# Build a small four-column example data frame and print it.
A0 <- 1:4
A1 <- 2:5
A2 <- 3:6
# A3 is passed to data.frame() below, so it is defined here as well; this
# keeps the snippet runnable without relying on an earlier definition.
A3 <- 4:7
df <- data.frame(A0, A1, A2, A3)
df
A0 A1 A2 A3
1 1 2 3 4
2 2 3 4 5
3 3 4 5 6
4 4 5 6 7
现在脚本将取每一列的平均值。
# Append a "Mean" row holding the mean of each column of df.
# colMeans() is used because the means are taken per column; rowMeans()
# returns per-row means, which coincide with the column means only when the
# data happen to be square and symmetric, as in this small example.
library(dplyr) # provides the %>% pipe
mean.df <- df %>%
  rbind(Mean = colMeans(df, na.rm = TRUE))
mean.df
A0 A1 A2 A3
1 1.0 2.0 3.0 4.0
2 2.0 3.0 4.0 5.0
3 3.0 4.0 5.0 6.0
4 4.0 5.0 6.0 7.0
Mean 2.5 3.5 4.5 5.5
现在脚本将用每个均值减去第一个均值(得到的是差值,而不是倍数)。我是这么理解你的问题的。
# Append a "Fold_Diff" row: row 5 of mean.df (the "Mean" row) minus the value
# in row 5, column 1, i.e. each column mean minus the first column's mean.
fold.diff <- rbind(mean.df, Fold_Diff = mean.df[5, ] - mean.df[5, 1])
fold.diff
A0 A1 A2 A3
1 1.0 2.0 3.0 4.0
2 2.0 3.0 4.0 5.0
3 3.0 4.0 5.0 6.0
4 4.0 5.0 6.0 7.0
Mean 2.5 3.5 4.5 5.5
Fold_Diff 0.0 1.0 2.0 3.0
使用 dplyr 和 tidyr:
# Tidy approach: the letter group is derived from the column names, so no
# column ranges have to be written out by hand.
library(dplyr)
library(tidyr)
library(ggplot2) # ggplot(), aes(), geom_col(), facet_wrap()

# step 1: one column per vector (A0..C4 must already be defined)
df <- data.frame(A0, A1, A2, A3, A4, B0, B1, B2, B3, B4, C0, C1, C2, C3, C4)

# step 2: reshape to long format, split a name such as "A0" into Letter "A"
# and Num "0", take the mean per original column, then divide each letter's
# first mean by every mean within that letter.
df2 <- df %>%
  pivot_longer(everything(), names_to = "LetterNum", values_to = "Value") %>%
  separate(LetterNum, c("Letter", "Num"), sep = 1) %>%
  group_by(Letter, Num) %>%
  # keep the Letter grouping so first() below is evaluated per letter
  summarise(Mean1 = mean(Value), .groups = "drop_last") %>%
  mutate(Mean = first(Mean1) / Mean1) %>%
  ungroup()

# step 3: bar chart with one facet per letter
ggplot(df2, aes(Num, Mean, group = Letter, fill = Letter)) +
  geom_col() +
  facet_wrap(~Letter)
或
# Alternative: assign the letter groups by position instead of parsing the
# column names.
library(dplyr)
library(tidyr)
library(ggplot2) # ggplot(), aes(), geom_col(), facet_wrap()

# step 1: one column per vector (A0..C4 must already be defined)
df <- data.frame(A0, A1, A2, A3, A4, B0, B1, B2, B3, B4, C0, C1, C2, C3, C4)

# Number of consecutive columns that make up one letter group.
group_size <- 5
stopifnot(ncol(df) %% group_size == 0)

# step 2: mean per column, then label every run of `group_size` columns with
# the next capital letter and divide each group's first mean by every mean in
# that group. summarise() returns LetterNum in sorted order, so this relies
# on the sorted column names being contiguous within each group.
df2 <- df %>%
  pivot_longer(everything(), names_to = "LetterNum", values_to = "Value") %>%
  group_by(LetterNum) %>%
  summarise(Mean1 = mean(Value)) %>%
  mutate(
    Group = rep(LETTERS[seq_len(n() / group_size)], each = group_size)
  ) %>%
  group_by(Group) %>%
  mutate(Mean = first(Mean1) / Mean1) %>%
  ungroup()

# step 3: one facet per group, each with its own axes
ggplot(df2, aes(LetterNum, Mean, fill = Group)) +
  geom_col() +
  facet_wrap(~Group, scales = "free")
对于下面的数据框,我想先求出每一列的平均值,然后在每个字母类别内,计算该类别第一列的平均值与各列平均值之间的倍数差异(即比值),就像我在 Excel 中所做的那样:
# Example data: three letter groups (A, B, C) with five integer vectors each.

# Group A
A0 <- 1:4
A1 <- 2:5
A2 <- 3:6
A3 <- 4:7
A4 <- 5:8

# Group B
B0 <- 11:14
B1 <- 12:15
B2 <- 13:16
B3 <- 14:17
B4 <- 15:18

# Group C
C0 <- 21:24
C1 <- 22:25
C2 <- 23:26
C3 <- 24:27
C4 <- 25:28
到目前为止我能做的是:
## step 1
# Combine the fifteen vectors into one data frame, one column per vector.
df<-data.frame(A0, A1, A2, A3, A4, B0, B1, B2, B3, B4, C0, C1, C2, C3, C4)
## step 2
# Append one extra row: for every column, the mean of its group's first
# column divided by that column's own mean. The three column ranges
# (1:5, 6:10, 11:15) are written out by hand, one per letter group.
# The result is only printed, not assigned.
rbind(df, c(mean(df[,1])/sapply(df[,1:5], mean),
mean(df[,6])/sapply(df[,6:10], mean),
mean(df[,11])/sapply(df[,11:15], mean)))
## step 3
# Same expression as step 2, transposed and converted back to a data frame,
# so each original column becomes a row. Again only printed.
data.frame(t(rbind(df, c(mean(df[,1])/sapply(df[,1:5], mean),
mean(df[,6])/sapply(df[,6:10], mean),
mean(df[,11])/sapply(df[,11:15], mean)))))
## step 4
# Bar plot of column X5 of the transposed frame against positions 1:15;
# X5 is presumably the appended ratio row (row 5) after the transpose.
# ggplot(), aes() and geom_bar() come from ggplot2; no library(ggplot2) call
# is visible in this snippet, so it is assumed to be attached already.
ggplot(data.frame(t(rbind(df, c(mean(df[,1])/sapply(df[,1:5], mean),
mean(df[,6])/sapply(df[,6:10], mean),
mean(df[,11])/sapply(df[,11:15], mean))))),
aes(1:15, X5)) + geom_bar(stat="identity")
在这之后我可以做分面等其他处理,但是对于很大的数据框,第二步手写起来会非常困难。有什么办法可以让 R 自动完成这一步,这样我就不必自己逐段写出来了吗?
我不确定是否正确理解了你的问题。我尝试模拟了你的数据集,并使用 dplyr 库写了一个简短的脚本。
下面是分步说明的脚本。
1. 创建您的数据集。
# Build a small four-column example data frame and print it.
A0 <- 1:4
A1 <- 2:5
A2 <- 3:6
# A3 is passed to data.frame() below, so it is defined here as well; this
# keeps the snippet runnable without relying on an earlier definition.
A3 <- 4:7
df <- data.frame(A0, A1, A2, A3)
df
A0 A1 A2 A3
1 1 2 3 4
2 2 3 4 5
3 3 4 5 6
4 4 5 6 7
现在脚本将取每一列的平均值。
# Append a "Mean" row holding the mean of each column of df.
# colMeans() is used because the means are taken per column; rowMeans()
# returns per-row means, which coincide with the column means only when the
# data happen to be square and symmetric, as in this small example.
library(dplyr) # provides the %>% pipe
mean.df <- df %>%
  rbind(Mean = colMeans(df, na.rm = TRUE))
mean.df
A0 A1 A2 A3
1 1.0 2.0 3.0 4.0
2 2.0 3.0 4.0 5.0
3 3.0 4.0 5.0 6.0
4 4.0 5.0 6.0 7.0
Mean 2.5 3.5 4.5 5.5
现在脚本将用每个均值减去第一个均值(得到的是差值,而不是倍数)。我是这么理解你的问题的。
# Append a "Fold_Diff" row: row 5 of mean.df (the "Mean" row) minus the value
# in row 5, column 1, i.e. each column mean minus the first column's mean.
fold.diff <- rbind(mean.df, Fold_Diff = mean.df[5, ] - mean.df[5, 1])
fold.diff
A0 A1 A2 A3
1 1.0 2.0 3.0 4.0
2 2.0 3.0 4.0 5.0
3 3.0 4.0 5.0 6.0
4 4.0 5.0 6.0 7.0
Mean 2.5 3.5 4.5 5.5
Fold_Diff 0.0 1.0 2.0 3.0
使用 dplyr 和 tidyr:
# Tidy approach: the letter group is derived from the column names, so no
# column ranges have to be written out by hand.
library(dplyr)
library(tidyr)
library(ggplot2) # ggplot(), aes(), geom_col(), facet_wrap()

# step 1: one column per vector (A0..C4 must already be defined)
df <- data.frame(A0, A1, A2, A3, A4, B0, B1, B2, B3, B4, C0, C1, C2, C3, C4)

# step 2: reshape to long format, split a name such as "A0" into Letter "A"
# and Num "0", take the mean per original column, then divide each letter's
# first mean by every mean within that letter.
df2 <- df %>%
  pivot_longer(everything(), names_to = "LetterNum", values_to = "Value") %>%
  separate(LetterNum, c("Letter", "Num"), sep = 1) %>%
  group_by(Letter, Num) %>%
  # keep the Letter grouping so first() below is evaluated per letter
  summarise(Mean1 = mean(Value), .groups = "drop_last") %>%
  mutate(Mean = first(Mean1) / Mean1) %>%
  ungroup()

# step 3: bar chart with one facet per letter
ggplot(df2, aes(Num, Mean, group = Letter, fill = Letter)) +
  geom_col() +
  facet_wrap(~Letter)
或
# Alternative: assign the letter groups by position instead of parsing the
# column names.
library(dplyr)
library(tidyr)
library(ggplot2) # ggplot(), aes(), geom_col(), facet_wrap()

# step 1: one column per vector (A0..C4 must already be defined)
df <- data.frame(A0, A1, A2, A3, A4, B0, B1, B2, B3, B4, C0, C1, C2, C3, C4)

# Number of consecutive columns that make up one letter group.
group_size <- 5
stopifnot(ncol(df) %% group_size == 0)

# step 2: mean per column, then label every run of `group_size` columns with
# the next capital letter and divide each group's first mean by every mean in
# that group. summarise() returns LetterNum in sorted order, so this relies
# on the sorted column names being contiguous within each group.
df2 <- df %>%
  pivot_longer(everything(), names_to = "LetterNum", values_to = "Value") %>%
  group_by(LetterNum) %>%
  summarise(Mean1 = mean(Value)) %>%
  mutate(
    Group = rep(LETTERS[seq_len(n() / group_size)], each = group_size)
  ) %>%
  group_by(Group) %>%
  mutate(Mean = first(Mean1) / Mean1) %>%
  ungroup()

# step 3: one facet per group, each with its own axes
ggplot(df2, aes(LetterNum, Mean, fill = Group)) +
  geom_col() +
  facet_wrap(~Group, scales = "free")