计算双因子 table 的均值向量
Calculating the mean vectors for a two-factorial table
我正在尝试计算变量 RBC
、WBC
和 hemoglobin
的平均试剂向量。我是 R 的新手,所以我的问题是:你能告诉我一种更简单的方法来在 R 中进行以下计算吗?数据来自Rencher的Table6.19。我正在按照 Rencher 中的示例练习在 R 中进行计算。
# Read the reagent data (Rencher, Table 6.19) and label its columns.
reagent.dat <- read.table("https://dl.dropboxusercontent.com/u/28713619/reagent.dat")
colnames(reagent.dat) <- c("reagent", "subject", "RBC", "WBC", "hemoglobin")
# Both design variables are categorical, so store them as factors.
reagent.dat$reagent <- factor(reagent.dat$reagent)
reagent.dat$subject <- factor(reagent.dat$subject)
library(plyr)
library(dplyr)
library(reshape2)
# Calculate the means per variable, across reagents
# Name the id columns explicitly instead of letting melt() guess them.
reagent.datm <- melt(reagent.dat, id.vars = c("reagent", "subject"))
# One row per (variable, reagent) pair, ordered by variable and then reagent.
group.means <- ddply(reagent.datm, c("variable", "reagent"), summarise,
                     mean = mean(value))
# group.means stays a plain data frame; select() accepts it directly.
newdata <- group.means %>% select(reagent, mean)
# Store the group means into a matrix: one row per response variable,
# one column per reagent. Dimensions come from the data, not from constants.
reagent.levels <- levels(reagent.dat$reagent)
y_bar <- matrix(NA_real_,
                nrow = nlevels(reagent.datm$variable),
                ncol = length(reagent.levels))
for (i in seq_along(reagent.levels)) {
  # Match on the level label so the loop does not depend on the labels
  # happening to be "1", "2", ...
  y_bar[, i] <- newdata$mean[newdata$reagent == reagent.levels[i]]
}
y_bar
dplyr
包实际上可以很容易地简化您的代码,绝对值得学习,因为它非常强大。例如:
# Load the reagent data and name its columns.
reagent.dat <- read.table("https://dl.dropboxusercontent.com/u/28713619/reagent.dat")
colnames(reagent.dat) <- c("reagent", "subject", "RBC", "WBC", "hemoglobin")
# Treat reagent as a categorical grouping variable so the summary's reagent
# column is a factor rather than an integer.
reagent.dat$reagent <- factor(reagent.dat$reagent)
# Using dplyr
library(dplyr)
# One row per reagent, one mean column per response variable.
reagentmeans <- reagent.dat %>%
  select(reagent, RBC, WBC, hemoglobin) %>%
  group_by(reagent) %>%
  summarize(
    mean_RBC = mean(RBC),
    mean_WBC = mean(WBC),
    mean_hemoglobin = mean(hemoglobin)
  )
> reagentmeans
Source: local data frame [4 x 4]
reagent mean_RBC mean_WBC mean_hemoglobin
(fctr) (dbl) (dbl) (dbl)
1 1 7.290 4.9535 15.310
2 2 7.210 4.8985 15.725
3 3 7.055 4.8810 15.595
4 4 7.025 4.8915 15.765
您可以使用data.table
,
library(data.table)
# Convert reagent.dat to a data.table by reference, then take the mean of
# each response column within every reagent group.
setDT(reagent.dat)[,
  lapply(.SD, mean),
  by = reagent,
  .SDcols = c("RBC", "WBC", "hemoglobin")
]
# reagent RBC WBC hemoglobin
#1: 1 7.290 4.9535 15.310
#2: 2 7.210 4.8985 15.725
#3: 3 7.055 4.8810 15.595
#4: 4 7.025 4.8915 15.765
我正在尝试计算变量 RBC
、WBC
和 hemoglobin
的平均试剂向量。我是 R 的新手,所以我的问题是:你能告诉我一种更简单的方法来在 R 中进行以下计算吗?数据来自Rencher的Table6.19。我正在按照 Rencher 中的示例练习在 R 中进行计算。
# Read the reagent data (Rencher, Table 6.19) and label its columns.
reagent.dat <- read.table("https://dl.dropboxusercontent.com/u/28713619/reagent.dat")
colnames(reagent.dat) <- c("reagent", "subject", "RBC", "WBC", "hemoglobin")
# Both design variables are categorical, so store them as factors.
reagent.dat$reagent <- factor(reagent.dat$reagent)
reagent.dat$subject <- factor(reagent.dat$subject)
library(plyr)
library(dplyr)
library(reshape2)
# Calculate the means per variable, across reagents
# Name the id columns explicitly instead of letting melt() guess them.
reagent.datm <- melt(reagent.dat, id.vars = c("reagent", "subject"))
# One row per (variable, reagent) pair, ordered by variable and then reagent.
group.means <- ddply(reagent.datm, c("variable", "reagent"), summarise,
                     mean = mean(value))
# group.means stays a plain data frame; select() accepts it directly.
newdata <- group.means %>% select(reagent, mean)
# Store the group means into a matrix: one row per response variable,
# one column per reagent. Dimensions come from the data, not from constants.
reagent.levels <- levels(reagent.dat$reagent)
y_bar <- matrix(NA_real_,
                nrow = nlevels(reagent.datm$variable),
                ncol = length(reagent.levels))
for (i in seq_along(reagent.levels)) {
  # Match on the level label so the loop does not depend on the labels
  # happening to be "1", "2", ...
  y_bar[, i] <- newdata$mean[newdata$reagent == reagent.levels[i]]
}
y_bar
dplyr
包实际上可以很容易地简化您的代码,绝对值得学习,因为它非常强大。例如:
# Load the reagent data and name its columns.
reagent.dat <- read.table("https://dl.dropboxusercontent.com/u/28713619/reagent.dat")
colnames(reagent.dat) <- c("reagent", "subject", "RBC", "WBC", "hemoglobin")
# Treat reagent as a categorical grouping variable so the summary's reagent
# column is a factor rather than an integer.
reagent.dat$reagent <- factor(reagent.dat$reagent)
# Using dplyr
library(dplyr)
# One row per reagent, one mean column per response variable.
reagentmeans <- reagent.dat %>%
  select(reagent, RBC, WBC, hemoglobin) %>%
  group_by(reagent) %>%
  summarize(
    mean_RBC = mean(RBC),
    mean_WBC = mean(WBC),
    mean_hemoglobin = mean(hemoglobin)
  )
> reagentmeans
Source: local data frame [4 x 4]
reagent mean_RBC mean_WBC mean_hemoglobin
(fctr) (dbl) (dbl) (dbl)
1 1 7.290 4.9535 15.310
2 2 7.210 4.8985 15.725
3 3 7.055 4.8810 15.595
4 4 7.025 4.8915 15.765
您可以使用data.table
,
library(data.table)
# Convert reagent.dat to a data.table by reference, then take the mean of
# each response column within every reagent group.
setDT(reagent.dat)[,
  lapply(.SD, mean),
  by = reagent,
  .SDcols = c("RBC", "WBC", "hemoglobin")
]
# reagent RBC WBC hemoglobin
#1: 1 7.290 4.9535 15.310
#2: 2 7.210 4.8985 15.725
#3: 3 7.055 4.8810 15.595
#4: 4 7.025 4.8915 15.765