对数据帧列表进行方差分析并忽略组数太少的数据帧
ANOVA over a list of dataframes and ignore dataframe where number of groups is too small
我有一个数据框列表,其中包含化学信息,这些信息比较了实验室之间大约 30 种分析物的结果。每个分析物都存储为数据帧列表中的单个数据帧
我编写了一个函数,可以对每个数据帧执行方差分析。但方差分析至少需要 3 个组(实验室)才能进行比较,因此当有结果的实验室少于 3 个时不应执行分析。我正在尝试设置一个条件(if 语句)来识别不满足 3 个实验室要求的分析物并返回 NA;如果满足要求,则继续进行方差分析。
下面是我的数据框列表示例
list(Fe = structure(list(Determination_No = 1:6, `2` = c(NA,
NA, NA, NA, NA, NA), `3` = c(56.83, 56.54, 56.18, 56.5, 56.51,
56.34), `4` = c(56.39, 56.43, 56.53, 56.31, 56.47, 56.35), `5` = c(56.32,
56.29, 56.31, 56.32, 56.39, 56.32), `7` = c(56.48, 56.4, 56.54,
56.43, 56.73, 56.62), `8` = c(56.382, 56.258, 56.442, 56.258,
56.532, 56.264), `10` = c(56.3, 56.5, 56.2, 56.5, 56.7, 56.5),
`12` = c(56.11, 56.46, 56.1, 56.35, 56.36, 56.37)), row.names = c(NA,
-6L), class = "data.frame"), SiO2 = structure(list(Determination_No = 1:6,
`2` = c(7.63, 7.65, 7.73, 7.67, 7.67, 7.67), `3` = c(7.84,
7.69, 7.59, 7.77, 7.74, 7.64), `4` = c(7.67, 7.74, 7.62,
7.81, 7.66, 7.8), `5` = c(7.91, 7.84, 7.96, 7.87, 7.84, 7.92
), `7` = c(7.77, 7.83, 7.76, 7.78, 7.65, 7.74), `8` = c(7.936,
7.685, 7.863, 7.838, 7.828, 7.767), `10` = c(7.872684992,
7.851291827, 7.872684992, 7.722932832, 7.680146501, 7.615967003
), `12` = c(7.64, 7.71, 7.71, 7.65, 7.82, 7.68)), row.names = c(NA,
-6L), class = "data.frame"), Al2O3 = structure(list(Determination_No = 1:6,
`2` = c(2.01, 2.02, 2.03, 2.01, 2.02, 2), `3` = c(2.01, 2.01,
2, 2.02, 2.02, 2.03), `4` = c(2, 2.03, 1.99, 2.01, 2.01,
2.01), `5` = c(2.02, 2.02, 2.05, 2.03, 2.02, 2.03), `7` = c(NA,
NA, NA, NA, NA, NA), `8` = c(2.053, 2.044, 2.041, 2.038,
2.008, 2.02), `10` = c(2.002830415, 2.021725042, 2.021725042,
1.983935789, 2.002830415, 2.021725042), `12` = c(NA, NA,
NA, NA, NA, NA)), row.names = c(NA, -6L), class = "data.frame"),
TiO2 = structure(list(Determination_No = 1:6, `2` = c(0.07,
0.07, 0.07, 0.07, 0.07, 0.07), `3` = c(NA, NA, NA, NA, NA,
NA), `4` = c(0.07, 0.07, 0.07, 0.07, 0.07, 0.07), `5` = c(0.07,
0.07, 0.07, 0.07, 0.07, 0.07), `7` = c(NA, NA, NA, NA, NA,
NA), `8` = c(NA, NA, NA, NA, NA, NA), `10` = c(0.066721378,
0.066721378, 0.066721378, 0.066721378, 0.066721378, 0.066721378
), `12` = c(NA, NA, NA, NA, NA, NA)), row.names = c(NA, -6L
), class = "data.frame"), Sn = structure(list(Determination_No = 1:6,
`2` = c(NA, NA, NA, NA, NA, NA), `3` = c(NA, NA, NA,
NA, NA, NA), `4` = c(NA, NA, NA, NA, NA, NA), `5` = c(NA,
NA, NA, NA, NA, NA), `7` = c(NA, NA, NA, NA, NA, NA),
`8` = c(0.004, 0.004, 0.002, 0.007, 0.003, 0.004), `10` = c(NA,
NA, NA, NA, NA, NA), `12` = c(NA, NA, NA, NA, NA, NA)), row.names = c(NA,
-6L), class = "data.frame"))
下面的代码
# One-way ANOVA of results by laboratory for a single analyte.
#
# x:   wide data frame with a `Determination_No` column plus one column per
#      laboratory (column name = lab number). A lab that reported nothing is an
#      all-NA column.
# ...: ignored; kept so existing calls that pass extra arguments still work.
#
# Returns an unnamed list of two values, each rounded to 4 digits:
#   [[1]] Lab.SDWithinLab  = sqrt(residual mean square)
#   [[2]] Lab.SDBetweenLab = sqrt(mean square of the lab factor)
# Both are NA when fewer than two labs have results, because the linear model
# cannot be fitted with a single-level factor.
lab.Anova <- function(x, ...) {
  # Reshape only the lab columns. Gathering every column would turn the
  # Determination_No values (1, 2, ...) into an extra, fake "lab" group.
  lab_cols <- setdiff(names(x), "Determination_No")
  long <- data.frame(
    Lab_No = rep(lab_cols, each = nrow(x)),
    Results = as.numeric(unlist(x[lab_cols], use.names = FALSE)),
    stringsAsFactors = FALSE
  )

  # Keep only labs that actually reported something.
  long <- long[!is.na(long$Results), , drop = FALSE]
  long$Lab_No <- factor(long$Lab_No)

  if (nlevels(long$Lab_No) < 2L) {
    return(list(NA, NA))
  }

  Lab.Aov <- anova(lm(Results ~ Lab_No, data = long))
  mean_sq <- Lab.Aov[["Mean Sq"]]  # row 1 = lab factor, row 2 = residuals

  Lab.SDWithinLab <- round(sqrt(mean_sq[2]), digits = 4)
  Lab.SDBetweenLab <- round(sqrt(mean_sq[1]), digits = 4)
  list(Lab.SDWithinLab, Lab.SDBetweenLab)
}
我正在尝试统计实验室数量:如果大于 2 就进行方差分析,否则返回 NA,类似这样:
# Draft of lab.Anova with a "fewer than 3 labs -> NA" guard.
# NOTE(review): this draft does not run as written; see the notes below.
lab.Anova <- function(x,...){
x <- gather(x, Determination_No) #convert the data to long format for Anova
colnames(x) <- c("Lab_No", "Results") #Rename the columns
# NOTE(review): `!is.na` negates the function object instead of calling it, and
# `x$Lab_No | ...` applies a logical OR to the lab labels; neither counts labs.
totlabs <-count(unique(x$Lab_No | x$Lab_No == !is.na ))
# NOTE(review): R needs the condition in parentheses, `if (totlabs > 2)`, and
# braces around each multi-statement branch; without them this is a syntax error.
if totlabs > 2
LabLm <- lm(Results ~ as.factor(x$Lab_No), data = x) #Create a linear Model for Anova
Lab.Aov <- anova(LabLm)
Lab.Critical <- cbind(Lab.Aov, 'F Critical Value' = qf(1 - 0.05, Lab.Aov[1, 1], Lab.Aov[2, 1]))
Lab.SDWithinLab <- round(sqrt(Lab.Critical[2,3]),digits = 4) #Assign the variance to a variable from ANOVA results
Lab.SDBetweenLab <- round(sqrt(Lab.Critical[1,3]),digits = 4)
else
# Fallback branch: report NA for both values.
Lab.SDWithinLab <- NA
Lab.SDBetweenLab <- NA
list(Lab.SDWithinLab,Lab.SDBetweenLab)
}
我在计算 totlabs 时遇到错误,因为 R 不接受其中的 | 和 !is.na 写法。
提前致谢
您可以尝试以下函数:
library(dplyr)
library(tidyr)
# One-way ANOVA of results by laboratory for a single analyte, skipped when too
# few laboratories reported results.
#
# x:        wide data frame with a `Determination_No` column plus one column
#           per laboratory (column name = lab number). A lab that reported
#           nothing is an all-NA column.
# ...:      ignored; kept so existing calls that pass extra arguments still work.
# min_labs: minimum number of labs with at least one result required to run the
#           ANOVA (default 3). Values below 2 are treated as 2, because the
#           model cannot be fitted with a single group.
#
# Returns an unnamed list of two values, each rounded to 4 digits:
#   [[1]] Lab.SDWithinLab  = sqrt(residual mean square)
#   [[2]] Lab.SDBetweenLab = sqrt(mean square of the lab factor)
# Both are NA when fewer than `min_labs` labs have results.
lab.Anova <- function(x, ..., min_labs = 3L) {
  # Reshape only the lab columns. Gathering every column would turn the
  # Determination_No values into an extra "lab", which inflates the lab count
  # by one and distorts the ANOVA.
  lab_cols <- setdiff(names(x), "Determination_No")
  long <- data.frame(
    Lab_No = rep(lab_cols, each = nrow(x)),
    Results = as.numeric(unlist(x[lab_cols], use.names = FALSE)),
    stringsAsFactors = FALSE
  )

  # Keep only labs that actually reported something, then count them.
  long <- long[!is.na(long$Results), , drop = FALSE]
  long$Lab_No <- factor(long$Lab_No)
  totlabs <- nlevels(long$Lab_No)

  if (totlabs < max(min_labs, 2L)) {
    return(list(NA, NA))
  }

  Lab.Aov <- anova(lm(Results ~ Lab_No, data = long))
  mean_sq <- Lab.Aov[["Mean Sq"]]  # row 1 = lab factor, row 2 = residuals

  Lab.SDWithinLab <- round(sqrt(mean_sq[2]), digits = 4)
  Lab.SDBetweenLab <- round(sqrt(mean_sq[1]), digits = 4)
  list(Lab.SDWithinLab, Lab.SDBetweenLab)
}
# Run lab.Anova on every element of the list `df`; returns one result per element.
lapply(X = df, FUN = lab.Anova)
我有一个数据框列表,其中包含化学信息,这些信息比较了实验室之间大约 30 种分析物的结果。每个分析物都存储为数据帧列表中的单个数据帧
我编写了一个函数,可以对每个数据帧执行方差分析。但方差分析至少需要 3 个组(实验室)才能进行比较,因此当有结果的实验室少于 3 个时不应执行分析。我正在尝试设置一个条件(if 语句)来识别不满足 3 个实验室要求的分析物并返回 NA;如果满足要求,则继续进行方差分析。
下面是我的数据框列表示例
list(Fe = structure(list(Determination_No = 1:6, `2` = c(NA,
NA, NA, NA, NA, NA), `3` = c(56.83, 56.54, 56.18, 56.5, 56.51,
56.34), `4` = c(56.39, 56.43, 56.53, 56.31, 56.47, 56.35), `5` = c(56.32,
56.29, 56.31, 56.32, 56.39, 56.32), `7` = c(56.48, 56.4, 56.54,
56.43, 56.73, 56.62), `8` = c(56.382, 56.258, 56.442, 56.258,
56.532, 56.264), `10` = c(56.3, 56.5, 56.2, 56.5, 56.7, 56.5),
`12` = c(56.11, 56.46, 56.1, 56.35, 56.36, 56.37)), row.names = c(NA,
-6L), class = "data.frame"), SiO2 = structure(list(Determination_No = 1:6,
`2` = c(7.63, 7.65, 7.73, 7.67, 7.67, 7.67), `3` = c(7.84,
7.69, 7.59, 7.77, 7.74, 7.64), `4` = c(7.67, 7.74, 7.62,
7.81, 7.66, 7.8), `5` = c(7.91, 7.84, 7.96, 7.87, 7.84, 7.92
), `7` = c(7.77, 7.83, 7.76, 7.78, 7.65, 7.74), `8` = c(7.936,
7.685, 7.863, 7.838, 7.828, 7.767), `10` = c(7.872684992,
7.851291827, 7.872684992, 7.722932832, 7.680146501, 7.615967003
), `12` = c(7.64, 7.71, 7.71, 7.65, 7.82, 7.68)), row.names = c(NA,
-6L), class = "data.frame"), Al2O3 = structure(list(Determination_No = 1:6,
`2` = c(2.01, 2.02, 2.03, 2.01, 2.02, 2), `3` = c(2.01, 2.01,
2, 2.02, 2.02, 2.03), `4` = c(2, 2.03, 1.99, 2.01, 2.01,
2.01), `5` = c(2.02, 2.02, 2.05, 2.03, 2.02, 2.03), `7` = c(NA,
NA, NA, NA, NA, NA), `8` = c(2.053, 2.044, 2.041, 2.038,
2.008, 2.02), `10` = c(2.002830415, 2.021725042, 2.021725042,
1.983935789, 2.002830415, 2.021725042), `12` = c(NA, NA,
NA, NA, NA, NA)), row.names = c(NA, -6L), class = "data.frame"),
TiO2 = structure(list(Determination_No = 1:6, `2` = c(0.07,
0.07, 0.07, 0.07, 0.07, 0.07), `3` = c(NA, NA, NA, NA, NA,
NA), `4` = c(0.07, 0.07, 0.07, 0.07, 0.07, 0.07), `5` = c(0.07,
0.07, 0.07, 0.07, 0.07, 0.07), `7` = c(NA, NA, NA, NA, NA,
NA), `8` = c(NA, NA, NA, NA, NA, NA), `10` = c(0.066721378,
0.066721378, 0.066721378, 0.066721378, 0.066721378, 0.066721378
), `12` = c(NA, NA, NA, NA, NA, NA)), row.names = c(NA, -6L
), class = "data.frame"), Sn = structure(list(Determination_No = 1:6,
`2` = c(NA, NA, NA, NA, NA, NA), `3` = c(NA, NA, NA,
NA, NA, NA), `4` = c(NA, NA, NA, NA, NA, NA), `5` = c(NA,
NA, NA, NA, NA, NA), `7` = c(NA, NA, NA, NA, NA, NA),
`8` = c(0.004, 0.004, 0.002, 0.007, 0.003, 0.004), `10` = c(NA,
NA, NA, NA, NA, NA), `12` = c(NA, NA, NA, NA, NA, NA)), row.names = c(NA,
-6L), class = "data.frame"))
下面的代码
# One-way ANOVA of results by laboratory for a single analyte.
#
# x:   wide data frame with a `Determination_No` column plus one column per
#      laboratory (column name = lab number). A lab that reported nothing is an
#      all-NA column.
# ...: ignored; kept so existing calls that pass extra arguments still work.
#
# Returns an unnamed list of two values, each rounded to 4 digits:
#   [[1]] Lab.SDWithinLab  = sqrt(residual mean square)
#   [[2]] Lab.SDBetweenLab = sqrt(mean square of the lab factor)
# Both are NA when fewer than two labs have results, because the linear model
# cannot be fitted with a single-level factor.
lab.Anova <- function(x, ...) {
  # Reshape only the lab columns. Gathering every column would turn the
  # Determination_No values (1, 2, ...) into an extra, fake "lab" group.
  lab_cols <- setdiff(names(x), "Determination_No")
  long <- data.frame(
    Lab_No = rep(lab_cols, each = nrow(x)),
    Results = as.numeric(unlist(x[lab_cols], use.names = FALSE)),
    stringsAsFactors = FALSE
  )

  # Keep only labs that actually reported something.
  long <- long[!is.na(long$Results), , drop = FALSE]
  long$Lab_No <- factor(long$Lab_No)

  if (nlevels(long$Lab_No) < 2L) {
    return(list(NA, NA))
  }

  Lab.Aov <- anova(lm(Results ~ Lab_No, data = long))
  mean_sq <- Lab.Aov[["Mean Sq"]]  # row 1 = lab factor, row 2 = residuals

  Lab.SDWithinLab <- round(sqrt(mean_sq[2]), digits = 4)
  Lab.SDBetweenLab <- round(sqrt(mean_sq[1]), digits = 4)
  list(Lab.SDWithinLab, Lab.SDBetweenLab)
}
我正在尝试统计实验室数量:如果大于 2 就进行方差分析,否则返回 NA,类似这样:
# Draft of lab.Anova with a "fewer than 3 labs -> NA" guard.
# NOTE(review): this draft does not run as written; see the notes below.
lab.Anova <- function(x,...){
x <- gather(x, Determination_No) #convert the data to long format for Anova
colnames(x) <- c("Lab_No", "Results") #Rename the columns
# NOTE(review): `!is.na` negates the function object instead of calling it, and
# `x$Lab_No | ...` applies a logical OR to the lab labels; neither counts labs.
totlabs <-count(unique(x$Lab_No | x$Lab_No == !is.na ))
# NOTE(review): R needs the condition in parentheses, `if (totlabs > 2)`, and
# braces around each multi-statement branch; without them this is a syntax error.
if totlabs > 2
LabLm <- lm(Results ~ as.factor(x$Lab_No), data = x) #Create a linear Model for Anova
Lab.Aov <- anova(LabLm)
Lab.Critical <- cbind(Lab.Aov, 'F Critical Value' = qf(1 - 0.05, Lab.Aov[1, 1], Lab.Aov[2, 1]))
Lab.SDWithinLab <- round(sqrt(Lab.Critical[2,3]),digits = 4) #Assign the variance to a variable from ANOVA results
Lab.SDBetweenLab <- round(sqrt(Lab.Critical[1,3]),digits = 4)
else
# Fallback branch: report NA for both values.
Lab.SDWithinLab <- NA
Lab.SDBetweenLab <- NA
list(Lab.SDWithinLab,Lab.SDBetweenLab)
}
我在计算 totlabs 时遇到错误,因为 R 不接受其中的 | 和 !is.na 写法。
提前致谢
您可以尝试以下函数:
library(dplyr)
library(tidyr)
# One-way ANOVA of results by laboratory for a single analyte, skipped when too
# few laboratories reported results.
#
# x:        wide data frame with a `Determination_No` column plus one column
#           per laboratory (column name = lab number). A lab that reported
#           nothing is an all-NA column.
# ...:      ignored; kept so existing calls that pass extra arguments still work.
# min_labs: minimum number of labs with at least one result required to run the
#           ANOVA (default 3). Values below 2 are treated as 2, because the
#           model cannot be fitted with a single group.
#
# Returns an unnamed list of two values, each rounded to 4 digits:
#   [[1]] Lab.SDWithinLab  = sqrt(residual mean square)
#   [[2]] Lab.SDBetweenLab = sqrt(mean square of the lab factor)
# Both are NA when fewer than `min_labs` labs have results.
lab.Anova <- function(x, ..., min_labs = 3L) {
  # Reshape only the lab columns. Gathering every column would turn the
  # Determination_No values into an extra "lab", which inflates the lab count
  # by one and distorts the ANOVA.
  lab_cols <- setdiff(names(x), "Determination_No")
  long <- data.frame(
    Lab_No = rep(lab_cols, each = nrow(x)),
    Results = as.numeric(unlist(x[lab_cols], use.names = FALSE)),
    stringsAsFactors = FALSE
  )

  # Keep only labs that actually reported something, then count them.
  long <- long[!is.na(long$Results), , drop = FALSE]
  long$Lab_No <- factor(long$Lab_No)
  totlabs <- nlevels(long$Lab_No)

  if (totlabs < max(min_labs, 2L)) {
    return(list(NA, NA))
  }

  Lab.Aov <- anova(lm(Results ~ Lab_No, data = long))
  mean_sq <- Lab.Aov[["Mean Sq"]]  # row 1 = lab factor, row 2 = residuals

  Lab.SDWithinLab <- round(sqrt(mean_sq[2]), digits = 4)
  Lab.SDBetweenLab <- round(sqrt(mean_sq[1]), digits = 4)
  list(Lab.SDWithinLab, Lab.SDBetweenLab)
}
# Run lab.Anova on every element of the list `df`; returns one result per element.
lapply(X = df, FUN = lab.Anova)