对数据帧列表进行方差分析并忽略组数太少的数据帧

ANOVA over a list of data frames, ignoring data frames where the number of groups is too small

我有一个数据框列表,其中包含化学信息,这些信息比较了实验室之间大约 30 种分析物的结果。每个分析物都存储为数据帧列表中的单个数据帧

我编写了一个函数,对每个数据帧执行方差分析;但实验室数量少于 3 个的数据帧应当除外,因为方差分析至少需要 3 个组才能进行比较。我正在尝试设置一个条件(if 语句)来识别不满足 3 个实验室要求的分析物并返回 NA;如果满足要求,则继续执行方差分析。

下面是我的数据框列表示例

# Sample input: a named list with one data frame per analyte
# (Fe, SiO2, Al2O3, TiO2, Sn). Each data frame has a `Determination_No`
# column (1:6) plus the columns `2`, `3`, `4`, `5`, `7`, `8`, `10`, `12`,
# which the surrounding text describes as one column of results per lab.
# A column that is entirely NA means no results for that column/analyte;
# e.g. Sn has values only in column `8`, TiO2 only in `2`, `4`, `5`, `10`.
list(Fe = structure(list(Determination_No = 1:6, `2` = c(NA, 
NA, NA, NA, NA, NA), `3` = c(56.83, 56.54, 56.18, 56.5, 56.51, 
56.34), `4` = c(56.39, 56.43, 56.53, 56.31, 56.47, 56.35), `5` = c(56.32, 
56.29, 56.31, 56.32, 56.39, 56.32), `7` = c(56.48, 56.4, 56.54, 
56.43, 56.73, 56.62), `8` = c(56.382, 56.258, 56.442, 56.258, 
56.532, 56.264), `10` = c(56.3, 56.5, 56.2, 56.5, 56.7, 56.5), 
    `12` = c(56.11, 56.46, 56.1, 56.35, 56.36, 56.37)), row.names = c(NA, 
-6L), class = "data.frame"), SiO2 = structure(list(Determination_No = 1:6, 
    `2` = c(7.63, 7.65, 7.73, 7.67, 7.67, 7.67), `3` = c(7.84, 
    7.69, 7.59, 7.77, 7.74, 7.64), `4` = c(7.67, 7.74, 7.62, 
    7.81, 7.66, 7.8), `5` = c(7.91, 7.84, 7.96, 7.87, 7.84, 7.92
    ), `7` = c(7.77, 7.83, 7.76, 7.78, 7.65, 7.74), `8` = c(7.936, 
    7.685, 7.863, 7.838, 7.828, 7.767), `10` = c(7.872684992, 
    7.851291827, 7.872684992, 7.722932832, 7.680146501, 7.615967003
    ), `12` = c(7.64, 7.71, 7.71, 7.65, 7.82, 7.68)), row.names = c(NA, 
-6L), class = "data.frame"), Al2O3 = structure(list(Determination_No = 1:6, 
    `2` = c(2.01, 2.02, 2.03, 2.01, 2.02, 2), `3` = c(2.01, 2.01, 
    2, 2.02, 2.02, 2.03), `4` = c(2, 2.03, 1.99, 2.01, 2.01, 
    2.01), `5` = c(2.02, 2.02, 2.05, 2.03, 2.02, 2.03), `7` = c(NA, 
    NA, NA, NA, NA, NA), `8` = c(2.053, 2.044, 2.041, 2.038, 
    2.008, 2.02), `10` = c(2.002830415, 2.021725042, 2.021725042, 
    1.983935789, 2.002830415, 2.021725042), `12` = c(NA, NA, 
    NA, NA, NA, NA)), row.names = c(NA, -6L), class = "data.frame"), 
    TiO2 = structure(list(Determination_No = 1:6, `2` = c(0.07, 
    0.07, 0.07, 0.07, 0.07, 0.07), `3` = c(NA, NA, NA, NA, NA, 
    NA), `4` = c(0.07, 0.07, 0.07, 0.07, 0.07, 0.07), `5` = c(0.07, 
    0.07, 0.07, 0.07, 0.07, 0.07), `7` = c(NA, NA, NA, NA, NA, 
    NA), `8` = c(NA, NA, NA, NA, NA, NA), `10` = c(0.066721378, 
    0.066721378, 0.066721378, 0.066721378, 0.066721378, 0.066721378
    ), `12` = c(NA, NA, NA, NA, NA, NA)), row.names = c(NA, -6L
    ), class = "data.frame"), Sn = structure(list(Determination_No = 1:6, 
        `2` = c(NA, NA, NA, NA, NA, NA), `3` = c(NA, NA, NA, 
        NA, NA, NA), `4` = c(NA, NA, NA, NA, NA, NA), `5` = c(NA, 
        NA, NA, NA, NA, NA), `7` = c(NA, NA, NA, NA, NA, NA), 
        `8` = c(0.004, 0.004, 0.002, 0.007, 0.003, 0.004), `10` = c(NA, 
        NA, NA, NA, NA, NA), `12` = c(NA, NA, NA, NA, NA, NA)), row.names = c(NA, 
    -6L), class = "data.frame"))


下面的代码

# One-way ANOVA of results by laboratory for a single analyte.
#
# x:   wide data frame for one analyte: a `Determination_No` column plus one
#      column of results per laboratory (the column name is the lab number).
# ...: not used by the body; accepted so extra arguments passed through
#      lapply() do not cause an error.
#
# Returns an unnamed list of two numbers, each rounded to 4 digits:
#   [[1]] square root of the residual (within-lab) mean square,
#   [[2]] square root of the lab (between-lab) mean square.
#
# This version has no check on the number of labs; lm() will fail when
# fewer than two labs have data.
lab.Anova <- function(x, ...) {
  # Reshape to long format using only the lab columns. `Determination_No`
  # is the replicate index, not a lab: melting it together with the lab
  # columns (what `gather(x, Determination_No)` did) adds a bogus group
  # holding the values 1..n to the ANOVA.
  lab_cols <- setdiff(names(x), "Determination_No")
  long <- data.frame(
    Lab_No = rep(lab_cols, each = nrow(x)),
    Results = unlist(x[lab_cols], use.names = FALSE),
    stringsAsFactors = FALSE
  )

  # Refer to the column by name so the formula is resolved inside `data`;
  # rows with NA results and labs left without data are dropped by lm().
  lab_lm <- lm(Results ~ factor(Lab_No), data = long)
  lab_aov <- anova(lab_lm)

  # Row 1 of the ANOVA table is the lab term, row 2 the residuals.
  sd_within <- round(sqrt(lab_aov[2, "Mean Sq"]), digits = 4)
  sd_between <- round(sqrt(lab_aov[1, "Mean Sq"]), digits = 4)
  list(sd_within, sd_between)
}

我正在尝试先统计实验室数量:如果大于 2 就进行方差分析,否则返回 NA,类似下面这样:


# Draft attempt at guarding the ANOVA: return NA for both values when an
# analyte has too few labs, otherwise run the ANOVA as before.
# NOTE: this draft does not parse/run as written; see the notes below.
lab.Anova <- function(x,...){
  x <- gather(x, Determination_No) #convert the data to long format for Anova
  colnames(x) <- c("Lab_No", "Results") #Rename the columns
  # Intended: the number of labs that have at least one non-NA result.
  # As written this fails: `!is.na` applies `!` to the function object
  # itself (is.na is never called on a column), and `|` is applied to the
  # lab labels rather than to logical vectors.
  totlabs <-count(unique(x$Lab_No | x$Lab_No == !is.na ))
    # R syntax requires the condition in parentheses, and braces around
    # multi-statement branches: if (totlabs > 2) { ... } else { ... }
    if totlabs > 2
      LabLm <- lm(Results ~ as.factor(x$Lab_No), data = x) #Create a linear Model for Anova
      Lab.Aov <- anova(LabLm)
      Lab.Critical <- cbind(Lab.Aov, 'F Critical Value' = qf(1 - 0.05, Lab.Aov[1, 1], Lab.Aov[2, 1]))
      Lab.SDWithinLab <- round(sqrt(Lab.Critical[2,3]),digits = 4) #Assign the variance to a variable from ANOVA results
      Lab.SDBetweenLab <- round(sqrt(Lab.Critical[1,3]),digits = 4)
  
    else 
      Lab.SDWithinLab <- NA
      Lab.SDBetweenLab <- NA
  list(Lab.SDWithinLab,Lab.SDBetweenLab)
}

计算 totlabs 的那一行会报错,因为 R 不接受其中 `|` 和 `!is.na` 的写法。

提前致谢

您可以尝试以下函数:

library(dplyr)
library(tidyr)

# One-way ANOVA of results by laboratory for a single analyte, skipped when
# fewer than 3 labs reported data.
#
# x:   wide data frame for one analyte: a `Determination_No` column plus one
#      column of results per laboratory (the column name is the lab number).
#      A lab column that is entirely NA counts as "no data".
# ...: not used by the body; accepted so extra arguments passed through
#      lapply() do not cause an error.
#
# Returns an unnamed list of two values:
#   [[1]] square root of the residual (within-lab) mean square,
#   [[2]] square root of the lab (between-lab) mean square,
# each rounded to 4 digits, or list(NA, NA) when fewer than 3 labs have at
# least one non-NA result.
lab.Anova <- function(x, ...) {
  # Only the lab columns take part. `Determination_No` is the replicate
  # index; melting it with the labs (what `gather(x, Determination_No)`
  # did) made it count as an extra lab, so an analyte with just 2 real labs
  # passed the "> 2" check and was analysed with a bogus third group.
  lab_cols <- setdiff(names(x), "Determination_No")
  has_data <- vapply(
    x[lab_cols],
    function(col) any(!is.na(col)),
    logical(1)
  )
  totlabs <- sum(has_data)

  # ANOVA needs at least 3 groups to compare.
  if (totlabs <= 2) {
    return(list(NA, NA))
  }

  # Long format restricted to the labs that actually reported results.
  used_labs <- lab_cols[has_data]
  long <- data.frame(
    Lab_No = rep(used_labs, each = nrow(x)),
    Results = unlist(x[used_labs], use.names = FALSE),
    stringsAsFactors = FALSE
  )

  lab_lm <- lm(Results ~ factor(Lab_No), data = long)
  lab_aov <- anova(lab_lm)

  # Row 1 of the ANOVA table is the lab term, row 2 the residuals.
  sd_within <- round(sqrt(lab_aov[2, "Mean Sq"]), digits = 4)
  sd_between <- round(sqrt(lab_aov[1, "Mean Sq"]), digits = 4)
  list(sd_within, sd_between)
}

# Run lab.Anova on every element and collect the results in a list.
# NOTE(review): `df` is presumably the named list of per-analyte data frames
# shown earlier — confirm it has been assigned under that name (otherwise
# `df` resolves to the F-density function from stats).
lapply(df, lab.Anova)