在 R 中对数据框列表应用 ifelse 语句时忽略 NA
Ignoring NAs in an Ifelse statement R to be applied over a list of dataframes R
我有一个函数可以计算数据帧列表中多列值的 z 分数。下面是我的数据框片段
# Sample data: a named list of three data frames (Al2O3, As, Ba).
# Each data frame has 10 rows: a Determination_No index column followed by
# nine measurement columns named `2`, `3`, `4`, `5`, `7`, `8`, `10`, `12`, `36`.
# Many cells (and some whole columns) are NA.
df <- list(Al2O3 = structure(list(Determination_No = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10), `2` = c(2.04, 2.07, 2.05, 2.07, 2.1, 2.08,
NA, NA, NA, NA), `3` = c(2.08, 2.1, 2.08, 2.13, 2.1, 2.08, NA,
NA, NA, NA), `4` = c(2.08, 2.08, 2.09, 2.06, 2.08, 2.07, 2.07,
2.06, 2.08, 2.08), `5` = c(2.11, 2.09, 2.1, 2.08, 2.09, 2.09,
NA, NA, NA, NA), `7` = c(2.06, 2.05, 2.04, 2.05, 2.04, 2.03,
NA, NA, NA, NA), `8` = c(2.078, 2.065, 2.057, 2.063, 2.067, 2.066,
NA, NA, NA, NA), `10` = c(2.191776681, 2.153987428, 2.153987428,
2.097303548, 2.116198175, 2.116198175, NA, NA, NA, NA), `12` = c(2.24,
2.08, 2.12, 2.15, 2.15, 2.15, NA, NA, NA, NA), `36` = c(2.07,
2.082, 2.048, 2.046, 2.086, 2.069, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA,
-10L)), As = structure(list(Determination_No = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10), `2` = c(0.002, 0.001, 0.001, 0.001, 0.002,
0.001, NA, NA, NA, NA), `3` = c(0.003, 0.002, 0.002, 0.002, 0.001,
0.002, NA, NA, NA, NA), `4` = c(0.001, 0.002, 0.001, 0.002, 0.002,
0.002, 0.001, 0.002, 0.002, 0.003), `5` = c(0.002, 0.001, 0.001,
0.001, 0.001, 0.002, NA, NA, NA, NA), `7` = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), `8` = c(NA, 0.001, NA, NA, NA, NA, NA, NA, NA, NA),
`10` = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `12` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), `36` = c(0.0053, 0.0053, 0.0053,
0.00454, 0.0053, 0.0053, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA,
-10L)), Ba = structure(list(Determination_No = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10), `2` = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
`3` = c(NA, NA, NA, NA, 0.001, NA, NA, NA, NA, NA), `4` = c(0.004,
0.003, 0.003, 0.004, 0.003, 0.002, 0.004, 0.002, 0.005, NA
), `5` = c(NA, NA, NA, NA, NA, 0.003, NA, NA, NA, NA), `7` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), `8` = c(0.002, 0.003, NA,
NA, NA, 0.002, NA, NA, NA, NA), `10` = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), `12` = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), `36` = c(0.00089566, 0.00089566, 0.00089566, 0.00089566,
0.00089566, 0.00089566, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA,
-10L))
) # closes the outer list(, which the original snippet left open
我的意图是让函数根据 Z 分数计算更多统计量。我的难题是数据框中有很多 NA:当我应用 if 语句时,由于存在 NA 值,它无法正常工作。我的函数如下:
# Z-score cutoff read by FinalStats below.
ZMax <- 3.5
# First attempt: summarise one data frame `x` (its first column is dropped)
# as a consensus value plus an uncertainty, choosing between a mean-based and
# a median-based summary according to the Z-scores.
# NOTE: this version does not work; see the comment on the `if` line.
FinalStats <- function(x,...){
# Pool every value of the remaining columns into one vector.
unlistdata <- unlist(x[-1])
# Grand mean and SD over the pooled values, ignoring NAs.
GrandMean <- mean(unlistdata,na.rm = T)
GrandSD <- sd(unlistdata,na.rm=T)
# Absolute Z-score of every cell; NA wherever the data is NA.
ZScore <- abs(((x[-1])-GrandMean)/GrandSD)
# Problem: `ZScore > ZMax` produces one logical per cell (NA for NA cells),
# whereas `if` needs a single non-NA TRUE/FALSE, so this condition fails.
if(ZScore > ZMax){
LabMean <- mapply(mean, x[-1], na.rm = T) # mean of each column, NAs ignored
SD.All <- unlist(x[-1])
# Mean of the column means (no na.rm on this outer mean).
ConsensusValue <- mean(LabMean)
Uncertainty <- sd(SD.All, na.rm = T)
}else{
LabMedian <- mapply(median, x[-1], na.rm = T) # median of each column, NAs ignored
LabMedian[is.infinite(LabMedian)] <- NA # convert any Inf values to NA
SD.All <- unlist(x[-1])
# Note: this is the vector of per-column medians, not a single value.
ConsensusValue <- LabMedian
Uncertainty <- sd(SD.All, na.rm = T)
}
FinalValues <- cbind(ConsensusValue,Uncertainty) # combine the desired info into one matrix
return(FinalValues)
}
# Apply FinalStats to every data frame in the list `df`.
df.stats <- lapply(df,FinalStats)
如何让 if 语句忽略 NA 值?
我试过按以下方式在 base R 中使用 ifelse
# Second attempt: choose the consensus value with a vectorised ifelse().
# Reads `ZMax` from the enclosing environment.
FinalStats <- function(x,...){
unlistdata <- unlist(x[-1])
# Grand mean and SD over all values outside the first column, ignoring NAs.
GrandMean <- mean(unlistdata,na.rm = T)
GrandSD <- sd(unlistdata,na.rm=T)
# Absolute Z-score of every cell; NA wherever the data is NA.
ZScore <- abs(((x[-1])-GrandMean)/GrandSD)
# ifelse() works cell by cell, so the result has one entry per cell of ZScore
# rather than a single value. The test is is.na(...): cells whose comparison
# is NA get the mean of the column means, all other cells get the median of
# the column medians; whether a Z-score actually exceeds ZMax is never used.
# The outer mean()/median() have no na.rm, so a column with no data at all
# (column mean NaN, column median NA) turns the whole summary into NaN/NA.
ConsensusValue <- ifelse((is.na(ZScore > ZMax)),
mean(mapply(mean, x[-1], na.rm = T)),
median(mapply(median,x[-1],na.rm=T)))
return(ConsensusValue)
}
不幸的是,我用 ifelse 语句的这次尝试只在第一个数据框上部分起作用,在另外两个数据框上只返回 NA。
我想要的结果是单个值:要么是各列平均值的平均值,要么是各列中位数的中位数(取决于 Z 分数)。而我实际得到的是一个数据框列表,其中的数据框要么填满平均值(看起来是正确的),要么是一连串 NA。
我尝试在 ifelse 语句之外先计算平均值和中位数,再用 ifelse 语句选择我想要的值,但得到的是由这些值组成的数据框,而不是单个值。不过,如果我直接返回在 ifelse 之外计算出的平均值或中位数,就能得到正确的结果。
# Third attempt: compute both summaries up front, then select with ifelse().
# Reads `ZMax` from the enclosing environment.
FinalStats <- function(x,...){
unlistdata <- unlist(x[-1])
# Grand mean and SD over all values outside the first column, ignoring NAs.
GrandMean <- mean(unlistdata,na.rm = T)
GrandSD <- sd(unlistdata,na.rm=T)
# Absolute Z-score of every cell; NA wherever the data is NA.
ZScore <- abs(((x[-1])-GrandMean)/GrandSD)
LabMean <- mean(mapply(mean, x[-1], na.rm = T),na.rm=T) # single value: mean of the column means
LabMedian <- median(mapply(median, x[-1], na.rm = T),na.rm = T) # single value: median of the column medians
LabMedian[is.infinite(LabMedian)] <- NA # convert any Inf values to NA
# ifelse() still works cell by cell, so the result has one entry per cell of
# ZScore instead of a single value. The test only asks whether the comparison
# is non-NA: every non-missing cell gets LabMean and every missing cell gets
# LabMedian, regardless of whether any Z-score exceeds ZMax.
ConsensusValue <- ifelse(!is.na(ZScore > ZMax),
LabMean,
LabMedian)
return(ConsensusValue)
}
# Apply FinalStats to every data frame in the list `df`.
CatergoreisStats <- lapply(df,FinalStats)
我意识到我的 if 语句正在检查每个数据帧中的每个值,然后分配平均值或中值。我想要做的是检查每个数据帧是否有任何值超过我的 Zmax,然后分配平均值或中值。
我认为您应该使用 `if(any(...))` 作为条件,因为您要检查的是 `ZScore` 中是否有任何值大于 `ZMax`。NA 值可以通过 `any` 的 `na.rm = TRUE` 参数忽略。
# Z-score cutoff: a data frame containing any value whose |z| exceeds this is
# summarised with means, otherwise with medians.
ZMax <- 3.5

# FinalStats: consensus value and uncertainty for one data frame.
#
# Arguments:
#   x    data frame; the first column is dropped and the remaining columns are
#        used as measurements (assumed numeric).
#   ...  accepted for call compatibility; not used.
#
# Returns the matrix produced by cbind() with columns "ConsensusValue" and
# "Uncertainty":
#   * if any non-NA |z| is greater than ZMax: a single row whose
#     ConsensusValue is the mean of the per-column means;
#   * otherwise: one row per measurement column whose ConsensusValue is that
#     column's median (NA for a column with no data).
# Uncertainty is always the standard deviation of all measurements pooled.
FinalStats <- function(x, ...) {
  measurements <- x[-1]
  pooled <- unlist(measurements)
  GrandMean <- mean(pooled, na.rm = TRUE)
  GrandSD <- sd(pooled, na.rm = TRUE)
  ZScore <- abs((measurements - GrandMean) / GrandSD)

  # Both branches report the SD of the pooled values, which is GrandSD.
  Uncertainty <- GrandSD

  # any(..., na.rm = TRUE) collapses the per-cell comparison into a single
  # TRUE/FALSE and skips the NA cells, which is what `if` requires.
  if (any(ZScore > ZMax, na.rm = TRUE)) {
    LabMean <- vapply(measurements, mean, numeric(1), na.rm = TRUE)
    # A column with no data has mean NaN; drop it so that one empty column
    # does not turn the whole consensus value into NaN.
    ConsensusValue <- mean(LabMean, na.rm = TRUE)
  } else {
    LabMedian <- vapply(measurements, median, numeric(1), na.rm = TRUE)
    LabMedian[is.infinite(LabMedian)] <- NA # convert any Inf values to NA
    ConsensusValue <- LabMedian
  }

  cbind(ConsensusValue, Uncertainty)
}
这将返回:
# Run FinalStats on every data frame in `df`, keep the per-element results,
# and print the resulting list.
CatergoreisStats <- lapply(X = df, FUN = FinalStats)
print(CatergoreisStats)
#$Al2O3
# ConsensusValue Uncertainty
#[1,] 2.088453 0.03880474
#$As
# ConsensusValue Uncertainty
#2 0.0010 0.001475832
#3 0.0020 0.001475832
#4 0.0020 0.001475832
#5 0.0010 0.001475832
#7 NA 0.001475832
#8 0.0010 0.001475832
#10 NA 0.001475832
#12 NA 0.001475832
#36 0.0053 0.001475832
#$Ba
# ConsensusValue Uncertainty
#2 NA 0.001303559
#3 0.00100000 0.001303559
#4 0.00300000 0.001303559
#5 0.00300000 0.001303559
#7 NA 0.001303559
#8 0.00200000 0.001303559
#10 NA 0.001303559
#12 NA 0.001303559
#36 0.00089566 0.001303559
我有一个函数可以计算数据帧列表中多列值的 z 分数。下面是我的数据框片段
# Sample data: a named list of three data frames (Al2O3, As, Ba).
# Each data frame has 10 rows: a Determination_No index column followed by
# nine measurement columns named `2`, `3`, `4`, `5`, `7`, `8`, `10`, `12`, `36`.
# Many cells (and some whole columns) are NA.
df <- list(Al2O3 = structure(list(Determination_No = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10), `2` = c(2.04, 2.07, 2.05, 2.07, 2.1, 2.08,
NA, NA, NA, NA), `3` = c(2.08, 2.1, 2.08, 2.13, 2.1, 2.08, NA,
NA, NA, NA), `4` = c(2.08, 2.08, 2.09, 2.06, 2.08, 2.07, 2.07,
2.06, 2.08, 2.08), `5` = c(2.11, 2.09, 2.1, 2.08, 2.09, 2.09,
NA, NA, NA, NA), `7` = c(2.06, 2.05, 2.04, 2.05, 2.04, 2.03,
NA, NA, NA, NA), `8` = c(2.078, 2.065, 2.057, 2.063, 2.067, 2.066,
NA, NA, NA, NA), `10` = c(2.191776681, 2.153987428, 2.153987428,
2.097303548, 2.116198175, 2.116198175, NA, NA, NA, NA), `12` = c(2.24,
2.08, 2.12, 2.15, 2.15, 2.15, NA, NA, NA, NA), `36` = c(2.07,
2.082, 2.048, 2.046, 2.086, 2.069, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA,
-10L)), As = structure(list(Determination_No = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10), `2` = c(0.002, 0.001, 0.001, 0.001, 0.002,
0.001, NA, NA, NA, NA), `3` = c(0.003, 0.002, 0.002, 0.002, 0.001,
0.002, NA, NA, NA, NA), `4` = c(0.001, 0.002, 0.001, 0.002, 0.002,
0.002, 0.001, 0.002, 0.002, 0.003), `5` = c(0.002, 0.001, 0.001,
0.001, 0.001, 0.002, NA, NA, NA, NA), `7` = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), `8` = c(NA, 0.001, NA, NA, NA, NA, NA, NA, NA, NA),
`10` = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `12` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), `36` = c(0.0053, 0.0053, 0.0053,
0.00454, 0.0053, 0.0053, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA,
-10L)), Ba = structure(list(Determination_No = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10), `2` = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
`3` = c(NA, NA, NA, NA, 0.001, NA, NA, NA, NA, NA), `4` = c(0.004,
0.003, 0.003, 0.004, 0.003, 0.002, 0.004, 0.002, 0.005, NA
), `5` = c(NA, NA, NA, NA, NA, 0.003, NA, NA, NA, NA), `7` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), `8` = c(0.002, 0.003, NA,
NA, NA, 0.002, NA, NA, NA, NA), `10` = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), `12` = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), `36` = c(0.00089566, 0.00089566, 0.00089566, 0.00089566,
0.00089566, 0.00089566, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA,
-10L))
) # closes the outer list(, which the original snippet left open
我的意图是让函数根据 Z 分数计算更多统计量。我的难题是数据框中有很多 NA:当我应用 if 语句时,由于存在 NA 值,它无法正常工作。我的函数如下:
# Z-score cutoff read by FinalStats below.
ZMax <- 3.5
# First attempt: summarise one data frame `x` (its first column is dropped)
# as a consensus value plus an uncertainty, choosing between a mean-based and
# a median-based summary according to the Z-scores.
# NOTE: this version does not work; see the comment on the `if` line.
FinalStats <- function(x,...){
# Pool every value of the remaining columns into one vector.
unlistdata <- unlist(x[-1])
# Grand mean and SD over the pooled values, ignoring NAs.
GrandMean <- mean(unlistdata,na.rm = T)
GrandSD <- sd(unlistdata,na.rm=T)
# Absolute Z-score of every cell; NA wherever the data is NA.
ZScore <- abs(((x[-1])-GrandMean)/GrandSD)
# Problem: `ZScore > ZMax` produces one logical per cell (NA for NA cells),
# whereas `if` needs a single non-NA TRUE/FALSE, so this condition fails.
if(ZScore > ZMax){
LabMean <- mapply(mean, x[-1], na.rm = T) # mean of each column, NAs ignored
SD.All <- unlist(x[-1])
# Mean of the column means (no na.rm on this outer mean).
ConsensusValue <- mean(LabMean)
Uncertainty <- sd(SD.All, na.rm = T)
}else{
LabMedian <- mapply(median, x[-1], na.rm = T) # median of each column, NAs ignored
LabMedian[is.infinite(LabMedian)] <- NA # convert any Inf values to NA
SD.All <- unlist(x[-1])
# Note: this is the vector of per-column medians, not a single value.
ConsensusValue <- LabMedian
Uncertainty <- sd(SD.All, na.rm = T)
}
FinalValues <- cbind(ConsensusValue,Uncertainty) # combine the desired info into one matrix
return(FinalValues)
}
# Apply FinalStats to every data frame in the list `df`.
df.stats <- lapply(df,FinalStats)
如何让 if 语句忽略 NA 值?
我试过按以下方式在 base R 中使用 ifelse
# Second attempt: choose the consensus value with a vectorised ifelse().
# Reads `ZMax` from the enclosing environment.
FinalStats <- function(x,...){
unlistdata <- unlist(x[-1])
# Grand mean and SD over all values outside the first column, ignoring NAs.
GrandMean <- mean(unlistdata,na.rm = T)
GrandSD <- sd(unlistdata,na.rm=T)
# Absolute Z-score of every cell; NA wherever the data is NA.
ZScore <- abs(((x[-1])-GrandMean)/GrandSD)
# ifelse() works cell by cell, so the result has one entry per cell of ZScore
# rather than a single value. The test is is.na(...): cells whose comparison
# is NA get the mean of the column means, all other cells get the median of
# the column medians; whether a Z-score actually exceeds ZMax is never used.
# The outer mean()/median() have no na.rm, so a column with no data at all
# (column mean NaN, column median NA) turns the whole summary into NaN/NA.
ConsensusValue <- ifelse((is.na(ZScore > ZMax)),
mean(mapply(mean, x[-1], na.rm = T)),
median(mapply(median,x[-1],na.rm=T)))
return(ConsensusValue)
}
不幸的是,我用 ifelse 语句的这次尝试只在第一个数据框上部分起作用,在另外两个数据框上只返回 NA。
我想要的结果是单个值:要么是各列平均值的平均值,要么是各列中位数的中位数(取决于 Z 分数)。而我实际得到的是一个数据框列表,其中的数据框要么填满平均值(看起来是正确的),要么是一连串 NA。
我尝试在 ifelse 语句之外先计算平均值和中位数,再用 ifelse 语句选择我想要的值,但得到的是由这些值组成的数据框,而不是单个值。不过,如果我直接返回在 ifelse 之外计算出的平均值或中位数,就能得到正确的结果。
# Third attempt: compute both summaries up front, then select with ifelse().
# Reads `ZMax` from the enclosing environment.
FinalStats <- function(x,...){
unlistdata <- unlist(x[-1])
# Grand mean and SD over all values outside the first column, ignoring NAs.
GrandMean <- mean(unlistdata,na.rm = T)
GrandSD <- sd(unlistdata,na.rm=T)
# Absolute Z-score of every cell; NA wherever the data is NA.
ZScore <- abs(((x[-1])-GrandMean)/GrandSD)
LabMean <- mean(mapply(mean, x[-1], na.rm = T),na.rm=T) # single value: mean of the column means
LabMedian <- median(mapply(median, x[-1], na.rm = T),na.rm = T) # single value: median of the column medians
LabMedian[is.infinite(LabMedian)] <- NA # convert any Inf values to NA
# ifelse() still works cell by cell, so the result has one entry per cell of
# ZScore instead of a single value. The test only asks whether the comparison
# is non-NA: every non-missing cell gets LabMean and every missing cell gets
# LabMedian, regardless of whether any Z-score exceeds ZMax.
ConsensusValue <- ifelse(!is.na(ZScore > ZMax),
LabMean,
LabMedian)
return(ConsensusValue)
}
# Apply FinalStats to every data frame in the list `df`.
CatergoreisStats <- lapply(df,FinalStats)
我意识到我的 if 语句正在检查每个数据帧中的每个值,然后分配平均值或中值。我想要做的是检查每个数据帧是否有任何值超过我的 Zmax,然后分配平均值或中值。
我认为您应该使用 `if(any(...))` 作为条件,因为您要检查的是 `ZScore` 中是否有任何值大于 `ZMax`。NA 值可以通过 `any` 的 `na.rm = TRUE` 参数忽略。
# Z-score cutoff: a data frame containing any value whose |z| exceeds this is
# summarised with means, otherwise with medians.
ZMax <- 3.5

# FinalStats: consensus value and uncertainty for one data frame.
#
# Arguments:
#   x    data frame; the first column is dropped and the remaining columns are
#        used as measurements (assumed numeric).
#   ...  accepted for call compatibility; not used.
#
# Returns the matrix produced by cbind() with columns "ConsensusValue" and
# "Uncertainty":
#   * if any non-NA |z| is greater than ZMax: a single row whose
#     ConsensusValue is the mean of the per-column means;
#   * otherwise: one row per measurement column whose ConsensusValue is that
#     column's median (NA for a column with no data).
# Uncertainty is always the standard deviation of all measurements pooled.
FinalStats <- function(x, ...) {
  measurements <- x[-1]
  pooled <- unlist(measurements)
  GrandMean <- mean(pooled, na.rm = TRUE)
  GrandSD <- sd(pooled, na.rm = TRUE)
  ZScore <- abs((measurements - GrandMean) / GrandSD)

  # Both branches report the SD of the pooled values, which is GrandSD.
  Uncertainty <- GrandSD

  # any(..., na.rm = TRUE) collapses the per-cell comparison into a single
  # TRUE/FALSE and skips the NA cells, which is what `if` requires.
  if (any(ZScore > ZMax, na.rm = TRUE)) {
    LabMean <- vapply(measurements, mean, numeric(1), na.rm = TRUE)
    # A column with no data has mean NaN; drop it so that one empty column
    # does not turn the whole consensus value into NaN.
    ConsensusValue <- mean(LabMean, na.rm = TRUE)
  } else {
    LabMedian <- vapply(measurements, median, numeric(1), na.rm = TRUE)
    LabMedian[is.infinite(LabMedian)] <- NA # convert any Inf values to NA
    ConsensusValue <- LabMedian
  }

  cbind(ConsensusValue, Uncertainty)
}
这将返回:
# Run FinalStats on every data frame in `df`, keep the per-element results,
# and print the resulting list.
CatergoreisStats <- lapply(X = df, FUN = FinalStats)
print(CatergoreisStats)
#$Al2O3
# ConsensusValue Uncertainty
#[1,] 2.088453 0.03880474
#$As
# ConsensusValue Uncertainty
#2 0.0010 0.001475832
#3 0.0020 0.001475832
#4 0.0020 0.001475832
#5 0.0010 0.001475832
#7 NA 0.001475832
#8 0.0010 0.001475832
#10 NA 0.001475832
#12 NA 0.001475832
#36 0.0053 0.001475832
#$Ba
# ConsensusValue Uncertainty
#2 NA 0.001303559
#3 0.00100000 0.001303559
#4 0.00300000 0.001303559
#5 0.00300000 0.001303559
#7 NA 0.001303559
#8 0.00200000 0.001303559
#10 NA 0.001303559
#12 NA 0.001303559
#36 0.00089566 0.001303559