比较 2 列,如果一列等于 NA,则使用另一列更新它
Compare 2 columns and, if one is NA, use the other to update it
我的数据框如下所示:
# A tibble: 15 x 3
hm_variant_id x y
<chr> <dbl> <dbl>
1 10_64249655_C_T 0.0749 NA
2 10_92361338_T_C -0.0346 NA
3 10_5224192_T_C -0.0683 NA
4 10_25993366_G_C 0.0231 NA
5 10_20364_A_G NA 1.9
6 10_20665_TAAAC_T 0.0202 NA
7 10_22591_G_T NA 0.8
8 10_23187_G_A -0.250 NA
我想遍历我的数据,比较 x 和 y,并按以下规则让它们相互更新:
if x = NA, then x = log(y)
if y = NA, then y = exp(x)
我的尝试是:
# Adds a new column `new_or` holding exp(hm_beta); no existing column is
# modified, and x and y are not referenced at all.
test2 <- test %>% mutate(new_or = exp(hm_beta))
但这会创建一个新列并且不会真正比较 x 和 y。
# Convert the input to a plain data.frame before calling the function.
test <- as.data.frame(test)
# Attempt to fill a missing x with log(y) and a missing y with exp(x).
# NOTE(review): as written this cannot work when called on a whole data frame:
#   - data["x"] keeps the data-frame container (a list), so as.numeric() on it
#     fails with "'list' object cannot be coerced to type 'double'" once there
#     is more than one row; data[["x"]] would extract the numeric vector;
#   - if () expects a single TRUE/FALSE, not one value per row;
#   - x and y are local copies, so `data` itself is never modified.
calculate <- function(data) {
# Pull the two columns out of `data` and coerce them to numeric.
x <- as.numeric(data["x"])
y <- as.numeric(data["y"])
# Missing x: derive it from y.
if (is.na(x) == TRUE){
x <- log(y)
}
# Missing y: derive it from x (which may have just been filled in above).
if (is.na(y) == TRUE){
y <- exp(x)
}
}
But I get this error:
Error in calculate_beta_ors(test) :
'list' object cannot be coerced to type 'double'
这里最简洁的做法是什么?理想情况下使用 tidyverse,但任何方法都可以。
使用简单的 `ifelse`:
# Return a filled-in copy of df without touching df itself.
# Both replacements are computed from the ORIGINAL columns of df:
# a missing x becomes log(y), a missing y becomes exp(x).
local({
  filled <- df
  filled$x <- ifelse(is.na(df$x), log(df$y), df$x)
  filled$y <- ifelse(is.na(df$y), exp(df$x), df$y)
  filled
})
这也可以用 `dplyr` 函数类似地写成:
library(dplyr)

# Two sequential steps: x is filled first, so the second step already sees
# the updated x when it computes exp(x) for a missing y.
df %>%
  mutate(x = if_else(is.na(x), log(y), x)) %>%
  mutate(y = if_else(is.na(y), exp(x), y))
# hm_variant_id x y
#1 10_64249655_C_T 0.075 1.08
#2 10_92361338_T_C -0.035 0.97
#3 10_5224192_T_C -0.068 0.93
#4 10_25993366_G_C 0.023 1.02
#5 10_20364_A_G 0.642 1.90
#6 10_20665_TAAAC_T 0.020 1.02
#7 10_22591_G_T -0.223 0.80
#8 10_23187_G_A -0.250 0.78
数据
# Example data: eight variants, each with exactly one of x / y missing.
df <- data.frame(
  hm_variant_id = c(
    "10_64249655_C_T", "10_92361338_T_C", "10_5224192_T_C",
    "10_25993366_G_C", "10_20364_A_G", "10_20665_TAAAC_T",
    "10_22591_G_T", "10_23187_G_A"
  ),
  x = c(0.0749, -0.0346, -0.0683, 0.0231, NA, 0.0202, NA, -0.25),
  y = c(NA, NA, NA, NA, 1.9, NA, 0.8, NA),
  stringsAsFactors = FALSE
)
使用 ifelse:
# Put the complementary value in a single new column: log(y) where only y is
# known, exp(x) where only x is known, and NA when both or neither are present.
df$new_or <- with(
  df,
  ifelse(
    is.na(x) & !is.na(y),
    log(y),
    ifelse(is.na(y) & !is.na(x), exp(x), NA)
  )
)
df
# hm_variant_id x y new_or
# 1 10_64249655_C_T 0.0749 NA 1.0777764
# 2 10_92361338_T_C -0.0346 NA 0.9659917
# 3 10_5224192_T_C -0.0683 NA 0.9339802
# 4 10_25993366_G_C 0.0231 NA 1.0233689
# 5 10_20364_A_G NA 1.9 0.6418539
# 6 10_20665_TAAAC_T 0.0202 NA 1.0204054
# 7 10_22591_G_T NA 0.8 -0.2231436
# 8 10_23187_G_A -0.2500 NA 0.7788008
我们可以使用 `case_when`:
library(dplyr)

# Keep a value when it is present; otherwise derive it from the other column.
# x is resolved first, so exp(x) below already sees the filled-in x.
df %>%
  mutate(
    x = case_when(!is.na(x) ~ x, TRUE ~ log(y)),
    y = case_when(!is.na(y) ~ y, TRUE ~ exp(x))
  )
我的数据框如下所示:
# A tibble: 15 x 3
hm_variant_id x y
<chr> <dbl> <dbl>
1 10_64249655_C_T 0.0749 NA
2 10_92361338_T_C -0.0346 NA
3 10_5224192_T_C -0.0683 NA
4 10_25993366_G_C 0.0231 NA
5 10_20364_A_G NA 1.9
6 10_20665_TAAAC_T 0.0202 NA
7 10_22591_G_T NA 0.8
8 10_23187_G_A -0.250 NA
我想遍历我的数据,比较 x 和 y,并按以下规则让它们相互更新:
if x = NA, then x = log(y)
if y = NA, then y = exp(x)
我的尝试是:
# Adds a new column `new_or` holding exp(hm_beta); no existing column is
# modified, and x and y are not referenced at all.
test2 <- test %>% mutate(new_or = exp(hm_beta))
但这会创建一个新列并且不会真正比较 x 和 y。
# Convert the input to a plain data.frame before calling the function.
test <- as.data.frame(test)
# Attempt to fill a missing x with log(y) and a missing y with exp(x).
# NOTE(review): as written this cannot work when called on a whole data frame:
#   - data["x"] keeps the data-frame container (a list), so as.numeric() on it
#     fails with "'list' object cannot be coerced to type 'double'" once there
#     is more than one row; data[["x"]] would extract the numeric vector;
#   - if () expects a single TRUE/FALSE, not one value per row;
#   - x and y are local copies, so `data` itself is never modified.
calculate <- function(data) {
# Pull the two columns out of `data` and coerce them to numeric.
x <- as.numeric(data["x"])
y <- as.numeric(data["y"])
# Missing x: derive it from y.
if (is.na(x) == TRUE){
x <- log(y)
}
# Missing y: derive it from x (which may have just been filled in above).
if (is.na(y) == TRUE){
y <- exp(x)
}
}
But I get this error:
Error in calculate_beta_ors(test) :
'list' object cannot be coerced to type 'double'
这里最简洁的做法是什么?理想情况下使用 tidyverse,但任何方法都可以。
使用简单的 `ifelse`:
# Return a filled-in copy of df without touching df itself.
# Both replacements are computed from the ORIGINAL columns of df:
# a missing x becomes log(y), a missing y becomes exp(x).
local({
  filled <- df
  filled$x <- ifelse(is.na(df$x), log(df$y), df$x)
  filled$y <- ifelse(is.na(df$y), exp(df$x), df$y)
  filled
})
这也可以用 `dplyr` 函数类似地写成:
library(dplyr)

# Two sequential steps: x is filled first, so the second step already sees
# the updated x when it computes exp(x) for a missing y.
df %>%
  mutate(x = if_else(is.na(x), log(y), x)) %>%
  mutate(y = if_else(is.na(y), exp(x), y))
# hm_variant_id x y
#1 10_64249655_C_T 0.075 1.08
#2 10_92361338_T_C -0.035 0.97
#3 10_5224192_T_C -0.068 0.93
#4 10_25993366_G_C 0.023 1.02
#5 10_20364_A_G 0.642 1.90
#6 10_20665_TAAAC_T 0.020 1.02
#7 10_22591_G_T -0.223 0.80
#8 10_23187_G_A -0.250 0.78
数据
# Example data: eight variants, each with exactly one of x / y missing.
df <- data.frame(
  hm_variant_id = c(
    "10_64249655_C_T", "10_92361338_T_C", "10_5224192_T_C",
    "10_25993366_G_C", "10_20364_A_G", "10_20665_TAAAC_T",
    "10_22591_G_T", "10_23187_G_A"
  ),
  x = c(0.0749, -0.0346, -0.0683, 0.0231, NA, 0.0202, NA, -0.25),
  y = c(NA, NA, NA, NA, 1.9, NA, 0.8, NA),
  stringsAsFactors = FALSE
)
使用 ifelse:
# Put the complementary value in a single new column: log(y) where only y is
# known, exp(x) where only x is known, and NA when both or neither are present.
df$new_or <- with(
  df,
  ifelse(
    is.na(x) & !is.na(y),
    log(y),
    ifelse(is.na(y) & !is.na(x), exp(x), NA)
  )
)
df
# hm_variant_id x y new_or
# 1 10_64249655_C_T 0.0749 NA 1.0777764
# 2 10_92361338_T_C -0.0346 NA 0.9659917
# 3 10_5224192_T_C -0.0683 NA 0.9339802
# 4 10_25993366_G_C 0.0231 NA 1.0233689
# 5 10_20364_A_G NA 1.9 0.6418539
# 6 10_20665_TAAAC_T 0.0202 NA 1.0204054
# 7 10_22591_G_T NA 0.8 -0.2231436
# 8 10_23187_G_A -0.2500 NA 0.7788008
我们可以使用 `case_when`:
library(dplyr)

# Keep a value when it is present; otherwise derive it from the other column.
# x is resolved first, so exp(x) below already sees the filled-in x.
df %>%
  mutate(
    x = case_when(!is.na(x) ~ x, TRUE ~ log(y)),
    y = case_when(!is.na(y) ~ y, TRUE ~ exp(x))
  )