在 for 循环的 dplyr 管道中使用动态变量名时出现“NAs introduced by coercion”(强制转换引入 NA)错误
NAs introduced by coercion error in for loop with dynamic variables in dplyr pipeline
我编写了以下代码并附上了示例数据。但是，mutate 生成的新变量返回的全是 NA。
我收到错误“NAs introduced by coercion”，因此用 as.numeric() 包裹了 eval(i_tlag_baseline)，但这似乎无法解决问题。
此外，在 for 循环和 dplyr 管道之外手动输入变量名时，class() 显示数据框中被 cut 的那个变量是 numeric 类型。
如何解决这个问题?
代码
# Question code (reproduces the problem): for each data frame named in
# names.dfs, try to add a "<name>_t_210" factor column by binning the
# "<name>_timediff" column with cut(), then store the result back.
names.dfs <- c("df1", "df2", "df3")
for (i in names.dfs){
# Fetch the data frame object whose name is stored in i.
df_i <- get(i)
# Name of the new column, e.g. "df1_t_210".
i_t_210 <- paste0(i,"_t_210")
# Name of the source column, e.g. "df1_timediff" -- a character string.
i_tlag_baseline <- paste0(i,"_timediff")
df_i <- df_i %>%
# NOTE: eval() of a character string simply returns that string, so
# as.numeric() receives the text "<name>_timediff" instead of the column's
# values. That gives NA plus the "NAs introduced by coercion" warning, and
# cut() of NA is NA, so the new column is all NA.
mutate({{i_t_210}} := cut(as.numeric(eval(i_tlag_baseline)), breaks = c(-2,0,10,22,34,46,58,70), labels = c("baseline","Timepoint0.5", "Timepoint1", "Timepoint2", "Timepoint3","Timepoint4","Timepoint5")))
# Write the modified data frame back under its original name.
assign(paste0(i), df_i)
}
示例数据
# Example data df1: a grouped tibble (class "grouped_df", grouped by
# ResultsID into 3 groups) with 6 rows and columns ResultsID, RepeatNo,
# Submitted_df1 (Date) and df1_timediff (numeric).
# Looks like dput() output -- keep verbatim so the attributes stay intact.
df1 <- structure(list(ResultsID = c(4, 4, 3, 3, 1, 1), RepeatNo = c(0L,
0L, 0L, 0L, 0L, 0L), Submitted_df1 = structure(c(17484, 17484,
17488, 17497, 17502, 17509), class = "Date"), df1_timediff = c(0,
0, 0, 0.295687885010267, 0, 0.229979466119097)), row.names = c(NA,
-6L), groups = structure(list(ResultsID = c(1, 3, 4), .rows = structure(list(
5:6, 3:4, 1:2), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
# Example data df2: a grouped tibble (class "grouped_df", grouped by
# ResultsID into 5 groups) with 6 rows and columns ResultsID, RepeatNo,
# Submitted_df2 (Date) and df2_timediff (numeric).
# Looks like dput() output -- keep verbatim so the attributes stay intact.
df2 <- structure(list(ResultsID = c(1, 5, 3, 1, 2, 4), RepeatNo = c(0L,
0L, 0L, 0L, 0L, 0L), Submitted_df2 = structure(c(16856, 16858,
16861, 16869, 16875, 16888), class = "Date"), df2_timediff = c(0,
0, 0, 0.427104722792608, 0, 0)), row.names = c(NA, -6L), groups = structure(list(
ResultsID = c(1, 2, 3, 4, 5), .rows = structure(list(c(1L,
4L), 5L, 3L, 6L, 2L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
# Example data df3: a grouped tibble (class "grouped_df", grouped by
# ResultsID into 4 groups) with 6 rows and columns ResultsID, RepeatNo,
# Submitted_df3 (Date) and df3_timediff (numeric).
# Looks like dput() output -- keep verbatim so the attributes stay intact.
df3 <- structure(list(ResultsID = c(4, 1, 3, 4, 3, 2), RepeatNo = c(0L,
0L, 0L, 0L, 0L, 0L), Submitted_df3 = structure(c(17912, 17913,
17915, 17916, 17919, 17921), class = "Date"), df3_timediff = c(0,
0, 0, 0.131416837782341, 0.131416837782341, 0)), row.names = c(NA,
-6L), groups = structure(list(ResultsID = c(1, 2, 3, 4), .rows = structure(list(
2L, 6L, c(3L, 5L), c(1L, 4L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
这应该有效:
# Answer code: for each data frame named in names.dfs, add a "<name>_t_210"
# factor column that bins the numeric "<name>_timediff" column into labelled
# time points, then store the result back under the original name.
names.dfs <- c("df1", "df2", "df3")
for (i in names.dfs) {
  # Fetch the data frame object whose name is stored in i.
  df_i <- get(i)
  # Column names are built as strings, e.g. "df1_t_210" and "df1_timediff".
  i_t_210 <- paste0(i, "_t_210")
  i_tlag_baseline <- paste0(i, "_timediff")
  df_i <- df_i %>%
    mutate(
      # "{i_t_210}" := names the new column from the string held in i_t_210.
      # .data[[i_tlag_baseline]] looks the source column up by its string
      # name, so cut() receives the column's values rather than the name
      # itself. This replaces the deprecated UQ(rlang::sym(...)) form.
      "{i_t_210}" := cut(
        as.numeric(.data[[i_tlag_baseline]]),
        breaks = c(-2, 0, 10, 22, 34, 46, 58, 70),
        labels = c(
          "baseline", "Timepoint0.5", "Timepoint1",
          "Timepoint2", "Timepoint3", "Timepoint4",
          "Timepoint5"
        )
      )
    )
  # Write the modified data frame back under its original name.
  assign(i, df_i)
}
我发现 this answer 在这些情况下非常有用。
我编写了以下代码并附上了示例数据。但是，mutate 生成的新变量返回的全是 NA。
我收到错误“NAs introduced by coercion”，因此用 as.numeric() 包裹了 eval(i_tlag_baseline)，但这似乎无法解决问题。
此外，在 for 循环和 dplyr 管道之外手动输入变量名时，class() 显示数据框中被 cut 的那个变量是 numeric 类型。
如何解决这个问题?
代码
# Question code (reproduces the problem): for each data frame named in
# names.dfs, try to add a "<name>_t_210" factor column by binning the
# "<name>_timediff" column with cut(), then store the result back.
names.dfs <- c("df1", "df2", "df3")
for (i in names.dfs){
# Fetch the data frame object whose name is stored in i.
df_i <- get(i)
# Name of the new column, e.g. "df1_t_210".
i_t_210 <- paste0(i,"_t_210")
# Name of the source column, e.g. "df1_timediff" -- a character string.
i_tlag_baseline <- paste0(i,"_timediff")
df_i <- df_i %>%
# NOTE: eval() of a character string simply returns that string, so
# as.numeric() receives the text "<name>_timediff" instead of the column's
# values. That gives NA plus the "NAs introduced by coercion" warning, and
# cut() of NA is NA, so the new column is all NA.
mutate({{i_t_210}} := cut(as.numeric(eval(i_tlag_baseline)), breaks = c(-2,0,10,22,34,46,58,70), labels = c("baseline","Timepoint0.5", "Timepoint1", "Timepoint2", "Timepoint3","Timepoint4","Timepoint5")))
# Write the modified data frame back under its original name.
assign(paste0(i), df_i)
}
示例数据
# Example data df1: a grouped tibble (class "grouped_df", grouped by
# ResultsID into 3 groups) with 6 rows and columns ResultsID, RepeatNo,
# Submitted_df1 (Date) and df1_timediff (numeric).
# Looks like dput() output -- keep verbatim so the attributes stay intact.
df1 <- structure(list(ResultsID = c(4, 4, 3, 3, 1, 1), RepeatNo = c(0L,
0L, 0L, 0L, 0L, 0L), Submitted_df1 = structure(c(17484, 17484,
17488, 17497, 17502, 17509), class = "Date"), df1_timediff = c(0,
0, 0, 0.295687885010267, 0, 0.229979466119097)), row.names = c(NA,
-6L), groups = structure(list(ResultsID = c(1, 3, 4), .rows = structure(list(
5:6, 3:4, 1:2), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
# Example data df2: a grouped tibble (class "grouped_df", grouped by
# ResultsID into 5 groups) with 6 rows and columns ResultsID, RepeatNo,
# Submitted_df2 (Date) and df2_timediff (numeric).
# Looks like dput() output -- keep verbatim so the attributes stay intact.
df2 <- structure(list(ResultsID = c(1, 5, 3, 1, 2, 4), RepeatNo = c(0L,
0L, 0L, 0L, 0L, 0L), Submitted_df2 = structure(c(16856, 16858,
16861, 16869, 16875, 16888), class = "Date"), df2_timediff = c(0,
0, 0, 0.427104722792608, 0, 0)), row.names = c(NA, -6L), groups = structure(list(
ResultsID = c(1, 2, 3, 4, 5), .rows = structure(list(c(1L,
4L), 5L, 3L, 6L, 2L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
# Example data df3: a grouped tibble (class "grouped_df", grouped by
# ResultsID into 4 groups) with 6 rows and columns ResultsID, RepeatNo,
# Submitted_df3 (Date) and df3_timediff (numeric).
# Looks like dput() output -- keep verbatim so the attributes stay intact.
df3 <- structure(list(ResultsID = c(4, 1, 3, 4, 3, 2), RepeatNo = c(0L,
0L, 0L, 0L, 0L, 0L), Submitted_df3 = structure(c(17912, 17913,
17915, 17916, 17919, 17921), class = "Date"), df3_timediff = c(0,
0, 0, 0.131416837782341, 0.131416837782341, 0)), row.names = c(NA,
-6L), groups = structure(list(ResultsID = c(1, 2, 3, 4), .rows = structure(list(
2L, 6L, c(3L, 5L), c(1L, 4L)), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
这应该有效:
# Answer code: for each data frame named in names.dfs, add a "<name>_t_210"
# factor column that bins the numeric "<name>_timediff" column into labelled
# time points, then store the result back under the original name.
names.dfs <- c("df1", "df2", "df3")
for (i in names.dfs) {
  # Fetch the data frame object whose name is stored in i.
  df_i <- get(i)
  # Column names are built as strings, e.g. "df1_t_210" and "df1_timediff".
  i_t_210 <- paste0(i, "_t_210")
  i_tlag_baseline <- paste0(i, "_timediff")
  df_i <- df_i %>%
    mutate(
      # "{i_t_210}" := names the new column from the string held in i_t_210.
      # .data[[i_tlag_baseline]] looks the source column up by its string
      # name, so cut() receives the column's values rather than the name
      # itself. This replaces the deprecated UQ(rlang::sym(...)) form.
      "{i_t_210}" := cut(
        as.numeric(.data[[i_tlag_baseline]]),
        breaks = c(-2, 0, 10, 22, 34, 46, 58, 70),
        labels = c(
          "baseline", "Timepoint0.5", "Timepoint1",
          "Timepoint2", "Timepoint3", "Timepoint4",
          "Timepoint5"
        )
      )
    )
  # Write the modified data frame back under its original name.
  assign(i, df_i)
}
我发现 this answer 在这些情况下非常有用。