在 for 循环的 dplyr 管道中使用动态变量时出现“NAs introduced by coercion”(强制转换引入 NA)错误

NAs introduced by coercion error in for loop with dynamic variables in dplyr pipeline

我编写了以下代码和示例数据。但是,mutate 生成的变量返回的全是 NA。

我收到错误“NAs introduced by coercion”,因此在 eval(i_tlag_baseline) 外面加上了 as.numeric(),但这似乎无法解决问题。

此外,在 for 循环和 dplyr 管道之外手动输入变量名时,对数据框中被 cut 的变量调用 class() 显示为 numeric。

如何解决这个问题?

代码

# Question's original (non-working) attempt: add a binned "<name>_t_210"
# column to each data frame listed in names.dfs.
names.dfs <- c("df1", "df2", "df3")

for (i in names.dfs){

  # Fetch the data frame whose name is held in i.
  df_i <- get(i)  
  
  # Name of the new column to create, e.g. "df1_t_210".
  i_t_210 <- paste0(i,"_t_210")
  
  # Name of the existing column to bin, e.g. "df1_timediff".
  i_tlag_baseline <- paste0(i,"_timediff")
  
  # NOTE: eval() of a character string simply returns that string, so
  # as.numeric() receives the text "df1_timediff" (not the column) and yields
  # NA with the warning "NAs introduced by coercion"; cut() then returns NA
  # for every row.
  df_i <- df_i %>%
          mutate({{i_t_210}} := cut(as.numeric(eval(i_tlag_baseline)), breaks = c(-2,0,10,22,34,46,58,70), labels = c("baseline","Timepoint0.5", "Timepoint1", "Timepoint2", "Timepoint3","Timepoint4","Timepoint5")))
  
  # Write the modified data frame back under its original name.
  assign(paste0(i), df_i)
  
}

示例数据

# Sample data df1: 6 rows, grouped by ResultsID, with columns ResultsID,
# RepeatNo, Submitted_df1 (Date) and df1_timediff (numeric).
df1 <- structure(list(ResultsID = c(4, 4, 3, 3, 1, 1), RepeatNo = c(0L, 
0L, 0L, 0L, 0L, 0L), Submitted_df1 = structure(c(17484, 17484, 
17488, 17497, 17502, 17509), class = "Date"), df1_timediff = c(0, 
0, 0, 0.295687885010267, 0, 0.229979466119097)), row.names = c(NA, 
-6L), groups = structure(list(ResultsID = c(1, 3, 4), .rows = structure(list(
    5:6, 3:4, 1:2), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

# Sample data df2: 6 rows, grouped by ResultsID, with columns ResultsID,
# RepeatNo, Submitted_df2 (Date) and df2_timediff (numeric).
df2 <- structure(list(ResultsID = c(1, 5, 3, 1, 2, 4), RepeatNo = c(0L, 
0L, 0L, 0L, 0L, 0L), Submitted_df2 = structure(c(16856, 16858, 
16861, 16869, 16875, 16888), class = "Date"), df2_timediff = c(0, 
0, 0, 0.427104722792608, 0, 0)), row.names = c(NA, -6L), groups = structure(list(
    ResultsID = c(1, 2, 3, 4, 5), .rows = structure(list(c(1L, 
    4L), 5L, 3L, 6L, 2L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -5L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))
  
# Sample data df3: 6 rows, grouped by ResultsID, with columns ResultsID,
# RepeatNo, Submitted_df3 (Date) and df3_timediff (numeric).
df3 <-  structure(list(ResultsID = c(4, 1, 3, 4, 3, 2), RepeatNo = c(0L, 
0L, 0L, 0L, 0L, 0L), Submitted_df3 = structure(c(17912, 17913, 
17915, 17916, 17919, 17921), class = "Date"), df3_timediff = c(0, 
0, 0, 0.131416837782341, 0.131416837782341, 0)), row.names = c(NA, 
-6L), groups = structure(list(ResultsID = c(1, 2, 3, 4), .rows = structure(list(
    2L, 6L, c(3L, 5L), c(1L, 4L)), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -4L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame")) 

这应该有效:

# Add a binned "<name>_t_210" factor column to each data frame listed in
# names.dfs, derived from that frame's "<name>_timediff" column. Each data
# frame must already exist under its name and is overwritten in place.
names.dfs <- c("df1", "df2", "df3")

for (i in names.dfs) {
  # Fetch the data frame whose name is held in i.
  df_i <- get(i)

  # Output column (e.g. "df1_t_210") and input column (e.g. "df1_timediff").
  i_t_210 <- paste0(i, "_t_210")
  i_tlag_baseline <- paste0(i, "_timediff")

  df_i <- df_i %>%
    mutate(
      # "{name}" := names the new column from a string; .data[[name]] looks
      # up the existing column by string. This replaces the deprecated
      # UQ(rlang::sym(name)) form.
      "{i_t_210}" := cut(
        as.numeric(.data[[i_tlag_baseline]]),
        breaks = c(-2, 0, 10, 22, 34, 46, 58, 70),
        labels = c(
          "baseline", "Timepoint0.5", "Timepoint1", "Timepoint2",
          "Timepoint3", "Timepoint4", "Timepoint5"
        )
      )
    )

  # Write the modified data frame back under its original name.
  assign(i, df_i)
}

我发现这个回答(this answer)在这类情况下非常有用。