从 R 中的多个 data.table 输入创建向量

creating a vector from multiple data.table inputs in R

我正在尝试创建一个向量,其中包含来自名为 runVars 的 data.table 的串联信息:

runVars
    calculationType calculateHCC cutOffTime historyCutOff numberOfHistroicalYears
 1:        COMPLETE         TRUE          9         FALSE                       1
 2:        COMPLETE         TRUE          9         FALSE                       2
 3:        COMPLETE         TRUE          9         FALSE                       1
 4:        COMPLETE         TRUE          9         FALSE                       2
 5:        COMPLETE         TRUE         10         FALSE                       1
 6:        COMPLETE         TRUE         10         FALSE                       2
 7:        COMPLETE         TRUE         10         FALSE                       1
 8:        COMPLETE         TRUE         10         FALSE                       2
 9:        COMPLETE        FALSE          9         FALSE                       1
10:        COMPLETE        FALSE          9         FALSE                       2
11:        COMPLETE        FALSE          9         FALSE                       1
12:        COMPLETE        FALSE          9         FALSE                       2
13:        COMPLETE        FALSE         10         FALSE                       1
14:        COMPLETE        FALSE         10         FALSE                       2
15:        COMPLETE        FALSE         10         FALSE                       1
16:        COMPLETE        FALSE         10         FALSE                       2

我要创建的向量应该如下所示:

 [1] "histyears1_hcc1_histcut0_9m_COMPLETE"  "histyears2_hcc1_histcut0_9m_COMPLETE"  "histyears1_hcc1_histcut0_9m_COMPLETE"  "histyears2_hcc1_histcut0_9m_COMPLETE" 
 [5] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE" "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
 [9] "histyears1_hcc0_histcut0_9m_COMPLETE"  "histyears2_hcc0_histcut0_9m_COMPLETE"  "histyears1_hcc0_histcut0_9m_COMPLETE"  "histyears2_hcc0_histcut0_9m_COMPLETE" 
[13] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE" "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"

我使用 data.table 方法创建了这个向量:

# Convert runVars to a data.table by reference.
setDT(runVars)
# Assemble every run name from its underscore-separated pieces; the logical
# flags are cast to integer so they appear as 0/1 in the name.
runVars[, runName := paste(
  paste0("histyears", numberOfHistroicalYears),
  paste0("hcc", as.integer(calculateHCC)),
  paste0("histcut", as.integer(historyCutOff)),
  paste0(cutOffTime, "m"),
  calculationType,
  sep = "_"
)]
# Pull the new column out as a plain character vector.
subset <- runVars[["runName"]]

不过,我不太喜欢这种写法,它看起来有点笨拙。于是我尝试改用 lapply(),但不知道哪里出了问题:

# NOTE: this attempt fails. lapply() walks over the columns of runVars, so `x`
# is a single atomic column vector rather than a row or a table, and `x$...`
# is rejected (see the error message that follows).
subset <- lapply(runVars, function(x){
  paste0("histyears", x$numberOfHistroicalYears, "_hcc", as.integer(x$calculateHCC),
                  "_histcut", as.integer(x$historyCutOff), "_", x$cutOffTime, "m_", x$calculationType)
})

Error in x$numberOfHistroicalYears : $ operator is invalid for atomic vectors

有什么解决办法吗?

你现有的 data.table 写法是正确的,这里不应该使用 lapply:对 data.table 调用 lapply 时会逐列遍历,x 是一个原子向量,因此 x$... 会报错。

这是 sprintf 的另一个变体,可能更短。

library(data.table)
# Add runName by reference using one sprintf() template: %s for the values
# taken as-is, %d for the logical flags after casting them to integer (0/1).
runVars[, runName := sprintf(
  fmt = "histyears%s_hcc%d_histcut%d_%sm_%s",
  numberOfHistroicalYears,
  as.integer(calculateHCC),
  as.integer(historyCutOff),
  cutOffTime,
  calculationType
)]
# Print the resulting character vector.
runVars[["runName"]]

# [1] "histyears1_hcc1_histcut0_9m_COMPLETE"  "histyears2_hcc1_histcut0_9m_COMPLETE" 
# [3] "histyears1_hcc1_histcut0_9m_COMPLETE"  "histyears2_hcc1_histcut0_9m_COMPLETE" 
# [5] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
# [7] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
# [9] "histyears1_hcc0_histcut0_9m_COMPLETE"  "histyears2_hcc0_histcut0_9m_COMPLETE" 
#[11] "histyears1_hcc0_histcut0_9m_COMPLETE"  "histyears2_hcc0_histcut0_9m_COMPLETE" 
#[13] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"
#[15] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"

这是我的答案...也使用 sprintf(),但方法略有不同

# Build the run names by feeding the selected columns to one sprintf() call.

# sprintf() template; every placeholder is %s, so each value is formatted as text
string_format <- "histyears%s_hcc%s_histcut%s_%sm_%s"
# columns to use, in the same order as the placeholders in the template
cols <- c(
  "numberOfHistroicalYears", "calculateHCC", "historyCutOff",
  "cutOffTime", "calculationType"
)
# logical flags must become 0/1 first, otherwise %s renders them as TRUE/FALSE
as_flag <- function(col) if (is.logical(col)) as.integer(col) else col
# run on runVars (the table from the question), restricted to the chosen columns
runVars[, do.call(sprintf, c(lapply(.SD, as_flag), fmt = string_format)),
        .SDcols = cols]
# [1] "histyears1_hcc1_histcut0_9m_COMPLETE"  "histyears2_hcc1_histcut0_9m_COMPLETE" 
# [3] "histyears1_hcc1_histcut0_9m_COMPLETE"  "histyears2_hcc1_histcut0_9m_COMPLETE" 
# [5] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
# [7] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
# [9] "histyears1_hcc0_histcut0_9m_COMPLETE"  "histyears2_hcc0_histcut0_9m_COMPLETE" 
#[11] "histyears1_hcc0_histcut0_9m_COMPLETE"  "histyears2_hcc0_histcut0_9m_COMPLETE" 
#[13] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"
#[15] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"