从 R 中的多个 data.table 输入创建向量
creating a vector from multiple data.table inputs in R
我正在尝试创建一个向量,其中包含来自名为 runVars 的 data.table 的串联信息:
runVars
calculationType calculateHCC cutOffTime historyCutOff numberOfHistroicalYears
1: COMPLETE TRUE 9 FALSE 1
2: COMPLETE TRUE 9 FALSE 2
3: COMPLETE TRUE 9 FALSE 1
4: COMPLETE TRUE 9 FALSE 2
5: COMPLETE TRUE 10 FALSE 1
6: COMPLETE TRUE 10 FALSE 2
7: COMPLETE TRUE 10 FALSE 1
8: COMPLETE TRUE 10 FALSE 2
9: COMPLETE FALSE 9 FALSE 1
10: COMPLETE FALSE 9 FALSE 2
11: COMPLETE FALSE 9 FALSE 1
12: COMPLETE FALSE 9 FALSE 2
13: COMPLETE FALSE 10 FALSE 1
14: COMPLETE FALSE 10 FALSE 2
15: COMPLETE FALSE 10 FALSE 1
16: COMPLETE FALSE 10 FALSE 2
我要创建的向量应该如下所示:
[1] "histyears1_hcc1_histcut0_9m_COMPLETE" "histyears2_hcc1_histcut0_9m_COMPLETE" "histyears1_hcc1_histcut0_9m_COMPLETE" "histyears2_hcc1_histcut0_9m_COMPLETE"
[5] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE" "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
[9] "histyears1_hcc0_histcut0_9m_COMPLETE" "histyears2_hcc0_histcut0_9m_COMPLETE" "histyears1_hcc0_histcut0_9m_COMPLETE" "histyears2_hcc0_histcut0_9m_COMPLETE"
[13] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE" "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"
我使用 data.table 方法创建了这个向量:
# Convert runVars to a data.table by reference so that `:=` can be used on it.
setDT(runVars)
# Add a runName column by reference: one label per row, assembled from the run
# settings. The logical flags are cast to integer so they appear as 1/0.
runVars[, runName := paste0(
  "histyears", numberOfHistroicalYears,
  "_hcc", as.integer(calculateHCC),
  "_histcut", as.integer(historyCutOff),
  "_", cutOffTime, "m_", calculationType
)]
# Extract the new column as a plain character vector.
subset <- runVars[["runName"]]
不过,我不太喜欢这种写法,它看起来有点笨拙。于是我尝试改用 lapply(),但不知道哪里出了问题:
# NOTE: lapply() over a data.table (a list of columns) calls the function once
# per column, so `x` is an atomic vector rather than a row. The `$` lookups
# below are what raise "$ operator is invalid for atomic vectors".
subset <- lapply(runVars, function(x){
paste0("histyears", x$numberOfHistroicalYears, "_hcc", as.integer(x$calculateHCC),
"_histcut", as.integer(x$historyCutOff), "_", x$cutOffTime, "m_", x$calculationType)
})
Error in x$numberOfHistroicalYears : $ operator is invalid for atomic vectors
有什么解决办法吗?
你现有的做法是正确的,这里不应该使用 lapply。
下面是使用 sprintf 的另一种写法,可能更简短。
library(data.table)
# Build the run label for every row with a single vectorised sprintf() call and
# store it by reference in a new runName column. The logical flags are cast to
# integer so they fill the %d slots as 1/0.
runVars[, runName := sprintf(
  "histyears%s_hcc%d_histcut%d_%sm_%s",
  numberOfHistroicalYears,
  as.integer(calculateHCC),
  as.integer(historyCutOff),
  cutOffTime,
  calculationType
)]
# Print the resulting character vector.
runVars[["runName"]]
# [1] "histyears1_hcc1_histcut0_9m_COMPLETE" "histyears2_hcc1_histcut0_9m_COMPLETE"
# [3] "histyears1_hcc1_histcut0_9m_COMPLETE" "histyears2_hcc1_histcut0_9m_COMPLETE"
# [5] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
# [7] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
# [9] "histyears1_hcc0_histcut0_9m_COMPLETE" "histyears2_hcc0_histcut0_9m_COMPLETE"
#[11] "histyears1_hcc0_histcut0_9m_COMPLETE" "histyears2_hcc0_histcut0_9m_COMPLETE"
#[13] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"
#[15] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"
这是我的答案……同样使用 sprintf(),但做法略有不同:
# Format template: every placeholder is %s, so each column is inserted via its
# character representation.
string_format <- "histyears%s_hcc%s_histcut%s_%sm_%s"
# Columns to insert, in the same order as the placeholders above.
cols <- c("numberOfHistroicalYears", "calculateHCC", "historyCutOff", "cutOffTime", "calculationType")
# Run on runVars (the table from the question), restricted to `cols`.
runVars[, {
  # Cast logical flags to integer so TRUE/FALSE are rendered as 1/0, which is
  # the format the question asks for; other columns are passed through as-is.
  args <- lapply(.SD, function(col) if (is.logical(col)) as.integer(col) else col)
  # Hand the columns plus the format string to sprintf() as one argument list.
  do.call(sprintf, c(args, fmt = string_format))
}, .SDcols = cols]
# [1] "histyears1_hcc1_histcut0_9m_COMPLETE" "histyears2_hcc1_histcut0_9m_COMPLETE"
# [3] "histyears1_hcc1_histcut0_9m_COMPLETE" "histyears2_hcc1_histcut0_9m_COMPLETE"
# [5] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
# [7] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
# [9] "histyears1_hcc0_histcut0_9m_COMPLETE" "histyears2_hcc0_histcut0_9m_COMPLETE"
# [11] "histyears1_hcc0_histcut0_9m_COMPLETE" "histyears2_hcc0_histcut0_9m_COMPLETE"
# [13] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"
# [15] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"
我正在尝试创建一个向量,其中包含来自名为 runVars 的 data.table 的串联信息:
runVars
calculationType calculateHCC cutOffTime historyCutOff numberOfHistroicalYears
1: COMPLETE TRUE 9 FALSE 1
2: COMPLETE TRUE 9 FALSE 2
3: COMPLETE TRUE 9 FALSE 1
4: COMPLETE TRUE 9 FALSE 2
5: COMPLETE TRUE 10 FALSE 1
6: COMPLETE TRUE 10 FALSE 2
7: COMPLETE TRUE 10 FALSE 1
8: COMPLETE TRUE 10 FALSE 2
9: COMPLETE FALSE 9 FALSE 1
10: COMPLETE FALSE 9 FALSE 2
11: COMPLETE FALSE 9 FALSE 1
12: COMPLETE FALSE 9 FALSE 2
13: COMPLETE FALSE 10 FALSE 1
14: COMPLETE FALSE 10 FALSE 2
15: COMPLETE FALSE 10 FALSE 1
16: COMPLETE FALSE 10 FALSE 2
我要创建的向量应该如下所示:
[1] "histyears1_hcc1_histcut0_9m_COMPLETE" "histyears2_hcc1_histcut0_9m_COMPLETE" "histyears1_hcc1_histcut0_9m_COMPLETE" "histyears2_hcc1_histcut0_9m_COMPLETE"
[5] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE" "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
[9] "histyears1_hcc0_histcut0_9m_COMPLETE" "histyears2_hcc0_histcut0_9m_COMPLETE" "histyears1_hcc0_histcut0_9m_COMPLETE" "histyears2_hcc0_histcut0_9m_COMPLETE"
[13] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE" "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"
我使用 data.table 方法创建了这个向量:
# Convert runVars to a data.table by reference so that `:=` can be used on it.
setDT(runVars)
# Add a runName column by reference: one label per row, assembled from the run
# settings. The logical flags are cast to integer so they appear as 1/0.
runVars[, runName := paste0(
  "histyears", numberOfHistroicalYears,
  "_hcc", as.integer(calculateHCC),
  "_histcut", as.integer(historyCutOff),
  "_", cutOffTime, "m_", calculationType
)]
# Extract the new column as a plain character vector.
subset <- runVars[["runName"]]
不过,我不太喜欢这种写法,它看起来有点笨拙。于是我尝试改用 lapply(),但不知道哪里出了问题:
# NOTE: lapply() over a data.table (a list of columns) calls the function once
# per column, so `x` is an atomic vector rather than a row. The `$` lookups
# below are what raise "$ operator is invalid for atomic vectors".
subset <- lapply(runVars, function(x){
paste0("histyears", x$numberOfHistroicalYears, "_hcc", as.integer(x$calculateHCC),
"_histcut", as.integer(x$historyCutOff), "_", x$cutOffTime, "m_", x$calculationType)
})
Error in x$numberOfHistroicalYears : $ operator is invalid for atomic vectors
有什么解决办法吗?
你现有的做法是正确的,这里不应该使用 lapply。
下面是使用 sprintf 的另一种写法,可能更简短。
library(data.table)
# Build the run label for every row with a single vectorised sprintf() call and
# store it by reference in a new runName column. The logical flags are cast to
# integer so they fill the %d slots as 1/0.
runVars[, runName := sprintf(
  "histyears%s_hcc%d_histcut%d_%sm_%s",
  numberOfHistroicalYears,
  as.integer(calculateHCC),
  as.integer(historyCutOff),
  cutOffTime,
  calculationType
)]
# Print the resulting character vector.
runVars[["runName"]]
# [1] "histyears1_hcc1_histcut0_9m_COMPLETE" "histyears2_hcc1_histcut0_9m_COMPLETE"
# [3] "histyears1_hcc1_histcut0_9m_COMPLETE" "histyears2_hcc1_histcut0_9m_COMPLETE"
# [5] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
# [7] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
# [9] "histyears1_hcc0_histcut0_9m_COMPLETE" "histyears2_hcc0_histcut0_9m_COMPLETE"
#[11] "histyears1_hcc0_histcut0_9m_COMPLETE" "histyears2_hcc0_histcut0_9m_COMPLETE"
#[13] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"
#[15] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"
这是我的答案……同样使用 sprintf(),但做法略有不同:
# Format template: every placeholder is %s, so each column is inserted via its
# character representation.
string_format <- "histyears%s_hcc%s_histcut%s_%sm_%s"
# Columns to insert, in the same order as the placeholders above.
cols <- c("numberOfHistroicalYears", "calculateHCC", "historyCutOff", "cutOffTime", "calculationType")
# Run on runVars (the table from the question), restricted to `cols`.
runVars[, {
  # Cast logical flags to integer so TRUE/FALSE are rendered as 1/0, which is
  # the format the question asks for; other columns are passed through as-is.
  args <- lapply(.SD, function(col) if (is.logical(col)) as.integer(col) else col)
  # Hand the columns plus the format string to sprintf() as one argument list.
  do.call(sprintf, c(args, fmt = string_format))
}, .SDcols = cols]
# [1] "histyears1_hcc1_histcut0_9m_COMPLETE" "histyears2_hcc1_histcut0_9m_COMPLETE"
# [3] "histyears1_hcc1_histcut0_9m_COMPLETE" "histyears2_hcc1_histcut0_9m_COMPLETE"
# [5] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
# [7] "histyears1_hcc1_histcut0_10m_COMPLETE" "histyears2_hcc1_histcut0_10m_COMPLETE"
# [9] "histyears1_hcc0_histcut0_9m_COMPLETE" "histyears2_hcc0_histcut0_9m_COMPLETE"
# [11] "histyears1_hcc0_histcut0_9m_COMPLETE" "histyears2_hcc0_histcut0_9m_COMPLETE"
# [13] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"
# [15] "histyears1_hcc0_histcut0_10m_COMPLETE" "histyears2_hcc0_histcut0_10m_COMPLETE"