data.table 通过表达式快速访问列

data.table fast access to column by expression

我想根据给定的表达式从 data.table 中取出一列。列名 CaseID 是以表达式的形式传给我的,即 `expr_caseid <- expression(CaseID)`。怎样才能以最快的方式通过这个表达式取得该列?

library(data.table)
# Benchmark fixture: a one-column table holding 1e8 integer case IDs, each a
# uniform draw scaled by 100 and truncated to an integer.
dt_fcst <- data.table(
    CaseID = as.integer(100 * runif(1e8))
)

# The column to fetch is supplied as an expression object, not as a string.
expr_caseid <- expression(CaseID)

# Fetch one column of `DT_` with `[[`, given the column as an unquoted name.
#
# DT_   : table-like object that supports `[[` with a character name.
# expr_ : bare column name; it is captured unevaluated and turned into text,
#         so the text of the argument itself (not its value) is the lookup key.
# Returns the result of `DT_[[<name>]]`.
testExpr <- function(DT_, expr_) {
    column_name <- deparse(substitute(expr_))
    DT_[[column_name]]
}

# Fetch one column of `DT_` through getElement(), given the column as an
# unquoted name.
#
# DT_   : object to extract from.
# expr_ : bare column name; it is captured unevaluated and turned into text,
#         so the text of the argument itself (not its value) is the lookup key.
# Returns whatever getElement() yields for that name.
testGetElement <- function(DT_, expr_) {
    column_name <- deparse(substitute(expr_))
    getElement(object = DT_, name = column_name)
}

library(microbenchmark)
# Time three ways of extracting the CaseID column from dt_fcst, 1000 runs each.
# Lines inside the call that start with "#" are alternative candidates that
# are currently switched off.
microbenchmark(
##  by_char = dt_fcst[['CaseID']],
    # Bare symbol -> string inside the helper, then `[[` lookup.
    # Note: the bare name CaseID is passed here, not expr_caseid.
    by_deparse = testExpr(dt_fcst, CaseID),
##  by_expr = dt_fcst[, list(CaseID)],
##  by_dollar = dt_fcst$CaseID,
    # Evaluate the stored expression after first converting the table to an
    # environment with as.environment().
    by_eval = eval(
        expr_caseid, 
        envir = as.environment(dt_fcst)
    ),
    # Bare symbol -> string inside the helper, then getElement() lookup.
    by_getElement = testGetElement(dt_fcst, CaseID)
#   ,by_index = dt_fcst@.Data[[1]]
, times = 1000L)

性能测量结果:

单位:微秒

          expr  min    lq    mean median   uq   max neval cld
    by_deparse 37.2 41.35 55.0700  46.15 60.6 357.8  1000   b
       by_eval 15.9 22.30 29.2194  24.80 34.3 289.8  1000  a 
 by_getElement 38.3 42.20 55.9087  47.30 63.2 283.3  1000   b

根据 Frank 的评论,去掉 as.environment 转换、直接把表传给 eval(即 eval(expr_caseid, dt_fcst))后的测量结果:

Unit: microseconds
            expr  min    lq     mean median    uq     max neval cld
 by_evalNoCoerce  1.8  4.00   8.7652   5.30  7.60   479.8  1000  a 


# Same benchmark as before with one extra candidate, by_evalNoCoerce, which
# passes the table straight to eval() instead of converting it first.
# Lines inside the call that start with "#" are alternative candidates that
# are currently switched off.
microbenchmark(
##  by_char = dt_fcst[['CaseID']],
    # Bare symbol -> string inside the helper, then `[[` lookup.
    by_deparse = testExpr(dt_fcst, CaseID),
##  by_expr = dt_fcst[, list(CaseID)],
##  by_dollar = dt_fcst$CaseID,
    # Evaluate the stored expression after first converting the table to an
    # environment with as.environment().
    by_eval = eval(
        expr_caseid, 
        envir = as.environment(dt_fcst)
    ),
    # Bare symbol -> string inside the helper, then getElement() lookup.
    by_getElement = testGetElement(dt_fcst, CaseID),
    # Evaluate the stored expression with the table itself as the second
    # argument of eval(); no as.environment() call.
    by_evalNoCoerce = eval(expr_caseid, dt_fcst)
#   ,by_index = dt_fcst@.Data[[1]]
, times = 1000L)