在 data.table 中使用多个 lapply 进行赋值?
Assignment with multiple lapplys in data.table?
是否有比以下两种做法更优雅的方案,可以对 data.table 的多组列分别执行不同的操作?我所说的“更优雅”是指用一行代码完成,而不是合并中间结果。
请注意,我在此特定示例中使用了 `by`,但该问题同样适用于不使用 `by` 的情况。
library(data.table)
# Build a 50-row toy table: an integer grouping column `ID` (values 1-10)
# plus four columns A-D of randomly drawn lowercase letters. No seed is set
# in this snippet, so the values differ from run to run.
dt <- data.table(
  ID = sample.int(10L, size = 50L, replace = TRUE),
  A = sample(letters, size = 50L, replace = TRUE),
  B = sample(letters, size = 50L, replace = TRUE),
  C = sample(letters, size = 50L, replace = TRUE),
  D = sample(letters, size = 50L, replace = TRUE)
)

# Two sets of columns that are processed differently per group.
use_all <- c("A", "B")     # every value of the group is kept, as a list cell
just_first <- c("C", "D")  # only the first value of the group is kept

# Approach 1: aggregate each column set separately, then bind the columns
# side by side. This assumes the `by` column is identical across the two
# intermediate results, so the duplicate `ID` of the second one is dropped.
results <- data.table(
  dt[, lapply(.SD, function(col) col[1L]), by = ID, .SDcols = just_first],
  dt[, lapply(.SD, list), by = ID, .SDcols = use_all][, .SD, .SDcols = !"ID"]
)

# Approach 2: aggregate each column set separately, then merge on `ID`.
# This does not assume the `by` column is identically ordered in both
# intermediate results. Note that it overwrites the `results` built above.
results <- merge(
  x = dt[, lapply(.SD, function(col) col[1L]), by = ID, .SDcols = just_first],
  y = dt[, lapply(.SD, list), by = ID, .SDcols = use_all],
  by = "ID"
)
# Single-pass alternative: walk over the selected columns together with their
# names and choose the per-group operation by name. Columns listed in
# `just_first` keep only their first value; all others are wrapped in a list
# so the whole group ends up in one list cell.
# Relies on `dt`, `use_all` and `just_first` being defined beforehand.
out <- dt[
  ,
  Map(
    function(col, col_name) {
      if (col_name %in% just_first) col[1L] else list(col)
    },
    .SD,
    names(.SD)
  ),
  by = ID,
  .SDcols = c(use_all, just_first)
]
out
# ID A B C D
# <int> <list> <list> <char> <char>
# 1: 1 f,b,w,x,g u,s,y,x,r f q
# 2: 5 f,e,l,t,n,j v,p,i,w,x,b f t
# 3: 9 t,h,m,j p,z,m,n o q
# 4: 10 c,b,q,e,n,b,... v,i,w,j,a,s,... b a
# 5: 4 v,j,a,i,i,x,... q,y,h,e,p,n,... j b
# 6: 2 u,g,k,e,w,u,... l,f,z,f,k,p,... w h
# 7: 8 f,c,e,r,h,y u,k,y,q,e,v i e
# 8: 7 z,d k,q a m
# 9: 3 d,p,d a,j,q n f
# 10: 6 v,r y,o z t

# Sanity check against the question's first `results`, i.e. the one built
# with results <- data.table(...). Its column order is different (ID, C, D,
# A, B), so the columns are put into the order of `out` before comparing.
all.equal(out, results[, names(out), with = FALSE])
# [1] TRUE
可重现的数据:
# Seeded version of the toy table, so the printed results can be reproduced:
# an integer grouping column `ID` (values 1-10) plus four columns A-D of
# randomly drawn lowercase letters, 50 rows in total.
set.seed(42L)
dt <- data.table(
  ID = sample.int(10L, size = 50L, replace = TRUE),
  A = sample(letters, size = 50L, replace = TRUE),
  B = sample(letters, size = 50L, replace = TRUE),
  C = sample(letters, size = 50L, replace = TRUE),
  D = sample(letters, size = 50L, replace = TRUE)
)

# Show the first three rows.
head(dt, n = 3L)
# ID A B C D
# <int> <char> <char> <char> <char>
# 1: 1 f u f q
# 2: 5 f v f t
# 3: 1 b s t a
是否有比以下两种做法更优雅的方案,可以对 data.table 的多组列分别执行不同的操作?我所说的“更优雅”是指用一行代码完成,而不是合并中间结果。
请注意,我在此特定示例中使用了 `by`,但该问题同样适用于不使用 `by` 的情况。
library(data.table)
# Build a 50-row toy table: an integer grouping column `ID` (values 1-10)
# plus four columns A-D of randomly drawn lowercase letters. No seed is set
# in this snippet, so the values differ from run to run.
dt <- data.table(
  ID = sample.int(10L, size = 50L, replace = TRUE),
  A = sample(letters, size = 50L, replace = TRUE),
  B = sample(letters, size = 50L, replace = TRUE),
  C = sample(letters, size = 50L, replace = TRUE),
  D = sample(letters, size = 50L, replace = TRUE)
)

# Two sets of columns that are processed differently per group.
use_all <- c("A", "B")     # every value of the group is kept, as a list cell
just_first <- c("C", "D")  # only the first value of the group is kept

# Approach 1: aggregate each column set separately, then bind the columns
# side by side. This assumes the `by` column is identical across the two
# intermediate results, so the duplicate `ID` of the second one is dropped.
results <- data.table(
  dt[, lapply(.SD, function(col) col[1L]), by = ID, .SDcols = just_first],
  dt[, lapply(.SD, list), by = ID, .SDcols = use_all][, .SD, .SDcols = !"ID"]
)

# Approach 2: aggregate each column set separately, then merge on `ID`.
# This does not assume the `by` column is identically ordered in both
# intermediate results. Note that it overwrites the `results` built above.
results <- merge(
  x = dt[, lapply(.SD, function(col) col[1L]), by = ID, .SDcols = just_first],
  y = dt[, lapply(.SD, list), by = ID, .SDcols = use_all],
  by = "ID"
)
# Single-pass alternative: walk over the selected columns together with their
# names and choose the per-group operation by name. Columns listed in
# `just_first` keep only their first value; all others are wrapped in a list
# so the whole group ends up in one list cell.
# Relies on `dt`, `use_all` and `just_first` being defined beforehand.
out <- dt[
  ,
  Map(
    function(col, col_name) {
      if (col_name %in% just_first) col[1L] else list(col)
    },
    .SD,
    names(.SD)
  ),
  by = ID,
  .SDcols = c(use_all, just_first)
]
out
# ID A B C D
# <int> <list> <list> <char> <char>
# 1: 1 f,b,w,x,g u,s,y,x,r f q
# 2: 5 f,e,l,t,n,j v,p,i,w,x,b f t
# 3: 9 t,h,m,j p,z,m,n o q
# 4: 10 c,b,q,e,n,b,... v,i,w,j,a,s,... b a
# 5: 4 v,j,a,i,i,x,... q,y,h,e,p,n,... j b
# 6: 2 u,g,k,e,w,u,... l,f,z,f,k,p,... w h
# 7: 8 f,c,e,r,h,y u,k,y,q,e,v i e
# 8: 7 z,d k,q a m
# 9: 3 d,p,d a,j,q n f
# 10: 6 v,r y,o z t

# Sanity check against the question's first `results`, i.e. the one built
# with results <- data.table(...). Its column order is different (ID, C, D,
# A, B), so the columns are put into the order of `out` before comparing.
all.equal(out, results[, names(out), with = FALSE])
# [1] TRUE
可重现的数据:
# Seeded version of the toy table, so the printed results can be reproduced:
# an integer grouping column `ID` (values 1-10) plus four columns A-D of
# randomly drawn lowercase letters, 50 rows in total.
set.seed(42L)
dt <- data.table(
  ID = sample.int(10L, size = 50L, replace = TRUE),
  A = sample(letters, size = 50L, replace = TRUE),
  B = sample(letters, size = 50L, replace = TRUE),
  C = sample(letters, size = 50L, replace = TRUE),
  D = sample(letters, size = 50L, replace = TRUE)
)

# Show the first three rows.
head(dt, n = 3L)
# ID A B C D
# <int> <char> <char> <char> <char>
# 1: 1 f u f q
# 2: 5 f v f t
# 3: 1 b s t a