如何在 data.table 中对列表列与外部向量进行逐行匹配?
How to do rowwise matching in data.table between list-column and external vector?
我正在尝试对嵌套在 data.table
列表列中的向量元素进行行匹配。
library(tibble)
library(data.table)
# Toy data: three rows, where `col_y` is a list-column holding one numeric
# vector per row. Built as a tibble first, then converted to a data.table.
my_dt <- data.table::as.data.table(
  tibble::tibble(
    col_x = c("a", "b", "c"),
    col_y = list(c(1, 2, 3), c(4, 5, 6), c(7, 8, 9))
  )
)

# External integer vector that every row of `col_y` is matched against.
external_vec <- seq_len(9L)
我想将 col_y
中的每个元素与 external_vec
进行匹配,这样我将在 my_dt
中得到一个新列,该列等同于 external_vec %in% col_y
,例如:
# desired output
## col_x col_y new_col
## 1: a 1,2,3 TRUE, TRUE, TRUE,FALSE,FALSE,FALSE,...
## 2: b 4,5,6 FALSE,FALSE,FALSE, TRUE, TRUE, TRUE,...
## 3: c 7,8,9 FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,...
编辑
我知道如何实现基于 tidyverse purrr
的解决方案,如下所示。但是,我正在寻找 data.table
本机功能。
# this works but relies on purrr and dplyr, which I hope to avoid in this problem
# Every tidyverse call is namespace-qualified (and the native pipe is used)
# because only tibble and data.table are attached above; an unqualified
# `mutate()` would fail with "could not find function".
my_dt |>
  # Store the external vector once per row as a list-column ...
  tibble::add_column(ext_vec_as_col = list(external_vec)) |>
  # ... then test, row by row, which of its values occur in that row's col_y.
  dplyr::mutate(
    new_col = purrr::map2(ext_vec_as_col, col_y, \(ext, y) ext %in% y)
  )
#> col_x col_y ext_vec_as_col new_col
#> 1: a 1,2,3 1,2,3,4,5,6,... TRUE, TRUE, TRUE,FALSE,FALSE,FALSE,...
#> 2: b 4,5,6 1,2,3,4,5,6,... FALSE,FALSE,FALSE, TRUE, TRUE, TRUE,...
#> 3: c 7,8,9 1,2,3,4,5,6,... FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,...
我有 tidyverse 解决方案:
library(tibble)
library(dplyr)
library(tidyr)
library(purrr)
library(magrittr)
# Same toy data as in the question, kept as a plain tibble here.
my_dt <- tibble::tibble(
  col_x = c("a", "b", "c"),
  col_y = list(c(1, 2, 3), c(4, 5, 6), c(7, 8, 9))
)
external_vec <- seq_len(9L)

# For each row, flag which entries of `external_vec` occur in that row's
# `col_y`. The two unnest_wider() steps only spread the list-columns into
# separate columns so the result is easy to inspect.
my_dt |>
  dplyr::mutate(new_col = purrr::map(col_y, \(y) external_vec %in% y)) |>
  tidyr::unnest_wider(col_y, names_sep = "_") |> # display only
  tidyr::unnest_wider(new_col, names_sep = "_") # display only
通常只需 library(tidyverse) 就能一次性加载上面这些包中的大部分,
但我觉得最好还是明确列出完成这项工作究竟需要哪些包。
data.table
解决方案
我认为这是等价的:
# `:=` inside `[` only works on a data.table, and `my_dt` was last rebuilt as
# a plain tibble, so convert it by reference first. setDT() simply returns its
# input when that is already a data.table.
data.table::setDT(my_dt)

# lapply() walks the list-column one row at a time; each element of `new_col`
# is the logical vector `external_vec %in% <that row's col_y>`.
my_dt[, new_col := lapply(col_y, \(x) external_vec %in% x)]
my_dt
性能
因为这很重要,这里有一些性能基准
# Time the three variants on the toy data and plot the timings.
# check = FALSE: the expressions do not return identical objects (op_tidy
# carries the extra `ext_vec_as_col` column), so bench::mark() must not
# compare their results.
# NOTE(review): the `dt` expression adds `new_col` to `my_dt` by reference and
# needs `my_dt` to be a data.table at this point.
plot(
  bench::mark(
    tidy = my_dt %>%
      mutate(new_col = col_y %>% map(~external_vec %in% .x)),
    dt = my_dt[, new_col := lapply(col_y, \(x) external_vec %in% x)],
    op_tidy = my_dt %>%
      tibble::add_column(ext_vec_as_col = list(external_vec)) %>%
      mutate(
        new_col = purrr::map2(
          .x = ext_vec_as_col, .y = col_y, .f = ~.x %in% .y
        )
      ),
    check = FALSE
  )
)
我正在尝试对嵌套在 data.table
列表列中的向量元素进行行匹配。
library(tibble)
library(data.table)
# Toy data: three rows, where `col_y` is a list-column holding one numeric
# vector per row. Built as a tibble first, then converted to a data.table.
my_dt <- data.table::as.data.table(
  tibble::tibble(
    col_x = c("a", "b", "c"),
    col_y = list(c(1, 2, 3), c(4, 5, 6), c(7, 8, 9))
  )
)

# External integer vector that every row of `col_y` is matched against.
external_vec <- seq_len(9L)
我想将 col_y
中的每个元素与 external_vec
进行匹配,这样我将在 my_dt
中得到一个新列,该列等同于 external_vec %in% col_y
,例如:
# desired output
## col_x col_y new_col
## 1: a 1,2,3 TRUE, TRUE, TRUE,FALSE,FALSE,FALSE,...
## 2: b 4,5,6 FALSE,FALSE,FALSE, TRUE, TRUE, TRUE,...
## 3: c 7,8,9 FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,...
编辑
我知道如何实现基于 tidyverse purrr
的解决方案,如下所示。但是,我正在寻找 data.table
本机功能。
# this works but relies on purrr and dplyr, which I hope to avoid in this problem
# Every tidyverse call is namespace-qualified (and the native pipe is used)
# because only tibble and data.table are attached above; an unqualified
# `mutate()` would fail with "could not find function".
my_dt |>
  # Store the external vector once per row as a list-column ...
  tibble::add_column(ext_vec_as_col = list(external_vec)) |>
  # ... then test, row by row, which of its values occur in that row's col_y.
  dplyr::mutate(
    new_col = purrr::map2(ext_vec_as_col, col_y, \(ext, y) ext %in% y)
  )
#> col_x col_y ext_vec_as_col new_col
#> 1: a 1,2,3 1,2,3,4,5,6,... TRUE, TRUE, TRUE,FALSE,FALSE,FALSE,...
#> 2: b 4,5,6 1,2,3,4,5,6,... FALSE,FALSE,FALSE, TRUE, TRUE, TRUE,...
#> 3: c 7,8,9 1,2,3,4,5,6,... FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,...
我有 tidyverse 解决方案:
library(tibble)
library(dplyr)
library(tidyr)
library(purrr)
library(magrittr)
# Same toy data as in the question, kept as a plain tibble here.
my_dt <- tibble::tibble(
  col_x = c("a", "b", "c"),
  col_y = list(c(1, 2, 3), c(4, 5, 6), c(7, 8, 9))
)
external_vec <- seq_len(9L)

# For each row, flag which entries of `external_vec` occur in that row's
# `col_y`. The two unnest_wider() steps only spread the list-columns into
# separate columns so the result is easy to inspect.
my_dt |>
  dplyr::mutate(new_col = purrr::map(col_y, \(y) external_vec %in% y)) |>
  tidyr::unnest_wider(col_y, names_sep = "_") |> # display only
  tidyr::unnest_wider(new_col, names_sep = "_") # display only
通常只需 library(tidyverse) 就能一次性加载上面这些包中的大部分,
但我觉得最好还是明确列出完成这项工作究竟需要哪些包。
data.table
解决方案
我认为这是等价的:
# `:=` inside `[` only works on a data.table, and `my_dt` was last rebuilt as
# a plain tibble, so convert it by reference first. setDT() simply returns its
# input when that is already a data.table.
data.table::setDT(my_dt)

# lapply() walks the list-column one row at a time; each element of `new_col`
# is the logical vector `external_vec %in% <that row's col_y>`.
my_dt[, new_col := lapply(col_y, \(x) external_vec %in% x)]
my_dt
性能
因为这很重要,这里有一些性能基准
# Time the three variants on the toy data and plot the timings.
# check = FALSE: the expressions do not return identical objects (op_tidy
# carries the extra `ext_vec_as_col` column), so bench::mark() must not
# compare their results.
# NOTE(review): the `dt` expression adds `new_col` to `my_dt` by reference and
# needs `my_dt` to be a data.table at this point.
plot(
  bench::mark(
    tidy = my_dt %>%
      mutate(new_col = col_y %>% map(~external_vec %in% .x)),
    dt = my_dt[, new_col := lapply(col_y, \(x) external_vec %in% x)],
    op_tidy = my_dt %>%
      tibble::add_column(ext_vec_as_col = list(external_vec)) %>%
      mutate(
        new_col = purrr::map2(
          .x = ext_vec_as_col, .y = col_y, .f = ~.x %in% .y
        )
      ),
    check = FALSE
  )
)