R数据框到嵌套列表
R dataframe to nested list
我想将此格式 (tbl) 的数据框转换为以下嵌套列表 (tbllst):
library(tidyr)
## Example input: one row per leaf value. Col1 and Col2 hold the parent
## levels of the value stored in Col3.
tbl <- tribble(
  ~Col1,  ~Col2,    ~Col3,
  "Var1", "Var1_1", "Var1_1_1",
  "Var1", "Var1_1", "Var1_1_2",
  "Var1", "Var1_2", "Var1_2_1",
  "Var1", "Var1_2", "Var1_2_2"
)
## Desired result: list levels alternate between a column name and one of
## that column's values; the innermost Col3 entry is a character vector.
tbllst <- list(
  Col1 = list(
    Var1 = list(
      Col2 = list(
        Var1_1 = list(
          Col3 = c("Var1_1_1", "Var1_1_2")
        ),
        Var1_2 = list(
          Col3 = c("Var1_2_1", "Var1_2_2")
        )
      )
    )
  )
)
是否有实现此目的的自动化方法?
rrapply
包中的函数 rrapply()
有一个选项 how = "unmelt"
将融化(melted)的 data.frame 转换为嵌套列表,其中 data.frame 的每一行成为嵌套列表中的一条节点路径。
要应用此功能,我们首先需要将tbl
data.frame转换为rrapply()
所需的输入格式:
library(purrr)
library(dplyr)
library(rrapply)
## Reshape tbl into the melted layout expected by rrapply(how = "unmelt"):
## each original column becomes a (column name, value) pair of columns,
## auto-named ...1 through ...6. Grouping on the five path columns then
## gathers the leaf values (...6) into one list entry per path.
tbl1 <- tbl %>%
  imap_dfc(~ bind_cols(.y, .x)) %>%
  group_by(across(num_range(prefix = "...", range = 1:5))) %>%
  summarise(`...6` = list(c(`...6`)))
tbl1
#> # A tibble: 2 x 6
#> # Groups: ...1, ...2, ...3, ...4 [2]
#> ...1 ...2 ...3 ...4 ...5 ...6
#> <chr> <chr> <chr> <chr> <chr> <list>
#> 1 Col1 Var1 Col2 Var1_1 Col3 <chr [2]>
#> 2 Col1 Var1 Col2 Var1_2 Col3 <chr [2]>
## Unmelt: every row of tbl1 becomes one node path of the nested list;
## str() then prints the resulting structure.
ls_tbl <- tbl1 %>%
  rrapply(how = "unmelt")
str(ls_tbl)
#> List of 1
#> $ Col1:List of 1
#> ..$ Var1:List of 1
#> .. ..$ Col2:List of 2
#> .. .. ..$ Var1_1:List of 1
#> .. .. .. ..$ Col3: chr [1:2] "Var1_1_1" "Var1_1_2"
#> .. .. ..$ Var1_2:List of 1
#> .. .. .. ..$ Col3: chr [1:2] "Var1_2_1" "Var1_2_2"
注意,group_by()
和summarize()
操作的目的只是在单个Col3
节点下获取多个Var1_%_%
。以下做法要简单得多(但不会产生完全相同的结果):
## Shortcut without the group_by()/summarise() step: each row is unmelted
## on its own, so every leaf value ends up in a separate Col3 node.
ls_tbl <- tbl %>%
  imap_dfc(~ bind_cols(.y, .x)) %>%
  rrapply(how = "unmelt")
str(ls_tbl)
#> List of 1
#> $ Col1:List of 1
#> ..$ Var1:List of 1
#> .. ..$ Col2:List of 2
#> .. .. ..$ Var1_1:List of 2
#> .. .. .. ..$ Col3: chr "Var1_1_1"
#> .. .. .. ..$ Col3: chr "Var1_1_2"
#> .. .. ..$ Var1_2:List of 2
#> .. .. .. ..$ Col3: chr "Var1_2_1"
#> .. .. .. ..$ Col3: chr "Var1_2_2"
这是另一个使用 data.table
+ rrapply
的选项
library(data.table)
library(rrapply)
## Pair every column with its own name: Map() walks names(.SD) and .SD in
## parallel, and list2DF() builds a data frame from each (name, values) pair.
## NOTE(review): setDT() is documented to convert its argument by reference,
## so `tbl` itself presumably becomes a data.table here - confirm before
## reusing `tbl` elsewhere.
dt <- setDT(tbl)[, Map(function(...) list2DF(.(...)), names(.SD), .SD)]
## Group by every column except the last one, wrap the remaining column in a
## list per group, and unmelt the result into a nested list.
rrapply(dt[, lapply(.SD, list), c(head(names(dt), -1))], how = "unmelt")
这给出了
$Col1
$Col1$Var1
$Col1$Var1$Col2
$Col1$Var1$Col2$Var1_1
$Col1$Var1$Col2$Var1_1$Col3
[1] "Var1_1_1" "Var1_1_2"
$Col1$Var1$Col2$Var1_2
$Col1$Var1$Col2$Var1_2$Col3
[1] "Var1_2_1" "Var1_2_2"
我想将此格式 (tbl) 的数据框转换为以下嵌套列表 (tbllst):
library(tidyr)
## Example input: one row per leaf value. Col1 and Col2 hold the parent
## levels of the value stored in Col3.
tbl <- tribble(
  ~Col1,  ~Col2,    ~Col3,
  "Var1", "Var1_1", "Var1_1_1",
  "Var1", "Var1_1", "Var1_1_2",
  "Var1", "Var1_2", "Var1_2_1",
  "Var1", "Var1_2", "Var1_2_2"
)
## Desired result: list levels alternate between a column name and one of
## that column's values; the innermost Col3 entry is a character vector.
tbllst <- list(
  Col1 = list(
    Var1 = list(
      Col2 = list(
        Var1_1 = list(
          Col3 = c("Var1_1_1", "Var1_1_2")
        ),
        Var1_2 = list(
          Col3 = c("Var1_2_1", "Var1_2_2")
        )
      )
    )
  )
)
是否有实现此目的的自动化方法?
rrapply
包中的函数 rrapply()
有一个选项 how = "unmelt"
将融化(melted)的 data.frame 转换为嵌套列表,其中 data.frame 的每一行成为嵌套列表中的一条节点路径。
要应用此功能,我们首先需要将tbl
data.frame转换为rrapply()
所需的输入格式:
library(purrr)
library(dplyr)
library(rrapply)
## Reshape tbl into the melted layout expected by rrapply(how = "unmelt"):
## each original column becomes a (column name, value) pair of columns,
## auto-named ...1 through ...6. Grouping on the five path columns then
## gathers the leaf values (...6) into one list entry per path.
tbl1 <- tbl %>%
  imap_dfc(~ bind_cols(.y, .x)) %>%
  group_by(across(num_range(prefix = "...", range = 1:5))) %>%
  summarise(`...6` = list(c(`...6`)))
tbl1
#> # A tibble: 2 x 6
#> # Groups: ...1, ...2, ...3, ...4 [2]
#> ...1 ...2 ...3 ...4 ...5 ...6
#> <chr> <chr> <chr> <chr> <chr> <list>
#> 1 Col1 Var1 Col2 Var1_1 Col3 <chr [2]>
#> 2 Col1 Var1 Col2 Var1_2 Col3 <chr [2]>
## Unmelt: every row of tbl1 becomes one node path of the nested list;
## str() then prints the resulting structure.
ls_tbl <- tbl1 %>%
  rrapply(how = "unmelt")
str(ls_tbl)
#> List of 1
#> $ Col1:List of 1
#> ..$ Var1:List of 1
#> .. ..$ Col2:List of 2
#> .. .. ..$ Var1_1:List of 1
#> .. .. .. ..$ Col3: chr [1:2] "Var1_1_1" "Var1_1_2"
#> .. .. ..$ Var1_2:List of 1
#> .. .. .. ..$ Col3: chr [1:2] "Var1_2_1" "Var1_2_2"
注意,group_by()
和summarize()
操作的目的只是在单个Col3
节点下获取多个Var1_%_%
。以下做法要简单得多(但不会产生完全相同的结果):
## Shortcut without the group_by()/summarise() step: each row is unmelted
## on its own, so every leaf value ends up in a separate Col3 node.
ls_tbl <- tbl %>%
  imap_dfc(~ bind_cols(.y, .x)) %>%
  rrapply(how = "unmelt")
str(ls_tbl)
#> List of 1
#> $ Col1:List of 1
#> ..$ Var1:List of 1
#> .. ..$ Col2:List of 2
#> .. .. ..$ Var1_1:List of 2
#> .. .. .. ..$ Col3: chr "Var1_1_1"
#> .. .. .. ..$ Col3: chr "Var1_1_2"
#> .. .. ..$ Var1_2:List of 2
#> .. .. .. ..$ Col3: chr "Var1_2_1"
#> .. .. .. ..$ Col3: chr "Var1_2_2"
这是另一个使用 data.table
+ rrapply
的选项
library(data.table)
library(rrapply)
## Pair every column with its own name: Map() walks names(.SD) and .SD in
## parallel, and list2DF() builds a data frame from each (name, values) pair.
## NOTE(review): setDT() is documented to convert its argument by reference,
## so `tbl` itself presumably becomes a data.table here - confirm before
## reusing `tbl` elsewhere.
dt <- setDT(tbl)[, Map(function(...) list2DF(.(...)), names(.SD), .SD)]
## Group by every column except the last one, wrap the remaining column in a
## list per group, and unmelt the result into a nested list.
rrapply(dt[, lapply(.SD, list), c(head(names(dt), -1))], how = "unmelt")
这给出了
$Col1
$Col1$Var1
$Col1$Var1$Col2
$Col1$Var1$Col2$Var1_1
$Col1$Var1$Col2$Var1_1$Col3
[1] "Var1_1_1" "Var1_1_2"
$Col1$Var1$Col2$Var1_2
$Col1$Var1$Col2$Var1_2$Col3
[1] "Var1_2_1" "Var1_2_2"