如何使用 imap 引用子列表中的 df

how to define a df in a sublist using imap

我有一个嵌套列表(list of lists),每个子列表中又包含多个 df。现在我想用 imap 得到每个子列表中每个 df 的列数。我该怎样才能正确引用到这些 df?

可以使用以下代码构建示例列表:
# Example data: a named list keyed by subject ID. Each element is itself a
# named list holding two tibbles, `Demographics` and `DiseaseStatus`; every
# tibble has 3 character columns and 2 rows.
# NOTE(review): the first row of each tibble appears to hold human-readable
# column labels rather than data — confirm before treating it as a record.
lst1<-list(`101-01-101` = list(Demographics = structure(list(SubjectID = c("Subject ID", 
"101-01-101"), BRTHDTC = c("Birthday", "1953-07-07"), SEX = c("Gender", 
"Female")), row.names = c(NA, -2L), class = c("tbl_df", "tbl", 
"data.frame")), DiseaseStatus = structure(list(SubjectID = c("Subject ID", 
"101-01-101"), DSDT = c("DS Date", "2016-03-14"), DSDT_P = c("DS Date Prob", 
NA)), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame"
))), `101-02-102` = list(Demographics = structure(list(SubjectID = c("Subject ID", 
"101-02-102"), BRTHDTC = c("Birthday", "1963-07-02"), SEX = c("Gender", 
"Female")), row.names = c(NA, -2L), class = c("tbl_df", "tbl", 
"data.frame")), DiseaseStatus = structure(list(SubjectID = c("Subject ID", 
"101-02-102"), DSDT = c("DS Date", "2017-04-04"), DSDT_P = c("DS Date Prob", 
NA)), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame"
))), `101-03-103` = list(Demographics = structure(list(SubjectID = c("Subject ID", 
"101-03-103"), BRTHDTC = c("Birthday", "1940-09-11"), SEX = c("Gender", 
"Male")), row.names = c(NA, -2L), class = c("tbl_df", "tbl", 
"data.frame")), DiseaseStatus = structure(list(SubjectID = c("Subject ID", 
"101-03-103"), DSDT = c("DS Date", NA), DSDT_P = c("DS Date Prob", 
"UN-UNK-2015")), row.names = c(NA, -2L), class = c("tbl_df", 
"tbl", "data.frame"))), `101-04-104` = list(Demographics = structure(list(
    SubjectID = c("Subject ID", "101-04-104"), BRTHDTC = c("Birthday", 
    "1955-12-31"), SEX = c("Gender", "Male")), row.names = c(NA, 
-2L), class = c("tbl_df", "tbl", "data.frame")), DiseaseStatus = structure(list(
    SubjectID = c("Subject ID", "101-04-104"), DSDT = c("DS Date", 
    "2016-05-02"), DSDT_P = c("DS Date Prob", NA)), row.names = c(NA, 
-2L), class = c("tbl_df", "tbl", "data.frame"))), `104-05-201` = list(
    Demographics = structure(list(SubjectID = c("Subject ID", 
    "104-05-201"), BRTHDTC = c("Birthday", "1950-12-04"), SEX = c("Gender", 
    "Female")), row.names = c(NA, -2L), class = c("tbl_df", "tbl", 
    "data.frame")), DiseaseStatus = structure(list(SubjectID = c("Subject ID", 
    "104-05-201"), DSDT = c("DS Date", "2018-07-06"), DSDT_P = c("DS Date Prob", 
    NA)), row.names = c(NA, -2L), class = c("tbl_df", "tbl", 
    "data.frame"))))

我尝试用两层嵌套的 imap 深入到 df 这一层,但最后把自己绕晕了。有人可以帮我解决这个问题,并告诉我如何正确引用子列表中的 df 吗?

我的代码是这样的:

   # Attempt from the question: build one workbook per outer list element and
   # add one worksheet per inner element, then save the workbook to disk.
   # NOTE(review): no list is passed as imap()'s first argument here —
   # presumably `lst1` is piped in or was omitted from the paste; confirm.
   imap ( ~ { 
   # NOTE(review): the workbook helpers look like openxlsx functions — confirm.
   wb = createWorkbook()
     # Inner lambda: `.x` / `.y` below are the inner element and its name,
     # shadowing the outer lambda's `.x` / `.y`.
     imap(.x, ~ {     
       addWorksheet(wb, .y)
       writeData(wb, .y, .x)
       setColWidths(wb, .y, cols = 1:ncol(.x), widths = "auto")
      })

# Back in the outer lambda: `.y` is the outer element's name again and is
# interpolated into the output file name.
saveWorkbook(wb, file.path("C:/Users/",
                sprintf("subject_%s.xlsx", .y)))
                }
  )

更新:

如果子列表中的 df 包含这样的东西:

您可以使用 map_depth,它在这里非常合适。只需为 .depth 参数指定一个值,函数就会应用到该嵌套深度上的每个元素。为了让输出更好看,我只做了 2 处修改:

library(purrr)
library(dplyr) # provides bind_cols(); purrr alone does not export it

# Count the columns of every data frame two levels down (subject -> df),
# then collapse each subject's named counts into a one-row tibble.
map_depth(lst1, 2, ~ ncol(.x)) %>%
  map(~ bind_cols(.x))

$`101-01-101`
# A tibble: 1 x 2
  Demographics DiseaseStatus
         <int>         <int>
1            3             3

$`101-02-102`
# A tibble: 1 x 2
  Demographics DiseaseStatus
         <int>         <int>
1            3             3

$`101-03-103`
# A tibble: 1 x 2
  Demographics DiseaseStatus
         <int>         <int>
1            3             3

$`101-04-104`
# A tibble: 1 x 2
  Demographics DiseaseStatus
         <int>         <int>
1            3             3

$`104-05-201`
# A tibble: 1 x 2
  Demographics DiseaseStatus
         <int>         <int>
1            3             3

或者用下面这种写法。不过输出中不会保留每一行对应的子列表名称,所以信息量较少。

# Same per-subject column counts, stacked into a single table: build a one-row
# tibble per subject, then splice the resulting list into rbind() as separate
# arguments via exec(rbind, !!!.).
# The list names (subject IDs) are not kept as a column in the result.
map_depth(lst1, 2, ~ length(.x)) %>%
  map(~ .x %>% bind_cols()) %>%
  exec(rbind, !!!.)

# A tibble: 5 x 2
  Demographics DiseaseStatus
*        <int>         <int>
1            3             3
2            3             3
3            3             3
4            3             3
5            3             3

我还没有完全理解您的代码,但它应该改写成下面这样。由于不确定文件名中您想填入什么,我在 sprintf 的参数处留了一个空位(下划线占位符):

# Rewrite of the question's attempt: the inner callback is an explicit
# two-argument function (`a` = inner element, `b` = its name), so it no longer
# reuses the outer lambda's `.x` / `.y` names.
# NOTE(review): as in the question, no list is passed as imap()'s first
# argument — presumably `lst1` is meant to be piped in; confirm.
imap ( ~ { 
   wb = createWorkbook()
     imap(.x, function(a, b) {     
       addWorksheet(wb, b)
       writeData(wb, b, a)
       setColWidths(wb, b, cols = 1:ncol(a), widths = "auto")
      })

# NOTE(review): `_________` is a deliberate fill-in-the-blank and is not valid
# R; presumably the outer element name (`.y`) belongs here — confirm.
saveWorkbook(wb, file.path("C:/Users/",
                sprintf("subject_%s.xlsx", _________)))
                }
  )

实际上你有两个问题:

  • 传给 imap_* 的匿名函数需要两个参数(元素本身及其名称)。
  • 另一个问题是在一个 lambda 函数内部再嵌套编写另一个 lambda 函数。这一点我自己至今也没有找到解决办法。

你之前写的表达式可以正确地写成

# Walk both levels with explicit two-argument callbacks: the outer one gets a
# subject's list of data frames and its name, the inner one gets a single data
# frame and its name. Each data frame's column count is printed, and the same
# counts are returned as a nested list.
imap(lst1, function(subject_dfs, subject_id) {
  imap(subject_dfs, function(df, df_name) {
    print(ncol(df))
  })
})

[1] 3
[1] 3
[1] 3
[1] 3
[1] 3
[1] 3
[1] 3
[1] 3
[1] 3
[1] 3
$`101-01-101`
$`101-01-101`$Demographics
[1] 3

$`101-01-101`$DiseaseStatus
[1] 3


$`101-02-102`
$`101-02-102`$Demographics
[1] 3

$`101-02-102`$DiseaseStatus
[1] 3


$`101-03-103`
$`101-03-103`$Demographics
[1] 3

$`101-03-103`$DiseaseStatus
[1] 3


$`101-04-104`
$`101-04-104`$Demographics
[1] 3

$`101-04-104`$DiseaseStatus
[1] 3


$`104-05-201`
$`104-05-201`$Demographics
[1] 3

$`104-05-201`$DiseaseStatus
[1] 3

或者,如果您想要的是别的结果(例如每个子列表中所有 df 合并后的总列数):

# Total number of columns per subject once its data frames are combined side
# by side into one data frame; the element name supplied by imap_dfr() is
# accepted but not used.
imap_dfr(lst1, function(subject_dfs, subject_id) {
  ncol(as.data.frame(subject_dfs))
})
# A tibble: 1 x 5
  `101-01-101` `101-02-102` `101-03-103` `101-04-104` `104-05-201`
         <int>        <int>        <int>        <int>        <int>
1            6            6            6            6            6

或者是这个(每个子列表一行、每个 df 一列的列数表)?

# One row per subject and one column per data frame, each cell holding that
# data frame's column count.
map_df(lst1, function(subject_dfs) {
  map(subject_dfs, function(df) ncol(df))
})
# map_df(lst1, ~map(.x, ncol)) ##alternative
# A tibble: 5 x 2
  Demographics DiseaseStatus
         <int>         <int>
1            3             3
2            3             3
3            3             3
4            3             3
5            3             3