如何在只有 ID 但没有其他内容的列表中识别 df
How to identify df in a list that only have ID but nothing else in it
我有如下列表:
# Example input: a named list of subjects. Each subject holds three 2-row
# data frames (tibble class vector) whose first row repeats the column names
# and whose second row carries the actual values.
lst <- local({
  # Attach the row names and class vector exactly as dput() emitted them.
  as_tbl <- function(cols) {
    structure(
      cols,
      row.names = c(NA, -2L),
      class = c("tbl_df", "tbl", "data.frame")
    )
  }

  list(
    `101-01-101` = list(
      Demographics = as_tbl(list(
        SubjectID = c("SubjectID", "101-01-101"),
        BRTHDTC = c("BRTHDTC", "1953-07-07"),
        SEX = c("SEX", "Female")
      )),
      DiseaseStatus = as_tbl(list(
        SubjectID = c("SubjectID", "101-01-101"),
        DSDT = c("DSDT", "2016-03-14"),
        DSDT_P = c("DSDT_P", NA)
      )),
      Visits = as_tbl(list(
        SubjectID = c("SubjectID", "101-01-101"),
        Visit = c("Visit", "Screening: -28 Days to Day 1"),
        VISND = c("VISND", NA)
      ))
    ),
    `101-02-102` = list(
      Demographics = as_tbl(list(
        SubjectID = c("SubjectID", "101-02-102"),
        BRTHDTC = c("BRTHDTC", "1963-07-02"),
        SEX = c("SEX", "Female")
      )),
      DiseaseStatus = as_tbl(list(
        SubjectID = c("SubjectID", "101-02-102"),
        DSDT = c("DSDT", "2017-04-04"),
        DSDT_P = c("DSDT_P", NA)
      )),
      # Only SubjectID is filled in the data row of this frame.
      Visits = as_tbl(list(
        SubjectID = c("SubjectID", "101-02-102"),
        Visit = c("Visit", NA),
        VISND = c("VISND", NA)
      ))
    ),
    `101-03-103` = list(
      Demographics = as_tbl(list(
        SubjectID = c("SubjectID", "101-03-103"),
        BRTHDTC = c("BRTHDTC", "1940-09-11"),
        SEX = c("SEX", "Male")
      )),
      # Only SubjectID is filled in the data row of this frame.
      DiseaseStatus = as_tbl(list(
        SubjectID = c("SubjectID", "101-03-103"),
        DSDT = c("DSDT", NA),
        DSDT_P = c("DSDT_P", NA)
      )),
      Visits = as_tbl(list(
        SubjectID = c("SubjectID", "101-03-103"),
        Visit = c("Visit", "Screening: -28 Days to Day 1"),
        VISND = c("VISND", NA)
      ))
    )
  )
})
如果列表中的某个 df 只有 SubjectID 有值,而其他变量全部为 NA,我想把该 df 的 SubjectID 条目也改为 NA。我该怎么办?
我正在考虑统计每个 df 中非 NA 条目的个数,如果该值等于 1,则删除所有记录或将 SubjectID
更改为 NA。我想仍然保留列名,但是该怎么做?
在我的示例文件中,101-02-102
应该将 Visits 设为空 df
,并且 101-03-103
应该将 DiseaseStatus
设为空 df
。
因为是嵌套列表,所以使用两层 lapply:对内层 list 的每个元素,先用 is.na 得到逻辑矩阵并对其取 colSums,
判断每一列的 NA 个数是否都等于行数 (nrow),如果是,则把 SubjectID 赋值为 NA,最后返回数据
# Walk the nested list (subject -> data frame) and blank out SubjectID in any
# data frame whose remaining columns contain nothing but NA.
# NOTE(review): lst1 is not defined in this snippet; the 1 x 3 results printed
# below suggest it holds a single data row per data frame - confirm.
lst2 <- lapply(lst1, function(subject) {
  lapply(subject, function(df) {
    # NA count per non-ID column (the first column is dropped).
    na_per_col <- colSums(is.na(df[-1]))
    # A column is entirely missing when its NA count equals the row count.
    if (all(na_per_col == nrow(df))) {
      df$SubjectID <- NA_character_
    }
    df
  })
})
-输出
lst2
$`101-01-101`
$`101-01-101`$Demographics
# A tibble: 1 x 3
SubjectID BRTHDTC SEX
<chr> <chr> <chr>
1 101-01-101 1953-07-07 Female
$`101-01-101`$DiseaseStatus
# A tibble: 1 x 3
SubjectID DSDT DSDT_P
<chr> <chr> <chr>
1 101-01-101 2016-03-14 <NA>
$`101-01-101`$Visits
# A tibble: 1 x 3
SubjectID Visit VISND
<chr> <chr> <chr>
1 101-01-101 Screening: -28 Days to Day 1 <NA>
$`101-02-102`
$`101-02-102`$Demographics
# A tibble: 1 x 3
SubjectID BRTHDTC SEX
<chr> <chr> <chr>
1 101-02-102 1963-07-02 Female
$`101-02-102`$DiseaseStatus
# A tibble: 1 x 3
SubjectID DSDT DSDT_P
<chr> <chr> <chr>
1 101-02-102 2017-04-04 <NA>
$`101-02-102`$Visits
# A tibble: 1 x 3
SubjectID Visit VISND
<chr> <chr> <chr>
1 <NA> <NA> <NA>
$`101-03-103`
$`101-03-103`$Demographics
# A tibble: 1 x 3
SubjectID BRTHDTC SEX
<chr> <chr> <chr>
1 101-03-103 1940-09-11 Male
$`101-03-103`$DiseaseStatus
# A tibble: 1 x 3
SubjectID DSDT DSDT_P
<chr> <chr> <chr>
1 <NA> <NA> <NA>
$`101-03-103`$Visits
# A tibble: 1 x 3
SubjectID Visit VISND
<chr> <chr> <chr>
1 101-03-103 Screening: -28 Days to Day 1 <NA>
更新案例
# Variant for data frames whose first row repeats the column names: the check
# is run on the rows after the first one, and that first row is dropped from
# the result.
lapply(lst, function(subject) {
  lapply(subject, function(df) {
    # Number of rows left once the first row is removed.
    n_data_rows <- nrow(df[-1, ])
    # NA count per non-ID column, ignoring the first row.
    na_per_col <- colSums(is.na(df[-1, -1]))
    if (all(na_per_col == n_data_rows)) {
      df$SubjectID <- NA_character_
    }
    # Return everything except the first row.
    df[-1, ]
  })
})
更新解决方案
我已经根据 OP 的新要求更新了我的解决方案,因此第二行的 SubjectID
将是 NA
,前提是所有其他变量也是 NA
。
library(purrr)
# mutate(), if_else() and row_number() are dplyr functions; attaching purrr
# alone does not make them available.
library(dplyr)

# For every data frame in the nested list (subject -> data frame), set the
# SubjectID of the data row to NA when all other variables in row 2 are NA.
# Row 1 (the repeated column names) is kept untouched.
lst %>%
  map(function(subject) {
    map(subject, function(df) {
      # Row 2 is inspected without its first column (SubjectID).
      if (all(is.na(df[2, -1]))) {
        # Keep SubjectID on row 1 only; every later row gets NA.
        df %>%
          mutate(SubjectID = if_else(row_number() == 1, SubjectID, NA_character_))
      } else {
        df
      }
    })
  })
$`101-01-101`
$`101-01-101`$Demographics
# A tibble: 2 x 3
SubjectID BRTHDTC SEX
<chr> <chr> <chr>
1 SubjectID BRTHDTC SEX
2 101-01-101 1953-07-07 Female
$`101-01-101`$DiseaseStatus
# A tibble: 2 x 3
SubjectID DSDT DSDT_P
<chr> <chr> <chr>
1 SubjectID DSDT DSDT_P
2 101-01-101 2016-03-14 NA
$`101-01-101`$Visits
# A tibble: 2 x 3
SubjectID Visit VISND
<chr> <chr> <chr>
1 SubjectID Visit VISND
2 101-01-101 Screening: -28 Days to Day 1 NA
$`101-02-102`
$`101-02-102`$Demographics
# A tibble: 2 x 3
SubjectID BRTHDTC SEX
<chr> <chr> <chr>
1 SubjectID BRTHDTC SEX
2 101-02-102 1963-07-02 Female
$`101-02-102`$DiseaseStatus
# A tibble: 2 x 3
SubjectID DSDT DSDT_P
<chr> <chr> <chr>
1 SubjectID DSDT DSDT_P
2 101-02-102 2017-04-04 NA
$`101-02-102`$Visits
# A tibble: 2 x 3
SubjectID Visit VISND
<chr> <chr> <chr>
1 SubjectID Visit VISND
2 NA NA NA
$`101-03-103`
$`101-03-103`$Demographics
# A tibble: 2 x 3
SubjectID BRTHDTC SEX
<chr> <chr> <chr>
1 SubjectID BRTHDTC SEX
2 101-03-103 1940-09-11 Male
$`101-03-103`$DiseaseStatus
# A tibble: 2 x 3
SubjectID DSDT DSDT_P
<chr> <chr> <chr>
1 SubjectID DSDT DSDT_P
2 NA NA NA
$`101-03-103`$Visits
# A tibble: 2 x 3
SubjectID Visit VISND
<chr> <chr> <chr>
1 SubjectID Visit VISND
2 101-03-103 Screening: -28 Days to Day 1 NA
我有如下列表:
# Example input: a named list of subjects. Each subject holds three 2-row
# data frames (tibble class vector) whose first row repeats the column names
# and whose second row carries the actual values.
lst <- local({
  # Attach the row names and class vector exactly as dput() emitted them.
  as_tbl <- function(cols) {
    structure(
      cols,
      row.names = c(NA, -2L),
      class = c("tbl_df", "tbl", "data.frame")
    )
  }

  list(
    `101-01-101` = list(
      Demographics = as_tbl(list(
        SubjectID = c("SubjectID", "101-01-101"),
        BRTHDTC = c("BRTHDTC", "1953-07-07"),
        SEX = c("SEX", "Female")
      )),
      DiseaseStatus = as_tbl(list(
        SubjectID = c("SubjectID", "101-01-101"),
        DSDT = c("DSDT", "2016-03-14"),
        DSDT_P = c("DSDT_P", NA)
      )),
      Visits = as_tbl(list(
        SubjectID = c("SubjectID", "101-01-101"),
        Visit = c("Visit", "Screening: -28 Days to Day 1"),
        VISND = c("VISND", NA)
      ))
    ),
    `101-02-102` = list(
      Demographics = as_tbl(list(
        SubjectID = c("SubjectID", "101-02-102"),
        BRTHDTC = c("BRTHDTC", "1963-07-02"),
        SEX = c("SEX", "Female")
      )),
      DiseaseStatus = as_tbl(list(
        SubjectID = c("SubjectID", "101-02-102"),
        DSDT = c("DSDT", "2017-04-04"),
        DSDT_P = c("DSDT_P", NA)
      )),
      # Only SubjectID is filled in the data row of this frame.
      Visits = as_tbl(list(
        SubjectID = c("SubjectID", "101-02-102"),
        Visit = c("Visit", NA),
        VISND = c("VISND", NA)
      ))
    ),
    `101-03-103` = list(
      Demographics = as_tbl(list(
        SubjectID = c("SubjectID", "101-03-103"),
        BRTHDTC = c("BRTHDTC", "1940-09-11"),
        SEX = c("SEX", "Male")
      )),
      # Only SubjectID is filled in the data row of this frame.
      DiseaseStatus = as_tbl(list(
        SubjectID = c("SubjectID", "101-03-103"),
        DSDT = c("DSDT", NA),
        DSDT_P = c("DSDT_P", NA)
      )),
      Visits = as_tbl(list(
        SubjectID = c("SubjectID", "101-03-103"),
        Visit = c("Visit", "Screening: -28 Days to Day 1"),
        VISND = c("VISND", NA)
      ))
    )
  )
})
如果列表中的某个 df 只有 SubjectID 有值,而其他变量全部为 NA,我想把该 df 的 SubjectID 条目也改为 NA。我该怎么办?
我正在考虑统计每个 df 中非 NA 条目的个数,如果该值等于 1,则删除所有记录或将 SubjectID
更改为 NA。我想仍然保留列名,但是该怎么做?
在我的示例文件中,101-02-102
应该将 Visits 设为空 df
,并且 101-03-103
应该将 DiseaseStatus
设为空 df
。
因为是嵌套列表,所以使用两层 lapply:对内层 list 的每个元素,先用 is.na 得到逻辑矩阵并对其取 colSums,
判断每一列的 NA 个数是否都等于行数 (nrow),如果是,则把 SubjectID 赋值为 NA,最后返回数据
# Walk the nested list (subject -> data frame) and blank out SubjectID in any
# data frame whose remaining columns contain nothing but NA.
# NOTE(review): lst1 is not defined in this snippet; the 1 x 3 results printed
# below suggest it holds a single data row per data frame - confirm.
lst2 <- lapply(lst1, function(subject) {
  lapply(subject, function(df) {
    # NA count per non-ID column (the first column is dropped).
    na_per_col <- colSums(is.na(df[-1]))
    # A column is entirely missing when its NA count equals the row count.
    if (all(na_per_col == nrow(df))) {
      df$SubjectID <- NA_character_
    }
    df
  })
})
-输出
lst2
$`101-01-101`
$`101-01-101`$Demographics
# A tibble: 1 x 3
SubjectID BRTHDTC SEX
<chr> <chr> <chr>
1 101-01-101 1953-07-07 Female
$`101-01-101`$DiseaseStatus
# A tibble: 1 x 3
SubjectID DSDT DSDT_P
<chr> <chr> <chr>
1 101-01-101 2016-03-14 <NA>
$`101-01-101`$Visits
# A tibble: 1 x 3
SubjectID Visit VISND
<chr> <chr> <chr>
1 101-01-101 Screening: -28 Days to Day 1 <NA>
$`101-02-102`
$`101-02-102`$Demographics
# A tibble: 1 x 3
SubjectID BRTHDTC SEX
<chr> <chr> <chr>
1 101-02-102 1963-07-02 Female
$`101-02-102`$DiseaseStatus
# A tibble: 1 x 3
SubjectID DSDT DSDT_P
<chr> <chr> <chr>
1 101-02-102 2017-04-04 <NA>
$`101-02-102`$Visits
# A tibble: 1 x 3
SubjectID Visit VISND
<chr> <chr> <chr>
1 <NA> <NA> <NA>
$`101-03-103`
$`101-03-103`$Demographics
# A tibble: 1 x 3
SubjectID BRTHDTC SEX
<chr> <chr> <chr>
1 101-03-103 1940-09-11 Male
$`101-03-103`$DiseaseStatus
# A tibble: 1 x 3
SubjectID DSDT DSDT_P
<chr> <chr> <chr>
1 <NA> <NA> <NA>
$`101-03-103`$Visits
# A tibble: 1 x 3
SubjectID Visit VISND
<chr> <chr> <chr>
1 101-03-103 Screening: -28 Days to Day 1 <NA>
更新案例
# Variant for data frames whose first row repeats the column names: the check
# is run on the rows after the first one, and that first row is dropped from
# the result.
lapply(lst, function(subject) {
  lapply(subject, function(df) {
    # Number of rows left once the first row is removed.
    n_data_rows <- nrow(df[-1, ])
    # NA count per non-ID column, ignoring the first row.
    na_per_col <- colSums(is.na(df[-1, -1]))
    if (all(na_per_col == n_data_rows)) {
      df$SubjectID <- NA_character_
    }
    # Return everything except the first row.
    df[-1, ]
  })
})
更新解决方案
我已经根据 OP 的新要求更新了我的解决方案,因此第二行的 SubjectID
将是 NA
,前提是所有其他变量也是 NA
。
library(purrr)
# mutate(), if_else() and row_number() are dplyr functions; attaching purrr
# alone does not make them available.
library(dplyr)

# For every data frame in the nested list (subject -> data frame), set the
# SubjectID of the data row to NA when all other variables in row 2 are NA.
# Row 1 (the repeated column names) is kept untouched.
lst %>%
  map(function(subject) {
    map(subject, function(df) {
      # Row 2 is inspected without its first column (SubjectID).
      if (all(is.na(df[2, -1]))) {
        # Keep SubjectID on row 1 only; every later row gets NA.
        df %>%
          mutate(SubjectID = if_else(row_number() == 1, SubjectID, NA_character_))
      } else {
        df
      }
    })
  })
$`101-01-101`
$`101-01-101`$Demographics
# A tibble: 2 x 3
SubjectID BRTHDTC SEX
<chr> <chr> <chr>
1 SubjectID BRTHDTC SEX
2 101-01-101 1953-07-07 Female
$`101-01-101`$DiseaseStatus
# A tibble: 2 x 3
SubjectID DSDT DSDT_P
<chr> <chr> <chr>
1 SubjectID DSDT DSDT_P
2 101-01-101 2016-03-14 NA
$`101-01-101`$Visits
# A tibble: 2 x 3
SubjectID Visit VISND
<chr> <chr> <chr>
1 SubjectID Visit VISND
2 101-01-101 Screening: -28 Days to Day 1 NA
$`101-02-102`
$`101-02-102`$Demographics
# A tibble: 2 x 3
SubjectID BRTHDTC SEX
<chr> <chr> <chr>
1 SubjectID BRTHDTC SEX
2 101-02-102 1963-07-02 Female
$`101-02-102`$DiseaseStatus
# A tibble: 2 x 3
SubjectID DSDT DSDT_P
<chr> <chr> <chr>
1 SubjectID DSDT DSDT_P
2 101-02-102 2017-04-04 NA
$`101-02-102`$Visits
# A tibble: 2 x 3
SubjectID Visit VISND
<chr> <chr> <chr>
1 SubjectID Visit VISND
2 NA NA NA
$`101-03-103`
$`101-03-103`$Demographics
# A tibble: 2 x 3
SubjectID BRTHDTC SEX
<chr> <chr> <chr>
1 SubjectID BRTHDTC SEX
2 101-03-103 1940-09-11 Male
$`101-03-103`$DiseaseStatus
# A tibble: 2 x 3
SubjectID DSDT DSDT_P
<chr> <chr> <chr>
1 SubjectID DSDT DSDT_P
2 NA NA NA
$`101-03-103`$Visits
# A tibble: 2 x 3
SubjectID Visit VISND
<chr> <chr> <chr>
1 SubjectID Visit VISND
2 101-03-103 Screening: -28 Days to Day 1 NA