按列填充/完成/展开
Fill / complete / expand columnwise
我有这个数据框:
# Example data: seven rows of logical flags. Within the lg0..lg4 and
# ld0..ld4 series, the columns lg1, lg3 and ld3 are absent.
df <- data.frame(
  lg0 = c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE),
  lg2 = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE),
  lg4 = c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE),
  ld0 = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE),
  ld1 = c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE),
  ld2 = c(FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE),
  ld4 = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE)
)
lg0 lg2 lg4 ld0 ld1 ld2 ld4
1 FALSE FALSE TRUE TRUE FALSE FALSE FALSE
2 FALSE TRUE FALSE TRUE FALSE FALSE FALSE
3 TRUE FALSE FALSE TRUE FALSE FALSE FALSE
4 TRUE FALSE FALSE FALSE TRUE FALSE FALSE
5 TRUE FALSE FALSE FALSE FALSE TRUE FALSE
6 TRUE FALSE FALSE TRUE FALSE FALSE FALSE
7 TRUE FALSE FALSE FALSE TRUE FALSE FALSE
如何添加缺失的列 lg1、lg3、ld3,并用 FALSE 填充?
我设想用类似 complete / fill / expand 的方式来补全缺失的列,从而得到下面的结果。
该方法不应依赖于具体缺少哪些列,因为缺少的列可能会变化。
期望输出:
lg0 lg1 lg2 lg3 lg4 ld0 ld1 ld2 ld3 ld4
FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
到目前为止我已经尝试过:
library(tidyr)
library(dplyr)
library(readr)

# Attempt so far: reshape to long format and pull the numeric suffix out of
# each column name. The input is `df`; no `df1` is defined.
df2 <- df %>%
  pivot_longer(everything()) %>%
  mutate(id = parse_number(name))

# Enumerate every id/name combination present in the long data.
expand(df2, id, name)
我认为你可以使用以下解决方案:
library(dplyr)
library(tidyr)

# Add the missing numbered columns for each prefix, filled with FALSE.
df %>%
  pivot_longer(everything()) %>%
  # Split e.g. "lg2" into prefix "lg" and number "2". Backslashes must be
  # doubled inside an R string literal.
  extract(name, c("base", "pre"), regex = "(\\D+)(\\d+)") %>%
  # pivot_longer() emits ncol(df) rows per original row; tag each run with its
  # source row index so the data can be pivoted back. Using nrow(df) here
  # keeps this correct when the frame is not square.
  mutate(id = rep(seq_len(nrow(df)), each = ncol(df))) %>%
  group_by(id, base) %>%
  mutate(pre = as.integer(pre)) %>%
  # Fill every gap between the smallest and largest suffix of each prefix.
  complete(pre = seq(min(pre), max(pre), 1), fill = list(value = FALSE)) %>%
  unite("New_Name", c(base, pre), sep = "") %>%
  pivot_wider(names_from = New_Name, values_from = value) %>%
  ungroup() %>%
  select(-id) %>%
  # Put the "lg" columns first, as in the expected output.
  relocate(starts_with("lg"))
# A tibble: 7 x 10
lg0 lg1 lg2 lg3 lg4 ld0 ld1 ld2 ld3 ld4
<lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
1 FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
2 FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
3 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
4 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
5 TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
6 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
7 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
在 base R 中,我们可以先构建一个全为 FALSE 的新数据集,然后根据匹配的列名把 'df' 的值赋回其中:
# Full set of target column names: each prefix paired with suffixes 0..4.
nm1 <- paste0(rep(c("lg", "ld"), each = 5), 0:4)

# Start from an all-FALSE data frame that has every target column ...
df2 <- as.data.frame(
  matrix(
    FALSE,
    nrow = nrow(df),
    ncol = length(nm1),
    dimnames = list(NULL, nm1)
  )
)

# ... then overwrite the columns that already exist in `df`.
df2[names(df)] <- df
-输出
> df2
lg0 lg1 lg2 lg3 lg4 ld0 ld1 ld2 ld3 ld4
1 FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
2 FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
3 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
4 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
5 TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
6 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
7 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
如果我们想自动构造nm1
# Numeric suffix of every existing column name (e.g. "lg2" -> 2).
# Backslashes must be doubled inside an R string literal.
d1 <- as.numeric(sub("\\D+", "", names(df)))

# Cross each distinct prefix with the full suffix range. Transposing before
# flattening keeps the names grouped by prefix (lg0, lg1, ..., ld0, ...).
nm1 <- c(t(outer(
  unique(sub("\\d+", "", names(df))),
  min(d1):max(d1),
  FUN = paste0
)))

# Add the missing columns filled with FALSE, then select in the full order.
df[setdiff(nm1, names(df))] <- FALSE
df[nm1]
我有这个数据框:
# Example data: seven rows of logical flags. Within the lg0..lg4 and
# ld0..ld4 series, the columns lg1, lg3 and ld3 are absent.
df <- data.frame(
  lg0 = c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE),
  lg2 = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE),
  lg4 = c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE),
  ld0 = c(TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE),
  ld1 = c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE),
  ld2 = c(FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE),
  ld4 = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE)
)
lg0 lg2 lg4 ld0 ld1 ld2 ld4
1 FALSE FALSE TRUE TRUE FALSE FALSE FALSE
2 FALSE TRUE FALSE TRUE FALSE FALSE FALSE
3 TRUE FALSE FALSE TRUE FALSE FALSE FALSE
4 TRUE FALSE FALSE FALSE TRUE FALSE FALSE
5 TRUE FALSE FALSE FALSE FALSE TRUE FALSE
6 TRUE FALSE FALSE TRUE FALSE FALSE FALSE
7 TRUE FALSE FALSE FALSE TRUE FALSE FALSE
如何添加缺失的列 lg1、lg3、ld3,并用 FALSE 填充?
我设想用类似 complete / fill / expand 的方式来补全缺失的列,从而得到下面的结果。
该方法不应依赖于具体缺少哪些列,因为缺少的列可能会变化。
期望输出:
lg0 lg1 lg2 lg3 lg4 ld0 ld1 ld2 ld3 ld4
FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
到目前为止我已经尝试过:
library(tidyr)
library(dplyr)
library(readr)

# Attempt so far: reshape to long format and pull the numeric suffix out of
# each column name. The input is `df`; no `df1` is defined.
df2 <- df %>%
  pivot_longer(everything()) %>%
  mutate(id = parse_number(name))

# Enumerate every id/name combination present in the long data.
expand(df2, id, name)
我认为你可以使用以下解决方案:
library(dplyr)
library(tidyr)

# Add the missing numbered columns for each prefix, filled with FALSE.
df %>%
  pivot_longer(everything()) %>%
  # Split e.g. "lg2" into prefix "lg" and number "2". Backslashes must be
  # doubled inside an R string literal.
  extract(name, c("base", "pre"), regex = "(\\D+)(\\d+)") %>%
  # pivot_longer() emits ncol(df) rows per original row; tag each run with its
  # source row index so the data can be pivoted back. Using nrow(df) here
  # keeps this correct when the frame is not square.
  mutate(id = rep(seq_len(nrow(df)), each = ncol(df))) %>%
  group_by(id, base) %>%
  mutate(pre = as.integer(pre)) %>%
  # Fill every gap between the smallest and largest suffix of each prefix.
  complete(pre = seq(min(pre), max(pre), 1), fill = list(value = FALSE)) %>%
  unite("New_Name", c(base, pre), sep = "") %>%
  pivot_wider(names_from = New_Name, values_from = value) %>%
  ungroup() %>%
  select(-id) %>%
  # Put the "lg" columns first, as in the expected output.
  relocate(starts_with("lg"))
# A tibble: 7 x 10
lg0 lg1 lg2 lg3 lg4 ld0 ld1 ld2 ld3 ld4
<lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
1 FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
2 FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
3 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
4 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
5 TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
6 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
7 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
在 base R 中,我们可以先构建一个全为 FALSE 的新数据集,然后根据匹配的列名把 'df' 的值赋回其中:
# Full set of target column names: each prefix paired with suffixes 0..4.
nm1 <- paste0(rep(c("lg", "ld"), each = 5), 0:4)

# Start from an all-FALSE data frame that has every target column ...
df2 <- as.data.frame(
  matrix(
    FALSE,
    nrow = nrow(df),
    ncol = length(nm1),
    dimnames = list(NULL, nm1)
  )
)

# ... then overwrite the columns that already exist in `df`.
df2[names(df)] <- df
-输出
> df2
lg0 lg1 lg2 lg3 lg4 ld0 ld1 ld2 ld3 ld4
1 FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
2 FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
3 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
4 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
5 TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
6 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
7 TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
如果我们想自动构造nm1
# Numeric suffix of every existing column name (e.g. "lg2" -> 2).
# Backslashes must be doubled inside an R string literal.
d1 <- as.numeric(sub("\\D+", "", names(df)))

# Cross each distinct prefix with the full suffix range. Transposing before
# flattening keeps the names grouped by prefix (lg0, lg1, ..., ld0, ...).
nm1 <- c(t(outer(
  unique(sub("\\d+", "", names(df))),
  min(d1):max(d1),
  FUN = paste0
)))

# Add the missing columns filled with FALSE, then select in the full order.
df[setdiff(nm1, names(df))] <- FALSE
df[nm1]