在另一列的值是唯一的之后连接一列中的值
Concatenate values in one column after the value of another column is unique
我有一个如下所示的 tibble:
library(tidyverse)

# Example data: a real table name ("horse", "dog", "rat") opens each section
# and the "x" rows beneath it hold that section's strings in value_str.
df <- tibble(
  table_name = c(
    "horse", "x", "x", "x",
    "dog", "x",
    "rat", "x", "x", "x", "x", "x"
  ),
  value_str = c(
    NA, "a", "b", "c",
    NA, "a",
    NA, "b", "d", "e", "f", "g"
  )
)
> df
# A tibble: 12 x 2
table_name value_str
<chr> <chr>
1 horse <NA>
2 x a
3 x b
4 x c
5 dog <NA>
6 x a
7 rat <NA>
8 x b
9 x d
10 x e
11 x f
12 x g
我想为 "horse"、"dog" 和 "rat" 分别生成一个向量:"horse" 的向量包含从 "horse" 下一行开始、到 "dog" 之前为止 value_str 中的字符串;
"dog" 的向量包含到 "rat" 之前为止的字符串;"rat" 的向量则一直取到末尾。我希望输出类似于以下向量:
# Expected result: one plain character vector per table name. The values are
# written directly instead of being wrapped in a one-column tibble and pulled
# back out again.
vec_horse <- c("a", "b", "c")
vec_dog <- "a"
vec_rat <- c("b", "d", "e", "f", "g")
> vec_horse
[1] "a" "b" "c"
我本想对 df$table_name 使用 group_by(),但在这种情况下行不通,因为分组取决于 df$value_str 中各个值所在的位置。
我无法将 df$value_str
折叠成单个向量,因为输出需要为 df$table_name
中每个唯一类别的单独向量。
提前致谢!
# Turn the "x" placeholder rows into NA so that each real table name can be
# carried down onto the rows beneath it, then collect the distinct non-missing
# strings of every table into a list-column.
d <- df %>%
  mutate(table_name = na_if(table_name, "x")) %>%
  fill(table_name) %>%
  group_by(table_name) %>%
  summarise(value_str = list(unique(value_str[!is.na(value_str)]))) %>%
  ungroup()

# Named list with one character vector per table name.
setNames(d$value_str, d$table_name)
如果我们想在全局环境中创建三个对象,一种选择是(不推荐)
# Blank out the "x" placeholder rows, carry the previous table name forward,
# and split value_str by the resulting "vec_<table>" label; each piece keeps
# only its distinct non-missing strings.
lst1 <- lapply(
  split(
    df$value_str,
    paste0(
      "vec_",
      zoo::na.locf(replace(df$table_name, df$table_name == "x", NA))
    )
  ),
  function(vals) unique(na.omit(vals))
)

# Copy every list element into the global environment as its own object.
list2env(lst1, .GlobalEnv)

vec_dog
# [1] "a"
vec_horse
# [1] "a" "b" "c"
vec_rat
# [1] "b" "d" "e" "f" "g"
我有一个如下所示的 tibble:
library(tidyverse)

# Example data: a real table name ("horse", "dog", "rat") opens each section
# and the "x" rows beneath it hold that section's strings in value_str.
df <- tibble(
  table_name = c(
    "horse", "x", "x", "x",
    "dog", "x",
    "rat", "x", "x", "x", "x", "x"
  ),
  value_str = c(
    NA, "a", "b", "c",
    NA, "a",
    NA, "b", "d", "e", "f", "g"
  )
)
> df
# A tibble: 12 x 2
table_name value_str
<chr> <chr>
1 horse <NA>
2 x a
3 x b
4 x c
5 dog <NA>
6 x a
7 rat <NA>
8 x b
9 x d
10 x e
11 x f
12 x g
我想为 "horse"、"dog" 和 "rat" 分别生成一个向量:"horse" 的向量包含从 "horse" 下一行开始、到 "dog" 之前为止 value_str 中的字符串;
"dog" 的向量包含到 "rat" 之前为止的字符串;"rat" 的向量则一直取到末尾。我希望输出类似于以下向量:
# Expected result: one plain character vector per table name. The values are
# written directly instead of being wrapped in a one-column tibble and pulled
# back out again.
vec_horse <- c("a", "b", "c")
vec_dog <- "a"
vec_rat <- c("b", "d", "e", "f", "g")
> vec_horse
[1] "a" "b" "c"
我本想对 df$table_name 使用 group_by(),但在这种情况下行不通,因为分组取决于 df$value_str 中各个值所在的位置。
我无法将 df$value_str
折叠成单个向量,因为输出需要为 df$table_name
中每个唯一类别的单独向量。
提前致谢!
# Turn the "x" placeholder rows into NA so that each real table name can be
# carried down onto the rows beneath it, then collect the distinct non-missing
# strings of every table into a list-column.
d <- df %>%
  mutate(table_name = na_if(table_name, "x")) %>%
  fill(table_name) %>%
  group_by(table_name) %>%
  summarise(value_str = list(unique(value_str[!is.na(value_str)]))) %>%
  ungroup()

# Named list with one character vector per table name.
setNames(d$value_str, d$table_name)
如果我们想在全局环境中创建三个对象,一种选择是(不推荐)
# Blank out the "x" placeholder rows, carry the previous table name forward,
# and split value_str by the resulting "vec_<table>" label; each piece keeps
# only its distinct non-missing strings.
lst1 <- lapply(
  split(
    df$value_str,
    paste0(
      "vec_",
      zoo::na.locf(replace(df$table_name, df$table_name == "x", NA))
    )
  ),
  function(vals) unique(na.omit(vals))
)

# Copy every list element into the global environment as its own object.
list2env(lst1, .GlobalEnv)

vec_dog
# [1] "a"
vec_horse
# [1] "a" "b" "c"
vec_rat
# [1] "b" "d" "e" "f" "g"