按连续 2 个 ID 折叠文本
collapse text by 2 ID's in a row
我有一个与此主题类似的问题:"Collapse text by group in data frame [duplicate]"
group text
a a1
a a2
a a3
b b1
b b2
c c1
c c2
c c3
c c4
我想把每个组内相邻的两行文本两两拼接(而不是把整个组折叠成一行),期望的结果如下:
group text
a a1a2
a a2a3
b b1b2
c c1c2
c c2c3
c c3c4
这个怎么样:
library(tidyverse)
# Within each group, glue every row's text to the text of the row that follows
# it. The last row of a group has no successor, so it is padded with NA and
# then dropped.
# Caveat: 1:(n() - 1) is only a valid index range when a group has at least
# two rows.
df %>%
  group_by(group) %>%
  mutate(
    text = c(
      paste0(text[1:(n() - 1)], text[2:n()]),
      NA
    )
  ) %>%
  drop_na(text)
或
# For every group, build the vector of "current + next" text pairs, store it
# in a list-column, then expand that list-column back into one row per pair.
# Caveat: 1:(n() - 1) is only a valid index range when a group has at least
# two rows.
df %>%
  group_by(group) %>%
  summarise(text = list(paste0(text[1:(n() - 1)], text[2:n()]))) %>%
  # Name the list-column explicitly: calling unnest() without a column relies
  # on deprecated behaviour and warns in tidyr >= 1.0.0.
  unnest(text)
group text
1 a a1a2
2 a a2a3
3 b b1b2
4 c c1c2
5 c c2c3
6 c c3c4
上面的代码假设每个组至少有两行。如果存在只有一行的组,就需要用 if 语句对它们单独处理。
例如,如果我们再添加一行 group="d"、text="d1",可以这样写:
# Pair each row's text with the next row's text inside its group. A group with
# exactly one row has nothing to pair with, so its text is kept unchanged
# rather than being indexed with the invalid range 1:0.
df %>%
  group_by(group) %>%
  summarise(
    text = list(
      if (n() == 1) text else paste0(text[1:(n() - 1)], text[2:n()])
    )
  ) %>%
  # Name the list-column explicitly: calling unnest() without a column relies
  # on deprecated behaviour and warns in tidyr >= 1.0.0.
  unnest(text)
group text
<chr> <chr>
1 a a1a2
2 a a2a3
3 b b1b2
4 c c1c2
5 c c2c3
6 c c3c4
7 d d1
你可以试试:
# Base R: apply the pairing function to the text of each group, then flatten
# the per-group results into a single named character vector.
unlist(
  by(
    data = df2$text,
    INDICES = df2$group,
    FUN = function(txt) {
      # All elements but the last, glued to all elements but the first.
      paste0(head(txt, n = -1), txt[-1])
    }
  )
)
a1 a2 b c1 c2 c3
"a1a2" "a2a3" "b1b2" "c1c2" "c2c3" "c3c4"
另一种 tidyverse 解法:
library(tidyverse)
# Prefix each row's text with the previous row's text in the same group. The
# first row of every group has no predecessor (lag() yields NA there), so it
# is removed afterwards.
dat %>%
  group_by(group) %>%
  mutate(text = paste0(lag(text, n = 1L), text)) %>%
  slice(-1L)
使用 data.table:
library(data.table)
# Convert dat to a data.table by reference.
setDT(dat)
# Per group: glue the lagged text to the current text and discard the first
# element, whose lagged value is NA. The result column keeps the default
# name V1.
dat[
  ,
  .(V1 = paste0(shift(text, n = 1L, type = "lag"), text)[-1L]),
  by = group
]
# group V1
#1: a a1a2
#2: a a2a3
#3: b b1b2
#4: c c1c2
#5: c c2c3
#6: c c3c4
另一个 base R 方案,使用 split 和 stack:
# Base R: split the text by group, glue each element to its successor, stack
# the per-group vectors into a data frame, and put the group label column
# ("ind") before the text column ("values").
stack(
  lapply(
    X = split(x = df1$text, f = df1$group),
    FUN = function(txt) {
      last <- length(txt)
      paste0(txt[-last], txt[-1])
    }
  )
)[c("ind", "values")]
我有一个与此主题类似的问题:"Collapse text by group in data frame [duplicate]"
group text
a a1
a a2
a a3
b b1
b b2
c c1
c c2
c c3
c c4
我想把每个组内相邻的两行文本两两拼接(而不是把整个组折叠成一行),期望的结果如下:
group text
a a1a2
a a2a3
b b1b2
c c1c2
c c2c3
c c3c4
这个怎么样:
library(tidyverse)
# Within each group, glue every row's text to the text of the row that follows
# it. The last row of a group has no successor, so it is padded with NA and
# then dropped.
# Caveat: 1:(n() - 1) is only a valid index range when a group has at least
# two rows.
df %>%
  group_by(group) %>%
  mutate(
    text = c(
      paste0(text[1:(n() - 1)], text[2:n()]),
      NA
    )
  ) %>%
  drop_na(text)
或
# For every group, build the vector of "current + next" text pairs, store it
# in a list-column, then expand that list-column back into one row per pair.
# Caveat: 1:(n() - 1) is only a valid index range when a group has at least
# two rows.
df %>%
  group_by(group) %>%
  summarise(text = list(paste0(text[1:(n() - 1)], text[2:n()]))) %>%
  # Name the list-column explicitly: calling unnest() without a column relies
  # on deprecated behaviour and warns in tidyr >= 1.0.0.
  unnest(text)
group text
1 a a1a2
2 a a2a3
3 b b1b2
4 c c1c2
5 c c2c3
6 c c3c4
上面的代码假设每个组至少有两行。如果存在只有一行的组,就需要用 if 语句对它们单独处理。
例如,如果我们再添加一行 group="d"、text="d1",可以这样写:
# Pair each row's text with the next row's text inside its group. A group with
# exactly one row has nothing to pair with, so its text is kept unchanged
# rather than being indexed with the invalid range 1:0.
df %>%
  group_by(group) %>%
  summarise(
    text = list(
      if (n() == 1) text else paste0(text[1:(n() - 1)], text[2:n()])
    )
  ) %>%
  # Name the list-column explicitly: calling unnest() without a column relies
  # on deprecated behaviour and warns in tidyr >= 1.0.0.
  unnest(text)
group text
<chr> <chr>
1 a a1a2
2 a a2a3
3 b b1b2
4 c c1c2
5 c c2c3
6 c c3c4
7 d d1
你可以试试:
# Base R: apply the pairing function to the text of each group, then flatten
# the per-group results into a single named character vector.
unlist(
  by(
    data = df2$text,
    INDICES = df2$group,
    FUN = function(txt) {
      # All elements but the last, glued to all elements but the first.
      paste0(head(txt, n = -1), txt[-1])
    }
  )
)
a1 a2 b c1 c2 c3
"a1a2" "a2a3" "b1b2" "c1c2" "c2c3" "c3c4"
另一种 tidyverse 解法:
library(tidyverse)
# Prefix each row's text with the previous row's text in the same group. The
# first row of every group has no predecessor (lag() yields NA there), so it
# is removed afterwards.
dat %>%
  group_by(group) %>%
  mutate(text = paste0(lag(text, n = 1L), text)) %>%
  slice(-1L)
使用 data.table:
library(data.table)
# Convert dat to a data.table by reference.
setDT(dat)
# Per group: glue the lagged text to the current text and discard the first
# element, whose lagged value is NA. The result column keeps the default
# name V1.
dat[
  ,
  .(V1 = paste0(shift(text, n = 1L, type = "lag"), text)[-1L]),
  by = group
]
# group V1
#1: a a1a2
#2: a a2a3
#3: b b1b2
#4: c c1c2
#5: c c2c3
#6: c c3c4
另一个 base R 方案,使用 split 和 stack:
# Base R: split the text by group, glue each element to its successor, stack
# the per-group vectors into a data frame, and put the group label column
# ("ind") before the text column ("values").
stack(
  lapply(
    X = split(x = df1$text, f = df1$group),
    FUN = function(txt) {
      last <- length(txt)
      paste0(txt[-last], txt[-1])
    }
  )
)[c("ind", "values")]