Split/melt 数据
Split/melt data
假设我有以下 data.frame
# Example input: some rows pack several letters into one comma-separated string.
df <- data.frame(
  letters = c("a, b", "a", "b", "a, c"),
  value = as.numeric(1:4)
)
df
#> letters value
#> 1 a, b 1
#> 2 a 2
#> 3 b 3
#> 4 a, c 4
我想把它 split/melt 成如下形式:
#> letters value
#> 1 a 1
#> 2 b 1
#> 3 a 2
#> 4 b 3
#> 5 a 4
#> 6 c 4
使得每个 letters 值各占一条记录。
你可以试试 `strsplit`:按逗号拆分字符串,然后按每行拆分出的元素个数重复对应的 value 字段。
# Split every entry of `letters` on commas; yields one character vector per row.
char <- strsplit(as.character(df$letters), ",")
# The source strings use ", " as separator, so splitting on "," leaves a leading
# space on every item after the first (" b", " c"); trimws() removes it.
# Each value is repeated once per item so both columns have the same length.
# vapply() is used instead of sapply() so the counts are always an integer vector.
data.frame(
  letter = trimws(unlist(char)),
  value = rep(df$value, vapply(char, length, integer(1)))
)
letter value
#1 a 1
#2 b 1
#3 a 2
#4 b 3
#5 a 4
#6 c 4
更新:根据 @docendo discimus 在评论中的建议,使用固定字符串匹配(`fixed`)和 `lengths()` 可以得到更快的结果,您可以尝试:
# Faster variant: fixed-string matching skips the regex engine, and lengths()
# returns the per-row item counts directly.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
char <- strsplit(as.character(df$letters), ",", fixed = TRUE)
# trimws() drops the space that follows each comma in the source strings, which
# would otherwise survive as a leading space (" b", " c").
data.frame(letter = trimws(unlist(char)), value = rep(df$value, lengths(char)))
我们也可以使用 splitstackshape 包中的 `cSplit`:
library(splitstackshape)
# Split the "letters" column on ", " and stack the pieces in long form.
cSplit(df, splitCols = "letters", sep = ", ", direction = "long")
# letters value
#1: a 1
#2: b 1
#3: a 2
#4: b 3
#5: a 4
#6: c 4
你也可以按下面的步骤来做:
# Expand `df` so that every comma-separated letter gets its own row.
df <- data.frame(letters = c("a, b", "a", "b", "a, c"), value = c(1, 2, 3, 4))
# One character vector of items per input row.
parts <- strsplit(as.character(df$letters), split = ",", fixed = TRUE)
# Flatten, and strip the space that follows each comma in the source strings.
l <- trimws(unlist(parts))
# TRUE for rows that contain more than one item; no longer needed to build `x`,
# but still defined for any later code that reads it.
Indx <- grepl(",", df$letters, fixed = TRUE)
# Repeat each value once per item in its row. This replaces a loop that grew `x`
# element by element and hard-coded a repeat count of 2, which produced
# mismatched lengths for rows with three or more items.
x <- rep(df$value, lengths(parts))
df <- data.frame(l = l, x = x)
library(dplyr)
library(tidyr)
# Example input: some rows pack several letters into one ", "-separated string.
df <- data.frame(letters = c("a, b", "a", "b", "a, c"), value = c(1, 2, 3, 4))
df %>%
  # Replace each string with a list element holding its individual letters.
  mutate(letters = strsplit(as.character(letters), ", ")) %>%
  # Expand the list-column to one row per letter. The column is passed through
  # `cols` because tidyr >= 1.0.0 warns when that argument is omitted.
  unnest(cols = letters)
Source: local data frame [6 x 2]
value letters
(dbl) (chr)
1 1 a
2 1 b
3 2 a
4 3 b
5 4 a
6 4 c
假设我有以下 data.frame
# Example input: some rows pack several letters into one comma-separated string.
df <- data.frame(
  letters = c("a, b", "a", "b", "a, c"),
  value = as.numeric(1:4)
)
df
#> letters value
#> 1 a, b 1
#> 2 a 2
#> 3 b 3
#> 4 a, c 4
我想把它 split/melt 成如下形式:
#> letters value
#> 1 a 1
#> 2 b 1
#> 3 a 2
#> 4 b 3
#> 5 a 4
#> 6 c 4
使得每个 letters 值各占一条记录。
你可以试试 `strsplit`:按逗号拆分字符串,然后按每行拆分出的元素个数重复对应的 value 字段。
# Split every entry of `letters` on commas; yields one character vector per row.
char <- strsplit(as.character(df$letters), ",")
# The source strings use ", " as separator, so splitting on "," leaves a leading
# space on every item after the first (" b", " c"); trimws() removes it.
# Each value is repeated once per item so both columns have the same length.
# vapply() is used instead of sapply() so the counts are always an integer vector.
data.frame(
  letter = trimws(unlist(char)),
  value = rep(df$value, vapply(char, length, integer(1)))
)
letter value
#1 a 1
#2 b 1
#3 a 2
#4 b 3
#5 a 4
#6 c 4
更新:根据 @docendo discimus 在评论中的建议,使用固定字符串匹配(`fixed`)和 `lengths()` 可以得到更快的结果,您可以尝试:
# Faster variant: fixed-string matching skips the regex engine, and lengths()
# returns the per-row item counts directly.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
char <- strsplit(as.character(df$letters), ",", fixed = TRUE)
# trimws() drops the space that follows each comma in the source strings, which
# would otherwise survive as a leading space (" b", " c").
data.frame(letter = trimws(unlist(char)), value = rep(df$value, lengths(char)))
我们也可以使用 splitstackshape 包中的 `cSplit`:
library(splitstackshape)
# Split the "letters" column on ", " and stack the pieces in long form.
cSplit(df, splitCols = "letters", sep = ", ", direction = "long")
# letters value
#1: a 1
#2: b 1
#3: a 2
#4: b 3
#5: a 4
#6: c 4
你也可以按下面的步骤来做:
# Expand `df` so that every comma-separated letter gets its own row.
df <- data.frame(letters = c("a, b", "a", "b", "a, c"), value = c(1, 2, 3, 4))
# One character vector of items per input row.
parts <- strsplit(as.character(df$letters), split = ",", fixed = TRUE)
# Flatten, and strip the space that follows each comma in the source strings.
l <- trimws(unlist(parts))
# TRUE for rows that contain more than one item; no longer needed to build `x`,
# but still defined for any later code that reads it.
Indx <- grepl(",", df$letters, fixed = TRUE)
# Repeat each value once per item in its row. This replaces a loop that grew `x`
# element by element and hard-coded a repeat count of 2, which produced
# mismatched lengths for rows with three or more items.
x <- rep(df$value, lengths(parts))
df <- data.frame(l = l, x = x)
library(dplyr)
library(tidyr)
# Example input: some rows pack several letters into one ", "-separated string.
df <- data.frame(letters = c("a, b", "a", "b", "a, c"), value = c(1, 2, 3, 4))
df %>%
  # Replace each string with a list element holding its individual letters.
  mutate(letters = strsplit(as.character(letters), ", ")) %>%
  # Expand the list-column to one row per letter. The column is passed through
  # `cols` because tidyr >= 1.0.0 warns when that argument is omitted.
  unnest(cols = letters)
Source: local data frame [6 x 2]
value letters
(dbl) (chr)
1 1 a
2 1 b
3 2 a
4 3 b
5 4 a
6 4 c