R:拆分格式如下的字符串 "xxx; yyy; zzz;"
R: splitting string that has format like this "xxx; yyy; zzz;"
我得到的原始数据是这样的,都在一列中
John;Peter;Eric;
Susan;Mary;Kate;
但我想将它们拆分为三个单独的列
John Peter Eric
Susan Mary Kate
谁能告诉我如何在 R 中做到这一点?提前致谢!
你可以试试cSplit
library(splitstackshape)
# Split the values of col1 on ";" into separate columns.
cSplit(indt = df1, splitCols = "col1", sep = ";")
# col1_1 col1_2 col1_3
#1: John Peter Eric
#2: Susan Mary Kate
或
library(tidyr)
# Split col1 on ";" into four pieces, then drop the fourth column with [-4]
# (each value ends in ";", so presumably the fourth piece is empty).
separate(data = df1, col = col1, into = paste0("col", 1:4), sep = ";")[-4]
# col1 col2 col3
#1 John Peter Eric
#2 Susan Mary Kate
或
# Capture the three ";"-separated fields of col1, one regex group per column.
extract(
  data = df1,
  col = col1,
  into = paste0("col", 1:3),
  regex = "([^;]+);([^;]+);([^;]+)"
)
# col1 col2 col3
#1 John Peter Eric
#2 Susan Mary Kate
或使用base R
# Split each string on ";", stack the pieces row-wise into a matrix, and
# convert that matrix to a data frame.
as.data.frame(
  do.call(rbind, strsplit(df1[["col1"]], split = ";"))
)
数据
# Example input: a two-row data frame with one character column, col1, whose
# values are ";"-delimited names with a trailing ";".
df1 <- data.frame(
  col1 = c("John;Peter;Eric;", "Susan;Mary;Kate;"),
  stringsAsFactors = FALSE
)
再补充一种使用 fread() 的方法
# Two ";"-delimited records held in one string, separated by a newline.
x <- "John;Peter;Eric;
Susan;Mary;Kate;"
# Pass the string through text= so fread treats it as literal data instead of
# guessing whether it is a file path or a shell command. drop = 4 discards the
# fourth column (each line ends in ";", which presumably yields an empty
# trailing field).
data.table::fread(text = x, header = FALSE, drop = 4)
# V1 V2 V3
# 1: John Peter Eric
# 2: Susan Mary Kate
如果想直接返回数据框(data.frame),
# Parse the string x as ";"-delimited text and return a plain data.frame
# (data.table = FALSE). text= makes fread treat x as literal data rather than
# guessing whether it is a file path or a shell command; drop = 4 discards the
# fourth column.
data.table::fread(text = x, header = FALSE, drop = 4, data.table = FALSE)
# V1 V2 V3
# 1 John Peter Eric
# 2 Susan Mary Kate
如果想快速得到一个可以转换为数据框的矩阵,
library(stringi)
# Break x into lines, split each line on the literal ";", drop empty pieces,
# and return the result as a character matrix (simplify = TRUE).
# omit_empty is spelled out in full rather than relying on partial matching
# of the abbreviated name `omit`.
stri_split_fixed(
  stri_split_lines1(x),
  pattern = ";",
  omit_empty = TRUE,
  simplify = TRUE
)
# [,1] [,2] [,3]
# [1,] "John" "Peter" "Eric"
# [2,] "Susan" "Mary" "Kate"
base R:
# Extract every run of ASCII letters from x and fill a 3-column matrix row by
# row. The class is [A-Za-z] rather than [aA-zZ]: the range A-z also covers
# the punctuation characters [ \ ] ^ _ ` that sit between "Z" and "a".
matrix(
  regmatches(x, gregexpr("[A-Za-z]+", x, perl = TRUE))[[1]],
  ncol = 3,
  byrow = TRUE
)
[,1] [,2] [,3]
[1,] "John" "Peter" "Eric"
[2,] "Susan" "Mary" "Kate"
我得到的原始数据是这样的,都在一列中
John;Peter;Eric;
Susan;Mary;Kate;
但我想将它们拆分为三个单独的列
John Peter Eric
Susan Mary Kate
谁能告诉我如何在 R 中做到这一点?提前致谢!
你可以试试cSplit
library(splitstackshape)
# Split the values of col1 on ";" into separate columns.
cSplit(indt = df1, splitCols = "col1", sep = ";")
# col1_1 col1_2 col1_3
#1: John Peter Eric
#2: Susan Mary Kate
或
library(tidyr)
# Split col1 on ";" into four pieces, then drop the fourth column with [-4]
# (each value ends in ";", so presumably the fourth piece is empty).
separate(data = df1, col = col1, into = paste0("col", 1:4), sep = ";")[-4]
# col1 col2 col3
#1 John Peter Eric
#2 Susan Mary Kate
或
# Capture the three ";"-separated fields of col1, one regex group per column.
extract(
  data = df1,
  col = col1,
  into = paste0("col", 1:3),
  regex = "([^;]+);([^;]+);([^;]+)"
)
# col1 col2 col3
#1 John Peter Eric
#2 Susan Mary Kate
或使用base R
# Split each string on ";", stack the pieces row-wise into a matrix, and
# convert that matrix to a data frame.
as.data.frame(
  do.call(rbind, strsplit(df1[["col1"]], split = ";"))
)
数据
# Example input: a two-row data frame with one character column, col1, whose
# values are ";"-delimited names with a trailing ";".
df1 <- data.frame(
  col1 = c("John;Peter;Eric;", "Susan;Mary;Kate;"),
  stringsAsFactors = FALSE
)
再补充一种使用 fread() 的方法
# Two ";"-delimited records held in one string, separated by a newline.
x <- "John;Peter;Eric;
Susan;Mary;Kate;"
# Pass the string through text= so fread treats it as literal data instead of
# guessing whether it is a file path or a shell command. drop = 4 discards the
# fourth column (each line ends in ";", which presumably yields an empty
# trailing field).
data.table::fread(text = x, header = FALSE, drop = 4)
# V1 V2 V3
# 1: John Peter Eric
# 2: Susan Mary Kate
如果想直接返回数据框(data.frame),
# Parse the string x as ";"-delimited text and return a plain data.frame
# (data.table = FALSE). text= makes fread treat x as literal data rather than
# guessing whether it is a file path or a shell command; drop = 4 discards the
# fourth column.
data.table::fread(text = x, header = FALSE, drop = 4, data.table = FALSE)
# V1 V2 V3
# 1 John Peter Eric
# 2 Susan Mary Kate
如果想快速得到一个可以转换为数据框的矩阵,
library(stringi)
# Break x into lines, split each line on the literal ";", drop empty pieces,
# and return the result as a character matrix (simplify = TRUE).
# omit_empty is spelled out in full rather than relying on partial matching
# of the abbreviated name `omit`.
stri_split_fixed(
  stri_split_lines1(x),
  pattern = ";",
  omit_empty = TRUE,
  simplify = TRUE
)
# [,1] [,2] [,3]
# [1,] "John" "Peter" "Eric"
# [2,] "Susan" "Mary" "Kate"
base R:
# Extract every run of ASCII letters from x and fill a 3-column matrix row by
# row. The class is [A-Za-z] rather than [aA-zZ]: the range A-z also covers
# the punctuation characters [ \ ] ^ _ ` that sit between "Z" and "a".
matrix(
  regmatches(x, gregexpr("[A-Za-z]+", x, perl = TRUE))[[1]],
  ncol = 3,
  byrow = TRUE
)
[,1] [,2] [,3]
[1,] "John" "Peter" "Eric"
[2,] "Susan" "Mary" "Kate"