向量元素到数据框的不同列
Element of vector to different columns of data frame
我有一个 df:
group number id
1 A abcd 1
2 A abcd 2
3 A abcd 3
4 A efgh 4
5 A efgh 5
6 B abcd 1
7 B abcd 2
8 B abcd 3
9 B abcd 9
10 B ijkl 10
我想做成这样:
group number data1 data2 data3 data4 Length
1 A abcd 1 2 3 3
2 A efgh 4 5 2
3 B abcd 1 2 3 9 4
4 B ijkl 10 1
抱歉,我目前只能得到这样的 df2:
group number data Length
1 A abcd c(1,2,3) 3
2 A efgh c(4,5) 2
3 B abcd c(1,2,3,9) 4
4 B ijkl 10 1
我的代码在这里:
library(tidyverse)
# Sample data: ids keyed by a (group, number) pair.
df <- data.frame(
  group = c(rep("A", 5), rep("B", 5)),
  number = c(rep("abcd", 3), rep("efgh", 2), rep("abcd", 4), rep("ijkl", 1)),
  id = c(1, 2, 3, 4, 5, 1, 2, 3, 9, 10)
)
# Nest the ids of each (group, number) pair, flatten every nested frame to a
# plain vector, and store the vector's length next to it. Both `data` and
# `Length` are list-columns because they are built with map().
df2 <- df %>%
  group_by(group, number) %>%
  nest() %>%
  mutate(
    data = map(data, unlist, recursive = TRUE, use.names = FALSE),
    Length = map(data, length)
  )
可以从 df 或 df2 开始,使用或不使用任何包都可以。
您可以将列名 count 改为 length(另外,我更倾向于用 NA 而不是空白;如果想把 NA 改成空白,可以使用 df2[is.na(df2)]='')
选项 1
df <- data.frame(
  group = c(rep("A", 5), rep("B", 5)),
  number = c(rep("abcd", 3), rep("efgh", 2), rep("abcd", 4), rep("ijkl", 1)),
  id = c(1, 2, 3, 4, 5, 1, 2, 3, 9, 10)
)
# Collapse every (group, number) pair to ONE row holding the comma-separated
# ids and their count. summarise() is required here: mutate() would keep all
# ten input rows and leave `id` as the third column.
df2 <- df %>%
  group_by(group, number) %>%
  summarise(data = toString(id), count = n())
library(splitstackshape)
# Split the comma-separated `data` column into data_1, data_2, ...; the column
# is selected by name so the call does not depend on column order.
cSplit(df2, "data", drop = TRUE, sep = ",")
group number count data_1 data_2 data_3 data_4
1: A abcd 3 1 2 3 NA
2: A efgh 2 4 5 NA NA
3: B abcd 4 1 2 3 9
4: B ijkl 1 10 NA NA NA
选项 2
library(dplyr)
library(tidyr)
# Collapse the ids per (group, number), split them back into one row per id,
# number them within each pair and spread them into data1, data2, ...
# summarise() drops only the innermost grouping level, so the rows are
# re-grouped by both keys before numbering; otherwise the data<N> index keeps
# counting across different `number` values inside the same `group`.
df2 <- df %>%
  group_by(group, number) %>%
  summarise(data = toString(id), count = n()) %>%
  separate_rows(data) %>%
  group_by(group, number) %>%
  mutate(Col = paste0("data", row_number())) %>%
  ungroup() %>%
  spread(Col, data)
df2
# Expected result (data* columns are character because they come from
# toString(); NA marks positions a pair does not have):
#   group number count data1 data2 data3 data4
# 1 A     abcd       3 1     2     3     <NA>
# 2 A     efgh       2 4     5     <NA>  <NA>
# 3 B     abcd       4 1     2     3     9
# 4 B     ijkl       1 10    <NA>  <NA>  <NA>
使用基础 R(base R)
# One data frame per (group, number) pair, keyed by "<group> <number>".
temp <- split(df, paste(df$group, df$number))
# The largest pair decides how many data<N> columns every row needs.
columns <- max(vapply(temp, NROW, integer(1)))
# Build one wide row per pair and stack the rows.
do.call(rbind, lapply(temp, function(piece) {
  ids <- piece$id
  # Indexing past the end of `ids` pads the shorter pairs with NA.
  wide_ids <- setNames(
    data.frame(t(ids[seq_len(columns)])),
    paste0("data", seq_len(columns))
  )
  cbind(
    group = piece$group[1],
    number = piece$number[1],
    wide_ids,
    length = length(ids)
  )
}))
# group number data1 data2 data3 data4 length
#A abcd A abcd 1 2 3 NA 3
#A efgh A efgh 4 5 NA NA 2
#B abcd B abcd 1 2 3 9 4
#B ijkl B ijkl 10 NA NA NA 1
我现在无法实际运行测试,只能凭印象给出,但下面的代码应该可行或接近可行:
library(tidyverse)
# Label every id with its position inside its (group, number) pair, record the
# pair size, then spread the labelled ids into data1, data2, ... columns,
# filling the missing positions with an empty string.
df %>%
  group_by(group, number) %>%
  mutate(
    key = paste0("data", row_number()),
    length = n()
  ) %>%
  ungroup() %>%
  spread(key = key, value = id, fill = "")
如果要从您的嵌套数据出发,我认为必须先把这些向量转换成列数和列名都相同的单行 data.frame,然后再使用 unnest,这要复杂得多! :)
这是一个使用 data.table 的方案
library(data.table)
# Reshape to wide: one row per group + number, with column names built from
# "data" plus the running index that rowid() assigns inside each pair. Cells
# with no id are filled with 0.
# Note: setDT() converts `df` to a data.table by reference.
# `length` is then added in place by counting, per row, how many of the
# data1..data4 cells are greater than 0; the trailing [] prints the result.
# NOTE(review): the count relies on every real id being > 0 and on the columns
# being exactly data1:data4 -- confirm before reusing on other data.
dcast(setDT(df), group + number~ paste0("data", rowid(group, number)),
value.var = 'id', fill = 0)[,
length := Reduce(`+`, lapply(.SD, `>`, 0)), .SDcols = data1:data4][]
# group number data1 data2 data3 data4 length
#1: A abcd 1 2 3 0 3
#2: A efgh 4 5 0 0 2
#3: B abcd 1 2 3 9 4
#4: B ijkl 10 0 0 0 1
这是上面 data.table 方案的一个变体:它在从长格式重塑为宽格式之前先计算 Length,并在调用 rowid() 时使用 prefix 参数:
library(data.table)
# Step 1: on a data.table copy of df, store the size of each (group, number)
# pair in Length. Step 2: cast to wide, naming the value columns data1,
# data2, ... via rowid()'s prefix argument; missing positions stay NA.
data.table(df)[
  , Length := .N, by = .(group, number)
][
  , dcast(
    data = .SD,
    formula = group + number + Length ~ rowid(group, number, prefix = "data"),
    value.var = "id"
  )
]
group number Length data1 data2 data3 data4
1: A abcd 3 1 2 3 NA
2: A efgh 2 4 5 NA NA
3: B abcd 4 1 2 3 9
4: B ijkl 1 10 NA NA NA
为了让打印结果更美观,可以将 NA 值转换为空白(white space):
# Same reshape as before, but the ids are converted to character while casting
# so that missing positions can be filled with an empty string instead of NA.
data.table(df)[
  , Length := .N, by = .(group, number)
][
  , dcast(
    data = .SD,
    formula = group + number + Length ~ rowid(group, number, prefix = "data"),
    fun.aggregate = as.character,
    value.var = "id",
    fill = ""
  )
]
group number Length data1 data2 data3 data4
1: A abcd 3 1 2 3
2: A efgh 2 4 5
3: B abcd 4 1 2 3 9
4: B ijkl 1 10
我有一个 df:
group number id
1 A abcd 1
2 A abcd 2
3 A abcd 3
4 A efgh 4
5 A efgh 5
6 B abcd 1
7 B abcd 2
8 B abcd 3
9 B abcd 9
10 B ijkl 10
我想做成这样:
group number data1 data2 data3 data4 Length
1 A abcd 1 2 3 3
2 A efgh 4 5 2
3 B abcd 1 2 3 9 4
4 B ijkl 10 1
抱歉,我目前只能得到这样的 df2:
group number data Length
1 A abcd c(1,2,3) 3
2 A efgh c(4,5) 2
3 B abcd c(1,2,3,9) 4
4 B ijkl 10 1
我的代码在这里:
library(tidyverse)
# Sample data: ids keyed by a (group, number) pair.
df <- data.frame(
  group = c(rep("A", 5), rep("B", 5)),
  number = c(rep("abcd", 3), rep("efgh", 2), rep("abcd", 4), rep("ijkl", 1)),
  id = c(1, 2, 3, 4, 5, 1, 2, 3, 9, 10)
)
# Nest the ids of each (group, number) pair, flatten every nested frame to a
# plain vector, and store the vector's length next to it. Both `data` and
# `Length` are list-columns because they are built with map().
df2 <- df %>%
  group_by(group, number) %>%
  nest() %>%
  mutate(
    data = map(data, unlist, recursive = TRUE, use.names = FALSE),
    Length = map(data, length)
  )
可以从 df 或 df2 开始,使用或不使用任何包都可以。
您可以将列名 count 改为 length(另外,我更倾向于用 NA 而不是空白;如果想把 NA 改成空白,可以使用 df2[is.na(df2)]='')
选项 1
df <- data.frame(
  group = c(rep("A", 5), rep("B", 5)),
  number = c(rep("abcd", 3), rep("efgh", 2), rep("abcd", 4), rep("ijkl", 1)),
  id = c(1, 2, 3, 4, 5, 1, 2, 3, 9, 10)
)
# Collapse every (group, number) pair to ONE row holding the comma-separated
# ids and their count. summarise() is required here: mutate() would keep all
# ten input rows and leave `id` as the third column.
df2 <- df %>%
  group_by(group, number) %>%
  summarise(data = toString(id), count = n())
library(splitstackshape)
# Split the comma-separated `data` column into data_1, data_2, ...; the column
# is selected by name so the call does not depend on column order.
cSplit(df2, "data", drop = TRUE, sep = ",")
group number count data_1 data_2 data_3 data_4
1: A abcd 3 1 2 3 NA
2: A efgh 2 4 5 NA NA
3: B abcd 4 1 2 3 9
4: B ijkl 1 10 NA NA NA
选项 2
library(dplyr)
library(tidyr)
# Collapse the ids per (group, number), split them back into one row per id,
# number them within each pair and spread them into data1, data2, ...
# summarise() drops only the innermost grouping level, so the rows are
# re-grouped by both keys before numbering; otherwise the data<N> index keeps
# counting across different `number` values inside the same `group`.
df2 <- df %>%
  group_by(group, number) %>%
  summarise(data = toString(id), count = n()) %>%
  separate_rows(data) %>%
  group_by(group, number) %>%
  mutate(Col = paste0("data", row_number())) %>%
  ungroup() %>%
  spread(Col, data)
df2
# Expected result (data* columns are character because they come from
# toString(); NA marks positions a pair does not have):
#   group number count data1 data2 data3 data4
# 1 A     abcd       3 1     2     3     <NA>
# 2 A     efgh       2 4     5     <NA>  <NA>
# 3 B     abcd       4 1     2     3     9
# 4 B     ijkl       1 10    <NA>  <NA>  <NA>
使用基础 R(base R)
# One data frame per (group, number) pair, keyed by "<group> <number>".
temp <- split(df, paste(df$group, df$number))
# The largest pair decides how many data<N> columns every row needs.
columns <- max(vapply(temp, NROW, integer(1)))
# Build one wide row per pair and stack the rows.
do.call(rbind, lapply(temp, function(piece) {
  ids <- piece$id
  # Indexing past the end of `ids` pads the shorter pairs with NA.
  wide_ids <- setNames(
    data.frame(t(ids[seq_len(columns)])),
    paste0("data", seq_len(columns))
  )
  cbind(
    group = piece$group[1],
    number = piece$number[1],
    wide_ids,
    length = length(ids)
  )
}))
# group number data1 data2 data3 data4 length
#A abcd A abcd 1 2 3 NA 3
#A efgh A efgh 4 5 NA NA 2
#B abcd B abcd 1 2 3 9 4
#B ijkl B ijkl 10 NA NA NA 1
我现在无法实际运行测试,只能凭印象给出,但下面的代码应该可行或接近可行:
library(tidyverse)
# Label every id with its position inside its (group, number) pair, record the
# pair size, then spread the labelled ids into data1, data2, ... columns,
# filling the missing positions with an empty string.
df %>%
  group_by(group, number) %>%
  mutate(
    key = paste0("data", row_number()),
    length = n()
  ) %>%
  ungroup() %>%
  spread(key = key, value = id, fill = "")
如果要从您的嵌套数据出发,我认为必须先把这些向量转换成列数和列名都相同的单行 data.frame,然后再使用 unnest,这要复杂得多! :)
这是一个使用 data.table 的方案:
library(data.table)
# Reshape to wide: one row per group + number, with column names built from
# "data" plus the running index that rowid() assigns inside each pair. Cells
# with no id are filled with 0.
# Note: setDT() converts `df` to a data.table by reference.
# `length` is then added in place by counting, per row, how many of the
# data1..data4 cells are greater than 0; the trailing [] prints the result.
# NOTE(review): the count relies on every real id being > 0 and on the columns
# being exactly data1:data4 -- confirm before reusing on other data.
dcast(setDT(df), group + number~ paste0("data", rowid(group, number)),
value.var = 'id', fill = 0)[,
length := Reduce(`+`, lapply(.SD, `>`, 0)), .SDcols = data1:data4][]
# group number data1 data2 data3 data4 length
#1: A abcd 1 2 3 0 3
#2: A efgh 4 5 0 0 2
#3: B abcd 1 2 3 9 4
#4: B ijkl 10 0 0 0 1
这是上面 data.table 方案的一个变体:它在从长格式重塑为宽格式之前先计算 Length,并在调用 rowid() 时使用 prefix 参数:
library(data.table)
# Step 1: on a data.table copy of df, store the size of each (group, number)
# pair in Length. Step 2: cast to wide, naming the value columns data1,
# data2, ... via rowid()'s prefix argument; missing positions stay NA.
data.table(df)[
  , Length := .N, by = .(group, number)
][
  , dcast(
    data = .SD,
    formula = group + number + Length ~ rowid(group, number, prefix = "data"),
    value.var = "id"
  )
]
group number Length data1 data2 data3 data4
1: A abcd 3 1 2 3 NA
2: A efgh 2 4 5 NA NA
3: B abcd 4 1 2 3 9
4: B ijkl 1 10 NA NA NA
为了让打印结果更美观,可以将 NA 值转换为空白(white space):
# Same reshape as before, but the ids are converted to character while casting
# so that missing positions can be filled with an empty string instead of NA.
data.table(df)[
  , Length := .N, by = .(group, number)
][
  , dcast(
    data = .SD,
    formula = group + number + Length ~ rowid(group, number, prefix = "data"),
    fun.aggregate = as.character,
    value.var = "id",
    fill = ""
  )
]
group number Length data1 data2 data3 data4
1: A abcd 3 1 2 3
2: A efgh 2 4 5
3: B abcd 4 1 2 3 9
4: B ijkl 1 10