如何对 data.table 的多个列和多个值进行重塑(cast,长表转宽表)?
How to cast multiple columns and values of a data.table?
我的数据结构如下:
# Reproducible example: ten price observations for four Ids in two groups.
# Id 101 has two rows dated 1997-01-04, so any cast by Date must aggregate.
DT <- data.table(
  Id = rep(c(1, 10, 100, 101), times = c(4, 1, 2, 3)),
  Date = as.Date(c(
    "1997-01-01", "1997-01-02", "1997-01-03", "1997-01-04", "1997-01-02",
    "1997-01-02", "1997-01-04", "1997-01-03", "1997-01-04", "1997-01-04"
  )),
  group = rep(c(1, 2), each = 5),
  Price.1 = c(29, 25, 14, 26, 30, 16, 13, 62, 12, 6),
  Price.2 = c(4, 5, 6, 6, 8, 2, 3, 5, 7, 8)
)
>DT
Id Date group Price.1 Price.2
1: 1 1997-01-01 1 29 4
2: 1 1997-01-02 1 25 5
3: 1 1997-01-03 1 14 6
4: 1 1997-01-04 1 26 6
5: 10 1997-01-02 1 30 8
6: 100 1997-01-02 2 16 2
7: 100 1997-01-04 2 13 3
8: 101 1997-01-03 2 62 5
9: 101 1997-01-04 2 12 7
10: 101 1997-01-04 2 6 8
我正在尝试对它进行重塑(cast,使用 dcast.data.table):
# Four separate casts: each price column is summed per Id, once spread across
# Date and once spread across group.
dcast.data.table(DT, Id ~ Date, value.var = "Price.1", fun.aggregate = sum)
dcast.data.table(DT, Id ~ group, value.var = "Price.1", fun.aggregate = sum)
dcast.data.table(DT, Id ~ Date, value.var = "Price.2", fun.aggregate = sum)
dcast.data.table(DT, Id ~ group, value.var = "Price.2", fun.aggregate = sum)
但我试图获得以下内容而不是 4 个单独的输出:
Id 1997-01-01 1997-01-02 1997-01-03 1997-01-04 1 2 Price
1: 1 29 25 14 26 94 0 Price.1
2: 10 0 30 0 0 30 0 Price.1
3: 100 0 16 0 13 0 29 Price.1
4: 101 0 0 62 18 0 80 Price.1
5: 1 4 5 6 6 21 0 Price.2
6: 10 0 8 0 0 8 0 Price.2
7: 100 0 2 0 3 0 5 Price.2
8: 101 0 0 5 15 0 20 Price.2
我的解决方法是使用 rbind、cbind 和 merge。
# Workaround: for each price column, merge the Date-wise cast with the
# group-wise cast on Id, stack the two merged tables with rbind, then attach a
# label column with cbind that says which price column each row came from.
cbind(
  rbind(
    merge(
      dcast.data.table(DT, Id ~ Date, fun.aggregate = sum,
                       value.var = "Price.1"),
      dcast.data.table(DT, Id ~ group, fun.aggregate = sum,
                       value.var = "Price.1"),
      by = "Id", all.x = TRUE
    ),
    merge(
      dcast.data.table(DT, Id ~ Date, fun.aggregate = sum,
                       value.var = "Price.2"),
      dcast.data.table(DT, Id ~ group, fun.aggregate = sum,
                       value.var = "Price.2"),
      by = "Id", all.x = TRUE
    )
  ),
  # Each stacked half has one row per distinct Id, so derive the label count
  # from the data rather than typing out a fixed number of labels.
  Price = rep(c("Price.1", "Price.2"), each = length(unique(DT$Id)))
)
是否有更简洁的现有方法来执行此操作?
这真的是一次试错:我希望它能奏效。
library(data.table)  # version 1.9.4
library(reshape2)

# Stack Price.1 / Price.2 into long form: one output row per original row and
# price column, with the price stored in "Price" and its source in "variable".
kk <- melt(
  DT,
  id.vars = c("Id", "Date", "group"),
  measure.vars = c("Price.1", "Price.2"),
  value.name = "Price"
)

# Spread back out by Date, summing duplicates. margins = "Date" (idea borrowed
# from @Frank) appends the "(all)" total column.
dcast(
  kk,
  Id + variable + group ~ Date,
  value.var = "Price",
  fun.aggregate = sum,
  margins = "Date"
)
# Id variable group 1997-01-01 1997-01-02 1997-01-03 1997-01-04 (all)
# 1 1 Price.1 1 29 25 14 26 94
# 2 1 Price.2 1 4 5 6 6 21
# 3 10 Price.1 1 0 30 0 0 30
# 4 10 Price.2 1 0 8 0 0 8
# 5 100 Price.1 2 0 16 0 13 29
# 6 100 Price.2 2 0 2 0 3 5
# 7 101 Price.1 2 0 0 62 18 80
# 8 101 Price.2 2 0 0 5 15 20
我假设每个 `Id` 只对应一个唯一的 `group`,因此先把 `group` 这一列删掉;除此之外,这与 @user227710 的答案基本相同。
# Remember which group each Id belongs to, then drop the column from DT (by
# reference) so that only the Price.* columns are left to be melted.
Idg <- unique(DT[, list(Id, group)])
DT[, group := NULL]

# Long form via melt, then back to wide by Date with a "(all)" total column.
res <- dcast(
  melt(DT, id.vars = c("Id", "Date")),
  variable + Id ~ Date,
  fun.aggregate = sum,
  fill = 0,
  margins = "Date",
  value.var = "value"
)

# Optional: bring the group column back by joining on Id.
setDT(res)  # needed before data.table 1.9.5, where using dcast.data.table is another option
setkeyv(res, "Id")
res[Idg][order(variable, Id)]
这给出了
variable Id 1997-01-01 1997-01-02 1997-01-03 1997-01-04 (all) group
1: Price.1 1 29 25 14 26 94 1
2: Price.2 1 4 5 6 6 21 1
3: Price.1 10 0 30 0 0 30 1
4: Price.2 10 0 8 0 0 8 1
5: Price.1 100 0 16 0 13 29 2
6: Price.2 100 0 2 0 3 5 2
7: Price.1 101 0 0 62 18 80 2
8: Price.2 101 0 0 5 15 20 2
作为对比,这里给出一个 dplyr 的解决方案(因为我还没学会如何熟练地用 melt 来重塑数据)。
# NOTE(review): no library() calls are shown for this snippet; presumably
# dplyr, tidyr and stringr are already attached (%>%, unite/spread/separate,
# str_sub) -- confirm before running.
#
# Step 1: aggregate the data completely.
# Rows 9 & 10 share the same Id/Date/group and must be collapsed into one row,
# and spread() works on a single key, so Id and group are fused into id_grp.
DTT <-
DT %>%
group_by(Id, Date, group) %>%
# Sum both price columns within each Id/Date/group combination.
summarise(Price.1 = sum(Price.1), Price.2 = sum(Price.2)) %>%
# NOTE(review): no `by` is given, so this presumably joins on every column the
# two tables share and adds no new columns -- confirm whether it is needed.
left_join(DT) %>%
# Combine Id and group into one key column of the form "<Id>_<group>".
unite(id_grp, Id, group, sep = "_") %>%
group_by(id_grp) %>%
# s1 / s2: totals of Price.1 / Price.2 over all dates within each id_grp.
mutate(s1 = sum(Price.1), s2 = sum(Price.2))
# Step 2: pivot long to wide for the 1st price column; each Date becomes a
# column holding Price.1. No `fill` is passed to spread(), and the printed
# result shows NA for Id/Date combinations that have no data.
DW1 <-
DTT %>%
select(-Price.2) %>%
spread(Date, Price.1) %>%
mutate(Price = "Price.1")
# Step 3: the same long-to-wide pivot for the 2nd price column.
DW2 <-
DTT %>%
select(-Price.1) %>%
spread(Date, Price.2) %>%
mutate(Price = "Price.2")
# Step 4: stack both wide tables and tidy up the result.
DWFin <-
bind_rows(DW1,DW2) %>%
# Split the fused key back into Id and group (Id prints as <chr> afterwards).
separate(id_grp, c("Id", "group")) %>%
# p is the last character of the Price label ("1" or "2").
# g copies group but is not kept by the final select().
mutate(g = group, p = str_sub(Price, -1),
# n1: the matching total (s1 for Price.1 rows, s2 for Price.2 rows) when the
# row belongs to group 1, otherwise 0.
n1 = ifelse(group == 1 & p == 1, s1, ifelse(group == 1 & p == 2, s2, 0)),
# n2: the same total for rows belonging to group 2, otherwise 0.
n2 = ifelse(group == 2 & p == 2, s2, ifelse(group == 2 & p == 1, s1, 0))) %>%
# Keep Id, the date columns (names starting with "19"), the two group totals
# renamed to "1" and "2", and the Price label.
select(Id, starts_with("19"), "1" = n1, "2" = n2, Price)
# Print the final table.
DWFin
Source: local data table [8 x 8]
# tbl_dt [8 × 8]
Id `1997-01-01` `1997-01-02` `1997-01-03` `1997-01-04` `1` `2` Price
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
1 1 29 25 14 26 94 0 Price.1
2 10 NA 30 NA NA 30 0 Price.1
3 100 NA 16 NA 13 0 29 Price.1
4 101 NA NA 62 18 0 80 Price.1
5 1 4 5 6 6 21 0 Price.2
6 10 NA 8 NA NA 8 0 Price.2
7 100 NA 2 NA 3 0 5 Price.2
8 101 NA NA 5 15 0 20 Price.2
我的数据结构如下:
# Reproducible example: ten price observations for four Ids in two groups.
# Id 101 has two rows dated 1997-01-04, so any cast by Date must aggregate.
DT <- data.table(
  Id = rep(c(1, 10, 100, 101), times = c(4, 1, 2, 3)),
  Date = as.Date(c(
    "1997-01-01", "1997-01-02", "1997-01-03", "1997-01-04", "1997-01-02",
    "1997-01-02", "1997-01-04", "1997-01-03", "1997-01-04", "1997-01-04"
  )),
  group = rep(c(1, 2), each = 5),
  Price.1 = c(29, 25, 14, 26, 30, 16, 13, 62, 12, 6),
  Price.2 = c(4, 5, 6, 6, 8, 2, 3, 5, 7, 8)
)
>DT
Id Date group Price.1 Price.2
1: 1 1997-01-01 1 29 4
2: 1 1997-01-02 1 25 5
3: 1 1997-01-03 1 14 6
4: 1 1997-01-04 1 26 6
5: 10 1997-01-02 1 30 8
6: 100 1997-01-02 2 16 2
7: 100 1997-01-04 2 13 3
8: 101 1997-01-03 2 62 5
9: 101 1997-01-04 2 12 7
10: 101 1997-01-04 2 6 8
我正在尝试对它进行重塑(cast,使用 dcast.data.table):
# Four separate casts: each price column is summed per Id, once spread across
# Date and once spread across group.
dcast.data.table(DT, Id ~ Date, value.var = "Price.1", fun.aggregate = sum)
dcast.data.table(DT, Id ~ group, value.var = "Price.1", fun.aggregate = sum)
dcast.data.table(DT, Id ~ Date, value.var = "Price.2", fun.aggregate = sum)
dcast.data.table(DT, Id ~ group, value.var = "Price.2", fun.aggregate = sum)
但我试图获得以下内容而不是 4 个单独的输出:
Id 1997-01-01 1997-01-02 1997-01-03 1997-01-04 1 2 Price
1: 1 29 25 14 26 94 0 Price.1
2: 10 0 30 0 0 30 0 Price.1
3: 100 0 16 0 13 0 29 Price.1
4: 101 0 0 62 18 0 80 Price.1
5: 1 4 5 6 6 21 0 Price.2
6: 10 0 8 0 0 8 0 Price.2
7: 100 0 2 0 3 0 5 Price.2
8: 101 0 0 5 15 0 20 Price.2
我的解决方法是使用 rbind、cbind 和 merge。
# Workaround: for each price column, merge the Date-wise cast with the
# group-wise cast on Id, stack the two merged tables with rbind, then attach a
# label column with cbind that says which price column each row came from.
cbind(
  rbind(
    merge(
      dcast.data.table(DT, Id ~ Date, fun.aggregate = sum,
                       value.var = "Price.1"),
      dcast.data.table(DT, Id ~ group, fun.aggregate = sum,
                       value.var = "Price.1"),
      by = "Id", all.x = TRUE
    ),
    merge(
      dcast.data.table(DT, Id ~ Date, fun.aggregate = sum,
                       value.var = "Price.2"),
      dcast.data.table(DT, Id ~ group, fun.aggregate = sum,
                       value.var = "Price.2"),
      by = "Id", all.x = TRUE
    )
  ),
  # Each stacked half has one row per distinct Id, so derive the label count
  # from the data rather than typing out a fixed number of labels.
  Price = rep(c("Price.1", "Price.2"), each = length(unique(DT$Id)))
)
是否有更简洁的现有方法来执行此操作?
这真的是一次试错:我希望它能奏效。
library(data.table)  # version 1.9.4
library(reshape2)

# Stack Price.1 / Price.2 into long form: one output row per original row and
# price column, with the price stored in "Price" and its source in "variable".
kk <- melt(
  DT,
  id.vars = c("Id", "Date", "group"),
  measure.vars = c("Price.1", "Price.2"),
  value.name = "Price"
)

# Spread back out by Date, summing duplicates. margins = "Date" (idea borrowed
# from @Frank) appends the "(all)" total column.
dcast(
  kk,
  Id + variable + group ~ Date,
  value.var = "Price",
  fun.aggregate = sum,
  margins = "Date"
)
# Id variable group 1997-01-01 1997-01-02 1997-01-03 1997-01-04 (all)
# 1 1 Price.1 1 29 25 14 26 94
# 2 1 Price.2 1 4 5 6 6 21
# 3 10 Price.1 1 0 30 0 0 30
# 4 10 Price.2 1 0 8 0 0 8
# 5 100 Price.1 2 0 16 0 13 29
# 6 100 Price.2 2 0 2 0 3 5
# 7 101 Price.1 2 0 0 62 18 80
# 8 101 Price.2 2 0 0 5 15 20
我假设每个 `Id` 只对应一个唯一的 `group`,因此先把 `group` 这一列删掉;除此之外,这与 @user227710 的答案基本相同。
# Remember which group each Id belongs to, then drop the column from DT (by
# reference) so that only the Price.* columns are left to be melted.
Idg <- unique(DT[, list(Id, group)])
DT[, group := NULL]

# Long form via melt, then back to wide by Date with a "(all)" total column.
res <- dcast(
  melt(DT, id.vars = c("Id", "Date")),
  variable + Id ~ Date,
  fun.aggregate = sum,
  fill = 0,
  margins = "Date",
  value.var = "value"
)

# Optional: bring the group column back by joining on Id.
setDT(res)  # needed before data.table 1.9.5, where using dcast.data.table is another option
setkeyv(res, "Id")
res[Idg][order(variable, Id)]
这给出了
variable Id 1997-01-01 1997-01-02 1997-01-03 1997-01-04 (all) group
1: Price.1 1 29 25 14 26 94 1
2: Price.2 1 4 5 6 6 21 1
3: Price.1 10 0 30 0 0 30 1
4: Price.2 10 0 8 0 0 8 1
5: Price.1 100 0 16 0 13 29 2
6: Price.2 100 0 2 0 3 5 2
7: Price.1 101 0 0 62 18 80 2
8: Price.2 101 0 0 5 15 20 2
作为对比,这里给出一个 dplyr 的解决方案(因为我还没学会如何熟练地用 melt 来重塑数据)。
# NOTE(review): no library() calls are shown for this snippet; presumably
# dplyr, tidyr and stringr are already attached (%>%, unite/spread/separate,
# str_sub) -- confirm before running.
#
# Step 1: aggregate the data completely.
# Rows 9 & 10 share the same Id/Date/group and must be collapsed into one row,
# and spread() works on a single key, so Id and group are fused into id_grp.
DTT <-
DT %>%
group_by(Id, Date, group) %>%
# Sum both price columns within each Id/Date/group combination.
summarise(Price.1 = sum(Price.1), Price.2 = sum(Price.2)) %>%
# NOTE(review): no `by` is given, so this presumably joins on every column the
# two tables share and adds no new columns -- confirm whether it is needed.
left_join(DT) %>%
# Combine Id and group into one key column of the form "<Id>_<group>".
unite(id_grp, Id, group, sep = "_") %>%
group_by(id_grp) %>%
# s1 / s2: totals of Price.1 / Price.2 over all dates within each id_grp.
mutate(s1 = sum(Price.1), s2 = sum(Price.2))
# Step 2: pivot long to wide for the 1st price column; each Date becomes a
# column holding Price.1. No `fill` is passed to spread(), and the printed
# result shows NA for Id/Date combinations that have no data.
DW1 <-
DTT %>%
select(-Price.2) %>%
spread(Date, Price.1) %>%
mutate(Price = "Price.1")
# Step 3: the same long-to-wide pivot for the 2nd price column.
DW2 <-
DTT %>%
select(-Price.1) %>%
spread(Date, Price.2) %>%
mutate(Price = "Price.2")
# Step 4: stack both wide tables and tidy up the result.
DWFin <-
bind_rows(DW1,DW2) %>%
# Split the fused key back into Id and group (Id prints as <chr> afterwards).
separate(id_grp, c("Id", "group")) %>%
# p is the last character of the Price label ("1" or "2").
# g copies group but is not kept by the final select().
mutate(g = group, p = str_sub(Price, -1),
# n1: the matching total (s1 for Price.1 rows, s2 for Price.2 rows) when the
# row belongs to group 1, otherwise 0.
n1 = ifelse(group == 1 & p == 1, s1, ifelse(group == 1 & p == 2, s2, 0)),
# n2: the same total for rows belonging to group 2, otherwise 0.
n2 = ifelse(group == 2 & p == 2, s2, ifelse(group == 2 & p == 1, s1, 0))) %>%
# Keep Id, the date columns (names starting with "19"), the two group totals
# renamed to "1" and "2", and the Price label.
select(Id, starts_with("19"), "1" = n1, "2" = n2, Price)
# Print the final table.
DWFin
Source: local data table [8 x 8]
# tbl_dt [8 × 8]
Id `1997-01-01` `1997-01-02` `1997-01-03` `1997-01-04` `1` `2` Price
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
1 1 29 25 14 26 94 0 Price.1
2 10 NA 30 NA NA 30 0 Price.1
3 100 NA 16 NA 13 0 29 Price.1
4 101 NA NA 62 18 0 80 Price.1
5 1 4 5 6 6 21 0 Price.2
6 10 NA 8 NA NA 8 0 Price.2
7 100 NA 2 NA 3 0 5 Price.2
8 101 NA NA 5 15 0 20 Price.2