如何在 data.table 中链式完成排序(order)、分组(group)、新增列(mutate)和透视(pivot)?
How to chain order, group, mutate, pivot in data.table?
我是 data.table 的新手,正在尝试用 data.table 复现一些 dplyr 代码,但在 data.table 代码中对数据进行透视/重塑时遇到了问题。
库
library(data.table)
library(lubridate)
library(tidyverse)
df
# Sample data: two ids with two rows each; `date` is parsed from
# year-month-day strings into Date values with lubridate::ymd().
test_df <- data.frame(
  id = rep(c(1234, 5678), each = 2),
  date = ymd(c("2021-10-10", "2021-10-10", "2021-8-10", "2021-8-15")),
  Amount = c(54767, 96896, 34534, 79870)
)
dplyr代码:
# Sort by date, attach each id's first and last Amount to every row of that id,
# then stack the two new columns into name/value pairs.
test_df %>%
  arrange(date) %>%
  group_by(id) %>%
  mutate(
    Amt_first = first(Amount),
    Amt_last = last(Amount)
  ) %>%
  ungroup() %>%
  pivot_longer(
    cols = c(Amt_first, Amt_last),
    names_to = "Amt_catg",
    values_to = "Amt_val"
  )
结果:
# A tibble: 8 x 5
id date Amount Amt_catg Amt_val
<dbl> <date> <dbl> <chr> <dbl>
1 5678 2021-08-10 34534 Amt_first 34534
2 5678 2021-08-10 34534 Amt_last 79870
3 5678 2021-08-15 79870 Amt_first 34534
4 5678 2021-08-15 79870 Amt_last 79870
5 1234 2021-10-10 54767 Amt_first 54767
6 1234 2021-10-10 54767 Amt_last 96896
7 1234 2021-10-10 96896 Amt_first 54767
8 1234 2021-10-10 96896 Amt_last 96896
data.table 尝试:
# Attempted data.table translation: add per-id first/last Amount by reference
# (rows taken in date order), then melt those two columns into long format.
setDT(test_df)[order(date),
`:=`(Amt_first = data.table::first(Amount),
Amt_last = data.table::last(Amount)),
by = id] %>%
# https://cran.r-project.org/web/packages/data.table/vignettes/datatable-reshape.html
# NOTE: "Amt_Last" below (capital L) does not match the Amt_last column created
# above.
data.table::melt(measure.vars = c("Amt_first","Amt_Last"),
variable.name = "Amt_catg", value.name = "Amt_val")
我在这里遇到错误。
出现这个错误是因为你把 Amt_last 拼错了(写成了 Amt_Last)。修正后:
# Add each id's first and last Amount (rows taken in date order) by reference,
# then melt those two columns into long format.
setDT(test_df)[
  order(date),
  `:=`(
    Amt_first = data.table::first(Amount),
    Amt_last = data.table::last(Amount)
  ),
  by = id
] %>%
  # Amount is listed in id.vars on purpose: when both id.vars and measure.vars
  # are supplied, melt() keeps only the columns named in one of them, so
  # leaving Amount out would drop it from the result.
  data.table::melt(
    id.vars = c("id", "date", "Amount"),
    measure.vars = c("Amt_first", "Amt_last"),
    variable.name = "Amt_catg",
    value.name = "Amt_val"
  )
id date Amount Amt_catg Amt_val
1: 1234 2021-10-10 54767 Amt_first 54767
2: 1234 2021-10-10 96896 Amt_first 54767
3: 5678 2021-08-10 34534 Amt_first 34534
4: 5678 2021-08-15 79870 Amt_first 34534
5: 1234 2021-10-10 54767 Amt_last 96896
6: 1234 2021-10-10 96896 Amt_last 96896
7: 5678 2021-08-10 34534 Amt_last 79870
8: 5678 2021-08-15 79870 Amt_last 79870
也可以通过连接(join)得到该输出(merge() 默认做内连接;这里每个 id 在两侧都存在,所以结果与 full join 相同):
# Build a two-row lookup per id (first and last Amount in date order) and join
# it back onto every original row through the shared id column.
setDT(test_df)
merge(
  x = test_df,
  y = test_df[
    order(date),
    .(
      Amt_catg = c("Amt_first", "Amt_last"),
      Amt_val = c(Amount[1L], Amount[.N])
    ),
    by = id
  ]
)
# id date Amount Amt_catg Amt_val
# 1: 1234 2021-10-10 54767 Amt_first 54767
# 2: 1234 2021-10-10 54767 Amt_last 96896
# 3: 1234 2021-10-10 96896 Amt_first 54767
# 4: 1234 2021-10-10 96896 Amt_last 96896
# 5: 5678 2021-08-10 34534 Amt_first 34534
# 6: 5678 2021-08-10 34534 Amt_last 79870
# 7: 5678 2021-08-15 79870 Amt_first 34534
# 8: 5678 2021-08-15 79870 Amt_last 79870
我是 data.table 的新手,正在尝试用 data.table 复现一些 dplyr 代码,但在 data.table 代码中对数据进行透视/重塑时遇到了问题。
库
library(data.table)
library(lubridate)
library(tidyverse)
df
# Sample data: two ids with two rows each; `date` is parsed from
# year-month-day strings into Date values with lubridate::ymd().
test_df <- data.frame(
  id = rep(c(1234, 5678), each = 2),
  date = ymd(c("2021-10-10", "2021-10-10", "2021-8-10", "2021-8-15")),
  Amount = c(54767, 96896, 34534, 79870)
)
dplyr代码:
# Sort by date, attach each id's first and last Amount to every row of that id,
# then stack the two new columns into name/value pairs.
test_df %>%
  arrange(date) %>%
  group_by(id) %>%
  mutate(
    Amt_first = first(Amount),
    Amt_last = last(Amount)
  ) %>%
  ungroup() %>%
  pivot_longer(
    cols = c(Amt_first, Amt_last),
    names_to = "Amt_catg",
    values_to = "Amt_val"
  )
结果:
# A tibble: 8 x 5
id date Amount Amt_catg Amt_val
<dbl> <date> <dbl> <chr> <dbl>
1 5678 2021-08-10 34534 Amt_first 34534
2 5678 2021-08-10 34534 Amt_last 79870
3 5678 2021-08-15 79870 Amt_first 34534
4 5678 2021-08-15 79870 Amt_last 79870
5 1234 2021-10-10 54767 Amt_first 54767
6 1234 2021-10-10 54767 Amt_last 96896
7 1234 2021-10-10 96896 Amt_first 54767
8 1234 2021-10-10 96896 Amt_last 96896
data.table 尝试:
# Attempted data.table translation: add per-id first/last Amount by reference
# (rows taken in date order), then melt those two columns into long format.
setDT(test_df)[order(date),
`:=`(Amt_first = data.table::first(Amount),
Amt_last = data.table::last(Amount)),
by = id] %>%
# https://cran.r-project.org/web/packages/data.table/vignettes/datatable-reshape.html
# NOTE: "Amt_Last" below (capital L) does not match the Amt_last column created
# above.
data.table::melt(measure.vars = c("Amt_first","Amt_Last"),
variable.name = "Amt_catg", value.name = "Amt_val")
我在这里遇到错误。
出现这个错误是因为你把 Amt_last 拼错了(写成了 Amt_Last)。修正后:
# Add each id's first and last Amount (rows taken in date order) by reference,
# then melt those two columns into long format.
setDT(test_df)[
  order(date),
  `:=`(
    Amt_first = data.table::first(Amount),
    Amt_last = data.table::last(Amount)
  ),
  by = id
] %>%
  # Amount is listed in id.vars on purpose: when both id.vars and measure.vars
  # are supplied, melt() keeps only the columns named in one of them, so
  # leaving Amount out would drop it from the result.
  data.table::melt(
    id.vars = c("id", "date", "Amount"),
    measure.vars = c("Amt_first", "Amt_last"),
    variable.name = "Amt_catg",
    value.name = "Amt_val"
  )
id date Amount Amt_catg Amt_val
1: 1234 2021-10-10 54767 Amt_first 54767
2: 1234 2021-10-10 96896 Amt_first 54767
3: 5678 2021-08-10 34534 Amt_first 34534
4: 5678 2021-08-15 79870 Amt_first 34534
5: 1234 2021-10-10 54767 Amt_last 96896
6: 1234 2021-10-10 96896 Amt_last 96896
7: 5678 2021-08-10 34534 Amt_last 79870
8: 5678 2021-08-15 79870 Amt_last 79870
也可以通过连接(join)得到该输出(merge() 默认做内连接;这里每个 id 在两侧都存在,所以结果与 full join 相同):
# Build a two-row lookup per id (first and last Amount in date order) and join
# it back onto every original row through the shared id column.
setDT(test_df)
merge(
  x = test_df,
  y = test_df[
    order(date),
    .(
      Amt_catg = c("Amt_first", "Amt_last"),
      Amt_val = c(Amount[1L], Amount[.N])
    ),
    by = id
  ]
)
# id date Amount Amt_catg Amt_val
# 1: 1234 2021-10-10 54767 Amt_first 54767
# 2: 1234 2021-10-10 54767 Amt_last 96896
# 3: 1234 2021-10-10 96896 Amt_first 54767
# 4: 1234 2021-10-10 96896 Amt_last 96896
# 5: 5678 2021-08-10 34534 Amt_first 34534
# 6: 5678 2021-08-10 34534 Amt_last 79870
# 7: 5678 2021-08-15 79870 Amt_first 34534
# 8: 5678 2021-08-15 79870 Amt_last 79870