基于两组日期传播值
Spreading values based on two sets of dates
我想通过输入 1 进行映射并向输入 2 添加列,以便获得如下所示的预期结果。我可以看到如何根据最后一位代码实现我想要的单行输入 1,但无法遍历输入 1 的所有行。有没有办法用 purrr
?
library(tidyverse)
library(lubridate)
# Input 1
fc_periods <- data_frame(
period = c("m1", "m2", "m3"),
start = c(ymd("2018-01-01"), ymd("2018-02-01"), ymd("2018-03-01")),
end = c(ymd("2018-01-31"), ymd("2018-02-28"), ymd("2018-03-31")),
interval = interval(start, end)
)
# Input 2
unspread_data <- data_frame(
act_start = c(ymd("2017-11-01"), ymd("2018-01-10"), ymd("2018-02-14"), ymd("2017-12-01")),
act_end = c(ymd("2018-04-30"), ymd("2018-01-25"), ymd("2018-03-16"), ymd("2017-12-31")),
value = c(600, 100, 200, 999)
)
# Desired outcome
spread_data <- data_frame(
act_start = c(ymd("2017-11-01"), ymd("2018-01-10"), ymd("2018-02-14"), ymd("2017-12-01")),
act_end = c(ymd("2018-04-30"), ymd("2018-01-25"), ymd("2018-03-16"), ymd("2017-12-31")),
value = c(600, 100, 200, 999),
M1 = c(100, 100, 0, 0),
M2 = c(100, 0, 100, 0),
M3 = c(100, 0, 100, 0)
)
# If dealing with a single period, e.g. M1, then could do this:
start <- ymd("2018-01-01")
end <- ymd("2018-01-31")
interval <- interval(start, end)
spread_data <- unspread_data %>%
mutate(
overlap1 = if_else(act_start %within% interval & act_end %within% interval, as.numeric(act_end - act_start), 0),
overlap2 = if_else(act_start %within% interval, as.numeric(end - act_start), 0),
overlap3 = if_else(act_end %within% interval, as.numeric(act_end - start), 0),
overlap4 = if_else(act_start < start & act_end > end, as.numeric(end - start), 0),
days = as.numeric(act_end - act_start),
overlap = if_else(overlap1 > 0, overlap1,
if_else(overlap2 > 0, overlap2,
if_else(overlap3 > 0, overlap3,
if_else(overlap4 > 0, overlap4, 0)))),
fraction = if_else(days > 0, overlap / days, 0),
M1 = fraction * value
)
试过这种方法:
spread_data <- fc_periods %>% pmap_df(function(period, start, end, interval){
unspread_data %>%
mutate(
overlap1 = if_else(act_start %within% interval & act_end %within% interval, as.numeric(act_end - act_start), 0),
overlap2 = if_else(act_start %within% interval, as.numeric(end - act_start), 0),
overlap3 = if_else(act_end %within% interval, as.numeric(act_end - start), 0),
overlap4 = if_else(act_start < start & act_end > end, as.numeric(end - start), 0),
days = as.numeric(act_end - act_start),
overlap = if_else(overlap1 > 0, overlap1,
if_else(overlap2 > 0, overlap2,
if_else(overlap3 > 0, overlap3,
if_else(overlap4 > 0, overlap4, 0)))),
fraction = if_else(days > 0, overlap / days, 0),
period = fraction * value
)
})
出现此错误:
Error in mutate_impl(.data, dots) :
Evaluation error: no slot of name "group" for this object of class "derivedDefaultMethod".
这行得通。问题似乎是试图在 pmap
内的函数中使用 %within%
或 interval
(不确定是哪个问题)。仅使用开始日期和结束日期即可解决问题。
library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
#> ✔ ggplot2 2.2.1 ✔ purrr 0.2.4
#> ✔ tibble 1.4.2 ✔ dplyr 0.7.4
#> ✔ tidyr 0.8.0 ✔ stringr 1.3.0
#> ✔ readr 1.1.1 ✔ forcats 0.3.0
#> ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following object is masked from 'package:base':
#>
#> date
# Input 1
fc_periods <- data_frame(
period = c("m1", "m2", "m3"),
start = c(ymd("2018-01-01"), ymd("2018-02-01"), ymd("2018-03-01")),
end = c(ymd("2018-01-31"), ymd("2018-02-28"), ymd("2018-03-31"))
)
# Input 2
unspread_data <- data_frame(
act_start = c(ymd("2017-11-01"), ymd("2018-01-10"), ymd("2018-02-14"), ymd("2017-12-01")),
act_end = c(ymd("2018-04-30"), ymd("2018-01-25"), ymd("2018-03-16"), ymd("2017-12-31")),
value = c(600, 100, 200, 999)
)
spread_data <- fc_periods %>% pmap_df(function(period, start, end, int){
unspread_data %>%
mutate(
month = period,
overlap1 = if_else(act_start >= start & act_start <= end & act_end >= start & act_end <= end, as.numeric(act_end - act_start), 0),
overlap2 = if_else(act_start >= start & act_start <= end, as.numeric(as_date(end) - act_start), 0),
overlap3 = if_else(act_end >= start & act_end <= end, as.numeric(act_end - as_date(start)), 0),
overlap4 = if_else(act_start < start & act_end > end, as.numeric(as_date(end) - as_date(start)), 0),
days = as.numeric(act_end - act_start),
overlap = if_else(overlap1 > 0, overlap1,
if_else(overlap2 > 0, overlap2,
if_else(overlap3 > 0, overlap3,
if_else(overlap4 > 0, overlap4, 0)))),
fraction = if_else(days > 0, overlap / days, 0),
rev = fraction * value
)
})
我想通过输入 1 进行映射并向输入 2 添加列,以便获得如下所示的预期结果。我可以看到如何根据最后一位代码实现我想要的单行输入 1,但无法遍历输入 1 的所有行。有没有办法用 purrr
?
library(tidyverse)
library(lubridate)
# Input 1
fc_periods <- data_frame(
period = c("m1", "m2", "m3"),
start = c(ymd("2018-01-01"), ymd("2018-02-01"), ymd("2018-03-01")),
end = c(ymd("2018-01-31"), ymd("2018-02-28"), ymd("2018-03-31")),
interval = interval(start, end)
)
# Input 2
unspread_data <- data_frame(
act_start = c(ymd("2017-11-01"), ymd("2018-01-10"), ymd("2018-02-14"), ymd("2017-12-01")),
act_end = c(ymd("2018-04-30"), ymd("2018-01-25"), ymd("2018-03-16"), ymd("2017-12-31")),
value = c(600, 100, 200, 999)
)
# Desired outcome
spread_data <- data_frame(
act_start = c(ymd("2017-11-01"), ymd("2018-01-10"), ymd("2018-02-14"), ymd("2017-12-01")),
act_end = c(ymd("2018-04-30"), ymd("2018-01-25"), ymd("2018-03-16"), ymd("2017-12-31")),
value = c(600, 100, 200, 999),
M1 = c(100, 100, 0, 0),
M2 = c(100, 0, 100, 0),
M3 = c(100, 0, 100, 0)
)
# If dealing with a single period, e.g. M1, then could do this:
start <- ymd("2018-01-01")
end <- ymd("2018-01-31")
interval <- interval(start, end)
spread_data <- unspread_data %>%
mutate(
overlap1 = if_else(act_start %within% interval & act_end %within% interval, as.numeric(act_end - act_start), 0),
overlap2 = if_else(act_start %within% interval, as.numeric(end - act_start), 0),
overlap3 = if_else(act_end %within% interval, as.numeric(act_end - start), 0),
overlap4 = if_else(act_start < start & act_end > end, as.numeric(end - start), 0),
days = as.numeric(act_end - act_start),
overlap = if_else(overlap1 > 0, overlap1,
if_else(overlap2 > 0, overlap2,
if_else(overlap3 > 0, overlap3,
if_else(overlap4 > 0, overlap4, 0)))),
fraction = if_else(days > 0, overlap / days, 0),
M1 = fraction * value
)
试过这种方法:
spread_data <- fc_periods %>% pmap_df(function(period, start, end, interval){
unspread_data %>%
mutate(
overlap1 = if_else(act_start %within% interval & act_end %within% interval, as.numeric(act_end - act_start), 0),
overlap2 = if_else(act_start %within% interval, as.numeric(end - act_start), 0),
overlap3 = if_else(act_end %within% interval, as.numeric(act_end - start), 0),
overlap4 = if_else(act_start < start & act_end > end, as.numeric(end - start), 0),
days = as.numeric(act_end - act_start),
overlap = if_else(overlap1 > 0, overlap1,
if_else(overlap2 > 0, overlap2,
if_else(overlap3 > 0, overlap3,
if_else(overlap4 > 0, overlap4, 0)))),
fraction = if_else(days > 0, overlap / days, 0),
period = fraction * value
)
})
出现此错误:
Error in mutate_impl(.data, dots) :
Evaluation error: no slot of name "group" for this object of class "derivedDefaultMethod".
这行得通。问题似乎是试图在 pmap
内的函数中使用 %within%
或 interval
(不确定是哪个问题)。仅使用开始日期和结束日期即可解决问题。
library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
#> ✔ ggplot2 2.2.1 ✔ purrr 0.2.4
#> ✔ tibble 1.4.2 ✔ dplyr 0.7.4
#> ✔ tidyr 0.8.0 ✔ stringr 1.3.0
#> ✔ readr 1.1.1 ✔ forcats 0.3.0
#> ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following object is masked from 'package:base':
#>
#> date
# Input 1
fc_periods <- data_frame(
period = c("m1", "m2", "m3"),
start = c(ymd("2018-01-01"), ymd("2018-02-01"), ymd("2018-03-01")),
end = c(ymd("2018-01-31"), ymd("2018-02-28"), ymd("2018-03-31"))
)
# Input 2
unspread_data <- data_frame(
act_start = c(ymd("2017-11-01"), ymd("2018-01-10"), ymd("2018-02-14"), ymd("2017-12-01")),
act_end = c(ymd("2018-04-30"), ymd("2018-01-25"), ymd("2018-03-16"), ymd("2017-12-31")),
value = c(600, 100, 200, 999)
)
spread_data <- fc_periods %>% pmap_df(function(period, start, end, int){
unspread_data %>%
mutate(
month = period,
overlap1 = if_else(act_start >= start & act_start <= end & act_end >= start & act_end <= end, as.numeric(act_end - act_start), 0),
overlap2 = if_else(act_start >= start & act_start <= end, as.numeric(as_date(end) - act_start), 0),
overlap3 = if_else(act_end >= start & act_end <= end, as.numeric(act_end - as_date(start)), 0),
overlap4 = if_else(act_start < start & act_end > end, as.numeric(as_date(end) - as_date(start)), 0),
days = as.numeric(act_end - act_start),
overlap = if_else(overlap1 > 0, overlap1,
if_else(overlap2 > 0, overlap2,
if_else(overlap3 > 0, overlap3,
if_else(overlap4 > 0, overlap4, 0)))),
fraction = if_else(days > 0, overlap / days, 0),
rev = fraction * value
)
})