基于两组日期传播值

Spreading values based on two sets of dates

我想通过输入 1 进行映射并向输入 2 添加列,以便获得如下所示的预期结果。我可以看到如何根据最后一位代码实现我想要的单行输入 1,但无法遍历输入 1 的所有行。有没有办法用 purrr?

library(tidyverse)
library(lubridate)

# Input 1

fc_periods <- data_frame(
  period = c("m1", "m2", "m3"),
  start = c(ymd("2018-01-01"), ymd("2018-02-01"), ymd("2018-03-01")),
  end = c(ymd("2018-01-31"), ymd("2018-02-28"), ymd("2018-03-31")),
  interval = interval(start, end)
)

# Input 2

unspread_data <- data_frame(
  act_start = c(ymd("2017-11-01"), ymd("2018-01-10"), ymd("2018-02-14"), ymd("2017-12-01")),
  act_end = c(ymd("2018-04-30"), ymd("2018-01-25"), ymd("2018-03-16"), ymd("2017-12-31")),
  value = c(600, 100, 200, 999)
)

# Desired outcome

spread_data <- data_frame(
  act_start = c(ymd("2017-11-01"), ymd("2018-01-10"), ymd("2018-02-14"), ymd("2017-12-01")),
  act_end = c(ymd("2018-04-30"), ymd("2018-01-25"), ymd("2018-03-16"), ymd("2017-12-31")),
  value = c(600, 100, 200, 999),
  M1 = c(100, 100, 0, 0),
  M2 = c(100, 0, 100, 0),
  M3 = c(100, 0, 100, 0)
)

# If dealing with a single period, e.g. M1, then could do this:

start <- ymd("2018-01-01")
end <- ymd("2018-01-31")
interval <- interval(start, end)

spread_data <- unspread_data %>% 
   mutate(
         overlap1 = if_else(act_start %within% interval & act_end %within% interval, as.numeric(act_end - act_start), 0),
         overlap2 = if_else(act_start %within% interval, as.numeric(end - act_start), 0),
         overlap3 = if_else(act_end %within% interval, as.numeric(act_end - start), 0),
         overlap4 = if_else(act_start < start & act_end > end, as.numeric(end - start), 0),
         days = as.numeric(act_end - act_start),
         overlap = if_else(overlap1 > 0, overlap1, 
                              if_else(overlap2 > 0, overlap2, 
                                      if_else(overlap3 > 0, overlap3, 
                                              if_else(overlap4 > 0, overlap4, 0)))),
         fraction = if_else(days > 0, overlap / days, 0),
         M1 = fraction * value
   )

试过这种方法:

spread_data <- fc_periods %>% pmap_df(function(period, start, end, interval){
  unspread_data %>% 
    mutate(
      overlap1 = if_else(act_start %within% interval & act_end %within% interval, as.numeric(act_end - act_start), 0),
      overlap2 = if_else(act_start %within% interval, as.numeric(end - act_start), 0),
      overlap3 = if_else(act_end %within% interval, as.numeric(act_end - start), 0),
      overlap4 = if_else(act_start < start & act_end > end, as.numeric(end - start), 0),
      days = as.numeric(act_end - act_start),
      overlap = if_else(overlap1 > 0, overlap1,
                      if_else(overlap2 > 0, overlap2,
                              if_else(overlap3 > 0, overlap3,
                                      if_else(overlap4 > 0, overlap4, 0)))),
      fraction = if_else(days > 0, overlap / days, 0),
      period = fraction * value
  )
})

出现此错误:

    Error in mutate_impl(.data, dots) : 
  Evaluation error: no slot of name "group" for this object of class "derivedDefaultMethod".

这行得通。问题似乎是试图在 pmap 内的函数中使用 %within%interval(不确定是哪个问题)。仅使用开始日期和结束日期即可解决问题。

library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
#> ✔ ggplot2 2.2.1     ✔ purrr   0.2.4
#> ✔ tibble  1.4.2     ✔ dplyr   0.7.4
#> ✔ tidyr   0.8.0     ✔ stringr 1.3.0
#> ✔ readr   1.1.1     ✔ forcats 0.3.0
#> ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag()    masks stats::lag()
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following object is masked from 'package:base':
#> 
#>     date

# Input 1

fc_periods <- data_frame(
  period = c("m1", "m2", "m3"),
  start = c(ymd("2018-01-01"), ymd("2018-02-01"), ymd("2018-03-01")),
  end = c(ymd("2018-01-31"), ymd("2018-02-28"), ymd("2018-03-31"))
)

# Input 2

unspread_data <- data_frame(
  act_start = c(ymd("2017-11-01"), ymd("2018-01-10"), ymd("2018-02-14"), ymd("2017-12-01")),
  act_end = c(ymd("2018-04-30"), ymd("2018-01-25"), ymd("2018-03-16"), ymd("2017-12-31")),
  value = c(600, 100, 200, 999)
)

spread_data <- fc_periods %>% pmap_df(function(period, start, end, int){
  unspread_data %>% 
    mutate(
      month = period,
      overlap1 = if_else(act_start >= start & act_start <= end & act_end >= start & act_end <= end, as.numeric(act_end - act_start), 0),
      overlap2 = if_else(act_start >= start & act_start <= end, as.numeric(as_date(end) - act_start), 0),
      overlap3 = if_else(act_end >= start & act_end <= end, as.numeric(act_end - as_date(start)), 0),
      overlap4 = if_else(act_start < start & act_end > end, as.numeric(as_date(end) - as_date(start)), 0),
      days = as.numeric(act_end - act_start),
      overlap = if_else(overlap1 > 0, overlap1,
                      if_else(overlap2 > 0, overlap2,
                              if_else(overlap3 > 0, overlap3,
                                      if_else(overlap4 > 0, overlap4, 0)))),
      fraction = if_else(days > 0, overlap / days, 0),
      rev = fraction * value
      )
})