总结多个时间间隔而不计算 lubridate 中的重叠时间

Sum multiple time intervals without counting overlapping times in lubridate

我需要对同一观察中多个时间间隔的天数求和。我在 Whosebug 中看到了许多关于此任务的不同示例。不过,我无法使用我的数据重现它们,因为我必须在超过两次重叠的时间和多个时间间隔内进行重现。

library(lubridate)
library(dplyr)

a <- c(as_date(0), as_date(8), as_date(80),as_date(60))
b <-c(as_date(2), as_date(20), as_date(100),as_date(80))
c <-c(as_date(1), as_date(16), as_date(95),as_date(85))
d <- c(as_date(100), as_date(19), as_date(120),as_date(100))
e <-c(as_date(0), as_date(50), as_date(101),as_date(65))
f <- c(as_date(150), as_date(100), as_date(200),as_date(200))

df <- data.frame(int.1 = interval(a, b), int.2 = interval(c, d), int.3 = interval(e, f))

我可以计算间隔之间的总时间,但包括重叠的时间:

df %>%
  mutate(overlapping.time = int.1 %/% days(1) + int.2 %/% days(1) + int.3 %/% days(1))


                           int.1                          int.2                          int.3 overlapping.time
1 1970-01-01 UTC--1970-01-03 UTC 1970-01-02 UTC--1970-04-11 UTC 1970-01-01 UTC--1970-05-31 UTC              251
2 1970-01-09 UTC--1970-01-21 UTC 1970-01-17 UTC--1970-01-20 UTC 1970-02-20 UTC--1970-04-11 UTC               65
3 1970-03-22 UTC--1970-04-11 UTC 1970-04-06 UTC--1970-05-01 UTC 1970-04-12 UTC--1970-07-20 UTC              144
4 1970-03-02 UTC--1970-03-22 UTC 1970-03-27 UTC--1970-04-11 UTC 1970-03-07 UTC--1970-07-20 UTC              170

下面是一个函数 overlapping_days(),它将获取一组间隔列并计算重叠天数的总数。请参阅内嵌注释以了解其工作原理。它涵盖了完全包含在另一个区间内、部分重叠的区间,并且不对列之间的关系做出任何假设。从您之前的计算中减去该函数的结果将得到您想要的结果。请注意,我对您最初发布的数据进行了一些修改。

library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following object is masked from 'package:base':
#> 
#>     date
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:lubridate':
#> 
#>     intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

a <- c(as_date(0), as_date(1), as_date(80),as_date(60))
b <-c(as_date(20), as_date(22), as_date(100),as_date(80))
c <-c(as_date(1), as_date(16), as_date(95),as_date(85))
d <- c(as_date(3), as_date(19), as_date(120),as_date(100))
e <-c(as_date(0), as_date(50), as_date(101),as_date(65))
f <- c(as_date(150), as_date(100), as_date(200),as_date(200))

df <- data.frame(int.1 = interval(a, b), int.2 = interval(c, d), int.3 = interval(e, f))
overlapping_days <- function(...) {
  # Collect the vectors passed into a list
  ll <- list(...)
  # Create all possible 2-combinations for the number of columns passed in.
  combinations <- combn(length(ll), 2)
  # Create a column for each combination, and a row for each element in the vectors.
  overlaps <- matrix(data = 0, nrow = length(ll[[1]]), ncol = length(combinations))
  # Loop through the combinations
  iterations <- seq_len(ncol(combinations))
  for (k in iterations) {
    # I'll refer to each of these indices as intervals -- they each represent
    # a vector passed in.
    i <- combinations[1, k]
    j <- combinations[2, k]
    overlaps[,k] <- case_when(
      # If the interval i is within interval j, add i to the overlap
      ll[[i]] %within% ll[[j]] ~ ll[[i]] %/% days(1),
      # If the interval j is within interval i, add j to the overlap
      ll[[j]] %within% ll[[i]] ~ ll[[j]] %/% days(1),
      # If they overlap, either int_start(i) < int_end(j), or int_start(j) < int_end(i)
      # Calculate the appropriate difference -- these look backwards but
      # are needed so a positive number is produced.
      int_overlaps(ll[[i]], ll[[j]]) & int_start(ll[[j]]) < int_end(ll[[i]]) ~
        int_start(ll[[j]]) %--% int_end(ll[[i]]) %/% days(1),
      int_overlaps(ll[[j]], ll[[i]]) & int_start(ll[[i]]) < int_end(ll[[j]]) ~
        int_start(ll[[i]]) %--% int_end(ll[[j]]) %/% days(1),
      # If none of these are true, the intervals do not overlap and we add 0 to
      # the overlap amount.
        TRUE ~ 0
    )
  }
  # Sum across rows to get the total number of overlapping days.
  rowSums(overlaps)
}

df %>%
  mutate(overlapping.time = int.1 %/% days(1) + int.2 %/% days(1) + int.3 %/% days(1), overlap =  overlapping_days(int.1, int.2, int.3))
#> Note: method with signature 'Timespan#Timespan' chosen for function '%/%',
#>  target signature 'Interval#Period'.
#>  "Interval#ANY", "ANY#Period" would also be valid
#>                            int.1                          int.2
#> 1 1970-01-01 UTC--1970-01-21 UTC 1970-01-02 UTC--1970-01-04 UTC
#> 2 1970-01-02 UTC--1970-01-23 UTC 1970-01-17 UTC--1970-01-20 UTC
#> 3 1970-03-22 UTC--1970-04-11 UTC 1970-04-06 UTC--1970-05-01 UTC
#> 4 1970-03-02 UTC--1970-03-22 UTC 1970-03-27 UTC--1970-04-11 UTC
#>                            int.3 overlapping.time overlap
#> 1 1970-01-01 UTC--1970-05-31 UTC              172      24
#> 2 1970-02-20 UTC--1970-04-11 UTC               74       3
#> 3 1970-04-12 UTC--1970-07-20 UTC              144      24
#> 4 1970-03-07 UTC--1970-07-20 UTC              170      30