计算给定时间段内时间间隔的持续时间
Calculate duration of a time interval within a given period
我有一个包含开始时间和长度(以秒为单位)的数据帧:
# Example events: one row per event, with its start time and its length in
# seconds.
# NOTE(review): the trailing " UTC" in each string does not appear to be
# parsed as a time zone (no `tz` argument is passed), so the session time
# zone would apply -- confirm.
dates <- data.frame(
  start = as.POSIXct(c(
    "2010-04-03 03:02:38 UTC", "2010-04-03 06:03:14 UTC",
    "2010-04-20 03:05:52 UTC", "2010-04-20 03:17:42 UTC",
    "2010-04-21 03:09:38 UTC", "2010-04-21 07:10:14 UTC",
    "2010-04-21 08:12:52 UTC", "2010-04-23 03:13:42 UTC",
    "2010-04-23 03:25:42 UTC", "2010-04-23 03:36:38 UTC",
    "2010-04-23 08:58:14 UTC", "2010-04-24 03:21:52 UTC",
    "2010-04-24 03:22:42 UTC", "2010-04-24 07:24:19 UTC",
    "2010-04-24 07:55:19 UTC"
  )),
  # The lengths cycle through 3600, 300, 900 for all 15 rows.
  length = rep(c(3600, 300, 900), times = 5)
)
> dates
start length
1 2010-04-03 03:02:38 3600
2 2010-04-03 06:03:14 300
3 2010-04-20 03:05:52 900
4 2010-04-20 03:17:42 3600
5 2010-04-21 03:09:38 300
6 2010-04-21 07:10:14 900
7 2010-04-21 08:12:52 3600
8 2010-04-23 03:13:42 300
9 2010-04-23 03:25:42 900
10 2010-04-23 03:36:38 3600
11 2010-04-23 08:58:14 300
12 2010-04-24 03:21:52 900
13 2010-04-24 03:22:42 3600
14 2010-04-24 07:24:19 300
15 2010-04-24 07:55:19 900
我需要计算从 2010-04-02 00:00:00 到 2010-04-21 09:00:00 期间,以及从 2010-04-23 03:15:00 到 2010-04-24 08:00:00 期间的总持续时间(长度)。
棘手之处在于,某个事件的长度可能会超出指定时间段的末尾,而我不想把超出的那部分持续时间计算在内。
我希望得到:
- 2010-04-02 00:00:00 到 2010-04-21 09:00:00:12428 秒
- 2010-04-23 03:15:00 到 2010-04-24 08:00:00:10103 秒
我想使用 `lubridate` 为每一行定义一个间隔,然后对持续时间求和,但没能想出具体做法。
这是一个例子:
library(lubridate)
# Bounds of the window of interest.
t0 <- as.POSIXct("2010-04-02 00:00:00")
t1 <- as.POSIXct("2010-04-21 09:00:00")
# Add up the full `length` of every event whose start lies within the window;
# events are not clipped at the window's end.
with(dates, sum(length[start %within% interval(t0, t1)]))
# [1] 13200
我不太确定具体问的是什么。另一个答案只是把开始时间落在指定时间间隔内的那些行的 `length` 加起来。而我对这个问题的理解是:需要处理长度可能超出指定时间段末尾的事件,并且不把超出时间段末尾的那部分时间计算在内(反过来,对于在时间段开始之前就已开始的事件也是如此)。例如,第 7 行的事件就远远超过了 2010-04-21 09:00:00。这正是提供预期输出很有帮助的原因!
无论如何,下面是一种实现我所理解的需求的方法,并把它封装成了一个函数。基本思路是为每个事件创建新的开始和结束时间:如果事件超出了指定间隔,就取间隔的边界作为新的开始或结束。我可能遗漏了一些边缘情况,欢迎改进!
# Example events: one row per event, with its start time and its length in
# seconds.
# NOTE(review): the trailing " UTC" in each string does not appear to be
# parsed as a time zone (no `tz` argument is passed), so the session time
# zone would apply -- confirm.
dates <- data.frame(
  start = as.POSIXct(c(
    "2010-04-03 03:02:38 UTC", "2010-04-03 06:03:14 UTC",
    "2010-04-20 03:05:52 UTC", "2010-04-20 03:17:42 UTC",
    "2010-04-21 03:09:38 UTC", "2010-04-21 07:10:14 UTC",
    "2010-04-21 08:12:52 UTC", "2010-04-23 03:13:42 UTC",
    "2010-04-23 03:25:42 UTC", "2010-04-23 03:36:38 UTC",
    "2010-04-23 08:58:14 UTC", "2010-04-24 03:21:52 UTC",
    "2010-04-24 03:22:42 UTC", "2010-04-24 07:24:19 UTC",
    "2010-04-24 07:55:19 UTC"
  )),
  # The lengths cycle through 3600, 300, 900 for all 15 rows.
  length = rep(c(3600, 300, 900), times = 5)
)
library(dplyr)
library(lubridate)
#' Sum the seconds of each event that fall inside a time window
#'
#' Each row of `tbl` is an event running from `start` to `start + length`.
#' Events are clipped to the window one by one; events with no overlap end up
#' with a negative clipped duration and are dropped before summing.
#'
#' @param tbl Data frame with a date-time column `start` and a numeric column
#'   `length` (seconds).
#' @param interval_start,interval_end Window bounds, converted with
#'   `as.POSIXct()`.
#' @return A single number: the total seconds of events inside the window.
length_within <- function(tbl, interval_start, interval_end) {
  window_open <- as.POSIXct(interval_start)
  window_close <- as.POSIXct(interval_end)

  clipped <- mutate(
    tbl,
    end = start + length,
    # ifelse() drops the date-time class, so both bounds become plain
    # seconds-since-epoch numbers and their difference is in seconds.
    counted_start = ifelse(start < window_open, window_open, start),
    counted_end = ifelse(end > window_close, window_close, end),
    seconds = counted_end - counted_start
  )
  # A negative value means the event lies entirely outside the window.
  overlapping <- filter(clipped, seconds >= 0)
  totals <- summarise(overlapping, total = sum(seconds))
  totals[[1]]
}
# Clipped totals for the two windows from the question.
length_within(
  dates,
  interval_start = "2010-04-02 00:00:00",
  interval_end = "2010-04-21 09:00:00"
)
#> [1] 12428
length_within(
  dates,
  interval_start = "2010-04-23 03:15:00",
  interval_end = "2010-04-24 08:00:00"
)
#> [1] 10103
另一种可能的解决方案是使用 `dplyr` 中的 `first` 和 `last` 函数。借助 `first` 和 `last`,我们可以只调整第一行和最后一行的长度,然后再用 `sum` 求和。
library(dplyr)
#' Total event duration that falls inside a time window
#'
#' Every event covers `start` to `start + length` (seconds). Each event is
#' clipped to the window individually before summing. Trimming only the first
#' and last rows (by start time) is not enough: an earlier, longer event can
#' also run past `end_time`, and more than one event can begin before
#' `start_time`; those would otherwise be counted in full.
#'
#' @param df Data frame with a date-time column `start` and a numeric column
#'   `length` holding each event's duration in seconds.
#' @param start_time,end_time Window bounds; converted with `as.POSIXct()`.
#' @return A one-row data frame with a single numeric column `sum`: the
#'   seconds of events inside the window (0 when no event overlaps it).
calculate_duration <- function(df, start_time, end_time) {
  start_time <- as.POSIXct(start_time)
  end_time <- as.POSIXct(end_time)
  # Clip on the numeric (seconds since epoch) scale so the arithmetic does not
  # depend on how pmin()/pmax() carry date-time attributes.
  window_lo <- as.numeric(start_time)
  window_hi <- as.numeric(end_time)

  df %>%
    # Keep only events that touch the window at all.
    filter((start + length) >= start_time & start < end_time) %>%
    summarise(
      sum = sum(pmax(
        pmin(as.numeric(start) + length, window_hi) -
          pmax(as.numeric(start), window_lo),
        0 # never let a row contribute a negative duration
      ))
    ) %>%
    select(sum)
}
# Clipped totals for the two windows from the question.
calculate_duration(
  dates,
  start_time = "2010-04-02 00:00:00",
  end_time = "2010-04-21 09:00:00"
)
# sum
#1 12428
calculate_duration(
  dates,
  start_time = "2010-04-23 03:15:00",
  end_time = "2010-04-24 08:00:00"
)
# sum
#1 10103
# Data
# Example events: one row per event, with its start time and its length in
# seconds.
# NOTE(review): the trailing " UTC" in each string does not appear to be
# parsed as a time zone (no `tz` argument is passed), so the session time
# zone would apply -- confirm.
dates <- data.frame(
  start = as.POSIXct(c(
    "2010-04-03 03:02:38 UTC", "2010-04-03 06:03:14 UTC",
    "2010-04-20 03:05:52 UTC", "2010-04-20 03:17:42 UTC",
    "2010-04-21 03:09:38 UTC", "2010-04-21 07:10:14 UTC",
    "2010-04-21 08:12:52 UTC", "2010-04-23 03:13:42 UTC",
    "2010-04-23 03:25:42 UTC", "2010-04-23 03:36:38 UTC",
    "2010-04-23 08:58:14 UTC", "2010-04-24 03:21:52 UTC",
    "2010-04-24 03:22:42 UTC", "2010-04-24 07:24:19 UTC",
    "2010-04-24 07:55:19 UTC"
  )),
  # The lengths cycle through 3600, 300, 900 for all 15 rows.
  length = rep(c(3600, 300, 900), times = 5)
)
我有一个包含开始时间和长度(以秒为单位)的数据帧:
# Example events: one row per event, with its start time and its length in
# seconds.
# NOTE(review): the trailing " UTC" in each string does not appear to be
# parsed as a time zone (no `tz` argument is passed), so the session time
# zone would apply -- confirm.
dates <- data.frame(
  start = as.POSIXct(c(
    "2010-04-03 03:02:38 UTC", "2010-04-03 06:03:14 UTC",
    "2010-04-20 03:05:52 UTC", "2010-04-20 03:17:42 UTC",
    "2010-04-21 03:09:38 UTC", "2010-04-21 07:10:14 UTC",
    "2010-04-21 08:12:52 UTC", "2010-04-23 03:13:42 UTC",
    "2010-04-23 03:25:42 UTC", "2010-04-23 03:36:38 UTC",
    "2010-04-23 08:58:14 UTC", "2010-04-24 03:21:52 UTC",
    "2010-04-24 03:22:42 UTC", "2010-04-24 07:24:19 UTC",
    "2010-04-24 07:55:19 UTC"
  )),
  # The lengths cycle through 3600, 300, 900 for all 15 rows.
  length = rep(c(3600, 300, 900), times = 5)
)
> dates
start length
1 2010-04-03 03:02:38 3600
2 2010-04-03 06:03:14 300
3 2010-04-20 03:05:52 900
4 2010-04-20 03:17:42 3600
5 2010-04-21 03:09:38 300
6 2010-04-21 07:10:14 900
7 2010-04-21 08:12:52 3600
8 2010-04-23 03:13:42 300
9 2010-04-23 03:25:42 900
10 2010-04-23 03:36:38 3600
11 2010-04-23 08:58:14 300
12 2010-04-24 03:21:52 900
13 2010-04-24 03:22:42 3600
14 2010-04-24 07:24:19 300
15 2010-04-24 07:55:19 900
我需要计算从 2010-04-02 00:00:00 到 2010-04-21 09:00:00 期间,以及从 2010-04-23 03:15:00 到 2010-04-24 08:00:00 期间的总持续时间(长度)。
棘手之处在于,某个事件的长度可能会超出指定时间段的末尾,而我不想把超出的那部分持续时间计算在内。
我希望得到:
- 2010-04-02 00:00:00 到 2010-04-21 09:00:00:12428 秒
- 2010-04-23 03:15:00 到 2010-04-24 08:00:00:10103 秒
我想使用 `lubridate` 为每一行定义一个间隔,然后对持续时间求和,但没能想出具体做法。
这是一个例子:
library(lubridate)
# Bounds of the window of interest.
t0 <- as.POSIXct("2010-04-02 00:00:00")
t1 <- as.POSIXct("2010-04-21 09:00:00")
# Add up the full `length` of every event whose start lies within the window;
# events are not clipped at the window's end.
with(dates, sum(length[start %within% interval(t0, t1)]))
# [1] 13200
我不太确定具体问的是什么。另一个答案只是把开始时间落在指定时间间隔内的那些行的 `length` 加起来。而我对这个问题的理解是:需要处理长度可能超出指定时间段末尾的事件,并且不把超出时间段末尾的那部分时间计算在内(反过来,对于在时间段开始之前就已开始的事件也是如此)。例如,第 7 行的事件就远远超过了 2010-04-21 09:00:00。这正是提供预期输出很有帮助的原因!
无论如何,下面是一种实现我所理解的需求的方法,并把它封装成了一个函数。基本思路是为每个事件创建新的开始和结束时间:如果事件超出了指定间隔,就取间隔的边界作为新的开始或结束。我可能遗漏了一些边缘情况,欢迎改进!
# Example events: one row per event, with its start time and its length in
# seconds.
# NOTE(review): the trailing " UTC" in each string does not appear to be
# parsed as a time zone (no `tz` argument is passed), so the session time
# zone would apply -- confirm.
dates <- data.frame(
  start = as.POSIXct(c(
    "2010-04-03 03:02:38 UTC", "2010-04-03 06:03:14 UTC",
    "2010-04-20 03:05:52 UTC", "2010-04-20 03:17:42 UTC",
    "2010-04-21 03:09:38 UTC", "2010-04-21 07:10:14 UTC",
    "2010-04-21 08:12:52 UTC", "2010-04-23 03:13:42 UTC",
    "2010-04-23 03:25:42 UTC", "2010-04-23 03:36:38 UTC",
    "2010-04-23 08:58:14 UTC", "2010-04-24 03:21:52 UTC",
    "2010-04-24 03:22:42 UTC", "2010-04-24 07:24:19 UTC",
    "2010-04-24 07:55:19 UTC"
  )),
  # The lengths cycle through 3600, 300, 900 for all 15 rows.
  length = rep(c(3600, 300, 900), times = 5)
)
library(dplyr)
library(lubridate)
#' Sum the seconds of each event that fall inside a time window
#'
#' Each row of `tbl` is an event running from `start` to `start + length`.
#' Events are clipped to the window one by one; events with no overlap end up
#' with a negative clipped duration and are dropped before summing.
#'
#' @param tbl Data frame with a date-time column `start` and a numeric column
#'   `length` (seconds).
#' @param interval_start,interval_end Window bounds, converted with
#'   `as.POSIXct()`.
#' @return A single number: the total seconds of events inside the window.
length_within <- function(tbl, interval_start, interval_end) {
  window_open <- as.POSIXct(interval_start)
  window_close <- as.POSIXct(interval_end)

  clipped <- mutate(
    tbl,
    end = start + length,
    # ifelse() drops the date-time class, so both bounds become plain
    # seconds-since-epoch numbers and their difference is in seconds.
    counted_start = ifelse(start < window_open, window_open, start),
    counted_end = ifelse(end > window_close, window_close, end),
    seconds = counted_end - counted_start
  )
  # A negative value means the event lies entirely outside the window.
  overlapping <- filter(clipped, seconds >= 0)
  totals <- summarise(overlapping, total = sum(seconds))
  totals[[1]]
}
# Clipped totals for the two windows from the question.
length_within(
  dates,
  interval_start = "2010-04-02 00:00:00",
  interval_end = "2010-04-21 09:00:00"
)
#> [1] 12428
length_within(
  dates,
  interval_start = "2010-04-23 03:15:00",
  interval_end = "2010-04-24 08:00:00"
)
#> [1] 10103
另一种可能的解决方案是使用 `dplyr` 中的 `first` 和 `last` 函数。借助 `first` 和 `last`,我们可以只调整第一行和最后一行的长度,然后再用 `sum` 求和。
library(dplyr)
#' Total event duration that falls inside a time window
#'
#' Every event covers `start` to `start + length` (seconds). Each event is
#' clipped to the window individually before summing. Trimming only the first
#' and last rows (by start time) is not enough: an earlier, longer event can
#' also run past `end_time`, and more than one event can begin before
#' `start_time`; those would otherwise be counted in full.
#'
#' @param df Data frame with a date-time column `start` and a numeric column
#'   `length` holding each event's duration in seconds.
#' @param start_time,end_time Window bounds; converted with `as.POSIXct()`.
#' @return A one-row data frame with a single numeric column `sum`: the
#'   seconds of events inside the window (0 when no event overlaps it).
calculate_duration <- function(df, start_time, end_time) {
  start_time <- as.POSIXct(start_time)
  end_time <- as.POSIXct(end_time)
  # Clip on the numeric (seconds since epoch) scale so the arithmetic does not
  # depend on how pmin()/pmax() carry date-time attributes.
  window_lo <- as.numeric(start_time)
  window_hi <- as.numeric(end_time)

  df %>%
    # Keep only events that touch the window at all.
    filter((start + length) >= start_time & start < end_time) %>%
    summarise(
      sum = sum(pmax(
        pmin(as.numeric(start) + length, window_hi) -
          pmax(as.numeric(start), window_lo),
        0 # never let a row contribute a negative duration
      ))
    ) %>%
    select(sum)
}
# Clipped totals for the two windows from the question.
calculate_duration(
  dates,
  start_time = "2010-04-02 00:00:00",
  end_time = "2010-04-21 09:00:00"
)
# sum
#1 12428
calculate_duration(
  dates,
  start_time = "2010-04-23 03:15:00",
  end_time = "2010-04-24 08:00:00"
)
# sum
#1 10103
# Data
# Example events: one row per event, with its start time and its length in
# seconds.
# NOTE(review): the trailing " UTC" in each string does not appear to be
# parsed as a time zone (no `tz` argument is passed), so the session time
# zone would apply -- confirm.
dates <- data.frame(
  start = as.POSIXct(c(
    "2010-04-03 03:02:38 UTC", "2010-04-03 06:03:14 UTC",
    "2010-04-20 03:05:52 UTC", "2010-04-20 03:17:42 UTC",
    "2010-04-21 03:09:38 UTC", "2010-04-21 07:10:14 UTC",
    "2010-04-21 08:12:52 UTC", "2010-04-23 03:13:42 UTC",
    "2010-04-23 03:25:42 UTC", "2010-04-23 03:36:38 UTC",
    "2010-04-23 08:58:14 UTC", "2010-04-24 03:21:52 UTC",
    "2010-04-24 03:22:42 UTC", "2010-04-24 07:24:19 UTC",
    "2010-04-24 07:55:19 UTC"
  )),
  # The lengths cycle through 3600, 300, 900 for all 15 rows.
  length = rep(c(3600, 300, 900), times = 5)
)