给定总消耗量、开始和结束时间,有没有办法计算每 30 分钟间隔的总耗电量?
Is there a way to calculate total power consumption per 30-minute interval given total consumption, start time and end time?
所以我有一个包含充电会话的数据框。每个充电会话都有一个开始时间、一个结束时间,以及在开始和结束时间之间的会话期间消耗的总千瓦时。
充电会话的开始和结束时间确实会重叠,因为它们同时发生在多个充电站。我需要知道所有充电会话和充电站合计在每个 30 分钟间隔内消耗的总千瓦时。
# Start timestamp of each charging session, as character strings.
start.time <- c(
  "2020-01-01 02:04:14 GMT",
  "2020-01-01 13:40:42 GMT",
  "2020-01-01 16:37:10 GMT",
  "2020-01-01 22:45:54 GMT",
  "2020-01-02 12:17:41 GMT",
  "2020-01-02 15:52:12 GMT"
)
# End timestamp of each charging session, in the same order as start.time.
end.time <- c(
  "2020-01-01 09:42:50 GMT",
  "2020-01-01 14:24:48 GMT",
  "2020-01-01 16:50:23 GMT",
  "2020-01-02 02:11:46 GMT",
  "2020-01-02 15:00:28 GMT",
  "2020-01-02 16:08:50 GMT"
)
# Total kWh consumed by each session between its start and end time.
total.kwh <- c(
  23.493, 2.475, 0.691,
  2.418, 9.778, 0.894
)
这是您要找的吗?
library(dplyr)
# Average energy per 30 minutes *within each session*: the session total
# scaled by 30 / (session length in minutes). This is a per-session rate,
# not a total per calendar half-hour.
df <- tibble(start.time, end.time, total.kwh)
df <- df %>%
  mutate(
    # difftime() converts the character timestamps itself. Passing tz = "GMT"
    # makes it parse them in the zone they are labelled with rather than the
    # machine's local zone, so a daylight-saving change in the local zone
    # cannot add or remove an hour from a session.
    kwh.per.30mins = total.kwh * 30 /
      as.numeric(difftime(end.time, start.time, tz = "GMT", units = "mins"))
  )
df
# A tibble: 6 x 4
start.time end.time total.kwh kwh.per.30mins
<chr> <chr> <dbl> <dbl>
1 2020-01-01 02:04:14 GMT 2020-01-01 09:42:50 GMT 23.5 1.54
2 2020-01-01 13:40:42 GMT 2020-01-01 14:24:48 GMT 2.48 1.68
3 2020-01-01 16:37:10 GMT 2020-01-01 16:50:23 GMT 0.691 1.57
4 2020-01-01 22:45:54 GMT 2020-01-02 02:11:46 GMT 2.42 0.352
5 2020-01-02 12:17:41 GMT 2020-01-02 15:00:28 GMT 9.78 1.80
6 2020-01-02 15:52:12 GMT 2020-01-02 16:08:50 GMT 0.894 1.61
您的根本问题是要把按单次充电会话记录的数据转换为按日历时间组织的数据。一旦做到这一点,其余的就都很简单了。
数据整理
library(tidyverse)
library(lubridate)
# Your input data
# Input data: one row per charging session, with character start/end
# timestamps and the session's total kWh.
df <- tibble(
  start.time = c(
    "2020-01-01 02:04:14 GMT",
    "2020-01-01 13:40:42 GMT",
    "2020-01-01 16:37:10 GMT",
    "2020-01-01 22:45:54 GMT",
    "2020-01-02 12:17:41 GMT",
    "2020-01-02 15:52:12 GMT"
  ),
  end.time = c(
    "2020-01-01 09:42:50 GMT",
    "2020-01-01 14:24:48 GMT",
    "2020-01-01 16:50:23 GMT",
    "2020-01-02 02:11:46 GMT",
    "2020-01-02 15:00:28 GMT",
    "2020-01-02 16:08:50 GMT"
  ),
  totalKWH = c(23.493, 2.475, 0.691, 2.418, 9.778, 0.894)
)
df
# A tibble: 6 x 3
start.time end.time totalKWH
<chr> <chr> <dbl>
1 2020-01-01 02:04:14 GMT 2020-01-01 09:42:50 GMT 23.5
2 2020-01-01 13:40:42 GMT 2020-01-01 14:24:48 GMT 2.48
3 2020-01-01 16:37:10 GMT 2020-01-01 16:50:23 GMT 0.691
4 2020-01-01 22:45:54 GMT 2020-01-02 02:11:46 GMT 2.42
5 2020-01-02 12:17:41 GMT 2020-01-02 15:00:28 GMT 9.78
6 2020-01-02 15:52:12 GMT 2020-01-02 16:08:50 GMT 0.894
将字符转换为日期时间,并派生一些有用的附加变量
# Parse the timestamps and derive per-session helpers: a session id, the
# duration, the average kWh per second, and one timestamp per second of the
# session (nested, to be unnested in a later step).
df <- df %>%
  # Number the sessions BEFORE rowwise(): under rowwise() every row is its own
  # group, so row_number() evaluated there is 1 for every session.
  mutate(Session = row_number()) %>%
  # Necessary to get seq to work: it needs one start/end pair at a time
  rowwise() %>%
  mutate(
    StartTime = ymd_hms(start.time),
    EndTime = ymd_hms(end.time),
    Duration = dseconds(StartTime %--% EndTime),
    kwhPerSecond = totalKWH / as.numeric(Duration),
    # A list of single timestamps wrapped in a list column; this shape is
    # why the later step calls unnest() twice.
    # NOTE(review): seq() includes both endpoints, so each session yields
    # Duration + 1 timestamps and summing kwhPerSecond over them slightly
    # exceeds totalKWH.
    Time = list(as.list(seq(StartTime, EndTime, dseconds(1))))
  ) %>%
  select(-start.time, -end.time) %>%
  # Remove side effects of rowwise
  ungroup()
df
# A tibble: 6 x 7
totalKWH Session StartTime EndTime Duration kwhPerSecond Time
<dbl> <int> <dttm> <dttm> <Duration> <dbl> <list>
1 23.5 1 2020-01-01 02:04:14 2020-01-01 09:42:50 27516s (~7.64 hours) 0.000854 <list [27,517]>
2 2.48 1 2020-01-01 13:40:42 2020-01-01 14:24:48 2646s (~44.1 minutes) 0.000935 <list [2,647]>
3 0.691 1 2020-01-01 16:37:10 2020-01-01 16:50:23 793s (~13.22 minutes) 0.000871 <list [794]>
4 2.42 1 2020-01-01 22:45:54 2020-01-02 02:11:46 12352s (~3.43 hours) 0.000196 <list [12,353]>
5 9.78 1 2020-01-02 12:17:41 2020-01-02 15:00:28 9767s (~2.71 hours) 0.00100 <list [9,768]>
6 0.894 1 2020-01-02 15:52:12 2020-01-02 16:08:50 998s (~16.63 minutes) 0.000896 <list [999]>
从每会话一行转换为每秒一行
# Expand to one row per session-second and keep only the columns needed for
# the slot totals.
df <- df %>%
  # Time is a list column whose elements are themselves lists, so it has to
  # be unnested twice (deliberate, not a typo).
  unnest(cols = Time) %>%
  unnest(cols = Time) %>%
  select(all_of(c("Session", "Time", "kwhPerSecond")))
df
# A tibble: 54,078 x 3
Session Time kwhPerSecond
<int> <dttm> <dbl>
1 1 2020-01-01 02:04:14 0.000854
2 1 2020-01-01 02:04:15 0.000854
3 1 2020-01-01 02:04:16 0.000854
4 1 2020-01-01 02:04:17 0.000854
5 1 2020-01-01 02:04:18 0.000854
6 1 2020-01-01 02:04:19 0.000854
7 1 2020-01-01 02:04:20 0.000854
8 1 2020-01-01 02:04:21 0.000854
9 1 2020-01-01 02:04:22 0.000854
10 1 2020-01-01 02:04:23 0.000854
# … with 54,068 more rows
记下我们的基线
# Baseline for slot numbering: the calendar date of the earliest timestamp.
midnight <- df$Time %>%
  min() %>%
  as_date()
midnight
[1] "2020-01-01"
从一天中的时间转换为从午夜开始的半小时时段
# Assign each timestamp a half-hour slot number counted from `midnight`.
# Because of ceiling(), slot k covers the minutes ((k - 1) * 30, k * 30]
# after midnight: a timestamp exactly on a half-hour boundary is counted in
# the slot that ends there, not the one that starts there.
df <- mutate(
  df,
  Slot = ceiling((midnight %--% Time) / dminutes(30))
)
df
# A tibble: 54,078 x 4
Session Time kwhPerSecond Slot
<int> <dttm> <dbl> <dbl>
1 1 2020-01-01 02:04:14 0.000854 5
2 1 2020-01-01 02:04:15 0.000854 5
3 1 2020-01-01 02:04:16 0.000854 5
4 1 2020-01-01 02:04:17 0.000854 5
5 1 2020-01-01 02:04:18 0.000854 5
6 1 2020-01-01 02:04:19 0.000854 5
7 1 2020-01-01 02:04:20 0.000854 5
8 1 2020-01-01 02:04:21 0.000854 5
9 1 2020-01-01 02:04:22 0.000854 5
10 1 2020-01-01 02:04:23 0.000854 5
# … with 54,068 more rows
终于...
分析
# Total kWh per half-hour slot, summed over every session-second in the slot.
# SlotStart is the earliest timestamp observed in the slot, which is not
# necessarily the nominal half-hour boundary.
summarise(
  group_by(df, Slot),
  SlotStart = min(Time),
  TotalPowerConsumption = sum(kwhPerSecond),
  .groups = "drop"
)
# A tibble: 36 x 3
Slot SlotStart TotalPowerConsumption
<dbl> <dttm> <dbl>
1 5 2020-01-01 02:04:14 1.32
2 6 2020-01-01 02:30:01 1.54
3 7 2020-01-01 03:00:01 1.54
4 8 2020-01-01 03:30:01 1.54
5 9 2020-01-01 04:00:01 1.54
6 10 2020-01-01 04:30:01 1.54
7 11 2020-01-01 05:00:01 1.54
8 12 2020-01-01 05:30:01 1.54
9 13 2020-01-01 06:00:01 1.54
10 14 2020-01-01 06:30:01 1.54
# … with 26 more rows
最后一项收尾工作是确保第一个时段的“开始时间”对齐到整半小时。不过我想我已经做得够多了,剩下的你应该可以自行解决。
所以我有一个包含充电会话的数据框。每个充电会话都有一个开始时间、一个结束时间,以及在开始和结束时间之间的会话期间消耗的总千瓦时。
充电会话的开始和结束时间确实会重叠,因为它们同时发生在多个充电站。我需要知道所有充电会话和充电站合计在每个 30 分钟间隔内消耗的总千瓦时。
# Start timestamp of each charging session, as character strings.
start.time <- c(
  "2020-01-01 02:04:14 GMT",
  "2020-01-01 13:40:42 GMT",
  "2020-01-01 16:37:10 GMT",
  "2020-01-01 22:45:54 GMT",
  "2020-01-02 12:17:41 GMT",
  "2020-01-02 15:52:12 GMT"
)
# End timestamp of each charging session, in the same order as start.time.
end.time <- c(
  "2020-01-01 09:42:50 GMT",
  "2020-01-01 14:24:48 GMT",
  "2020-01-01 16:50:23 GMT",
  "2020-01-02 02:11:46 GMT",
  "2020-01-02 15:00:28 GMT",
  "2020-01-02 16:08:50 GMT"
)
# Total kWh consumed by each session between its start and end time.
total.kwh <- c(
  23.493, 2.475, 0.691,
  2.418, 9.778, 0.894
)
这是您要找的吗?
library(dplyr)
# Average energy per 30 minutes *within each session*: the session total
# scaled by 30 / (session length in minutes). This is a per-session rate,
# not a total per calendar half-hour.
df <- tibble(start.time, end.time, total.kwh)
df <- df %>%
  mutate(
    # difftime() converts the character timestamps itself. Passing tz = "GMT"
    # makes it parse them in the zone they are labelled with rather than the
    # machine's local zone, so a daylight-saving change in the local zone
    # cannot add or remove an hour from a session.
    kwh.per.30mins = total.kwh * 30 /
      as.numeric(difftime(end.time, start.time, tz = "GMT", units = "mins"))
  )
df
# A tibble: 6 x 4
start.time end.time total.kwh kwh.per.30mins
<chr> <chr> <dbl> <dbl>
1 2020-01-01 02:04:14 GMT 2020-01-01 09:42:50 GMT 23.5 1.54
2 2020-01-01 13:40:42 GMT 2020-01-01 14:24:48 GMT 2.48 1.68
3 2020-01-01 16:37:10 GMT 2020-01-01 16:50:23 GMT 0.691 1.57
4 2020-01-01 22:45:54 GMT 2020-01-02 02:11:46 GMT 2.42 0.352
5 2020-01-02 12:17:41 GMT 2020-01-02 15:00:28 GMT 9.78 1.80
6 2020-01-02 15:52:12 GMT 2020-01-02 16:08:50 GMT 0.894 1.61
您的根本问题是要把按单次充电会话记录的数据转换为按日历时间组织的数据。一旦做到这一点,其余的就都很简单了。
数据整理
library(tidyverse)
library(lubridate)
# Your input data
# Input data: one row per charging session, with character start/end
# timestamps and the session's total kWh.
df <- tibble(
  start.time = c(
    "2020-01-01 02:04:14 GMT",
    "2020-01-01 13:40:42 GMT",
    "2020-01-01 16:37:10 GMT",
    "2020-01-01 22:45:54 GMT",
    "2020-01-02 12:17:41 GMT",
    "2020-01-02 15:52:12 GMT"
  ),
  end.time = c(
    "2020-01-01 09:42:50 GMT",
    "2020-01-01 14:24:48 GMT",
    "2020-01-01 16:50:23 GMT",
    "2020-01-02 02:11:46 GMT",
    "2020-01-02 15:00:28 GMT",
    "2020-01-02 16:08:50 GMT"
  ),
  totalKWH = c(23.493, 2.475, 0.691, 2.418, 9.778, 0.894)
)
df
# A tibble: 6 x 3
start.time end.time totalKWH
<chr> <chr> <dbl>
1 2020-01-01 02:04:14 GMT 2020-01-01 09:42:50 GMT 23.5
2 2020-01-01 13:40:42 GMT 2020-01-01 14:24:48 GMT 2.48
3 2020-01-01 16:37:10 GMT 2020-01-01 16:50:23 GMT 0.691
4 2020-01-01 22:45:54 GMT 2020-01-02 02:11:46 GMT 2.42
5 2020-01-02 12:17:41 GMT 2020-01-02 15:00:28 GMT 9.78
6 2020-01-02 15:52:12 GMT 2020-01-02 16:08:50 GMT 0.894
将字符转换为日期时间,并派生一些有用的附加变量
# Parse the timestamps and derive per-session helpers: a session id, the
# duration, the average kWh per second, and one timestamp per second of the
# session (nested, to be unnested in a later step).
df <- df %>%
  # Number the sessions BEFORE rowwise(): under rowwise() every row is its own
  # group, so row_number() evaluated there is 1 for every session.
  mutate(Session = row_number()) %>%
  # Necessary to get seq to work: it needs one start/end pair at a time
  rowwise() %>%
  mutate(
    StartTime = ymd_hms(start.time),
    EndTime = ymd_hms(end.time),
    Duration = dseconds(StartTime %--% EndTime),
    kwhPerSecond = totalKWH / as.numeric(Duration),
    # A list of single timestamps wrapped in a list column; this shape is
    # why the later step calls unnest() twice.
    # NOTE(review): seq() includes both endpoints, so each session yields
    # Duration + 1 timestamps and summing kwhPerSecond over them slightly
    # exceeds totalKWH.
    Time = list(as.list(seq(StartTime, EndTime, dseconds(1))))
  ) %>%
  select(-start.time, -end.time) %>%
  # Remove side effects of rowwise
  ungroup()
df
# A tibble: 6 x 7
totalKWH Session StartTime EndTime Duration kwhPerSecond Time
<dbl> <int> <dttm> <dttm> <Duration> <dbl> <list>
1 23.5 1 2020-01-01 02:04:14 2020-01-01 09:42:50 27516s (~7.64 hours) 0.000854 <list [27,517]>
2 2.48 1 2020-01-01 13:40:42 2020-01-01 14:24:48 2646s (~44.1 minutes) 0.000935 <list [2,647]>
3 0.691 1 2020-01-01 16:37:10 2020-01-01 16:50:23 793s (~13.22 minutes) 0.000871 <list [794]>
4 2.42 1 2020-01-01 22:45:54 2020-01-02 02:11:46 12352s (~3.43 hours) 0.000196 <list [12,353]>
5 9.78 1 2020-01-02 12:17:41 2020-01-02 15:00:28 9767s (~2.71 hours) 0.00100 <list [9,768]>
6 0.894 1 2020-01-02 15:52:12 2020-01-02 16:08:50 998s (~16.63 minutes) 0.000896 <list [999]>
从每会话一行转换为每秒一行
# Expand to one row per session-second and keep only the columns needed for
# the slot totals.
df <- df %>%
  # Time is a list column whose elements are themselves lists, so it has to
  # be unnested twice (deliberate, not a typo).
  unnest(cols = Time) %>%
  unnest(cols = Time) %>%
  select(all_of(c("Session", "Time", "kwhPerSecond")))
df
# A tibble: 54,078 x 3
Session Time kwhPerSecond
<int> <dttm> <dbl>
1 1 2020-01-01 02:04:14 0.000854
2 1 2020-01-01 02:04:15 0.000854
3 1 2020-01-01 02:04:16 0.000854
4 1 2020-01-01 02:04:17 0.000854
5 1 2020-01-01 02:04:18 0.000854
6 1 2020-01-01 02:04:19 0.000854
7 1 2020-01-01 02:04:20 0.000854
8 1 2020-01-01 02:04:21 0.000854
9 1 2020-01-01 02:04:22 0.000854
10 1 2020-01-01 02:04:23 0.000854
# … with 54,068 more rows
记下我们的基线
# Baseline for slot numbering: the calendar date of the earliest timestamp.
midnight <- df$Time %>%
  min() %>%
  as_date()
midnight
[1] "2020-01-01"
从一天中的时间转换为从午夜开始的半小时时段
# Assign each timestamp a half-hour slot number counted from `midnight`.
# Because of ceiling(), slot k covers the minutes ((k - 1) * 30, k * 30]
# after midnight: a timestamp exactly on a half-hour boundary is counted in
# the slot that ends there, not the one that starts there.
df <- mutate(
  df,
  Slot = ceiling((midnight %--% Time) / dminutes(30))
)
df
# A tibble: 54,078 x 4
Session Time kwhPerSecond Slot
<int> <dttm> <dbl> <dbl>
1 1 2020-01-01 02:04:14 0.000854 5
2 1 2020-01-01 02:04:15 0.000854 5
3 1 2020-01-01 02:04:16 0.000854 5
4 1 2020-01-01 02:04:17 0.000854 5
5 1 2020-01-01 02:04:18 0.000854 5
6 1 2020-01-01 02:04:19 0.000854 5
7 1 2020-01-01 02:04:20 0.000854 5
8 1 2020-01-01 02:04:21 0.000854 5
9 1 2020-01-01 02:04:22 0.000854 5
10 1 2020-01-01 02:04:23 0.000854 5
# … with 54,068 more rows
终于...
分析
# Total kWh per half-hour slot, summed over every session-second in the slot.
# SlotStart is the earliest timestamp observed in the slot, which is not
# necessarily the nominal half-hour boundary.
summarise(
  group_by(df, Slot),
  SlotStart = min(Time),
  TotalPowerConsumption = sum(kwhPerSecond),
  .groups = "drop"
)
# A tibble: 36 x 3
Slot SlotStart TotalPowerConsumption
<dbl> <dttm> <dbl>
1 5 2020-01-01 02:04:14 1.32
2 6 2020-01-01 02:30:01 1.54
3 7 2020-01-01 03:00:01 1.54
4 8 2020-01-01 03:30:01 1.54
5 9 2020-01-01 04:00:01 1.54
6 10 2020-01-01 04:30:01 1.54
7 11 2020-01-01 05:00:01 1.54
8 12 2020-01-01 05:30:01 1.54
9 13 2020-01-01 06:00:01 1.54
10 14 2020-01-01 06:30:01 1.54
# … with 26 more rows
最后一项收尾工作是确保第一个时段的“开始时间”对齐到整半小时。不过我想我已经做得够多了,剩下的你应该可以自行解决。