选择并过滤从凌晨 03:00 到第二天凌晨 03:00 的数据
Select and filter data from 03:00 am to 03:00 am the next day
我正在尝试为每个日期和 ID 组合,查找在当天 03:00:00 到第二天 03:00:00 之间(即凌晨 3 点到第二天凌晨 3 点)出现的第一条 "ON" 记录。
# Dummy data for the question. `start_day` is a yyyymmdd number and `hhmmss` a
# time-of-day string; `end_time` appears to be a yyyymmddhhmmss number.
# NOTE(review): the "03000" entry has only five characters, as originally posted.
df <- tibble::tribble(
  ~id,      ~code,    ~start_day, ~hhmmss,  ~end_time,
  "7050-1", "ON",     20200227,   "000000", 20200227002400,
  "7050-1", "SNOOZE", 20200227,   "002400", 20200227003400,
  "7050-1", "OFF",    20200227,   "003400", 20200227003545,
  "7050-1", "ON",     20200227,   "003545", 20200227004815,
  "7050-1", "SLP",    20200227,   "004815", 20200227021400,
  "7050-1", "SLP",    20200227,   "021400", 20200227073415,
  "7050-1", "ON",     20200227,   "073415", 20200227164515,
  "7050-1", "ON",     20200228,   "025936", 20200227045936,
  "265-1",  "ON",     20200227,   "000000", 20200227002400,
  "265-1",  "SNOOZE", 20200227,   "164515", 20200227165515,
  "265-1",  "OFF",    20200227,   "165515", 20200228025936,
  "265-1",  "ON",     20200228,   "023536", 20200228025536,
  "265-1",  "OFF",    20200228,   "025536", 20200228003000,
  "265-1",  "ON",     20200228,   "03000",  20200228035936,
  "265-2",  "OFF",    20200228,   "000000", 20200228180000,
  "265-2",  "OFF",    20200228,   "180000", 20200228235959,
  "265-2",  "ON",     20200229,   "000000", 20200229020000
)
这是我目前的尝试
# Current attempt: keep the "ON" rows whose start time is 02:59:59 or later.
# Known limitation: the cut-off is applied to the time of day alone, so "ON"
# rows that start earlier in the day are dropped.
library(dplyr)
library(lubridate)

df %>%
  mutate(
    # Re-format the "hhmmss" string as "HH:MM:SS" so that hms can parse it.
    time = format(strptime(hhmmss, format = "%H%M%S"), format = "%H:%M:%S"),
    # hms::as_hms() replaces the deprecated hms::as.hms(); it takes no `format`.
    time = hms::as_hms(time),
    date = ymd(start_day)
  ) %>%
  group_by(date, id) %>%
  filter(time >= hms::as_hms("02:59:59") & code == "ON")
但我不应该那样使用过滤器,因为这样我会丢失 265-2 的记录。下面是我期望的输出。(** 265-2 的记录发生在 2020-02-28 03:00:00 到 2020-02-29 03:00:00 的班次内,应该分配给日期 28/02 而不是 29/02。)希望我说清楚了。
id code date time
7050-1 ON 20200227 07:34:15
265-1 ON 20200228 03:00:00
265-2 ON 20200228 00:00:00
我不确定是否完全理解了你的逻辑,但希望这对你有所帮助:
# Assign every row to a "shift date" by moving its start timestamp back three
# hours, then keep only the "ON" rows.
library(dplyr)
library(lubridate)

df %>%
  mutate(
    date = ymd(start_day),
    # Re-format the "hhmmss" string as "HH:MM:SS" so that hms can parse it.
    time = format(strptime(hhmmss, format = "%H%M%S"), format = "%H:%M:%S"),
    # hms::as_hms() replaces the deprecated hms::as.hms(); it takes no `format`.
    time = hms::as_hms(time)
  ) %>%
  mutate(
    # Subtracting 3 hours makes a 03:00-to-03:00 window fall on a single
    # calendar date, which is then extracted as `new_date`.
    new_date = as_datetime(glue::glue("{date} {time}")) - hours(3),
    new_date = as_date(new_date)
  ) %>%
  filter(code == "ON")
# A tibble: 8 x 8
id code start_day hhmmss end_time date time new_date
<chr> <chr> <dbl> <chr> <dbl> <date> <drtn> <date>
1 7050-1 ON 20200227 000000 2.02e13 2020-02-27 00:00 2020-02-26
2 7050-1 ON 20200227 003545 2.02e13 2020-02-27 00:35 2020-02-26
3 7050-1 ON 20200227 073415 2.02e13 2020-02-27 07:34 2020-02-27
4 7050-1 ON 20200228 025936 2.02e13 2020-02-28 02:59 2020-02-27
5 265-1 ON 20200227 000000 2.02e13 2020-02-27 00:00 2020-02-26
6 265-1 ON 20200228 023536 2.02e13 2020-02-28 02:35 2020-02-27
7 265-1 ON 20200228 03000 2.02e13 2020-02-28 03:00 2020-02-28
8 265-2 ON 20200229 000000 2.02e13 2020-02-29 00:00 2020-02-28
我正在尝试为每个日期和 ID 组合,查找在当天 03:00:00 到第二天 03:00:00 之间(即凌晨 3 点到第二天凌晨 3 点)出现的第一条 "ON" 记录。
# Dummy data for the question. `start_day` is a yyyymmdd number and `hhmmss` a
# time-of-day string; `end_time` appears to be a yyyymmddhhmmss number.
# NOTE(review): the "03000" entry has only five characters, as originally posted.
df <- tibble::tribble(
  ~id,      ~code,    ~start_day, ~hhmmss,  ~end_time,
  "7050-1", "ON",     20200227,   "000000", 20200227002400,
  "7050-1", "SNOOZE", 20200227,   "002400", 20200227003400,
  "7050-1", "OFF",    20200227,   "003400", 20200227003545,
  "7050-1", "ON",     20200227,   "003545", 20200227004815,
  "7050-1", "SLP",    20200227,   "004815", 20200227021400,
  "7050-1", "SLP",    20200227,   "021400", 20200227073415,
  "7050-1", "ON",     20200227,   "073415", 20200227164515,
  "7050-1", "ON",     20200228,   "025936", 20200227045936,
  "265-1",  "ON",     20200227,   "000000", 20200227002400,
  "265-1",  "SNOOZE", 20200227,   "164515", 20200227165515,
  "265-1",  "OFF",    20200227,   "165515", 20200228025936,
  "265-1",  "ON",     20200228,   "023536", 20200228025536,
  "265-1",  "OFF",    20200228,   "025536", 20200228003000,
  "265-1",  "ON",     20200228,   "03000",  20200228035936,
  "265-2",  "OFF",    20200228,   "000000", 20200228180000,
  "265-2",  "OFF",    20200228,   "180000", 20200228235959,
  "265-2",  "ON",     20200229,   "000000", 20200229020000
)
这是我目前的尝试
# Current attempt: keep the "ON" rows whose start time is 02:59:59 or later.
# Known limitation: the cut-off is applied to the time of day alone, so "ON"
# rows that start earlier in the day are dropped.
library(dplyr)
library(lubridate)

df %>%
  mutate(
    # Re-format the "hhmmss" string as "HH:MM:SS" so that hms can parse it.
    time = format(strptime(hhmmss, format = "%H%M%S"), format = "%H:%M:%S"),
    # hms::as_hms() replaces the deprecated hms::as.hms(); it takes no `format`.
    time = hms::as_hms(time),
    date = ymd(start_day)
  ) %>%
  group_by(date, id) %>%
  filter(time >= hms::as_hms("02:59:59") & code == "ON")
但我不应该那样使用过滤器,因为这样我会丢失 265-2 的记录。下面是我期望的输出。(** 265-2 的记录发生在 2020-02-28 03:00:00 到 2020-02-29 03:00:00 的班次内,应该分配给日期 28/02 而不是 29/02。)希望我说清楚了。
id code date time
7050-1 ON 20200227 07:34:15
265-1 ON 20200228 03:00:00
265-2 ON 20200228 00:00:00
我不确定是否完全理解了你的逻辑,但希望这对你有所帮助:
# Assign every row to a "shift date" by moving its start timestamp back three
# hours, then keep only the "ON" rows.
library(dplyr)
library(lubridate)

df %>%
  mutate(
    date = ymd(start_day),
    # Re-format the "hhmmss" string as "HH:MM:SS" so that hms can parse it.
    time = format(strptime(hhmmss, format = "%H%M%S"), format = "%H:%M:%S"),
    # hms::as_hms() replaces the deprecated hms::as.hms(); it takes no `format`.
    time = hms::as_hms(time)
  ) %>%
  mutate(
    # Subtracting 3 hours makes a 03:00-to-03:00 window fall on a single
    # calendar date, which is then extracted as `new_date`.
    new_date = as_datetime(glue::glue("{date} {time}")) - hours(3),
    new_date = as_date(new_date)
  ) %>%
  filter(code == "ON")
# A tibble: 8 x 8
id code start_day hhmmss end_time date time new_date
<chr> <chr> <dbl> <chr> <dbl> <date> <drtn> <date>
1 7050-1 ON 20200227 000000 2.02e13 2020-02-27 00:00 2020-02-26
2 7050-1 ON 20200227 003545 2.02e13 2020-02-27 00:35 2020-02-26
3 7050-1 ON 20200227 073415 2.02e13 2020-02-27 07:34 2020-02-27
4 7050-1 ON 20200228 025936 2.02e13 2020-02-28 02:59 2020-02-27
5 265-1 ON 20200227 000000 2.02e13 2020-02-27 00:00 2020-02-26
6 265-1 ON 20200228 023536 2.02e13 2020-02-28 02:35 2020-02-27
7 265-1 ON 20200228 03000 2.02e13 2020-02-28 03:00 2020-02-28
8 265-2 ON 20200229 000000 2.02e13 2020-02-29 00:00 2020-02-28