组合指示标记开始和结束的 POSIXct 列
Combine POSIXct columns indicating start and end of a marker
我有一个主数据框 df,另有一个单独的数据框 df_marker,其中记录了每个标记的 start(开始)和 end(结束)时间。df 每秒一行,时间列为 POSIXct 格式。我想把 df_marker 合并到 df 中,使 start 到 end 之间(含两端)的每一秒都显示对应的标记。
可复现示例(reprex):
library(dplyr)
# Example data: six consecutive one-second timestamps, 12:00:00 to 12:00:05.
df <- tibble(
  date = as.POSIXct(sprintf("2020-11-17 12:00:%02d", 0:5))
)
df
#> # A tibble: 6 x 1
#> date
#> <dttm>
#> 1 2020-11-17 12:00:00
#> 2 2020-11-17 12:00:01
#> 3 2020-11-17 12:00:02
#> 4 2020-11-17 12:00:03
#> 5 2020-11-17 12:00:04
#> 6 2020-11-17 12:00:05
# Marker lookup table: one row per marker, holding the first (`start`) and
# last (`end`) timestamp the marker applies to. Column order matters to code
# that selects these columns by position.
df_marker <- tibble(
  start = as.POSIXct("2020-11-17 12:00:02"),
  end = as.POSIXct("2020-11-17 12:00:05"),
  marker = "marker_1"
)
df_marker
#> # A tibble: 1 x 3
#> start end marker
#> <dttm> <dttm> <chr>
#> 1 2020-11-17 12:00:02 2020-11-17 12:00:05 marker_1
这是我的预期输出:
#> # A tibble: 6 x 2
#> date marker
#> <dttm> <chr>
#> 1 2020-11-17 12:00:00 <NA>
#> 2 2020-11-17 12:00:01 <NA>
#> 3 2020-11-17 12:00:02 marker_1
#> 4 2020-11-17 12:00:03 marker_1
#> 5 2020-11-17 12:00:04 marker_1
#> 6 2020-11-17 12:00:05 marker_1
有什么想法吗?
这样可行吗:
library(dplyr)
library(tidyr)
library(purrr)
# Expand every marker into one row per second from `start` to `end`
# (inclusive), then attach the marker to the matching timestamps in `df`.
# Timestamps in `df` outside every marker range get `NA`.
#
# `seq()` on POSIXct keeps the class and time zone, so no numeric round trip
# through `as.POSIXct(..., origin = )` is needed. Assumes start <= end.
# Columns are selected by name so extra columns in `df_marker` cannot shift
# the selection.
df_marker %>%
  mutate(date = map2(start, end, ~ seq(.x, .y, by = "1 sec"))) %>%
  unnest(date) %>%
  select(date, marker) %>%
  # `date` is the only shared column, so the natural join keys on it.
  right_join(df) %>%
  arrange(date) %>%
  select(date, marker)
Joining, by = "date"
# A tibble: 6 x 2
date marker
<dttm> <chr>
1 2020-11-17 12:00:00 NA
2 2020-11-17 12:00:01 NA
3 2020-11-17 12:00:02 marker_1
4 2020-11-17 12:00:03 marker_1
5 2020-11-17 12:00:04 marker_1
6 2020-11-17 12:00:05 marker_1
按分钟粒度处理:
library(lubridate)
df
# A tibble: 6 x 1
date
<dttm>
1 2020-11-17 12:00:00
2 2020-11-17 12:01:01
3 2020-11-17 12:02:02
4 2020-11-17 12:03:03
5 2020-11-17 12:04:04
6 2020-11-17 12:05:05
df_marker
# A tibble: 1 x 3
start end marker
<dttm> <dttm> <chr>
1 2020-11-17 12:01:02 2020-11-17 12:03:05 marker_1
# Minute resolution: round the marker bounds and the `df` timestamps to the
# nearest minute, expand each marker into one row per minute, then join.
#
# Stepping by "1 min" avoids generating a row for every second in the range
# when only whole minutes can match the rounded `df` timestamps.
# Assumes start <= end after rounding.
df_marker %>%
  mutate(across(start:end, ~ round_date(., unit = "1 minutes"))) %>%
  mutate(date = map2(start, end, ~ seq(.x, .y, by = "1 min"))) %>%
  unnest(date) %>%
  select(date, marker) %>%
  # `date` is the only shared column, so the natural join keys on it.
  right_join(df %>% mutate(date = round_date(date, unit = "1 minutes"))) %>%
  arrange(date) %>%
  select(date, marker)
Joining, by = "date"
# A tibble: 6 x 2
date marker
<dttm> <chr>
1 2020-11-17 12:00:00 NA
2 2020-11-17 12:01:00 marker_1
3 2020-11-17 12:02:00 marker_1
4 2020-11-17 12:03:00 marker_1
5 2020-11-17 12:04:00 NA
6 2020-11-17 12:05:00 NA
作为 Karthik S 出色答案的替代方案,您还可以先用 interval() 为 df_marker 构造时间区间,然后用 %within% 检查 df 中的每个日期是否落在该区间内。这两个函数都来自 lubridate 包。
library(lubridate)
# Store each marker's span as a lubridate Interval so timestamps can be
# tested against it with `%within%`.
df_marker <- mutate(df_marker, interval = interval(start, end))
start end marker interval
<dttm> <dttm> <chr> <interval>
1 2020-11-17 12:01:02 2020-11-17 12:03:05 marker_1 2020-11-17 12:01:02 CET--2020-11-17 12:03:05 CET
# Label each timestamp in `df` with the marker whose interval contains it.
#
# Each date is tested against every interval in `df_marker`, and the first
# matching marker wins. A plain `ifelse(date %within% df_marker$interval, ...)`
# recycles both vectors element-wise, which is only correct for a one-row
# `df_marker`. The result is always character: `NA_character_` when no
# interval matches, including when `df_marker` is empty.
df <- df %>%
  mutate(
    marker = vapply(
      seq_along(date),
      function(i) {
        hit <- which(date[i] %within% df_marker$interval)
        if (length(hit) > 0) {
          as.character(df_marker$marker[hit[1]])
        } else {
          NA_character_
        }
      },
      character(1)
    )
  )
> df
# A tibble: 7 x 2
date marker
<dttm> <chr>
1 2020-11-17 12:00:00 NA
2 2020-11-17 12:01:01 NA
3 2020-11-17 12:02:02 marker_1
4 2020-11-17 12:03:03 marker_1
5 2020-11-17 12:04:04 NA
6 2020-11-17 12:05:05 NA
7 2020-11-17 12:06:06 NA
若要按分钟处理,请用 round_date() 对两个表中的区间端点和日期列取整。该函数同样来自 lubridate 包。
# Minute resolution: build each marker's Interval from bounds rounded to the
# nearest minute.
df_marker <- mutate(
  df_marker,
  interval = interval(round_date(start, "minute"), round_date(end, "minute"))
)
# Label each timestamp in `df` with the marker whose interval contains the
# timestamp rounded to the nearest minute. The `date` column itself is left
# unrounded.
#
# Each rounded date is tested against every interval in `df_marker`, and the
# first matching marker wins. An element-wise `ifelse()` over
# `df_marker$interval` is only correct for a one-row `df_marker`. The result
# is always character: `NA_character_` when no interval matches.
df <- df %>%
  mutate(
    marker = vapply(
      seq_along(date),
      function(i) {
        rounded <- round_date(date[i], "minute")
        hit <- which(rounded %within% df_marker$interval)
        if (length(hit) > 0) {
          as.character(df_marker$marker[hit[1]])
        } else {
          NA_character_
        }
      },
      character(1)
    )
  )
> df
# A tibble: 7 x 2
date marker
<dttm> <chr>
1 2020-11-17 12:00:00 NA
2 2020-11-17 12:01:01 marker_1
3 2020-11-17 12:02:02 marker_1
4 2020-11-17 12:03:03 marker_1
5 2020-11-17 12:04:04 NA
6 2020-11-17 12:05:05 NA
我有一个主数据框 df,另有一个单独的数据框 df_marker,其中记录了每个标记的 start(开始)和 end(结束)时间。df 每秒一行,时间列为 POSIXct 格式。我想把 df_marker 合并到 df 中,使 start 到 end 之间(含两端)的每一秒都显示对应的标记。
可复现示例(reprex):
library(dplyr)
# Example data: six consecutive one-second timestamps, 12:00:00 to 12:00:05.
df <- tibble(
  date = as.POSIXct(sprintf("2020-11-17 12:00:%02d", 0:5))
)
df
#> # A tibble: 6 x 1
#> date
#> <dttm>
#> 1 2020-11-17 12:00:00
#> 2 2020-11-17 12:00:01
#> 3 2020-11-17 12:00:02
#> 4 2020-11-17 12:00:03
#> 5 2020-11-17 12:00:04
#> 6 2020-11-17 12:00:05
# Marker lookup table: one row per marker, holding the first (`start`) and
# last (`end`) timestamp the marker applies to. Column order matters to code
# that selects these columns by position.
df_marker <- tibble(
  start = as.POSIXct("2020-11-17 12:00:02"),
  end = as.POSIXct("2020-11-17 12:00:05"),
  marker = "marker_1"
)
df_marker
#> # A tibble: 1 x 3
#> start end marker
#> <dttm> <dttm> <chr>
#> 1 2020-11-17 12:00:02 2020-11-17 12:00:05 marker_1
这是我的预期输出:
#> # A tibble: 6 x 2
#> date marker
#> <dttm> <chr>
#> 1 2020-11-17 12:00:00 <NA>
#> 2 2020-11-17 12:00:01 <NA>
#> 3 2020-11-17 12:00:02 marker_1
#> 4 2020-11-17 12:00:03 marker_1
#> 5 2020-11-17 12:00:04 marker_1
#> 6 2020-11-17 12:00:05 marker_1
有什么想法吗?
这样可行吗:
library(dplyr)
library(tidyr)
library(purrr)
# Expand every marker into one row per second from `start` to `end`
# (inclusive), then attach the marker to the matching timestamps in `df`.
# Timestamps in `df` outside every marker range get `NA`.
#
# `seq()` on POSIXct keeps the class and time zone, so no numeric round trip
# through `as.POSIXct(..., origin = )` is needed. Assumes start <= end.
# Columns are selected by name so extra columns in `df_marker` cannot shift
# the selection.
df_marker %>%
  mutate(date = map2(start, end, ~ seq(.x, .y, by = "1 sec"))) %>%
  unnest(date) %>%
  select(date, marker) %>%
  # `date` is the only shared column, so the natural join keys on it.
  right_join(df) %>%
  arrange(date) %>%
  select(date, marker)
Joining, by = "date"
# A tibble: 6 x 2
date marker
<dttm> <chr>
1 2020-11-17 12:00:00 NA
2 2020-11-17 12:00:01 NA
3 2020-11-17 12:00:02 marker_1
4 2020-11-17 12:00:03 marker_1
5 2020-11-17 12:00:04 marker_1
6 2020-11-17 12:00:05 marker_1
按分钟粒度处理:
library(lubridate)
df
# A tibble: 6 x 1
date
<dttm>
1 2020-11-17 12:00:00
2 2020-11-17 12:01:01
3 2020-11-17 12:02:02
4 2020-11-17 12:03:03
5 2020-11-17 12:04:04
6 2020-11-17 12:05:05
df_marker
# A tibble: 1 x 3
start end marker
<dttm> <dttm> <chr>
1 2020-11-17 12:01:02 2020-11-17 12:03:05 marker_1
# Minute resolution: round the marker bounds and the `df` timestamps to the
# nearest minute, expand each marker into one row per minute, then join.
#
# Stepping by "1 min" avoids generating a row for every second in the range
# when only whole minutes can match the rounded `df` timestamps.
# Assumes start <= end after rounding.
df_marker %>%
  mutate(across(start:end, ~ round_date(., unit = "1 minutes"))) %>%
  mutate(date = map2(start, end, ~ seq(.x, .y, by = "1 min"))) %>%
  unnest(date) %>%
  select(date, marker) %>%
  # `date` is the only shared column, so the natural join keys on it.
  right_join(df %>% mutate(date = round_date(date, unit = "1 minutes"))) %>%
  arrange(date) %>%
  select(date, marker)
Joining, by = "date"
# A tibble: 6 x 2
date marker
<dttm> <chr>
1 2020-11-17 12:00:00 NA
2 2020-11-17 12:01:00 marker_1
3 2020-11-17 12:02:00 marker_1
4 2020-11-17 12:03:00 marker_1
5 2020-11-17 12:04:00 NA
6 2020-11-17 12:05:00 NA
作为 Karthik S 出色答案的替代方案,您还可以先用 interval() 为 df_marker 构造时间区间,然后用 %within% 检查 df 中的每个日期是否落在该区间内。这两个函数都来自 lubridate 包。
library(lubridate)
# Store each marker's span as a lubridate Interval so timestamps can be
# tested against it with `%within%`.
df_marker <- mutate(df_marker, interval = interval(start, end))
start end marker interval
<dttm> <dttm> <chr> <interval>
1 2020-11-17 12:01:02 2020-11-17 12:03:05 marker_1 2020-11-17 12:01:02 CET--2020-11-17 12:03:05 CET
# Label each timestamp in `df` with the marker whose interval contains it.
#
# Each date is tested against every interval in `df_marker`, and the first
# matching marker wins. A plain `ifelse(date %within% df_marker$interval, ...)`
# recycles both vectors element-wise, which is only correct for a one-row
# `df_marker`. The result is always character: `NA_character_` when no
# interval matches, including when `df_marker` is empty.
df <- df %>%
  mutate(
    marker = vapply(
      seq_along(date),
      function(i) {
        hit <- which(date[i] %within% df_marker$interval)
        if (length(hit) > 0) {
          as.character(df_marker$marker[hit[1]])
        } else {
          NA_character_
        }
      },
      character(1)
    )
  )
> df
# A tibble: 7 x 2
date marker
<dttm> <chr>
1 2020-11-17 12:00:00 NA
2 2020-11-17 12:01:01 NA
3 2020-11-17 12:02:02 marker_1
4 2020-11-17 12:03:03 marker_1
5 2020-11-17 12:04:04 NA
6 2020-11-17 12:05:05 NA
7 2020-11-17 12:06:06 NA
若要按分钟处理,请用 round_date() 对两个表中的区间端点和日期列取整。该函数同样来自 lubridate 包。
# Minute resolution: build each marker's Interval from bounds rounded to the
# nearest minute.
df_marker <- mutate(
  df_marker,
  interval = interval(round_date(start, "minute"), round_date(end, "minute"))
)
# Label each timestamp in `df` with the marker whose interval contains the
# timestamp rounded to the nearest minute. The `date` column itself is left
# unrounded.
#
# Each rounded date is tested against every interval in `df_marker`, and the
# first matching marker wins. An element-wise `ifelse()` over
# `df_marker$interval` is only correct for a one-row `df_marker`. The result
# is always character: `NA_character_` when no interval matches.
df <- df %>%
  mutate(
    marker = vapply(
      seq_along(date),
      function(i) {
        rounded <- round_date(date[i], "minute")
        hit <- which(rounded %within% df_marker$interval)
        if (length(hit) > 0) {
          as.character(df_marker$marker[hit[1]])
        } else {
          NA_character_
        }
      },
      character(1)
    )
  )
> df
# A tibble: 7 x 2
date marker
<dttm> <chr>
1 2020-11-17 12:00:00 NA
2 2020-11-17 12:01:01 marker_1
3 2020-11-17 12:02:02 marker_1
4 2020-11-17 12:03:03 marker_1
5 2020-11-17 12:04:04 NA
6 2020-11-17 12:05:05 NA