如何按时间段汇总 tibble?

How to summarize a tibble by time periods?

编辑:我更新了输入,添加了预期的输出。

我有一个表,其中包含日期时间列(Date_time)和一个分组变量 NEL_Hotspots。我想按照以下规则对该表进行汇总:

先按 NEL_Hotspots 对观测进行分组,然后把同一天(24 小时)内、且 Wind_direc 相差在 ±10 范围内的观测归为同一组。

下面是一个更大的表中的一小部分:
# Sample input data, as emitted by dput(): a tibble with 13 rows and six
# columns (Serial_number, Date_time, Wind_direc, NEL_Hotspots, Dust_Intens,
# Area_km2).
# Date_time is stored as character in month/day/2-digit-year hour:minute form
# (e.g. "3/31/05 1:57"), so it must be parsed before any date arithmetic.
# NEL_Hotspots is a 0/1 grouping flag.
# The trailing `spec` attribute records the per-column parsers (double or
# character) and skip = 1; it carries no observations.
# NOTE(review): the expression is not assigned here; bind it to `df` before
# running code that reads `df`.
structure(list(Serial_number = c(10, 8, 9, 20, 21, 23, 3, 5, 
7, 11, 13, 20, 24), Date_time = c("3/31/05 1:57", "3/31/05 4:12", 
"3/31/05 18:12", "4/1/05 2:12", "4/1/05 3:12", "4/3/05 16:12", 
"3/28/05 9:57", "3/30/05 13:42", "3/31/05 1:57", "4/10/05 10:57", 
"4/10/05 18:57", "4/10/05 20:13", "4/10/05 21:30"), Wind_direc = c(50, 
60, 70, 60, 70, 70, 60, 140, 50, 270, 300, 310, 290), NEL_Hotspots = c(0, 
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), Dust_Intens = c("weak", 
"weak", "weak", "weak", "medium", "weak", "weak", "medium", "weak", 
"weak", "medium", "medium", "high"), Area_km2 = c(290, 241, 225, 
240, 340, 320, 176, 143, 211, 72, 171, 167, 121)), .Names = c("Serial_number", 
"Date_time", "Wind_direc", "NEL_Hotspots", "Dust_Intens", "Area_km2"
), class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-13L), spec = structure(list(cols = structure(list(Serial_number = structure(list(), class = c("collector_double", 
"collector")), Date_time = structure(list(), class = c("collector_character", 
"collector")), Wind_direc = structure(list(), class = c("collector_double", 
"collector")), NEL_Hotspots = structure(list(), class = c("collector_double", 
"collector")), Dust_Intens = structure(list(), class = c("collector_character", 
"collector")), Area_km2 = structure(list(), class = c("collector_double", 
"collector"))), .Names = c("Serial_number", "Date_time", "Wind_direc", 
"NEL_Hotspots", "Dust_Intens", "Area_km2")), default = structure(list(), class = c("collector_guess", 
"collector")), skip = 1), .Names = c("cols", "default", "skip"
), class = "col_spec"))

加载数据后,我用 lubridate 创建了 full_date 列:df <- df %>% mutate(full_date = ymd_hms(Date_time))。(注意:上面示例中的 Date_time 是"月/日/年 时:分"格式,这种格式应使用 mdy_hm() 解析,ymd_hms() 无法正确解析。)

预期输出为:

# Expected summary, as emitted by dput(): a tibble with 9 rows (one per group
# of observations) and 11 columns.
# `First Date_time` / `Last Date_time` hold the first and last timestamp of
# each group, still as character strings.
# Wind_direc_avg / wind_direc_min / wind_direc_max and Area_km2_avg /
# Area_km2_stdv are per-group statistics; Area_km2_stdv is given as 0 for
# groups with a single event.
# Dust_Intens and serial_numbers are the group's values pasted into one
# string; events_count is the number of observations in the group.
# NOTE(review): the separators in Dust_Intens are inconsistent ("weak,weak"
# vs "medium, medium, high"); this looks hand-written rather than computed.
structure(list(`First Date_time` = c("3/31/05 1:57", "3/31/05 18:12", 
"4/1/05 2:12", "4/3/05 16:12", "3/28/05 9:57", "3/30/05 13:42", 
"3/31/05 1:57", "4/10/05 10:57", "4/10/05 18:57"), `Last Date_time` = c("3/31/05 4:12", 
"3/31/05 18:12", "4/1/05 3:12", "4/3/05 16:12", "3/28/05 9:57", 
"3/30/05 13:42", "3/31/05 1:57", "4/10/05 10:57", "4/10/05 21:30"
), Wind_direc_avg = c(55, 70, 60, 70, 60, 140, 50, 270, 300), 
    wind_direc_min = c(50, 70, 60, 70, 60, 140, 50, 270, 290), 
    wind_direc_max = c(60, 70, 70, 70, 60, 140, 50, 270, 310), 
    NEL_Hotspots = c(0, 0, 0, 0, 1, 1, 1, 1, 1), Dust_Intens = c("weak,weak", 
    "weak", "weak,medium", "weak", "weak", "medium", "weak", 
    "weak", "medium, medium, high"), Area_km2_avg = c(265.5, 
    225, 290, 320, 176, 143, 211, 72, 153), Area_km2_stdv = c(34.64, 
    0, 70.71, 0, 0, 0, 0, 0, 27.78), events_count = c(2, 1, 2, 
    1, 1, 1, 1, 1, 3), serial_numbers = c("10, 8", "9", "20, 21", 
    "23", "3", "5", "7", "11", "13, 20, 24")), .Names = c("First Date_time", 
"Last Date_time", "Wind_direc_avg", "wind_direc_min", "wind_direc_max", 
"NEL_Hotspots", "Dust_Intens", "Area_km2_avg", "Area_km2_stdv", 
"events_count", "serial_numbers"), class = c("spec_tbl_df", "tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -9L), spec = structure(list(
    cols = structure(list(`First Date_time` = structure(list(), class = c("collector_character", 
    "collector")), `Last Date_time` = structure(list(), class = c("collector_character", 
    "collector")), Wind_direc_avg = structure(list(), class = c("collector_double", 
    "collector")), wind_direc_min = structure(list(), class = c("collector_double", 
    "collector")), wind_direc_max = structure(list(), class = c("collector_double", 
    "collector")), NEL_Hotspots = structure(list(), class = c("collector_double", 
    "collector")), Dust_Intens = structure(list(), class = c("collector_character", 
    "collector")), Area_km2_avg = structure(list(), class = c("collector_double", 
    "collector")), Area_km2_stdv = structure(list(), class = c("collector_double", 
    "collector")), events_count = structure(list(), class = c("collector_double", 
    "collector")), serial_numbers = structure(list(), class = c("collector_character", 
    "collector"))), .Names = c("First Date_time", "Last Date_time", 
    "Wind_direc_avg", "wind_direc_min", "wind_direc_max", "NEL_Hotspots", 
    "Dust_Intens", "Area_km2_avg", "Area_km2_stdv", "events_count", 
    "serial_numbers")), default = structure(list(), class = c("collector_guess", 
    "collector")), skip = 1), .Names = c("cols", "default", "skip"
), class = "col_spec"))

如有任何帮助,我将不胜感激!

可以根据您的条件来创建分组。出现以下任一情况时开始一个新组:
  • 日期发生变化
  • Wind_direc 的值每变化 +10

然后在 summarise 中为每个组计算您需要的所有统计量:
library(dplyr)

# Label consecutive observations whose wind direction stays close together.
#
# Walks `wind` in order. The first value of a run is its anchor; a value stays
# in the current run while it is within `tol` of that anchor, otherwise it
# opens a new run and becomes the new anchor.
#
# wind: numeric vector of wind directions, already in the order in which runs
#       should be formed (assumed to contain no missing values).
# tol:  largest allowed absolute difference from the run's anchor.
#
# Returns an integer vector of run ids (1, 2, ...) the same length as `wind`.
# Differences are plain absolute differences: wrap-around at 0/360 degrees is
# not treated specially.
wind_run_id <- function(wind, tol = 10) {
  run_id <- integer(length(wind))
  anchor <- NA_real_
  current <- 0L
  for (i in seq_along(wind)) {
    # `current == 0L` is only true for the first element, which always
    # opens the first run.
    if (current == 0L || abs(wind[i] - anchor) > tol) {
      current <- current + 1L
      anchor <- wind[i]
    }
    run_id[i] <- current
  }
  run_id
}

# Summarise `df` into one row per (NEL_Hotspots, calendar day, wind run).
df %>%
  mutate(Date_time = lubridate::mdy_hm(Date_time),
         date = as.Date(Date_time)) %>%
  # Chronological order makes first()/last() and the run labelling meaningful.
  arrange(NEL_Hotspots, Date_time) %>%
  # NEL_Hotspots is a grouping criterion, not something to be summed.
  group_by(NEL_Hotspots, date) %>%
  mutate(wind_run = wind_run_id(Wind_direc)) %>%
  group_by(wind_run, .add = TRUE) %>%
  summarise(first_date_time = first(Date_time),
            last_date_time = last(Date_time),
            Wind_direc_avg = mean(Wind_direc),
            Wind_direc_min = min(Wind_direc),
            Wind_direc_max = max(Wind_direc),
            Dust_Intens = toString(Dust_Intens),
            Area_km2_avg = mean(Area_km2),
            # sd() of a single value is NA; the expected output reports 0.
            Area_km2_stdv = if (n() > 1) sd(Area_km2) else 0,
            events_count = n(),
            serial_numbers = toString(Serial_number),
            .groups = "drop") %>%
  # Drop the helper keys so only the requested summary columns remain.
  select(-date, -wind_run)