每次事件出现在 R 中的数据帧上时,如何计算 pre 和 post 值
How to calculate the pre and post values each time an event appears on a dataframe in R
我这里有一个像这个例子的数据库,我需要计算事件发生前 10 分钟和事件发生后 10 分钟的平均值、总和和标准差。数据框中的数字只是为了说明。
# Example data: one reading per minute from 00:00 to 00:19 on 2020-05-04.
df <- data.frame(
  date = seq(
    from = ISOdatetime(2020, 5, 4, 0, 0, 0),
    to = ISOdatetime(2020, 5, 4, 0, 19, 0),
    by = 60
  )
)
df$results <- c(
  18, 25, 35, 96, 100, 50, 48, 67, 36, 98,
  46, 75, 67, 36, 98, 46, 75, 67, 36, 98
)
# Event labels are sparse: start from all-missing and fill in the four rows
# (7, 10, 12 and 19) that carry an event.
event_labels <- rep(NA_character_, nrow(df))
event_labels[c(7, 10, 12, 19)] <- c("event A", "event B", "event C", "event D")
df$events <- event_labels
我想在一个新的数据框中加入这些计算,包含事件名称和日期,以及前后时刻的平均值、总和和标准差值。感谢您的帮助。
编辑:
我想把事件之前的值(即事件前 10 分钟的总和、平均值和标准差)与事件之后的值分开计算(例如,如果事件发生在 10:01,我想取 “01” 这一分钟再加上其后的 9 分钟)。因此,我需要把结果分别放在 “avg.pre”、“sd.pre”、“sum.pre” 和 “avg.pos”、“sd.pos”、“sum.pos” 这几列中。
# Illustrative sketch of the desired result table, one row per event.
# NOTE: the trailing `...` stands for values omitted from the example, so the
# vectors below are incomplete and this snippet is not meant to be run as-is.
new.df = data.frame(c("event A", "event B", "event C", "event D")); colnames(new.df) = "events"
# Event timestamps, written as character strings (two carry a leading space).
new.df$date.pre = c("2020-05-04 00:06:00", "2020-05-04 00:09:00", " 2020-05-04 00:11:00", " 2020-05-04 00:18:00")
# Requested statistics for the window before each event: mean, sd, sum.
new.df$avg.pre = c(53.14, 52.77...)
new.df$sd.pre = c(32.72, 31.18,...)
new.df$sum.pre = c(372, 475, 68,...)
# Requested statistics for the window after each event: mean, sd, sum.
new.df$avg.pos = c(61.71, 64.4, 25,...)
new.df$sd.pos = c(23.26, 23.06, 46,...)
new.df$sum.pos = c(617, 644, 68,...)
您可以尝试下面的方法:对每个事件,分别筛选出其前 10 分钟和后 10 分钟内的数据,再进行汇总:
library(dplyr)
# For each distinct non-missing event label, build a one-row summary of
# `results` over the 10 minutes before and the 10 minutes after the event.
# `between()` is inclusive on both ends, so the event's own row is counted in
# both the "pre" and the "post" window.
purrr::map_df(unique(na.omit(df$events)), function(x) {
  # Timestamp of the row carrying this event label. `%in%` never yields NA,
  # so rows with a missing label drop out without a separate is.na() check.
  i <- df$date[df$events %in% x]
  bind_cols(
    data.frame(events = x, date = i),
    # Window before the event; date-time arithmetic is in seconds (10 * 60).
    df %>%
      filter(between(date, i - 10 * 60, i)) %>%
      summarise(
        pre.avg = mean(results, na.rm = TRUE),
        pre.sd = sd(results, na.rm = TRUE),
        pre.sum = sum(results, na.rm = TRUE)
      ),
    # Window after the event.
    df %>%
      filter(between(date, i, i + 10 * 60)) %>%
      summarise(
        post.avg = mean(results, na.rm = TRUE),
        post.sd = sd(results, na.rm = TRUE),
        post.sum = sum(results, na.rm = TRUE)
      )
  )
})
# events date pre.avg pre.sd pre.sum post.avg post.sd post.sum
#1 event A 2020-05-04 00:06:00 53.14286 32.72323 372 62.90909 22.42969 692
#2 event B 2020-05-04 00:09:00 57.30000 31.18066 573 67.45455 24.10960 742
#3 event C 2020-05-04 00:11:00 61.45455 27.30701 676 66.44444 23.47931 598
#4 event D 2020-05-04 00:18:00 61.81818 23.49391 680 67.00000 43.84062 134
我这里有一个像这个例子的数据库,我需要计算事件发生前 10 分钟和事件发生后 10 分钟的平均值、总和和标准差。数据框中的数字只是为了说明。
# Example data: one reading per minute from 00:00 to 00:19 on 2020-05-04.
df <- data.frame(
  date = seq(
    from = ISOdatetime(2020, 5, 4, 0, 0, 0),
    to = ISOdatetime(2020, 5, 4, 0, 19, 0),
    by = 60
  )
)
df$results <- c(
  18, 25, 35, 96, 100, 50, 48, 67, 36, 98,
  46, 75, 67, 36, 98, 46, 75, 67, 36, 98
)
# Event labels are sparse: start from all-missing and fill in the four rows
# (7, 10, 12 and 19) that carry an event.
event_labels <- rep(NA_character_, nrow(df))
event_labels[c(7, 10, 12, 19)] <- c("event A", "event B", "event C", "event D")
df$events <- event_labels
我想在一个新的数据框中加入这些计算,包含事件名称和日期,以及前后时刻的平均值、总和和标准差值。感谢您的帮助。
编辑: 我想把事件之前的值(即事件前 10 分钟的总和、平均值和标准差)与事件之后的值分开计算(例如,如果事件发生在 10:01,我想取 “01” 这一分钟再加上其后的 9 分钟)。因此,我需要把结果分别放在 “avg.pre”、“sd.pre”、“sum.pre” 和 “avg.pos”、“sd.pos”、“sum.pos” 这几列中。
# Illustrative sketch of the desired result table, one row per event.
# NOTE: the trailing `...` stands for values omitted from the example, so the
# vectors below are incomplete and this snippet is not meant to be run as-is.
new.df = data.frame(c("event A", "event B", "event C", "event D")); colnames(new.df) = "events"
# Event timestamps, written as character strings (two carry a leading space).
new.df$date.pre = c("2020-05-04 00:06:00", "2020-05-04 00:09:00", " 2020-05-04 00:11:00", " 2020-05-04 00:18:00")
# Requested statistics for the window before each event: mean, sd, sum.
new.df$avg.pre = c(53.14, 52.77...)
new.df$sd.pre = c(32.72, 31.18,...)
new.df$sum.pre = c(372, 475, 68,...)
# Requested statistics for the window after each event: mean, sd, sum.
new.df$avg.pos = c(61.71, 64.4, 25,...)
new.df$sd.pos = c(23.26, 23.06, 46,...)
new.df$sum.pos = c(617, 644, 68,...)
您可以尝试下面的方法:对每个事件,分别筛选出其前 10 分钟和后 10 分钟内的数据,再进行汇总:
library(dplyr)
# For each distinct non-missing event label, build a one-row summary of
# `results` over the 10 minutes before and the 10 minutes after the event.
# `between()` is inclusive on both ends, so the event's own row is counted in
# both the "pre" and the "post" window.
purrr::map_df(unique(na.omit(df$events)), function(x) {
  # Timestamp of the row carrying this event label. `%in%` never yields NA,
  # so rows with a missing label drop out without a separate is.na() check.
  i <- df$date[df$events %in% x]
  bind_cols(
    data.frame(events = x, date = i),
    # Window before the event; date-time arithmetic is in seconds (10 * 60).
    df %>%
      filter(between(date, i - 10 * 60, i)) %>%
      summarise(
        pre.avg = mean(results, na.rm = TRUE),
        pre.sd = sd(results, na.rm = TRUE),
        pre.sum = sum(results, na.rm = TRUE)
      ),
    # Window after the event.
    df %>%
      filter(between(date, i, i + 10 * 60)) %>%
      summarise(
        post.avg = mean(results, na.rm = TRUE),
        post.sd = sd(results, na.rm = TRUE),
        post.sum = sum(results, na.rm = TRUE)
      )
  )
})
# events date pre.avg pre.sd pre.sum post.avg post.sd post.sum
#1 event A 2020-05-04 00:06:00 53.14286 32.72323 372 62.90909 22.42969 692
#2 event B 2020-05-04 00:09:00 57.30000 31.18066 573 67.45455 24.10960 742
#3 event C 2020-05-04 00:11:00 61.45455 27.30701 676 66.44444 23.47931 598
#4 event D 2020-05-04 00:18:00 61.81818 23.49391 680 67.00000 43.84062 134