如何将以 10 分钟时间步长提供的值自动换算为 1 小时时间步长的平均值？

Question

我的数据框包含 4 年内 10 分钟时间步长的数据。我的其他数据以 1 小时的时间步长提供。所以我需要将 10 分钟的数据转换为 1 小时的平均值，以便将它与我的其他数据结合起来。

这是我数据的前 25 行的输入：

    structure(list(...1 = structure(c(1420070400, 1420071000, 1420071600, 
1420072200, 1420072800, 1420073400, 1420074000, 1420074600, 1420075200, 
1420075800, 1420076400, 1420077000, 1420077600, 1420078200, 1420078800, 
1420079400, 1420080000, 1420080600, 1420081200, 1420081800, 1420082400, 
1420083000, 1420083600, 1420084200, 1420084800), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), Dd1_1...2 = c(28.43869, 28.52743, 28.52743, 28.52743, 
28.57417, 28.57417, 28.57417, 28.67358, 28.67358, 28.67358, 28.62449, 
28.62449, 28.62449, 28.74658, 28.74658, 28.74658, 29.01779, 29.01779, 
29.01779, 28.84344, 28.84344, 28.84344, 28.7406, 28.7406, 28.7406
), Dd1_3...3 = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), 
    Dd1_5...4 = c(30.58685, 31.82736, 31.82736, 31.82736, 29.77018, 
    29.77018, 29.77018, 29.91568, 29.91568, 29.91568, 29.69438, 
    29.69438, 29.69438, 30.45421, 30.45421, 30.45421, 30.06922, 
    30.06922, 30.06922, 30.15059, 30.15059, 30.15059, 30.41779, 
    30.41779, 30.41779), Dd1_1...5 = c(28.24055, 28.273, 28.273, 
    28.273, 28.26098, 28.26098, 28.26098, 28.29342, 28.29342, 
    28.29342, 28.24657, 28.24657, 28.24657, 28.34867, 28.34867, 
    28.34867, 28.30063, 28.30063, 28.30063, 28.17804, 28.17804, 
    28.17804, 28.30423, 28.30423, 28.30423), Dd1_3...6 = c(25.25317, 
    25.53266, 25.53266, 25.53266, 25.50203, 25.50203, 25.50203, 
    25.49713, 25.49713, 25.49713, 25.30346, 25.30346, 25.30346, 
    25.36477, 25.36477, 25.36477, 25.46037, 25.46037, 25.46037, 
    25.26299, 25.26299, 25.26299, 25.37948, 25.37948, 25.37948
    ), Dd1_5...7 = c(27.75881, 27.92162, 27.92162, 27.92162, 
    28.65922, 28.65922, 28.65922, 26.65354, 26.65354, 26.65354, 
    27.10426, 27.10426, 27.10426, 28.48427, 28.48427, 28.48427, 
    26.80799, 26.80799, 26.80799, 27.33572, 27.33572, 27.33572, 
    27.20004, 27.20004, 27.20004), Dd1_1...8 = c(28.23214, 28.81236, 
    28.81236, 28.81236, 28.81834, 28.81834, 28.81834, 28.29463, 
    28.29463, 28.29463, 28.35347, 28.35347, 28.35347, 28.19247, 
    28.19247, 28.19247, 28.30664, 28.30664, 28.30664, 28.24776, 
    28.24776, 28.24776, 28.2694, 28.2694, 28.2694), Dd1_3...9 = c(29.27281, 
    29.31683, 29.31683, 29.31683, 29.75479, 29.75479, 29.75479, 
    29.69912, 29.69912, 29.69912, 29.97595, 29.97595, 29.97595, 
    29.19542, 29.19542, 29.19542, 29.19661, 29.19661, 29.19661, 
    29.90859, 29.90859, 29.90859, 29.57938, 29.57938, 29.57938
    ), Dd1_5...10 = c(28.02402, 28.33666, 28.33666, 28.33666, 
    28.48547, 28.48547, 28.48547, 27.7383, 27.7383, 27.7383, 
    NA, NA, NA, 27.69604, 27.69604, 27.69604, 28.44109, 28.44109, 
    28.44109, 28.77409, 28.77409, 28.77409, 27.93246, 27.93246, 
    27.93246), Dd1_1...23 = c(-11.97827, -11.99765, -11.96991, 
    -12.04475, -12.07237, -12.12492, -12.1001, -12.15241, -12.1885, 
    -12.19432, -12.25232, -12.31324, -12.28841, -12.34945, -12.38554, 
    -12.36858, -12.40782, -12.44645, -12.46292, -12.50446, -12.54636, 
    -12.54369, -12.58269, -12.62713, -12.61042), Dd1_3...24 = c(-9.715732, 
    -9.693569, -9.621514, -9.632534, -9.57162, -9.563144, -9.48576, 
    -9.479946, -9.435623, -9.363811, -9.3913, -9.360904, -9.29454, 
    -9.277949, -9.261358, -9.183611, -9.18676, -9.150551, -9.08673, 
    -9.095086, -9.056576, -8.96793, -8.888002, -8.863055, -8.74692
    ), Dd1_5...25 = c(-9.247674, -9.076073, -8.876861, -8.791121, 
    -8.69424, -8.583553, -8.395483, -8.317978, -8.232359, -8.171687, 
    -8.105203, -8.091396, -7.887099, -7.862273, -7.765512, -7.594275, 
    -7.555886, -7.511563, -7.442535, -7.337661, -7.238479, -7.064819, 
    -6.924341, -6.858099, -6.615532)), row.names = c(NA, -25L
), class = c("tbl_df", "tbl", "data.frame"))

我希望有一个简单的解决方案来转换数据。

Answer 1

修改后的答案：

# pandas (Python): 6-row rolling mean; six 10-minute samples span one hour.
# NOTE(review): this yields a moving average on every row (with pandas' default
# settings the first 5 rows are NaN), not one value per hour — confirm which
# rows should be kept afterwards.
df["average"] = df["values"].rolling(6).mean()

现在，平均列将具有您想要的值，并且可以被挑选到新的数据框中。

旧答案：如果第一行是第 0 小时、第 0 分钟和第 0 秒，那么第 6 条记录将是第 1 小时、第 0 分钟和第 0 秒。第 12 条记录将是第 2 小时 0 分 0 秒

所以我们正在考虑挑选第 0、6、12 条记录等等...

如果您认为这有帮助，那么我的代码可能是这样的

# Keep only the rows whose index is a multiple of 6 (0, 6, 12, ...).
# NOTE(review): assumes df has a default 0-based integer index — confirm.
dfHour = df[df.index % 6 == 0]

以上将为您提供那些索引是 6 的倍数的行。

Answer 2

tidyverse

library(dplyr)
# Truncate each timestamp to the start of its hour, group on that value,
# and take the mean of every remaining column within each hour.
dat %>%
  mutate(`...1` = trunc(`...1`, "hour")) %>%
  group_by(`...1`) %>%
  summarize(across(everything(), mean), .groups = "drop")
# # A tibble: 5 x 13
#   ...1                Dd1_1...2 Dd1_3...3 Dd1_5...4 Dd1_1...5 Dd1_3...6 Dd1_5...7 Dd1_1...8 Dd1_3...9 Dd1_5...10 Dd1_1...23 Dd1_3...24 Dd1_5...25
#   <dttm>                  <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>      <dbl>      <dbl>      <dbl>      <dbl>
# 1 2015-01-01 00:00:00      28.5        NA      30.9      28.3      25.5      28.1      28.7      29.5       28.3      -12.0      -9.63      -8.88
# 2 2015-01-01 01:00:00      28.6        NA      29.8      28.3      25.4      27.1      28.4      29.8       NA        -12.2      -9.42      -8.22
# 3 2015-01-01 02:00:00      28.8        NA      30.2      28.3      25.4      27.7      28.3      29.3       NA        -12.4      -9.23      -7.70
# 4 2015-01-01 03:00:00      28.8        NA      30.2      28.2      25.3      27.2      28.3      29.7       28.4      -12.5      -8.99      -7.14
# 5 2015-01-01 04:00:00      28.7        NA      30.4      28.3      25.4      27.2      28.3      29.6       27.9      -12.6      -8.75      -6.62

如果您需要在对含 NA 的 Dd1_5...10 值求平均时忽略 NA，那么只需

# Hourly means with missing values ignored (na.rm = TRUE).
# Note the leading dot in `.groups`: without it, summarize() treats
# `groups = "drop"` as a new summary column named "groups".
dat %>%
  group_by(`...1` = trunc(`...1`, "hour")) %>%
  summarize(across(everything(), ~ mean(., na.rm = TRUE)), .groups = "drop")
# # A tibble: 5 x 13
#   ...1                Dd1_1...2 Dd1_3...3 Dd1_5...4 Dd1_1...5 Dd1_3...6 Dd1_5...7 Dd1_1...8 Dd1_3...9 Dd1_5...10 Dd1_1...23 Dd1_3...24 Dd1_5...25
#   <dttm>                  <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>      <dbl>      <dbl>      <dbl>      <dbl>
# 1 2015-01-01 00:00:00      28.5       NaN      30.9      28.3      25.5      28.1      28.7      29.5       28.3      -12.0      -9.63      -8.88
# 2 2015-01-01 01:00:00      28.6       NaN      29.8      28.3      25.4      27.1      28.4      29.8       27.9      -12.2      -9.42      -8.22
# 3 2015-01-01 02:00:00      28.8       NaN      30.2      28.3      25.4      27.7      28.3      29.3       28.0      -12.4      -9.23      -7.70
# 4 2015-01-01 03:00:00      28.8       NaN      30.2      28.2      25.3      27.2      28.3      29.7       28.4      -12.5      -8.99      -7.14
# 5 2015-01-01 04:00:00      28.7       NaN      30.4      28.3      25.4      27.2      28.3      29.6       27.9      -12.6      -8.75      -6.62

虽然现在你有一个 NaN 问题:-)

data.table

library(data.table)
# Convert to a data.table, then average every column within each hour.
# trunc() returns POSIXlt, so it is converted back to POSIXct for grouping.
DT <- as.data.table(dat)
DT[
  ,
  lapply(.SD, mean),
  by = .(`...1` = as.POSIXct(trunc(`...1`, "hour")))
]

同样，对于 NAs:

# Same hourly aggregation, ignoring NA values within each hour.
DT[
  ,
  lapply(.SD, function(col) mean(col, na.rm = TRUE)),
  by = .(`...1` = as.POSIXct(trunc(`...1`, "hour")))
]

Answer 3

不清楚你的数据框叫什么，所以我称它为 dat。

试试这个：

library(dplyr)
library(lubridate)
# Floor every timestamp to the start of its hour, then average each
# remaining column within that hour. No na.rm is passed, so an hour
# containing any NA yields NA for that column.
dat %>%
  mutate(hour = floor_date(`...1`, unit = "hour")) %>%
  group_by(hour) %>%
  select(-`...1`) %>%
  # across() replaces the superseded summarise_all()
  summarise(across(everything(), mean), .groups = "drop")

mutate 使用 lubridate 包中的 floor_date 将时间戳向下取整到整点，并将结果放入名为 'hour' 的列中。然后我们按该 hour 列分组，并用 select（带负号）去掉原始的日期时间字段。最后，我们用 mean 汇总所有其他列。

如何将以 10 分钟时间步长提供的值自动换算为 1 小时时间步长的平均值？

How can i automatically calculate values provided in 10 min time steps to average values in 1 h time steps?

average

r

time-series

lubridate

data.table

tidyverse

data.table