过滤多个时间范围的多个特定列
Filtering multiple, specific columns for multiple time ranges
我正在 QCing 数据,有几个 tanks/data_types 有需要删除的错误数据,跨越多个时间范围。包含错误数据的 data_types、坦克和时间范围已在单独的数据框中报告,其中的一个片段包含在 QC_table:
中
structure(list(trial = c(1L, 1L, 1L, 1L, 2L, 2L), data_type = c("Temp",
"pH", "pH", "pH", "Temp", "Temp"), tank = c("29", "40", "40",
"40", "13", "29"), date_time_start = c("2021-03-31 8:30", "2021-03-31 7:50",
"2021-03-31 10:25", "2021-03-31 17:05", "2021-04-07 10:25", "2021-04-08 10:30"
), date_time_end = c("2021-03-31 18:00", "2021-03-31 8:15", "2021-03-31 10:40",
"2021-03-31 17:30", "2021-04-07 17:20", "2021-04-10 18:25"),
to.be.removed = c("yes ", "yes ", "yes ", "yes ", "yes ",
"yes "), reason = c("calibration error", "faulty probe",
"faulty probe", "faulty probe", "calibration error", "faulty probe"
), data_type_tank = c("WalchemTempTank29", "pH_Tank40", "pH_Tank40",
"pH_Tank40", "WalchemTempTank13", "WalchemTempTank29")), row.names = c(NA,
-6L), class = "data.frame")
还有额外的试验和坦克。我的方法是创建一个包含所有需要删除的数据的新数据框(基于 QC table 中的 data_type_tank 和 date_time_start/end 列),然后删除该数据框来自原始数据框。我不知道这是否最合乎逻辑,但我不知道如何从原始数据框中删除数据。
我使用以下方法构建了一个新数据框:
new_dataframe <- dataframe %>%
select(c(Measurement.time, Trial, contains(urchin_temp_pH_QC$data_type_tank))) %>% head(10)
structure(list(Measurement.time = c("2021-03-30 11:00", "2021-03-30 11:05",
"2021-03-30 11:10", "2021-03-30 11:15", "2021-03-30 11:20", "2021-03-30 11:25",
"2021-03-30 11:30", "2021-03-30 11:35", "2021-03-30 11:40", "2021-03-30 11:45"
), Trial = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), WalchemTempTank29_Avg = c("18.8",
"18.67", "18.58", "18.48", "18.38", "18.29", "18.2", "18.12",
"18.03", "18"), WalchemTempTank29_Std = c("0.037", "0.025", "0.032",
"0.029", "0.017", "0.018", "0.026", "0.025", "0.024", "0.023"
), pH_Tank40_Avg = c("7.859", "7.863", "7.868", "7.866", "7.863",
"7.864", "7.865", "7.867", "7.869", "7.87"), pH_Tank40_Std = c("0.007",
"0.006", "0.002", "0.001", "0.002", "0.001", "0.002", "0.001",
"0.002", "0.004"), WalchemTempTank13_Avg = c("10.26", "10.22",
"10.21", "10.24", "10.27", "10.3", "10.32", "10.34", "10.37",
"10.4"), WalchemTempTank13_Std = c("0.01", "0.013", "0.005",
"0.01", "0.007", "0.006", "0.006", "0.008", "0.008", "0.005")), row.names = 4:13, class = "data.frame")
但是现在,基于 QC table 有一些行 (date/time) 我需要删除或子集化,但仅限于特定列(即那些包含data_type_tank)。我想我可以手动执行此操作,使用下面的代码,然后在需要的地方绑定或加入 columns/rows,但这似乎是一个艰巨的过程。
subset_row_1_QC_table <- dataframe %>% select(Measurement.time, contains("WalchemTempTank29")) %>%
subset(Measurement.time >= as.POSIXct("2021-03-31 08:30") & Measurement.time <= as.POSIXct("2021-03-31 18:00"))
有没有什么方法可以自动执行此过程,根据不同数据框中的列删除特定于列的行或对其进行子集化?我认为理想情况下,我的数据框看起来像 eg:
的扩展版本
Measurement.time
Trial
WaterTempTank29_Avg
WaterTempTank29_Std
pH_Tank40_Avg
pH_Tank40_Std
2021-03-31 08:30
1
18.8
0.037
NA
NA
[all 5-min intervals]
NA
NA
2021-03-31 18:00
1
18.36
0.023
NA
NA
2021-03-31 07:50
1
NA
NA
7.854
0.001
[all 5-min intervals]
1
NA
NA
7.88
0.001
2021-03-31 08:15
1
NA
NA
7.84
0.001
2021-03-31 10:25
1
NA
NA
7.881
0.001
[all 5-min intervals]
1
NA
NA
7.804
0.001
2021-03-31 10:40
1
NA
NA
7.881
0
如有任何帮助,我们将不胜感激!我希望我已经能够正确解释我的问题,Whosebug 的新用户。
干杯,
编辑:感谢 r2evans 和 GuedesBF——希望现在是 better/fixed。
我会这样做。
- 为变量
CF
和 df
准备数据
CF = structure(list(trial = c(1L, 1L, 1L), data_type = c("Temp", "pH",
"pH"), tank = c("29", "40", "40"), date_time_start = structure(c(1617204600,
1617202200, 1617211500), tzone = "", class = c("POSIXct", "POSIXt"
)), date_time_end = structure(c(1617238800, 1617203700, 1617212400
), tzone = "", class = c("POSIXct", "POSIXt")), to.be.removed = c("yes ",
"yes ", "yes "), reason = c("calibration error", "faulty probe",
"faulty probe"), data_type_tank = c("WalchemTempTank29", "pH_Tank40",
"pH_Tank40")), row.names = c(NA, -3L), groups = structure(list(
data_type = c("pH", "Temp"), tank = c("40", "29"), .rows = structure(list(
2:3, 1L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1:2, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
df = structure(list(Measurement.time = c("2021-03-30 11:00", "2021-03-30 11:05",
"2021-03-30 11:10", "2021-03-30 11:15", "2021-03-31 18:30", "2021-03-30 11:25",
"2021-03-30 11:30", "2021-03-30 11:35", "2021-03-31 17:00", "2021-03-31 19:28"
), Trial = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), WalchemTempTank29_Avg = c("18.8",
"18.67", "18.58", "18.48", "18.38", "18.29", "18.2", "18.12",
"18.03", "18"), WalchemTempTank29_Std = c("0.037", "0.025", "0.032",
"0.029", "0.017", "0.018", "0.026", "0.025", "0.024", "0.023"
), pH_Tank40_Avg = c("7.859", "7.863", "7.868", "7.866", "7.863",
"7.864", "7.865", "7.867", "7.869", "7.87"), pH_Tank40_Std = c("0.007",
"0.006", "0.002", "0.001", "0.002", "0.001", "0.002", "0.001",
"0.002", "0.004"), WalchemTempTank13_Avg = c("10.26", "10.22",
"10.21", "10.24", "10.27", "10.3", "10.32", "10.34", "10.37",
"10.4"), WalchemTempTank13_Std = c("0.01", "0.013", "0.005",
"0.01", "0.007", "0.006", "0.006", "0.008", "0.008", "0.005")), row.names = 4:13, class = "data.frame")
请注意,我更改了变量 Measurement.time
中的一些数据以将事件包含在 CF
table 中。
- 分别准备 tables
CFTemp
和 CFpH
CFTemp = CF %>% ungroup() %>%
filter(data_type == "Temp") %>%
mutate(Temp = "Temp",
Temp_start = date_time_start,
Temp_end = date_time_end) %>%
select(Temp, Temp_start, Temp_end)
CFpH = CF %>% ungroup() %>%
filter(data_type == "pH") %>%
mutate(pH = "pH",
pH_start = date_time_start,
pH_end = date_time_end) %>%
select(pH, pH_start, pH_end)
- 准备两个函数,返回具有二进制值的向量,并且应删除其索引数据。
fTemp = function(df) CFTemp %>% left_join(df, by="Temp") %>%
mutate(TempRm = Measurement.time>=Temp_start & Measurement.time<=Temp_end) %>%
group_by(ID) %>%
summarise(TempRm = any(TempRm)) %>%
pull(TempRm)
fpH = function(df) CFpH %>% left_join(df, by="pH") %>%
mutate(pHRm = Measurement.time>=pH_start & Measurement.time<=pH_end) %>%
group_by(ID) %>%
summarise(pHRm = any(pHRm)) %>%
pull(pHRm)
- 转换数据框
df1 = df %>% as_tibble() %>%
mutate(Measurement.time = as.POSIXct(Measurement.time),
ID = 1:nrow(.),
Temp = "Temp",
pH = "pH") %>%
mutate(
TmpRm = fTemp(.),
pHRm = fpH(.)
) %>%
mutate(
WalchemTempTank29_Avg = ifelse(TmpRm, NA, WalchemTempTank29_Avg),
WalchemTempTank29_Std = ifelse(TmpRm, NA, WalchemTempTank29_Std),
WalchemTempTank13_Avg = ifelse(TmpRm, NA, WalchemTempTank13_Avg),
WalchemTempTank13_Std = ifelse(TmpRm, NA, WalchemTempTank13_Std),
pH_Tank40_Avg = ifelse(pHRm, NA, pH_Tank40_Avg),
pH_Tank40_Std = ifelse(pHRm, NA, pH_Tank40_Std),
) %>%
select(Measurement.time:WalchemTempTank13_Std)
df1
输出
# A tibble: 10 x 8
Measurement.time Trial WalchemTempTank29_Avg WalchemTempTank29_Std pH_Tank40_Avg pH_Tank40_Std WalchemTempTank13_Avg WalchemTempTank13_Std
<dttm> <int> <chr> <chr> <chr> <chr> <chr> <chr>
1 2021-03-30 11:00:00 1 18.8 0.037 7.859 0.007 10.26 0.01
2 2021-03-30 11:05:00 1 18.67 0.025 7.863 0.006 10.22 0.013
3 2021-03-30 11:10:00 1 18.58 0.032 7.868 0.002 10.21 0.005
4 2021-03-30 11:15:00 1 18.48 0.029 7.866 0.001 10.24 0.01
5 2021-03-31 18:30:00 1 NA NA 7.863 0.002 NA NA
6 2021-03-30 11:25:00 1 18.29 0.018 7.864 0.001 10.3 0.006
7 2021-03-30 11:30:00 1 18.2 0.026 7.865 0.002 10.32 0.006
8 2021-03-30 11:35:00 1 18.12 0.025 7.867 0.001 10.34 0.008
9 2021-03-31 17:00:00 1 18.03 0.024 NA NA 10.37 0.008
10 2021-03-31 19:28:00 1 NA NA NA NA NA NA
仅此而已。
更新 1
library(tidyverse)
CFTemp = CF %>% ungroup() %>%
filter(data_type == "Temp") %>%
mutate(Temp = "Temp",
Temp_start = date_time_start,
Temp_end = date_time_end) %>%
select(Temp, tank, Temp_start, Temp_end)
CFpH = CF %>% ungroup() %>%
filter(data_type == "pH") %>%
mutate(pH = "pH",
pH_start = date_time_start,
pH_end = date_time_end) %>%
select(pH, pH_start, pH_end)
fTemp = function(df, Tank){
out = CFTemp %>% filter(tank==Tank) %>%
left_join(df, by="Temp") %>%
mutate(TempRm = Measurement.time>=Temp_start & Measurement.time<=Temp_end) %>%
group_by(ID) %>%
summarise(TempRm = any(TempRm)) %>%
pull(TempRm)
if(length(out)==0) FALSE else out
}
fpH = function(df) CFpH %>% left_join(df, by="pH") %>%
mutate(pHRm = Measurement.time>=pH_start & Measurement.time<=pH_end) %>%
group_by(ID) %>%
summarise(pHRm = any(pHRm)) %>%
pull(pHRm)
df1 = df %>% as_tibble() %>% #Step 1
mutate(Measurement.time = as.POSIXct(Measurement.time),
ID = 1:nrow(.),
Temp = "Temp",
pH = "pH") %>%
mutate( #Step 2
TmpRm29 = fTemp(., 29),
TmpRm13 = fTemp(., 13),
pHRm = fpH(.)
) %>%
mutate( #Step 3
WalchemTempTank29_Avg = ifelse(TmpRm29, NA, WalchemTempTank29_Avg),
WalchemTempTank29_Std = ifelse(TmpRm29, NA, WalchemTempTank29_Std),
WalchemTempTank13_Avg = ifelse(TmpRm13, NA, WalchemTempTank13_Avg),
WalchemTempTank13_Std = ifelse(TmpRm13, NA, WalchemTempTank13_Std),
pH_Tank40_Avg = ifelse(pHRm, NA, pH_Tank40_Avg),
pH_Tank40_Std = ifelse(pHRm, NA, pH_Tank40_Std),
) %>%
select(Measurement.time:WalchemTempTank13_Std)
df1
输出
# A tibble: 10 x 8
Measurement.time Trial WalchemTempTank29_Avg WalchemTempTank29_Std pH_Tank40_Avg pH_Tank40_Std WalchemTempTank13_Avg WalchemTempTank13_Std
<dttm> <int> <chr> <chr> <chr> <chr> <chr> <chr>
1 2021-03-30 11:00:00 1 18.8 0.037 7.859 0.007 10.26 0.01
2 2021-03-30 11:05:00 1 18.67 0.025 7.863 0.006 10.22 0.013
3 2021-03-30 11:10:00 1 18.58 0.032 7.868 0.002 10.21 0.005
4 2021-03-30 11:15:00 1 18.48 0.029 7.866 0.001 10.24 0.01
5 2021-03-31 18:30:00 1 NA NA 7.863 0.002 10.27 0.007
6 2021-03-30 11:25:00 1 18.29 0.018 7.864 0.001 10.3 0.006
7 2021-03-30 11:30:00 1 18.2 0.026 7.865 0.002 10.32 0.006
8 2021-03-30 11:35:00 1 18.12 0.025 7.867 0.001 10.34 0.008
9 2021-03-31 17:00:00 1 18.03 0.024 NA NA 10.37 0.008
10 2021-03-31 19:28:00 1 NA NA NA NA 10.4 0.005
我正在 QCing 数据,有几个 tanks/data_types 有需要删除的错误数据,跨越多个时间范围。包含错误数据的 data_types、坦克和时间范围已在单独的数据框中报告,其中的一个片段包含在 QC_table:
中structure(list(trial = c(1L, 1L, 1L, 1L, 2L, 2L), data_type = c("Temp",
"pH", "pH", "pH", "Temp", "Temp"), tank = c("29", "40", "40",
"40", "13", "29"), date_time_start = c("2021-03-31 8:30", "2021-03-31 7:50",
"2021-03-31 10:25", "2021-03-31 17:05", "2021-04-07 10:25", "2021-04-08 10:30"
), date_time_end = c("2021-03-31 18:00", "2021-03-31 8:15", "2021-03-31 10:40",
"2021-03-31 17:30", "2021-04-07 17:20", "2021-04-10 18:25"),
to.be.removed = c("yes ", "yes ", "yes ", "yes ", "yes ",
"yes "), reason = c("calibration error", "faulty probe",
"faulty probe", "faulty probe", "calibration error", "faulty probe"
), data_type_tank = c("WalchemTempTank29", "pH_Tank40", "pH_Tank40",
"pH_Tank40", "WalchemTempTank13", "WalchemTempTank29")), row.names = c(NA,
-6L), class = "data.frame")
还有额外的试验和坦克。我的方法是创建一个包含所有需要删除的数据的新数据框(基于 QC table 中的 data_type_tank 和 date_time_start/end 列),然后删除该数据框来自原始数据框。我不知道这是否最合乎逻辑,但我不知道如何从原始数据框中删除数据。
我使用以下方法构建了一个新数据框:
new_dataframe <- dataframe %>%
select(c(Measurement.time, Trial, contains(urchin_temp_pH_QC$data_type_tank))) %>% head(10)
structure(list(Measurement.time = c("2021-03-30 11:00", "2021-03-30 11:05",
"2021-03-30 11:10", "2021-03-30 11:15", "2021-03-30 11:20", "2021-03-30 11:25",
"2021-03-30 11:30", "2021-03-30 11:35", "2021-03-30 11:40", "2021-03-30 11:45"
), Trial = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), WalchemTempTank29_Avg = c("18.8",
"18.67", "18.58", "18.48", "18.38", "18.29", "18.2", "18.12",
"18.03", "18"), WalchemTempTank29_Std = c("0.037", "0.025", "0.032",
"0.029", "0.017", "0.018", "0.026", "0.025", "0.024", "0.023"
), pH_Tank40_Avg = c("7.859", "7.863", "7.868", "7.866", "7.863",
"7.864", "7.865", "7.867", "7.869", "7.87"), pH_Tank40_Std = c("0.007",
"0.006", "0.002", "0.001", "0.002", "0.001", "0.002", "0.001",
"0.002", "0.004"), WalchemTempTank13_Avg = c("10.26", "10.22",
"10.21", "10.24", "10.27", "10.3", "10.32", "10.34", "10.37",
"10.4"), WalchemTempTank13_Std = c("0.01", "0.013", "0.005",
"0.01", "0.007", "0.006", "0.006", "0.008", "0.008", "0.005")), row.names = 4:13, class = "data.frame")
但是现在,基于 QC table 有一些行 (date/time) 我需要删除或子集化,但仅限于特定列(即那些包含data_type_tank)。我想我可以手动执行此操作,使用下面的代码,然后在需要的地方绑定或加入 columns/rows,但这似乎是一个艰巨的过程。
subset_row_1_QC_table <- dataframe %>% select(Measurement.time, contains("WalchemTempTank29")) %>%
subset(Measurement.time >= as.POSIXct("2021-03-31 08:30") & Measurement.time <= as.POSIXct("2021-03-31 18:00"))
有没有什么方法可以自动执行此过程,根据不同数据框中的列删除特定于列的行或对其进行子集化?我认为理想情况下,我的数据框看起来像 eg:
的扩展版本Measurement.time | Trial | WaterTempTank29_Avg | WaterTempTank29_Std | pH_Tank40_Avg | pH_Tank40_Std |
---|---|---|---|---|---|
2021-03-31 08:30 | 1 | 18.8 | 0.037 | NA | NA |
[all 5-min intervals] | NA | NA | |||
2021-03-31 18:00 | 1 | 18.36 | 0.023 | NA | NA |
2021-03-31 07:50 | 1 | NA | NA | 7.854 | 0.001 |
[all 5-min intervals] | 1 | NA | NA | 7.88 | 0.001 |
2021-03-31 08:15 | 1 | NA | NA | 7.84 | 0.001 |
2021-03-31 10:25 | 1 | NA | NA | 7.881 | 0.001 |
[all 5-min intervals] | 1 | NA | NA | 7.804 | 0.001 |
2021-03-31 10:40 | 1 | NA | NA | 7.881 | 0 |
如有任何帮助,我们将不胜感激!我希望我已经能够正确解释我的问题,Whosebug 的新用户。
干杯,
编辑:感谢 r2evans 和 GuedesBF——希望现在是 better/fixed。
我会这样做。
- 为变量
CF
和df
准备数据
CF = structure(list(trial = c(1L, 1L, 1L), data_type = c("Temp", "pH",
"pH"), tank = c("29", "40", "40"), date_time_start = structure(c(1617204600,
1617202200, 1617211500), tzone = "", class = c("POSIXct", "POSIXt"
)), date_time_end = structure(c(1617238800, 1617203700, 1617212400
), tzone = "", class = c("POSIXct", "POSIXt")), to.be.removed = c("yes ",
"yes ", "yes "), reason = c("calibration error", "faulty probe",
"faulty probe"), data_type_tank = c("WalchemTempTank29", "pH_Tank40",
"pH_Tank40")), row.names = c(NA, -3L), groups = structure(list(
data_type = c("pH", "Temp"), tank = c("40", "29"), .rows = structure(list(
2:3, 1L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1:2, class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
df = structure(list(Measurement.time = c("2021-03-30 11:00", "2021-03-30 11:05",
"2021-03-30 11:10", "2021-03-30 11:15", "2021-03-31 18:30", "2021-03-30 11:25",
"2021-03-30 11:30", "2021-03-30 11:35", "2021-03-31 17:00", "2021-03-31 19:28"
), Trial = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), WalchemTempTank29_Avg = c("18.8",
"18.67", "18.58", "18.48", "18.38", "18.29", "18.2", "18.12",
"18.03", "18"), WalchemTempTank29_Std = c("0.037", "0.025", "0.032",
"0.029", "0.017", "0.018", "0.026", "0.025", "0.024", "0.023"
), pH_Tank40_Avg = c("7.859", "7.863", "7.868", "7.866", "7.863",
"7.864", "7.865", "7.867", "7.869", "7.87"), pH_Tank40_Std = c("0.007",
"0.006", "0.002", "0.001", "0.002", "0.001", "0.002", "0.001",
"0.002", "0.004"), WalchemTempTank13_Avg = c("10.26", "10.22",
"10.21", "10.24", "10.27", "10.3", "10.32", "10.34", "10.37",
"10.4"), WalchemTempTank13_Std = c("0.01", "0.013", "0.005",
"0.01", "0.007", "0.006", "0.006", "0.008", "0.008", "0.005")), row.names = 4:13, class = "data.frame")
请注意,我更改了变量 Measurement.time
中的一些数据以将事件包含在 CF
table 中。
- 分别准备 tables
CFTemp
和CFpH
CFTemp = CF %>% ungroup() %>%
filter(data_type == "Temp") %>%
mutate(Temp = "Temp",
Temp_start = date_time_start,
Temp_end = date_time_end) %>%
select(Temp, Temp_start, Temp_end)
CFpH = CF %>% ungroup() %>%
filter(data_type == "pH") %>%
mutate(pH = "pH",
pH_start = date_time_start,
pH_end = date_time_end) %>%
select(pH, pH_start, pH_end)
- 准备两个函数,返回具有二进制值的向量,并且应删除其索引数据。
fTemp = function(df) CFTemp %>% left_join(df, by="Temp") %>%
mutate(TempRm = Measurement.time>=Temp_start & Measurement.time<=Temp_end) %>%
group_by(ID) %>%
summarise(TempRm = any(TempRm)) %>%
pull(TempRm)
fpH = function(df) CFpH %>% left_join(df, by="pH") %>%
mutate(pHRm = Measurement.time>=pH_start & Measurement.time<=pH_end) %>%
group_by(ID) %>%
summarise(pHRm = any(pHRm)) %>%
pull(pHRm)
- 转换数据框
df1 = df %>% as_tibble() %>%
mutate(Measurement.time = as.POSIXct(Measurement.time),
ID = 1:nrow(.),
Temp = "Temp",
pH = "pH") %>%
mutate(
TmpRm = fTemp(.),
pHRm = fpH(.)
) %>%
mutate(
WalchemTempTank29_Avg = ifelse(TmpRm, NA, WalchemTempTank29_Avg),
WalchemTempTank29_Std = ifelse(TmpRm, NA, WalchemTempTank29_Std),
WalchemTempTank13_Avg = ifelse(TmpRm, NA, WalchemTempTank13_Avg),
WalchemTempTank13_Std = ifelse(TmpRm, NA, WalchemTempTank13_Std),
pH_Tank40_Avg = ifelse(pHRm, NA, pH_Tank40_Avg),
pH_Tank40_Std = ifelse(pHRm, NA, pH_Tank40_Std),
) %>%
select(Measurement.time:WalchemTempTank13_Std)
df1
输出
# A tibble: 10 x 8
Measurement.time Trial WalchemTempTank29_Avg WalchemTempTank29_Std pH_Tank40_Avg pH_Tank40_Std WalchemTempTank13_Avg WalchemTempTank13_Std
<dttm> <int> <chr> <chr> <chr> <chr> <chr> <chr>
1 2021-03-30 11:00:00 1 18.8 0.037 7.859 0.007 10.26 0.01
2 2021-03-30 11:05:00 1 18.67 0.025 7.863 0.006 10.22 0.013
3 2021-03-30 11:10:00 1 18.58 0.032 7.868 0.002 10.21 0.005
4 2021-03-30 11:15:00 1 18.48 0.029 7.866 0.001 10.24 0.01
5 2021-03-31 18:30:00 1 NA NA 7.863 0.002 NA NA
6 2021-03-30 11:25:00 1 18.29 0.018 7.864 0.001 10.3 0.006
7 2021-03-30 11:30:00 1 18.2 0.026 7.865 0.002 10.32 0.006
8 2021-03-30 11:35:00 1 18.12 0.025 7.867 0.001 10.34 0.008
9 2021-03-31 17:00:00 1 18.03 0.024 NA NA 10.37 0.008
10 2021-03-31 19:28:00 1 NA NA NA NA NA NA
仅此而已。
更新 1
library(tidyverse)
CFTemp = CF %>% ungroup() %>%
filter(data_type == "Temp") %>%
mutate(Temp = "Temp",
Temp_start = date_time_start,
Temp_end = date_time_end) %>%
select(Temp, tank, Temp_start, Temp_end)
CFpH = CF %>% ungroup() %>%
filter(data_type == "pH") %>%
mutate(pH = "pH",
pH_start = date_time_start,
pH_end = date_time_end) %>%
select(pH, pH_start, pH_end)
fTemp = function(df, Tank){
out = CFTemp %>% filter(tank==Tank) %>%
left_join(df, by="Temp") %>%
mutate(TempRm = Measurement.time>=Temp_start & Measurement.time<=Temp_end) %>%
group_by(ID) %>%
summarise(TempRm = any(TempRm)) %>%
pull(TempRm)
if(length(out)==0) FALSE else out
}
fpH = function(df) CFpH %>% left_join(df, by="pH") %>%
mutate(pHRm = Measurement.time>=pH_start & Measurement.time<=pH_end) %>%
group_by(ID) %>%
summarise(pHRm = any(pHRm)) %>%
pull(pHRm)
df1 = df %>% as_tibble() %>% #Step 1
mutate(Measurement.time = as.POSIXct(Measurement.time),
ID = 1:nrow(.),
Temp = "Temp",
pH = "pH") %>%
mutate( #Step 2
TmpRm29 = fTemp(., 29),
TmpRm13 = fTemp(., 13),
pHRm = fpH(.)
) %>%
mutate( #Step 3
WalchemTempTank29_Avg = ifelse(TmpRm29, NA, WalchemTempTank29_Avg),
WalchemTempTank29_Std = ifelse(TmpRm29, NA, WalchemTempTank29_Std),
WalchemTempTank13_Avg = ifelse(TmpRm13, NA, WalchemTempTank13_Avg),
WalchemTempTank13_Std = ifelse(TmpRm13, NA, WalchemTempTank13_Std),
pH_Tank40_Avg = ifelse(pHRm, NA, pH_Tank40_Avg),
pH_Tank40_Std = ifelse(pHRm, NA, pH_Tank40_Std),
) %>%
select(Measurement.time:WalchemTempTank13_Std)
df1
输出
# A tibble: 10 x 8
Measurement.time Trial WalchemTempTank29_Avg WalchemTempTank29_Std pH_Tank40_Avg pH_Tank40_Std WalchemTempTank13_Avg WalchemTempTank13_Std
<dttm> <int> <chr> <chr> <chr> <chr> <chr> <chr>
1 2021-03-30 11:00:00 1 18.8 0.037 7.859 0.007 10.26 0.01
2 2021-03-30 11:05:00 1 18.67 0.025 7.863 0.006 10.22 0.013
3 2021-03-30 11:10:00 1 18.58 0.032 7.868 0.002 10.21 0.005
4 2021-03-30 11:15:00 1 18.48 0.029 7.866 0.001 10.24 0.01
5 2021-03-31 18:30:00 1 NA NA 7.863 0.002 10.27 0.007
6 2021-03-30 11:25:00 1 18.29 0.018 7.864 0.001 10.3 0.006
7 2021-03-30 11:30:00 1 18.2 0.026 7.865 0.002 10.32 0.006
8 2021-03-30 11:35:00 1 18.12 0.025 7.867 0.001 10.34 0.008
9 2021-03-31 17:00:00 1 18.03 0.024 NA NA 10.37 0.008
10 2021-03-31 19:28:00 1 NA NA NA NA 10.4 0.005