有条件地过滤,如果 A 列中的值超出,则超出相应时间戳(B 列)的进一步观察将被丢弃
Filter conditionally, where if a value is exceeded in column A, further observations beyond the respective timestamp (column B) are dropped
我正在使用 R 中的鱼类遥测数据,下面提供了一个简化的数据集:
# Simplified fish-telemetry sample (dput output): 30 detections from two tags
# (1310230 and 1311038). Each tag has one "temp" sensor ID (5665, 5821) and
# one "depth" sensor ID (5666, 5822). DATE.TIME is POSIXct with the
# "Canada/Atlantic" time zone; SENSOR.VALUE holds the reading for that sensor.
df <- structure(list(DATE.TIME = structure(c(1560900051, 1560900101,
1560927373, 1560927504, 1560927533, 1560927585, 1560927689, 1560899962,
1560900026, 1560900026, 1560900076, 1560927328, 1560927498, 1560927529,
1560927558, 1560907660, 1560907720, 1560908037, 1560925131, 1560925260,
1560931034, 1560907630, 1560907695, 1560907746, 1560907804, 1560908189,
1560908268, 1560925097, 1560925300, 1560925426), class = c("POSIXct",
"POSIXt"), tzone = "Canada/Atlantic"), TAG = c(1310230L, 1310230L,
1310230L, 1310230L, 1310230L, 1310230L, 1310230L, 1310230L, 1310230L,
1310230L, 1310230L, 1310230L, 1310230L, 1310230L, 1310230L, 1311038L,
1311038L, 1311038L, 1311038L, 1311038L, 1311038L, 1311038L, 1311038L,
1311038L, 1311038L, 1311038L, 1311038L, 1311038L, 1311038L, 1311038L
), SENSOR.ID = c(5665L, 5665L, 5665L, 5665L, 5665L, 5665L, 5665L,
5666L, 5666L, 5666L, 5666L, 5666L, 5666L, 5666L, 5666L, 5821L,
5821L, 5821L, 5821L, 5821L, 5821L, 5822L, 5822L, 5822L, 5822L,
5822L, 5822L, 5822L, 5822L, 5822L), SENSOR.VALUE = c(8.1796,
8.1796, 35.0095, 35.0095, 35.0095, 35.0095, 35.0095, 0.9024,
0, 0, 0, 34.2986, 0.9024, 18.9544, 18.9544, 8.4934, 8.4934, 8.4934,
35.0095, 35.0095, 35.0095, 0, 0, 0, 0, 0, 0, 13.5388, 1.805,
1.805), SENSOR = c("temp", "temp", "temp", "temp", "temp", "temp",
"temp", "depth", "depth", "depth", "depth", "depth", "depth",
"depth", "depth", "temp", "temp", "temp", "temp", "temp", "temp",
"depth", "depth", "depth", "depth", "depth", "depth", "depth",
"depth", "depth")), row.names = c(435151L, 435152L, 435203L,
435204L, 435205L, 435206L, 435207L, 435614L, 435615L, 435616L,
435617L, 435664L, 435665L, 435666L, 435667L, 455286L, 455287L,
455288L, 455295L, 455296L, 455297L, 455553L, 455554L, 455555L,
455556L, 455557L, 455558L, 455568L, 455569L, 455570L), class = "data.frame")
数据结构如下:
- DATE.TIME=鱼检测时间戳
- TAG=植入鱼体内的声学标签的唯一 ID
- SENSOR.ID=每个传感器(温度和深度)的唯一 ID,每个 TAG 有 2 个传感器 ID
- SENSOR.VALUE=记录的温度(摄氏度)或深度(米)
- SENSOR=表示传感器类型(温度或深度)的分类变量
我想做的是 subset/filter 这个数据,这样当温度升高到 30C 以上(表示捕食)时,将从温度和深度传感器中删除任何后续检测。植入鱼体内的每个 TAG 交替传输其温度或深度 SENSOR.ID 和 SENSOR.VALUE。我可以为温度传感器数据做这个过滤器:
# Temperature-only attempt: keep the rows whose sensor type is "temp" and
# whose reading is below 30. which() drops rows where the condition is NA,
# matching what subset() does.
dfsub <- df[which(df$SENSOR == "temp" & df$SENSOR.VALUE < 30), , drop = FALSE]
但这样仍会保留捕食事件之后深度传感器上的检测,而这些检测此时反映的是捕食者的移动。理想情况下,过滤器应识别每条鱼(即 TAG)温度首次升高到 30C 以上的时间戳,并删除该时间之后的所有观察结果。经过过滤后,我希望数据集如下所示。
# Desired result of the filter as given by the asker (dput output): 16 rows
# with the same five columns as the input, covering both tags.
# NOTE: this assignment reuses the name `df`, so running it replaces any
# earlier `df` object in the session.
df <- structure(list(DATE.TIME = structure(c(1560900051, 1560900101,
1560899962, 1560900026, 1560900026, 1560900076, 1560907660, 1560907720,
1560908037, 1560907630, 1560907695, 1560907746, 1560907804, 1560908189,
1560908268, 1560925097), class = c("POSIXct", "POSIXt"), tzone = "Canada/Atlantic"),
TAG = c(1310230L, 1310230L, 1310230L, 1310230L, 1310230L,
1310230L, 1311038L, 1311038L, 1311038L, 1311038L, 1311038L,
1311038L, 1311038L, 1311038L, 1311038L, 1311038L), SENSOR.ID = c(5665L,
5665L, 5666L, 5666L, 5666L, 5666L, 5821L, 5821L, 5821L, 5822L,
5822L, 5822L, 5822L, 5822L, 5822L, 5822L), SENSOR.VALUE = c(8.1796,
8.1796, 0.9024, 0, 0, 0, 8.4934, 8.4934, 8.4934, 0, 0, 0,
0, 0, 0, 13.5388), SENSOR = c("temp", "temp", "depth", "depth",
"depth", "depth", "temp", "temp", "temp", "depth", "depth",
"depth", "depth", "depth", "depth", "depth")), row.names = c(435151L,
435152L, 435614L, 435615L, 435616L, 435617L, 455286L, 455287L,
455288L, 455553L, 455554L, 455555L, 455556L, 455557L, 455558L,
455568L), class = "data.frame")
感谢您的见解!
很棒的数据集!这是一个使用 tidyr::fill 的方案。我对您的数据做了一些修改和精简,以便制作更好的可复现示例(reprex)。
还出于教学目的将其分成多个步骤,但实际上,您应该在单个管道链中执行此操作。
library(tidyverse)
# Condensed example data: two fish (FISH.TAG 1 and 2), each with one "temp"
# and one "depth" sensor and two detections per sensor. DATE.TIME is kept as
# character, exactly as typed (note the unpadded hour in the last value).
fishdat <- tibble::tibble(
  DATE.TIME = c(
    "2019-06-18 20:19:41",
    "2019-06-18 20:20:51",
    "2019-06-18 20:19:22",
    "2019-06-18 20:21:16",
    "2019-06-18 22:27:40",
    "2019-06-18 22:33:57",
    "2019-06-18 22:27:10",
    "2019-06-19 3:18:17"
  ),
  FISH.TAG = rep(c(1, 2), each = 4),
  SENSOR.ID = rep(c(65, 66, 21, 22), each = 2),
  SENSOR.VALUE = c(9, 37, 1, 0, 35, 38, 0, 13),
  SENSOR = rep(c("temp", "depth"), each = 2, times = 2)
)
标记表示捕食的值
# Flag every temperature reading above 30 (the predation threshold from the
# question). All other rows get NA so the flag can be carried forward later.
# The printed output below reflects the `> 30` rule: the 35-degree reading of
# fish 2 is flagged as well.
fishdat_marked <-
  fishdat %>%
  mutate(predated = ifelse(SENSOR == "temp" & SENSOR.VALUE > 30,
                           "predated",
                           NA_character_))
fishdat_marked
#> # A tibble: 8 × 6
#> DATE.TIME FISH.TAG SENSOR.ID SENSOR.VALUE SENSOR predated
#> <chr> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 2019-06-18 20:19:41 1 65 9 temp <NA>
#> 2 2019-06-18 20:20:51 1 65 37 temp predated
#> 3 2019-06-18 20:19:22 1 66 1 depth <NA>
#> 4 2019-06-18 20:21:16 1 66 0 depth <NA>
#> 5 2019-06-18 22:27:40 2 21 35 temp predated
#> 6 2019-06-18 22:33:57 2 21 38 temp predated
#> 7 2019-06-18 22:27:10 2 22 0 depth <NA>
#> 8 2019-06-19 3:18:17 2 22 13 depth <NA>
沿着捕食标记向下级联
# Carry the predation flag forward in time within each fish, so every
# detection at or after the first flagged temperature reading is marked.
fishdat_filled <-
  fishdat_marked %>%
  group_by(FISH.TAG) %>% ## for each fish
  # Sort on parsed timestamps rather than on the raw column: when DATE.TIME is
  # character, a text sort misorders unpadded hours ("3:18:17" would land
  # after "20:19:22" on the same day). tz = "UTC" only fixes the parsing zone
  # so the ordering does not depend on the session's local time zone.
  arrange(as.POSIXct(DATE.TIME, tz = "UTC"), .by_group = TRUE) %>%
  fill(predated, .direction = "down")
fishdat_filled
#> # A tibble: 8 × 6
#> # Groups: FISH.TAG [2]
#> DATE.TIME FISH.TAG SENSOR.ID SENSOR.VALUE SENSOR predated
#> <chr> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 2019-06-18 20:19:22 1 66 1 depth <NA>
#> 2 2019-06-18 20:19:41 1 65 9 temp <NA>
#> 3 2019-06-18 20:20:51 1 65 37 temp predated
#> 4 2019-06-18 20:21:16 1 66 0 depth predated
#> 5 2019-06-18 22:27:10 2 22 0 depth <NA>
#> 6 2019-06-18 22:27:40 2 21 35 temp predated
#> 7 2019-06-18 22:33:57 2 21 38 temp predated
#> 8 2019-06-19 3:18:17 2 22 13 depth predated
现在,过滤
# Keep only the detections recorded before each fish's first flagged
# temperature reading (rows whose carried-forward flag is still NA), then
# drop the per-fish grouping so later steps operate on the whole table.
fishdat_filled %>%
  filter(is.na(predated)) %>%
  ungroup()
#> # A tibble: 3 × 6
#> DATE.TIME FISH.TAG SENSOR.ID SENSOR.VALUE SENSOR predated
#> <chr> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 2019-06-18 20:19:22 1 66 1 depth <NA>
#> 2 2019-06-18 20:19:41 1 65 9 temp <NA>
#> 3 2019-06-18 22:27:10 2 22 0 depth <NA>
由 reprex 包 (v2.0.1) 于 2021-10-22 创建
我正在使用 R 中的鱼类遥测数据,下面提供了一个简化的数据集:
# Simplified fish-telemetry sample (dput output): 30 detections from two tags
# (1310230 and 1311038). Each tag has one "temp" sensor ID (5665, 5821) and
# one "depth" sensor ID (5666, 5822). DATE.TIME is POSIXct with the
# "Canada/Atlantic" time zone; SENSOR.VALUE holds the reading for that sensor.
df <- structure(list(DATE.TIME = structure(c(1560900051, 1560900101,
1560927373, 1560927504, 1560927533, 1560927585, 1560927689, 1560899962,
1560900026, 1560900026, 1560900076, 1560927328, 1560927498, 1560927529,
1560927558, 1560907660, 1560907720, 1560908037, 1560925131, 1560925260,
1560931034, 1560907630, 1560907695, 1560907746, 1560907804, 1560908189,
1560908268, 1560925097, 1560925300, 1560925426), class = c("POSIXct",
"POSIXt"), tzone = "Canada/Atlantic"), TAG = c(1310230L, 1310230L,
1310230L, 1310230L, 1310230L, 1310230L, 1310230L, 1310230L, 1310230L,
1310230L, 1310230L, 1310230L, 1310230L, 1310230L, 1310230L, 1311038L,
1311038L, 1311038L, 1311038L, 1311038L, 1311038L, 1311038L, 1311038L,
1311038L, 1311038L, 1311038L, 1311038L, 1311038L, 1311038L, 1311038L
), SENSOR.ID = c(5665L, 5665L, 5665L, 5665L, 5665L, 5665L, 5665L,
5666L, 5666L, 5666L, 5666L, 5666L, 5666L, 5666L, 5666L, 5821L,
5821L, 5821L, 5821L, 5821L, 5821L, 5822L, 5822L, 5822L, 5822L,
5822L, 5822L, 5822L, 5822L, 5822L), SENSOR.VALUE = c(8.1796,
8.1796, 35.0095, 35.0095, 35.0095, 35.0095, 35.0095, 0.9024,
0, 0, 0, 34.2986, 0.9024, 18.9544, 18.9544, 8.4934, 8.4934, 8.4934,
35.0095, 35.0095, 35.0095, 0, 0, 0, 0, 0, 0, 13.5388, 1.805,
1.805), SENSOR = c("temp", "temp", "temp", "temp", "temp", "temp",
"temp", "depth", "depth", "depth", "depth", "depth", "depth",
"depth", "depth", "temp", "temp", "temp", "temp", "temp", "temp",
"depth", "depth", "depth", "depth", "depth", "depth", "depth",
"depth", "depth")), row.names = c(435151L, 435152L, 435203L,
435204L, 435205L, 435206L, 435207L, 435614L, 435615L, 435616L,
435617L, 435664L, 435665L, 435666L, 435667L, 455286L, 455287L,
455288L, 455295L, 455296L, 455297L, 455553L, 455554L, 455555L,
455556L, 455557L, 455558L, 455568L, 455569L, 455570L), class = "data.frame")
数据结构如下:
- DATE.TIME=鱼检测时间戳
- TAG=植入鱼体内的声学标签的唯一 ID
- SENSOR.ID=每个传感器(温度和深度)的唯一 ID,每个 TAG 有 2 个传感器 ID
- SENSOR.VALUE=记录的温度(摄氏度)或深度(米)
- SENSOR=表示传感器类型(温度或深度)的分类变量
我想做的是 subset/filter 这个数据,这样当温度升高到 30C 以上(表示捕食)时,将从温度和深度传感器中删除任何后续检测。植入鱼体内的每个 TAG 交替传输其温度或深度 SENSOR.ID 和 SENSOR.VALUE。我可以为温度传感器数据做这个过滤器:
# Temperature-only attempt: keep the rows whose sensor type is "temp" and
# whose reading is below 30. which() drops rows where the condition is NA,
# matching what subset() does.
dfsub <- df[which(df$SENSOR == "temp" & df$SENSOR.VALUE < 30), , drop = FALSE]
但这样仍会保留捕食事件之后深度传感器上的检测,而这些检测此时反映的是捕食者的移动。理想情况下,过滤器应识别每条鱼(即 TAG)温度首次升高到 30C 以上的时间戳,并删除该时间之后的所有观察结果。经过过滤后,我希望数据集如下所示。
# Desired result of the filter as given by the asker (dput output): 16 rows
# with the same five columns as the input, covering both tags.
# NOTE: this assignment reuses the name `df`, so running it replaces any
# earlier `df` object in the session.
df <- structure(list(DATE.TIME = structure(c(1560900051, 1560900101,
1560899962, 1560900026, 1560900026, 1560900076, 1560907660, 1560907720,
1560908037, 1560907630, 1560907695, 1560907746, 1560907804, 1560908189,
1560908268, 1560925097), class = c("POSIXct", "POSIXt"), tzone = "Canada/Atlantic"),
TAG = c(1310230L, 1310230L, 1310230L, 1310230L, 1310230L,
1310230L, 1311038L, 1311038L, 1311038L, 1311038L, 1311038L,
1311038L, 1311038L, 1311038L, 1311038L, 1311038L), SENSOR.ID = c(5665L,
5665L, 5666L, 5666L, 5666L, 5666L, 5821L, 5821L, 5821L, 5822L,
5822L, 5822L, 5822L, 5822L, 5822L, 5822L), SENSOR.VALUE = c(8.1796,
8.1796, 0.9024, 0, 0, 0, 8.4934, 8.4934, 8.4934, 0, 0, 0,
0, 0, 0, 13.5388), SENSOR = c("temp", "temp", "depth", "depth",
"depth", "depth", "temp", "temp", "temp", "depth", "depth",
"depth", "depth", "depth", "depth", "depth")), row.names = c(435151L,
435152L, 435614L, 435615L, 435616L, 435617L, 455286L, 455287L,
455288L, 455553L, 455554L, 455555L, 455556L, 455557L, 455558L,
455568L), class = "data.frame")
感谢您的见解!
很棒的数据集!这是一个使用 tidyr::fill 的方案。我对您的数据做了一些修改和精简,以便制作更好的可复现示例(reprex)。
还出于教学目的将其分成多个步骤,但实际上,您应该在单个管道链中执行此操作。
library(tidyverse)
# Condensed example data: two fish (FISH.TAG 1 and 2), each with one "temp"
# and one "depth" sensor and two detections per sensor. DATE.TIME is kept as
# character, exactly as typed (note the unpadded hour in the last value).
fishdat <- tibble::tibble(
  DATE.TIME = c(
    "2019-06-18 20:19:41",
    "2019-06-18 20:20:51",
    "2019-06-18 20:19:22",
    "2019-06-18 20:21:16",
    "2019-06-18 22:27:40",
    "2019-06-18 22:33:57",
    "2019-06-18 22:27:10",
    "2019-06-19 3:18:17"
  ),
  FISH.TAG = rep(c(1, 2), each = 4),
  SENSOR.ID = rep(c(65, 66, 21, 22), each = 2),
  SENSOR.VALUE = c(9, 37, 1, 0, 35, 38, 0, 13),
  SENSOR = rep(c("temp", "depth"), each = 2, times = 2)
)
标记表示捕食的值
# Flag every temperature reading above 30 (the predation threshold from the
# question). All other rows get NA so the flag can be carried forward later.
# The printed output below reflects the `> 30` rule: the 35-degree reading of
# fish 2 is flagged as well.
fishdat_marked <-
  fishdat %>%
  mutate(predated = ifelse(SENSOR == "temp" & SENSOR.VALUE > 30,
                           "predated",
                           NA_character_))
fishdat_marked
#> # A tibble: 8 × 6
#> DATE.TIME FISH.TAG SENSOR.ID SENSOR.VALUE SENSOR predated
#> <chr> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 2019-06-18 20:19:41 1 65 9 temp <NA>
#> 2 2019-06-18 20:20:51 1 65 37 temp predated
#> 3 2019-06-18 20:19:22 1 66 1 depth <NA>
#> 4 2019-06-18 20:21:16 1 66 0 depth <NA>
#> 5 2019-06-18 22:27:40 2 21 35 temp predated
#> 6 2019-06-18 22:33:57 2 21 38 temp predated
#> 7 2019-06-18 22:27:10 2 22 0 depth <NA>
#> 8 2019-06-19 3:18:17 2 22 13 depth <NA>
沿着捕食标记向下级联
# Carry the predation flag forward in time within each fish, so every
# detection at or after the first flagged temperature reading is marked.
fishdat_filled <-
  fishdat_marked %>%
  group_by(FISH.TAG) %>% ## for each fish
  # Sort on parsed timestamps rather than on the raw column: when DATE.TIME is
  # character, a text sort misorders unpadded hours ("3:18:17" would land
  # after "20:19:22" on the same day). tz = "UTC" only fixes the parsing zone
  # so the ordering does not depend on the session's local time zone.
  arrange(as.POSIXct(DATE.TIME, tz = "UTC"), .by_group = TRUE) %>%
  fill(predated, .direction = "down")
fishdat_filled
#> # A tibble: 8 × 6
#> # Groups: FISH.TAG [2]
#> DATE.TIME FISH.TAG SENSOR.ID SENSOR.VALUE SENSOR predated
#> <chr> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 2019-06-18 20:19:22 1 66 1 depth <NA>
#> 2 2019-06-18 20:19:41 1 65 9 temp <NA>
#> 3 2019-06-18 20:20:51 1 65 37 temp predated
#> 4 2019-06-18 20:21:16 1 66 0 depth predated
#> 5 2019-06-18 22:27:10 2 22 0 depth <NA>
#> 6 2019-06-18 22:27:40 2 21 35 temp predated
#> 7 2019-06-18 22:33:57 2 21 38 temp predated
#> 8 2019-06-19 3:18:17 2 22 13 depth predated
现在,过滤
# Keep only the detections recorded before each fish's first flagged
# temperature reading (rows whose carried-forward flag is still NA), then
# drop the per-fish grouping so later steps operate on the whole table.
fishdat_filled %>%
  filter(is.na(predated)) %>%
  ungroup()
#> # A tibble: 3 × 6
#> DATE.TIME FISH.TAG SENSOR.ID SENSOR.VALUE SENSOR predated
#> <chr> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 2019-06-18 20:19:22 1 66 1 depth <NA>
#> 2 2019-06-18 20:19:41 1 65 9 temp <NA>
#> 3 2019-06-18 22:27:10 2 22 0 depth <NA>
由 reprex 包 (v2.0.1) 于 2021-10-22 创建