R 在行内的多个条件下使用 any() - 更复杂的版本
R using any() on multiple conditions within row - More Complex Version
所以昨天在这里解决了这个更简单的版本:
但是,我不确定能否把它扩展到我的第二个需求:我想判断某一行 From_ID 中的 ID 是否出现在同一组(Tag)内任何日期更早的行的 To_ID 中。
# Packages used below: dplyr for the pipeline verbs, anytime for anydate().
library(dplyr)
library(anytime)

# Example links: position i describes From_ID[i] -> To_ID[i] inside group Tag[i].
Tag <- c(
  "V1", "V1", "V1", "V1", "V1", "V1",
  "V2", "V2", "V2",
  "V3", "V3", "V3", "V3",
  "V4", "V4", "V4"
)
From_ID <- c(
  "BL342", "SD234", "FR212", "PX123", "KJ214", "BL342",
  "FR231", "BL231", "CV231",
  "KJ875", "PX239", "TR543", "FR342",
  "DS329", "FR321", "DF321"
)
To_ID <- c(
  "FR212", "BL342", "SD234", "FG342", "BL342", "KJ214",
  "BL231", "FR231", "KJ123",
  "FG432", "KJ123", "TR998", "PX239",
  "HG734", "DF321", "FR321"
)

# Draw 16 distinct days from the 2017-01-01 .. 2020-01-01 range. No seed is
# set, so the dates (and therefore the row order below) differ between runs.
Date <- sample(
  seq(anydate("2017-01-01"), anydate("2020-01-01"), by = "day"),
  16
)

# Sort newest-first inside each Tag and number the rows, so Rank 1 is the most
# recent row of its group. The result is left grouped by Tag.
df <- data.frame(Tag, From_ID, To_ID, Date) %>%
  group_by(Tag) %>%
  arrange(Tag, desc(Date)) %>%
  mutate(Rank = row_number())
在我不关心行位置的情况下,我以前做过:
library(stringr)
# Earlier, position-agnostic attempt: spread To_ID into one column per Date,
# paste those columns into a single string per Tag, extract the ID-like tokens
# from that string, join back onto the rows, and flag rows whose From_ID is
# found among the tokens.
# NOTE(review): this snippet does not parse/run as shown -- see the notes below.
wide <- df %>%
# NOTE(review): pivot_wider() and unite() presumably come from tidyr, which is
# not loaded anywhere in the visible code -- confirm.
pivot_wider(id_cols = Tag, names_from = Date, values_from = To_ID) %>%
# Collapse every column whose name contains "-" (presumably the date-named
# columns created by the pivot) into one comma-separated column `d`.
unite(d, contains("-"), sep =",", na.rm=T) %>%
# NOTE(review): "\[" is not a recognised escape in an R string literal, and the
# class is lower-case while the sample IDs are upper-case; the intended pattern
# was probably "[A-Z]{2}[0-9]{3}" -- confirm.
mutate(Extract = str_extract_all(d, "\[a-z]{2}[0-9]{3}")) %>%
# No `by` is given, so the join keys are whatever columns the two tables share.
full_join(df) %>%
# NOTE(review): column `b` is not created in the visible code, and this
# mutate( call is never closed (one ")" is missing) -- confirm the intended
# condition before reusing this.
mutate(SY_Del = case_when(b == 'Farm' &
str_detect(Extract, From_ID) == T ~ T,
TRUE ~ F)
预期输出:当某一行的 From_ID 出现在同一 Tag 组内日期早于该行的任意一行的 To_ID 中时,Flag 为 TRUE,即
expected output
Tag From_ID To_ID Date Rank Flag
1 V1 FR212 SD234 2019-08-24 1 TRUE
2 V1 BL342 KJ214 2019-05-05 2 TRUE
3 V1 PX123 FG342 2019-04-22 3 FALSE
4 V1 KJ214 BL342 2019-01-01 4 FALSE
5 V1 SD234 BL342 2018-09-13 5 FALSE
6 V1 BL342 FR212 2018-04-30 6 FALSE
7 V2 FR231 BL231 2019-09-16 1 TRUE
8 V2 CV231 KJ123 2018-01-07 2 FALSE
9 V2 BL231 FR231 2017-01-11 3 FALSE
10 V3 KJ875 FG432 2019-11-14 1 FALSE
11 V3 TR543 TR998 2019-10-26 2 FALSE
12 V3 FR342 PX239 2019-07-02 3 FALSE
13 V3 PX239 KJ123 2017-07-15 4 FALSE
14 V4 DS329 HG734 2019-01-30 1 FALSE
15 V4 DF321 FR321 2017-05-06 2 TRUE
16 V4 FR321 DF321 2017-03-20 3 FALSE
使用 lubridate 的 dmy() 将 'Date' 转换为 Date 类,按 'Tag'、'Date' 对数据进行 arrange(排序),按 'Tag' 分组,然后通过遍历 row_number() 创建 'Flag2' 列:检查该行的 'From_ID' 是否 %in% 组内从第一行到该行的 'To_ID';最后 ungroup,并根据 'Tag'、'Rank' 列把数据集 arrange 回原始顺序。
library(dplyr)
library(purrr)
library(lubridate)
# Within each Tag, flag every row whose From_ID already occurs as a To_ID on a
# row at or before it in date order (the lookup window runs from the group's
# earliest row up to and including the current row).
df %>%
  # Parse the day/month/year strings into Date values so they sort by time.
  mutate(Date = dmy(Date)) %>%
  # Oldest first, so position i inside a group covers the i earliest rows.
  arrange(Tag, Date) %>%
  group_by(Tag) %>%
  mutate(
    Flag2 = map_lgl(
      seq_len(n()),
      # TRUE when row i's From_ID is among the first i To_ID values of the group.
      function(i) is.element(From_ID[i], To_ID[seq_len(i)])
    )
  ) %>%
  ungroup() %>%
  # Put the rows back in their original presentation order.
  arrange(Tag, Rank)
# A tibble: 16 x 7
# Tag From_ID To_ID Date Rank Flag Flag2
# <chr> <chr> <chr> <date> <int> <lgl> <lgl>
# 1 V1 FR212 SD234 2019-08-24 1 TRUE TRUE
# 2 V1 BL342 KJ214 2019-05-05 2 TRUE TRUE
# 3 V1 PX123 FG342 2019-04-22 3 FALSE FALSE
# 4 V1 KJ214 BL342 2019-01-01 4 FALSE FALSE
# 5 V1 SD234 BL342 2018-09-13 5 FALSE FALSE
# 6 V1 BL342 FR212 2018-04-30 6 FALSE FALSE
# 7 V2 FR231 BL231 2019-09-16 1 TRUE TRUE
# 8 V2 CV231 KJ123 2018-01-07 2 FALSE FALSE
# 9 V2 BL231 FR231 2017-01-11 3 FALSE FALSE
#10 V3 KJ875 FG432 2019-11-14 1 FALSE FALSE
#11 V3 TR543 TR998 2019-10-26 2 FALSE FALSE
#12 V3 FR342 PX239 2019-07-02 3 FALSE FALSE
#13 V3 PX239 KJ123 2017-07-15 4 FALSE FALSE
#14 V4 DS329 HG734 2019-01-30 1 FALSE FALSE
#15 V4 DF321 FR321 2017-05-06 2 TRUE TRUE
#16 V4 FR321 DF321 2017-03-20 3 FALSE FALSE
数据
# Reproducible input for the answer: one row per From_ID -> To_ID link, with
# Date kept as day/month/year text and Flag holding the expected result.
df <- structure(
  list(
    Tag = rep(c("V1", "V2", "V3", "V4"), times = c(6L, 3L, 4L, 3L)),
    From_ID = c(
      "FR212", "BL342", "PX123", "KJ214", "SD234", "BL342",
      "FR231", "CV231", "BL231",
      "KJ875", "TR543", "FR342", "PX239",
      "DS329", "DF321", "FR321"
    ),
    To_ID = c(
      "SD234", "KJ214", "FG342", "BL342", "BL342", "FR212",
      "BL231", "KJ123", "FR231",
      "FG432", "TR998", "PX239", "KJ123",
      "HG734", "FR321", "DF321"
    ),
    Date = c(
      "24/08/2019", "5/05/2019", "22/04/2019",
      "1/01/2019", "13/09/2018", "30/04/2018",
      "16/09/2019", "7/01/2018", "11/01/2017",
      "14/11/2019", "26/10/2019", "2/07/2019", "15/07/2017",
      "30/01/2019", "6/05/2017", "20/03/2017"
    ),
    # Rank restarts at 1 for every Tag group.
    Rank = c(1:6, 1:3, 1:4, 1:3),
    Flag = c(
      TRUE, TRUE, FALSE, FALSE, FALSE, FALSE,
      TRUE, FALSE, FALSE,
      FALSE, FALSE, FALSE, FALSE,
      FALSE, TRUE, FALSE
    )
  ),
  class = "data.frame",
  row.names = as.character(1:16)
)
所以昨天在这里解决了这个更简单的版本:
但是,我不确定能否把它扩展到我的第二个需求:我想判断某一行 From_ID 中的 ID 是否出现在同一组(Tag)内任何日期更早的行的 To_ID 中。
# Packages used below: dplyr for the pipeline verbs, anytime for anydate().
library(dplyr)
library(anytime)

# Example links: position i describes From_ID[i] -> To_ID[i] inside group Tag[i].
Tag <- c(
  "V1", "V1", "V1", "V1", "V1", "V1",
  "V2", "V2", "V2",
  "V3", "V3", "V3", "V3",
  "V4", "V4", "V4"
)
From_ID <- c(
  "BL342", "SD234", "FR212", "PX123", "KJ214", "BL342",
  "FR231", "BL231", "CV231",
  "KJ875", "PX239", "TR543", "FR342",
  "DS329", "FR321", "DF321"
)
To_ID <- c(
  "FR212", "BL342", "SD234", "FG342", "BL342", "KJ214",
  "BL231", "FR231", "KJ123",
  "FG432", "KJ123", "TR998", "PX239",
  "HG734", "DF321", "FR321"
)

# Draw 16 distinct days from the 2017-01-01 .. 2020-01-01 range. No seed is
# set, so the dates (and therefore the row order below) differ between runs.
Date <- sample(
  seq(anydate("2017-01-01"), anydate("2020-01-01"), by = "day"),
  16
)

# Sort newest-first inside each Tag and number the rows, so Rank 1 is the most
# recent row of its group. The result is left grouped by Tag.
df <- data.frame(Tag, From_ID, To_ID, Date) %>%
  group_by(Tag) %>%
  arrange(Tag, desc(Date)) %>%
  mutate(Rank = row_number())
在我不关心行位置的情况下,我以前做过:
library(stringr)
# Earlier, position-agnostic attempt: spread To_ID into one column per Date,
# paste those columns into a single string per Tag, extract the ID-like tokens
# from that string, join back onto the rows, and flag rows whose From_ID is
# found among the tokens.
# NOTE(review): this snippet does not parse/run as shown -- see the notes below.
wide <- df %>%
# NOTE(review): pivot_wider() and unite() presumably come from tidyr, which is
# not loaded anywhere in the visible code -- confirm.
pivot_wider(id_cols = Tag, names_from = Date, values_from = To_ID) %>%
# Collapse every column whose name contains "-" (presumably the date-named
# columns created by the pivot) into one comma-separated column `d`.
unite(d, contains("-"), sep =",", na.rm=T) %>%
# NOTE(review): "\[" is not a recognised escape in an R string literal, and the
# class is lower-case while the sample IDs are upper-case; the intended pattern
# was probably "[A-Z]{2}[0-9]{3}" -- confirm.
mutate(Extract = str_extract_all(d, "\[a-z]{2}[0-9]{3}")) %>%
# No `by` is given, so the join keys are whatever columns the two tables share.
full_join(df) %>%
# NOTE(review): column `b` is not created in the visible code, and this
# mutate( call is never closed (one ")" is missing) -- confirm the intended
# condition before reusing this.
mutate(SY_Del = case_when(b == 'Farm' &
str_detect(Extract, From_ID) == T ~ T,
TRUE ~ F)
预期输出:当某一行的 From_ID 出现在同一 Tag 组内日期早于该行的任意一行的 To_ID 中时,Flag 为 TRUE,即 expected output
Tag From_ID To_ID Date Rank Flag
1 V1 FR212 SD234 2019-08-24 1 TRUE
2 V1 BL342 KJ214 2019-05-05 2 TRUE
3 V1 PX123 FG342 2019-04-22 3 FALSE
4 V1 KJ214 BL342 2019-01-01 4 FALSE
5 V1 SD234 BL342 2018-09-13 5 FALSE
6 V1 BL342 FR212 2018-04-30 6 FALSE
7 V2 FR231 BL231 2019-09-16 1 TRUE
8 V2 CV231 KJ123 2018-01-07 2 FALSE
9 V2 BL231 FR231 2017-01-11 3 FALSE
10 V3 KJ875 FG432 2019-11-14 1 FALSE
11 V3 TR543 TR998 2019-10-26 2 FALSE
12 V3 FR342 PX239 2019-07-02 3 FALSE
13 V3 PX239 KJ123 2017-07-15 4 FALSE
14 V4 DS329 HG734 2019-01-30 1 FALSE
15 V4 DF321 FR321 2017-05-06 2 TRUE
16 V4 FR321 DF321 2017-03-20 3 FALSE
使用 lubridate 的 dmy() 将 'Date' 转换为 Date 类,按 'Tag'、'Date' 对数据进行 arrange(排序),按 'Tag' 分组,然后通过遍历 row_number() 创建 'Flag2' 列:检查该行的 'From_ID' 是否 %in% 组内从第一行到该行的 'To_ID';最后 ungroup,并根据 'Tag'、'Rank' 列把数据集 arrange 回原始顺序。
library(dplyr)
library(purrr)
library(lubridate)
# Within each Tag, flag every row whose From_ID already occurs as a To_ID on a
# row at or before it in date order (the lookup window runs from the group's
# earliest row up to and including the current row).
df %>%
  # Parse the day/month/year strings into Date values so they sort by time.
  mutate(Date = dmy(Date)) %>%
  # Oldest first, so position i inside a group covers the i earliest rows.
  arrange(Tag, Date) %>%
  group_by(Tag) %>%
  mutate(
    Flag2 = map_lgl(
      seq_len(n()),
      # TRUE when row i's From_ID is among the first i To_ID values of the group.
      function(i) is.element(From_ID[i], To_ID[seq_len(i)])
    )
  ) %>%
  ungroup() %>%
  # Put the rows back in their original presentation order.
  arrange(Tag, Rank)
# A tibble: 16 x 7
# Tag From_ID To_ID Date Rank Flag Flag2
# <chr> <chr> <chr> <date> <int> <lgl> <lgl>
# 1 V1 FR212 SD234 2019-08-24 1 TRUE TRUE
# 2 V1 BL342 KJ214 2019-05-05 2 TRUE TRUE
# 3 V1 PX123 FG342 2019-04-22 3 FALSE FALSE
# 4 V1 KJ214 BL342 2019-01-01 4 FALSE FALSE
# 5 V1 SD234 BL342 2018-09-13 5 FALSE FALSE
# 6 V1 BL342 FR212 2018-04-30 6 FALSE FALSE
# 7 V2 FR231 BL231 2019-09-16 1 TRUE TRUE
# 8 V2 CV231 KJ123 2018-01-07 2 FALSE FALSE
# 9 V2 BL231 FR231 2017-01-11 3 FALSE FALSE
#10 V3 KJ875 FG432 2019-11-14 1 FALSE FALSE
#11 V3 TR543 TR998 2019-10-26 2 FALSE FALSE
#12 V3 FR342 PX239 2019-07-02 3 FALSE FALSE
#13 V3 PX239 KJ123 2017-07-15 4 FALSE FALSE
#14 V4 DS329 HG734 2019-01-30 1 FALSE FALSE
#15 V4 DF321 FR321 2017-05-06 2 TRUE TRUE
#16 V4 FR321 DF321 2017-03-20 3 FALSE FALSE
数据
# Reproducible input for the answer: one row per From_ID -> To_ID link, with
# Date kept as day/month/year text and Flag holding the expected result.
df <- structure(
  list(
    Tag = rep(c("V1", "V2", "V3", "V4"), times = c(6L, 3L, 4L, 3L)),
    From_ID = c(
      "FR212", "BL342", "PX123", "KJ214", "SD234", "BL342",
      "FR231", "CV231", "BL231",
      "KJ875", "TR543", "FR342", "PX239",
      "DS329", "DF321", "FR321"
    ),
    To_ID = c(
      "SD234", "KJ214", "FG342", "BL342", "BL342", "FR212",
      "BL231", "KJ123", "FR231",
      "FG432", "TR998", "PX239", "KJ123",
      "HG734", "FR321", "DF321"
    ),
    Date = c(
      "24/08/2019", "5/05/2019", "22/04/2019",
      "1/01/2019", "13/09/2018", "30/04/2018",
      "16/09/2019", "7/01/2018", "11/01/2017",
      "14/11/2019", "26/10/2019", "2/07/2019", "15/07/2017",
      "30/01/2019", "6/05/2017", "20/03/2017"
    ),
    # Rank restarts at 1 for every Tag group.
    Rank = c(1:6, 1:3, 1:4, 1:3),
    Flag = c(
      TRUE, TRUE, FALSE, FALSE, FALSE, FALSE,
      TRUE, FALSE, FALSE,
      FALSE, FALSE, FALSE, FALSE,
      FALSE, TRUE, FALSE
    )
  ),
  class = "data.frame",
  row.names = as.character(1:16)
)