对于每个日期,如何计算它距离最近发薪日之前/之后(即 lead/lag)的天数?
For each date, how to calculate days until/since (aka lead/lag) the closest pay period?
我的 date 范围如下,其中有两个日期是 payDay(发薪日)。对于每个发薪日前后 3 天内的日期,我想返回它距离该发薪日的天数(发薪日之前为负,之后为正)。
下面的 whatIHave 是我的数据,whatIWant 是期望的结果。我想用 dplyr 来完成此操作。任何帮助将不胜感激。谢谢。
# Example input: one row per calendar day from 2019-11-01 to 2019-12-01
# (31 rows). payDay is 1 on the two pay days (rows 13 and 25), 0 elsewhere.
whatIHave <- data.frame(
  date = seq(
    from = as.Date("2019/11/01"),
    to = as.Date("2019/12/01"),
    by = "days"
  ),
  payDay = replace(numeric(31), c(13, 25), 1)
)
# Expected output: the same calendar and payDay indicator, plus payDayLag,
# the signed row offset from the pay day for rows within 3 days of it
# (negative before, positive after) and 0 for every other row.
whatIWant <- data.frame(
  date = seq(
    from = as.Date("2019/11/01"),
    to = as.Date("2019/12/01"),
    by = "days"
  ),
  payDay = replace(numeric(31), c(13, 25), 1),
  payDayLag = replace(numeric(31), c(10:16, 22:28), rep(-3:3, times = 2))
)
一种做法是先找出 'payDay' 为 1 的行:
library(data.table)
library(dplyr)
# Row positions of the pay days (rows where payDay equals 1).
ind <- which(whatIHave[["payDay"]] == 1)
然后根据 'ind' 生成每个发薪日前后 3 行的行索引序列:
# Row indices of every row within 3 rows of a pay day, all windows
# concatenated into a single vector.
v1 <- unlist(lapply(ind, function(center) seq(center - 3, center + 3)))
然后,检查每一行的行号 (row_number()) 是否 %in% 'v1',得到一个逻辑向量,并用 rleid() 据此构造分组变量;在每个分组内,用 row_number() 减去 'payDay' 为 1 的那一行的行号,即得到 'payDayLag'(不含发薪日的分组直接取 0):
# Grouped variant: split the calendar into runs of rows that are inside or
# outside a pay-day window, then compute the offset within each run.
whatIHave %>%
  # rleid() starts a new group id whenever the in-window flag flips.
  group_by(group = rleid(row_number() %in% v1)) %>%
  mutate(
    payDayLag = if (!any(payDay != 0)) {
      # Run without a pay day: every row gets 0.
      0
    } else {
      # Signed distance (in rows) from the run's pay-day row.
      row_number() - row_number()[payDay == 1]
    }
  ) %>%
  ungroup() %>%
  select(-group)
# A tibble: 31 x 3
# date payDay payDayLag
# <date> <dbl> <dbl>
# 1 2019-11-01 0 0
# 2 2019-11-02 0 0
# 3 2019-11-03 0 0
# 4 2019-11-04 0 0
# 5 2019-11-05 0 0
# 6 2019-11-06 0 0
# 7 2019-11-07 0 0
# 8 2019-11-08 0 0
# 9 2019-11-09 0 0
#10 2019-11-10 0 -3
# … with 21 more rows
如果想在一条管道链中完成:
library(tidyverse)
# Single-chain variant: expand every pay day into its +/- 3 row window,
# compute the signed offset inside each window, join the offsets back onto
# the full calendar and fill the rows outside any window with 0.
whatIHave %>%
  mutate(ind = row_number() * payDay) %>%
  filter(payDay == 1) %>%
  # One list element per pay day holding the row indices of its window.
  mutate(ind = map(ind, ~ (.x - 3):(.x + 3))) %>%
  group_by(grp = row_number()) %>%
  # Name the list-column explicitly; tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = ind) %>%
  # The pay day is the 4th of the 7 rows in each window.
  mutate(payDayLag = row_number() - row_number()[4]) %>%
  ungroup() %>%
  select(-payDay, -grp, -date) %>%
  # Spell out the join key instead of relying on the guessed common column.
  # Window indices that fall outside the calendar have no match and are
  # dropped by the right join.
  right_join(
    whatIHave %>%
      mutate(ind = row_number()),
    by = "ind"
  ) %>%
  mutate(payDayLag = replace_na(payDayLag, 0)) %>%
  # The join may list matched rows before unmatched ones, so restore
  # calendar order and keep only the columns shown in whatIWant.
  arrange(ind) %>%
  select(date, payDay, payDayLag)
或者不使用连接 (join):
# Join-free variant: stash the +/- 3 row windows of all pay days in a
# list-column, use them to build the run groups, then compute the offset
# from the pay day inside each group.
whatIHave %>%
  mutate(
    # The same one-element list (holding every window) is stored on each row.
    ind = list(map(which(payDay == 1), function(center) {
      seq(center - 3, center + 3)
    }))
  ) %>%
  group_by(grp = rleid(row_number() %in% unlist(ind[[1]]))) %>%
  select(-ind) %>%
  mutate(
    payDayLag = if (!any(payDay != 0)) {
      # Run without a pay day: every row gets 0.
      0
    } else {
      # Signed distance (in rows) from the run's pay-day row.
      row_number() - row_number()[payDay == 1]
    }
  ) %>%
  ungroup() %>%
  select(-grp)
我的 date 范围如下,其中有两个日期是 payDay(发薪日)。对于每个发薪日前后 3 天内的日期,我想返回它距离该发薪日的天数(发薪日之前为负,之后为正)。
下面的 whatIHave 是我的数据,whatIWant 是期望的结果。我想用 dplyr 来完成此操作。任何帮助将不胜感激。谢谢。
# Example input: one row per calendar day from 2019-11-01 to 2019-12-01
# (31 rows). payDay is 1 on the two pay days (rows 13 and 25), 0 elsewhere.
whatIHave <- data.frame(
  date = seq(
    from = as.Date("2019/11/01"),
    to = as.Date("2019/12/01"),
    by = "days"
  ),
  payDay = replace(numeric(31), c(13, 25), 1)
)
# Expected output: the same calendar and payDay indicator, plus payDayLag,
# the signed row offset from the pay day for rows within 3 days of it
# (negative before, positive after) and 0 for every other row.
whatIWant <- data.frame(
  date = seq(
    from = as.Date("2019/11/01"),
    to = as.Date("2019/12/01"),
    by = "days"
  ),
  payDay = replace(numeric(31), c(13, 25), 1),
  payDayLag = replace(numeric(31), c(10:16, 22:28), rep(-3:3, times = 2))
)
一种做法是先找出 'payDay' 为 1 的行:
library(data.table)
library(dplyr)
# Row positions of the pay days (rows where payDay equals 1).
ind <- which(whatIHave[["payDay"]] == 1)
然后根据 'ind' 生成每个发薪日前后 3 行的行索引序列:
v1 <- unlist(lapply(ind, function(i) (i-3):(i+3)))
然后,检查每一行的行号 (row_number()) 是否 %in% 'v1',得到一个逻辑向量,并用 rleid() 据此构造分组变量;在每个分组内,用 row_number() 减去 'payDay' 为 1 的那一行的行号,即得到 'payDayLag'(不含发薪日的分组直接取 0):
# Grouped variant: split the calendar into runs of rows that are inside or
# outside a pay-day window, then compute the offset within each run.
whatIHave %>%
  # rleid() starts a new group id whenever the in-window flag flips.
  group_by(group = rleid(row_number() %in% v1)) %>%
  mutate(
    payDayLag = if (!any(payDay != 0)) {
      # Run without a pay day: every row gets 0.
      0
    } else {
      # Signed distance (in rows) from the run's pay-day row.
      row_number() - row_number()[payDay == 1]
    }
  ) %>%
  ungroup() %>%
  select(-group)
# A tibble: 31 x 3
# date payDay payDayLag
# <date> <dbl> <dbl>
# 1 2019-11-01 0 0
# 2 2019-11-02 0 0
# 3 2019-11-03 0 0
# 4 2019-11-04 0 0
# 5 2019-11-05 0 0
# 6 2019-11-06 0 0
# 7 2019-11-07 0 0
# 8 2019-11-08 0 0
# 9 2019-11-09 0 0
#10 2019-11-10 0 -3
# … with 21 more rows
如果想在一条管道链中完成:
library(tidyverse)
# Single-chain variant: expand every pay day into its +/- 3 row window,
# compute the signed offset inside each window, join the offsets back onto
# the full calendar and fill the rows outside any window with 0.
whatIHave %>%
  mutate(ind = row_number() * payDay) %>%
  filter(payDay == 1) %>%
  # One list element per pay day holding the row indices of its window.
  mutate(ind = map(ind, ~ (.x - 3):(.x + 3))) %>%
  group_by(grp = row_number()) %>%
  # Name the list-column explicitly; tidyr >= 1.0.0 warns when `cols` is
  # omitted.
  unnest(cols = ind) %>%
  # The pay day is the 4th of the 7 rows in each window.
  mutate(payDayLag = row_number() - row_number()[4]) %>%
  ungroup() %>%
  select(-payDay, -grp, -date) %>%
  # Spell out the join key instead of relying on the guessed common column.
  # Window indices that fall outside the calendar have no match and are
  # dropped by the right join.
  right_join(
    whatIHave %>%
      mutate(ind = row_number()),
    by = "ind"
  ) %>%
  mutate(payDayLag = replace_na(payDayLag, 0)) %>%
  # The join may list matched rows before unmatched ones, so restore
  # calendar order and keep only the columns shown in whatIWant.
  arrange(ind) %>%
  select(date, payDay, payDayLag)
或者不使用连接 (join):
# Join-free variant: stash the +/- 3 row windows of all pay days in a
# list-column, use them to build the run groups, then compute the offset
# from the pay day inside each group.
whatIHave %>%
  mutate(
    # The same one-element list (holding every window) is stored on each row.
    ind = list(map(which(payDay == 1), function(center) {
      seq(center - 3, center + 3)
    }))
  ) %>%
  group_by(grp = rleid(row_number() %in% unlist(ind[[1]]))) %>%
  select(-ind) %>%
  mutate(
    payDayLag = if (!any(payDay != 0)) {
      # Run without a pay day: every row gets 0.
      0
    } else {
      # Signed distance (in rows) from the run's pay-day row.
      row_number() - row_number()[payDay == 1]
    }
  ) %>%
  ungroup() %>%
  select(-grp)