R - dplyr 如何使用 i+1 循环式条件

R - dplyr how to use the i+1 loop-style condition

我正在尝试了解如何将 `[i+1]` 这种索引方式与 dplyr 一起使用。

我的数据是这样的

> head(dtat)
  id sex variable   value
1  1   m    08:00 partner
2  1   f    08:00 partner
3  1   m    08:15 partner
4  1   f    08:15 partner
5  1   m    08:30 partner
6  1   f    08:30 partner

我想检查在每个 id 和 variable 的组合下,m 的 value 是否与 f 的 value 相同。

例如,检查 id == 1 在 08:00 时 m 的值(partner)是否与同为 08:00 时 f 的值(也是 partner)相同。

我不确定该怎么做,我猜这里需要类似 value[i+1] 的写法:

# Asker's attempt: within each (id, variable) group, flag (as 1/0) whether
# `value` equals the element of `value` at index i + 1.
# NOTE(review): `i` is not defined anywhere in this snippet, so as written the
# index does not refer to "the next row" -- confirm intent.
dtat %>% 
 group_by(id, variable) %>% 
 mutate(as.numeric (value == value [i+1] )) 

我还想对 variable 做滞后一期(lag +1)的检查。例如,比较 id == 1 时 08:00 处 m 的值是否等于 08:15 处 f 的值。

有什么想法吗? 谢谢

不带 lag 的输出应该如下(check 即所求的匹配变量):

   id variable check sex   value
1   1    08:00     1   m partner
2   1    08:00     1   f partner
3   1    08:15     1   m partner
4   1    08:15     1   f partner
5   1    08:30     1   m partner
6   1    08:30     1   f partner
7   2    08:00     0   m       *
8   2    08:00     0   f   alone
9   2    08:15     0   m       *
10  2    08:15     0   f   alone
11  2    08:30     0   m       *
12  2    08:30     0   f partner
13  3    08:00     0   m partner
14  3    08:00     0   f nuclear
15  3    08:15     0   m partner
16  3    08:15     0   f nuclear
17  3    08:30     0   m partner
18  3    08:30     0   f nuclear

数据

# Example data: ids 1-3, each with an "m" and an "f" row at 08:00, 08:15 and
# 08:30. `variable` is a factor whose 56 levels cover every 15-minute slot
# from 08:00 to 21:45, although only the first three levels are used.
dtat <- data.frame(
  id = rep(1:3, each = 6L),
  sex = rep(c("m", "f"), times = 9L),
  variable = factor(
    rep(rep(c("08:00", "08:15", "08:30"), each = 2L), times = 3L),
    levels = sprintf(
      "%02d:%02d",
      rep(8:21, each = 4L),
      rep(c(0L, 15L, 30L, 45L), times = 14L)
    )
  ),
  value = c(
    rep("partner", 6L),
    "*", "alone", "*", "alone", "*", "partner",
    rep(c("partner", "nuclear"), times = 3L)
  ),
  stringsAsFactors = FALSE
)

假设我们想通过比较当前观测与下一个观测来创建 'check' 列,可以使用 dplyr 的 lead 函数。我们按 'id' 和 'variable' 分组,用 == 比较得到一个逻辑向量,再通过一元运算符 + 将其强制转换为 0/1 整数。

 # For every (id, variable) group, mark with 1/0 whether a row's value equals
 # the value of the row after it; the last row of a group is compared against
 # the group's first value (supplied through `default`).
 dtat %>%
   group_by(id, variable) %>%
   mutate(
     check = as.integer(value == lead(value, default = value[1L]))
   )
#    id sex variable   value check
#1   1   m    08:00 partner     1
#2   1   f    08:00 partner     1
#3   1   m    08:15 partner     1
#4   1   f    08:15 partner     1
#5   1   m    08:30 partner     1
#6   1   f    08:30 partner     1
#7   2   m    08:00       *     0
#8   2   f    08:00   alone     0
#9   2   m    08:15       *     0
#10  2   f    08:15   alone     0
#11  2   m    08:30       *     0
#12  2   f    08:30 partner     0
#13  3   m    08:00 partner     0
#14  3   f    08:00 nuclear     0
#15  3   m    08:15 partner     0
#16  3   f    08:15 nuclear     0
#17  3   m    08:30 partner     0
#18  3   f    08:30 nuclear     0

另一个选项是使用 n_distinct:检查每个组中唯一(unique)元素的个数(length)是否等于 1。

# Alternative: a group counts as a match (1) when it holds exactly one
# distinct `value`, otherwise 0. The single result is recycled to every row
# of the group.
dtat %>%
  group_by(id, variable) %>%
  mutate(check = as.integer(n_distinct(value) == 1L))

我刚想到,也许可以像下面这样写:

# Within each (id, variable) group, number the rows (`ep`) and flag with 1/0
# whether the row numbered 1 and the row numbered 2 carry the same `value`.
# Only rows 1 and 2 of a group take part in the comparison.
dtat %>%
  group_by(id, variable) %>%
  mutate(
    # seq_len() rather than 1:n(): 1:0 evaluates to c(1, 0), which has the
    # wrong length when there are no rows to number.
    ep = seq_len(n()),
    check = as.numeric(value[ep == 1L] == value[ep == 2L])
  )