R - dplyr 如何使用 i+1 循环式条件
R - dplyr how to use the i+1 loop-style condition
我正在尝试了解如何在 dplyr 中使用类似 [i+1] 的索引方式。
我的数据是这样的
> head(dtat)
id sex variable value
1 1 m 08:00 partner
2 1 f 08:00 partner
3 1 m 08:15 partner
4 1 f 08:15 partner
5 1 m 08:30 partner
6 1 f 08:30 partner
我想按 id 和 variable 分组,检查 m 和 f 的 value 是否相同。
例如,对于 id == 1,检查 m 在 8:00 的值(partner)是否与 f 在 8:00 的值同为 partner。
我的思路是在这里使用类似 value[i+1] 的写法:
# Asker's sketch of the intended comparison: within each (id, variable) group,
# compare every value with the one in the following row.
# NOTE(review): `i` is not defined anywhere in this snippet, so it reads as
# pseudo-code illustrating the idea rather than as runnable code.
dtat %>%
group_by(id, variable) %>%
mutate(as.numeric (value == value [i+1] ))
我还想在 variable 上检查 lag + 1 的情况。例如,对于 id == 1,比较 8:00 处的 m 是否等于 8:15 处的 f。
有什么想法吗?
谢谢
不考虑 lag 时的输出应该是(check 是所讨论的匹配变量):
id variable check sex value
1 1 08:00 1 m partner
2 1 08:00 1 f partner
3 1 08:15 1 m partner
4 1 08:15 1 f partner
5 1 08:30 1 m partner
6 1 08:30 1 f partner
7 2 08:00 0 m *
8 2 08:00 0 f alone
9 2 08:15 0 m *
10 2 08:15 0 f alone
11 2 08:30 0 m *
12 2 08:30 0 f partner
13 3 08:00 0 m partner
14 3 08:00 0 f nuclear
15 3 08:15 0 m partner
16 3 08:15 0 f nuclear
17 3 08:30 0 m partner
18 3 08:30 0 f nuclear
数据
# Reproducible example data: an 18-row data.frame with columns
# id (integer), sex ("m"/"f"), variable (factor whose levels are
# quarter-hour labels from "08:00" to "21:45"; only the first three
# levels are used by the rows) and value (character).
dtat = structure(list(id = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), sex = c("m", "f", "m", "f",
"m", "f", "m", "f", "m", "f", "m", "f", "m", "f", "m", "f", "m",
"f"), variable = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L,
2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L), .Label = c("08:00",
"08:15", "08:30", "08:45", "09:00", "09:15", "09:30", "09:45",
"10:00", "10:15", "10:30", "10:45", "11:00", "11:15", "11:30",
"11:45", "12:00", "12:15", "12:30", "12:45", "13:00", "13:15",
"13:30", "13:45", "14:00", "14:15", "14:30", "14:45", "15:00",
"15:15", "15:30", "15:45", "16:00", "16:15", "16:30", "16:45",
"17:00", "17:15", "17:30", "17:45", "18:00", "18:15", "18:30",
"18:45", "19:00", "19:15", "19:30", "19:45", "20:00", "20:15",
"20:30", "20:45", "21:00", "21:15", "21:30", "21:45"), class = "factor"),
value = c("partner", "partner", "partner", "partner", "partner",
"partner", "*", "alone", "*", "alone", "*", "partner", "partner",
"nuclear", "partner", "nuclear", "partner", "nuclear")), class = "data.frame", row.names = c(NA,
-18L), .Names = c("id", "sex", "variable", "value"))
假设我们想通过将当前观测与下一个观测进行比较来创建 'check' 列,可以使用 lead。我们按 'id'、'variable' 分组,用 == 进行比较得到一个逻辑向量,再通过一元 + 将其强制转换为 0/1。
# Per (id, variable) group, flag whether each row's value equals the value of
# the row after it. The last row of a group has no successor, so `default`
# makes it compare against the group's first value instead.
dtat %>%
  group_by(id, variable) %>%
  mutate(check = as.integer(value == lead(value, default = value[1L])))
# id sex variable value check
#1 1 m 08:00 partner 1
#2 1 f 08:00 partner 1
#3 1 m 08:15 partner 1
#4 1 f 08:15 partner 1
#5 1 m 08:30 partner 1
#6 1 f 08:30 partner 1
#7 2 m 08:00 * 0
#8 2 f 08:00 alone 0
#9 2 m 08:15 * 0
#10 2 f 08:15 alone 0
#11 2 m 08:30 * 0
#12 2 f 08:30 partner 0
#13 3 m 08:00 partner 0
#14 3 f 08:00 nuclear 0
#15 3 m 08:15 partner 0
#16 3 f 08:15 nuclear 0
#17 3 m 08:30 partner 0
#18 3 f 08:30 nuclear 0
或者另一个选项是使用 n_distinct,检查每个组中 unique 元素的 length(即不同取值的个数)是否为 1。
# Per (id, variable) group, set check to 1 when the group holds exactly one
# distinct value and to 0 otherwise.
dtat %>%
  group_by(id, variable) %>%
  mutate(check = as.integer(n_distinct(value) == 1L))
刚想到可能像
# Per (id, variable) group, number the rows (`ep`) and set `check` to 1 when
# the first and second rows carry the same value, 0 otherwise.
#
# Changes from the original sketch:
# * `row_number()` replaces `1:n()` for the within-group index.
# * The comparison is guarded by `n() >= 2L`: without it, a one-row group
#   produces a zero-length comparison and mutate() fails; such groups now
#   get check = 0.
# Results for groups with two or more rows are unchanged.
dtat %>%
  group_by(id, variable) %>%
  mutate(
    ep = row_number(),
    check = as.numeric(n() >= 2L && value[1L] == value[2L])
  )
我正在尝试了解如何在 dplyr 中使用类似 [i+1] 的索引方式。
我的数据是这样的
> head(dtat)
id sex variable value
1 1 m 08:00 partner
2 1 f 08:00 partner
3 1 m 08:15 partner
4 1 f 08:15 partner
5 1 m 08:30 partner
6 1 f 08:30 partner
我想按 id 和 variable 分组,检查 m 和 f 的 value 是否相同。
例如,对于 id == 1,检查 m 在 8:00 的值(partner)是否与 f 在 8:00 的值同为 partner。
我的思路是在这里使用类似 value[i+1] 的写法:
# Asker's sketch of the intended comparison: within each (id, variable) group,
# compare every value with the one in the following row.
# NOTE(review): `i` is not defined anywhere in this snippet, so it reads as
# pseudo-code illustrating the idea rather than as runnable code.
dtat %>%
group_by(id, variable) %>%
mutate(as.numeric (value == value [i+1] ))
我还想在 variable 上检查 lag + 1 的情况。例如,对于 id == 1,比较 8:00 处的 m 是否等于 8:15 处的 f。
有什么想法吗? 谢谢
不考虑 lag 时的输出应该是(check 是所讨论的匹配变量):
id variable check sex value
1 1 08:00 1 m partner
2 1 08:00 1 f partner
3 1 08:15 1 m partner
4 1 08:15 1 f partner
5 1 08:30 1 m partner
6 1 08:30 1 f partner
7 2 08:00 0 m *
8 2 08:00 0 f alone
9 2 08:15 0 m *
10 2 08:15 0 f alone
11 2 08:30 0 m *
12 2 08:30 0 f partner
13 3 08:00 0 m partner
14 3 08:00 0 f nuclear
15 3 08:15 0 m partner
16 3 08:15 0 f nuclear
17 3 08:30 0 m partner
18 3 08:30 0 f nuclear
数据
# Reproducible example data: an 18-row data.frame with columns
# id (integer), sex ("m"/"f"), variable (factor whose levels are
# quarter-hour labels from "08:00" to "21:45"; only the first three
# levels are used by the rows) and value (character).
dtat = structure(list(id = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), sex = c("m", "f", "m", "f",
"m", "f", "m", "f", "m", "f", "m", "f", "m", "f", "m", "f", "m",
"f"), variable = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L,
2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L), .Label = c("08:00",
"08:15", "08:30", "08:45", "09:00", "09:15", "09:30", "09:45",
"10:00", "10:15", "10:30", "10:45", "11:00", "11:15", "11:30",
"11:45", "12:00", "12:15", "12:30", "12:45", "13:00", "13:15",
"13:30", "13:45", "14:00", "14:15", "14:30", "14:45", "15:00",
"15:15", "15:30", "15:45", "16:00", "16:15", "16:30", "16:45",
"17:00", "17:15", "17:30", "17:45", "18:00", "18:15", "18:30",
"18:45", "19:00", "19:15", "19:30", "19:45", "20:00", "20:15",
"20:30", "20:45", "21:00", "21:15", "21:30", "21:45"), class = "factor"),
value = c("partner", "partner", "partner", "partner", "partner",
"partner", "*", "alone", "*", "alone", "*", "partner", "partner",
"nuclear", "partner", "nuclear", "partner", "nuclear")), class = "data.frame", row.names = c(NA,
-18L), .Names = c("id", "sex", "variable", "value"))
假设我们想通过将当前观测与下一个观测进行比较来创建 'check' 列,可以使用 lead。我们按 'id'、'variable' 分组,用 == 进行比较得到一个逻辑向量,再通过一元 + 将其强制转换为 0/1。
# Per (id, variable) group, flag whether each row's value equals the value of
# the row after it. The last row of a group has no successor, so `default`
# makes it compare against the group's first value instead.
dtat %>%
  group_by(id, variable) %>%
  mutate(check = as.integer(value == lead(value, default = value[1L])))
# id sex variable value check
#1 1 m 08:00 partner 1
#2 1 f 08:00 partner 1
#3 1 m 08:15 partner 1
#4 1 f 08:15 partner 1
#5 1 m 08:30 partner 1
#6 1 f 08:30 partner 1
#7 2 m 08:00 * 0
#8 2 f 08:00 alone 0
#9 2 m 08:15 * 0
#10 2 f 08:15 alone 0
#11 2 m 08:30 * 0
#12 2 f 08:30 partner 0
#13 3 m 08:00 partner 0
#14 3 f 08:00 nuclear 0
#15 3 m 08:15 partner 0
#16 3 f 08:15 nuclear 0
#17 3 m 08:30 partner 0
#18 3 f 08:30 nuclear 0
或者另一个选项是使用 n_distinct,检查每个组中 unique 元素的 length(即不同取值的个数)是否为 1。
# Per (id, variable) group, set check to 1 when the group holds exactly one
# distinct value and to 0 otherwise.
dtat %>%
  group_by(id, variable) %>%
  mutate(check = as.integer(n_distinct(value) == 1L))
刚想到可能像
# Per (id, variable) group, number the rows (`ep`) and set `check` to 1 when
# the first and second rows carry the same value, 0 otherwise.
#
# Changes from the original sketch:
# * `row_number()` replaces `1:n()` for the within-group index.
# * The comparison is guarded by `n() >= 2L`: without it, a one-row group
#   produces a zero-length comparison and mutate() fails; such groups now
#   get check = 0.
# Results for groups with two or more rows are unchanged.
dtat %>%
  group_by(id, variable) %>%
  mutate(
    ep = row_number(),
    check = as.numeric(n() >= 2L && value[1L] == value[2L])
  )