在其他列中使用条件,在 loop/lapply/mutate 中使用循环列的数字索引
Use conditions in other columns and numeric index of loop-columns in loop/lapply/mutate across
我有这样一个数据框:
> df
V1 V2 V3 V4 V5 V6
1 1 1 2 NA 1 0
2 0 0 2 1 NA 1
3 1 0 2 1 1 NA
4 0 1 2 0 0 NA
5 1 0 2 1 1 NA
6 0 0 2 NA 1 1
7 0 1 2 NA 1 NA
8 0 0 2 NA 1 NA
9 1 0 2 1 1 1
10 0 1 2 1 1 NA
dput 如下(编辑:更正):
# Example data from the question: ten rows, six integer columns.
# V1:V3 are the condition columns; V4:V6 are the columns to be updated.
df <- data.frame(
  V1 = c(1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L),
  V2 = c(1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L),
  V3 = rep(2L, 10L),
  V4 = c(NA, 1L, 1L, 0L, 1L, NA, NA, NA, 1L, 1L),
  V5 = c(1L, NA, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L),
  V6 = c(0L, 1L, NA, NA, NA, 1L, NA, NA, 1L, NA)
)
我正在寻找让 V1:V3
保持不变的代码。对于 V4:V6
我想应用类似下面的 if_else
-语句:
if_else(df$V1 == 0 & df$V2 == 1 & "index of loop columns" > df$V3, 1, "do nothing")
例如,对于 V6 列的第 4/7/10 行,NA
将更改为 1
,因为以下语句为真:
if_else(df$V1 == 0 & df$V2 == 1 & numerical index [3] > df$V3 [2], 1, df$V6)
其余行应保持不变,V4
和 V5
也应保持不变,因为索引为 1
和 2
,因此永远不会大于 df$V3(此处恒为 2)。
我用 for 循环和 lapply 产生了一些死胡同,因为我无法弄清楚如何将 >
运算符的特定数字索引获取到我的代码中。如有任何建议,我将不胜感激!谢谢!
我认为这行得通。这有点难说,因为 dput()
与您问题中的打印数据不匹配...
# Data used by the answer. It differs from the question's corrected dput()
# in V4 (row 4), V5 (rows 4 and 5) and V6 (row 1).
df <- data.frame(
  V1 = c(1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L),
  V2 = c(1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L),
  V3 = rep(2L, 10L),
  V4 = c(NA, 1L, 1L, NA, 1L, NA, NA, NA, 1L, 1L),
  V5 = c(1L, NA, 1L, 1L, NA, 1L, 1L, 1L, 1L, 1L),
  V6 = c(NA, 1L, NA, NA, NA, 1L, NA, NA, 1L, NA)
)
df
# V1 V2 V3 V4 V5 V6
# 1 1 1 2 NA 1 NA
# 2 0 0 2 1 NA 1
# 3 1 0 2 1 1 NA
# 4 0 1 2 NA 1 NA
# 5 1 0 2 1 NA NA
# 6 0 0 2 NA 1 1
# 7 0 1 2 NA 1 NA
# 8 0 0 2 NA 1 NA
# 9 1 0 2 1 1 1
# 10 0 1 2 1 1 NA
library(dplyr)

# Columns to update. A column's position in this vector (V4 = 1, V5 = 2,
# V6 = 3) is the numeric "loop index" that is compared against V3.
cols_to_loop <- c("V4", "V5", "V6")

# Single vectorised mutate() instead of one mutate() per loop iteration:
# cur_column() returns the name of the column across() is currently working
# on, and match() turns that name back into its position in cols_to_loop.
# all_of() makes it explicit that cols_to_loop is an external character
# vector of column names rather than a column of df.
# Rows where V1 == 0, V2 == 1 and the index exceeds V3 are set to 1L; all
# other rows keep their current value (.x).
# NOTE: if the condition itself evaluates to NA (e.g. an NA in V3),
# if_else() returns NA for that row rather than the current value.
df <- mutate(
  df,
  across(
    all_of(cols_to_loop),
    ~ if_else(
      V1 == 0 & V2 == 1 & match(cur_column(), cols_to_loop) > V3,
      1L,
      .x
    )
  )
)
df
# V1 V2 V3 V4 V5 V6
# 1 1 1 2 NA 1 NA
# 2 0 0 2 1 NA 1
# 3 1 0 2 1 1 NA
# 4 0 1 2 NA 1 1
# 5 1 0 2 1 NA NA
# 6 0 0 2 NA 1 1
# 7 0 1 2 NA 1 1
# 8 0 0 2 NA 1 NA
# 9 1 0 2 1 1 1
# 10 0 1 2 1 1 1
我有这样一个数据框:
> df
V1 V2 V3 V4 V5 V6
1 1 1 2 NA 1 0
2 0 0 2 1 NA 1
3 1 0 2 1 1 NA
4 0 1 2 0 0 NA
5 1 0 2 1 1 NA
6 0 0 2 NA 1 1
7 0 1 2 NA 1 NA
8 0 0 2 NA 1 NA
9 1 0 2 1 1 1
10 0 1 2 1 1 NA
dput 如下(编辑:更正):
# Example data from the question: ten rows, six integer columns.
# V1:V3 are the condition columns; V4:V6 are the columns to be updated.
df <- data.frame(
  V1 = c(1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L),
  V2 = c(1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L),
  V3 = rep(2L, 10L),
  V4 = c(NA, 1L, 1L, 0L, 1L, NA, NA, NA, 1L, 1L),
  V5 = c(1L, NA, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L),
  V6 = c(0L, 1L, NA, NA, NA, 1L, NA, NA, 1L, NA)
)
我正在寻找让 V1:V3
保持不变的代码。对于 V4:V6
我想应用类似下面的 if_else
-语句:
if_else(df$V1 == 0 & df$V2 == 1 & "index of loop columns" > df$V3, 1, "do nothing")
例如,对于 V6 列的第 4/7/10 行,NA
将更改为 1
,因为以下语句为真:
if_else(df$V1 == 0 & df$V2 == 1 & numerical index [3] > df$V3 [2], 1, df$V6)
其余行应保持不变,V4
和 V5
也应保持不变,因为索引为 1
和 2
,因此永远不会大于 df$V3(此处恒为 2)。
我用 for 循环和 lapply 产生了一些死胡同,因为我无法弄清楚如何将 >
运算符的特定数字索引获取到我的代码中。如有任何建议,我将不胜感激!谢谢!
我认为这行得通。这有点难说,因为 dput()
与您问题中的打印数据不匹配...
# Data used by the answer. It differs from the question's corrected dput()
# in V4 (row 4), V5 (rows 4 and 5) and V6 (row 1).
df <- data.frame(
  V1 = c(1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L),
  V2 = c(1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L),
  V3 = rep(2L, 10L),
  V4 = c(NA, 1L, 1L, NA, 1L, NA, NA, NA, 1L, 1L),
  V5 = c(1L, NA, 1L, 1L, NA, 1L, 1L, 1L, 1L, 1L),
  V6 = c(NA, 1L, NA, NA, NA, 1L, NA, NA, 1L, NA)
)
df
# V1 V2 V3 V4 V5 V6
# 1 1 1 2 NA 1 NA
# 2 0 0 2 1 NA 1
# 3 1 0 2 1 1 NA
# 4 0 1 2 NA 1 NA
# 5 1 0 2 1 NA NA
# 6 0 0 2 NA 1 1
# 7 0 1 2 NA 1 NA
# 8 0 0 2 NA 1 NA
# 9 1 0 2 1 1 1
# 10 0 1 2 1 1 NA
library(dplyr)

# Columns to update. A column's position in this vector (V4 = 1, V5 = 2,
# V6 = 3) is the numeric "loop index" that is compared against V3.
cols_to_loop <- c("V4", "V5", "V6")

# Single vectorised mutate() instead of one mutate() per loop iteration:
# cur_column() returns the name of the column across() is currently working
# on, and match() turns that name back into its position in cols_to_loop.
# all_of() makes it explicit that cols_to_loop is an external character
# vector of column names rather than a column of df.
# Rows where V1 == 0, V2 == 1 and the index exceeds V3 are set to 1L; all
# other rows keep their current value (.x).
# NOTE: if the condition itself evaluates to NA (e.g. an NA in V3),
# if_else() returns NA for that row rather than the current value.
df <- mutate(
  df,
  across(
    all_of(cols_to_loop),
    ~ if_else(
      V1 == 0 & V2 == 1 & match(cur_column(), cols_to_loop) > V3,
      1L,
      .x
    )
  )
)
df
# V1 V2 V3 V4 V5 V6
# 1 1 1 2 NA 1 NA
# 2 0 0 2 1 NA 1
# 3 1 0 2 1 1 NA
# 4 0 1 2 NA 1 1
# 5 1 0 2 1 NA NA
# 6 0 0 2 NA 1 1
# 7 0 1 2 NA 1 1
# 8 0 0 2 NA 1 NA
# 9 1 0 2 1 1 1
# 10 0 1 2 1 1 1