在 R 中过滤数据框:仅保留某一列含有指定值、且在该列之后的任意列中含有另一个值的行
Filter data frame to get only rows that have a value in column and another value in any column after first value, R
我正在寻找一种方法来过滤以下数据框,使最终只保留这样的行:某一列的值为 1,并且在该列之后的任意一列中值为 2。非常感谢任何帮助!
# Example input for the question: a data frame carrying the tibble classes
# ("tbl_df", "tbl", "data.frame") with 100 rows (row.names = c(NA, -100L)) and
# four numeric columns, job1category through job4category. Entries are the
# codes 1-7 or NA.
# NOTE(review): job2category, job3category and job4category appear to hold
# identical values - confirm this is intended in the sample data.
data_rel1 <- structure(list(job1category = c(NA, 1, 2, 2, 1, 1, 2, 1, 1, 1,
1, 1, 1, 1, NA, 1, 1, 4, 1, 1, NA, NA, 1, 1, 1, 1, 1, 1, 2, 1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, NA, 7, NA, 5, 1,
5, NA, 2, 5, 1, NA, 1, 5, 1, 1, 1, 1, 5, 1, 5, 4, 1, 4, 5, 4,
NA, 5, NA, 5, 4, 3, 6, 1, 4, 4, 5, 4, 1, NA, 1, NA, 1, NA, 1,
1, 1, 1, 1, 4, 1, 1, 1, NA, 1, NA), job2category = c(3, 2, 1,
2, 3, 1, 2, 2, 1, 1, 1, NA, 2, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 5, 3, 3, 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
1, 1, 7, 7, 1, 1, 1, NA, 4, 1, 1, NA, 2, 1, 1, 1, 1, NA, 5, NA,
4, 5, 4, NA, 5, 2, 4, 4, 2, 7, 5, NA, 5, 2, NA, 4, NA, 1, 5,
NA, 1, NA, 1, 1, 1, 1, 5, 2, NA, 4, 1, 1, 1, NA, 1, NA), job3category = c(3,
2, 1, 2, 3, 1, 2, 2, 1, 1, 1, NA, 2, 1, NA, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 5, 3, 3, 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2,
2, 1, 1, 1, 7, 7, 1, 1, 1, NA, 4, 1, 1, NA, 2, 1, 1, 1, 1, NA,
5, NA, 4, 5, 4, NA, 5, 2, 4, 4, 2, 7, 5, NA, 5, 2, NA, 4, NA,
1, 5, NA, 1, NA, 1, 1, 1, 1, 5, 2, NA, 4, 1, 1, 1, NA, 1, NA),
job4category = c(3, 2, 1, 2, 3, 1, 2, 2, 1, 1, 1, NA, 2,
1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 3, 3, 1, 1, 2,
4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 7, 7, 1, 1, 1,
NA, 4, 1, 1, NA, 2, 1, 1, 1, 1, NA, 5, NA, 4, 5, 4, NA, 5,
2, 4, 4, 2, 7, 5, NA, 5, 2, NA, 4, NA, 1, 5, NA, 1, NA, 1,
1, 1, 1, 5, 2, NA, 4, 1, 1, 1, NA, 1, NA)), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
您可以尝试使用 apply:
# Keep the rows of data_rel1 in which some column equals 2 at a position
# after the first column that equals 1.
data_rel1[apply(data_rel1, 1, function(row_vals) {
  # Column positions holding a 1; which() drops NA comparisons.
  ones <- which(row_vals == 1)
  if (length(ones) == 0) {
    return(FALSE)
  }
  # which() yields ascending positions, so the first entry is the earliest 1.
  twos <- which(row_vals == 2)
  any(twos > ones[1])
}), ]
# job1category job2category job3category job4category
# <dbl> <dbl> <dbl> <dbl>
#1 1 2 2 2
#2 1 2 2 2
#3 1 2 2 2
#4 1 2 2 2
#5 1 2 2 2
#6 1 2 2 2
#7 1 2 2 2
#8 1 2 2 2
我正在寻找一种方法来过滤以下数据框,使最终只保留这样的行:某一列的值为 1,并且在该列之后的任意一列中值为 2。非常感谢任何帮助!
# Example input for the question: a data frame carrying the tibble classes
# ("tbl_df", "tbl", "data.frame") with 100 rows (row.names = c(NA, -100L)) and
# four numeric columns, job1category through job4category. Entries are the
# codes 1-7 or NA.
# NOTE(review): job2category, job3category and job4category appear to hold
# identical values - confirm this is intended in the sample data.
data_rel1 <- structure(list(job1category = c(NA, 1, 2, 2, 1, 1, 2, 1, 1, 1,
1, 1, 1, 1, NA, 1, 1, 4, 1, 1, NA, NA, 1, 1, 1, 1, 1, 1, 2, 1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, NA, 7, NA, 5, 1,
5, NA, 2, 5, 1, NA, 1, 5, 1, 1, 1, 1, 5, 1, 5, 4, 1, 4, 5, 4,
NA, 5, NA, 5, 4, 3, 6, 1, 4, 4, 5, 4, 1, NA, 1, NA, 1, NA, 1,
1, 1, 1, 1, 4, 1, 1, 1, NA, 1, NA), job2category = c(3, 2, 1,
2, 3, 1, 2, 2, 1, 1, 1, NA, 2, 1, NA, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 5, 3, 3, 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
1, 1, 7, 7, 1, 1, 1, NA, 4, 1, 1, NA, 2, 1, 1, 1, 1, NA, 5, NA,
4, 5, 4, NA, 5, 2, 4, 4, 2, 7, 5, NA, 5, 2, NA, 4, NA, 1, 5,
NA, 1, NA, 1, 1, 1, 1, 5, 2, NA, 4, 1, 1, 1, NA, 1, NA), job3category = c(3,
2, 1, 2, 3, 1, 2, 2, 1, 1, 1, NA, 2, 1, NA, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 5, 3, 3, 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 2,
2, 1, 1, 1, 7, 7, 1, 1, 1, NA, 4, 1, 1, NA, 2, 1, 1, 1, 1, NA,
5, NA, 4, 5, 4, NA, 5, 2, 4, 4, 2, 7, 5, NA, 5, 2, NA, 4, NA,
1, 5, NA, 1, NA, 1, 1, 1, 1, 5, 2, NA, 4, 1, 1, 1, NA, 1, NA),
job4category = c(3, 2, 1, 2, 3, 1, 2, 2, 1, 1, 1, NA, 2,
1, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 3, 3, 1, 1, 2,
4, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 7, 7, 1, 1, 1,
NA, 4, 1, 1, NA, 2, 1, 1, 1, 1, NA, 5, NA, 4, 5, 4, NA, 5,
2, 4, 4, 2, 7, 5, NA, 5, 2, NA, 4, NA, 1, 5, NA, 1, NA, 1,
1, 1, 1, 5, 2, NA, 4, 1, 1, 1, NA, 1, NA)), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
您可以尝试使用 apply:
# Keep the rows of data_rel1 in which some column equals 2 at a position
# after the first column that equals 1.
data_rel1[apply(data_rel1, 1, function(row_vals) {
  # Column positions holding a 1; which() drops NA comparisons.
  ones <- which(row_vals == 1)
  if (length(ones) == 0) {
    return(FALSE)
  }
  # which() yields ascending positions, so the first entry is the earliest 1.
  twos <- which(row_vals == 2)
  any(twos > ones[1])
}), ]
# job1category job2category job3category job4category
# <dbl> <dbl> <dbl> <dbl>
#1 1 2 2 2
#2 1 2 2 2
#3 1 2 2 2
#4 1 2 2 2
#5 1 2 2 2
#6 1 2 2 2
#7 1 2 2 2
#8 1 2 2 2