R - 在嵌套 for 循环中为数据框子集赋值

Question

R 版本 3.3.2

我正在尝试根据该数据框的其他变量的值，使用嵌套的 for 循环将某些值分配给我的数据框的空变量。但是输出不是我所期望的。

这是一个可重现的例子：

# Reproducible example: one row per (id, Round) pair; Y is the empty column
# that the loop is supposed to fill in.
id <- rep(c("ID61", "ID63", "ID69", "ID80", "ID81"), times = c(2, 1, 5, 3, 4))
Round <- c(
  1, 2,           # ID61
  1,              # ID63
  1, 2, 3, 4, 5,  # ID69
  1, 2, 3,        # ID80
  1, 2, 3, 4      # ID81
)
nrPosRound <- c(
  2, 0,            # ID61
  2,               # ID63
  15, 8, 4, 4, 0,  # ID69
  3, 1, 1,         # ID80
  0, 0, 0, 0       # ID81
)
Y <- rep(NA, length(id))
df <- data.frame(id = id, Round = Round, nrPosRound = nrPosRound, Y = Y)

我得到的数据框如下所示：

> df
     id Round nrPosRound Y
1  ID61     1          2 NA
2  ID61     2          0 NA
3  ID63     1          2 NA
4  ID69     1         15 NA
5  ID69     2          8 NA
6  ID69     3          4 NA
7  ID69     4          4 NA
8  ID69     5          0 NA
9  ID80     1          3 NA
10 ID80     2          1 NA
11 ID80     3          1 NA
12 ID81     1          0 NA
13 ID81     2          0 NA
14 ID81     3          0 NA
15 ID81     4          0 NA

在嵌套的 for 循环之后，我希望它看起来像这样：

> df
     id Round nrPosRound Y
1  ID61     1          2 FP
2  ID61     2          0 FP
3  ID63     1          2 FP
4  ID69     1         15 FP
5  ID69     2          8 FP
6  ID69     3          4 FP
7  ID69     4          4 FP
8  ID69     5          0 FP
9  ID80     1          3 1
10 ID80     2          1 1
11 ID80     3          1 1
12 ID81     1          0 0
13 ID81     2          0 0
14 ID81     3          0 0
15 ID81     4          0 0

我想要的是：对于同一个 'id'，如果在某个 'Round' 中有 3 个或更多的 Positives (nrPosRound >= 3)，并且在随后的各轮中都至少有 1 个 Positive (nrPosRound >= 1)，则将变量 'Y' 赋值为 '1'。如果同一个 'id' 在每个 'Round' 中的 'nrPosRound' 都为 '0'，则将 'Y' 赋值为 '0'。如果以上条件都不满足，则应将 'Y' 赋值为 'FP'（误报）。如果某个 'id' 只有 1 个 'Round'，则当 'nrPosRound' >= 3 时 'Y' 的值为 '1'；当 'nrPosRound' == 0 时值为 '0'；当 0 < 'nrPosRound' < 3 时值为 'FP'。

这是我的代码，带有嵌套的 for 循环：

# Asker's attempt: for every row of df, look at all rows sharing its id and
# try to label the row from the current round and the rounds that follow it.
for (i in 1:nrow(df)) {

  current_id <- df$id[i]
  # Fixed: this line previously read the undefined name `curr_id`.
  id_group <- df[df$id == current_id, ]

  for (j in 1:nrow(id_group)) {

    current_Round <- id_group$Round[j] 
    # NOTE: on the last row of a group, (j+1) exceeds nrow(id_group), so the
    # index counts backwards and yields NA plus the current round itself.
    remainder_Rounds <- id_group$Round[(j+1):nrow(id_group)]

    current_nrPos <- id_group$nrPosRound[id_group$Round == current_Round]
    remainder_nrPos <- id_group$nrPosRound[id_group$Round %in% remainder_Rounds]

    # Fixed: the tests previously read the undefined name `curr_nrPos`.
    # NOTE: remainder_nrPos may hold several values, so the test is a vector;
    # "FP" is only returned by ifelse() here and is never stored in df$Y.
    ifelse(current_nrPos >= 3 & remainder_nrPos >= 1,
           df$Y[i] <- 1, ifelse(current_nrPos == 0 & remainder_nrPos == 0,
                                        df$Y[i] <- 0, "FP"))
  }
}

我认为问题与 'remainder_nrPos' 有关，因为第二个 ifelse 没有像我希望的那样工作。我尝试了多种方法，但似乎无法使它像我预期的那样工作。感谢您的帮助！

Answer 1

这可以用 dplyr 完成。在下面的代码中，我先group_by id。

我创建了一个中间变量min_from_last 来查看每一轮之后是否有一个零。为此，我首先使用 arrange(desc(Round)) 从最后重新排序。之后，我使用 cummin 获得累计最小值。

然后，我把数据重新排序，并用三个嵌套的 ifelse 得到您想要的结果。顺便说一句，您可能并不需要第二个 ifelse，因为它对应的情况已经被第一个 ifelse 覆盖了；但由于您的问题中提到了这种情况，我还是把它写了进去。

id <- c("ID61", "ID61", "ID63", "ID69", "ID69", "ID69", "ID69", "ID69", "ID80", "ID80", "ID80", "ID81", "ID81", "ID81", "ID81")
Round <- c(1, 2, 1, 1, 2, 3, 4, 5, 1, 2, 3, 1, 2, 3, 4)
nrPosRound <- c(2, 0, 2, 15, 8, 4, 4, 0, 3, 1, 1, 0, 0, 0, 0)
df1 <- data.frame(id, Round, nrPosRound, stringsAsFactors = FALSE)

library(dplyr)

# Label every row of each id with "1", "0" or "FP" in a new column Y.
result <- df1 %>%
  group_by(id) %>%
  # Walk each id from its last round backwards so that cummin() gives, for
  # every round, the smallest nrPosRound from that round to the end.
  arrange(desc(Round)) %>%
  mutate(min_from_last = cummin(nrPosRound)) %>%
  # arrange() ignores grouping by default (dplyr >= 0.5.0), so sort by id
  # explicitly to keep each id's rows together, ordered by Round.
  arrange(id, Round) %>%
  mutate(Y = ifelse(max(nrPosRound) >= 3 & min_from_last > 0, "1",
             ifelse(n() == 1 & nrPosRound >= 3, "1",
             ifelse(max(nrPosRound) == 0, "0", "FP")))) %>%
  # Drop the grouping so later verbs operate on the whole table.
  ungroup()

result

      id Round nrPosRound min_from_last     Y
   (chr) (dbl)      (dbl)         (dbl) (chr)
1   ID61     1          2             0    FP
2   ID61     2          0             0    FP
3   ID63     1          2             2    FP
4   ID69     1         15             0    FP
5   ID69     2          8             0    FP
6   ID69     3          4             0    FP
7   ID69     4          4             0    FP
8   ID69     5          0             0    FP
9   ID80     1          3             1     1
10  ID80     2          1             1     1
11  ID80     3          1             1     1
12  ID81     1          0             0     0
13  ID81     2          0             0     0
14  ID81     3          0             0     0
15  ID81     4          0             0     0

Answer 2

这是一个只使用基础 R (base R) 的解决方案。

# Base R: fill df$Y for each id with 1, 0 or 'FP'.
# NOTE(review): assumes the rows of each id are already in Round order,
# as in the example data; confirm before using on unsorted data.
id.vals <- unique(df$id)

# seq_along()/seq_len() make both loops no-ops on empty input, where
# 1:length(x) would iterate over c(1, 0).
for (i in seq_along(id.vals)) {
    group.ind <- df$id == id.vals[i]
    id_group  <- df[group.ind, 'nrPosRound']
    n   <- length(id_group)
    Y   <- rep(NA, n)
    g3  <- any(id_group >= 3)   # some round of this id has >= 3 positives
    a0  <- all(id_group == 0)   # every round of this id has 0 positives
    for (j in seq_len(n)) {
        # Scalar condition, so use the short-circuiting && rather than &.
        if (g3 && all(id_group[j:n] >= 1)) {
            # Round j and every later round have at least one positive.
            Y[j] <- 1
        } else if (a0) {
            Y[j] <- 0
        } else {
            Y[j] <- 'FP'
        }
    }
    df$Y[group.ind] <- Y
}

R - 在嵌套 for 循环中为数据框子集赋值

R - assigning values to data frame subsets in nested for loop

for-loop

r

subset

nested-loops

dataframe