使用数据集中其他列的值创建一个新列

Create a new column with values from other columns in the dataset

我想创建一个新列,其取值来自数据集中两列之一,具体取哪一列由第三列的值决定。

我的数据集充满了 0 和 1 值,如下所示:

# Example data: one row per patient with three 0/1 indicator columns.
df <- data.frame(
  PatientID = c(
    "0002", "0004", "0005", "0006", "0009", "0010", "0018",
    "0019", "0020", "0027", "0039", "0041", "0042", "0043",
    "0044", "0045", "0046", "0047", "0048", "0049", "0055"
  ),
  RCA = c(1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1),
  RCB = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1),
  RCC = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1),
  # Spell out FALSE: the shorthand F is an ordinary variable that can be
  # reassigned, which would silently change this argument.
  stringsAsFactors = FALSE
)

第四列df$RCD,需要满足以下条件:

如果df$RCC为1则df$RCD=df$RCB,如果df$RCC为0则df$RCD=df$RCA

希望这是有道理的。为了清楚起见,我附上了一个示例输出。

非常感谢! 最好的问候

library(dplyr)

# Add RCD: take the value of RCB where RCC equals 1, otherwise the value of RCA.
# (The original snippet ended with an unmatched ")" and did not parse.)
df %>%
  mutate(
    RCD = ifelse(test = RCC == 1, yes = RCB, no = RCA)
  )

你可以试试tidyverse

library(tidyverse)

# RCD copies RCA for rows where RCC is 0 and RCB for rows where RCC is 1.
mutate(
  df,
  RCD = case_when(
    RCC == 0 ~ RCA,
    RCC == 1 ~ RCB
  )
)
   PatientID RCA RCB RCC RCD
1       0002   1   1   1   1
2       0004   1   1   1   1
3       0005   0   1   1   1
4       0006   1   1   1   1
5       0009   1   0   0   1
6       0010   1   0   0   1
7       0018   0   0   0   0
8       0019   0   0   0   0
9       0020   0   0   0   0
10      0027   0   0   1   0
11      0039   0   0   1   0
12      0041   0   0   1   0
13      0042   0   0   0   0
14      0043   0   0   0   0
15      0044   1   1   1   1
16      0045   1   0   1   0
17      0046   1   1   1   1
18      0047   0   0   1   0
19      0048   1   1   1   1
20      0049   1   1   1   1
21      0055   1   1   1   1

在基础 R 中仅使用 ifelse

# Base R only: RCD takes RCB where RCC equals 1 and RCA everywhere else.
df[["RCD"]] <- with(df, ifelse(RCC == 1, RCB, RCA))

只是为了说明有很多种方法可以做到这一点

# Rebuild the example data so this snippet runs on its own.
df <- data.frame(
  PatientID = c(
    "0002", "0004", "0005", "0006", "0009", "0010", "0018",
    "0019", "0020", "0027", "0039", "0041", "0042", "0043",
    "0044", "0045", "0046", "0047", "0048", "0049", "0055"
  ),
  RCA = c(1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1),
  RCB = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1),
  RCC = c(1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1),
  # Use the reserved word FALSE rather than F, which can be reassigned.
  stringsAsFactors = FALSE
)

library(tidyverse)

# Row by row, use RCC (0 or 1) as an offset into the pair of column names
# and look up the value of the column selected that way.
df %>%
  rowwise() %>%
  mutate(RCD = get(c("RCA", "RCB")[RCC + 1]))
#> # A tibble: 21 x 5
#> # Rowwise: 
#>    PatientID   RCA   RCB   RCC   RCD
#>    <chr>     <dbl> <dbl> <dbl> <dbl>
#>  1 0002          1     1     1     1
#>  2 0004          1     1     1     1
#>  3 0005          0     1     1     1
#>  4 0006          1     1     1     1
#>  5 0009          1     0     0     1
#>  6 0010          1     0     0     1
#>  7 0018          0     0     0     0
#>  8 0019          0     0     0     0
#>  9 0020          0     0     0     0
#> 10 0027          0     0     1     0
#> # ... with 11 more rows

由 reprex 包 (v2.0.0) 创建于 2021-06-21