根据 R dplyr 中的条件将字符转换为 NA
Convert a character to NA based on a condition in R dplyr
我有一个看起来像这样的数据框
library(tidyverse)
# Example data: two character columns and two numeric count columns.
df3 <- tibble(
  col1 = c("apple", rep("banana", 3)),
  col2 = c("aple", "banan", "bananb", "banat"),
  count_col1 = c(1, 4, 4, 4),
  count_col2 = c(4, 1, 1, 1)
)
df3
#> # A tibble: 4 × 4
#> col1 col2 count_col1 count_col2
#> <chr> <chr> <dbl> <dbl>
#> 1 apple aple 1 4
#> 2 banana banan 4 1
#> 3 banana bananb 4 1
#> 4 banana banat 4 1
由 reprex package (v2.0.1) 创建于 2022-02-17
我想按 col1 分组(group_by),并且当 count_col2 > count_col1 时,将 col1 的值转换为 NA;
当 count_col1 > count_col2 时,则将 col2 的值转换为 NA。
我希望我的数据看起来像这样
#> # A tibble: 4 × 4
#> col1 col2 count_col1 count_col2
#> <chr> <chr> <dbl> <dbl>
#> 1 NA aple 1 4
#> 2 banana NA 4 1
#> 3 banana NA 4 1
#> 4 banana NA 4 1
我不确定这是否可以通过 mutate(case_when...) 实现。
到目前为止,我的尝试都失败了:
# NOTE: this is the asker's failing attempt, kept as posted.
# `col1 == NA` is a comparison, not an assignment, and comparing anything with
# NA yields NA (use is.na() to test for missing values). The case_when() result
# is also not assigned to a column name, so mutate() adds a new column instead
# of modifying col1 or col2.
df3 %>%
group_by(col1) %>%
mutate(case_when(count_col2 > count_col1 ~ col1==NA,
count_col1 > count_col2 ~ col2==NA ))
您可以使用 ifelse() 实现所需的输出,即:
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Example data: two character columns and two numeric count columns.
df3 <- tibble(
  col1 = c("apple", rep("banana", 3)),
  col2 = c("aple", "banan", "bananb", "banat"),
  count_col1 = c(1, 4, 4, 4),
  count_col2 = c(4, 1, 1, 1)
)
df3
#> # A tibble: 4 × 4
#> col1 col2 count_col1 count_col2
#> <chr> <chr> <dbl> <dbl>
#> 1 apple aple 1 4
#> 2 banana banan 4 1
#> 3 banana bananb 4 1
#> 4 banana banat 4 1

# On each row, set the label whose count is smaller to NA. ifelse() is
# vectorised, so the two count columns are compared row by row.
# ungroup() is called at the end so the result is a plain tibble rather than
# one that stays grouped by the col1 values that were just modified.
df3 %>%
  group_by(col1) %>%
  mutate(
    col1 = ifelse(count_col2 > count_col1, NA, col1),
    col2 = ifelse(count_col1 > count_col2, NA, col2)
  ) %>%
  ungroup()
#> # A tibble: 4 × 4
#> col1 col2 count_col1 count_col2
#> <chr> <chr> <dbl> <dbl>
#> 1 <NA> aple 1 4
#> 2 banana <NA> 4 1
#> 3 banana <NA> 4 1
#> 4 banana <NA> 4 1
由 reprex package (v2.0.1) 于 2022-02-18 创建
或者使用 case_when():
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Example data: two character columns and two numeric count columns.
df3 <- tibble(
  col1 = c("apple", rep("banana", 3)),
  col2 = c("aple", "banan", "bananb", "banat"),
  count_col1 = c(1, 4, 4, 4),
  count_col2 = c(4, 1, 1, 1)
)
df3
#> # A tibble: 4 × 4
#> col1 col2 count_col1 count_col2
#> <chr> <chr> <dbl> <dbl>
#> 1 apple aple 1 4
#> 2 banana banan 4 1
#> 3 banana bananb 4 1
#> 4 banana banat 4 1

# On each row, set the label whose count is smaller to NA. NA_character_ keeps
# every case_when() branch of type character; the `TRUE ~` branch leaves all
# other rows unchanged.
# ungroup() is called at the end so the result is a plain tibble rather than
# one that stays grouped by the col1 values that were just modified.
df3 %>%
  group_by(col1) %>%
  mutate(
    col1 = case_when(
      count_col2 > count_col1 ~ NA_character_,
      TRUE ~ col1
    ),
    col2 = case_when(
      count_col1 > count_col2 ~ NA_character_,
      TRUE ~ col2
    )
  ) %>%
  ungroup()
#> # A tibble: 4 × 4
#> col1 col2 count_col1 count_col2
#> <chr> <chr> <dbl> <dbl>
#> 1 <NA> aple 1 4
#> 2 banana <NA> 4 1
#> 3 banana <NA> 4 1
#> 4 banana <NA> 4 1
由 reprex package (v2.0.1) 于 2022-02-18 创建
这是否解决了您的问题?
我不确定您是否真的需要在此处使用 group_by,因为即使使用了 group_by,count_col1 的每个值也只是与 count_col2 的对应值逐行比较,并没有任何操作是在“组”内进行的。
下面是一个 base R 的做法:
# Base R: replace() sets the selected positions to NA. Each mask reads only
# the count columns, so the two statements do not depend on each other.
df3$col1 <- replace(df3$col1, df3$count_col2 > df3$count_col1, NA)
df3$col2 <- replace(df3$col2, df3$count_col1 > df3$count_col2, NA)
df3
# col1 col2 count_col1 count_col2
# <chr> <chr> <dbl> <dbl>
#1 NA aple 1 4
#2 banana NA 4 1
#3 banana NA 4 1
#4 banana NA 4 1
我有一个看起来像这样的数据框
library(tidyverse)
# Example data: two character columns and two numeric count columns.
df3 <- tibble(
  col1 = c("apple", rep("banana", 3)),
  col2 = c("aple", "banan", "bananb", "banat"),
  count_col1 = c(1, 4, 4, 4),
  count_col2 = c(4, 1, 1, 1)
)
df3
#> # A tibble: 4 × 4
#> col1 col2 count_col1 count_col2
#> <chr> <chr> <dbl> <dbl>
#> 1 apple aple 1 4
#> 2 banana banan 4 1
#> 3 banana bananb 4 1
#> 4 banana banat 4 1
由 reprex package (v2.0.1) 创建于 2022-02-17
我想按 col1 分组(group_by),并且当 count_col2 > count_col1 时,将 col1 的值转换为 NA;
当 count_col1 > count_col2 时,则将 col2 的值转换为 NA。
我希望我的数据看起来像这样
#> # A tibble: 4 × 4
#> col1 col2 count_col1 count_col2
#> <chr> <chr> <dbl> <dbl>
#> 1 NA aple 1 4
#> 2 banana NA 4 1
#> 3 banana NA 4 1
#> 4 banana NA 4 1
我不确定这是否可以通过 mutate(case_when...) 实现。
到目前为止,我的尝试都失败了:
# NOTE: this is the asker's failing attempt, kept as posted.
# `col1 == NA` is a comparison, not an assignment, and comparing anything with
# NA yields NA (use is.na() to test for missing values). The case_when() result
# is also not assigned to a column name, so mutate() adds a new column instead
# of modifying col1 or col2.
df3 %>%
group_by(col1) %>%
mutate(case_when(count_col2 > count_col1 ~ col1==NA,
count_col1 > count_col2 ~ col2==NA ))
您可以使用 ifelse() 实现所需的输出,即:
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Example data: two character columns and two numeric count columns.
df3 <- tibble(
  col1 = c("apple", rep("banana", 3)),
  col2 = c("aple", "banan", "bananb", "banat"),
  count_col1 = c(1, 4, 4, 4),
  count_col2 = c(4, 1, 1, 1)
)
df3
#> # A tibble: 4 × 4
#> col1 col2 count_col1 count_col2
#> <chr> <chr> <dbl> <dbl>
#> 1 apple aple 1 4
#> 2 banana banan 4 1
#> 3 banana bananb 4 1
#> 4 banana banat 4 1

# On each row, set the label whose count is smaller to NA. ifelse() is
# vectorised, so the two count columns are compared row by row.
# ungroup() is called at the end so the result is a plain tibble rather than
# one that stays grouped by the col1 values that were just modified.
df3 %>%
  group_by(col1) %>%
  mutate(
    col1 = ifelse(count_col2 > count_col1, NA, col1),
    col2 = ifelse(count_col1 > count_col2, NA, col2)
  ) %>%
  ungroup()
#> # A tibble: 4 × 4
#> col1 col2 count_col1 count_col2
#> <chr> <chr> <dbl> <dbl>
#> 1 <NA> aple 1 4
#> 2 banana <NA> 4 1
#> 3 banana <NA> 4 1
#> 4 banana <NA> 4 1
由 reprex package (v2.0.1) 于 2022-02-18 创建
或者使用 case_when():
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Example data: two character columns and two numeric count columns.
df3 <- tibble(
  col1 = c("apple", rep("banana", 3)),
  col2 = c("aple", "banan", "bananb", "banat"),
  count_col1 = c(1, 4, 4, 4),
  count_col2 = c(4, 1, 1, 1)
)
df3
#> # A tibble: 4 × 4
#> col1 col2 count_col1 count_col2
#> <chr> <chr> <dbl> <dbl>
#> 1 apple aple 1 4
#> 2 banana banan 4 1
#> 3 banana bananb 4 1
#> 4 banana banat 4 1

# On each row, set the label whose count is smaller to NA. NA_character_ keeps
# every case_when() branch of type character; the `TRUE ~` branch leaves all
# other rows unchanged.
# ungroup() is called at the end so the result is a plain tibble rather than
# one that stays grouped by the col1 values that were just modified.
df3 %>%
  group_by(col1) %>%
  mutate(
    col1 = case_when(
      count_col2 > count_col1 ~ NA_character_,
      TRUE ~ col1
    ),
    col2 = case_when(
      count_col1 > count_col2 ~ NA_character_,
      TRUE ~ col2
    )
  ) %>%
  ungroup()
#> # A tibble: 4 × 4
#> col1 col2 count_col1 count_col2
#> <chr> <chr> <dbl> <dbl>
#> 1 <NA> aple 1 4
#> 2 banana <NA> 4 1
#> 3 banana <NA> 4 1
#> 4 banana <NA> 4 1
由 reprex package (v2.0.1) 于 2022-02-18 创建
这是否解决了您的问题?
我不确定您是否真的需要在此处使用 group_by,因为即使使用了 group_by,count_col1 的每个值也只是与 count_col2 的对应值逐行比较,并没有任何操作是在“组”内进行的。
下面是一个 base R 的做法:
# Base R: replace() sets the selected positions to NA. Each mask reads only
# the count columns, so the two statements do not depend on each other.
df3$col1 <- replace(df3$col1, df3$count_col2 > df3$count_col1, NA)
df3$col2 <- replace(df3$col2, df3$count_col1 > df3$count_col2, NA)
df3
# col1 col2 count_col1 count_col2
# <chr> <chr> <dbl> <dbl>
#1 NA aple 1 4
#2 banana NA 4 1
#3 banana NA 4 1
#4 banana NA 4 1