如果每个列分别满足条件,则将值设置为零
Set value to zero if condition is fulfilled for each column separately
我想在 data.table 中实现以下处理:对每个 ID 和每个年份列,如果存在 >= 10 的条目,则将该 ID 在该年份的其他所有值设置为零。如果没有大于或等于 10 的值,则所有值保持不变。如果有多个值大于或等于 10,则只保留其中最大的条目。
下面是一个示例 data.table:
library(data.table)

# Sample input: an ID column followed by one numeric column per year.
# Backticks are needed because the year column names are non-syntactic.
data <- data.table(
  ID = c("a1", "a2", "a2", "a1", "a2", "a1", "a1"),
  `2018` = c(3, 5, 11, 3, 9, 22, 6),
  `2019` = c(3, 5, 6, 21, 1, 4, 0),
  `2020` = c(0, 4, 13, 9, 16, 7, 9),
  `2021` = c(4, 0, 3, 8, 5, 4, 6)
)
我尝试过用 for 循环实现,但无法同时在两个维度(按 ID 分组、按年份列)上进行处理。
期望的结果如下所示:
# Expected result for the sample input: within each ID and year, only the
# maximum survives when some value is >= 10; otherwise values are unchanged.
solution <- data.table(
  ID = c("a1", "a2", "a2", "a1", "a2", "a1", "a1"),
  `2018` = c(0, 0, 11, 0, 0, 22, 0),
  `2019` = c(0, 5, 6, 21, 1, 0, 0),
  `2020` = c(0, 0, 0, 9, 16, 7, 9),
  `2021` = c(4, 0, 3, 8, 5, 4, 6)
)
我们可以使用 data.table 按 ID 分组,并对每个年份列应用同一规则:
library(data.table)

# Columns to process: every column except the first one (ID).
nm1 <- names(data)[-1]

# Update `data` by reference. For each ID group and each year column:
#   * if any value is >= 10, keep the group maximum and set the rest to 0;
#   * otherwise leave the values untouched.
# na.rm = TRUE keeps the scalar `if` from failing when a group contains NA,
# and which() replaces only positions that compare unequal to the maximum,
# so missing values are left as NA.
data[, (nm1) := lapply(.SD, function(x) {
  if (any(x >= 10, na.rm = TRUE)) {
    replace(x, which(x != max(x, na.rm = TRUE)), 0)
  } else {
    x
  }
}), by = ID, .SDcols = nm1]
-输出
> data
ID 2018 2019 2020 2021
1: a1 0 0 0 4
2: a2 0 5 0 0
3: a2 11 6 0 3
4: a1 0 21 9 8
5: a2 0 1 16 5
6: a1 22 0 7 4
7: a1 0 0 9 6
如果您可以接受基于 tibble(而不是 data.table)的解决方案,可以使用 tidyverse:
library(tidyverse)

# Sample input: an ID column followed by one numeric column per year.
data <- tibble(
  ID = c("a1", "a2", "a2", "a1", "a2", "a1", "a1"),
  `2018` = c(3, 5, 11, 3, 9, 22, 6),
  `2019` = c(3, 5, 6, 21, 1, 4, 0),
  `2020` = c(0, 4, 13, 9, 16, 7, 9),
  `2021` = c(4, 0, 3, 8, 5, 4, 6)
)

# Reshape to long format so each (year, ID) group can be handled the same way,
# apply the rule per group, then reshape back to one column per year.
result <- data %>%
  # row_id keeps the original rows distinct so pivot_wider() can restore them.
  # row_number() also works for zero-row input, where 1:n() would give c(1, 0).
  mutate(row_id = row_number()) %>%
  pivot_longer(-c(row_id, ID), names_to = "year") %>%
  group_by(year, ID) %>%
  mutate(
    # na.rm = TRUE keeps the scalar `if` from failing on missing values.
    value = if (any(value >= 10, na.rm = TRUE)) {
      # Keep only the first maximum of the group; everything else becomes 0.
      if_else(seq_along(value) == which.max(value), value, 0)
    } else {
      value
    }
  ) %>%
  ungroup() %>%
  pivot_wider(names_from = year)

result
#> # A tibble: 7 x 6
#> ID row_id `2018` `2019` `2020` `2021`
#> <chr> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 a1 1 0 0 0 4
#> 2 a2 2 0 5 0 0
#> 3 a2 3 11 6 0 3
#> 4 a1 4 0 21 9 8
#> 5 a2 5 0 1 16 5
#> 6 a1 6 22 0 7 4
#> 7 a1 7 0 0 9 6
由 reprex package (v1.0.0) 于 2021-08-19 创建
我想在 data.table 中实现以下处理:对每个 ID 和每个年份列,如果存在 >= 10 的条目,则将该 ID 在该年份的其他所有值设置为零。如果没有大于或等于 10 的值,则所有值保持不变。如果有多个值大于或等于 10,则只保留其中最大的条目。
下面是一个示例 data.table:
library(data.table)

# Sample input: an ID column followed by one numeric column per year.
# Backticks are needed because the year column names are non-syntactic.
data <- data.table(
  ID = c("a1", "a2", "a2", "a1", "a2", "a1", "a1"),
  `2018` = c(3, 5, 11, 3, 9, 22, 6),
  `2019` = c(3, 5, 6, 21, 1, 4, 0),
  `2020` = c(0, 4, 13, 9, 16, 7, 9),
  `2021` = c(4, 0, 3, 8, 5, 4, 6)
)
我尝试过用 for 循环实现,但无法同时在两个维度(按 ID 分组、按年份列)上进行处理。期望的结果如下所示:
# Expected result for the sample input: within each ID and year, only the
# maximum survives when some value is >= 10; otherwise values are unchanged.
solution <- data.table(
  ID = c("a1", "a2", "a2", "a1", "a2", "a1", "a1"),
  `2018` = c(0, 0, 11, 0, 0, 22, 0),
  `2019` = c(0, 5, 6, 21, 1, 0, 0),
  `2020` = c(0, 0, 0, 9, 16, 7, 9),
  `2021` = c(4, 0, 3, 8, 5, 4, 6)
)
我们可以使用 data.table 按 ID 分组,并对每个年份列应用同一规则:
library(data.table)

# Columns to process: every column except the first one (ID).
nm1 <- names(data)[-1]

# Update `data` by reference. For each ID group and each year column:
#   * if any value is >= 10, keep the group maximum and set the rest to 0;
#   * otherwise leave the values untouched.
# na.rm = TRUE keeps the scalar `if` from failing when a group contains NA,
# and which() replaces only positions that compare unequal to the maximum,
# so missing values are left as NA.
data[, (nm1) := lapply(.SD, function(x) {
  if (any(x >= 10, na.rm = TRUE)) {
    replace(x, which(x != max(x, na.rm = TRUE)), 0)
  } else {
    x
  }
}), by = ID, .SDcols = nm1]
-输出
> data
ID 2018 2019 2020 2021
1: a1 0 0 0 4
2: a2 0 5 0 0
3: a2 11 6 0 3
4: a1 0 21 9 8
5: a2 0 1 16 5
6: a1 22 0 7 4
7: a1 0 0 9 6
如果您可以接受基于 tibble(而不是 data.table)的解决方案,可以使用 tidyverse:
library(tidyverse)

# Sample input: an ID column followed by one numeric column per year.
data <- tibble(
  ID = c("a1", "a2", "a2", "a1", "a2", "a1", "a1"),
  `2018` = c(3, 5, 11, 3, 9, 22, 6),
  `2019` = c(3, 5, 6, 21, 1, 4, 0),
  `2020` = c(0, 4, 13, 9, 16, 7, 9),
  `2021` = c(4, 0, 3, 8, 5, 4, 6)
)

# Reshape to long format so each (year, ID) group can be handled the same way,
# apply the rule per group, then reshape back to one column per year.
result <- data %>%
  # row_id keeps the original rows distinct so pivot_wider() can restore them.
  # row_number() also works for zero-row input, where 1:n() would give c(1, 0).
  mutate(row_id = row_number()) %>%
  pivot_longer(-c(row_id, ID), names_to = "year") %>%
  group_by(year, ID) %>%
  mutate(
    # na.rm = TRUE keeps the scalar `if` from failing on missing values.
    value = if (any(value >= 10, na.rm = TRUE)) {
      # Keep only the first maximum of the group; everything else becomes 0.
      if_else(seq_along(value) == which.max(value), value, 0)
    } else {
      value
    }
  ) %>%
  ungroup() %>%
  pivot_wider(names_from = year)

result
#> # A tibble: 7 x 6
#> ID row_id `2018` `2019` `2020` `2021`
#> <chr> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 a1 1 0 0 0 4
#> 2 a2 2 0 5 0 0
#> 3 a2 3 11 6 0 3
#> 4 a1 4 0 21 9 8
#> 5 a2 5 0 1 16 5
#> 6 a1 6 22 0 7 4
#> 7 a1 7 0 0 9 6
由 reprex package (v1.0.0) 于 2021-08-19 创建