增加 ifelse True 计数并写入列
Increment the ifelse True count and write to a column
我有一个小问题,我想在每次满足 if 条件时增加计数并写入列。截至目前,我只能改变字符串。我该怎么做?
# Attempt so far: write a fixed label into Bucket for every "g201a" row and
# keep the existing value elsewhere. The column is spelled `Bucket` (capital
# B) in the table below; R names are case-sensitive, so the same spelling is
# used both for the column being read and the column being written.
treeTable_w_bucket %>%
  mutate(Bucket = ifelse(Feature == "g201a", "test", Bucket))
Tree Node Feature Bucket
0 0 g201a NA
1 1 re20s NA
2 0 g201a NA
3 0 g201a NA
结果
Tree Node Feature Bucket
0 0 g201a bucket_1
1 1 re20s NA
2 0 g201a bucket_2
3 0 g201a bucket_3
我们可以使用replace
library(dplyr)
library(stringr)
# Add a `bucket` column: start from the existing `Bucket` values and, on the
# rows where Feature is "g201a", substitute "bucket_1", "bucket_2", ... in
# row order. sum() of the logical mask gives the number of matching rows, so
# seq_len() yields exactly one running number per match.
treeTable_w_bucket <- mutate(
  treeTable_w_bucket,
  bucket = replace(
    Bucket,
    Feature == "g201a",
    str_c("bucket_", seq_len(sum(Feature == "g201a")))
  )
)
或使用base R
# Logical mask marking the rows whose Feature is "g201a".
i1 <- treeTable_w_bucket$Feature == "g201a"
# Label the matching rows "bucket_1", "bucket_2", ... in row order; rows
# outside the mask are left untouched.
treeTable_w_bucket$bucket <- replace(
  treeTable_w_bucket$bucket,
  i1,
  paste0("bucket_", seq_len(sum(i1)))
)
treeTable_w_bucket
# Tree Node Feature Bucket bucket
#1 0 0 g201a NA bucket_1
#2 1 1 re20s NA <NA>
#3 2 0 g201a NA bucket_2
#4 3 0 g201a NA bucket_3
数据
# Example data: four rows with integer Tree/Node, a character Feature column
# and an all-NA (logical) Bucket column.
treeTable_w_bucket <- data.frame(
  Tree = 0:3,
  Node = c(0L, 1L, 0L, 0L),
  Feature = c("g201a", "re20s", "g201a", "g201a"),
  Bucket = rep(NA, 4L),
  stringsAsFactors = FALSE
)
使用 cumsum
我们可以用 cumsum 统计 Feature == "g201a" 累计出现的次数,再用 replace 把 Feature 不是 "g201a" 的行的值替换为 NA。
# Build the Bucket column: the running count of "g201a" rows becomes the
# suffix of the label, and rows whose Feature is something else get NA.
# local() keeps the helper variables out of the global environment.
df$Bucket <- local({
  is_target <- df$Feature == "g201a"
  labels <- paste0("bucket_", cumsum(is_target))
  labels[!is_target] <- NA
  labels
})
# Tree Node Feature Bucket
#1 0 0 g201a bucket_1
#2 1 1 re20s <NA>
#3 2 0 g201a bucket_2
#4 3 0 g201a bucket_3
这也可以在 dplyr 管道中完成:
library(dplyr)
# Same idea inside dplyr: label every row with the running count of "g201a"
# rows, then blank out (NA) the rows whose Feature is not "g201a".
mutate(
  df,
  Bucket = replace(
    paste0("bucket_", cumsum(Feature == "g201a")),
    Feature != "g201a",
    NA
  )
)
数据
# Example data: four rows with integer Tree/Node and Feature stored as a
# factor with levels "g201a" and "re20s".
df <- data.frame(
  Tree = 0:3,
  Node = c(0L, 1L, 0L, 0L),
  Feature = factor(
    c("g201a", "re20s", "g201a", "g201a"),
    levels = c("g201a", "re20s")
  )
)
我有一个小问题,我想在每次满足 if 条件时增加计数并写入列。截至目前,我只能改变字符串。我该怎么做?
# Attempt so far: write a fixed label into Bucket for every "g201a" row and
# keep the existing value elsewhere. The column is spelled `Bucket` (capital
# B) in the table below; R names are case-sensitive, so the same spelling is
# used both for the column being read and the column being written.
treeTable_w_bucket %>%
  mutate(Bucket = ifelse(Feature == "g201a", "test", Bucket))
Tree Node Feature Bucket
0 0 g201a NA
1 1 re20s NA
2 0 g201a NA
3 0 g201a NA
结果
Tree Node Feature Bucket
0 0 g201a bucket_1
1 1 re20s NA
2 0 g201a bucket_2
3 0 g201a bucket_3
我们可以使用replace
library(dplyr)
library(stringr)
# Add a `bucket` column: start from the existing `Bucket` values and, on the
# rows where Feature is "g201a", substitute "bucket_1", "bucket_2", ... in
# row order. sum() of the logical mask gives the number of matching rows, so
# seq_len() yields exactly one running number per match.
treeTable_w_bucket <- mutate(
  treeTable_w_bucket,
  bucket = replace(
    Bucket,
    Feature == "g201a",
    str_c("bucket_", seq_len(sum(Feature == "g201a")))
  )
)
或使用base R
# Logical mask marking the rows whose Feature is "g201a".
i1 <- treeTable_w_bucket$Feature == "g201a"
# Label the matching rows "bucket_1", "bucket_2", ... in row order; rows
# outside the mask are left untouched.
treeTable_w_bucket$bucket <- replace(
  treeTable_w_bucket$bucket,
  i1,
  paste0("bucket_", seq_len(sum(i1)))
)
treeTable_w_bucket
# Tree Node Feature Bucket bucket
#1 0 0 g201a NA bucket_1
#2 1 1 re20s NA <NA>
#3 2 0 g201a NA bucket_2
#4 3 0 g201a NA bucket_3
数据
# Example data: four rows with integer Tree/Node, a character Feature column
# and an all-NA (logical) Bucket column.
treeTable_w_bucket <- data.frame(
  Tree = 0:3,
  Node = c(0L, 1L, 0L, 0L),
  Feature = c("g201a", "re20s", "g201a", "g201a"),
  Bucket = rep(NA, 4L),
  stringsAsFactors = FALSE
)
使用 cumsum
我们可以用 cumsum 统计 Feature == "g201a" 累计出现的次数,再用 replace 把 Feature 不是 "g201a" 的行的值替换为 NA。
# Build the Bucket column: the running count of "g201a" rows becomes the
# suffix of the label, and rows whose Feature is something else get NA.
# local() keeps the helper variables out of the global environment.
df$Bucket <- local({
  is_target <- df$Feature == "g201a"
  labels <- paste0("bucket_", cumsum(is_target))
  labels[!is_target] <- NA
  labels
})
# Tree Node Feature Bucket
#1 0 0 g201a bucket_1
#2 1 1 re20s <NA>
#3 2 0 g201a bucket_2
#4 3 0 g201a bucket_3
这也可以在 dplyr 管道中完成:
library(dplyr)
# Same idea inside dplyr: label every row with the running count of "g201a"
# rows, then blank out (NA) the rows whose Feature is not "g201a".
mutate(
  df,
  Bucket = replace(
    paste0("bucket_", cumsum(Feature == "g201a")),
    Feature != "g201a",
    NA
  )
)
数据
# Example data: four rows with integer Tree/Node and Feature stored as a
# factor with levels "g201a" and "re20s".
df <- data.frame(
  Tree = 0:3,
  Node = c(0L, 1L, 0L, 0L),
  Feature = factor(
    c("g201a", "re20s", "g201a", "g201a"),
    levels = c("g201a", "re20s")
  )
)