在条件基础上更改列的值(缺失数据和分类)
Change values of columns on conditional bases (Missing Data and Categorical)
对于问卷,我想复制 df1,其中 x 列发生以下两件事:
将 x 中的缺失数据(编码为 0)替换为“-1”,也就是说所有的 0 都必须变成 -1。(注:下文的回答和示例数据均使用 -9 作为缺失值编码。)
x 的特定部分用数字而不是类别编码。我创建了一个函数来将不同的类别分配给不同的值
# Assign each value to one of eight order-of-magnitude classes.
#
# a_vector: numeric vector of raw values.
# Returns a numeric vector of the same length: 1 for [0, 50), 2 for [50, 500),
# ... up to 8 for [50000000, 500000000). Values that match none of the ranges
# (negative, >= 500000000, or NA) come back as NA, which is case_when()'s
# default when no condition is TRUE.
# Needs dplyr to be attached, because case_when() is called unqualified.
categorise <- function(a_vector) {
  # The result of case_when() is returned directly; the former last line was
  # corrupted by stray editor text and did not parse.
  case_when(
    a_vector >= 0 & a_vector < 50 ~ 1,
    a_vector >= 50 & a_vector < 500 ~ 2,
    a_vector >= 500 & a_vector < 5000 ~ 3,
    a_vector >= 5000 & a_vector < 50000 ~ 4,
    a_vector >= 50000 & a_vector < 500000 ~ 5,
    a_vector >= 500000 & a_vector < 5000000 ~ 6,
    a_vector >= 5000000 & a_vector < 50000000 ~ 7,
    a_vector >= 50000000 & a_vector < 500000000 ~ 8
  )
}
我们可以使用findInterval
# findInterval() returns, for each value, how many break points are <= it,
# which is the class index here. Entries equal to 0 are then overwritten
# with -9.
replace(
  findInterval(a_vector, c(0, 50, 500, 5e3, 5e4, 5e5, 5e6, 5e7)),
  list = a_vector == 0,
  values = -9
)
#[1] -9 1 1 2 2 3 4
或 cut
# cut() builds right-closed intervals (a, b] by default, which would turn 0
# into NA and put boundary values such as 50 or 5000 one class too low.
# right = FALSE gives the half-open classes [0, 50), [50, 500), ... instead,
# and the extra 500000000 break supplies the eighth class.
# Zeros fall into class 1 here; they are not recoded by this expression.
as.integer(cut(a_vector,
               breaks = c(0, 50, 500, 5000, 50000, 500000, 5000000, 50000000,
                          500000000),
               right = FALSE))
数据
# Example input for the findInterval() / cut() snippets: one zero plus
# integer values from 1 to 5000.
a_vector <- c(0L, 1L, 10L, 65L, 250L, 555L, 5000L)
我想你可以这样编写 categorise_losses 函数:
# Map values to integer order-of-magnitude classes with a closed formula:
# divide by 5, take log10, truncate to an integer and add 1.
#
# x: numeric vector; the example below uses values >= 1.
# Returns an integer vector of the same length.
categorise_losses <- function(x) {
  magnitude <- log10(x / 5)
  # as.integer() truncates toward zero, so values below 50 all end up in class 1.
  as.integer(magnitude) + 1L
}
# usage example
categorise_losses(c(1L, 10L, 65L, 250L, 555L, 5000L))
#R> [1] 1 1 2 2 3 4
# compare with the OP's function
library(dplyr)
# Reference implementation of the eight classes, written as a cascade.
# case_when() uses the first condition that is TRUE, so after negatives are
# sent to NA each line only needs the upper bound of its class.
#
# x: numeric vector.
# Returns a numeric vector: 1 for [0, 50), 2 for [50, 500), ..., 8 for
# [50000000, 500000000); NA for anything else (including NA input).
categorise_losses_OP <- function(x) {
  case_when(
    x < 0 ~ NA_real_,
    x < 50 ~ 1,
    x < 500 ~ 2,
    x < 5000 ~ 3,
    x < 50000 ~ 4,
    x < 500000 ~ 5,
    x < 5000000 ~ 6,
    x < 50000000 ~ 7,
    x < 500000000 ~ 8
  )
}
# we get the same
# Compare both implementations on every integer from 1 to 500000.
all.equal(
  categorise_losses_OP(seq_len(500000)),
  categorise_losses(seq_len(500000))
)
#R> [1] TRUE
如果还需要处理 0 变成 -9 的情况,可以使用:
# Map values to integer order-of-magnitude classes, recoding zeros as -9.
#
# x: numeric vector.
# Returns an integer vector of the same length (names of x are kept):
#   * 0 becomes -9L,
#   * finite positive values become as.integer(log10(x / 5)) + 1L,
#   * everything else (NA, negative, infinite) becomes NA.
categorise_losses <- function(x) {
  out <- rep(NA_integer_, length(x))
  names(out) <- names(x)
  # Take the logarithm only where it is defined. This removes the need for a
  # blanket suppressWarnings(), which would also hide unrelated warnings.
  positive <- is.finite(x) & x > 0
  out[positive] <- as.integer(log10(x[positive] / 5)) + 1L
  out[!is.na(x) & x == 0] <- -9L
  out
}
categorise_losses(c(0L, 1L, 10L, 65L, 250L, 555L, 5000L))
#R> [1] -9 1 1 2 2 3 4
要在列条目的子集上使用该函数,可以先用 `$` 访问该列,再用 `[]` 选取需要的条目,如下所示:
# data set example
dat <- data.frame(
  year = c(1950L, 1950L, 1950L, 2010L, 2010L, 2010L),
  crop_loss = c(0L, 5L, 95L, -9L, -9L, 1L)
)
# use the function on the data
# Zeros become -9L, finite positive values get their class, and anything else
# becomes NA. The logarithm is only taken where it is defined, so no blanket
# suppressWarnings() is needed.
categorise_losses <- function(x) {
  out <- rep(NA_integer_, length(x))
  names(out) <- names(x)
  positive <- is.finite(x) & x > 0
  out[positive] <- as.integer(log10(x[positive] / 5)) + 1L
  out[!is.na(x) & x == 0] <- -9L
  out
}
# Recode only the rows with year <= 2006; all other rows are left untouched.
dat$crop_loss[dat$year <= 2006L] <-
  categorise_losses(dat$crop_loss[dat$year <= 2006L])
# the result
dat
#R> year crop_loss
#R> 1 1950 -9
#R> 2 1950 1
#R> 3 1950 2
#R> 4 2010 -9
#R> 5 2010 -9
#R> 6 2010 1
对于问卷,我想复制 df1,其中 x 列发生以下两件事:
将 x 中的缺失数据(编码为 0)替换为“-1”,也就是说所有的 0 都必须变成 -1。(注:下文的回答和示例数据均使用 -9 作为缺失值编码。)
x 的特定部分用数字而不是类别编码。我创建了一个函数来将不同的类别分配给不同的值
# Assign each value to one of eight order-of-magnitude classes.
#
# a_vector: numeric vector of raw values.
# Returns a numeric vector of the same length: 1 for [0, 50), 2 for [50, 500),
# ... up to 8 for [50000000, 500000000). Values that match none of the ranges
# (negative, >= 500000000, or NA) come back as NA, which is case_when()'s
# default when no condition is TRUE.
# Needs dplyr to be attached, because case_when() is called unqualified.
categorise <- function(a_vector) {
  # The result of case_when() is returned directly; the former last line was
  # corrupted by stray editor text and did not parse.
  case_when(
    a_vector >= 0 & a_vector < 50 ~ 1,
    a_vector >= 50 & a_vector < 500 ~ 2,
    a_vector >= 500 & a_vector < 5000 ~ 3,
    a_vector >= 5000 & a_vector < 50000 ~ 4,
    a_vector >= 50000 & a_vector < 500000 ~ 5,
    a_vector >= 500000 & a_vector < 5000000 ~ 6,
    a_vector >= 5000000 & a_vector < 50000000 ~ 7,
    a_vector >= 50000000 & a_vector < 500000000 ~ 8
  )
}
我们可以使用findInterval
# findInterval() returns, for each value, how many break points are <= it,
# which is the class index here. Entries equal to 0 are then overwritten
# with -9.
replace(
  findInterval(a_vector, c(0, 50, 500, 5e3, 5e4, 5e5, 5e6, 5e7)),
  list = a_vector == 0,
  values = -9
)
#[1] -9 1 1 2 2 3 4
或 cut
# cut() builds right-closed intervals (a, b] by default, which would turn 0
# into NA and put boundary values such as 50 or 5000 one class too low.
# right = FALSE gives the half-open classes [0, 50), [50, 500), ... instead,
# and the extra 500000000 break supplies the eighth class.
# Zeros fall into class 1 here; they are not recoded by this expression.
as.integer(cut(a_vector,
               breaks = c(0, 50, 500, 5000, 50000, 500000, 5000000, 50000000,
                          500000000),
               right = FALSE))
数据
# Example input for the findInterval() / cut() snippets: one zero plus
# integer values from 1 to 5000.
a_vector <- c(0L, 1L, 10L, 65L, 250L, 555L, 5000L)
我想你可以这样编写 categorise_losses 函数:
# Map values to integer order-of-magnitude classes with a closed formula:
# divide by 5, take log10, truncate to an integer and add 1.
#
# x: numeric vector; the example below uses values >= 1.
# Returns an integer vector of the same length.
categorise_losses <- function(x) {
  magnitude <- log10(x / 5)
  # as.integer() truncates toward zero, so values below 50 all end up in class 1.
  as.integer(magnitude) + 1L
}
# usage example
categorise_losses(c(1L, 10L, 65L, 250L, 555L, 5000L))
#R> [1] 1 1 2 2 3 4
# compare with the OP's function
library(dplyr)
# Reference implementation of the eight classes, written as a cascade.
# case_when() uses the first condition that is TRUE, so after negatives are
# sent to NA each line only needs the upper bound of its class.
#
# x: numeric vector.
# Returns a numeric vector: 1 for [0, 50), 2 for [50, 500), ..., 8 for
# [50000000, 500000000); NA for anything else (including NA input).
categorise_losses_OP <- function(x) {
  case_when(
    x < 0 ~ NA_real_,
    x < 50 ~ 1,
    x < 500 ~ 2,
    x < 5000 ~ 3,
    x < 50000 ~ 4,
    x < 500000 ~ 5,
    x < 5000000 ~ 6,
    x < 50000000 ~ 7,
    x < 500000000 ~ 8
  )
}
# we get the same
# Compare both implementations on every integer from 1 to 500000.
all.equal(
  categorise_losses_OP(seq_len(500000)),
  categorise_losses(seq_len(500000))
)
#R> [1] TRUE
如果还需要处理 0 变成 -9 的情况,可以使用:
# Map values to integer order-of-magnitude classes, recoding zeros as -9.
#
# x: numeric vector.
# Returns an integer vector of the same length (names of x are kept):
#   * 0 becomes -9L,
#   * finite positive values become as.integer(log10(x / 5)) + 1L,
#   * everything else (NA, negative, infinite) becomes NA.
categorise_losses <- function(x) {
  out <- rep(NA_integer_, length(x))
  names(out) <- names(x)
  # Take the logarithm only where it is defined. This removes the need for a
  # blanket suppressWarnings(), which would also hide unrelated warnings.
  positive <- is.finite(x) & x > 0
  out[positive] <- as.integer(log10(x[positive] / 5)) + 1L
  out[!is.na(x) & x == 0] <- -9L
  out
}
categorise_losses(c(0L, 1L, 10L, 65L, 250L, 555L, 5000L))
#R> [1] -9 1 1 2 2 3 4
要在列条目的子集上使用该函数,可以先用 `$` 访问该列,再用 `[]` 选取需要的条目,如下所示:
# data set example
dat <- data.frame(
  year = c(1950L, 1950L, 1950L, 2010L, 2010L, 2010L),
  crop_loss = c(0L, 5L, 95L, -9L, -9L, 1L)
)
# use the function on the data
# Zeros become -9L, finite positive values get their class, and anything else
# becomes NA. The logarithm is only taken where it is defined, so no blanket
# suppressWarnings() is needed.
categorise_losses <- function(x) {
  out <- rep(NA_integer_, length(x))
  names(out) <- names(x)
  positive <- is.finite(x) & x > 0
  out[positive] <- as.integer(log10(x[positive] / 5)) + 1L
  out[!is.na(x) & x == 0] <- -9L
  out
}
# Recode only the rows with year <= 2006; all other rows are left untouched.
dat$crop_loss[dat$year <= 2006L] <-
  categorise_losses(dat$crop_loss[dat$year <= 2006L])
# the result
dat
#R> year crop_loss
#R> 1 1950 -9
#R> 2 1950 1
#R> 3 1950 2
#R> 4 2010 -9
#R> 5 2010 -9
#R> 6 2010 1