每行对多个变量的乘积求和
Summing the products of multiple variables per row
我有一个data.table如下:
library(data.table)
# Build a reproducible 100-row example table. The seed is fixed, so the
# random draws below are repeatable as long as their order is unchanged.
set.seed(1)
# Country, Group, Time, norm and Age are given with 100 values; the other
# columns are shorter (lengths 50, 6 and 10) and depend on data.table()
# recycling them to full length.
# NOTE(review): the length-6 columns do not divide 100 evenly -- confirm the
# installed data.table version accepts recycling with a remainder.
DT <- data.table(panelID = sample(50,50), # Creates a panel ID
Country = c(rep("Albania",30),rep("Belarus",50), rep("Chilipepper",20)),
some_NA = sample(0:5, 6),
some_NA_factor = sample(0:5, 6),
Group = c(rep(1,20),rep(2,20),rep(3,20),rep(4,20),rep(5,20)),
Time = rep(seq(as.Date("2010-01-03"), length=20, by="1 month") - 1,5),
norm = round(runif(100)/10,2),
Income = sample(0:5, 6),
Happiness = sample(10,10),
Sex = round(rnorm(10,0.75,0.3),2),
Age = sample(100,100),
Educ = round(rnorm(10,0.75,0.3),2))
DT [, uniqueID := .I] # Creates a unique ID
DT[DT == 0] <- NA # Replaces every 0 in the table with NA
# Convert the column to a factor only after the zeros have become NA.
DT$some_NA_factor <- factor(DT$some_NA_factor)
现在,我想(出于某些人为设定的原因)使用 data.table 对每一行观测计算"收入 × 教育"与"性别 × 年龄"这两个乘积之和。请注意,我的实际数据有更多变量,其中一些含有 NA。我试过了:
# Attempt 1 (does not give the wanted result): sum() collapses both product
# vectors into one grand total, so every row receives the same value instead
# of a per-row sum.
DT<- setDT(DT)[, newvar:= sum((Income *Educ),
(Sex * Age), na.rm=TRUE)]
但这计算的是整列的总和,而不是逐行求和。我也试过:
# Attempt 2 (fails): rowSums() is handed a plain vector as its first
# argument rather than a two-dimensional object, which raises the
# "'x' must be an array of at least two dimensions" error.
DT<- setDT(DT)[, newvar:= rowSums((Income *Educ),
(Sex * Age), na.rm=TRUE)]
但这不起作用:
Error in base::rowSums(x, na.rm = na.rm, dims = dims, ...) :
'x' must be an array of at least two dimensions
在 data.table 中执行此操作的正确方法是什么?
# Option 1: place the two per-row products side by side in a two-column
# table and let rowSums() add across the columns, skipping NAs.
DT[, newvar := {
  products <- data.table(Income * Educ, Sex * Age)
  rowSums(products, na.rm = TRUE)
}]
# Option 2: compute both products once and pick, row by row, whichever one
# is available; add them only when neither is NA.
DT[, newvar := {
  inc_educ <- Income * Educ
  sex_age <- Sex * Age
  fifelse(
    is.na(inc_educ),
    sex_age,
    fifelse(is.na(sex_age), inc_educ, inc_educ + sex_age)
  )
}]
注:
只有当 data.frame 还不是 data.table 时才需要 `setDT()`。
在 data.table 中使用 `:=` 时,不需要再用 `<-` 把结果赋值回去,因为 `:=` 是按引用就地修改的。
我有一个data.table如下:
library(data.table)
# Build a reproducible 100-row example table. The seed is fixed, so the
# random draws below are repeatable as long as their order is unchanged.
set.seed(1)
# Country, Group, Time, norm and Age are given with 100 values; the other
# columns are shorter (lengths 50, 6 and 10) and depend on data.table()
# recycling them to full length.
# NOTE(review): the length-6 columns do not divide 100 evenly -- confirm the
# installed data.table version accepts recycling with a remainder.
DT <- data.table(panelID = sample(50,50), # Creates a panel ID
Country = c(rep("Albania",30),rep("Belarus",50), rep("Chilipepper",20)),
some_NA = sample(0:5, 6),
some_NA_factor = sample(0:5, 6),
Group = c(rep(1,20),rep(2,20),rep(3,20),rep(4,20),rep(5,20)),
Time = rep(seq(as.Date("2010-01-03"), length=20, by="1 month") - 1,5),
norm = round(runif(100)/10,2),
Income = sample(0:5, 6),
Happiness = sample(10,10),
Sex = round(rnorm(10,0.75,0.3),2),
Age = sample(100,100),
Educ = round(rnorm(10,0.75,0.3),2))
DT [, uniqueID := .I] # Creates a unique ID
DT[DT == 0] <- NA # Replaces every 0 in the table with NA
# Convert the column to a factor only after the zeros have become NA.
DT$some_NA_factor <- factor(DT$some_NA_factor)
现在,我想(出于某些人为设定的原因)使用 data.table 对每一行观测计算"收入 × 教育"与"性别 × 年龄"这两个乘积之和。请注意,我的实际数据有更多变量,其中一些含有 NA。我试过了:
# Attempt 1 (does not give the wanted result): sum() collapses both product
# vectors into one grand total, so every row receives the same value instead
# of a per-row sum.
DT<- setDT(DT)[, newvar:= sum((Income *Educ),
(Sex * Age), na.rm=TRUE)]
但这计算的是整列的总和,而不是逐行求和。我也试过:
# Attempt 2 (fails): rowSums() is handed a plain vector as its first
# argument rather than a two-dimensional object, which raises the
# "'x' must be an array of at least two dimensions" error.
DT<- setDT(DT)[, newvar:= rowSums((Income *Educ),
(Sex * Age), na.rm=TRUE)]
但这不起作用:
Error in base::rowSums(x, na.rm = na.rm, dims = dims, ...) :
'x' must be an array of at least two dimensions
在 data.table 中执行此操作的正确方法是什么?
# Option 1: place the two per-row products side by side in a two-column
# table and let rowSums() add across the columns, skipping NAs.
DT[, newvar := {
  products <- data.table(Income * Educ, Sex * Age)
  rowSums(products, na.rm = TRUE)
}]
# Option 2: compute both products once and pick, row by row, whichever one
# is available; add them only when neither is NA.
DT[, newvar := {
  inc_educ <- Income * Educ
  sex_age <- Sex * Age
  fifelse(
    is.na(inc_educ),
    sex_age,
    fifelse(is.na(sex_age), inc_educ, inc_educ + sex_age)
  )
}]
注:
只有当 data.frame 还不是 data.table 时才需要 `setDT()`。
在 data.table 中使用 `:=` 时,不需要再用 `<-` 把结果赋值回去,因为 `:=` 是按引用就地修改的。