R预测期望变量不在lm对象中
R predict expecting variable not in lm object
我已经建立了一种方法来创建一个纠错模型 (ECM),该模型是多个 ECM 的平均值。为此,我利用 R 中的 lm()
函数创建多个表示 ECM 的 lm
对象。我正在平均每个对象的系数以获得最终模型。 lm
对象表示 ECM 的方式是,在对数据进行 运行 lm()
之前,我将数据转换为 ECM 所需的格式。
我还使用 AIC 向后选择来消除我不需要的变量。该过程似乎在创建 ECM 时运行良好。但是,当我创建一个列名与我的模型中的系数匹配的数据框时,我收到一条错误消息,指出数据中缺少必要的变量。但是,在最终模型中未包含此变量,因此没有必要对其进行预测。那么为什么 predict()
寻找那个变量呢?我做错了什么?
#Load data
library(ecm)
data(Wilshire)
trn <- Wilshire[Wilshire$date<='2015-12-01',]
y <- trn$Wilshire5000
xeq <- xtr <- trn[c('CorpProfits', 'FedFundsRate', 'UnempRate')]
#Function to split data into k partitions and build k models, each on a (k-1)/k subset of the data
avelm <- function(formula, data, k = 5, seed = 5, ...) {
lmall <- lm(formula, data, ...)
modellist <- 1:k
set.seed(seed)
models <- lapply(modellist, function(i) {
tstIdx <- sample(nrow(data), 1/k * nrow(data))
trn <- data[-tstIdx, ]
lm(as.formula(formula), data = trn)
})
lmnames <- names(lmall$coefficients)
lmall$coefficients <- rowMeans(as.data.frame(sapply(models, function(m) coef(m))))
names(lmall$coefficients) <- lmnames
lmall$fitted.values <- predict(lmall, data)
target <- trimws(gsub("~.*$", "", formula))
lmall$residuals <- data[, target] - lmall$fitted.values
return(lmall)
}
#Function to create an ECM using backwards selection based on AIC (leveraged avelm function above)
aveecmback <- function (y, xeq, xtr, k = 5, seed = 5, ...) {
xeqnames <- names(xeq)
xeqnames <- paste0(xeqnames, "Lag1")
xeq <- as.data.frame(xeq)
xeq <- rbind(rep(NA, ncol(xeq)), xeq[1:(nrow(xeq) - 1), ])
xtrnames <- names(xtr)
xtrnames <- paste0("delta", xtrnames)
xtr <- as.data.frame(xtr)
xtr <- data.frame(apply(xtr, 2, diff, 1))
yLag1 <- y[1:(length(y) - 1)]
x <- cbind(xtr, xeq[complete.cases(xeq), ])
x <- cbind(x, yLag1)
names(x) <- c(xtrnames, xeqnames, "yLag1")
x$dy <- diff(y, 1)
formula <- "dy ~ ."
model <- avelm(formula, data = x, k = k, seed = seed, ...)
fullAIC <- partialAIC <- AIC(model)
while (partialAIC <= fullAIC) {
todrop <- rownames(drop1(model))[-grep("none|yLag1", rownames(drop1(model)))][which.min(drop1(model)$AIC[-grep("none|yLag1", rownames(drop1(model)))])]
formula <- paste0(formula, " - ", todrop)
model <- avelm(formula, data = x, seed = seed, ...)
partialAIC <- AIC(model)
if (partialAIC < fullAIC & length(rownames(drop1(model))) > 2) {
fullAIC <- partialAIC
}
}
return(model)
}
finalmodel <- aveecmback(y, xeq, xtr)
print(finalmodel)
Call:
lm(formula = formula, data = data)
Coefficients:
(Intercept) deltaCorpProfits deltaUnempRate CorpProfitsLag1 yLag1
-0.177771 0.012733 -1.204489 0.002046 -0.024294
#Create data frame to predict on
set.seed(2)
df <- data.frame(deltaCorpProfits=rnorm(5), deltaUnempRate=rnorm(5), CorpProfitsLag1=rnorm(5), yLag1=rnorm(5))
predict(finalmodel, df)
Error in eval(predvars, data, env) : object 'deltaFedFundsRate' not found
我明白了。问题出在我在 while 循环内修改 formula
的 aveecmback()
函数部分。相反,如果我修改 x
以删除变量,问题就会消失。这是因为像这样的东西在数据框中仍然需要 disp
,即使它已在公式中删除:
data(mtcars)
model <- lm(mpg~.-disp, mtcars)
predict(model, mtcars[-which(names(mtcars) %in% 'disp')])
Error in eval(predvars, data, env) : object 'disp' not found
然而,像这样的东西将允许 predict()
在没有 disp
的数据帧上工作:
data(mtcars)
model <- lm(mpg~., mtcars[-which(names(mtcars) %in% 'disp')])
predict(model, mtcars[-which(names(mtcars) %in% 'disp')])
Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive Hornet Sportabout
22.37587 22.07853 26.58631 20.82285 17.26052
Valiant Duster 360 Merc 240D Merc 230 Merc 280
20.46572 14.04956 22.38273 24.20323 18.97756
Merc 280C Merc 450SE Merc 450SL Merc 450SLC Cadillac Fleetwood
19.37670 15.10244 16.12864 16.26339 11.31787
Lincoln Continental Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
10.68985 10.65062 28.03687 29.29545 29.42472
Toyota Corona Dodge Challenger AMC Javelin Camaro Z28 Pontiac Firebird
23.72382 16.91215 17.78366 13.53713 16.15156
Fiat X1-9 Porsche 914-2 Lotus Europa Ford Pantera L Ferrari Dino
28.35383 26.31886 27.36155 18.86561 19.75073
Maserati Bora Volvo 142E
13.86302 24.78865
我已经建立了一种方法来创建一个纠错模型 (ECM),该模型是多个 ECM 的平均值。为此,我利用 R 中的 lm()
函数创建多个表示 ECM 的 lm
对象。我正在平均每个对象的系数以获得最终模型。 lm
对象表示 ECM 的方式是,在对数据进行 运行 lm()
之前,我将数据转换为 ECM 所需的格式。
我还使用 AIC 向后选择来消除我不需要的变量。该过程似乎在创建 ECM 时运行良好。但是,当我创建一个列名与我的模型中的系数匹配的数据框时,我收到一条错误消息,指出数据中缺少必要的变量。但是,在最终模型中未包含此变量,因此没有必要对其进行预测。那么为什么 predict()
寻找那个变量呢?我做错了什么?
#Load data
library(ecm)
data(Wilshire)
trn <- Wilshire[Wilshire$date<='2015-12-01',]
y <- trn$Wilshire5000
xeq <- xtr <- trn[c('CorpProfits', 'FedFundsRate', 'UnempRate')]
#Function to split data into k partitions and build k models, each on a (k-1)/k subset of the data
avelm <- function(formula, data, k = 5, seed = 5, ...) {
lmall <- lm(formula, data, ...)
modellist <- 1:k
set.seed(seed)
models <- lapply(modellist, function(i) {
tstIdx <- sample(nrow(data), 1/k * nrow(data))
trn <- data[-tstIdx, ]
lm(as.formula(formula), data = trn)
})
lmnames <- names(lmall$coefficients)
lmall$coefficients <- rowMeans(as.data.frame(sapply(models, function(m) coef(m))))
names(lmall$coefficients) <- lmnames
lmall$fitted.values <- predict(lmall, data)
target <- trimws(gsub("~.*$", "", formula))
lmall$residuals <- data[, target] - lmall$fitted.values
return(lmall)
}
#Function to create an ECM using backwards selection based on AIC (leveraged avelm function above)
aveecmback <- function (y, xeq, xtr, k = 5, seed = 5, ...) {
xeqnames <- names(xeq)
xeqnames <- paste0(xeqnames, "Lag1")
xeq <- as.data.frame(xeq)
xeq <- rbind(rep(NA, ncol(xeq)), xeq[1:(nrow(xeq) - 1), ])
xtrnames <- names(xtr)
xtrnames <- paste0("delta", xtrnames)
xtr <- as.data.frame(xtr)
xtr <- data.frame(apply(xtr, 2, diff, 1))
yLag1 <- y[1:(length(y) - 1)]
x <- cbind(xtr, xeq[complete.cases(xeq), ])
x <- cbind(x, yLag1)
names(x) <- c(xtrnames, xeqnames, "yLag1")
x$dy <- diff(y, 1)
formula <- "dy ~ ."
model <- avelm(formula, data = x, k = k, seed = seed, ...)
fullAIC <- partialAIC <- AIC(model)
while (partialAIC <= fullAIC) {
todrop <- rownames(drop1(model))[-grep("none|yLag1", rownames(drop1(model)))][which.min(drop1(model)$AIC[-grep("none|yLag1", rownames(drop1(model)))])]
formula <- paste0(formula, " - ", todrop)
model <- avelm(formula, data = x, seed = seed, ...)
partialAIC <- AIC(model)
if (partialAIC < fullAIC & length(rownames(drop1(model))) > 2) {
fullAIC <- partialAIC
}
}
return(model)
}
finalmodel <- aveecmback(y, xeq, xtr)
print(finalmodel)
Call:
lm(formula = formula, data = data)
Coefficients:
(Intercept) deltaCorpProfits deltaUnempRate CorpProfitsLag1 yLag1
-0.177771 0.012733 -1.204489 0.002046 -0.024294
#Create data frame to predict on
set.seed(2)
df <- data.frame(deltaCorpProfits=rnorm(5), deltaUnempRate=rnorm(5), CorpProfitsLag1=rnorm(5), yLag1=rnorm(5))
predict(finalmodel, df)
Error in eval(predvars, data, env) : object 'deltaFedFundsRate' not found
我明白了。问题出在我在 while 循环内修改 formula
的 aveecmback()
函数部分。相反,如果我修改 x
以删除变量,问题就会消失。这是因为像这样的东西在数据框中仍然需要 disp
,即使它已在公式中删除:
data(mtcars)
model <- lm(mpg~.-disp, mtcars)
predict(model, mtcars[-which(names(mtcars) %in% 'disp')])
Error in eval(predvars, data, env) : object 'disp' not found
然而,像这样的东西将允许 predict()
在没有 disp
的数据帧上工作:
data(mtcars)
model <- lm(mpg~., mtcars[-which(names(mtcars) %in% 'disp')])
predict(model, mtcars[-which(names(mtcars) %in% 'disp')])
Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive Hornet Sportabout
22.37587 22.07853 26.58631 20.82285 17.26052
Valiant Duster 360 Merc 240D Merc 230 Merc 280
20.46572 14.04956 22.38273 24.20323 18.97756
Merc 280C Merc 450SE Merc 450SL Merc 450SLC Cadillac Fleetwood
19.37670 15.10244 16.12864 16.26339 11.31787
Lincoln Continental Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
10.68985 10.65062 28.03687 29.29545 29.42472
Toyota Corona Dodge Challenger AMC Javelin Camaro Z28 Pontiac Firebird
23.72382 16.91215 17.78366 13.53713 16.15156
Fiat X1-9 Porsche 914-2 Lotus Europa Ford Pantera L Ferrari Dino
28.35383 26.31886 27.36155 18.86561 19.75073
Maserati Bora Volvo 142E
13.86302 24.78865