获取对数因变量的拟合值到 R 中的原始面板数据集

Get fitted values of a log dependent variable to the original panel data set in R

我正在尝试合并对数对数模型因变量的拟合值。我的数据集是一个不平衡的面板。我尝试按照 here 指示的方式进行操作。但我的问题是不同的,因为我已经将我的大数据集转换成一个 plm 对象,它是一个日志因变量。

我的简单数据集可以通过以下代码访问。

dat = structure(list(Time = structure(c(9L, 7L, 15L, 1L, 17L, 13L, 
11L, 3L, 23L, 21L, 19L, 5L, 10L, 8L, 16L, 2L, 18L, 14L, 12L, 
4L, 24L, 22L, 20L, 6L), .Label = c("Apr-00", "Apr-01", "Aug-00", 
"Aug-01", "Dec-00", "Dec-01", "Feb-00", "Feb-01", "Jan-00", "Jan-01", 
"Jul-00", "Jul-01", "Jun-00", "Jun-01", "Mar-00", "Mar-01", "May-00", 
"May-01", "Nov-00", "Nov-01", "Oct-00", "Oct-01", "Sep-00", "Sep-01"
), class = "factor"), Firm = structure(c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L), .Label = c("A", "B"), class = "factor"), Out = c(161521L, 
142452L, 365697L, 355789L, 376843L, 258762L, 255447L, 188545L, 
213663L, 273209L, 317468L, 238668L, 241286L, 135288L, 363609L, 
318472L, 446279L, 390230L, 118945L, 174887L, 183770L, 197832L, 
317468L, 238668L), Lab = c(261L, 334L, 156L, 134L, 159L, 119L, 
41L, 247L, 251L, 62L, 525L, 217L, 298L, 109L, 7L, NA, 0L, 50L, 
143L, 85L, 80L, 214L, 525L, 217L), Cap = c(13L, 15L, 14L, 12L, 
15L, 12L, 45L, 75L, NA, 12L, 15L, 16L, 42L, 45L, 24L, 56L, 12L, 
12L, 45L, NA, 15L, 12L, 15L, 16L)), .Names = c("Time", "Firm", 
"Out", "Lab", "Cap"), class = "data.frame", row.names = c(NA, 
-24L))

我的数据集如下所示,缺少预测变量的数据。

    +--------+------+--------+-----+-----+
|  Time  | Firm |  Out   | Lab | Cap |
+--------+------+--------+-----+-----+
| Jan-00 | A    | 161521 | 261 |  13 |
| Feb-00 | A    | 142452 | 334 |  15 |
| Mar-00 | A    | 365697 | 156 |  14 |
| Apr-00 | A    | 355789 | 134 |  12 |
| May-00 | A    | 376843 | 159 |  15 |
| Jun-00 | A    | 258762 | 119 |  12 |
| Jul-00 | A    | 255447 |  41 |  45 |
| Aug-00 | A    | 188545 | 247 |  75 |
| Sep-00 | A    | 213663 | 251 |     |
| Oct-00 | A    | 273209 |  62 |  12 |
| Nov-00 | A    | 317468 | 525 |  15 |
| Dec-00 | A    | 238668 | 217 |  16 |
| Jan-01 | B    | 241286 | 298 |  42 |
| Feb-01 | B    | 135288 | 109 |  45 |
| Mar-01 | B    | 363609 |   7 |  24 |
| Apr-01 | B    | 318472 |     |  56 |
| May-01 | B    | 446279 |   0 |  12 |
| Jun-01 | B    | 390230 |  50 |  12 |
| Jul-01 | B    | 118945 | 143 |  45 |
| Aug-01 | B    | 174887 |  85 |     |
| Sep-01 | B    | 183770 |  80 |  15 |
| Oct-01 | B    | 197832 | 214 |  12 |
| Nov-01 | B    | 317468 | 525 |  15 |
| Dec-01 | B    | 238668 | 217 |  16 |
+--------+------+--------+-----+-----+ 

我可以使用以下代码得到拟合值

library(zoo)
library(plm)

Sys.setlocale("LC_TIME", "English")
dat["time1"] <- as.yearmon(dat$Time,format="%b-%y")
pdat <-pdata.frame(dat,index=c("Firm","time1"))
Model1<- plm(log(Out) ~ lag(log(Cap), 1) + log(Lab + 1),
         model = "within", data=pdat)
summary(Model1)

library(data.table)
FV_Log <- data.table(Model1$model[[1]] - Model1$residuals)

但是pdat的观察是24个观察FV_Log是19个观察,所以我无法将它合并到pdat中。我的 pdat 很大,有数千个观察结果,我使用代码创建了许多变量。因此,我们将不胜感激将拟合值正确合并到原始 pdat 中(不更改顺序)的任何帮助。

您面临的问题是双重的。首先,您有 NA 个,您的模型在计算残差时正在下降,因此您的结果自然会包含更少的元素。第二个问题是你滞后于你的一个协变量,它为你的第一次观察(首先是时间)产生一个 NA - 毕竟这就是滞后所做的。要克服该问题,您需要来自前一个时间段的额外数据,否则您必须放弃该观察。假设您无法访问之前时间段的数据,这就是我解决问题的方法。

 #First I would create a new variable for CAP and just lag and log that separately, rather than applying the function in the formula of the model itself
 pdat$Cap.lag.ln<-lag(log(pdat$Cap), 1)
 pdat$Cap<-NULL #deleting the old variable to clear up the mess

 #Dont necessarily need the na.omit but it couldn't hurt...
 Model1<- plm(log(Out) ~ Cap.lag.ln + log(Lab + 1),
    model = "within", data=pdat, na.omit=TRUE)
 FV_Log <- data.table(Model1$model[[1]] - Model1$residuals)

 #Now this is where you reduce your original dataset (pdat) by getting rid of the NAs
 pdat2<-na.omit(pdat)
 #You will notice that they're the same dimensions now and you can cbind
 pdat3 <-cbind(pdat2,FV_Log)
      Time Firm    Out Lab    time1 Cap.lag.ln       V1
 1: Feb-00    A 142452 334 Feb 2000          2 12.41211
 2: Mar-00    A 365697 156 Mar 2000          2 12.54861
 3: Apr-00    A 355789 134 Apr 2000          2 12.57580
 4: May-00    A 376843 159 May 2000          2 12.54520
 5: Jun-00    A 258762 119 Jun 2000          2 12.59702
 6: Jul-00    A 255447  41 Jul 2000          2 12.78611
 7: Aug-00    A 188545 247 Aug 2000          3 12.28887
 8: Sep-00    A 213663 251 Sep 2000          4 12.10858
 9: Nov-00    A 317468 525 Nov 2000          2 12.33084
 10: Dec-00   A 238668 217 Dec 2000          2 12.48949
 11: Feb-01   B 135288 109 Feb 2001          3 12.20776
 12: Mar-01   B 363609   7 Mar 2001          3 12.67984
 13: May-01   B 446279   0 May 2001          4 12.87698
 14: Jun-01   B 390230  50 Jun 2001          2 12.52360
 15: Jul-01   B 118945 143 Jul 2001          2 12.33665
 16: Aug-01   B 174887  85 Aug 2001          3 12.25209
 17: Oct-01   B 197832 214 Oct 2001          2 12.26445
 18: Nov-01   B 317468 525 Nov 2001          2 12.10331
 19: Dec-01   B 238668 217 Dec 2001          2 12.26195

如果您想找回那些 NA 的背影,您可以执行以下操作:

 pdat3 <-as.data.frame(pdat3)
 pdat4<-merge(pdat3, pdat, 
    by=c("Time","Firm","Out", "Lab","time1"), 
    all.x=TRUE,all.y=TRUE)

   Time Firm    Out Lab    time1 Cap.lag.ln       V1 Cap
 1  Apr-00    A 355789 134 Apr 2000          2 12.57580  12
 2  Apr-01    B 318472  NA Apr 2001         NA       NA  56
 3  Aug-00    A 188545 247 Aug 2000          3 12.28887  75
 4  Aug-01    B 174887  85 Aug 2001          3 12.25209  NA
 5  Dec-00    A 238668 217 Dec 2000          2 12.48949  16
 6  Dec-01    B 238668 217 Dec 2001          2 12.26195  16
 7  Feb-00    A 142452 334 Feb 2000          2 12.41211  15
 8  Feb-01    B 135288 109 Feb 2001          3 12.20776  45
 9  Jan-00    A 161521 261 Jan 2000         NA       NA  13
 10 Jan-01    B 241286 298 Jan 2001         NA       NA  42
 11 Jul-00    A 255447  41 Jul 2000          2 12.78611  45
 12 Jul-01    B 118945 143 Jul 2001          2 12.33665  45
 13 Jun-00    A 258762 119 Jun 2000          2 12.59702  12
 14 Jun-01    B 390230  50 Jun 2001          2 12.52360  12
 15 Mar-00    A 365697 156 Mar 2000          2 12.54861  14
 16 Mar-01    B 363609   7 Mar 2001          3 12.67984  24
 17 May-00    A 376843 159 May 2000          2 12.54520  15
 18 May-01    B 446279   0 May 2001          4 12.87698  12
 19 Nov-00    A 317468 525 Nov 2000          2 12.33084  15
 20 Nov-01    B 317468 525 Nov 2001          2 12.10331  15
 21 Oct-00    A 273209  62 Oct 2000         NA       NA  12
 22 Oct-01    B 197832 214 Oct 2001          2 12.26445  12
 23 Sep-00    A 213663 251 Sep 2000          4 12.10858  NA
 24 Sep-01    B 183770  80 Sep 2001         NA       NA  15