遍历 data.table 中的所有变量以创建 n 阶滞后

Looping all Variables in Data.table to create n-lags

我有一个包含 200 个观测值和 20 个变量的 data.table,需要在一个循环中为这 20 个变量分别生成 n 阶滞后。

我正在尝试类似以下的操作,但无法正常工作。

请帮忙?

# Names of all columns in Model_X; each of them is lagged in the loop below.
nombresvar = names(Model_X)

# For every column name j and every lag i in 1:3, add a column named paste0(j, i).
for (j in nombresvar) for(i in 1:3)

 # NOTE: paste0('Model_X$', j) produces a character string, so lag() is handed
 # that text rather than the values of column j.
 Model_X[,c(paste0(j, i)) := lag(c(paste0('Model_X$', j)), i)]

问题出在 lag 函数上:当我尝试为 nombresvar 向量中列出的每个变量生成 3 个滞后变量时,代码无法正常工作。

每个循环的滞后函数应该如下所示:

lag ( ModelX$Variable1, 1)
lag ( ModelX$Variable1, 2)
lag ( ModelX$Variable1, 3)
lag ( ModelX$Variable2, 1)
....
lag ( ModelX$VariableN, 3)

不确定这是否是您想要的:

# Reproducible example data: 200 observations of 20 integer variables,
# each sampled with replacement from 0..100.
set.seed(1)
# `replace` is spelled out in full; `rep = TRUE` only worked through
# partial argument matching.
df <- data.frame(replicate(20, sample(0:100, 200, replace = TRUE)))

# Shift a vector down by `lag` positions, padding the start with NA.
#
# x:   vector to shift.
# lag: number of positions to shift by (passed to dplyr::lag() as `n`).
#
# Returns a vector of the same length as `x`.
#
# The call is namespace-qualified on purpose: a bare lag() resolves to
# stats::lag() whenever dplyr is not attached, and that function only
# adjusts time-series attributes without moving any values.
lags <- function(x, lag) {
  dplyr::lag(x, n = lag)
}

# Attach dplyr with library() so that a missing package stops the script
# here; require() would only return FALSE and let later calls misbehave.
library(dplyr) # lags() relies on dplyr's lag() (lead() is its counterpart)
# Apply lags() to every column of df; each result is a list holding one
# shifted vector per column.
lag1 <- lapply(df, lags, lag = 1)
lag2 <- lapply(df, lags, lag = 2)
lag3 <- lapply(df, lags, lag = 3)

# Column-bind a list of vectors into a matrix and return it as a data frame.
bind_columns <- function(columns) {
  as.data.frame(do.call(cbind, columns))
}

# One data frame per lag, built from the corresponding list of shifted columns.
lag_1_df <- bind_columns(lag1)
lag_2_df <- bind_columns(lag2)
lag_3_df <- bind_columns(lag3)

# Build column names of the form "var <k> lag <n>".
#
# data: data frame whose columns are to be named.
# lag:  lag number written into every name.
#
# Returns a character vector with one name per column of `data`
# (empty when `data` has no columns).
lag_col_names <- function(data, lag) {
  # seq_along() instead of 1:length(), which counts 1, 0 on empty input.
  sprintf("var %d lag %d", seq_along(data), as.integer(lag))
}

# column names lag 1
names_lag1 <- lag_col_names(lag_1_df, 1)
colnames(lag_1_df) <- names_lag1
# column names lag 2
names_lag2 <- lag_col_names(lag_2_df, 2)
colnames(lag_2_df) <- names_lag2
# column names lag 3
names_lag3 <- lag_col_names(lag_3_df, 3)
colnames(lag_3_df) <- names_lag3


# Combine the lag-1, lag-2 and lag-3 data frames column-wise into one table,
# keeping the column names exactly as assigned.
all_df <- data.frame(lag_1_df, lag_2_df, lag_3_df, check.names = FALSE)
# Preview the first six rows.
head(all_df, n = 6L)

输出:

> head(all_df)
  var 1 lag 1 var 2 lag 1 var 3 lag 1 var 4 lag 1 var 5 lag 1 var 6 lag 1 var 7 lag 1 var 8 lag 1 var 9 lag 1
1          NA          NA          NA          NA          NA          NA          NA          NA          NA
2          26          27          66          82          86          53          37          24          13
3          37          22          18          93           3          69          74          65           4
4          57          52          96          14          98          38          94          98           3
5          91          27          90          75          75          96          67          38          92
6          20          18          95          98          27          11          70          46          84
  var 10 lag 1 var 11 lag 1 var 12 lag 1 var 13 lag 1 var 14 lag 1 var 15 lag 1 var 16 lag 1 var 17 lag 1 var 18 lag 1
1           NA           NA           NA           NA           NA           NA           NA           NA           NA
2            6           88           60            6           63           94           81           64           95
3           35           97           95            5           21           56           48          100            7
4           58           87           12           98           91           57           17           51           37
5           54           44           52           80           59           10           40           94           67
6           61           19           94            3           17           39           82           19            1
  var 19 lag 1 var 20 lag 1 var 1 lag 2 var 2 lag 2 var 3 lag 2 var 4 lag 2 var 5 lag 2 var 6 lag 2 var 7 lag 2
1           NA           NA          NA          NA          NA          NA          NA          NA          NA
2           77           28          NA          NA          NA          NA          NA          NA          NA
3           59           75          26          27          66          82          86          53          37
4           72           61          37          22          18          93           3          69          74
5           28           51          57          52          96          14          98          38          94
6           44           88          91          27          90          75          75          96          67

使用 data.table 中的 shift() 和 set():

library(data.table)

# Example table with ten rows: bar and baz are foo multiplied by 2 and 3.
DT <- data.table(
  foo = seq_len(10),
  bar = 2L * seq_len(10),
  baz = 3L * seq_len(10)
)

# Columns to lag, and the lag lengths to create for each of them.
LagCols <- c("bar", "baz")
LagLengths <- 1:2

# Add one column per (column, lag length) pair, named "<column>_lag_<n>".
# set() adds each column to DT by reference, so DT is not reassigned.
for (y in LagCols) {
  for (z in LagLengths) {
    # paste0() already returns the column name as a string; the eval()
    # that used to wrap it was a no-op and has been removed.
    set(
      DT,
      j = paste0(y, "_lag_", z),
      value = shift(DT[[y]], n = z, type = "lag")
    )
  }
}

print(DT)

给出以下内容:

    foo bar baz bar_lag_1 bar_lag_2 baz_lag_1 baz_lag_2
 1:   1   2   3        NA        NA        NA        NA
 2:   2   4   6         2        NA         3        NA
 3:   3   6   9         4         2         6         3
 4:   4   8  12         6         4         9         6
 5:   5  10  15         8         6        12         9
 6:   6  12  18        10         8        15        12
 7:   7  14  21        12        10        18        15
 8:   8  16  24        14        12        21        18
 9:   9  18  27        16        14        24        21
10:  10  20  30        18        16        27        24

还有一种更简单的方法可以创建额外的滞后列。data.table 中 shift() 函数的 n 参数定义为:

Non-negative integer vector denoting the offset to lead or lag the input by. To create multiple lead/lag vectors, provide multiple values to n

所以,

# Lag baz by 0, 1, 2 and 3 rows in a single call; each offset gives one column.
DT[, shift(baz, n = 0:3)]

返回

    V1 V2 V3 V4
 1:  3 NA NA NA
 2:  6  3 NA NA
 3:  9  6  3 NA
 4: 12  9  6  3
 5: 15 12  9  6
 6: 18 15 12  9
 7: 21 18 15 12
 8: 24 21 18 15
 9: 27 24 21 18
10: 30 27 24 21

现在,OP 要求对每个变量进行移位,并根据移位量为新列命名。这可以通过以下方式实现:

# Shift every column of DT by 0..3 rows, then flatten the per-column lists
# one level so that each shifted vector becomes its own column.
DT[, unlist(lapply(.SD, function(col) shift(col, n = 0:3)), recursive = FALSE)]
    foo1 foo2 foo3 foo4 bar1 bar2 bar3 bar4 baz1 baz2 baz3 baz4
 1:    1   NA   NA   NA    2   NA   NA   NA    3   NA   NA   NA
 2:    2    1   NA   NA    4    2   NA   NA    6    3   NA   NA
 3:    3    2    1   NA    6    4    2   NA    9    6    3   NA
 4:    4    3    2    1    8    6    4    2   12    9    6    3
 5:    5    4    3    2   10    8    6    4   15   12    9    6
 6:    6    5    4    3   12   10    8    6   18   15   12    9
 7:    7    6    5    4   14   12   10    8   21   18   15   12
 8:    8    7    6    5   16   14   12   10   24   21   18   15
 9:    9    8    7    6   18   16   14   12   27   24   21   18
10:   10    9    8    7   20   18   16   14   30   27   24   21

数据

为了便于对比,这里使用了前一个回答中的样本数据:

library(data.table)
# Sample table with ten rows: bar and baz are foo multiplied by 2 and 3.
DT <- data.table(
  foo = seq_len(10),
  bar = 2L * seq_len(10),
  baz = 3L * seq_len(10)
)