如何用 statsmodels 循环调用自回归递归滤波器(autoregressive recursive filter)来生成多列

How to loop the autoregressive recursive filter with statsmodels to generate multiple columns

你好,我正在尝试把 statsmodels 的自回归递归滤波器函数放进一个循环中,以生成新的列。

但是我的脚本运行时会报错;如果只对单个列进行计算,则不会出错:

TypeError: can't multiply sequence by non-int of type 'float'(无法将序列与 'float' 类型的非整数相乘)
import statsmodels.api as sm
import statsmodels.tsa as tsa
import statsmodels.formula.api as smf
import pandas as pd


# Generate the example dataset: 10 rows where column `effectK` holds
# K * row_index (K = 1, 2, 3).
#
# The frame is built in one shot rather than by calling DataFrame.append
# once per row: DataFrame.append was deprecated in pandas 1.4 and removed in
# pandas 2.0, and appending inside a loop copies the whole frame each time.
n_rows = 10
data = pd.DataFrame(
    {f'effect{k}': [i * k for i in range(n_rows)] for k in (1, 2, 3)}
)

# Pin the dtype to int64 explicitly, as the original snippet did.
data = data.astype('int64')


# Compute the autoregressive recursive filter once per coefficient in
# `carry_over` and concatenate the results onto `data` as new columns.
# NOTE: this is the failing version from the question; evaluating it raises
# the TypeError shown in the traceback that follows.

# Candidate AR coefficients; each one is passed as the second positional
# argument (ar_coeff) of recursive_filter.
carry_over = (.1,.2)
# Generator expression: nothing is evaluated here; the filter calls only run
# when pd.concat unpacks `rolling_df` below.
# NOTE(review): `col` in the recursive_filter call sits outside the dict
# comprehension that binds `col`, so it is not the per-column loop variable --
# presumably a name left over in the session; confirm.
# NOTE(review): '{1:d}' is an integer format code but `i` is a float
# coefficient here -- verify the intended column suffix format.
rolling_df = (tsa.filters.filtertools.recursive_filter(col, i)
                .rename({col: '{0}_{1:d}'.format(col, i)       
                               for col in data.columns}, axis=1) 
                for i in carry_over)                                

# Unpacking the generator triggers the computation (and the error).
data = pd.concat((data, *rolling_df), axis=1)   
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-42-74a899bff628> in <module>
     10                 for i in carry_over)                                
     11 
---> 12 data = pd.concat((data, *rolling_df), axis=1)

<ipython-input-42-74a899bff628> in <genexpr>(.0)
      8                 .rename({col: '{0}_{1:d}'.format(col, i)       
      9                                for col in data.columns}, axis=1) 
---> 10                 for i in carry_over)                                
     11 
     12 data = pd.concat((data, *rolling_df), axis=1)

~\AppData\Local\Continuum\anaconda3\envs\regressor\lib\site-packages\statsmodels\tsa\filters\filtertools.py in recursive_filter(x, ar_coeff, init)
    208         zi = None
    209 
--> 210     y = signal.lfilter([1.], np.r_[1, -ar_coeff], x, zi=zi)
    211 
    212     if init is not None:

~\AppData\Local\Continuum\anaconda3\envs\regressor\lib\site-packages\scipy\signal\signaltools.py in lfilter(b, a, x, axis, zi)
   1395     else:
   1396         if zi is None:
-> 1397             return sigtools._linear_filter(b, a, x, axis)
   1398         else:
   1399             return sigtools._linear_filter(b, a, x, axis, zi)

TypeError: can't multiply sequence by non-int of type 'float'

下面修改了 rolling_df 的计算方式:用 data.apply 把 recursive_filter 逐列应用到数据上,再用 add_suffix 给新列名加上系数后缀:

# Short alias for the statsmodels filter function; purely for brevity.
f = tsa.filters.filtertools.recursive_filter

# For every AR coefficient, run the filter over each column of `data` and tag
# the resulting columns with that coefficient (e.g. 'effect1_0.1').
rolling_df = []
for coeff in carry_over:
    filtered = data.apply(f, ar_coeff=coeff)
    rolling_df.append(filtered.add_suffix(f'_{coeff}'))

# Put the filtered columns to the right of the original ones.
data = pd.concat([data, *rolling_df], axis=1)

data

   effect1  effect2  effect3  effect1_0.1  effect2_0.1  effect3_0.1  effect1_0.2  effect2_0.2  effect3_0.2
0        0        0        0     0.000000     0.000000     0.000000     0.000000     0.000000     0.000000
1        1        2        3     1.000000     2.000000     3.000000     1.000000     2.000000     3.000000
2        2        4        6     2.100000     4.200000     6.300000     2.200000     4.400000     6.600000
3        3        6        9     3.210000     6.420000     9.630000     3.440000     6.880000    10.320000
4        4        8       12     4.321000     8.642000    12.963000     4.688000     9.376000    14.064000
5        5       10       15     5.432100    10.864200    16.296300     5.937600    11.875200    17.812800
6        6       12       18     6.543210    13.086420    19.629630     7.187520    14.375040    21.562560
7        7       14       21     7.654321    15.308642    22.962963     8.437504    16.875008    25.312512
8        8       16       24     8.765432    17.530864    26.296296     9.687501    19.375002    29.062502
9        9       18       27     9.876543    19.753086    29.629630    10.937500    21.875000    32.812500