如何用 SAS 中的数据步骤替换联接

How to replace a join with a datastep in SAS

我正在尝试根据客户 ID 和时间范围计算给定变量的一些统计量。我当前的解决方案如下所示,但我想知道是否有办法把代码改写为数据步(data step)而不是 SQL 联接,因为在我的真实数据集上,联接需要很长时间才能执行完。

/* Sample data for client 1: ten consecutive daily rows (01JAN2021 through
   10JAN2021), each carrying a uniform random value in var. */
data have1(drop=i);
    id = 1;
    do i = 1 to 10;
        dt  = '31dec2020'd + i;   /* i-th day after the 31DEC2020 base date */
        var = rand('uniform');
        output;
    end;
    format dt ddmmyyp10.;
run;

/* Sample data for client 2: ten consecutive daily rows (01JAN2021 through
   10JAN2021), each carrying a uniform random value in var. */
data have2(drop=i);
    id = 2;
    do i = 1 to 10;
        dt  = '31dec2020'd + i;   /* i-th day after the 31DEC2020 base date */
        var = rand('uniform');
        output;
    end;
    format dt ddmmyyp10.;
run;

/* Stack the two per-client tables into one input table
   (all have1 rows first, followed by all have2 rows). */
data have_fin;
    set have1
        have2;
run;

/* For every row (a), average var over the rows (b) of the same id whose
   date falls in the trailing window (a.dt - 3 days, a.dt], i.e. the
   current day and the two days before it. Implemented as a self-join. */
proc sql;
    create table want1 as
    select a.id,
           a.dt,
           a.var,
           mean(b.var) as mean_var_3d
    from have_fin as a
         left join
         have_fin as b
         on  b.id = a.id
         and b.dt >  intnx('day', a.dt, -3, 'S')
         and b.dt <= a.dt
    group by a.id, a.dt, a.var;
quit;

/* For every row (a), average var over the rows (b) of the same id whose
   date falls in the trailing window (a.dt - 6 days, a.dt], i.e. the
   current day and the five days before it. Implemented as a self-join.
   NOTE(review): the output column is still named mean_var_3d although the
   window here is 6 days - looks like a copy/paste leftover; confirm before
   renaming, since downstream code may reference this name. */
proc sql;
    create table want2 as
    select a.id,
           a.dt,
           a.var,
           mean(b.var) as mean_var_3d
    from have_fin as a
         left join
         have_fin as b
         on  b.id = a.id
         and b.dt >  intnx('day', a.dt, -6, 'S')
         and b.dt <= a.dt
    group by a.id, a.dt, a.var;
quit;

改用临时数组(temporary arrays)和单个数据步(data step)

下面这个数据步可以得到同样的结果(前提是每个 ID 每天恰好有一条记录且日期连续,因为数组是按行而不是按日期取窗口的)。

  1. 对数据进行排序以确保顺序正确
  2. 为要计算的每一种移动平均声明一个临时数组。
  3. 确保数组在每个 ID 的开头为空
  4. 在正确的索引中为数组赋值。 MOD() 允许您动态索引数据,而无需包含单独的计数器变量。
  5. 取数组的平均值。如果您希望忽略每个 ID 的前两个结果(因为此时数组中只有 1 或 2 个数据点),也可以改为有条件地计算平均值。
/* Step 1: order rows by client and then by date, so the BY-group
   processing and the rolling windows below see each id's rows in
   chronological order. Sorts have_fin in place. */
proc sort data=have_fin;
    by id dt;
run;

/* Rolling means of var over the last 3 and last 6 rows of each ID.
   Expects have_fin to be sorted by ID (and by date within ID).
   The windows are counted in rows, not calendar days: they equal 3-day /
   6-day windows only when every ID has one row per consecutive day. */
data want;

    /* Step 2: one circular buffer per window length. _temporary_ arrays
       keep their values across iterations and add no columns to the
       output. Indexing starts at 0 so that MOD() can address them directly. */
    array win3{0:2} _temporary_;
    array win6{0:5} _temporary_;

    set have_fin;
    by ID;

    /* Step 3: empty both buffers when a new ID starts, so values never
       leak from one client into the next. */
    if first.ID then do;
        call missing(of win3{*});
        call missing(of win6{*});
    end;

    /* Step 4: overwrite the oldest slot with the current value. _n_ keeps
       counting across IDs, but any 3 (or 6) consecutive values of _n_ map
       to distinct slots, so each buffer always holds the most recent
       3 (or 6) values of the current ID. */
    win3{mod(_n_, 3)} = var;
    win6{mod(_n_, 6)} = var;

    /* Step 5: MEAN() skips missing slots, so the first rows of each ID are
       averaged over however many values are available so far. */
    mean3d = mean(of win3{*});
    mean6d = mean(of win6{*});
run;

如果您有 SAS/ETS 许可,这就非常简单了。

/* Writes the installed SAS products to the log; check that SAS/ETS is
   listed before relying on the PROC EXPAND approach. */
proc product_status;
run;

/* Order rows by client and then by date; the BY-group time-series
   processing that follows needs each id's rows in date order.
   Sorts have_fin in place. */
proc sort data=have_fin;
    by id dt;
run;

/* Moving averages of var per client, written to want_expand.
   - BY id   : each client is processed as a separate series.
   - ID dt   : dt is the time identifier of the series.
   - METHOD=NONE : no interpolation method is applied; only the
                   TRANSFORMOUT= operation is performed.
   - MOVAVE n    : backward moving average over n observations. */
proc expand data=have_fin out=want_expand;
    by id;
    id dt;
    convert var=mean_3d / method=none transformout=(movave 3);
    convert var=mean_6d / method=none transformout=(movave 6);
run;