SAS-将数据附加到 table 并在结果 table 上增加一个标志

SAS-Append data to a table and increment a flag on the resultant table

我在 SAS 中有两个表 "Table_A" 和 "Table_A_Archive"。作为 ETL 过程的一部分,"Table_A" 每天都会重新创建,其数据需要归档到 "Table_A_Archive" 中。数据归档到 "Table_A_Archive" 时,需要创建/更新标志列 "DT_FLAG"。

第一天 table 的样子

 "Table_A"
 | ID  | Load_Date
 ------ -------------
 | 100 | 01JUN2020:12:13:56

 "Table_A_Archive"
  | ID  | Load_Date           | DT_FLAG
  ------ --------------------- ---------
  | 100 | 01JUN2020:12:13:56  | 1

第 2 天

 "Table_A"
 | ID  | Load_Date
 ------ ------------
 | 101 | 02JUN2020:12:13:56

 "Table_A_Archive"
 | ID  | Load_Date           | DT_FLAG
 ------ --------------------- ---------
 | 100 | 01JUN2020:12:13:56  | 2
 | 101 | 02JUN2020:12:13:56  | 1

新数据加载DT_FLAG为1,旧记录DT_FLAG加1。Load_Date是关键点。我写了一个 SAS 代码,但它看起来有点乱,有人可以帮我做一个 SAS Datastep

 /*
  * Cntl_archive
  *   Appends the daily table to its archive and recomputes DT_FLAG so that
  *   the most recent Load_Date gets 1, the next most recent 2, and so on.
  *
  * Parameters:
  *   table_name=       daily table to archive (must contain Load_Date)
  *   arch_table_name=  archive table; created on first use
  *
  * Side effects: (re)creates &arch_table_name. and a work table TEMP,
  * and declares the global macro variable WRK.
  */
 %macro Cntl_archive(table_name=,arch_table_name=);
 /* WRK is never assigned here; the declaration is kept only so existing
    callers that reference it keep working. */
 %GLOBAL WRK;

 %if %sysfunc(exist(&arch_table_name.)) %then %do;

   /* FORCE lets the append proceed although the daily table has no
      DT_FLAG (and possibly no IS_ACTIVE) column. */
   proc append base=&arch_table_name. data=&table_name. force;
   run;

   /* One row per distinct load timestamp; WRK is the ranking variable. */
   proc sql;
     create table TEMP as
     select distinct Load_Date, Load_Date as WRK
     from &arch_table_name.
     order by Load_Date desc;
   quit;

   /* DESCENDING: the latest Load_Date receives rank 1. */
   proc rank data=TEMP descending out=TEMP;
     var WRK;
     ranks count;
   run;

   /* Drop the stale flag so the join below can add the fresh one. */
   data &arch_table_name. (drop=DT_FLAG);
     set &arch_table_name.;
   run;

   proc sql;
     create table &arch_table_name. as
     select T0.*, T1.count as DT_FLAG
     from &arch_table_name. T0
     inner join TEMP T1 on T0.Load_Date = T1.Load_Date;
   /* The original had no semicolon after QUIT, so the statement stayed
      open and consumed whatever code followed the macro call. */
   quit;
 %end;
 %else %do;

   /* First run: the archive does not exist yet, so seed it from the
      daily table with DT_FLAG = 1. */
   data &arch_table_name.;
     set &table_name.;
     DT_FLAG= 1;
     IS_ACTIVE='';
   run;
 %end;
 %mend Cntl_archive;

我想通过 merge 语句做同样的事情:

/* First run only: seed the archive with a copy of the daily table. */
%if not %sysfunc(exist(Table_A_Archive)) %then %do;
  data Table_A_Archive;
    set Table_A;
  run;
%end;

/* Match-merge by ID (both inputs must be sorted by ID).
   IDs already present in the archive get DT_FLAG + 1 (a missing flag
   counts as 0); IDs found only in Table_A start at 1. */
data Table_A_Archive;
  merge Table_A_Archive(in=in_archive) Table_A;
  by ID;

  DT_FLAG = ifn(in_archive, sum(DT_FLAG, 1), 1);
run;

考虑到您可能希望这项日常工作尽可能快,我建议使用 update 或 modify 语句来代替 merge。

/* First run only: seed the archive with a copy of the daily table. */
%if %sysfunc(exist(Table_A_Archive)) = 0 %then %do;
  data Table_A_Archive;
    set Table_A;
  run;
%end;

/* Apply Table_A as transactions against the archive, matched by ID
   (both inputs must be sorted by ID; IDs must be unique in the archive).

   _IORC_ is only populated by MODIFY and by SET with KEY=, so the original
   test "_iorc_ = %sysrc(_sok)" never matched in an UPDATE step and DT_FLAG
   was never touched. The IN= flag is used instead to tell archived IDs
   from new ones. */
data Table_A_Archive;
  update Table_A_Archive(in=in_archive) Table_A;
  by ID;

  /* UPDATE iterates once per transaction row but writes one row per BY
     group, so adjust the flag only on the last iteration of the group. */
  if last.ID then do;
    if in_archive then DT_FLAG = sum(DT_FLAG, 1);  /* missing counts as 0 */
    else DT_FLAG = 1;                              /* ID is new today     */
  end;
run;

其中 MODIFY 语句更高效,因为它可以就地修改数据而无需创建数据集的副本;而 UPDATE 语句与 MERGE 一样,仍然会重写整个输出数据集。

另一种做法是在 proc sql 中使用带计数的相关子查询。不幸的是,SAS 不允许在更新一个表时于子查询中引用该表自身,因此这里使用了一个临时副本表。下面假设 ID 每天递增。

proc sql;
    /* Append today's rows; DT_Flag is filled in by the UPDATE below. */
    insert into Table_A_Archive (ID, Load_Date)
    select ID, Load_Date
    from Table_A;

    /* PROC SQL cannot reference the table being updated inside its own
       subquery, so snapshot the distinct load timestamps first. */
    create table temp as
    select distinct Load_Date
    from Table_A_Archive;

    /* Intentionally no WHERE clause: every archived row is re-flagged.
       DT_Flag = number of distinct loads at or after the row's own load,
       so the newest load gets 1 and older loads count upward.
       The original predicate (t.ID <= sub.ID and t.Load_Date = sub.Load_Date)
       only counted rows of the same load, giving 1 for every row when each
       load holds a single ID.
       NOTE(review): assumes all rows of one load share the same Load_Date
       value - confirm against the ETL. */
    update Table_A_Archive t
    set DT_Flag = (select count(*)
                   from temp sub
                   where sub.Load_Date >= t.Load_Date);

    drop table temp;
quit;

这是一种使用 MODIFY 语句更新现有观察值中 DT_FLAG 的值并附加新值的方法。

首先让我们创建初始 A 并使用它创建一个带有额外变量的空 A_ARCHIVE。 (请注意,我重命名了您的时间戳变量,以避免因名为 "date" 的变量具有日期时间值而不是日期值而引起的混淆。)

/* Day-1 contents of A: a single row whose load_dt is read as a datetime
   value and displayed with the DATETIME19. format. */
data a;
  input id
        load_dt :datetime.;
  format load_dt datetime19.;
datalines;
100 01JUN2020:12:13:56
;

/* Create A_ARCHIVE with zero observations: it takes the variables of A
   and adds a numeric DT_FLAG. OBS=0 makes the step stop at the SET
   statement before any row is written. */
data a_archive;
  set a(obs=0);
  dt_flag = 0;
run;

现在让我们将 A 附加到 A_ARCHIVE。

/* Age the existing archive rows and append the current contents of A
   in a single DATA step that modifies A_ARCHIVE in place. */
data a_archive;
  /* Pass 1: read every observation already in the archive, add 1 to its
     dt_flag (SUM treats a missing value as 0) and write it back. */
  do while(not eof1);
    modify a_archive end=eof1;
    dt_flag=sum(dt_flag,1);
    replace;
  end;
  /* Pass 2: read every observation of A, set dt_flag to 1 and add it to
     the archive; in a MODIFY step OUTPUT appends a new observation. */
  do until(eof2);
    set a end=eof2;
    dt_flag=1;
    output;
  end;
run;

现在您可以制作新版本的 A 并重新运行相同的数据步骤来附加它。

/* Day-2 contents of A: replaces the previous A with one new row. */
data a;
  input id
        load_dt :datetime.;
  format load_dt datetime19.;
datalines;
101 02JUN2020:12:13:56
;

/* Same append step as before, rerun for the new version of A. */
data a_archive;
  /* Pass 1: read every observation already in the archive, add 1 to its
     dt_flag (SUM treats a missing value as 0) and write it back. */
  do while(not eof1);
    modify a_archive end=eof1;
    dt_flag=sum(dt_flag,1);
    replace;
  end;
  /* Pass 2: read every observation of A, set dt_flag to 1 and add it to
     the archive; in a MODIFY step OUTPUT appends a new observation. */
  do until(eof2);
    set a end=eof2;
    dt_flag=1;
    output;
  end;
run;

结果:

Obs     id                load_dt  dt_flag

 1     100     01JUN2020:12:13:56       2
 2     101     02JUN2020:12:13:56       1

使用 Proc APPEND 并在需要时即时计算 DT_FLAG。除了向其中添加记录外,无需弄乱存档。

这里的“即时计算”(on-the-fly)通过 DATA 步视图实现。

示例:

示例 want 数据集在 WORK. 中,但在您的实际案例中会是一些 PERM.

/* Simulate a clean start followed by three daily ETL loads, each one
   archived with PROC APPEND. */

proc delete data=want;
run;
proc delete data=want_archive;
run;

/* ---- Day 1, load #1 ---- */

data DAILY_ETL;
  ID = 100;
  load_date = today() - 100;
  format load_date yymmdd10.;
run;

data want;
  set DAILY_ETL;
run;

/* The first append creates want_archive because it does not exist yet. */
proc append base=want_archive data=want;
run;

/* ---- Day 2, load #2 ---- */

data DAILY_ETL;
  ID = 100;
  load_date = today() - 99;
  format load_date yymmdd10.;
run;

data want;
  set DAILY_ETL;
run;

proc append base=want_archive data=want;
run;

/* ---- Day 4, load #3 ---- */

data DAILY_ETL;
  ID = 100;
  load_date = today() - 97;
  format load_date yymmdd10.;
run;

data want;
  set DAILY_ETL;
run;

proc append base=want_archive data=want;
run;

查看结果

* view for on-the-fly DT_FLAG (do once);

/* The original step had no VIEW= option, so it wrote a static data set
   that went stale as soon as another load was appended. With VIEW= the
   step is stored and runs each time want_archive_v is read.

   dt_flag counts rows from the end of the archive: the last row gets 1.
   NOTE(review): this matches "loads ago" only when each load appends
   exactly one row - confirm for the real data.
   NOTE(review): if WORK.want_archive_v already exists as a data set from
   an earlier run, it presumably has to be deleted before the view can be
   created - verify. */
data want_archive_v / view=want_archive_v;
  set want_archive nobs=N;
  dt_flag = N - _N_ + 1;
run;

/* Opens the view in the interactive VIEWTABLE window. */
dm 'viewtable want_archive_v';

我最终用下面的方法解决了这个问题:

/*
 * Cntl_archive
 *   Archives &table_name. into &arch_table_name.
 *   - Archive missing: it is created from the daily table with DT_FLAG = 1
 *     and an empty IS_ACTIVE column.
 *   - Archive present: the daily rows are stamped with (DT_FLAG of the
 *     archive's last observation) + 1 and appended.
 */
%macro Cntl_archive(table_name=,arch_table_name=);

%if not %sysfunc(exist(&arch_table_name.)) %then %do;

  /* First run: build the archive from the daily table. */
  data &arch_table_name.;
    set &table_name.;
    DT_FLAG= 1;
    IS_ACTIVE='';
  run;

%end;
%else %do;

  data Data_append;
    set &table_name.;
    /* Only on the first daily row: fetch dt_flag from the archive's last
       observation by direct access and add 1. The value is retained, so
       every later daily row carries the same flag. */
    if _n_ = 1 then do;
      set &arch_table_name.(keep=dt_flag) point=last_row nobs=last_row;
      dt_flag + 1;
    end;
  run;

  /* FORCE lets the append proceed although the variable lists differ. */
  proc append base=&arch_table_name. data=Data_append force;
  run;

%end;
%mend Cntl_archive;