SAS-将数据附加到 table 并在结果 table 上增加一个标志
SAS-Append data to a table and increment a flag on the resultant table
我在 SAS 中有两个 table:"Table_A" 和 "Table_A_Archive"。作为 ETL 过程的一部分,"Table_A" 每天都会重新创建,其数据需要存档到 "Table_A_Archive" 中。每次存档时,都要在 "Table_A_Archive" 上创建或更新标志列 "DT_FLAG"。
第一天 table 的样子
"Table_A"
| ID | Load_Date
------ -------------
| 100 | 01JUN2020:12:13:56
"Table_A_Archive"
| ID | Load_Date | DT_FLAG
------ --------------------- ---------
| 100 | 01JUN2020:12:13:56 | 1
第 2 天
"Table_A"
| ID | Load_Date
------ ------------
| 101 | 02JUN2020:12:13:56
"Table_A_Archive"
| ID | Load_Date | DT_FLAG
------ --------------------- ---------
| 100 | 01JUN2020:12:13:56 | 2
| 101 | 02JUN2020:12:13:56 | 1
新数据加载DT_FLAG为1,旧记录DT_FLAG加1。Load_Date是关键点。我写了一个 SAS 代码,但它看起来有点乱,有人可以帮我做一个 SAS Datastep
/*
 * Cntl_archive: archive a daily table and renumber DT_FLAG on the archive.
 *
 * Parameters:
 *   table_name=       dataset holding the current load
 *   arch_table_name=  archive dataset; created on first use
 *
 * When the archive exists, the current load is appended and DT_FLAG is
 * recomputed for every row as the descending rank of its Load_Date, so the
 * newest Load_Date gets 1 and each older one gets the next higher number.
 * When it does not exist, it is created from the current load with
 * DT_FLAG = 1 and an empty one-character IS_ACTIVE column.
 *
 * Side effect: leaves a dataset named TEMP in the default library.
 */
%macro Cntl_archive(table_name=,arch_table_name=);
/* Declares a global macro variable WRK; nothing in this macro reads it
   (WRK below is a column name, not a macro reference). */
%GLOBAL WRK;
%if %sysfunc(exist(&arch_table_name.)) %then %do;
/* FORCE lets the append proceed although the daily table lacks DT_FLAG. */
proc append base=&arch_table_name. data=&table_name. force;
run;
/* One row per distinct Load_Date; WRK is a copy used as the ranking variable. */
proc sql;
Create table TEMP as
Select distinct Load_Date,Load_Date as WRK from &arch_table_name.
order by Load_Date desc
;quit;
/* Descending rank: latest Load_Date -> count = 1. */
proc rank data=TEMP descending out=TEMP;
var WRK;
ranks count;
run;
/* Drop the stale flag so the join below can recreate it. */
data &arch_table_name. (drop=DT_FLAG);
set &arch_table_name.;
run;
/* Rebuild the archive with the fresh rank attached as DT_FLAG. */
proc sql;
Create table &arch_table_name. as
Select T0.*,T1.count as DT_FLAG from &arch_table_name. T0
inner join TEMP T1 on T0.Load_Date=T1.Load_Date
;quit;
/* The semicolon after QUIT was missing, which left the PROC SQL step
   unterminated unless the caller happened to supply one. */
%end;
%else %do;
/* First run: seed the archive from the current load. */
data &arch_table_name.;
set &table_name.;
DT_FLAG= 1;
IS_ACTIVE='';
run;
%end;
%mend Cntl_archive;
我想通过 merge
语句做同样的事情:
/* Seed the archive from today's table when it does not exist yet. */
%if not %sysfunc(exist(Table_A_Archive)) %then %do;
    data Table_A_Archive;
        set Table_A;
    run;
%end;

/* Match-merge archive and daily table by ID (both must be sorted by ID):
   IDs already in the archive have DT_FLAG incremented, IDs found only in
   the daily table start at 1. */
data Table_A_Archive;
    merge Table_A_Archive(in=in_archive) Table_A;
    by ID;
    if not in_archive then DT_FLAG = 1;
    else DT_FLAG = sum(DT_FLAG, 1);
run;
考虑到您可能希望这项日常作业尽可能快,我建议使用 update 或 modify 语句来代替 merge:
/* Seed the archive from today's table when it does not exist yet. */
%if %sysfunc(exist(Table_A_Archive)) = 0 %then %do;
data Table_A_Archive;
set Table_A;
run;
%end;
/*
 * Apply Table_A as a transaction dataset to the archive, matching on ID
 * (both datasets must be sorted by ID).
 *
 * The IN= flag identifies IDs that already exist in the archive. The
 * original tested _IORC_, which is populated by MODIFY and SET KEY=, not by
 * UPDATE, so it was not a reliable "row matched" test; new IDs also never
 * received DT_FLAG = 1.
 */
data Table_A_Archive;
update Table_A_Archive(in=in_archive) Table_A;
by ID;
/* Act once per ID, on the last row of the BY group, so that several
   transactions for one ID do not increment the flag repeatedly. */
if last.ID then do;
if in_archive then DT_FLAG = sum(DT_FLAG,1);
else DT_FLAG = 1;
end;
run;
其中 modify 语句更高效,因为它可以就地修改数据集而无需创建数据集的副本;update 语句则与 merge 一样会重写整个数据集。
也可以考虑使用 proc sql 配合带计数的相关子查询(correlated subquery)。不幸的是,SAS 不允许在更新 table 时在子查询中引用该 table 自身,因此这里使用了一个临时 table 副本。下面假设 ID 每天递增。
proc sql;
/* Append today's rows; DT_Flag is left missing here and filled in below. */
insert into Table_A_Archive (ID, Load_Date)
select ID, Load_Date
from Table_A;
/* Snapshot of the keys: the UPDATE below cannot read from the table it is
   updating, so the subquery reads this copy instead. */
create table temp as
select ID, Load_Date from Table_A_Archive;
/* Intentionally no WHERE clause: every archive row is renumbered.
   DT_Flag = number of distinct loads at or after the row's own load, so the
   newest load gets 1 and each older load one more.
   The original compared Load_Date with "=", which yields 1 for every row
   whose Load_Date is unique, so old rows never aged. COUNT(DISTINCT ...)
   keeps the result right when one load contains several rows. */
update Table_A_Archive t
set DT_Flag = (select count(distinct sub.Load_Date)
from temp sub
where t.ID <= sub.ID
and t.Load_Date <= sub.Load_Date);
drop table temp;
quit;
这是一种使用 MODIFY
语句更新现有观察值中 DT_FLAG 的值并附加新值的方法。
首先让我们创建初始 A 并使用它创建一个带有额外变量的空 A_ARCHIVE。 (请注意,我重命名了您的时间戳变量,以避免因名为 "date" 的变量具有日期时间值而不是日期值而引起的混淆。)
/* Day-1 sample load: one row with a datetime stamp. */
data a;
    input id load_dt :datetime.;
    format load_dt datetime19.;
datalines;
100 01JUN2020:12:13:56
;

/* Empty archive with the structure of A plus a numeric dt_flag column;
   obs=0 means no rows are read, so none are written. */
data a_archive;
    set a(obs=0);
    dt_flag = 0;
run;
现在让我们将 A 附加到 A_ARCHIVE。
/* Append A to A_ARCHIVE in place: first age every existing archive row,
   then add the rows of A as new observations. */
data a_archive;
/* Pass 1: step through the existing archive rows and add 1 to dt_flag. */
do while(not eof1);
modify a_archive end=eof1;
dt_flag=sum(dt_flag,1);
/* Rewrite the current observation in place. */
replace;
end;
/* Pass 2: read every row of A and append it with dt_flag = 1. */
do until(eof2);
set a end=eof2;
dt_flag=1;
/* In a MODIFY step, OUTPUT adds a new observation to the dataset. */
output;
end;
run;
现在您可以制作新版本的 A 并重新运行相同的数据步骤来附加它。
/* Day-2 sample load: replaces A with a single new row. */
data a ;
input id load_dt :datetime.;
format load_dt datetime19.;
cards;
101 02JUN2020:12:13:56
;
/* Same append step as for day 1: age the existing archive rows, then
   append the rows of A. */
data a_archive;
/* Pass 1: step through the existing archive rows and add 1 to dt_flag. */
do while(not eof1);
modify a_archive end=eof1;
dt_flag=sum(dt_flag,1);
/* Rewrite the current observation in place. */
replace;
end;
/* Pass 2: read every row of A and append it with dt_flag = 1. */
do until(eof2);
set a end=eof2;
dt_flag=1;
/* In a MODIFY step, OUTPUT adds a new observation to the dataset. */
output;
end;
run;
结果:
Obs id load_dt dt_flag
1 100 01JUN2020:12:13:56 2
2 101 02JUN2020:12:13:56 1
使用 Proc APPEND 追加数据,并在需要时即时(on-the-fly)计算 DT_FLAG。除了向存档中添加记录外,无需改动存档本身。
这里的"即时计算"通过 DATA 步视图(view)实现。
示例:
示例中的 want 数据集位于 WORK 库中,但在您的实际场景中它会位于某个永久(PERM)库中。
/* Simulate a clean start followed by ETL activity archived with PROC APPEND. */
proc delete data=want;
run;
proc delete data=want_archive;
run;

/* One simulated load: build DAILY_ETL for ID 100 dated days_back days before
   today, copy it to WANT, then append WANT to WANT_ARCHIVE. */
%macro simulate_load(days_back);
    data DAILY_ETL;
        ID = 100;
        load_date = today()-&days_back.;
        format load_date yymmdd10.;
    run;

    data want;
        set DAILY_ETL;
    run;

    proc append base=want_archive data=want;
    run;
%mend simulate_load;

/* DAY 1, load #1 */
%simulate_load(100)

/* DAY 2, load #2 */
%simulate_load(99)

/* DAY 4, load #3 */
%simulate_load(97)
查看结果
/*
 * DATA step view that derives DT_FLAG on the fly (create once).
 *
 * The original DATA statement had no VIEW= option, so it built a static
 * dataset that went stale after the next append. With VIEW= the step runs
 * each time WANT_ARCHIVE_V is read.
 *
 * dt_flag = N - _N_ + 1 numbers rows from the bottom up: the last archive
 * row gets 1 and the first gets N. NOTE(review): this equals "loads since"
 * only when each load appends exactly one row - confirm for real data.
 * NOTE(review): if WANT_ARCHIVE_V already exists as a dataset, delete it
 * before creating the view.
 */
data want_archive_v / view=want_archive_v;
set want_archive nobs=N;
dt_flag = N - _N_ + 1;
run;
/* Open the view in the interactive VIEWTABLE window. */
dm 'viewtable want_archive_v';
我试过用这个方法解决的
/*
 * Cntl_archive: append a daily table to its archive, stamping the new rows.
 *
 * Parameters:
 *   table_name=       dataset holding the current load
 *   arch_table_name=  archive dataset; created on first use
 *
 * First run: the archive is created from the current load with DT_FLAG = 1
 * and an empty one-character IS_ACTIVE column.
 * Later runs: every row of the current load gets the dt_flag of the last
 * archive row plus 1, and the result (work dataset Data_append) is appended
 * to the archive. Existing archive rows are left untouched.
 */
%macro Cntl_archive(table_name=,arch_table_name=);
%if not %sysfunc(exist(&arch_table_name.)) %then %do;
    data &arch_table_name.;
        set &table_name.;
        DT_FLAG= 1;
        IS_ACTIVE='';
    run;
%end;
%else %do;
    data Data_append;
        set &table_name.;
        /* On the first row only: fetch dt_flag from the last archive row
           by direct access and bump it. The value is retained, so every
           later row of this load carries the same number. */
        if _n_ = 1 then do;
            set &arch_table_name.(keep=dt_flag) point=last_row nobs=last_row;
            dt_flag + 1;
        end;
    run;

    proc append base=&arch_table_name. data=Data_append force;
    run;
%end;
%mend Cntl_archive;
我在 SAS 中有两个 table:"Table_A" 和 "Table_A_Archive"。作为 ETL 过程的一部分,"Table_A" 每天都会重新创建,其数据需要存档到 "Table_A_Archive" 中。每次存档时,都要在 "Table_A_Archive" 上创建或更新标志列 "DT_FLAG"。
第一天 table 的样子
"Table_A"
| ID | Load_Date
------ -------------
| 100 | 01JUN2020:12:13:56
"Table_A_Archive"
| ID | Load_Date | DT_FLAG
------ --------------------- ---------
| 100 | 01JUN2020:12:13:56 | 1
第 2 天
"Table_A"
| ID | Load_Date
------ ------------
| 101 | 02JUN2020:12:13:56
"Table_A_Archive"
| ID | Load_Date | DT_FLAG
------ --------------------- ---------
| 100 | 01JUN2020:12:13:56 | 2
| 101 | 02JUN2020:12:13:56 | 1
新数据加载DT_FLAG为1,旧记录DT_FLAG加1。Load_Date是关键点。我写了一个 SAS 代码,但它看起来有点乱,有人可以帮我做一个 SAS Datastep
/*
 * Cntl_archive: archive a daily table and renumber DT_FLAG on the archive.
 *
 * Parameters:
 *   table_name=       dataset holding the current load
 *   arch_table_name=  archive dataset; created on first use
 *
 * When the archive exists, the current load is appended and DT_FLAG is
 * recomputed for every row as the descending rank of its Load_Date, so the
 * newest Load_Date gets 1 and each older one gets the next higher number.
 * When it does not exist, it is created from the current load with
 * DT_FLAG = 1 and an empty one-character IS_ACTIVE column.
 *
 * Side effect: leaves a dataset named TEMP in the default library.
 */
%macro Cntl_archive(table_name=,arch_table_name=);
/* Declares a global macro variable WRK; nothing in this macro reads it
   (WRK below is a column name, not a macro reference). */
%GLOBAL WRK;
%if %sysfunc(exist(&arch_table_name.)) %then %do;
/* FORCE lets the append proceed although the daily table lacks DT_FLAG. */
proc append base=&arch_table_name. data=&table_name. force;
run;
/* One row per distinct Load_Date; WRK is a copy used as the ranking variable. */
proc sql;
Create table TEMP as
Select distinct Load_Date,Load_Date as WRK from &arch_table_name.
order by Load_Date desc
;quit;
/* Descending rank: latest Load_Date -> count = 1. */
proc rank data=TEMP descending out=TEMP;
var WRK;
ranks count;
run;
/* Drop the stale flag so the join below can recreate it. */
data &arch_table_name. (drop=DT_FLAG);
set &arch_table_name.;
run;
/* Rebuild the archive with the fresh rank attached as DT_FLAG. */
proc sql;
Create table &arch_table_name. as
Select T0.*,T1.count as DT_FLAG from &arch_table_name. T0
inner join TEMP T1 on T0.Load_Date=T1.Load_Date
;quit;
/* The semicolon after QUIT was missing, which left the PROC SQL step
   unterminated unless the caller happened to supply one. */
%end;
%else %do;
/* First run: seed the archive from the current load. */
data &arch_table_name.;
set &table_name.;
DT_FLAG= 1;
IS_ACTIVE='';
run;
%end;
%mend Cntl_archive;
我想通过 merge
语句做同样的事情:
/* Seed the archive from today's table when it does not exist yet. */
%if not %sysfunc(exist(Table_A_Archive)) %then %do;
    data Table_A_Archive;
        set Table_A;
    run;
%end;

/* Match-merge archive and daily table by ID (both must be sorted by ID):
   IDs already in the archive have DT_FLAG incremented, IDs found only in
   the daily table start at 1. */
data Table_A_Archive;
    merge Table_A_Archive(in=in_archive) Table_A;
    by ID;
    if not in_archive then DT_FLAG = 1;
    else DT_FLAG = sum(DT_FLAG, 1);
run;
考虑到您可能希望这项日常作业尽可能快,我建议使用 update 或 modify 语句来代替 merge:
/* Seed the archive from today's table when it does not exist yet. */
%if %sysfunc(exist(Table_A_Archive)) = 0 %then %do;
data Table_A_Archive;
set Table_A;
run;
%end;
/*
 * Apply Table_A as a transaction dataset to the archive, matching on ID
 * (both datasets must be sorted by ID).
 *
 * The IN= flag identifies IDs that already exist in the archive. The
 * original tested _IORC_, which is populated by MODIFY and SET KEY=, not by
 * UPDATE, so it was not a reliable "row matched" test; new IDs also never
 * received DT_FLAG = 1.
 */
data Table_A_Archive;
update Table_A_Archive(in=in_archive) Table_A;
by ID;
/* Act once per ID, on the last row of the BY group, so that several
   transactions for one ID do not increment the flag repeatedly. */
if last.ID then do;
if in_archive then DT_FLAG = sum(DT_FLAG,1);
else DT_FLAG = 1;
end;
run;
其中 modify 语句更高效,因为它可以就地修改数据集而无需创建数据集的副本;update 语句则与 merge 一样会重写整个数据集。
也可以考虑使用 proc sql 配合带计数的相关子查询(correlated subquery)。不幸的是,SAS 不允许在更新 table 时在子查询中引用该 table 自身,因此这里使用了一个临时 table 副本。下面假设 ID 每天递增。
proc sql;
/* Append today's rows; DT_Flag is left missing here and filled in below. */
insert into Table_A_Archive (ID, Load_Date)
select ID, Load_Date
from Table_A;
/* Snapshot of the keys: the UPDATE below cannot read from the table it is
   updating, so the subquery reads this copy instead. */
create table temp as
select ID, Load_Date from Table_A_Archive;
/* Intentionally no WHERE clause: every archive row is renumbered.
   DT_Flag = number of distinct loads at or after the row's own load, so the
   newest load gets 1 and each older load one more.
   The original compared Load_Date with "=", which yields 1 for every row
   whose Load_Date is unique, so old rows never aged. COUNT(DISTINCT ...)
   keeps the result right when one load contains several rows. */
update Table_A_Archive t
set DT_Flag = (select count(distinct sub.Load_Date)
from temp sub
where t.ID <= sub.ID
and t.Load_Date <= sub.Load_Date);
drop table temp;
quit;
这是一种使用 MODIFY
语句更新现有观察值中 DT_FLAG 的值并附加新值的方法。
首先让我们创建初始 A 并使用它创建一个带有额外变量的空 A_ARCHIVE。 (请注意,我重命名了您的时间戳变量,以避免因名为 "date" 的变量具有日期时间值而不是日期值而引起的混淆。)
/* Day-1 sample load: one row with a datetime stamp. */
data a;
    input id load_dt :datetime.;
    format load_dt datetime19.;
datalines;
100 01JUN2020:12:13:56
;

/* Empty archive with the structure of A plus a numeric dt_flag column;
   obs=0 means no rows are read, so none are written. */
data a_archive;
    set a(obs=0);
    dt_flag = 0;
run;
现在让我们将 A 附加到 A_ARCHIVE。
/* Append A to A_ARCHIVE in place: first age every existing archive row,
   then add the rows of A as new observations. */
data a_archive;
/* Pass 1: step through the existing archive rows and add 1 to dt_flag. */
do while(not eof1);
modify a_archive end=eof1;
dt_flag=sum(dt_flag,1);
/* Rewrite the current observation in place. */
replace;
end;
/* Pass 2: read every row of A and append it with dt_flag = 1. */
do until(eof2);
set a end=eof2;
dt_flag=1;
/* In a MODIFY step, OUTPUT adds a new observation to the dataset. */
output;
end;
run;
现在您可以制作新版本的 A 并重新运行相同的数据步骤来附加它。
/* Day-2 sample load: replaces A with a single new row. */
data a ;
input id load_dt :datetime.;
format load_dt datetime19.;
cards;
101 02JUN2020:12:13:56
;
/* Same append step as for day 1: age the existing archive rows, then
   append the rows of A. */
data a_archive;
/* Pass 1: step through the existing archive rows and add 1 to dt_flag. */
do while(not eof1);
modify a_archive end=eof1;
dt_flag=sum(dt_flag,1);
/* Rewrite the current observation in place. */
replace;
end;
/* Pass 2: read every row of A and append it with dt_flag = 1. */
do until(eof2);
set a end=eof2;
dt_flag=1;
/* In a MODIFY step, OUTPUT adds a new observation to the dataset. */
output;
end;
run;
结果:
Obs id load_dt dt_flag
1 100 01JUN2020:12:13:56 2
2 101 02JUN2020:12:13:56 1
使用 Proc APPEND 追加数据,并在需要时即时(on-the-fly)计算 DT_FLAG。除了向存档中添加记录外,无需改动存档本身。
这里的"即时计算"通过 DATA 步视图(view)实现。
示例:
示例中的 want 数据集位于 WORK 库中,但在您的实际场景中它会位于某个永久(PERM)库中。
/* Simulate a clean start followed by ETL activity archived with PROC APPEND. */
proc delete data=want;
run;
proc delete data=want_archive;
run;

/* One simulated load: build DAILY_ETL for ID 100 dated days_back days before
   today, copy it to WANT, then append WANT to WANT_ARCHIVE. */
%macro simulate_load(days_back);
    data DAILY_ETL;
        ID = 100;
        load_date = today()-&days_back.;
        format load_date yymmdd10.;
    run;

    data want;
        set DAILY_ETL;
    run;

    proc append base=want_archive data=want;
    run;
%mend simulate_load;

/* DAY 1, load #1 */
%simulate_load(100)

/* DAY 2, load #2 */
%simulate_load(99)

/* DAY 4, load #3 */
%simulate_load(97)
查看结果
/*
 * DATA step view that derives DT_FLAG on the fly (create once).
 *
 * The original DATA statement had no VIEW= option, so it built a static
 * dataset that went stale after the next append. With VIEW= the step runs
 * each time WANT_ARCHIVE_V is read.
 *
 * dt_flag = N - _N_ + 1 numbers rows from the bottom up: the last archive
 * row gets 1 and the first gets N. NOTE(review): this equals "loads since"
 * only when each load appends exactly one row - confirm for real data.
 * NOTE(review): if WANT_ARCHIVE_V already exists as a dataset, delete it
 * before creating the view.
 */
data want_archive_v / view=want_archive_v;
set want_archive nobs=N;
dt_flag = N - _N_ + 1;
run;
/* Open the view in the interactive VIEWTABLE window. */
dm 'viewtable want_archive_v';
我试过用这个方法解决的
/*
 * Cntl_archive: append a daily table to its archive, stamping the new rows.
 *
 * Parameters:
 *   table_name=       dataset holding the current load
 *   arch_table_name=  archive dataset; created on first use
 *
 * First run: the archive is created from the current load with DT_FLAG = 1
 * and an empty one-character IS_ACTIVE column.
 * Later runs: every row of the current load gets the dt_flag of the last
 * archive row plus 1, and the result (work dataset Data_append) is appended
 * to the archive. Existing archive rows are left untouched.
 */
%macro Cntl_archive(table_name=,arch_table_name=);
%if not %sysfunc(exist(&arch_table_name.)) %then %do;
    data &arch_table_name.;
        set &table_name.;
        DT_FLAG= 1;
        IS_ACTIVE='';
    run;
%end;
%else %do;
    data Data_append;
        set &table_name.;
        /* On the first row only: fetch dt_flag from the last archive row
           by direct access and bump it. The value is retained, so every
           later row of this load carries the same number. */
        if _n_ = 1 then do;
            set &arch_table_name.(keep=dt_flag) point=last_row nobs=last_row;
            dt_flag + 1;
        end;
    run;

    proc append base=&arch_table_name. data=Data_append force;
    run;
%end;
%mend Cntl_archive;