在 SAS 中重复一个宏

Repeat a macro in SAS

我正在尝试从下面的数据集 (test2) 中随机抽取 6 个案例。这些案例必须按特定顺序选取,即 ED、CCM、MAT、CAC。第一遍从付款人 1 (P1) 中选取,第二遍从付款人 2 (P2) 中选取,依此类推,直到总共得到 6 个案例。在每种度量类型中,随机数最小的案例最先被选中。下面是我正在使用的宏,我希望能对每个付款人重复执行它。

ID  Measure Payer
1439  CAC  P1
1135  CCM  P1
1736  ED   P1
1737  MAT  P1
1738  CCM  P2
2351  ED   P2
4251  ED   P1


/* Route each row of test2 to the work dataset named after its measure. */
DATA CAC CCM ED MAT;
   set test2;
   select (measure);
      when ('CAC') output CAC;
      when ('CCM') output CCM;
      when ('ED')  output ED;
      when ('MAT') output MAT;
      otherwise;   /* any other measure is written to none of the datasets */
   end;
RUN;


/*
 * %select(dsn, num)
 *
 * Reduces dataset &dsn, in place, to its first &num rows after ordering the
 * rows by Payer and then by their random number.
 *
 *   dsn - name of the dataset to reduce; it is overwritten.
 *         NOTE(review): the step reads a variable named random from &dsn;
 *         confirm the input dataset actually carries it.
 *   num - number of rows to keep
 */
%MACRO select (dsn,num);

DATA &dsn;
  set &dsn;

  /* MIN() requires at least two arguments, so min(random) is rejected by the
     compiler; the minimum of a single value is the value itself. */
  min_random=random;
RUN;
PROC SORT data=&dsn;
  by Payer min_random;
RUN;
DATA &dsn;
   set &dsn;
   /* Subsetting IF: keep only the first &num rows of the sorted data. */
   if _N_ le &num;
RUN;

 %MEND select;

/* Run %SELECT with num=1 on each measure dataset, in the order ED, CCM, MAT, CAC. */
%SELECT(ED,1);
%SELECT(CCM,1);
%SELECT(MAT,1);
%SELECT(CAC,1);

/* Stack the four datasets, keeping that same measure order. */
DATA sample1A;
   set ED
       CCM
       MAT
       CAC;
RUN;

对于上面的样本数据集,6个案例的输出应该是

1736  ED   P1
1135  CCM  P1
1737  MAT  P1
1439  CAC  P1
2351  ED   P2
1738  CCM  P2

我尝试通过以下方式解决这个问题:

  1. 计算取得所需记录数要执行的完整循环次数。每个完整循环取 4 条记录(ED、CCM、MAT、CAC 各一条)。在你的例子中,完整循环次数为 1。
  2. 计算剩余的不完整循环还需要再取的记录数。在你的例子中是 2 条,即 ED 和 CCM 各一条。

下面是代码,几乎不言自明。


正在创建示例数据集

/* Build the sample input: one row per case with its ID, measure and payer. */
data test2;
   /* In-stream data is read by default, so no INFILE statement is needed. */
   input ID
         Measure $
         Payer   $;
   datalines;
1439  CAC  P1
1135  CCM  P1
1736  ED   P1
1737  MAT  P1
1738  CCM  P2
2351  ED   P2
4251  ED   P1
;
run;


/* Split test2 into one work dataset per measure. */
DATA CAC CCM ED MAT;
   set test2;
   select;
      when (measure = 'CAC') output CAC;
      when (measure = 'CCM') output CCM;
      when (measure = 'ED')  output ED;
      when (measure = 'MAT') output MAT;
      otherwise;   /* rows with any other measure are not written anywhere */
   end;
RUN;


/*
 * %select(dsn, num)
 *
 * Draws up to &num rows at random from the first payer (in sort order) that
 * is still present in &dsn and appends them to sample1A.  All rows of that
 * payer are then removed from &dsn, so calling the macro again on the same
 * dataset draws from the next payer instead of re-selecting the same rows.
 *
 *   dsn - dataset to draw from; must contain Payer.  Rewritten in place.
 *   num - maximum number of rows to draw in this call
 *
 * Side effects: creates or extends sample1A; uses and deletes the work
 * dataset _select_pick.
 */
%MACRO select (dsn,num);

/* Give every remaining row a fresh random key (clock-seeded). */
DATA &dsn;
  set &dsn;

  min_random=ranuni(0);
RUN;
PROC SORT data=&dsn;
  by Payer min_random;
RUN;

/* Split the sorted rows: the first &num rows of the first payer are the
   draw, the rest of that payer is discarded (its pass is done), and every
   other payer stays in &dsn for later calls. */
DATA _select_pick &dsn;
   set &dsn;
   by Payer;
   if first.Payer then _payer_no + 1;
   if _payer_no = 1 then do;
      if _N_ le &num then output _select_pick;
   end;
   else output &dsn;
   drop _payer_no;
RUN;

proc append base=sample1A data=_select_pick force;
run;

proc datasets lib=work nolist;
   delete _select_pick;
quit;

%MEND select;

/*
 * %loop(inp=, Num_of_records_to_extract=)
 *
 * Calls %SELECT(<measure>,1) once per record to extract, cycling through the
 * measure datasets in the order given by inp and starting again from the
 * first one after the last.  With the defaults the call sequence is
 * ED, CCM, MAT, CAC, ED, CCM.
 *
 *   inp                        - blank-separated list of measure dataset
 *                                names, in selection order
 *   Num_of_records_to_extract  - total number of %SELECT calls to make
 *
 * Side effect: deletes WORK.sample1A before the first call so that a rerun
 * starts from an empty sample instead of adding to the previous one.
 */
%macro loop(inp=ED CCM MAT CAC, Num_of_records_to_extract=6);

%local Num_of_distinct_measure record_no measure_no;

/* Derive the cycle length from the list itself rather than hard-coding it. */
%let Num_of_distinct_measure=%sysfunc(countw(&inp., %str( )));

proc datasets lib=work nolist nowarn;
   delete sample1A;
quit;

%let measure_no=0;
%do record_no=1 %to &Num_of_records_to_extract.;
   /* Advance to the next measure, wrapping around after the last one. */
   %let measure_no=%eval(&measure_no. + 1);
   %if &measure_no. gt &Num_of_distinct_measure. %then %let measure_no=1;

   %SELECT(%scan(&inp., &measure_no., %str( )),1);
%end;

%mend loop;
%loop;

这是我如何实现的..

/* Add a random sort key (ran) and the selection position of each measure
   (order: ED=1, CCM=2, MAT=3, CAC=4) to test2, in place. */
data test2;
   set test2;
   ran = ranuni(123);
   select (measure);
      when ('ED')  order = 1;
      when ('CCM') order = 2;
      when ('MAT') order = 3;
      when ('CAC') order = 4;
      otherwise;   /* order is not assigned for any other measure */
   end;
run;

/* One sorted extract per payer, ordered by measure position and then by the
   random key. */
proc sort data=test2(where=(payer = 'P1')) out=P1;
   by order payer ran;
run;

proc sort data=test2(where=(payer = 'P2')) out=P2;
   by order payer ran;
run;

/* For payer P1: the first row of each measure goes to S1, all later rows of
   that measure go to S3. */
data S1 S3;
   set P1;
   by order ran;
   if not first.order then output S3;
   else output S1;
run;

/* Same split for payer P2: first row per measure to S2, the rest to S4. */
data S2 S4;
   set P2;
   by order ran;
   if not first.order then output S4;
   else output S2;
run;

/* Concatenate S1, S2, S3, S4 in that order and keep only the first 6 rows. */
data sample;
   set S1 S2 S3 S4;
   /* Stop before the seventh row is written. */
   if _N_ > 6 then stop;
run;

下面是一种不那么繁琐的方法。凡是能用分组(BY 组)处理代替宏循环的地方,都应该这样做。在这种情况下,不需要把数据集拆成很多部分来处理;如果数据集很大,这样做在 I/O 方面代价很高。

这是一个易于维护的解决方案...未经测试:

    /* Total number of records wanted in the sample. */
    %let SAMPLE_SIZE=6 ;

    /* Character format mapping each measure to its selection position;
       any measure not listed maps to 'X'. */
    proc format ;
       value $measure2order
          'ED'  = '1'
          'CCM' = '2'
          'MAT' = '3'
          'CAC' = '4'
          other = 'X' ;
    run ;

    /* Write to new datasets rather than back to test2: rows whose measure is
       not one of those mapped by $measure2order (order = 'X') are set aside
       in ODD_STUFF, all other rows go to SAMPLE.  Every row also receives a
       random key and its measure's selection position. */
    data SAMPLE
         ODD_STUFF ;
       set test2 ;
       random = ranuni(123) ;
       order  = put(measure, $measure2order.) ;
       if order ne 'X' then output SAMPLE ;
       else output ODD_STUFF ;
    run ;

       proc sort data=SAMPLE ;
          by payer order random ;
          run;

          data SAMPLE ;
          set SAMPLE ;
          by payer order random ;
             if first.order ;
             sample_count+1 ;
             output ;
             if sample_count GE &SAMPLE_SIZE then stop ; 
             drop random sample_count ;
             run; 

(顺便说一句……这里的问题定义似乎有些古怪。假设数据齐全,您的样本将包含付款人 1 (P1) 的四种度量各 1 条随机记录,以及付款人 2 (P2) 的前两种度量(ED 和 CCM)各 1 条随机记录。)