PROC SQL 合并不匹配
PROC SQL MERGE MISMATCH
(附有期望输出的截图)要求的条件是:
“A 中的 SUBJECT = B 中的 SUBJECT
并且
A 中的 VISIT NE(不等于)B 中的 VISIT1”
我想通过使用 Proc SQL 过程从下面的 Tables A 和 B 中找到确切的不匹配和遗漏的 VISIT,有人可以帮我吗?
Table A
SUBJECT Test VISIT
1001 ABCD 1
1001 ABCD 2
1001 ABCD 3
1001 ABCD 5
Table B
SUBJECT Test VISIT1
1001 ABCD 2
1001 ABCD 1
1001 ABCD 4
预期输出:
SUBJECT Test VISIT VISIT1
1001 ABCD 3
1001 ABCD 5
1001 ABCD 4
VISIT 3 和 5 存在于数据集 A 中而不存在于数据集 B 中;VISIT 4 存在于数据集 B 中而不存在于数据集 A 中,依此类推。
数据集代码-
/* Sample dataset A: one row per (SUBJECT, Test, visit). */
DATA A;
    /* Test has to be declared character ($) here. Listing it without $
       makes it numeric, which conflicts with "Test $" on the INPUT statement. */
    LENGTH SUBJECT 8 Test $ 8 visit 8;
    INPUT SUBJECT Test $ visit;
    DATALINES;
1001 ABCD 1
1001 ABCD 2
1001 ABCD 3
1001 ABCD 5
;
RUN;
/* Sample dataset B: one row per (SUBJECT, Test, visit1). */
DATA B;
    /* Test has to be declared character ($) here. Listing it without $
       makes it numeric, which conflicts with "Test $" on the INPUT statement. */
    LENGTH SUBJECT 8 Test $ 8 visit1 8;
    INPUT SUBJECT Test $ visit1;
    DATALINES;
1001 ABCD 2
1001 ABCD 1
1001 ABCD 4
;
RUN;
提前致谢!
我试过的代码如下(但没有按预期工作)-
/* Visits recorded in A that are missing from B for the same subject. */
proc sql;
    /* SS1: rows of A whose visit does not appear as visit1 in B
       for the same SUBJECT. */
    create table SS1 as
    select distinct a.*
    from A as a
    where a.visit not in (
        select s.visit1 from B as s where s.SUBJECT = a.SUBJECT
    );

    /* INRAVE: the unmatched A rows, kept only when the subject exists in B.
       A join on "visit NE visit1" would emit one row per non-equal B visit
       and fill VISIT1 with unrelated values, so an EXISTS check is used and
       VISIT1 is left missing. */
    create table INRAVE as
    select x.SUBJECT, x.Test, x.visit, . as visit1
    from SS1 as x
    where exists (select 1 from B as s where s.SUBJECT = x.SUBJECT);
quit;
/* Visits recorded in B that are missing from A for the same subject. */
proc sql;
    /* SS2: rows of B whose visit1 does not appear as visit in A
       for the same SUBJECT. */
    create table SS2 as
    select distinct b.*
    from B as b
    where b.VISIT1 not in (
        select s.VISIT from A as s where s.SUBJECT = b.SUBJECT
    );

    /* INVENDOR: the unmatched B rows, kept only when the subject exists in A.
       A join on "VISIT1 NE VISIT" would emit one row per non-equal A visit
       and fill VISIT with unrelated values, so an EXISTS check is used and
       VISIT is left missing. */
    create table INVENDOR as
    select x.SUBJECT, x.Test, x.VISIT1, . as VISIT
    from SS2 as x
    where exists (select 1 from A as s where s.SUBJECT = x.SUBJECT);
quit;
/* Stack the two mismatch tables into the final result.
   The former "where subject=subject" filter was always true and the extra
   semicolon was a null statement, so both are removed. */
data ALL;
    set inrave invendor;
run;
看来你很了解 SQL,为什么不试试 union all,就像这样:
/* Two-way anti-join, then split the visit column by originating table. */
proc sql noprint;
    /* C: rows of A whose (SUBJECT, Test, visit) key is absent from B,
       stacked on rows of B whose key is absent from A.
       Source records which table each row came from. */
    create table C as
        select *, 'A' as Source
        from A
        where catx('@', SUBJECT, Test, visit) not in
              (select catx('@', SUBJECT, Test, visit1) from B)
    union all corr
        select *, 'B' as Source
        from B(rename=(VISIT1=VISIT))
        where catx('@', SUBJECT, Test, visit) not in
              (select catx('@', SUBJECT, Test, visit) from A)
    ;

    /* D: move the shared visit value into VISIT for A rows and into VISIT1
       for B rows, then drop the helper columns. */
    create table D(drop=TmpVISIT Source) as
        select *,
               case Source when 'B' then . else TmpVISIT end as VISIT,
               case Source when 'B' then TmpVISIT else . end as VISIT1
        from C(rename=(VISIT=TmpVISIT));
quit;
我先从数据集 A 中取出所有在数据集 B 中没有对应记录的观测值,然后对数据集 B 执行相反的操作。
好吧,我还有一个更短的解决方案:
/* Collect the keys that occur more than once across A and B, then delete
   those rows from the stacked data so only unmatched visits remain. */

/* Reset first: SELECT ... INTO does not assign the macro variable when the
   query returns no rows, which would leave &Ununique unresolved or holding
   a value from an earlier run. */
%let Ununique = ;

proc sql noprint;
    /* The quoted separator turns the key list into "k1" "k2" ... once it is
       wrapped in double quotes below.
       NOTE: the whole list must fit in a single macro variable. */
    select catx('@', SUBJECT, Test, visit)
        into :Ununique separated by '" "'
    from (
        select * from A
        union all
        select * from B(rename=(visit1=visit))
    )
    group by SUBJECT, Test, visit
    having count(*) > 1;
quit;

data D;
    set A B;
    /* Rows from A carry visit, rows from B carry visit1; coalesce picks
       whichever is populated to rebuild the same key. */
    if catx('@', SUBJECT, Test, coalesce(visit1, visit)) in ("&Ununique") then delete;
run;
然而,此方法受限于宏变量的最大长度。
(附有期望输出的截图)要求的条件是:“A 中的 SUBJECT = B 中的 SUBJECT 并且 A 中的 VISIT NE(不等于)B 中的 VISIT1”
我想通过使用 Proc SQL 过程从下面的 Tables A 和 B 中找到确切的不匹配和遗漏的 VISIT,有人可以帮我吗?
Table A
SUBJECT Test VISIT
1001 ABCD 1
1001 ABCD 2
1001 ABCD 3
1001 ABCD 5
Table B
SUBJECT Test VISIT1
1001 ABCD 2
1001 ABCD 1
1001 ABCD 4
预期输出:
SUBJECT Test VISIT VISIT1
1001 ABCD 3
1001 ABCD 5
1001 ABCD 4
VISIT 3 和 5 存在于数据集 A 中而不存在于数据集 B 中;VISIT 4 存在于数据集 B 中而不存在于数据集 A 中,依此类推。数据集代码-
/* Sample dataset A: one row per (SUBJECT, Test, visit). */
DATA A;
    /* Test has to be declared character ($) here. Listing it without $
       makes it numeric, which conflicts with "Test $" on the INPUT statement. */
    LENGTH SUBJECT 8 Test $ 8 visit 8;
    INPUT SUBJECT Test $ visit;
    DATALINES;
1001 ABCD 1
1001 ABCD 2
1001 ABCD 3
1001 ABCD 5
;
RUN;
/* Sample dataset B: one row per (SUBJECT, Test, visit1). */
DATA B;
    /* Test has to be declared character ($) here. Listing it without $
       makes it numeric, which conflicts with "Test $" on the INPUT statement. */
    LENGTH SUBJECT 8 Test $ 8 visit1 8;
    INPUT SUBJECT Test $ visit1;
    DATALINES;
1001 ABCD 2
1001 ABCD 1
1001 ABCD 4
;
RUN;
提前致谢!
我试过的代码如下(但没有按预期工作)-
/* Visits recorded in A that are missing from B for the same subject. */
proc sql;
    /* SS1: rows of A whose visit does not appear as visit1 in B
       for the same SUBJECT. */
    create table SS1 as
    select distinct a.*
    from A as a
    where a.visit not in (
        select s.visit1 from B as s where s.SUBJECT = a.SUBJECT
    );

    /* INRAVE: the unmatched A rows, kept only when the subject exists in B.
       A join on "visit NE visit1" would emit one row per non-equal B visit
       and fill VISIT1 with unrelated values, so an EXISTS check is used and
       VISIT1 is left missing. */
    create table INRAVE as
    select x.SUBJECT, x.Test, x.visit, . as visit1
    from SS1 as x
    where exists (select 1 from B as s where s.SUBJECT = x.SUBJECT);
quit;
/* Visits recorded in B that are missing from A for the same subject. */
proc sql;
    /* SS2: rows of B whose visit1 does not appear as visit in A
       for the same SUBJECT. */
    create table SS2 as
    select distinct b.*
    from B as b
    where b.VISIT1 not in (
        select s.VISIT from A as s where s.SUBJECT = b.SUBJECT
    );

    /* INVENDOR: the unmatched B rows, kept only when the subject exists in A.
       A join on "VISIT1 NE VISIT" would emit one row per non-equal A visit
       and fill VISIT with unrelated values, so an EXISTS check is used and
       VISIT is left missing. */
    create table INVENDOR as
    select x.SUBJECT, x.Test, x.VISIT1, . as VISIT
    from SS2 as x
    where exists (select 1 from A as s where s.SUBJECT = x.SUBJECT);
quit;
/* Stack the two mismatch tables into the final result.
   The former "where subject=subject" filter was always true and the extra
   semicolon was a null statement, so both are removed. */
data ALL;
    set inrave invendor;
run;
看来你很了解 SQL,为什么不试试 union all,就像这样:
/* Two-way anti-join, then split the visit column by originating table. */
proc sql noprint;
    /* C: rows of A whose (SUBJECT, Test, visit) key is absent from B,
       stacked on rows of B whose key is absent from A.
       Source records which table each row came from. */
    create table C as
        select *, 'A' as Source
        from A
        where catx('@', SUBJECT, Test, visit) not in
              (select catx('@', SUBJECT, Test, visit1) from B)
    union all corr
        select *, 'B' as Source
        from B(rename=(VISIT1=VISIT))
        where catx('@', SUBJECT, Test, visit) not in
              (select catx('@', SUBJECT, Test, visit) from A)
    ;

    /* D: move the shared visit value into VISIT for A rows and into VISIT1
       for B rows, then drop the helper columns. */
    create table D(drop=TmpVISIT Source) as
        select *,
               case Source when 'B' then . else TmpVISIT end as VISIT,
               case Source when 'B' then TmpVISIT else . end as VISIT1
        from C(rename=(VISIT=TmpVISIT));
quit;
我先从数据集 A 中取出所有在数据集 B 中没有对应记录的观测值,然后对数据集 B 执行相反的操作。
好吧,我还有一个更短的解决方案:
/* Collect the keys that occur more than once across A and B, then delete
   those rows from the stacked data so only unmatched visits remain. */

/* Reset first: SELECT ... INTO does not assign the macro variable when the
   query returns no rows, which would leave &Ununique unresolved or holding
   a value from an earlier run. */
%let Ununique = ;

proc sql noprint;
    /* The quoted separator turns the key list into "k1" "k2" ... once it is
       wrapped in double quotes below.
       NOTE: the whole list must fit in a single macro variable. */
    select catx('@', SUBJECT, Test, visit)
        into :Ununique separated by '" "'
    from (
        select * from A
        union all
        select * from B(rename=(visit1=visit))
    )
    group by SUBJECT, Test, visit
    having count(*) > 1;
quit;

data D;
    set A B;
    /* Rows from A carry visit, rows from B carry visit1; coalesce picks
       whichever is populated to rebuild the same key. */
    if catx('@', SUBJECT, Test, coalesce(visit1, visit)) in ("&Ununique") then delete;
run;
然而,此方法受限于宏变量的最大长度。