使用 proc sql 过滤更高的值
Filtering higher values using proc sql
我需要按 var1 分组,在每组中只保留 avg 值最高的那一行。
我有这个数据集:
Var1 t avg
AA 1 0.02
AA 0 0.21
BB 1 0.05
BB 0 0.20
CC 1 0.10
CC 0 0.14
构建如下
/* Average of var2 for every (Var1, t) combination, listed in key order. */
proc sql;
  select
    Var1,
    t,
    avg(var2) as avg
  from my_data
  group by Var1, t
  order by Var1, t;
quit;
我的预期输出是
AA 0 0.21
BB 0 0.20
CC 0 0.14
你能给我一些提示吗?我想我应该先在每组内按 avg 降序排名,然后只选出排名为 1 的观测。
这在 proc sql 中相当棘手。我想下面的查询可以实现你想要的效果:
/* For each var1, list the t whose average of var2 is the highest.          */
/* Outer query : one row per (var1, t) with its average of var2.            */
/* HAVING      : keeps a row only when its average equals the largest       */
/*               per-t average found for the same var1.                     */
/* NOTE(review): the equality compares two separately computed means; both  */
/* are taken over the same rows, but confirm exact equality holds on your   */
/* data before relying on it.                                               */
proc sql;
  select d.var1,
         d.t,
         avg(d.var2) as average
  from my_data d
  group by d.var1, d.t
  having avg(d.var2) = (select max(m.average)
                        from (select var1,
                                     avg(var2) as average
                              from my_data
                              group by var1, t
                             ) as m
                        /* correlate on the group key var1, not on the measure */
                        where m.var1 = d.var1
                       );
quit;
在您已经得到的汇总结果(下面的代码中称为 have)的基础上,用以下方法之一再加一步处理即可。
您可以将 SQL 与 HAVING 子句一起使用以获得最大值。
/* Keep, for every var1, the row(s) of HAVE that carry the largest avg.     */
/* avg appears un-aggregated in the SELECT list while max(avg) is a summary */
/* over the var1 group, so PROC SQL remerges the group maximum onto each    */
/* row before the HAVING filter is applied. Ties yield several rows.        */
proc sql;
  create table want as
    select var1,
           t,
           avg
    from have
    group by var1
    having max(avg) = avg;
quit;
或者,假设平均值中不可能出现重复项,您可以轻松地使用 PROC SORT。
/* Step 1: reorder HAVE in place so that, inside each var1, the largest avg */
/* comes first.                                                             */
proc sort data=have;
  by var1 descending avg;
run;

/* Step 2: NODUPKEY writes a single row per var1 to WANT_HIGHEST.           */
/* NOTE(review): this relies on the second sort keeping the row order from  */
/* step 1 within each var1 (PROC SORT's default EQUALS behaviour) - confirm */
/* that NOEQUALS / NOSORTEQUALS is not in effect.                           */
proc sort data=have nodupkey out=want_highest;
  by var1;
run;
您可以先在子查询中计算每个 id、t 组合的平均值,然后在外层查询中按 id 分组,选出每个 id 组内平均值最高的那一行。
示例:
/* Simulated input table RAW.                                               */
/*   id  : 1 to 100                                                         */
/*   t   : 0 up to a randomly drawn upper bound of 1 or 2                   */
/*   rep : replicate counter, 1 up to a randomly drawn count                */
/*   y   : uniform random value rounded to two decimals                     */
data raw;
  call streaminit(123);   /* fixed seed for the RAND stream */
  do id = 1 to 100;
    do t = 0 to rand('integer', 1, 2);
      do rep = 1 to rand('integer', 20);
        y = round(rand('uniform'), 0.01);
        output;           /* one observation per (id, t, rep) */
      end;
    end;
  end;
run;
/* WANT: for every id, the t whose mean of y is the highest.                */
/* The inline view AVERAGES holds one row per (id, t) with its mean.        */
/* The outer query groups by id and keeps the row(s) whose avg equals the   */
/* group maximum; ties yield several rows for that id.                      */
/* format=5.2 is attached to the avg column of the inline view.             */
proc sql;
  create table want as
    select id, t, avg
    from (
      select id, t, mean(y) as avg format=5.2
      from raw
      group by id, t
    ) as averages
    group by id
    having avg = max(avg)
  ;
quit;   /* end the PROC SQL step explicitly */
我需要按 var1 分组,在每组中只保留 avg 值最高的那一行。
我有这个数据集:
Var1 t avg
AA 1 0.02
AA 0 0.21
BB 1 0.05
BB 0 0.20
CC 1 0.10
CC 0 0.14
构建如下
/* Average of var2 for every (Var1, t) combination, listed in key order. */
proc sql;
  select
    Var1,
    t,
    avg(var2) as avg
  from my_data
  group by Var1, t
  order by Var1, t;
quit;
我的预期输出是
AA 0 0.21
BB 0 0.20
CC 0 0.14
你能给我一些提示吗?我想我应该先在每组内按 avg 降序排名,然后只选出排名为 1 的观测。
这在 proc sql 中相当棘手。我想下面的查询可以实现你想要的效果:
/* For each var1, list the t whose average of var2 is the highest.          */
/* Outer query : one row per (var1, t) with its average of var2.            */
/* HAVING      : keeps a row only when its average equals the largest       */
/*               per-t average found for the same var1.                     */
/* NOTE(review): the equality compares two separately computed means; both  */
/* are taken over the same rows, but confirm exact equality holds on your   */
/* data before relying on it.                                               */
proc sql;
  select d.var1,
         d.t,
         avg(d.var2) as average
  from my_data d
  group by d.var1, d.t
  having avg(d.var2) = (select max(m.average)
                        from (select var1,
                                     avg(var2) as average
                              from my_data
                              group by var1, t
                             ) as m
                        /* correlate on the group key var1, not on the measure */
                        where m.var1 = d.var1
                       );
quit;
在您已经得到的汇总结果(下面的代码中称为 have)的基础上,用以下方法之一再加一步处理即可。
您可以将 SQL 与 HAVING 子句一起使用以获得最大值。
/* Keep, for every var1, the row(s) of HAVE that carry the largest avg.     */
/* avg appears un-aggregated in the SELECT list while max(avg) is a summary */
/* over the var1 group, so PROC SQL remerges the group maximum onto each    */
/* row before the HAVING filter is applied. Ties yield several rows.        */
proc sql;
  create table want as
    select var1,
           t,
           avg
    from have
    group by var1
    having max(avg) = avg;
quit;
或者,假设平均值中不可能出现重复项,您可以轻松地使用 PROC SORT。
/* Step 1: reorder HAVE in place so that, inside each var1, the largest avg */
/* comes first.                                                             */
proc sort data=have;
  by var1 descending avg;
run;

/* Step 2: NODUPKEY writes a single row per var1 to WANT_HIGHEST.           */
/* NOTE(review): this relies on the second sort keeping the row order from  */
/* step 1 within each var1 (PROC SORT's default EQUALS behaviour) - confirm */
/* that NOEQUALS / NOSORTEQUALS is not in effect.                           */
proc sort data=have nodupkey out=want_highest;
  by var1;
run;
您可以先在子查询中计算每个 id、t 组合的平均值,然后在外层查询中按 id 分组,选出每个 id 组内平均值最高的那一行。
示例:
/* Simulated input table RAW.                                               */
/*   id  : 1 to 100                                                         */
/*   t   : 0 up to a randomly drawn upper bound of 1 or 2                   */
/*   rep : replicate counter, 1 up to a randomly drawn count                */
/*   y   : uniform random value rounded to two decimals                     */
data raw;
  call streaminit(123);   /* fixed seed for the RAND stream */
  do id = 1 to 100;
    do t = 0 to rand('integer', 1, 2);
      do rep = 1 to rand('integer', 20);
        y = round(rand('uniform'), 0.01);
        output;           /* one observation per (id, t, rep) */
      end;
    end;
  end;
run;
/* WANT: for every id, the t whose mean of y is the highest.                */
/* The inline view AVERAGES holds one row per (id, t) with its mean.        */
/* The outer query groups by id and keeps the row(s) whose avg equals the   */
/* group maximum; ties yield several rows for that id.                      */
/* format=5.2 is attached to the avg column of the inline view.             */
proc sql;
  create table want as
    select id, t, avg
    from (
      select id, t, mean(y) as avg format=5.2
      from raw
      group by id, t
    ) as averages
    group by id
    having avg = max(avg)
  ;
quit;   /* end the PROC SQL step explicitly */