proc sql 中 GROUP BY 子句内的 CASE 语句与聚合
Case Statement and Aggregation within a Group By clause in proc sql
我在分组内做聚合并使用 CASE 表达式时遇到了一些问题。
目标是检查每个交易键 (trxn_key) 的 indicator。如果存在值为 1 的 indicator,则必须取 max(change_date);如果全为 0,则取 min(change_date)。此外,必须把与所选 change_date 对应的 initial_key 填入 final_key。
输出看起来像这样
你可以试试下面的写法。
根据你提供的测试数据:
- 首先,在 name 和 trxn_key 分组内求 max(indicator)。
- 其次,根据上面的值,决定是取 min(change_date) 和 min(initial_key),还是取 max(change_date) 和 max(initial_key)。
Because you don't need an aggregated result, we need to use an analytic function,
which will not affect the number of rows in the final output.
-- Attach final_date / final_key to every row of table1 without collapsing
-- rows. A group is (name, trxn_key); max_ind is the group's MAX(indicator).
--   * max_ind = 1: final_date is the latest change_date among the group's
--     indicator = 1 rows.
--   * max_ind = 0: final_date is the earliest change_date among the group's
--     indicator = 0 rows.
--   * any other max_ind (including NULL): both outputs are NULL.
-- final_key is the initial_key of the row that supplied final_date. It is read
-- with FIRST_VALUE over a date-ordered window rather than with an independent
-- MAX/MIN(initial_key), because the largest/smallest key in a group is not
-- necessarily the key of the row holding the chosen date.
-- Ties on change_date fall back to the largest key (max_ind = 1) or the
-- smallest key (max_ind = 0).
-- NOTE(review): assumes each group has at least one qualifying row with a
-- non-NULL change_date; otherwise final_date is NULL while final_key is taken
-- from a non-qualifying row -- confirm change_date is NOT NULL.
SELECT
    t1.name,
    t1.initial_key,
    t1.change_date,
    t1.indicator,
    t1.trxn_key,
    t1.trxn_date,
    CASE
        WHEN t1.max_ind = 1 THEN
            MAX(CASE WHEN t1.indicator = 1 THEN t1.change_date END)
                OVER (PARTITION BY t1.name, t1.trxn_key)
        WHEN t1.max_ind = 0 THEN
            MIN(CASE WHEN t1.indicator = 0 THEN t1.change_date END)
                OVER (PARTITION BY t1.name, t1.trxn_key)
    END AS final_date,
    CASE
        WHEN t1.max_ind = 1 THEN
            -- Qualifying rows (indicator = 1, dated) sort first, latest date on top.
            FIRST_VALUE(t1.initial_key) OVER (
                PARTITION BY t1.name, t1.trxn_key
                ORDER BY
                    CASE
                        WHEN t1.indicator = 1 AND t1.change_date IS NOT NULL THEN 0
                        ELSE 1
                    END,
                    t1.change_date DESC,
                    t1.initial_key DESC
            )
        WHEN t1.max_ind = 0 THEN
            -- Qualifying rows (indicator = 0, dated) sort first, earliest date on top.
            FIRST_VALUE(t1.initial_key) OVER (
                PARTITION BY t1.name, t1.trxn_key
                ORDER BY
                    CASE
                        WHEN t1.indicator = 0 AND t1.change_date IS NOT NULL THEN 0
                        ELSE 1
                    END,
                    t1.change_date,
                    t1.initial_key
            )
    END AS final_key
FROM
    (
        -- Carry the group-level indicator maximum on every row.
        SELECT
            name,
            initial_key,
            change_date,
            indicator,
            trxn_key,
            trxn_date,
            MAX(indicator) OVER (PARTITION BY name, trxn_key) AS max_ind
        FROM table1
    ) t1
ORDER BY
    t1.trxn_key,
    t1.trxn_date,
    t1.initial_key,
    t1.change_date;
您可以使用 DOW 循环按组处理数据(即在 do until 循环内部使用 SET 和 BY 语句)。
在一个 DATA 步中串联两个 DOW 循环(一步中两个):第一个循环遍历整个组,按需要计算该组的各项指标;第二个循环再次读取该组的记录,并连同第一个循环计算出的值一起输出。
示例:
/* Builds the sample input data set HAVE from the in-stream rows below. */
data have;
/* List input: name is character ($); the other five variables are numeric. */
input name $ initial_key change_date indicator trxn_key trxn_date;
/* Both date variables are read with the DATE9. informat and shown with DATE9. */
attrib change_date trxn_date informat=date9. format=date9.;
/* Rows are listed in name / trxn_key order; each trxn_key has three rows. */
datalines;
ABC 1 17feb20 0 1 16feb20
ABC 2 21feb20 0 1 16feb20
ABC 3 25feb20 0 1 16feb20
ABC 1 17feb20 1 2 20feb20
ABC 2 21feb20 0 2 20feb20
ABC 3 25feb20 0 2 20feb20
ABC 1 17feb20 1 3 22feb20
ABC 2 21feb20 1 3 22feb20
ABC 3 25feb20 0 3 22feb20
ABC 1 17feb20 1 4 26feb20
ABC 2 21feb20 1 4 26feb20
ABC 3 25feb20 1 4 26feb20
;
data want;
  /* Pass 1: read one name/trxn_key BY group from HAVE. Track the earliest
     change_date with its initial_key (min_*), and the latest change_date
     among rows whose indicator is true with its initial_key (max_*).
     max_flag records that at least one such row was seen. */
  do until (last.trxn_key);
    set have;
    by name trxn_key;

    if missing(min_date) or change_date < min_date then do;
      min_date = change_date;
      min_key  = initial_key;
    end;

    if indicator then do;
      if missing(max_date) or change_date > max_date then do;
        max_date = change_date;
        max_key  = initial_key;
        max_flag = 1;
      end;
    end;
  end;

  /* Business rule: with no flagged row in the group use the earliest
     date/key pair, otherwise use the latest flagged date/key pair. */
  if not max_flag then do;
    final_date = min_date;
    final_key  = min_key;
  end;
  else do;
    final_date = max_date;
    final_key  = max_key;
  end;

  /* Pass 2: read the same BY group again through a second SET statement and
     write every row, now carrying final_date and final_key. */
  do until (last.trxn_key);
    set have;
    by name trxn_key;
    output;
  end;

  /* The min_ and max_ work variables are not written to WANT. */
  format final_date min_date max_date date9.;
  drop min_: max_:;
run;
如果我理解正确的话,您可以使用聚合得到最后两列:
/*
 * One row per trxn_key with that group's final_date and final_key.
 *   - final_date: the latest change_date among indicator = 1 rows, or, when
 *     there is no such date, the earliest change_date in the group.
 *   - final_key: the initial_key of a row that actually carries final_date
 *     (restricted to indicator = 1 rows when the date came from them).
 * The key is found by joining back to t instead of taking an independent
 * max/min(initial_key), because the largest/smallest key in a group is not
 * necessarily the key of the row holding the chosen date.
 * If several qualifying rows share final_date, the largest key is used for
 * the indicator = 1 case and the smallest key otherwise.
 * The left join keeps a group whose final_date is NULL (final_key is then NULL).
 * NOTE(review): grouping is by trxn_key only; if trxn_key can repeat across
 * name values, add name to the grouping and join conditions -- confirm.
 */
select
    grp.trxn_key,
    grp.final_date,
    case
        when grp.has_flagged = 1 then max(src.initial_key)
        else min(src.initial_key)
    end as final_key
from (
    select
        trxn_key,
        /* 1 when the group has a dated indicator = 1 row, else 0 */
        max(case
                when indicator = 1 and change_date is not null then 1
                else 0
            end) as has_flagged,
        coalesce(
            max(case when indicator = 1 then change_date end),
            min(change_date)
        ) as final_date
    from t
    group by trxn_key
) grp
left join t src
    on  src.trxn_key    = grp.trxn_key
    and src.change_date = grp.final_date
    and (grp.has_flagged = 0 or src.indicator = 1)
group by
    grp.trxn_key,
    grp.final_date,
    grp.has_flagged;
然后与原表联接 (join):
proc sql;
    /*
     * Return every row of t with its group's final_date and final_key
     * appended; a group is all rows sharing a trxn_key.
     *   - final_date: the latest change_date among indicator = 1 rows, or,
     *     when there is no such date, the earliest change_date in the group.
     *   - final_key: the initial_key of a row that actually carries
     *     final_date (restricted to indicator = 1 rows when the date came
     *     from them).
     * The key is found by joining back to t instead of taking an independent
     * max/min(initial_key), because the largest/smallest key in a group is
     * not necessarily the key of the row holding the chosen date.
     * If several qualifying rows share final_date, the largest key is used
     * for the indicator = 1 case and the smallest key otherwise.
     * NOTE(review): grouping is by trxn_key only; if trxn_key can repeat
     * across name values, add name to the grouping and join -- confirm.
     */
    select
        t.*,
        tt.final_date,
        tt.final_key
    from t
    inner join (
        select
            grp.trxn_key,
            grp.final_date,
            case
                when grp.has_flagged = 1 then max(src.initial_key)
                else min(src.initial_key)
            end as final_key
        from (
            select
                trxn_key,
                /* 1 when the group has a dated indicator = 1 row, else 0 */
                max(case
                        when indicator = 1 and change_date is not null then 1
                        else 0
                    end) as has_flagged,
                coalesce(
                    max(case when indicator = 1 then change_date end),
                    min(change_date)
                ) as final_date
            from t
            group by trxn_key
        ) grp
        /* left join: a group with a NULL final_date still yields one row */
        left join t src
            on  src.trxn_key    = grp.trxn_key
            and src.change_date = grp.final_date
            and (grp.has_flagged = 0 or src.indicator = 1)
        group by
            grp.trxn_key,
            grp.final_date,
            grp.has_flagged
    ) tt
        on tt.trxn_key = t.trxn_key;
quit;
我在分组内做聚合并使用 CASE 表达式时遇到了一些问题。
目标是检查每个交易键 (trxn_key) 的 indicator。如果存在值为 1 的 indicator,则必须取 max(change_date);如果全为 0,则取 min(change_date)。此外,必须把与所选 change_date 对应的 initial_key 填入 final_key。
输出看起来像这样
你可以试试下面的写法。
根据你提供的测试数据:
- 首先,在 name 和 trxn_key 分组内求 max(indicator)。
- 其次,根据上面的值,决定是取 min(change_date) 和 min(initial_key),还是取 max(change_date) 和 max(initial_key)。
Because you don't need an aggregated result, we need to use an analytic function,
which will not affect the number of rows in the final output.
-- Attach final_date / final_key to every row of table1 without collapsing
-- rows. A group is (name, trxn_key); max_ind is the group's MAX(indicator).
--   * max_ind = 1: final_date is the latest change_date among the group's
--     indicator = 1 rows.
--   * max_ind = 0: final_date is the earliest change_date among the group's
--     indicator = 0 rows.
--   * any other max_ind (including NULL): both outputs are NULL.
-- final_key is the initial_key of the row that supplied final_date. It is read
-- with FIRST_VALUE over a date-ordered window rather than with an independent
-- MAX/MIN(initial_key), because the largest/smallest key in a group is not
-- necessarily the key of the row holding the chosen date.
-- Ties on change_date fall back to the largest key (max_ind = 1) or the
-- smallest key (max_ind = 0).
-- NOTE(review): assumes each group has at least one qualifying row with a
-- non-NULL change_date; otherwise final_date is NULL while final_key is taken
-- from a non-qualifying row -- confirm change_date is NOT NULL.
SELECT
    t1.name,
    t1.initial_key,
    t1.change_date,
    t1.indicator,
    t1.trxn_key,
    t1.trxn_date,
    CASE
        WHEN t1.max_ind = 1 THEN
            MAX(CASE WHEN t1.indicator = 1 THEN t1.change_date END)
                OVER (PARTITION BY t1.name, t1.trxn_key)
        WHEN t1.max_ind = 0 THEN
            MIN(CASE WHEN t1.indicator = 0 THEN t1.change_date END)
                OVER (PARTITION BY t1.name, t1.trxn_key)
    END AS final_date,
    CASE
        WHEN t1.max_ind = 1 THEN
            -- Qualifying rows (indicator = 1, dated) sort first, latest date on top.
            FIRST_VALUE(t1.initial_key) OVER (
                PARTITION BY t1.name, t1.trxn_key
                ORDER BY
                    CASE
                        WHEN t1.indicator = 1 AND t1.change_date IS NOT NULL THEN 0
                        ELSE 1
                    END,
                    t1.change_date DESC,
                    t1.initial_key DESC
            )
        WHEN t1.max_ind = 0 THEN
            -- Qualifying rows (indicator = 0, dated) sort first, earliest date on top.
            FIRST_VALUE(t1.initial_key) OVER (
                PARTITION BY t1.name, t1.trxn_key
                ORDER BY
                    CASE
                        WHEN t1.indicator = 0 AND t1.change_date IS NOT NULL THEN 0
                        ELSE 1
                    END,
                    t1.change_date,
                    t1.initial_key
            )
    END AS final_key
FROM
    (
        -- Carry the group-level indicator maximum on every row.
        SELECT
            name,
            initial_key,
            change_date,
            indicator,
            trxn_key,
            trxn_date,
            MAX(indicator) OVER (PARTITION BY name, trxn_key) AS max_ind
        FROM table1
    ) t1
ORDER BY
    t1.trxn_key,
    t1.trxn_date,
    t1.initial_key,
    t1.change_date;
您可以使用 DOW 循环按组处理数据(即在 do until 循环内部使用 SET 和 BY 语句)。
在一个 DATA 步中串联两个 DOW 循环(一步中两个):第一个循环遍历整个组,按需要计算该组的各项指标;第二个循环再次读取该组的记录,并连同第一个循环计算出的值一起输出。
示例:
/* Builds the sample input data set HAVE from the in-stream rows below. */
data have;
/* List input: name is character ($); the other five variables are numeric. */
input name $ initial_key change_date indicator trxn_key trxn_date;
/* Both date variables are read with the DATE9. informat and shown with DATE9. */
attrib change_date trxn_date informat=date9. format=date9.;
/* Rows are listed in name / trxn_key order; each trxn_key has three rows. */
datalines;
ABC 1 17feb20 0 1 16feb20
ABC 2 21feb20 0 1 16feb20
ABC 3 25feb20 0 1 16feb20
ABC 1 17feb20 1 2 20feb20
ABC 2 21feb20 0 2 20feb20
ABC 3 25feb20 0 2 20feb20
ABC 1 17feb20 1 3 22feb20
ABC 2 21feb20 1 3 22feb20
ABC 3 25feb20 0 3 22feb20
ABC 1 17feb20 1 4 26feb20
ABC 2 21feb20 1 4 26feb20
ABC 3 25feb20 1 4 26feb20
;
data want;
  /* Pass 1: read one name/trxn_key BY group from HAVE. Track the earliest
     change_date with its initial_key (min_*), and the latest change_date
     among rows whose indicator is true with its initial_key (max_*).
     max_flag records that at least one such row was seen. */
  do until (last.trxn_key);
    set have;
    by name trxn_key;

    if missing(min_date) or change_date < min_date then do;
      min_date = change_date;
      min_key  = initial_key;
    end;

    if indicator then do;
      if missing(max_date) or change_date > max_date then do;
        max_date = change_date;
        max_key  = initial_key;
        max_flag = 1;
      end;
    end;
  end;

  /* Business rule: with no flagged row in the group use the earliest
     date/key pair, otherwise use the latest flagged date/key pair. */
  if not max_flag then do;
    final_date = min_date;
    final_key  = min_key;
  end;
  else do;
    final_date = max_date;
    final_key  = max_key;
  end;

  /* Pass 2: read the same BY group again through a second SET statement and
     write every row, now carrying final_date and final_key. */
  do until (last.trxn_key);
    set have;
    by name trxn_key;
    output;
  end;

  /* The min_ and max_ work variables are not written to WANT. */
  format final_date min_date max_date date9.;
  drop min_: max_:;
run;
如果我理解正确的话,您可以使用聚合得到最后两列:
/*
 * One row per trxn_key with that group's final_date and final_key.
 *   - final_date: the latest change_date among indicator = 1 rows, or, when
 *     there is no such date, the earliest change_date in the group.
 *   - final_key: the initial_key of a row that actually carries final_date
 *     (restricted to indicator = 1 rows when the date came from them).
 * The key is found by joining back to t instead of taking an independent
 * max/min(initial_key), because the largest/smallest key in a group is not
 * necessarily the key of the row holding the chosen date.
 * If several qualifying rows share final_date, the largest key is used for
 * the indicator = 1 case and the smallest key otherwise.
 * The left join keeps a group whose final_date is NULL (final_key is then NULL).
 * NOTE(review): grouping is by trxn_key only; if trxn_key can repeat across
 * name values, add name to the grouping and join conditions -- confirm.
 */
select
    grp.trxn_key,
    grp.final_date,
    case
        when grp.has_flagged = 1 then max(src.initial_key)
        else min(src.initial_key)
    end as final_key
from (
    select
        trxn_key,
        /* 1 when the group has a dated indicator = 1 row, else 0 */
        max(case
                when indicator = 1 and change_date is not null then 1
                else 0
            end) as has_flagged,
        coalesce(
            max(case when indicator = 1 then change_date end),
            min(change_date)
        ) as final_date
    from t
    group by trxn_key
) grp
left join t src
    on  src.trxn_key    = grp.trxn_key
    and src.change_date = grp.final_date
    and (grp.has_flagged = 0 or src.indicator = 1)
group by
    grp.trxn_key,
    grp.final_date,
    grp.has_flagged;
然后与原表联接 (join):
proc sql;
    /*
     * Return every row of t with its group's final_date and final_key
     * appended; a group is all rows sharing a trxn_key.
     *   - final_date: the latest change_date among indicator = 1 rows, or,
     *     when there is no such date, the earliest change_date in the group.
     *   - final_key: the initial_key of a row that actually carries
     *     final_date (restricted to indicator = 1 rows when the date came
     *     from them).
     * The key is found by joining back to t instead of taking an independent
     * max/min(initial_key), because the largest/smallest key in a group is
     * not necessarily the key of the row holding the chosen date.
     * If several qualifying rows share final_date, the largest key is used
     * for the indicator = 1 case and the smallest key otherwise.
     * NOTE(review): grouping is by trxn_key only; if trxn_key can repeat
     * across name values, add name to the grouping and join -- confirm.
     */
    select
        t.*,
        tt.final_date,
        tt.final_key
    from t
    inner join (
        select
            grp.trxn_key,
            grp.final_date,
            case
                when grp.has_flagged = 1 then max(src.initial_key)
                else min(src.initial_key)
            end as final_key
        from (
            select
                trxn_key,
                /* 1 when the group has a dated indicator = 1 row, else 0 */
                max(case
                        when indicator = 1 and change_date is not null then 1
                        else 0
                    end) as has_flagged,
                coalesce(
                    max(case when indicator = 1 then change_date end),
                    min(change_date)
                ) as final_date
            from t
            group by trxn_key
        ) grp
        /* left join: a group with a NULL final_date still yields one row */
        left join t src
            on  src.trxn_key    = grp.trxn_key
            and src.change_date = grp.final_date
            and (grp.has_flagged = 0 or src.indicator = 1)
        group by
            grp.trxn_key,
            grp.final_date,
            grp.has_flagged
    ) tt
        on tt.trxn_key = t.trxn_key;
quit;