仅选择组内连续相同行中 dt_val 最小的那一行
Select only the least unique value of 2 consecutive rows within a group
我有以下表,并希望生成下面"想要"部分所描述的数据。我已经尝试了几个 SQL 分析函数(例如 rank() over),但似乎总是遇到障碍。如果有人能就如何解决这个问题提供任何见解,我将不胜感激。
-- Sample table for the question. Every column is a short string;
-- dt_val holds values that look like YYYYMMDD dates (e.g. '20151011').
CREATE TABLE TABLEA (
    id     VARCHAR(2),
    val1   VARCHAR(2),
    val2   VARCHAR(2),
    val3   VARCHAR(2),
    dt_val VARCHAR(8)
);
-- Sample data: 16 rows for ids 1-5.
-- An empty string ('') marks a missing value; Oracle stores '' as NULL,
-- which is why the expected output shows (null) for those cells.
-- NOTE(review): a multi-row VALUES list requires Oracle 23c+ (or MySQL /
-- PostgreSQL / SQL Server); on older Oracle use one INSERT per row.
INSERT INTO TABLEA
    (id, val1, val2, val3, dt_val)
VALUES
    ('1', '2', '3', '4', '20151011'),
    ('1', '2', '',  '4', '20151012'),
    ('1', '2', '3', '4', '20151013'),
    ('2', '4', '3', '4', '20151101'),
    ('2', '4', '3', '4', '20151102'),
    ('2', '4', '',  '',  '20151103'),
    ('2', '4', '3', '4', '20151104'),
    ('3', '4', '3', '4', '20151110'),
    ('4', '4', '3', '4', '20151110'),
    ('4', '4', '3', '4', '20151111'),
    ('4', '4', '',  '4', '20151112'),
    ('4', '4', '',  '4', '20151113'),
    ('5', '4', '3', '4', '20151111'),
    ('5', '4', '3', '4', '20151112'),
    ('5', '4', '3', '4', '20151113'),
    ('5', '4', '3', '4', '20151114')  -- last tuple: no trailing comma (it was a syntax error)
;
想要
我想要以下结果。如果连续的行(val1、val2、val3)相同,我只想选出其中 dt_val 最小的那一行。
Id val1 val2 val3 dt_val
------ ------ ------ ------ ----------------
1 2 3 4 20151011
1 2 (null) 4 20151012
1 2 3 4 20151013
2 4 3 4 20151101
2 4 (null) (null) 20151103
2 4 3 4 20151104
3 4 3 4 20151110
4 4 3 4 20151110
4 4 (null) 4 20151112
5 4 3 4 20151111
这个查询给出了所需的行:
-- Collapse each run of identical (val1, val2, val3) rows within an id
-- to the row with the smallest dt_val.
--
-- dt_val is implicitly converted to a number; subtracting the row number
-- gives the same "diff" for rows whose dt_val increases by exactly 1,
-- so each run shares one diff value and can be grouped on it.
--
-- id is part of the PARTITION BY: without it, rows of other ids that
-- share the same values and the same dt_val are numbered in an arbitrary
-- order, which can split one run into two groups.
--
-- NOTE(review): days that cross a month boundary (20151031 -> 20151101)
-- differ by more than 1 numerically and therefore start a new group.
select id, val1, val2, val3, min(dt_val) min_dt
from (select t.*,
             dt_val - row_number() over (partition by id, val1, val2, val3
                                         order by dt_val) diff
      from tablea t)
group by id, val1, val2, val3, diff
order by id, min(dt_val)
您可以使用两个 ROW_NUMBER() 之差,根据连续的行获取每个 ID 的分组:
-- Preview of the run identifier (grp) for every row.
-- pos_in_id   : position of the row among all rows of its id.
-- pos_in_vals : position among rows of that id with the same val1/val2/val3.
-- Their difference stays constant while consecutive rows keep the same
-- values, so (id, val1, val2, val3, grp) identifies one run.
with numbered as (
    select
        t.id,
        t.val1,
        t.val2,
        t.val3,
        t.dt_val,
        row_number() over (partition by t.id
                           order by t.dt_val) as pos_in_id,
        row_number() over (partition by t.id, t.val1, t.val2, t.val3
                           order by t.dt_val) as pos_in_vals
    from tablea t
)
select
    id,
    val1,
    val2,
    val3,
    dt_val,
    pos_in_id - pos_in_vals as grp
from numbered
order by id, dt_val;
然后应用聚合函数:
-- Return one row per run of consecutive identical (val1, val2, val3)
-- rows within an id, keeping the smallest dt_val of the run.
with runs as (
    -- grp: difference of two row numbers; constant within a run.
    select
        t.id,
        t.val1,
        t.val2,
        t.val3,
        t.dt_val,
        row_number() over (partition by t.id
                           order by t.dt_val)
          - row_number() over (partition by t.id, t.val1, t.val2, t.val3
                               order by t.dt_val) as grp
    from tablea t
)
select
    id,
    val1,
    val2,
    val3,
    min(dt_val) as dt_val
from runs
group by id, val1, val2, val3, grp
order by id, dt_val;
ID VAL1 VAL2 VAL3 DT_VAL
-- ---- ---- ---- --------
1 2 3 4 20151011
1 2 4 20151012
1 2 3 4 20151013
2 4 3 4 20151101
2 4 20151103
2 4 3 4 20151104
3 4 3 4 20151110
4 4 3 4 20151110
4 4 4 20151112
5 4 3 4 20151111
(我猜这和Ponder的想法基本一样...)
如果这些值实际上是日期而不是字符串,这种方法同样有效 - db<>fiddle。(和 Ponder 的方法一样,这要归功于日期运算!)
-- Oracle 12c+
-- Row pattern matching: each match is a run of consecutive rows whose
-- id, val1, val2 and val3 all equal those of the first row of the run.
-- One row is returned per run, carrying the run's first dt_val.
with s (id, val1, val2, val3, dt_val) as (
    -- Inline copy of the sample data ('' is NULL in Oracle).
    select 1, '2', '3', '4', '20151011' from dual union all
    select 1, '2', '' , '4', '20151012' from dual union all
    select 1, '2', '3', '4', '20151013' from dual union all
    select 2, '4', '3', '4', '20151101' from dual union all
    select 2, '4', '3', '4', '20151102' from dual union all
    select 2, '4', '' , '' , '20151103' from dual union all
    select 2, '4', '3', '4', '20151104' from dual union all
    select 3, '4', '3', '4', '20151110' from dual union all
    select 4, '4', '3', '4', '20151110' from dual union all
    select 4, '4', '3', '4', '20151111' from dual union all
    select 4, '4', '' , '4', '20151112' from dual union all
    select 4, '4', '' , '4', '20151113' from dual union all
    select 5, '4', '3', '4', '20151111' from dual union all
    select 5, '4', '3', '4', '20151112' from dual union all
    select 5, '4', '3', '4', '20151113' from dual union all
    select 5, '4', '3', '4', '20151114' from dual)
select *
from s
match_recognize (
    -- dt_val must be in the ordering: with "order by id" alone the row
    -- order inside an id is undefined, so both the run boundaries and
    -- first(dt_val) would be non-deterministic.
    order by id, dt_val
    measures
        v.id            as id,
        v.val1          as val1,
        v.val2          as val2,
        v.val3          as val3,
        first(v.dt_val) as dt_val
    pattern (v+)
    define v as
        -- decode(a, b, 0) yields 0 when a matches b (DECODE treats two
        -- NULLs as equal) and NULL otherwise: a NULL-safe equality test.
        decode(v.id  , first(id  ), 0) = 0 and
        decode(v.val1, first(val1), 0) = 0 and
        decode(v.val2, first(val2), 0) = 0 and
        decode(v.val3, first(val3), 0) = 0
);
ID VAL1 VAL2 VAL3 DT_VAL
--- ---- ---- ---- --------
1 2 3 4 20151011
1 2 4 20151012
1 2 3 4 20151013
2 4 3 4 20151101
2 4 20151103
2 4 3 4 20151104
3 4 3 4 20151110
4 4 3 4 20151110
4 4 4 20151112
5 4 3 4 20151111
10 rows selected.
我有以下表,并希望生成下面"想要"部分所描述的数据。我已经尝试了几个 SQL 分析函数(例如 rank() over),但似乎总是遇到障碍。如果有人能就如何解决这个问题提供任何见解,我将不胜感激。
-- Sample table for the question. Every column is a short string;
-- dt_val holds values that look like YYYYMMDD dates (e.g. '20151011').
CREATE TABLE TABLEA (
    id     VARCHAR(2),
    val1   VARCHAR(2),
    val2   VARCHAR(2),
    val3   VARCHAR(2),
    dt_val VARCHAR(8)
);
-- Sample data: 16 rows for ids 1-5.
-- An empty string ('') marks a missing value; Oracle stores '' as NULL,
-- which is why the expected output shows (null) for those cells.
-- NOTE(review): a multi-row VALUES list requires Oracle 23c+ (or MySQL /
-- PostgreSQL / SQL Server); on older Oracle use one INSERT per row.
INSERT INTO TABLEA
    (id, val1, val2, val3, dt_val)
VALUES
    ('1', '2', '3', '4', '20151011'),
    ('1', '2', '',  '4', '20151012'),
    ('1', '2', '3', '4', '20151013'),
    ('2', '4', '3', '4', '20151101'),
    ('2', '4', '3', '4', '20151102'),
    ('2', '4', '',  '',  '20151103'),
    ('2', '4', '3', '4', '20151104'),
    ('3', '4', '3', '4', '20151110'),
    ('4', '4', '3', '4', '20151110'),
    ('4', '4', '3', '4', '20151111'),
    ('4', '4', '',  '4', '20151112'),
    ('4', '4', '',  '4', '20151113'),
    ('5', '4', '3', '4', '20151111'),
    ('5', '4', '3', '4', '20151112'),
    ('5', '4', '3', '4', '20151113'),
    ('5', '4', '3', '4', '20151114')  -- last tuple: no trailing comma (it was a syntax error)
;
想要:我想要以下结果。如果连续的行(val1、val2、val3)相同,我只想选出其中 dt_val 最小的那一行。
Id val1 val2 val3 dt_val
------ ------ ------ ------ ----------------
1 2 3 4 20151011
1 2 (null) 4 20151012
1 2 3 4 20151013
2 4 3 4 20151101
2 4 (null) (null) 20151103
2 4 3 4 20151104
3 4 3 4 20151110
4 4 3 4 20151110
4 4 (null) 4 20151112
5 4 3 4 20151111
这个查询给出了所需的行:
-- Collapse each run of identical (val1, val2, val3) rows within an id
-- to the row with the smallest dt_val.
--
-- dt_val is implicitly converted to a number; subtracting the row number
-- gives the same "diff" for rows whose dt_val increases by exactly 1,
-- so each run shares one diff value and can be grouped on it.
--
-- id is part of the PARTITION BY: without it, rows of other ids that
-- share the same values and the same dt_val are numbered in an arbitrary
-- order, which can split one run into two groups.
--
-- NOTE(review): days that cross a month boundary (20151031 -> 20151101)
-- differ by more than 1 numerically and therefore start a new group.
select id, val1, val2, val3, min(dt_val) min_dt
from (select t.*,
             dt_val - row_number() over (partition by id, val1, val2, val3
                                         order by dt_val) diff
      from tablea t)
group by id, val1, val2, val3, diff
order by id, min(dt_val)
您可以使用两个 ROW_NUMBER() 之差,根据连续的行获取每个 ID 的分组:
-- Preview of the run identifier (grp) for every row.
-- pos_in_id   : position of the row among all rows of its id.
-- pos_in_vals : position among rows of that id with the same val1/val2/val3.
-- Their difference stays constant while consecutive rows keep the same
-- values, so (id, val1, val2, val3, grp) identifies one run.
with numbered as (
    select
        t.id,
        t.val1,
        t.val2,
        t.val3,
        t.dt_val,
        row_number() over (partition by t.id
                           order by t.dt_val) as pos_in_id,
        row_number() over (partition by t.id, t.val1, t.val2, t.val3
                           order by t.dt_val) as pos_in_vals
    from tablea t
)
select
    id,
    val1,
    val2,
    val3,
    dt_val,
    pos_in_id - pos_in_vals as grp
from numbered
order by id, dt_val;
然后应用聚合函数:
-- Return one row per run of consecutive identical (val1, val2, val3)
-- rows within an id, keeping the smallest dt_val of the run.
with runs as (
    -- grp: difference of two row numbers; constant within a run.
    select
        t.id,
        t.val1,
        t.val2,
        t.val3,
        t.dt_val,
        row_number() over (partition by t.id
                           order by t.dt_val)
          - row_number() over (partition by t.id, t.val1, t.val2, t.val3
                               order by t.dt_val) as grp
    from tablea t
)
select
    id,
    val1,
    val2,
    val3,
    min(dt_val) as dt_val
from runs
group by id, val1, val2, val3, grp
order by id, dt_val;
ID VAL1 VAL2 VAL3 DT_VAL
-- ---- ---- ---- --------
1 2 3 4 20151011
1 2 4 20151012
1 2 3 4 20151013
2 4 3 4 20151101
2 4 20151103
2 4 3 4 20151104
3 4 3 4 20151110
4 4 3 4 20151110
4 4 4 20151112
5 4 3 4 20151111
(我猜这和Ponder的想法基本一样...)
如果这些值实际上是日期而不是字符串,这种方法同样有效 - db<>fiddle。(和 Ponder 的方法一样,这要归功于日期运算!)
-- Oracle 12c+
-- Row pattern matching: each match is a run of consecutive rows whose
-- id, val1, val2 and val3 all equal those of the first row of the run.
-- One row is returned per run, carrying the run's first dt_val.
with s (id, val1, val2, val3, dt_val) as (
    -- Inline copy of the sample data ('' is NULL in Oracle).
    select 1, '2', '3', '4', '20151011' from dual union all
    select 1, '2', '' , '4', '20151012' from dual union all
    select 1, '2', '3', '4', '20151013' from dual union all
    select 2, '4', '3', '4', '20151101' from dual union all
    select 2, '4', '3', '4', '20151102' from dual union all
    select 2, '4', '' , '' , '20151103' from dual union all
    select 2, '4', '3', '4', '20151104' from dual union all
    select 3, '4', '3', '4', '20151110' from dual union all
    select 4, '4', '3', '4', '20151110' from dual union all
    select 4, '4', '3', '4', '20151111' from dual union all
    select 4, '4', '' , '4', '20151112' from dual union all
    select 4, '4', '' , '4', '20151113' from dual union all
    select 5, '4', '3', '4', '20151111' from dual union all
    select 5, '4', '3', '4', '20151112' from dual union all
    select 5, '4', '3', '4', '20151113' from dual union all
    select 5, '4', '3', '4', '20151114' from dual)
select *
from s
match_recognize (
    -- dt_val must be in the ordering: with "order by id" alone the row
    -- order inside an id is undefined, so both the run boundaries and
    -- first(dt_val) would be non-deterministic.
    order by id, dt_val
    measures
        v.id            as id,
        v.val1          as val1,
        v.val2          as val2,
        v.val3          as val3,
        first(v.dt_val) as dt_val
    pattern (v+)
    define v as
        -- decode(a, b, 0) yields 0 when a matches b (DECODE treats two
        -- NULLs as equal) and NULL otherwise: a NULL-safe equality test.
        decode(v.id  , first(id  ), 0) = 0 and
        decode(v.val1, first(val1), 0) = 0 and
        decode(v.val2, first(val2), 0) = 0 and
        decode(v.val3, first(val3), 0) = 0
);
ID VAL1 VAL2 VAL3 DT_VAL
--- ---- ---- ---- --------
1 2 3 4 20151011
1 2 4 20151012
1 2 3 4 20151013
2 4 3 4 20151101
2 4 20151103
2 4 3 4 20151104
3 4 3 4 20151110
4 4 3 4 20151110
4 4 4 20151112
5 4 3 4 20151111
10 rows selected.