sql 中的 select 行,每个 ID 重复多次 end_date >= start_date
select rows in sql with end_date >= start_date for each ID repeated multiple times
附上数据的图像。在我的 table 中,我有 3 列 id
、start date
和 end date
,以及这样的值:
id start date end date
-------------------------------
100 2015-01-01 2015-12-31
100 2016-01-10 2018-12-31
200 2015-02-15 2016-03-15
200 2016-03-15 2016-12-31
300 2016-01-01 2016-12-31
400 2017-01-01 2017-12-31
500 2017-02-01 2017-12-31
600 2017-01-15 2017-03-05
600 2017-02-01 2018-12-31
我希望我的输出是
id start date end date
--------------------------------
100 2015-01-01 2015-12-31
100 2016-01-10 2018-12-31
200 2015-02-15 2016-12-31
300 2016-01-01 2016-12-31
400 2017-01-01 2017-12-31
500 2017-02-01 2017-12-31
600 2017-01-15 2018-12-31
查询:
select
id, *
from
dbo.test_sl
where
id in (select id
from dbo.test_sl
where end_date >= start_date
group by id)
请帮助我获得我正在寻找的输出。
假设只有两条记录可以合并在一起,你可以LEFT JOIN
把table自己加上一个CASE
来显示自合并记录的结束日期, 如果可用的话。
SELECT
t1.id,
min(t1.start_date),
CASE WHEN t2.end_date IS NULL THEN t1.end_date ELSE t2.end_date END
FROM
table t1
LEFT JOIN table t2
ON t1.id = t2.id
AND t2.start_date > t1.start_date
AND t2.start_date <= t1.end_date
GROUP BY
t1.id,
CASE WHEN t2.end_date IS NULL THEN t1.end_date ELSE t2.end_date END
ORDER BY 1
测试于 this SQL Fiddle
这是一个使用递归 CTE 的解决方案。
它基本上循环遍历每个 id 的日期,并为重叠的 end_date/start_date 保留最小的 start_date。
然后将结果分组,因此不再有重叠。
在 rextester 上测试 here。
WITH SRC AS
(
SELECT id, start_date, end_date,
row_number() over (partition by id order by start_date) as rn
FROM test_sl
)
, RCTE AS
(
SELECT id, rn, start_date, end_date
FROM SRC
WHERE rn = 1
UNION ALL
SELECT t.id, t.rn, iif(r.end_date >= t.start_date, r.start_date, t.start_date), t.end_date
FROM RCTE r
JOIN SRC t ON t.id = r.id AND t.rn = r.rn + 1
)
SELECT id, start_date, max(end_date) as end_date
FROM RCTE
GROUP BY id, start_date
ORDER BY id, start_date;
这是一个缺口和孤岛问题的例子。在这种情况下,您想要找到 不 与相同 id
重叠的相邻行。这些是组的开始。一个组的开头的累加和,提供一个分组号,可以用来聚合。
在查询中,这看起来像:
select id, min(startdate), max(enddate)
from (select t.*,
sum(isstart) over (partition by id order by startdate) as grp
from (select t.*,
(case when exists (select 1
from test_sl t2
where t2.id = t.id and
t2.startdate < t.startdate and
t2.enddate >= t.startdate
)
then 0 else 1
end) as isstart
from test_sl t
) t
) t
group by id, grp;
附上数据的图像。在我的 table 中,我有 3 列 id
、start date
和 end date
,以及这样的值:
id start date end date
-------------------------------
100 2015-01-01 2015-12-31
100 2016-01-10 2018-12-31
200 2015-02-15 2016-03-15
200 2016-03-15 2016-12-31
300 2016-01-01 2016-12-31
400 2017-01-01 2017-12-31
500 2017-02-01 2017-12-31
600 2017-01-15 2017-03-05
600 2017-02-01 2018-12-31
我希望我的输出是
id start date end date
--------------------------------
100 2015-01-01 2015-12-31
100 2016-01-10 2018-12-31
200 2015-02-15 2016-12-31
300 2016-01-01 2016-12-31
400 2017-01-01 2017-12-31
500 2017-02-01 2017-12-31
600 2017-01-15 2018-12-31
查询:
select
id, *
from
dbo.test_sl
where
id in (select id
from dbo.test_sl
where end_date >= start_date
group by id)
请帮助我获得我正在寻找的输出。
假设只有两条记录可以合并在一起,你可以LEFT JOIN
把table自己加上一个CASE
来显示自合并记录的结束日期, 如果可用的话。
SELECT
t1.id,
min(t1.start_date),
CASE WHEN t2.end_date IS NULL THEN t1.end_date ELSE t2.end_date END
FROM
table t1
LEFT JOIN table t2
ON t1.id = t2.id
AND t2.start_date > t1.start_date
AND t2.start_date <= t1.end_date
GROUP BY
t1.id,
CASE WHEN t2.end_date IS NULL THEN t1.end_date ELSE t2.end_date END
ORDER BY 1
测试于 this SQL Fiddle
这是一个使用递归 CTE 的解决方案。
它基本上循环遍历每个 id 的日期,并为重叠的 end_date/start_date 保留最小的 start_date。
然后将结果分组,因此不再有重叠。
在 rextester 上测试 here。
WITH SRC AS
(
SELECT id, start_date, end_date,
row_number() over (partition by id order by start_date) as rn
FROM test_sl
)
, RCTE AS
(
SELECT id, rn, start_date, end_date
FROM SRC
WHERE rn = 1
UNION ALL
SELECT t.id, t.rn, iif(r.end_date >= t.start_date, r.start_date, t.start_date), t.end_date
FROM RCTE r
JOIN SRC t ON t.id = r.id AND t.rn = r.rn + 1
)
SELECT id, start_date, max(end_date) as end_date
FROM RCTE
GROUP BY id, start_date
ORDER BY id, start_date;
这是一个缺口和孤岛问题的例子。在这种情况下,您想要找到 不 与相同 id
重叠的相邻行。这些是组的开始。一个组的开头的累加和,提供一个分组号,可以用来聚合。
在查询中,这看起来像:
select id, min(startdate), max(enddate)
from (select t.*,
sum(isstart) over (partition by id order by startdate) as grp
from (select t.*,
(case when exists (select 1
from test_sl t2
where t2.id = t.id and
t2.startdate < t.startdate and
t2.enddate >= t.startdate
)
then 0 else 1
end) as isstart
from test_sl t
) t
) t
group by id, grp;