对 sql 中的日期范围进行计数和求和
count and sum over date ranges in sql
当员工更换所在部门时,我遇到了一个特殊的场景。
具体细节如下:
我在 SAP HANA 数据库中有 2 个表 - 部门和员工。
Dept_ID|Start_Date |End_Date
1 |15-Jan-2017|31-Dec-9999
Emp_ID|Dept_ID|Start_Date |End_Date
123 |1 |1-Jan-2017 |31-Dec-9999
456 |1 |1-Jan-2017 |31-Dec-9999
789 |1 |1-Jan-2017 |25-Jan-2017
789 |2 |26-Jan-2017 |31-Dec-9999
666 |1 |23-Jan-2017 |31-Dec-9999
输出中我需要的是一段时间内每个部门的员工数量 -
Dept_ID|Emp_Count|Start_Date |End_Date
1 |3 |15-Jan-2017|23-Jan-2017
1 |4 |23-Jan-2017|25-Jan-2017
1 |3 |25-Jan-2017|31-Dec-9999
我尝试使用 CTE 配合 SUM() OVER (PARTITION BY ...),但无法获得所需的结果。
请帮我解决这个问题。
编辑:
添加创建定义和插入语句
-- SAP HANA column tables used by the question.
-- DEPT: primary key is (DEPT_ID, START_DATE).
CREATE COLUMN TABLE DEPT (
    "DEPT_ID"    NVARCHAR(400) NOT NULL,
    "START_DATE" LONGDATE CS_LONGDATE NOT NULL,
    "END_DATE"   LONGDATE CS_LONGDATE NOT NULL,
    PRIMARY KEY INVERTED VALUE ("DEPT_ID", "START_DATE")
) UNLOAD PRIORITY 5 AUTO MERGE;

-- EMP: primary key is (EMP_ID, START_DATE); DEPT_ID is nullable here.
-- NOTE(review): DEPT_ID is NVARCHAR(4000) in EMP but NVARCHAR(400) in DEPT --
-- looks like a typo; confirm before aligning the two lengths.
CREATE COLUMN TABLE EMP (
    "EMP_ID"     NVARCHAR(400) NOT NULL,
    "DEPT_ID"    NVARCHAR(4000),
    "START_DATE" LONGDATE CS_LONGDATE NOT NULL,
    "END_DATE"   LONGDATE CS_LONGDATE NOT NULL,
    PRIMARY KEY INVERTED VALUE ("EMP_ID", "START_DATE")
) UNLOAD PRIORITY 5 AUTO MERGE;
-- Sample rows. Columns are named explicitly so the inserts do not depend on
-- the physical column order of the tables.
-- Employee 789 has two rows: dept '1' until 25.01.2017 10:00:00, then dept '2'
-- starting at that same instant.
insert into DEPT ("DEPT_ID", "START_DATE", "END_DATE")
    values ('1', '15.01.2017 22:58:09.0', '31.12.9999 00:00:00.0');
insert into EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
    values ('123', '1', '01.01.2017 22:58:09.0', '31.12.9999 00:00:00.0');
insert into EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
    values ('456', '1', '01.01.2017 22:58:09.0', '31.12.9999 00:00:00.0');
insert into EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
    values ('789', '1', '01.01.2017 22:58:09.0', '25.01.2017 10:00:00.0');
insert into EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
    values ('789', '2', '25.01.2017 10:00:00.0', '31.12.9999 00:00:00.0');
insert into EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
    values ('666', '1', '23.01.2017 22:58:09.0', '31.12.9999 00:00:00.0');
请看下面用 SQL Server 编写的示例。
-- SQL Server demo using table variables. Run the whole script as one batch.
-- Date literals use the 'yyyymmdd' form, which SQL Server parses the same way
-- regardless of the session language / DATEFORMAT settings.
DECLARE @Department TABLE
(
    [Dept_ID]    [int]  NOT NULL,
    [Start_Date] [date] NOT NULL,
    [End_Date]   [date] NOT NULL
);

DECLARE @Employee TABLE
(
    [Emp_ID]     [int]  NOT NULL,
    [Dept_ID]    [int]  NOT NULL,
    [Start_Date] [date] NOT NULL,
    [End_Date]   [date] NOT NULL
);

-- The reporting intervals are supplied by hand in this variant.
-- Range_ID is stored but not referenced by the query below.
DECLARE @Ranges TABLE
(
    [Range_ID]   [int]  NOT NULL,
    [Dept_ID]    [int]  NOT NULL,
    [Start_Date] [date] NOT NULL,
    [End_Date]   [date] NOT NULL
);

INSERT INTO @Department (Dept_ID, Start_Date, End_Date)
VALUES (1, '20170115', '99991231');

INSERT INTO @Employee (Emp_ID, Dept_ID, Start_Date, End_Date)
VALUES (123, 1, '20170101', '99991231'),
       (456, 1, '20170101', '99991231'),
       (789, 1, '20170101', '20170125'),
       (789, 2, '20170126', '99991231'),
       (666, 1, '20170123', '99991231');

INSERT INTO @Ranges (Range_ID, Dept_ID, Start_Date, End_Date)
VALUES (1, 1, '20170115', '20170123'),
       (2, 1, '20170123', '20170125'),
       (3, 1, '20170125', '99991231');

-- An employee is counted for a range when both the employee's period and the
-- department's period fully cover that range (inclusive on both ends).
SELECT E.Dept_ID, COUNT(*) AS Emp_Count, R.Start_Date, R.End_Date
FROM @Employee AS E
INNER JOIN @Department AS D
    ON D.Dept_ID = E.Dept_ID
INNER JOIN @Ranges AS R
    ON R.Dept_ID = D.Dept_ID
   AND E.Start_Date <= R.Start_Date
   AND E.End_Date   >= R.End_Date
   AND D.Start_Date <= R.Start_Date
   AND D.End_Date   >= R.End_Date
GROUP BY E.Dept_ID, R.Start_Date, R.End_Date
ORDER BY E.Dept_ID, R.Start_Date;
不幸的是,我无法在 SAP HANA 上进行测试,因此这里给出的是 SQL Server 上的解决方案。我尽量只使用在 SAP HANA 中同样有效的语法。
-- Employee head-count per department for every interval between two
-- consecutive boundary dates of that department.
with boundary as
(
    -- Boundary dates: employee starts/ends that fall strictly inside the
    -- department's own period, plus the department's start and end.
    -- UNION (not UNION ALL) is required: when two employees start or end on
    -- the same date, a duplicated boundary would be paired with itself below
    -- and reported as a zero-length interval (and could inflate the counts).
    -- NOTE: the scalar subqueries require at most one dept row per dept_id.
    select e.dept_id, e.start_date as dat
    from emp e
    where e.start_date > (select d.start_date from dept d where d.dept_id = e.dept_id)
    union
    select e.dept_id, e.end_date as dat
    from emp e
    where e.end_date < (select d.end_date from dept d where d.dept_id = e.dept_id)
    union
    select d.dept_id, d.start_date as dat from dept d
    union
    select d.dept_id, d.end_date as dat from dept d
),
dates as
(
    -- Number the distinct boundaries per department in chronological order.
    select b.dept_id, b.dat,
           row_number() over (partition by b.dept_id order by b.dat) as rn
    from boundary b
),
intervals as
(
    -- Pair each boundary with the next one to form [startd, endd).
    select d1.dept_id, d1.dat as startd, d2.dat as endd
    from dates d1
    inner join dates d2
        on d1.dept_id = d2.dept_id
       and d1.rn + 1 = d2.rn
)
-- An employee row counts for an interval when the two periods overlap.
select e.dept_id, count(*), t.startd, t.endd
from emp e
inner join intervals t
    on t.dept_id = e.dept_id
   and e.start_date < t.endd
   and e.end_date > t.startd
group by e.dept_id, t.startd, t.endd
order by e.dept_id, t.startd;
结果
dept_id count startd endd
1 3 15/01/2017 23/01/2017
1 4 23/01/2017 25/01/2017
1 3 25/01/2017 31/12/9999
我昨天没能完成这个回答,但既然已经做了一些准备,这里在前一个答案的基础上做了一些小改动。差异是:
- 使用联接把部门的起止(from/to)日期带入员工行
- 使用 lead() 函数而不是 row_number()(避免自连接)
- 部门表中增加了部门 2 的一行
-- Sample schema and data for the query that follows
-- (bracket-quoted identifiers, as used by SQL Server).
CREATE TABLE Department (
    [Dept_ID]    int,
    [Start_Date] datetime,
    [End_Date]   datetime
);

-- Both departments share the same period in this sample.
INSERT INTO Department ([Dept_ID], [Start_Date], [End_Date])
VALUES
    (1, '2017-01-15 00:00:00', '9999-12-31 00:00:00'),
    (2, '2017-01-15 00:00:00', '9999-12-31 00:00:00');

CREATE TABLE Employee (
    [Emp_ID]     int,
    [Dept_ID]    int,
    [Start_Date] datetime,
    [End_Date]   datetime
);

-- Employee 789 appears twice: dept 1 until 2017-01-25, dept 2 from 2017-01-26.
INSERT INTO Employee ([Emp_ID], [Dept_ID], [Start_Date], [End_Date])
VALUES
    (123, 1, '2017-01-01 00:00:00', '9999-12-31 00:00:00'),
    (456, 1, '2017-01-01 00:00:00', '9999-12-31 00:00:00'),
    (789, 1, '2017-01-01 00:00:00', '2017-01-25 00:00:00'),
    (789, 2, '2017-01-26 00:00:00', '9999-12-31 00:00:00'),
    (666, 1, '2017-01-23 00:00:00', '9999-12-31 00:00:00');
查询 1:
-- Employee head-count per department for every interval between two
-- consecutive boundary dates of that department.
WITH
emp_dept AS (
    -- Each employee row together with its department's own from/to dates.
    SELECT
          e.dept_id
        , e.start_date
        , e.end_date
        , d.start_date AS stdt
        , d.end_date   AS endt
    FROM Employee e
    INNER JOIN Department d ON e.dept_id = d.dept_id
),
boundary AS (
    -- Boundary dates: employee starts/ends strictly inside the department's
    -- period, plus the department's own start and end.
    -- UNION (not UNION ALL) is required: a date contributed twice (e.g. two
    -- employees starting on the same day) would otherwise make LEAD() return
    -- the same date as its successor, producing a zero-length interval.
    SELECT dept_id, start_date AS boundary_date FROM emp_dept WHERE start_date > stdt
    UNION
    SELECT dept_id, end_date FROM emp_dept WHERE end_date < endt
    UNION
    SELECT dept_id, start_date FROM Department
    UNION
    SELECT dept_id, end_date FROM Department
),
date_range AS (
    -- Pair each boundary with the next one; the last boundary of a department
    -- has no successor, so its to_date is NULL.
    SELECT
          dept_id
        , boundary_date AS from_date
        , LEAD(boundary_date) OVER (PARTITION BY dept_id
                                    ORDER BY boundary_date) AS to_date
    FROM boundary
)
-- An employee row counts for an interval when the two periods overlap.
SELECT
      e.dept_id
    , r.from_date
    , r.to_date
    , COUNT(*) AS num_employees
FROM Employee e
INNER JOIN date_range r
    ON e.dept_id = r.dept_id
   AND e.start_date < r.to_date
   AND e.end_date > r.from_date
WHERE r.to_date IS NOT NULL
GROUP BY
      e.dept_id
    , r.from_date
    , r.to_date
ORDER BY
      e.dept_id
    , r.from_date
;
| dept_id | from_date | to_date | num_employees |
|---------|----------------------|----------------------|---------------|
| 1 | 2017-01-15T00:00:00Z | 2017-01-23T00:00:00Z | 3 |
| 1 | 2017-01-23T00:00:00Z | 2017-01-25T00:00:00Z | 4 |
| 1 | 2017-01-25T00:00:00Z | 9999-12-31T00:00:00Z | 3 |
| 2 | 2017-01-26T00:00:00Z | 9999-12-31T00:00:00Z | 1 |
当员工更换所在部门时,我遇到了一个特殊的场景。
具体细节如下: 我在 SAP HANA 数据库中有 2 个表 - 部门和员工。
Dept_ID|Start_Date |End_Date
1 |15-Jan-2017|31-Dec-9999
Emp_ID|Dept_ID|Start_Date |End_Date
123 |1 |1-Jan-2017 |31-Dec-9999
456 |1 |1-Jan-2017 |31-Dec-9999
789 |1 |1-Jan-2017 |25-Jan-2017
789 |2 |26-Jan-2017 |31-Dec-9999
666 |1 |23-Jan-2017 |31-Dec-9999
输出中我需要的是一段时间内每个部门的员工数量 -
Dept_ID|Emp_Count|Start_Date |End_Date
1 |3 |15-Jan-2017|23-Jan-2017
1 |4 |23-Jan-2017|25-Jan-2017
1 |3 |25-Jan-2017|31-Dec-9999
我尝试使用 CTE 求和(分区),但我无法获得所需的结果。
请帮我解决这个问题。
编辑: 添加创建定义和插入语句
-- SAP HANA column tables used by the question.
-- DEPT: primary key is (DEPT_ID, START_DATE).
CREATE COLUMN TABLE DEPT (
    "DEPT_ID"    NVARCHAR(400) NOT NULL,
    "START_DATE" LONGDATE CS_LONGDATE NOT NULL,
    "END_DATE"   LONGDATE CS_LONGDATE NOT NULL,
    PRIMARY KEY INVERTED VALUE ("DEPT_ID", "START_DATE")
) UNLOAD PRIORITY 5 AUTO MERGE;

-- EMP: primary key is (EMP_ID, START_DATE); DEPT_ID is nullable here.
-- NOTE(review): DEPT_ID is NVARCHAR(4000) in EMP but NVARCHAR(400) in DEPT --
-- looks like a typo; confirm before aligning the two lengths.
CREATE COLUMN TABLE EMP (
    "EMP_ID"     NVARCHAR(400) NOT NULL,
    "DEPT_ID"    NVARCHAR(4000),
    "START_DATE" LONGDATE CS_LONGDATE NOT NULL,
    "END_DATE"   LONGDATE CS_LONGDATE NOT NULL,
    PRIMARY KEY INVERTED VALUE ("EMP_ID", "START_DATE")
) UNLOAD PRIORITY 5 AUTO MERGE;
-- Sample rows. Columns are named explicitly so the inserts do not depend on
-- the physical column order of the tables.
-- Employee 789 has two rows: dept '1' until 25.01.2017 10:00:00, then dept '2'
-- starting at that same instant.
insert into DEPT ("DEPT_ID", "START_DATE", "END_DATE")
    values ('1', '15.01.2017 22:58:09.0', '31.12.9999 00:00:00.0');
insert into EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
    values ('123', '1', '01.01.2017 22:58:09.0', '31.12.9999 00:00:00.0');
insert into EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
    values ('456', '1', '01.01.2017 22:58:09.0', '31.12.9999 00:00:00.0');
insert into EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
    values ('789', '1', '01.01.2017 22:58:09.0', '25.01.2017 10:00:00.0');
insert into EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
    values ('789', '2', '25.01.2017 10:00:00.0', '31.12.9999 00:00:00.0');
insert into EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
    values ('666', '1', '23.01.2017 22:58:09.0', '31.12.9999 00:00:00.0');
请看下面用 SQL Server 编写的示例。
-- SQL Server demo using table variables. Run the whole script as one batch.
-- Date literals use the 'yyyymmdd' form, which SQL Server parses the same way
-- regardless of the session language / DATEFORMAT settings.
DECLARE @Department TABLE
(
    [Dept_ID]    [int]  NOT NULL,
    [Start_Date] [date] NOT NULL,
    [End_Date]   [date] NOT NULL
);

DECLARE @Employee TABLE
(
    [Emp_ID]     [int]  NOT NULL,
    [Dept_ID]    [int]  NOT NULL,
    [Start_Date] [date] NOT NULL,
    [End_Date]   [date] NOT NULL
);

-- The reporting intervals are supplied by hand in this variant.
-- Range_ID is stored but not referenced by the query below.
DECLARE @Ranges TABLE
(
    [Range_ID]   [int]  NOT NULL,
    [Dept_ID]    [int]  NOT NULL,
    [Start_Date] [date] NOT NULL,
    [End_Date]   [date] NOT NULL
);

INSERT INTO @Department (Dept_ID, Start_Date, End_Date)
VALUES (1, '20170115', '99991231');

INSERT INTO @Employee (Emp_ID, Dept_ID, Start_Date, End_Date)
VALUES (123, 1, '20170101', '99991231'),
       (456, 1, '20170101', '99991231'),
       (789, 1, '20170101', '20170125'),
       (789, 2, '20170126', '99991231'),
       (666, 1, '20170123', '99991231');

INSERT INTO @Ranges (Range_ID, Dept_ID, Start_Date, End_Date)
VALUES (1, 1, '20170115', '20170123'),
       (2, 1, '20170123', '20170125'),
       (3, 1, '20170125', '99991231');

-- An employee is counted for a range when both the employee's period and the
-- department's period fully cover that range (inclusive on both ends).
SELECT E.Dept_ID, COUNT(*) AS Emp_Count, R.Start_Date, R.End_Date
FROM @Employee AS E
INNER JOIN @Department AS D
    ON D.Dept_ID = E.Dept_ID
INNER JOIN @Ranges AS R
    ON R.Dept_ID = D.Dept_ID
   AND E.Start_Date <= R.Start_Date
   AND E.End_Date   >= R.End_Date
   AND D.Start_Date <= R.Start_Date
   AND D.End_Date   >= R.End_Date
GROUP BY E.Dept_ID, R.Start_Date, R.End_Date
ORDER BY E.Dept_ID, R.Start_Date;
不幸的是,我无法在 SAP HANA 上进行测试,因此这里给出的是 SQL Server 上的解决方案。我尽量只使用在 SAP HANA 中同样有效的语法。
-- Employee head-count per department for every interval between two
-- consecutive boundary dates of that department.
with boundary as
(
    -- Boundary dates: employee starts/ends that fall strictly inside the
    -- department's own period, plus the department's start and end.
    -- UNION (not UNION ALL) is required: when two employees start or end on
    -- the same date, a duplicated boundary would be paired with itself below
    -- and reported as a zero-length interval (and could inflate the counts).
    -- NOTE: the scalar subqueries require at most one dept row per dept_id.
    select e.dept_id, e.start_date as dat
    from emp e
    where e.start_date > (select d.start_date from dept d where d.dept_id = e.dept_id)
    union
    select e.dept_id, e.end_date as dat
    from emp e
    where e.end_date < (select d.end_date from dept d where d.dept_id = e.dept_id)
    union
    select d.dept_id, d.start_date as dat from dept d
    union
    select d.dept_id, d.end_date as dat from dept d
),
dates as
(
    -- Number the distinct boundaries per department in chronological order.
    select b.dept_id, b.dat,
           row_number() over (partition by b.dept_id order by b.dat) as rn
    from boundary b
),
intervals as
(
    -- Pair each boundary with the next one to form [startd, endd).
    select d1.dept_id, d1.dat as startd, d2.dat as endd
    from dates d1
    inner join dates d2
        on d1.dept_id = d2.dept_id
       and d1.rn + 1 = d2.rn
)
-- An employee row counts for an interval when the two periods overlap.
select e.dept_id, count(*), t.startd, t.endd
from emp e
inner join intervals t
    on t.dept_id = e.dept_id
   and e.start_date < t.endd
   and e.end_date > t.startd
group by e.dept_id, t.startd, t.endd
order by e.dept_id, t.startd;
结果
dept_id count startd endd
1 3 15/01/2017 23/01/2017
1 4 23/01/2017 25/01/2017
1 3 25/01/2017 31/12/9999
我昨天没能完成这个回答,但既然已经做了一些准备,这里在前一个答案的基础上做了一些小改动。差异是:
- 使用联接把部门的起止(from/to)日期带入员工行
- 使用 lead() 函数而不是 row_number()(避免自连接)
- 部门表中增加了部门 2 的一行
-- Sample schema and data for the query that follows
-- (bracket-quoted identifiers, as used by SQL Server).
CREATE TABLE Department (
    [Dept_ID]    int,
    [Start_Date] datetime,
    [End_Date]   datetime
);

-- Both departments share the same period in this sample.
INSERT INTO Department ([Dept_ID], [Start_Date], [End_Date])
VALUES
    (1, '2017-01-15 00:00:00', '9999-12-31 00:00:00'),
    (2, '2017-01-15 00:00:00', '9999-12-31 00:00:00');

CREATE TABLE Employee (
    [Emp_ID]     int,
    [Dept_ID]    int,
    [Start_Date] datetime,
    [End_Date]   datetime
);

-- Employee 789 appears twice: dept 1 until 2017-01-25, dept 2 from 2017-01-26.
INSERT INTO Employee ([Emp_ID], [Dept_ID], [Start_Date], [End_Date])
VALUES
    (123, 1, '2017-01-01 00:00:00', '9999-12-31 00:00:00'),
    (456, 1, '2017-01-01 00:00:00', '9999-12-31 00:00:00'),
    (789, 1, '2017-01-01 00:00:00', '2017-01-25 00:00:00'),
    (789, 2, '2017-01-26 00:00:00', '9999-12-31 00:00:00'),
    (666, 1, '2017-01-23 00:00:00', '9999-12-31 00:00:00');
查询 1:
-- Employee head-count per department for every interval between two
-- consecutive boundary dates of that department.
WITH
emp_dept AS (
    -- Each employee row together with its department's own from/to dates.
    SELECT
          e.dept_id
        , e.start_date
        , e.end_date
        , d.start_date AS stdt
        , d.end_date   AS endt
    FROM Employee e
    INNER JOIN Department d ON e.dept_id = d.dept_id
),
boundary AS (
    -- Boundary dates: employee starts/ends strictly inside the department's
    -- period, plus the department's own start and end.
    -- UNION (not UNION ALL) is required: a date contributed twice (e.g. two
    -- employees starting on the same day) would otherwise make LEAD() return
    -- the same date as its successor, producing a zero-length interval.
    SELECT dept_id, start_date AS boundary_date FROM emp_dept WHERE start_date > stdt
    UNION
    SELECT dept_id, end_date FROM emp_dept WHERE end_date < endt
    UNION
    SELECT dept_id, start_date FROM Department
    UNION
    SELECT dept_id, end_date FROM Department
),
date_range AS (
    -- Pair each boundary with the next one; the last boundary of a department
    -- has no successor, so its to_date is NULL.
    SELECT
          dept_id
        , boundary_date AS from_date
        , LEAD(boundary_date) OVER (PARTITION BY dept_id
                                    ORDER BY boundary_date) AS to_date
    FROM boundary
)
-- An employee row counts for an interval when the two periods overlap.
SELECT
      e.dept_id
    , r.from_date
    , r.to_date
    , COUNT(*) AS num_employees
FROM Employee e
INNER JOIN date_range r
    ON e.dept_id = r.dept_id
   AND e.start_date < r.to_date
   AND e.end_date > r.from_date
WHERE r.to_date IS NOT NULL
GROUP BY
      e.dept_id
    , r.from_date
    , r.to_date
ORDER BY
      e.dept_id
    , r.from_date
;
| dept_id | from_date | to_date | num_employees |
|---------|----------------------|----------------------|---------------|
| 1 | 2017-01-15T00:00:00Z | 2017-01-23T00:00:00Z | 3 |
| 1 | 2017-01-23T00:00:00Z | 2017-01-25T00:00:00Z | 4 |
| 1 | 2017-01-25T00:00:00Z | 9999-12-31T00:00:00Z | 3 |
| 2 | 2017-01-26T00:00:00Z | 9999-12-31T00:00:00Z | 1 |