对 sql 中的日期范围进行计数和求和

count and sum over date ranges in sql

当员工更换其所属部门时,我遇到了一个特定的场景。

具体细节如下: 我在 SAP HANA 数据库中有 2 个表 - 部门和员工。

Dept_ID|Start_Date |End_Date

1      |15-Jan-2017|31-Dec-9999

Emp_ID|Dept_ID|Start_Date  |End_Date

123   |1      |1-Jan-2017  |31-Dec-9999

456   |1      |1-Jan-2017  |31-Dec-9999

789   |1      |1-Jan-2017  |25-Jan-2017

789   |2      |26-Jan-2017 |31-Dec-9999

666   |1      |23-Jan-2017 |31-Dec-9999

输出中我需要的是一段时间内每个部门的员工数量 -

Dept_ID|Emp_Count|Start_Date |End_Date

1      |3        |15-Jan-2017|23-Jan-2017

1      |4        |23-Jan-2017|25-Jan-2017

1      |3        |25-Jan-2017|31-Dec-9999

我尝试使用 CTE 配合 SUM() OVER (PARTITION BY …),但无法获得所需的结果。

请帮我解决这个问题。

编辑: 添加创建定义和插入语句

-- Department table: the key is (DEPT_ID, START_DATE), so a department may
-- carry several rows, each with its own START_DATE/END_DATE pair.
CREATE COLUMN TABLE DEPT (
    "DEPT_ID"    NVARCHAR(400)        NOT NULL,
    "START_DATE" LONGDATE CS_LONGDATE NOT NULL,
    "END_DATE"   LONGDATE CS_LONGDATE NOT NULL,
    PRIMARY KEY INVERTED VALUE ("DEPT_ID", "START_DATE")
) UNLOAD PRIORITY 5 AUTO MERGE;

-- Employee table: the key is (EMP_ID, START_DATE), so an employee may carry
-- several rows (one per department assignment), each with its own date pair.
-- NOTE(review): "DEPT_ID" is nullable and NVARCHAR(4000) here, while
-- DEPT."DEPT_ID" is NVARCHAR(400) NOT NULL -- confirm the mismatch is intended.
CREATE COLUMN TABLE EMP (
    "EMP_ID"     NVARCHAR(400)        NOT NULL,
    "DEPT_ID"    NVARCHAR(4000),
    "START_DATE" LONGDATE CS_LONGDATE NOT NULL,
    "END_DATE"   LONGDATE CS_LONGDATE NOT NULL,
    PRIMARY KEY INVERTED VALUE ("EMP_ID", "START_DATE")
) UNLOAD PRIORITY 5 AUTO MERGE;

-- Sample rows for the question. Every INSERT names its target columns so the
-- statements do not depend on the physical column order of DEPT / EMP, and
-- every statement is terminated so the script can be run as one batch.
-- NOTE(review): the 'DD.MM.YYYY HH:MI:SS.F' literals rely on implicit
-- string-to-LONGDATE conversion -- confirm the session accepts this format.
INSERT INTO DEPT ("DEPT_ID", "START_DATE", "END_DATE")
VALUES ('1', '15.01.2017 22:58:09.0', '31.12.9999 00:00:00.0');

INSERT INTO EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
VALUES ('123', '1', '01.01.2017 22:58:09.0', '31.12.9999 00:00:00.0');

INSERT INTO EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
VALUES ('456', '1', '01.01.2017 22:58:09.0', '31.12.9999 00:00:00.0');

-- Employee 789 has two rows: department 1 until 25.01.2017 10:00:00,
-- department 2 from that same instant onwards.
INSERT INTO EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
VALUES ('789', '1', '01.01.2017 22:58:09.0', '25.01.2017 10:00:00.0');

INSERT INTO EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
VALUES ('789', '2', '25.01.2017 10:00:00.0', '31.12.9999 00:00:00.0');

INSERT INTO EMP ("EMP_ID", "DEPT_ID", "START_DATE", "END_DATE")
VALUES ('666', '1', '23.01.2017 22:58:09.0', '31.12.9999 00:00:00.0');

请参见下面针对 SQL Server 编写的解决方案。

-- SQL Server script: counts the employees of each department for every
-- reporting period listed in @Ranges.
-- All date literals use the unseparated 'yyyymmdd' form (the same form the
-- @Ranges rows use) so they do not depend on the session language.

DECLARE @Department TABLE
(
    [Dept_ID]    [int]  NOT NULL,
    [Start_Date] [date] NOT NULL,
    [End_Date]   [date] NOT NULL
);

DECLARE @Employee TABLE
(
    [Emp_ID]     [int]  NOT NULL,
    [Dept_ID]    [int]  NOT NULL,
    [Start_Date] [date] NOT NULL,
    [End_Date]   [date] NOT NULL
);

-- The reporting periods are entered by hand below; they are not derived
-- from the employee or department rows.
DECLARE @Ranges TABLE
(
    [Range_ID]   [int]  NOT NULL,
    [Dept_ID]    [int]  NOT NULL,
    [Start_Date] [date] NOT NULL,
    [End_Date]   [date] NOT NULL
);

INSERT INTO @Department (Dept_ID, Start_Date, End_Date)
VALUES (1, '20170115', '99991231');

INSERT INTO @Employee (Emp_ID, Dept_ID, Start_Date, End_Date)
VALUES (123, 1, '20170101', '99991231'),
       (456, 1, '20170101', '99991231'),
       (789, 1, '20170101', '20170125'),
       (789, 2, '20170126', '99991231'),
       (666, 1, '20170123', '99991231');

INSERT INTO @Ranges (Range_ID, Dept_ID, Start_Date, End_Date)
VALUES (1, 1, '20170115', '20170123'),
       (2, 1, '20170123', '20170125'),
       (3, 1, '20170125', '99991231');

-- An employee row counts towards a period only when it covers the whole
-- period; the same containment test is applied to the department row.
SELECT
    E.Dept_ID,
    COUNT(*) AS Emp_Count,
    R.Start_Date,
    R.End_Date
FROM @Employee AS E
INNER JOIN @Department AS D
    ON D.Dept_ID = E.Dept_ID
INNER JOIN @Ranges AS R
    ON R.Dept_ID = D.Dept_ID
WHERE E.Start_Date <= R.Start_Date
  AND E.End_Date   >= R.End_Date
  AND D.Start_Date <= R.Start_Date
  AND D.End_Date   >= R.End_Date
GROUP BY E.Dept_ID, R.Start_Date, R.End_Date
-- Explicit ordering makes the output deterministic.
ORDER BY E.Dept_ID, R.Start_Date;

不幸的是,我无法在 SAP HANA 上进行测试,因此这里发布的是 SQL Server 的解决方案。我尽量只使用据我了解在 SAP HANA 上同样有效的语法。

-- Builds consecutive date ranges per department from the boundary dates
-- found in emp and dept, then counts the employees overlapping each range.
with dates as
(
  -- Number each department's boundary dates in chronological order.
  select t.dept_id, t.dat, row_number() over (partition by t.dept_id order by t.dat) rn
  from
  (
    -- UNION (not UNION ALL): a boundary date contributed by several rows
    -- (e.g. two employees starting on the same date) must appear only once,
    -- otherwise a zero-width range (d, d) is produced and counted below.
    -- Employee start dates that fall after the department's own start.
    select e.dept_id, e.start_date dat from emp e where e.start_date > (select d.start_date from dept d where d.dept_id = e.dept_id)
    union
    -- Employee end dates that fall before the department's own end.
    select e.dept_id, e.end_date dat from emp e where e.end_date < (select d.end_date from dept d where d.dept_id = e.dept_id)
    union
    select dept_id, start_date dat from dept
    union
    select dept_id, end_date dat from dept
  ) t
)
select e.dept_id, count(*), t.startd, t.endd
from emp e
inner join
(
  -- Pair every boundary date with the next one of the same department.
  select d1.dept_id, d1.dat startd, d2.dat endd
  from dates d1
  inner join dates d2 on d1.dept_id = d2.dept_id and d1.rn + 1 = d2.rn
) t on t.dept_id = e.dept_id and e.start_date < t.endd and e.end_date > t.startd
group by e.dept_id, t.startd, t.endd
-- Explicit ordering makes the output deterministic.
order by e.dept_id, t.startd

demo

结果

dept_id count   startd     endd
1       3       15/01/2017 23/01/2017
1       4       23/01/2017 25/01/2017
1       3       25/01/2017 31/12/9999

我昨天没能完成这个,但由于我已经做了一些准备,这里在前一个答案的基础上做了一些小改动。差异是:

  • 使用联接将部门的起止(from/to)日期附加到员工行中
  • 使用 lead() 函数代替 row_number()(避免自连接)
  • 部门表(Department)中包含部门 2 的行

Demo at SQL Fiddle

-- Demo fixture: Department table with one row each for departments 1 and 2,
-- both carrying the same start and end date.
CREATE TABLE Department
(
    [Dept_ID]    int,
    [Start_Date] datetime,
    [End_Date]   datetime
);

INSERT INTO Department ([Dept_ID], [Start_Date], [End_Date])
VALUES (1, '2017-01-15 00:00:00', '9999-12-31 00:00:00'),
       (2, '2017-01-15 00:00:00', '9999-12-31 00:00:00');


-- Demo fixture: Employee table, one row per employee/department assignment.
-- Employee 789 has two rows (department 1, then department 2).
CREATE TABLE Employee
(
    [Emp_ID]     int,
    [Dept_ID]    int,
    [Start_Date] datetime,
    [End_Date]   datetime
);

INSERT INTO Employee ([Emp_ID], [Dept_ID], [Start_Date], [End_Date])
VALUES (123, 1, '2017-01-01 00:00:00', '9999-12-31 00:00:00'),
       (456, 1, '2017-01-01 00:00:00', '9999-12-31 00:00:00'),
       (789, 1, '2017-01-01 00:00:00', '2017-01-25 00:00:00'),
       (789, 2, '2017-01-26 00:00:00', '9999-12-31 00:00:00'),
       (666, 1, '2017-01-23 00:00:00', '9999-12-31 00:00:00');

查询 1:

-- Counts employees per department for each consecutive date range formed by
-- the department's boundary dates (department start/end plus the employee
-- start/end dates that fall strictly inside the department period).
WITH
    -- Each employee row paired with the start/end dates of its department.
    emp_dept AS (
        SELECT
            e.dept_id,
            e.start_date,
            e.end_date,
            d.start_date AS stdt,
            d.end_date   AS endt
        FROM Employee AS e
        INNER JOIN Department AS d
            ON e.dept_id = d.dept_id
    ),
    -- Distinct boundary dates per department. UNION (not UNION ALL) is
    -- required: a date contributed by several rows (e.g. two employees
    -- starting on the same day) must appear once, otherwise LEAD() below
    -- yields a zero-width range (from_date = to_date) that is then counted.
    boundary AS (
        SELECT dept_id, start_date AS boundary_date FROM emp_dept WHERE start_date > stdt
        UNION
        SELECT dept_id, end_date   FROM emp_dept WHERE end_date < endt
        UNION
        SELECT dept_id, start_date FROM Department
        UNION
        SELECT dept_id, end_date   FROM Department
    ),
    -- Pair every boundary with the next one of the same department; the last
    -- boundary of a department gets a NULL to_date.
    date_range AS (
        SELECT
            dept_id,
            boundary_date AS from_date,
            LEAD(boundary_date) OVER (PARTITION BY dept_id
                                      ORDER BY boundary_date) AS to_date
        FROM boundary
    )
SELECT
    e.dept_id,
    r.from_date,
    r.to_date,
    COUNT(*) AS num_employees
FROM Employee AS e
INNER JOIN date_range AS r
    ON e.dept_id = r.dept_id
    AND e.start_date < r.to_date
    AND e.end_date > r.from_date
-- Drops the open-ended row produced for each department's last boundary.
WHERE r.to_date IS NOT NULL
GROUP BY
    e.dept_id,
    r.from_date,
    r.to_date
-- Explicit ordering makes the output deterministic.
ORDER BY
    e.dept_id,
    r.from_date

Results:

| dept_id |            from_date |              to_date | num_employees |
|---------|----------------------|----------------------|---------------|
|       1 | 2017-01-15T00:00:00Z | 2017-01-23T00:00:00Z |             3 |
|       1 | 2017-01-23T00:00:00Z | 2017-01-25T00:00:00Z |             4 |
|       1 | 2017-01-25T00:00:00Z | 9999-12-31T00:00:00Z |             3 |
|       2 | 2017-01-26T00:00:00Z | 9999-12-31T00:00:00Z |             1 |