显示 SCD 类型 2 维度表时间趋势的 SQL 查询
SQL query to show time trend of a SCD Type 2 dimension table
Table 如下:
pk,EmployeeNo,Building,ValidFrom,ValidTo.
1, 1, a, 2000-01-01, 2008-06-01
2, 1, b, 2008-06-01, 2010-06-01
3, 1, c, 2010-06-01, 2011-08-01
4, 2, a, 2000-01-01, 2008-06-01
5, 2, b, 2008-06-01, 2010-06-01
6, 2, c, 2010-06-01, 2011-08-01
我是一名相对较新的 SQL 开发人员,使用的是 MS SQL Server。我想设计一个 SQL 查询,它可以基于上面的表(一个使用类型 2 缓慢变化维度构建的表,带有 ValidFrom 和 ValidTo 两个有效期日期)生成一个随时间变化的每日客户计数趋势表。
输出如下:
Date, Employee Count
1/1/2000, 2
1/2/2000, 2
您可以使用聚合和累积总和 -- 在对数据进行逆透视后:
-- Unpivot every validity interval of t into two events:
-- +1 on its validfrom date and -1 on its validto date.
with events as (
    select ev.dte, ev.inc
    from t
    cross apply (values (t.validfrom, 1),
                        (t.validto, -1)) as ev (dte, inc)
)
-- Net the events per date (inner sum), then accumulate those nets
-- in date order (windowed outer sum) to get the count as of each date.
select events.dte,
       sum(sum(events.inc)) over (order by events.dte)
from events
group by events.dte
order by events.dte;
注意:这假定 validto 日期本身不包含在有效期内。如果想把 validto 当天也包含在内,请在 validto 上加 1 天。
编辑:
如果您只需要这两个日期的结果,则使用:
-- Point-in-time count for an explicit list of dates: for each v.dte,
-- count the rows of t whose validity interval covers that date.
--
-- The upper bound must be compared against v.dte. Comparing t.validto with
-- itself is true for every non-NULL validto, so no upper bound was applied.
-- validto is treated as exclusive because, in the sample rows, one version's
-- ValidTo equals the next version's ValidFrom; an inclusive bound would count
-- the same employee twice on a change date.
-- NOTE(review): the sample rows end on 2011-08-01, so the 2020 dates below
-- return 0 against that data -- substitute the dates you actually need.
select v.dte,
       (select count(*)
        from t
        where t.validfrom <= v.dte
          and t.validto > v.dte
       ) as cnt
from (values ('2020-01-01'), ('2020-01-02')) v(dte);
以下 SQL 似乎有效。我假设员工在 ValidTo 日期之前有效,但在 ValidTo 当天不再有效,以避免在一条记录的 ValidTo 与下一条记录的 ValidFrom 相同时重复计算这些日期;如果这个假设不正确,您需要修改 INNER JOIN 的逻辑。
-- Daily employee count from the SCD Type 2 table: one output row per calendar
-- date on which at least one row of the table is valid.
-- Identifier casing is kept consistent throughout so the batch also resolves
-- under a case-sensitive collation.

-- Get the min and max dates in the employee table
DECLARE @todate date, @fromdate date;
SELECT @fromdate = MIN(ValidFrom) FROM [POC].[dbo].[EmployeeSCD2];
SELECT @todate = MAX(ValidTo) FROM [POC].[dbo].[EmployeeSCD2];

-- Generate a list of all the dates between the two dates (both ends generated)
WITH Calendar (FromDate) AS (
    SELECT @fromdate AS FromDate
    UNION ALL
    SELECT DATEADD(day, 1, FromDate)
    FROM Calendar
    WHERE FromDate < @todate
)
-- Join the list of dates to the employee table - gives a record per date per
-- employee row active on that date. ValidTo is exclusive, so a change date is
-- matched only by the version that starts on it.
SELECT Cal.FromDate,
       COUNT(Emp.EmployeeNo) AS [Employee Count]
FROM Calendar AS Cal
INNER JOIN [POC].[dbo].[EmployeeSCD2] AS Emp
    ON Cal.FromDate >= Emp.ValidFrom
   AND Cal.FromDate < Emp.ValidTo
GROUP BY Cal.FromDate
ORDER BY Cal.FromDate -- without this the row order of the trend is not guaranteed
OPTION (MAXRECURSION 0) -- Without this parameter the recursion stops after 100 loops
;
一个简单的方法是建立一个日期 table,然后计算在该特定日期有多少员工(或客户)。
-- Daily employee count: build a date list, then count the dimension rows
-- that are valid on each date.
;with t as ( -- this is your table/Dimension
    select pk,
           EmployeeNo,
           Building,
           convert(date, ValidFrom) as ValidFrom, -- literals below are strings; compare as dates
           convert(date, ValidTo)   as ValidTo
    from (values (1, 1, 'a', '2000-01-01', '2008-06-01'),
                 (2, 1, 'b', '2008-06-01', '2010-06-01'),
                 (3, 1, 'c', '2010-06-01', '2011-08-01'),
                 (4, 2, 'a', '2000-01-01', '2008-06-01'),
                 (5, 2, 'b', '2008-06-01', '2010-06-01'),
                 (6, 2, 'c', '2010-06-01', '2011-08-01')
         ) src (pk, EmployeeNo, Building, ValidFrom, ValidTo)
)
, dates as ( -- this is a recursive query, building a date table from 2000-01-01 up to today
    select convert(date, '2000-01-01') as d
    union all
    select dateadd(day, 1, d)
    from dates
    where d < getdate()-1
)
select d.d, count(1) as [Employe count]
from dates d
-- Half-open interval [ValidFrom, ValidTo): one version's ValidTo equals the next
-- version's ValidFrom, so an inclusive BETWEEN matches two versions of the same
-- employee on every change date and doubles the count for that day.
inner join t on d.d >= t.ValidFrom and d.d < t.ValidTo -- will join only valid employee
group by d.d
order by d.d -- return the trend in date order
option (maxrecursion 0); -- the dates CTE recurses more than the default limit of 100 times, so this option is required
Table 如下:
pk,EmployeeNo,Building,ValidFrom,ValidTo.
1, 1, a, 2000-01-01, 2008-06-01
2, 1, b, 2008-06-01, 2010-06-01
3, 1, c, 2010-06-01, 2011-08-01
4, 2, a, 2000-01-01, 2008-06-01
5, 2, b, 2008-06-01, 2010-06-01
6, 2, c, 2010-06-01, 2011-08-01
我是一名相对较新的 SQL 开发人员,使用的是 MS SQL Server。我想设计一个 SQL 查询,它可以基于上面的表(一个使用类型 2 缓慢变化维度构建的表,带有 ValidFrom 和 ValidTo 两个有效期日期)生成一个随时间变化的每日客户计数趋势表。
输出如下:
Date, Employee Count
1/1/2000, 2
1/2/2000, 2
您可以使用聚合和累积总和 -- 在对数据进行逆透视后:
-- Unpivot every validity interval of t into two events:
-- +1 on its validfrom date and -1 on its validto date.
with events as (
    select ev.dte, ev.inc
    from t
    cross apply (values (t.validfrom, 1),
                        (t.validto, -1)) as ev (dte, inc)
)
-- Net the events per date (inner sum), then accumulate those nets
-- in date order (windowed outer sum) to get the count as of each date.
select events.dte,
       sum(sum(events.inc)) over (order by events.dte)
from events
group by events.dte
order by events.dte;
注意:这假定 validto 日期本身不包含在有效期内。如果想把 validto 当天也包含在内,请在 validto 上加 1 天。
编辑:
如果您只需要这两个日期的结果,则使用:
-- Point-in-time count for an explicit list of dates: for each v.dte,
-- count the rows of t whose validity interval covers that date.
--
-- The upper bound must be compared against v.dte. Comparing t.validto with
-- itself is true for every non-NULL validto, so no upper bound was applied.
-- validto is treated as exclusive because, in the sample rows, one version's
-- ValidTo equals the next version's ValidFrom; an inclusive bound would count
-- the same employee twice on a change date.
-- NOTE(review): the sample rows end on 2011-08-01, so the 2020 dates below
-- return 0 against that data -- substitute the dates you actually need.
select v.dte,
       (select count(*)
        from t
        where t.validfrom <= v.dte
          and t.validto > v.dte
       ) as cnt
from (values ('2020-01-01'), ('2020-01-02')) v(dte);
以下 SQL 似乎有效。我假设员工在 ValidTo 日期之前有效,但在 ValidTo 当天不再有效,以避免在一条记录的 ValidTo 与下一条记录的 ValidFrom 相同时重复计算这些日期;如果这个假设不正确,您需要修改 INNER JOIN 的逻辑。
-- Daily employee count from the SCD Type 2 table: one output row per calendar
-- date on which at least one row of the table is valid.
-- Identifier casing is kept consistent throughout so the batch also resolves
-- under a case-sensitive collation.

-- Get the min and max dates in the employee table
DECLARE @todate date, @fromdate date;
SELECT @fromdate = MIN(ValidFrom) FROM [POC].[dbo].[EmployeeSCD2];
SELECT @todate = MAX(ValidTo) FROM [POC].[dbo].[EmployeeSCD2];

-- Generate a list of all the dates between the two dates (both ends generated)
WITH Calendar (FromDate) AS (
    SELECT @fromdate AS FromDate
    UNION ALL
    SELECT DATEADD(day, 1, FromDate)
    FROM Calendar
    WHERE FromDate < @todate
)
-- Join the list of dates to the employee table - gives a record per date per
-- employee row active on that date. ValidTo is exclusive, so a change date is
-- matched only by the version that starts on it.
SELECT Cal.FromDate,
       COUNT(Emp.EmployeeNo) AS [Employee Count]
FROM Calendar AS Cal
INNER JOIN [POC].[dbo].[EmployeeSCD2] AS Emp
    ON Cal.FromDate >= Emp.ValidFrom
   AND Cal.FromDate < Emp.ValidTo
GROUP BY Cal.FromDate
ORDER BY Cal.FromDate -- without this the row order of the trend is not guaranteed
OPTION (MAXRECURSION 0) -- Without this parameter the recursion stops after 100 loops
;
一个简单的方法是建立一个日期 table,然后计算在该特定日期有多少员工(或客户)。
-- Daily employee count: build a date list, then count the dimension rows
-- that are valid on each date.
;with t as ( -- this is your table/Dimension
    select pk,
           EmployeeNo,
           Building,
           convert(date, ValidFrom) as ValidFrom, -- literals below are strings; compare as dates
           convert(date, ValidTo)   as ValidTo
    from (values (1, 1, 'a', '2000-01-01', '2008-06-01'),
                 (2, 1, 'b', '2008-06-01', '2010-06-01'),
                 (3, 1, 'c', '2010-06-01', '2011-08-01'),
                 (4, 2, 'a', '2000-01-01', '2008-06-01'),
                 (5, 2, 'b', '2008-06-01', '2010-06-01'),
                 (6, 2, 'c', '2010-06-01', '2011-08-01')
         ) src (pk, EmployeeNo, Building, ValidFrom, ValidTo)
)
, dates as ( -- this is a recursive query, building a date table from 2000-01-01 up to today
    select convert(date, '2000-01-01') as d
    union all
    select dateadd(day, 1, d)
    from dates
    where d < getdate()-1
)
select d.d, count(1) as [Employe count]
from dates d
-- Half-open interval [ValidFrom, ValidTo): one version's ValidTo equals the next
-- version's ValidFrom, so an inclusive BETWEEN matches two versions of the same
-- employee on every change date and doubles the count for that day.
inner join t on d.d >= t.ValidFrom and d.d < t.ValidTo -- will join only valid employee
group by d.d
order by d.d -- return the trend in date order
option (maxrecursion 0); -- the dates CTE recurses more than the default limit of 100 times, so this option is required