在不使用过程的情况下以 'range' 格式序列化日期
Serialize dates in a 'range' format without using a procedure
我需要在某一列中找出连续的日期范围,并以简洁的格式将它们序列化(连续范围写成 `start - end`,单独的一天写成 `date`)。
我的 CTE(`readings`)返回的数据集类似于:
ID VALUE DATE
1234567 A 2012-05-09
1234567 A 2012-05-10
1234567 A 2012-05-11
1234567 A 2012-05-16
1234567 A 2012-05-17
1234567 A 2012-05-20
1234567 B 2012-05-11
1234567 B 2012-05-12
1234567 B 2012-05-13
1234567 B 2012-05-14
我已经能够得到:
ID VALUE TOTAL_DAYS DATES
1234567 A 6 2012-05-09; 2012-05-10; 2012-05-11; 2012-05-16; 2012-05-17; 2012-05-20
1234567 B 4 2012-05-11; 2012-05-12; 2012-05-13; 2012-05-14
使用:
-- Fragment of a larger WITH clause. `readings` is the source CTE (its body is
-- elided in the post); `readings_aggr` collapses it to one row per (ID, VALUE)
-- carrying the distinct-day count and a '; '-separated list of the dates.
readings AS (
...
)
,
readings_aggr AS (   -- name now matches the final SELECT (was `reading_aggr`)
    SELECT r1.ID
        ,r1.[VALUE]
        ,COUNT(DISTINCT r1.[DATE]) AS TOTAL_DAYS
        ,STUFF((
            -- Style 120 cut to 10 characters yields yyyy-mm-dd regardless of
            -- session language settings.
            SELECT '; ' + CONVERT(varchar(10), r0.[DATE], 120)
            FROM readings AS r0
            -- The outer columns must be qualified: an unqualified `id` inside
            -- this subquery binds to r0, which makes the predicate always true
            -- and concatenates every row's dates into every group.
            WHERE r1.ID = r0.ID
              AND r1.[VALUE] = r0.[VALUE]
            ORDER BY r0.[DATE]
            FOR XML PATH(''), TYPE).value('(./text())[1]', 'VARCHAR(MAX)'
        ), 1, 2, '') AS DATES   -- STUFF strips the leading '; '
    FROM readings AS r1
    GROUP BY r1.ID, r1.[VALUE]
)
SELECT ID, [VALUE], TOTAL_DAYS, DATES
FROM readings_aggr
我想这样格式化:
ID VALUE TOTAL_DAYS DATES
1234567 A 6 2012-05-09 - 2012-05-11; 2012-05-16 - 2012-05-17; 2012-05-20
1234567 B 4 2012-05-11 - 2012-05-14
如果不使用过程式(procedural)方法,能做到这一点吗?
您可能可以使用 CLR 聚合来执行此操作。
这是一个来自 MSDN 的示例,它将您的数据连接在一起。只需将逗号更改为分号,您就可以使用当前格式进行更清晰的查询。
https://msdn.microsoft.com/en-us/library/ms165055%28v=vs.90%29.aspx
部署好之后,您可以调整 Accumulate 和/或 Terminate 方法,让它检查数据并在可能的情况下输出范围。您可能希望把值累积到 SortedList 之类的对象中(而不是 StringBuilder),然后在 Terminate 方法中进行范围分析。
您可以使用此查询:
-- Gaps-and-islands: within each (ID, VALUE) the day number minus the date's
-- rank is constant across a run of consecutive days, so grouping on that
-- difference yields one row per contiguous range.
SELECT ID, [VALUE], MIN([DATE]) AS startDate, MAX([DATE]) AS endDate
FROM (
    SELECT ID, [VALUE], [DATE],
        -- DENSE_RANK (not ROW_NUMBER) so that a repeated date keeps the same
        -- rank and does not split its island in two.
        DATEDIFF(DAY, '1900-01-01', [DATE])
            - DENSE_RANK() OVER (PARTITION BY ID, [VALUE] ORDER BY [DATE]) AS DateGroup
    FROM readings
) AS rGroups
GROUP BY ID, [VALUE], DateGroup
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
ORDER BY ID, [VALUE], startDate
得到一个包含数据中所有“开始-结束”区间的表表达式:
ID VALUE startDate endDate
--------------------------------------
1234567 A 2012-05-09 2012-05-11
1234567 A 2012-05-16 2012-05-17
1234567 A 2012-05-20 2012-05-20
1234567 B 2012-05-11 2012-05-14
然后在 `readings_aggr` 中使用上面的查询:
-- Serializes the dates of each (ID, VALUE) as '; '-separated ranges:
-- 'start - end' for a run of consecutive days, a bare date for a single day.
-- `readings` is the source rowset (ID, VALUE, DATE) defined elsewhere.
;WITH start_end_readings AS (
    -- One row per island (run of consecutive days) per (ID, VALUE).
    SELECT ID, [VALUE], MIN([DATE]) AS startDate, MAX([DATE]) AS endDate
    FROM (
        SELECT ID, [VALUE], [DATE],
            -- DENSE_RANK (not ROW_NUMBER) so that a repeated date keeps the
            -- same rank and does not split its island in two.
            DATEDIFF(DAY, '1900-01-01', [DATE])
                - DENSE_RANK() OVER (PARTITION BY ID, [VALUE] ORDER BY [DATE]) AS DateGroup
        FROM readings
    ) AS rGroups
    GROUP BY ID, [VALUE], DateGroup
), readings_aggr AS (
    SELECT r1.ID
        ,r1.[VALUE]
        ,COUNT(DISTINCT r1.[DATE]) AS TOTAL_DAYS
        ,STUFF((
            -- Style 120 cut to 10 characters yields yyyy-mm-dd regardless of
            -- session language settings.
            SELECT '; ' + CONVERT(varchar(10), r0.startDate, 120)
                + CASE WHEN r0.startDate <> r0.endDate
                       THEN ' - ' + CONVERT(varchar(10), r0.endDate, 120)
                       ELSE ''
                  END
            FROM start_end_readings AS r0
            WHERE r1.ID = r0.ID
              AND r1.[VALUE] = r0.[VALUE]
            ORDER BY r0.startDate
            FOR XML PATH(''), TYPE).value('(./text())[1]', 'VARCHAR(MAX)'
        ), 1, 2, '') AS DATES   -- STUFF strips the leading '; '
    FROM readings AS r1
    GROUP BY r1.ID, r1.[VALUE]
)
SELECT ID, [VALUE], TOTAL_DAYS, DATES
FROM readings_aggr
获得想要的结果:
ID VALUE TOTAL_DAYS DATES
===========================================================================
1234567 A 6 2012-05-09 - 2012-05-11; 2012-05-16 - 2012-05-17; 2012-05-20
1234567 B 4 2012-05-11 - 2012-05-14
你可以这样做:
-- Self-contained demo: load the sample readings into a table variable, find
-- the islands (runs of consecutive days) per (ID, V), then concatenate them
-- into one '; '-separated string per (ID, V).
DECLARE @t TABLE ( ID INT, V CHAR(1), D DATE );

INSERT INTO @t ( ID, V, D )
VALUES  ( 1234567, 'A', '2012-05-09' ),
        ( 1234567, 'A', '2012-05-10' ),
        ( 1234567, 'A', '2012-05-11' ),
        ( 1234567, 'A', '2012-05-16' ),
        ( 1234567, 'A', '2012-05-17' ),
        ( 1234567, 'A', '2012-05-20' ),
        ( 1234567, 'B', '2012-05-11' ),
        ( 1234567, 'B', '2012-05-12' ),
        ( 1234567, 'B', '2012-05-13' ),
        ( 1234567, 'B', '2012-05-14' );

WITH cte1 AS (
    -- One row per island: its start date, display text and distinct-day count.
    SELECT ID,
           V,
           MIN(D) AS island_start,
           CASE WHEN MIN(D) <> MAX(D)
                THEN CONVERT(VARCHAR(10), MIN(D), 121) + ' - '
                     + CONVERT(VARCHAR(10), MAX(D), 121)
                ELSE CONVERT(VARCHAR(10), MIN(D), 121)
           END AS D,
           COUNT(DISTINCT D) AS cn
    FROM ( SELECT ID,
                  V,
                  D,
                  -- Date minus its rank is constant across consecutive days.
                  -- The window must be partitioned by ID as well as V,
                  -- otherwise rows of different IDs interleave and break the
                  -- islands. DENSE_RANK keeps a repeated date from splitting
                  -- its island.
                  DATEADD(dd,
                          -DENSE_RANK() OVER ( PARTITION BY ID, V ORDER BY D ),
                          D) AS rn
           FROM @t
         ) AS a
    GROUP BY ID,
             V,
             rn
),
cte2 AS (
    SELECT c.ID,
           c.V,
           SUM(c.cn) AS TOTAL_DAYS,
           STUFF(( SELECT '; ' + r0.D
                   FROM cte1 AS r0
                   WHERE c.ID = r0.ID
                     AND c.V = r0.V
                   -- Explicit ordering: without it the order of the ranges in
                   -- the concatenated string is not guaranteed.
                   ORDER BY r0.island_start
                   FOR XML PATH(''), TYPE ).value('(./text())[1]', 'VARCHAR(MAX)'),
                 1, 2, '') AS DATES   -- STUFF strips the leading '; '
    FROM cte1 AS c
    GROUP BY c.ID,
             c.V
)
SELECT ID, V, TOTAL_DAYS, DATES
FROM cte2;
输出:
ID V TOTAL_DAYS DATES
1234567 A 6 2012-05-09 - 2012-05-11; 2012-05-16 - 2012-05-17; 2012-05-20
1234567 B 4 2012-05-11 - 2012-05-14
思路是先求出“岛”(islands,即连续的日期段,参见 https://www.simple-talk.com/sql/t-sql-programming/the-sql-of-gaps-and-islands-in-sequences/),然后再套用你原有的拼接逻辑。我知道 @Betsos 已经抢先回答了,这个写法略有不同,但思路是一样的。
我需要在某一列中找出连续的日期范围,并以简洁的格式将它们序列化(连续范围写成 `start - end`,单独的一天写成 `date`)。
我的 CTE(`readings`)返回的数据集类似于:
ID VALUE DATE
1234567 A 2012-05-09
1234567 A 2012-05-10
1234567 A 2012-05-11
1234567 A 2012-05-16
1234567 A 2012-05-17
1234567 A 2012-05-20
1234567 B 2012-05-11
1234567 B 2012-05-12
1234567 B 2012-05-13
1234567 B 2012-05-14
我已经能够得到:
ID VALUE TOTAL_DAYS DATES
1234567 A 6 2012-05-09; 2012-05-10; 2012-05-11; 2012-05-16; 2012-05-17; 2012-05-20
1234567 B 4 2012-05-11; 2012-05-12; 2012-05-13; 2012-05-14
使用:
-- Fragment of a larger WITH clause. `readings` is the source CTE (its body is
-- elided in the post); `readings_aggr` collapses it to one row per (ID, VALUE)
-- carrying the distinct-day count and a '; '-separated list of the dates.
readings AS (
...
)
,
readings_aggr AS (   -- name now matches the final SELECT (was `reading_aggr`)
    SELECT r1.ID
        ,r1.[VALUE]
        ,COUNT(DISTINCT r1.[DATE]) AS TOTAL_DAYS
        ,STUFF((
            -- Style 120 cut to 10 characters yields yyyy-mm-dd regardless of
            -- session language settings.
            SELECT '; ' + CONVERT(varchar(10), r0.[DATE], 120)
            FROM readings AS r0
            -- The outer columns must be qualified: an unqualified `id` inside
            -- this subquery binds to r0, which makes the predicate always true
            -- and concatenates every row's dates into every group.
            WHERE r1.ID = r0.ID
              AND r1.[VALUE] = r0.[VALUE]
            ORDER BY r0.[DATE]
            FOR XML PATH(''), TYPE).value('(./text())[1]', 'VARCHAR(MAX)'
        ), 1, 2, '') AS DATES   -- STUFF strips the leading '; '
    FROM readings AS r1
    GROUP BY r1.ID, r1.[VALUE]
)
SELECT ID, [VALUE], TOTAL_DAYS, DATES
FROM readings_aggr
我想这样格式化:
ID VALUE TOTAL_DAYS DATES
1234567 A 6 2012-05-09 - 2012-05-11; 2012-05-16 - 2012-05-17; 2012-05-20
1234567 B 4 2012-05-11 - 2012-05-14
如果不使用过程式(procedural)方法,能做到这一点吗?
您可能可以使用 CLR 聚合来执行此操作。
这是一个来自 MSDN 的示例,它将您的数据连接在一起。只需将逗号更改为分号,您就可以使用当前格式进行更清晰的查询。
https://msdn.microsoft.com/en-us/library/ms165055%28v=vs.90%29.aspx
部署好之后,您可以调整 Accumulate 和/或 Terminate 方法,让它检查数据并在可能的情况下输出范围。您可能希望把值累积到 SortedList 之类的对象中(而不是 StringBuilder),然后在 Terminate 方法中进行范围分析。
您可以使用此查询:
-- Gaps-and-islands: within each (ID, VALUE) the day number minus the date's
-- rank is constant across a run of consecutive days, so grouping on that
-- difference yields one row per contiguous range.
SELECT ID, [VALUE], MIN([DATE]) AS startDate, MAX([DATE]) AS endDate
FROM (
    SELECT ID, [VALUE], [DATE],
        -- DENSE_RANK (not ROW_NUMBER) so that a repeated date keeps the same
        -- rank and does not split its island in two.
        DATEDIFF(DAY, '1900-01-01', [DATE])
            - DENSE_RANK() OVER (PARTITION BY ID, [VALUE] ORDER BY [DATE]) AS DateGroup
    FROM readings
) AS rGroups
GROUP BY ID, [VALUE], DateGroup
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
ORDER BY ID, [VALUE], startDate
得到一个包含数据中所有“开始-结束”区间的表表达式:
ID VALUE startDate endDate
--------------------------------------
1234567 A 2012-05-09 2012-05-11
1234567 A 2012-05-16 2012-05-17
1234567 A 2012-05-20 2012-05-20
1234567 B 2012-05-11 2012-05-14
然后在 `readings_aggr` 中使用上面的查询:
-- Serializes the dates of each (ID, VALUE) as '; '-separated ranges:
-- 'start - end' for a run of consecutive days, a bare date for a single day.
-- `readings` is the source rowset (ID, VALUE, DATE) defined elsewhere.
;WITH start_end_readings AS (
    -- One row per island (run of consecutive days) per (ID, VALUE).
    SELECT ID, [VALUE], MIN([DATE]) AS startDate, MAX([DATE]) AS endDate
    FROM (
        SELECT ID, [VALUE], [DATE],
            -- DENSE_RANK (not ROW_NUMBER) so that a repeated date keeps the
            -- same rank and does not split its island in two.
            DATEDIFF(DAY, '1900-01-01', [DATE])
                - DENSE_RANK() OVER (PARTITION BY ID, [VALUE] ORDER BY [DATE]) AS DateGroup
        FROM readings
    ) AS rGroups
    GROUP BY ID, [VALUE], DateGroup
), readings_aggr AS (
    SELECT r1.ID
        ,r1.[VALUE]
        ,COUNT(DISTINCT r1.[DATE]) AS TOTAL_DAYS
        ,STUFF((
            -- Style 120 cut to 10 characters yields yyyy-mm-dd regardless of
            -- session language settings.
            SELECT '; ' + CONVERT(varchar(10), r0.startDate, 120)
                + CASE WHEN r0.startDate <> r0.endDate
                       THEN ' - ' + CONVERT(varchar(10), r0.endDate, 120)
                       ELSE ''
                  END
            FROM start_end_readings AS r0
            WHERE r1.ID = r0.ID
              AND r1.[VALUE] = r0.[VALUE]
            ORDER BY r0.startDate
            FOR XML PATH(''), TYPE).value('(./text())[1]', 'VARCHAR(MAX)'
        ), 1, 2, '') AS DATES   -- STUFF strips the leading '; '
    FROM readings AS r1
    GROUP BY r1.ID, r1.[VALUE]
)
SELECT ID, [VALUE], TOTAL_DAYS, DATES
FROM readings_aggr
获得想要的结果:
ID VALUE TOTAL_DAYS DATES
===========================================================================
1234567 A 6 2012-05-09 - 2012-05-11; 2012-05-16 - 2012-05-17; 2012-05-20
1234567 B 4 2012-05-11 - 2012-05-14
你可以这样做:
-- Self-contained demo: load the sample readings into a table variable, find
-- the islands (runs of consecutive days) per (ID, V), then concatenate them
-- into one '; '-separated string per (ID, V).
DECLARE @t TABLE ( ID INT, V CHAR(1), D DATE );

INSERT INTO @t ( ID, V, D )
VALUES  ( 1234567, 'A', '2012-05-09' ),
        ( 1234567, 'A', '2012-05-10' ),
        ( 1234567, 'A', '2012-05-11' ),
        ( 1234567, 'A', '2012-05-16' ),
        ( 1234567, 'A', '2012-05-17' ),
        ( 1234567, 'A', '2012-05-20' ),
        ( 1234567, 'B', '2012-05-11' ),
        ( 1234567, 'B', '2012-05-12' ),
        ( 1234567, 'B', '2012-05-13' ),
        ( 1234567, 'B', '2012-05-14' );

WITH cte1 AS (
    -- One row per island: its start date, display text and distinct-day count.
    SELECT ID,
           V,
           MIN(D) AS island_start,
           CASE WHEN MIN(D) <> MAX(D)
                THEN CONVERT(VARCHAR(10), MIN(D), 121) + ' - '
                     + CONVERT(VARCHAR(10), MAX(D), 121)
                ELSE CONVERT(VARCHAR(10), MIN(D), 121)
           END AS D,
           COUNT(DISTINCT D) AS cn
    FROM ( SELECT ID,
                  V,
                  D,
                  -- Date minus its rank is constant across consecutive days.
                  -- The window must be partitioned by ID as well as V,
                  -- otherwise rows of different IDs interleave and break the
                  -- islands. DENSE_RANK keeps a repeated date from splitting
                  -- its island.
                  DATEADD(dd,
                          -DENSE_RANK() OVER ( PARTITION BY ID, V ORDER BY D ),
                          D) AS rn
           FROM @t
         ) AS a
    GROUP BY ID,
             V,
             rn
),
cte2 AS (
    SELECT c.ID,
           c.V,
           SUM(c.cn) AS TOTAL_DAYS,
           STUFF(( SELECT '; ' + r0.D
                   FROM cte1 AS r0
                   WHERE c.ID = r0.ID
                     AND c.V = r0.V
                   -- Explicit ordering: without it the order of the ranges in
                   -- the concatenated string is not guaranteed.
                   ORDER BY r0.island_start
                   FOR XML PATH(''), TYPE ).value('(./text())[1]', 'VARCHAR(MAX)'),
                 1, 2, '') AS DATES   -- STUFF strips the leading '; '
    FROM cte1 AS c
    GROUP BY c.ID,
             c.V
)
SELECT ID, V, TOTAL_DAYS, DATES
FROM cte2;
输出:
ID V TOTAL_DAYS DATES
1234567 A 6 2012-05-09 - 2012-05-11; 2012-05-16 - 2012-05-17; 2012-05-20
1234567 B 4 2012-05-11 - 2012-05-14
思路是先求出“岛”(islands,即连续的日期段,参见 https://www.simple-talk.com/sql/t-sql-programming/the-sql-of-gaps-and-islands-in-sequences/),然后再套用你原有的拼接逻辑。我知道 @Betsos 已经抢先回答了,这个写法略有不同,但思路是一样的。