按时间间隔划分
Partition by time-interval
我正在寻找按日期时间值分区并在分区上运行窗口函数的最佳方法。不过,我不想按精确的时间值分区,而是希望把彼此相差不超过 15 分钟的日期时间划分到同一个分区。
这是我的表的一小部分。
-- Sample data for the time-gap partitioning question: one row per (ID, in_time).
-- The repeated 5221 row is part of the sample and must stay duplicated.
-- Timestamps use the ISO 8601 form (yyyy-mm-ddThh:mi:ss.mmm) so the conversion
-- to DATETIME does not depend on SET LANGUAGE / SET DATEFORMAT.
CREATE TABLE my_table (
    ID      VARCHAR(5),
    in_time DATETIME
);

INSERT INTO my_table (ID, in_time) VALUES
    ('4844', '2017-04-06T10:15:00.000'),
    ('5221', '2017-11-24T11:18:00.000'),
    ('5221', '2017-11-24T11:18:00.000'),
    ('5221', '2017-11-25T14:23:00.000'),
    ('8486', '2017-10-10T15:30:00.000'),
    ('8486', '2017-10-10T15:32:00.000'),
    ('8486', '2017-10-10T15:46:00.000'), -- new row after updating question
    ('8486', '2017-10-10T16:00:00.000'); -- new row after updating question
这是我现在使用的查询:
-- Baseline attempt: number the rows inside each (ID, in_time) partition.
-- in_time is part of the partition key, so only rows that share the exact
-- same timestamp are numbered together.
SELECT
      src.*
    , ROW_NUMBER() OVER (
        PARTITION BY src.ID, src.in_time
        ORDER BY src.ID, src.in_time
    ) AS filter_row
FROM my_table AS src;
正如预期的那样,我得到了这个:
ID in_time filter_row
4844 2017-04-06 10:15:00.000 1
5221 2017-11-24 11:18:00.000 1
5221 2017-11-24 11:18:00.000 2
5221 2017-11-25 14:23:00.000 1
8486 2017-10-10 15:30:00.000 1
8486 2017-10-10 15:32:00.000 1
8486 2017-10-10 15:46:00.000 1
8486 2017-10-10 16:00:00.000 1
我想实现的是:
ID in_time filter_row
4844 2017-04-06 10:15:00.000 1
5221 2017-11-24 11:18:00.000 1
5221 2017-11-24 11:18:00.000 2
5221 2017-11-25 14:23:00.000 1
8486 2017-10-10 15:30:00.000 1
8486 2017-10-10 15:32:00.000 2 -- < notice the 2 here
8486 2017-10-10 15:46:00.000 3 -- < notice the 3 here
8486 2017-10-10 16:00:00.000 4 -- < notice the 4 here
正如您在上面看到的,ID = 8486 的行应该划分到同一个分区,因为每一行的 in_time 与其上一行的 in_time 之间分别只相差 2、14 和 14 分钟。如何高效地做到这一点?
以下示例根据以分钟为单位的指定间隔,计算每一行所属区间的起始时间,并按该值进行分区,从而得到所需的结果。
-- Numbers rows within fixed-length time buckets per ID.
-- Each in_time is truncated to the start of its @IntervalMinutes-wide bucket
-- (buckets are aligned to @BucketOrigin) and that bucket start becomes part of
-- the partition key. Rows that are close in time but sit on opposite sides of
-- a bucket boundary therefore land in different partitions.
DECLARE @IntervalMinutes int = 15;
-- Explicit bucket origin: 1900-01-01, the date SQL Server produces when the
-- empty string '' is converted to a date/time type.
DECLARE @BucketOrigin datetime = '19000101';

SELECT
      *
    , ROW_NUMBER() OVER (
        PARTITION BY
              ID
            -- integer division truncates the minute offset to a bucket multiple
            , DATEADD(
                  minute
                , (DATEDIFF(minute, @BucketOrigin, in_time) / @IntervalMinutes) * @IntervalMinutes
                , @BucketOrigin
              )
        -- ID is constant inside a partition, so in_time alone fixes the order
        ORDER BY in_time
    ) AS filter_row
FROM my_table;
编辑:
以上代码计算的是固定长度的区间。对于您更新后的问题,可以按 ID 找出在间隔超过指定时长处断开的“岛”(islands)来解决。下面的方法使用 NOT EXISTS 和 CROSS APPLY 来识别这些岛,并确定每个岛的起始时间和结束时间。
-- Numbers rows within gap-based "islands" per ID: rows of one ID stay in the
-- same island as long as each row is less than @IntervalMinutes after the
-- previous one. Output columns: ID, in_time, filter_row.
DECLARE @IntervalMinutes int = 15;

WITH
-- Island starts: rows with no earlier row of the same ID inside the interval.
-- DISTINCT collapses duplicate (ID, in_time) rows so each island starts once.
start_intervals AS (
    SELECT DISTINCT
          a.ID
        , a.in_time
    FROM dbo.my_table AS a
    WHERE NOT EXISTS (
        SELECT 1
        FROM dbo.my_table AS b
        WHERE
            b.ID = a.ID
            AND b.in_time < a.in_time
            AND b.in_time > DATEADD(minute, -@IntervalMinutes, a.in_time)
    )
)
-- Island ends: rows with no later row of the same ID inside the interval.
, end_intervals AS (
    SELECT
          a.ID
        , a.in_time
    FROM dbo.my_table AS a
    WHERE NOT EXISTS (
        SELECT 1
        FROM dbo.my_table AS b
        WHERE
            b.ID = a.ID
            AND b.in_time > a.in_time
            AND b.in_time < DATEADD(minute, @IntervalMinutes, a.in_time)
    )
)
-- Pair every island start with the earliest island end at or after it.
, intervals AS (
    SELECT
          start_intervals.ID
        , start_intervals.in_time AS start_interval
        , first_end.in_time AS end_interval
    FROM start_intervals
    CROSS APPLY (
        SELECT TOP (1) end_intervals.in_time
        FROM end_intervals
        WHERE
            end_intervals.ID = start_intervals.ID
            AND end_intervals.in_time >= start_intervals.in_time
        -- TOP (1) needs an ORDER BY to return the nearest end rather than
        -- an arbitrary later island's end
        ORDER BY end_intervals.in_time
    ) AS first_end
)
SELECT
      t.ID
    , t.in_time
    , ROW_NUMBER() OVER (
        PARTITION BY t.ID, intervals.start_interval
        -- start_interval is constant inside the partition; order by the row time
        ORDER BY t.in_time
    ) AS filter_row
FROM dbo.my_table AS t
INNER JOIN intervals
    -- match on ID as well, so a row never joins another ID's island
    ON intervals.ID = t.ID
    AND t.in_time BETWEEN intervals.start_interval AND intervals.end_interval;
我正在寻找按日期时间值分区并在分区上运行窗口函数的最佳方法。不过,我不想按精确的时间值分区,而是希望把彼此相差不超过 15 分钟的日期时间划分到同一个分区。
这是我的表的一小部分。
-- Sample data for the time-gap partitioning question: one row per (ID, in_time).
-- The repeated 5221 row is part of the sample and must stay duplicated.
-- Timestamps use the ISO 8601 form (yyyy-mm-ddThh:mi:ss.mmm) so the conversion
-- to DATETIME does not depend on SET LANGUAGE / SET DATEFORMAT.
CREATE TABLE my_table (
    ID      VARCHAR(5),
    in_time DATETIME
);

INSERT INTO my_table (ID, in_time) VALUES
    ('4844', '2017-04-06T10:15:00.000'),
    ('5221', '2017-11-24T11:18:00.000'),
    ('5221', '2017-11-24T11:18:00.000'),
    ('5221', '2017-11-25T14:23:00.000'),
    ('8486', '2017-10-10T15:30:00.000'),
    ('8486', '2017-10-10T15:32:00.000'),
    ('8486', '2017-10-10T15:46:00.000'), -- new row after updating question
    ('8486', '2017-10-10T16:00:00.000'); -- new row after updating question
这是我现在使用的查询:
-- Baseline attempt: number the rows inside each (ID, in_time) partition.
-- in_time is part of the partition key, so only rows that share the exact
-- same timestamp are numbered together.
SELECT
      src.*
    , ROW_NUMBER() OVER (
        PARTITION BY src.ID, src.in_time
        ORDER BY src.ID, src.in_time
    ) AS filter_row
FROM my_table AS src;
正如预期的那样,我得到了这个:
ID in_time filter_row
4844 2017-04-06 10:15:00.000 1
5221 2017-11-24 11:18:00.000 1
5221 2017-11-24 11:18:00.000 2
5221 2017-11-25 14:23:00.000 1
8486 2017-10-10 15:30:00.000 1
8486 2017-10-10 15:32:00.000 1
8486 2017-10-10 15:46:00.000 1
8486 2017-10-10 16:00:00.000 1
我想实现的是:
ID in_time filter_row
4844 2017-04-06 10:15:00.000 1
5221 2017-11-24 11:18:00.000 1
5221 2017-11-24 11:18:00.000 2
5221 2017-11-25 14:23:00.000 1
8486 2017-10-10 15:30:00.000 1
8486 2017-10-10 15:32:00.000 2 -- < notice the 2 here
8486 2017-10-10 15:46:00.000 3 -- < notice the 3 here
8486 2017-10-10 16:00:00.000 4 -- < notice the 4 here
正如您在上面看到的,ID = 8486 的行应该划分到同一个分区,因为每一行的 in_time 与其上一行的 in_time 之间分别只相差 2、14 和 14 分钟。如何高效地做到这一点?
以下示例根据以分钟为单位的指定间隔,计算每一行所属区间的起始时间,并按该值进行分区,从而得到所需的结果。
-- Numbers rows within fixed-length time buckets per ID.
-- Each in_time is truncated to the start of its @IntervalMinutes-wide bucket
-- (buckets are aligned to @BucketOrigin) and that bucket start becomes part of
-- the partition key. Rows that are close in time but sit on opposite sides of
-- a bucket boundary therefore land in different partitions.
DECLARE @IntervalMinutes int = 15;
-- Explicit bucket origin: 1900-01-01, the date SQL Server produces when the
-- empty string '' is converted to a date/time type.
DECLARE @BucketOrigin datetime = '19000101';

SELECT
      *
    , ROW_NUMBER() OVER (
        PARTITION BY
              ID
            -- integer division truncates the minute offset to a bucket multiple
            , DATEADD(
                  minute
                , (DATEDIFF(minute, @BucketOrigin, in_time) / @IntervalMinutes) * @IntervalMinutes
                , @BucketOrigin
              )
        -- ID is constant inside a partition, so in_time alone fixes the order
        ORDER BY in_time
    ) AS filter_row
FROM my_table;
编辑:
以上代码计算的是固定长度的区间。对于您更新后的问题,可以按 ID 找出在间隔超过指定时长处断开的“岛”(islands)来解决。下面的方法使用 NOT EXISTS 和 CROSS APPLY 来识别这些岛,并确定每个岛的起始时间和结束时间。
-- Numbers rows within gap-based "islands" per ID: rows of one ID stay in the
-- same island as long as each row is less than @IntervalMinutes after the
-- previous one. Output columns: ID, in_time, filter_row.
DECLARE @IntervalMinutes int = 15;

WITH
-- Island starts: rows with no earlier row of the same ID inside the interval.
-- DISTINCT collapses duplicate (ID, in_time) rows so each island starts once.
start_intervals AS (
    SELECT DISTINCT
          a.ID
        , a.in_time
    FROM dbo.my_table AS a
    WHERE NOT EXISTS (
        SELECT 1
        FROM dbo.my_table AS b
        WHERE
            b.ID = a.ID
            AND b.in_time < a.in_time
            AND b.in_time > DATEADD(minute, -@IntervalMinutes, a.in_time)
    )
)
-- Island ends: rows with no later row of the same ID inside the interval.
, end_intervals AS (
    SELECT
          a.ID
        , a.in_time
    FROM dbo.my_table AS a
    WHERE NOT EXISTS (
        SELECT 1
        FROM dbo.my_table AS b
        WHERE
            b.ID = a.ID
            AND b.in_time > a.in_time
            AND b.in_time < DATEADD(minute, @IntervalMinutes, a.in_time)
    )
)
-- Pair every island start with the earliest island end at or after it.
, intervals AS (
    SELECT
          start_intervals.ID
        , start_intervals.in_time AS start_interval
        , first_end.in_time AS end_interval
    FROM start_intervals
    CROSS APPLY (
        SELECT TOP (1) end_intervals.in_time
        FROM end_intervals
        WHERE
            end_intervals.ID = start_intervals.ID
            AND end_intervals.in_time >= start_intervals.in_time
        -- TOP (1) needs an ORDER BY to return the nearest end rather than
        -- an arbitrary later island's end
        ORDER BY end_intervals.in_time
    ) AS first_end
)
SELECT
      t.ID
    , t.in_time
    , ROW_NUMBER() OVER (
        PARTITION BY t.ID, intervals.start_interval
        -- start_interval is constant inside the partition; order by the row time
        ORDER BY t.in_time
    ) AS filter_row
FROM dbo.my_table AS t
INNER JOIN intervals
    -- match on ID as well, so a row never joins another ID's island
    ON intervals.ID = t.ID
    AND t.in_time BETWEEN intervals.start_interval AND intervals.end_interval;