计算每小时用量的 SQL 语句

SQL statement to calculate an Hourly amount

我需要计算每小时消耗的资源量。例如,考虑电话通话的分钟数。对于下面的通话记录,我希望生成其后所示的结果:

Call_ID   StartDateTime     EndDateTime  
1         3/15/2014 11:25   3/15/2014 14:01  
2         3/15/2014 11:50   3/15/2014 13:10  
3         3/15/2014 12:05   3/15/2014 12:55  
4         3/15/2014 13:04   3/15/2014 15:02  
5         3/15/2014 13:15   3/15/2014 14:22  
6         3/15/2014 14:35   3/15/2014 15:18  
7         3/15/2014 15:10   3/15/2014 15:29  

Date        Hour    CallMinutes  
3/15/2014   11       45  
3/15/2014   12       170  
3/15/2014   13       171  
3/15/2014   14       108  
3/15/2014   15       39  

我已有可以完成此操作的 SQL,但它使用了游标,我想找到一种更快的替代方法。以下是我目前能正常运行的代码:

/* Create and fill the input table: one row per phone call. */
CREATE TABLE PhoneCalls(
    Call_ID         int IDENTITY(1,1) NOT NULL,  -- generated by the IDENTITY property
    StartDateTime   datetime NOT NULL,
    EndDateTime     datetime NOT NULL,
 CONSTRAINT PK_PhoneCalls PRIMARY KEY CLUSTERED ( Call_ID ASC) ON [PRIMARY]
) ON [PRIMARY];

-- The literals use the ISO 8601 form 'YYYY-MM-DDThh:mm:ss', which is converted to
-- datetime the same way regardless of the session's SET LANGUAGE / SET DATEFORMAT.
-- A literal such as '3/15/2014 11:25' is rejected under DATEFORMAT dmy (no month 15).
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T11:25:00', '2014-03-15T14:01:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T11:50:00', '2014-03-15T13:10:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T12:05:00', '2014-03-15T12:55:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T13:04:00', '2014-03-15T15:02:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T13:15:00', '2014-03-15T14:22:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T14:35:00', '2014-03-15T15:18:00');
INSERT INTO PhoneCalls (StartDateTime, EndDateTime) VALUES ('2014-03-15T15:10:00', '2014-03-15T15:29:00');

/* Temp table for the intermediate results: Call_ID, CallDate, CallHour, CallTime */
CREATE TABLE #HourlyCallTimes
(
    Call_ID  INT      NOT NULL,
    CallDate DATE     NOT NULL,
    CallHour SMALLINT NOT NULL,
    CallTime INT      NOT NULL
);

/* Determine hourly totals of CallTime using a cursor.
   For each call, one row is written to #HourlyCallTimes for every clock hour the
   call touches, holding the number of the call's minutes that fall in that hour. */
SET NOCOUNT ON;
DECLARE @Call_ID int, @StartDt datetime, @CallLength int, @Hour int, @CallTime int;

-- LOCAL scopes the cursor name to this batch; FAST_FORWARD declares it forward-only
-- and read-only, which is all the FETCH NEXT loop below needs.
DECLARE CallsCursor CURSOR LOCAL FAST_FORWARD FOR
    SELECT Call_ID, StartDateTime, DATEDIFF(minute, StartDateTime, EndDateTime) AS CallLength
    FROM PhoneCalls;
OPEN CallsCursor;
FETCH NEXT FROM CallsCursor INTO @Call_ID, @StartDt, @CallLength;
WHILE @@FETCH_STATUS = 0
BEGIN
    /* only process calls whose length in minutes (per DATEDIFF) is at least 1 */
    IF (@CallLength > 0)
    BEGIN
        -- Record the call time for the first (possibly partial) hour
        SET @Hour = DATEPART(hour, @StartDt);
        SET @CallTime = 60 - DATEPART(minute, @StartDt);
        IF (@CallLength < @CallTime) -- the call ends inside its first hour
            SET @CallTime = @CallLength;
        INSERT INTO #HourlyCallTimes (Call_ID, CallDate, CallHour, CallTime)
            VALUES (@Call_ID, CAST(@StartDt AS date), @Hour, @CallTime);

        -- Move to the beginning of the next hour (hour 23 + 1 rolls over to the next day)
        SET @StartDt = DATEADD(hour, @Hour + 1, CAST(CAST(@StartDt AS date) AS datetime));
        SET @CallLength = @CallLength - @CallTime;

        -- Keep going while minutes remain. Looping on the remaining minutes rather
        -- than on @StartDt < EndDateTime avoids writing a 0-minute row when
        -- EndDateTime lies a few seconds past an hour boundary.
        WHILE @CallLength > 0
        BEGIN
            SET @Hour = DATEPART(hour, @StartDt);
            SET @CallTime = 60;
            IF (@CallLength < @CallTime)  -- this is the last (partial) hour
                SET @CallTime = @CallLength;
            INSERT INTO #HourlyCallTimes (Call_ID, CallDate, CallHour, CallTime)
                VALUES (@Call_ID, CAST(@StartDt AS date), @Hour, @CallTime);

            -- Move to the beginning of the next hour
            SET @StartDt = DATEADD(hour, 1, @StartDt);
            SET @CallLength = @CallLength - @CallTime;
        END
    END
    FETCH NEXT FROM CallsCursor INTO @Call_ID, @StartDt, @CallLength;
END
CLOSE CallsCursor;
DEALLOCATE CallsCursor;

/* Final results table: CallTime summed per CallDate and CallHour */
CREATE TABLE HourlyCallTotals (
    CallDate    date NOT NULL,
    CallHour    smallint NOT NULL,
    CallTime    int NOT NULL);

INSERT INTO HourlyCallTotals (CallDate, CallHour, CallTime)
    SELECT CallDate, CallHour, SUM(CallTime)
    FROM #HourlyCallTimes
    GROUP BY CallDate, CallHour;

-- The intermediate and input tables are no longer needed once the totals are stored.
DROP TABLE #HourlyCallTimes;
DROP TABLE PhoneCalls;

-- Explicit column list and ORDER BY: without ORDER BY the order of the returned
-- rows is not guaranteed, so the output could differ from the expected listing.
SELECT CallDate, CallHour, CallTime
FROM HourlyCallTotals
ORDER BY CallDate, CallHour;

DROP TABLE HourlyCallTotals;

/* expected Results */
/*  CallDate    CallHour    CallTime
    3/15/2014       11          45
    3/15/2014       12          170
    3/15/2014       13          171
    3/15/2014       14          108
    3/15/2014       15          39 */

您可以尝试这个递归 CTE 以获得所需的结果:

-- Splits every call into per-hour slices with a recursive CTE, then sums the slice
-- lengths (in minutes) per date and hour.
-- NOTE: with the default MAXRECURSION limit of 100, a single call spanning more than
-- roughly 100 hours raises an error unless an OPTION (MAXRECURSION n) hint is added.
;WITH cte_break AS
(
    -- Anchor member: the slice from the start of the call to the end of its first
    -- clock hour, or to the end of the call when it finishes inside that hour.
    SELECT
        CONVERT(DATE, StartDateTime) AS CallDate,
        DATEPART(HOUR, StartDateTime) AS CallHour,
        CASE
            WHEN DATEDIFF(MINUTE, StartDateTime, EndDateTime) < 60
                 AND DATEPART(HOUR, StartDateTime) = DATEPART(HOUR, EndDateTime)
                THEN DATEDIFF(MINUTE, StartDateTime, EndDateTime)
            ELSE 60 - DATEPART(MINUTE, StartDateTime)
        END AS CallTime,
        -- Start of the next slice. The CASE must be the same one used for CallTime:
        -- testing only "< 60 minutes" would jump straight to EndDateTime for a short
        -- call that crosses an hour boundary (e.g. 14:35 - 15:18) and the minutes in
        -- the second hour would never be counted.
        DATEADD(MINUTE,
                CASE
                    WHEN DATEDIFF(MINUTE, StartDateTime, EndDateTime) < 60
                         AND DATEPART(HOUR, StartDateTime) = DATEPART(HOUR, EndDateTime)
                        THEN DATEDIFF(MINUTE, StartDateTime, EndDateTime)
                    ELSE 60 - DATEPART(MINUTE, StartDateTime)
                END,
                StartDateTime) AS CalcStartTime,
        EndDateTime
    FROM dbo.PhoneCalls
    -- Skip calls with no whole minute to distribute (also guards End < Start rows).
    WHERE DATEDIFF(MINUTE, StartDateTime, EndDateTime) > 0

    UNION ALL

    -- Recursive member: the next slice, starting where the previous one ended.
    SELECT
        CONVERT(DATE, CalcStartTime) AS CallDate,
        DATEPART(HOUR, CalcStartTime) AS CallHour,
        CASE
            WHEN DATEDIFF(MINUTE, CalcStartTime, EndDateTime) < 60
                 AND DATEPART(HOUR, CalcStartTime) = DATEPART(HOUR, EndDateTime)
                THEN DATEDIFF(MINUTE, CalcStartTime, EndDateTime)
            ELSE 60 - DATEPART(MINUTE, CalcStartTime)
        END AS CallTime,
        DATEADD(MINUTE,
                CASE
                    WHEN DATEDIFF(MINUTE, CalcStartTime, EndDateTime) < 60
                         AND DATEPART(HOUR, CalcStartTime) = DATEPART(HOUR, EndDateTime)
                        THEN DATEDIFF(MINUTE, CalcStartTime, EndDateTime)
                    ELSE 60 - DATEPART(MINUTE, CalcStartTime)
                END,
                CalcStartTime) AS CalcStartTime,
        EndDateTime
    FROM cte_break
    -- Stop once no whole minute remains. For minute-precision data this is the same
    -- as CalcStartTime < EndDateTime; with seconds present it prevents a 0-minute
    -- slice from recursing without making progress.
    WHERE DATEDIFF(MINUTE, CalcStartTime, EndDateTime) > 0
)
SELECT
    CallDate,
    CallHour,
    SUM(CallTime) AS CallTime
FROM cte_break
GROUP BY CallDate, CallHour
ORDER BY CallDate, CallHour;

要摆脱游标/循环:您需要列出所有可能的小时(从 min(StartHour) 到 max(EndHour)),并计算每次通话落在每个小时内的通话时间。可以把它想象成一个矩阵:列是小时,行是通话,只需对每一列求和即可得到每小时的通话时间。

ID Hour0 Hour1 Hour2 Hour3 ...
1 0 0 4 0
2 0 0 3 0
3 0 1 2 0
...

这样一来,需要完成更多的计数/计算工作(最坏情况下是循环方法的 23 倍),但是从循环改为基于集合的逻辑所带来的好处远远超过这一成本。

Procedural vs Set-Based SQL

这里给出部分解决方案(不能处理不同日期):

-- Set-based partial solution: cross joins every call with every hour in the observed
-- range and sums the minutes per hour. It works on the hour of day only, so it does
-- not distinguish dates and does not handle calls that cross midnight.
DECLARE @maxHour INT, @minHour INT, @disHours INT;

SELECT  @minHour = MIN(DATEPART(HOUR, StartDateTime)),
        @maxHour = MAX(DATEPART(HOUR, EndDateTime))
FROM    PhoneCalls;

-- Computed in its own statement: when one SELECT contains several variable
-- assignments, their evaluation order is not guaranteed, so @disHours must not read
-- @minHour/@maxHour in the SELECT that sets them. COALESCE makes TOP below receive 0
-- rather than NULL when PhoneCalls is empty.
SET @disHours = COALESCE(@maxHour - @minHour + 1, 0);

-- Filling out a range from @minHour to @maxHour
-- (sys.all_objects is used only as a convenient source of rows to number)
SELECT TOP (@disHours) @minHour - 1 + ROW_NUMBER() OVER (ORDER BY [object_id]) AS CallHour
INTO    #hours
FROM    sys.all_objects
ORDER BY CallHour;

-- Count the per call per hour call time
SELECT  CallHour,
    CASE
        -- the call covers the whole hour
        WHEN DATEPART(HOUR, StartDateTime) < CallHour AND DATEPART(HOUR, EndDateTime) > CallHour THEN 60
        -- the call starts and ends within this hour
        WHEN DATEPART(HOUR, StartDateTime) = CallHour AND DATEPART(HOUR, EndDateTime) = CallHour THEN DATEPART(MINUTE, EndDateTime) - DATEPART(MINUTE, StartDateTime)
        -- the call started earlier and ends in this hour
        WHEN DATEPART(HOUR, StartDateTime) < CallHour AND DATEPART(HOUR, EndDateTime) = CallHour THEN DATEPART(MINUTE, EndDateTime)
        -- the call starts in this hour and continues past it
        WHEN DATEPART(HOUR, StartDateTime) = CallHour AND DATEPART(HOUR, EndDateTime) > CallHour THEN 60 - DATEPART(MINUTE, StartDateTime)
        -- the call does not touch this hour
        ELSE 0
    END     AS CallTimePerCallPerHour
INTO    #cross
FROM    PhoneCalls
    CROSS JOIN #hours;

-- The final result
SELECT  CallHour,
    SUM(CallTimePerCallPerHour) AS CallTime
FROM    #cross
WHERE   CallTimePerCallPerHour > 0  -- leave out call/hour pairs with no call time
GROUP BY CallHour
ORDER BY CallHour;  -- row order is not guaranteed without ORDER BY

DROP TABLE #hours;
DROP TABLE #cross;

对于大约 1M 行、开始/结束时间随机分布在 00:00 到 23:59 之间的数据(这是基于集合的方法的最坏情况),运行时间从 90 秒下降到 20 秒。

至于不同的日期,逐个日期循环处理可能仍然是个好主意,否则中间表会变得太大。

也可能有不同日期开始和结束的通话。这可能很难处理,但由于它们无论如何都会成为少数,也许只需要添加一个预处理步骤来挑选它们并将它们分开:

3/15/2016 23:49 - 3/16/2016 00:05 =>
3/15/2016 23:49 - 3/15/2016 23:60 // instead of 24:00 so we don't need to count hour 24
3/16/2016 00:00 - 3/16/2016 00:05