有人可以帮我把以下 SQL 代码中的游标(cursor)替换掉吗
Can someone help me with replacing the cursor in the following SQL code
这段 SQL 代码会遍历一张表,表中的每条记录都带有开始日期和结束日期,类似于缓慢变化维度(slowly changing dimension)。在检查该维度时,我们发现有时某条记录的开始日期并没有紧接在上一条记录的结束日期之后。因此我们循环遍历该表并重新生成 start/end 日期,以确保 start/end 日期确实是可用日期的 min/max。但是处理 900 万行数据耗时太长。我很想改用 while 循环,但不知道如何保留 min/max 值并检测 ID 的切换。
-- Collapses runs of consecutive source rows that share the same
-- (AIDNEBHNPRAFI, CETA) into a single row holding the smallest DDEB and the
-- largest DFIN of the run, and writes the result to
-- dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE.
-- "Consecutive" is defined by the cursor order: AIDNEBHNPRAFI, DDEB, CETA, CECV.
-- NOTE(review): this is row-by-row processing; on millions of rows a set-based
-- query is usually much faster -- confirm with a measurement.

-- Current row read from the cursor.
DECLARE @DDEB DATETIME;
DECLARE @DFIN DATETIME;
DECLARE @CETA FLOAT;
DECLARE @AIDNEBHNPRAFI INT;

-- State of the run being accumulated.
DECLARE @MINDDEB DATETIME;
DECLARE @MAXDFIN DATETIME;
DECLARE @OLDCETA FLOAT;
DECLARE @OLDAIDNEBHNPRAFI INT;

-- Recreate the output table; the existence check keeps the first run from
-- failing when the table has never been created.
IF OBJECT_ID('dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE', 'U') IS NOT NULL
    DROP TABLE dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE;

-- Clone the column definitions of the source table.
-- NOTE(review): the filter presumably matches no rows (-1 used as an
-- impossible id) so the new table starts empty -- confirm.
SELECT DDEB, DFIN, CETA, AIDNEBHNPRAFI
INTO dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE
FROM dbo.TBYDWHTEBHNPRAFIVRS
WHERE AIDNEBHNPRAFI = -1;

-- LOCAL keeps the cursor name private to this batch; FAST_FORWARD is enough
-- because the loop only ever issues FETCH NEXT and never updates through it.
DECLARE T2_CURSOR CURSOR LOCAL FAST_FORWARD FOR
    SELECT DDEB, DFIN, CETA, AIDNEBHNPRAFI
    FROM dbo.TBYDWHTEBHNPRAFIVRS
    WHERE DFIN > DDEB
      AND BRRDDEL <> 1
      AND CSTUVRS = 1
    ORDER BY AIDNEBHNPRAFI, DDEB, CETA, CECV;

OPEN T2_CURSOR;

FETCH NEXT FROM T2_CURSOR
INTO @DDEB, @DFIN, @CETA, @AIDNEBHNPRAFI;

-- Only do any work (including the final flush) when at least one row exists,
-- so an empty source does not produce a bogus summary row.
IF @@FETCH_STATUS = 0
BEGIN
    -- Seed the run from the first row. The previous sentinels were written as
    -- 9999-12-31 and 1835-12-31 without quotes, which T-SQL evaluates as
    -- integer subtraction and then converts to unrelated dates.
    SET @OLDAIDNEBHNPRAFI = @AIDNEBHNPRAFI;
    SET @OLDCETA = @CETA;
    SET @MINDDEB = @DDEB;
    SET @MAXDFIN = @DFIN;

    WHILE @@FETCH_STATUS = 0
    BEGIN
        -- Current row belongs to the run: widen the run's bounds.
        -- NOTE(review): a NULL CETA or id satisfies neither this test nor the
        -- "<>" test below, so such rows are skipped -- confirm the columns
        -- are NOT NULL.
        IF (@OLDCETA = @CETA AND @OLDAIDNEBHNPRAFI = @AIDNEBHNPRAFI)
        BEGIN
            IF (@MINDDEB > @DDEB)
                SET @MINDDEB = @DDEB;

            IF (@MAXDFIN < @DFIN)
                SET @MAXDFIN = @DFIN;
        END

        FETCH NEXT FROM T2_CURSOR
        INTO @DDEB, @DFIN, @CETA, @AIDNEBHNPRAFI;

        -- The freshly fetched row starts a new run: flush the finished run and
        -- restart the accumulators from the new row. After a failed fetch the
        -- variables keep their old values, so this test is false and the last
        -- run is flushed after the loop instead.
        IF (@OLDCETA <> @CETA OR @OLDAIDNEBHNPRAFI <> @AIDNEBHNPRAFI)
        BEGIN
            INSERT INTO dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE (DDEB, DFIN, CETA, AIDNEBHNPRAFI)
            VALUES (@MINDDEB, @MAXDFIN, @OLDCETA, @OLDAIDNEBHNPRAFI);

            SET @OLDAIDNEBHNPRAFI = @AIDNEBHNPRAFI;
            SET @OLDCETA = @CETA;
            SET @MINDDEB = @DDEB;
            SET @MAXDFIN = @DFIN;
        END
    END

    -- Flush the last run, which no change of key ever terminates.
    INSERT INTO dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE (DDEB, DFIN, CETA, AIDNEBHNPRAFI)
    VALUES (@MINDDEB, @MAXDFIN, @OLDCETA, @OLDAIDNEBHNPRAFI);
END

CLOSE T2_CURSOR;
DEALLOCATE T2_CURSOR;
这是源数据
DDEB DFIN CETA AIDNEBHNPRAFI
2006-03-01 00:00:00.000 2006-04-30 23:59:59.000 1 231272
2006-05-01 00:00:00.000 2006-11-30 23:59:59.000 1 231272
2006-12-01 00:00:00.000 2007-04-30 23:59:59.000 1 231272
2007-05-01 00:00:00.000 2008-04-30 23:59:59.000 1 231272
2008-05-01 00:00:00.000 2008-08-31 23:59:59.000 1 231272
2008-09-01 00:00:00.000 2008-10-31 23:59:59.000 2 231272
2008-11-01 00:00:00.000 2009-04-30 23:59:59.000 1 231272
2009-05-01 00:00:00.000 2010-01-31 23:59:59.000 1 231272
2010-02-01 00:00:00.000 9999-12-31 23:59:59.000 14 231272
这是我们用游标得到的结果
DDEB DFIN CETA AIDNEBHNPRAFI
2006-03-01 00:00:00.000 2008-08-31 23:59:59.000 1 231272
2008-09-01 00:00:00.000 2008-10-31 23:59:59.000 2 231272
2008-11-01 00:00:00.000 2010-01-31 23:59:59.000 1 231272
2010-02-01 00:00:00.000 9999-12-31 23:59:59.000 14 231272
下面是一种尝试。它返回的结果与期望一致,但您需要用更多数据进行测试。我还对数据类型做了一些假设;如果您的数据类型不同,则需要相应地修改。
这两个 CTE 分别找出没有相邻衔接记录的开始时间和结束时间(相邻衔接是指上一条记录的结束时间与下一条记录的开始时间恰好相差 1 秒)。然后为这些时间分配序号,并按序号把开始时间和结束时间连接配对。
具体来说,2006 年 3 月 1 日这个开始时间之前没有紧邻的结束时间,因此它被选中,并且是其分组中的第一个开始序号。2008 年 11 月 1 日满足同样的条件,是其分组中的下一个开始序号。2008 年 8 月 31 日是第一个其后没有紧邻开始时间的结束时间,因此是其分组中的第一个结束序号。由于 2006 年 3 月 1 日和 2008 年 8 月 31 日在各自的序列中序号都是 1,它们就标记了该分组中第一个连续区间的开始和结束。
-- Self-contained demo: loads sample validity intervals into #temp and merges
-- back-to-back intervals (previous DFIN exactly one second before next DDEB)
-- that share the same CETA and AIDNEBHNPRAFI.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp;
GO

CREATE TABLE #temp (
    DDEB          DATETIME,
    DFIN          DATETIME,
    CETA          INT,
    AIDNEBHNPRAFI INT
);

-- Datetime literals use the ISO 8601 'T' form, which is read the same way
-- under every SET LANGUAGE / SET DATEFORMAT setting; INT columns get numeric
-- literals instead of strings that rely on implicit conversion.
INSERT INTO #temp (DDEB, DFIN, CETA, AIDNEBHNPRAFI)
VALUES
    ('2006-03-01T00:00:00.000', '2006-04-30T23:59:59.000', 1,  231272),
    ('2006-05-01T00:00:00.000', '2006-11-30T23:59:59.000', 1,  231272),
    ('2006-12-01T00:00:00.000', '2007-04-30T23:59:59.000', 1,  231272),
    ('2007-05-01T00:00:00.000', '2008-04-30T23:59:59.000', 1,  231272),
    ('2008-05-01T00:00:00.000', '2008-08-31T23:59:59.000', 1,  231272),
    ('2008-09-01T00:00:00.000', '2008-10-31T23:59:59.000', 2,  231272),
    ('2008-11-01T00:00:00.000', '2009-04-30T23:59:59.000', 1,  231272),
    ('2009-05-01T00:00:00.000', '2010-01-31T23:59:59.000', 1,  231272),
    ('2010-02-01T00:00:00.000', '9999-12-31T23:59:59.000', 14, 231272);

-- Starts: rows whose DDEB is not preceded (one second earlier) by the DFIN of
-- another row with the same CETA and id, numbered by DDEB within that key.
WITH Starts AS (
    SELECT
        t.DDEB,
        t.CETA,
        t.AIDNEBHNPRAFI,
        RowSeq = ROW_NUMBER() OVER (PARTITION BY t.CETA, t.AIDNEBHNPRAFI ORDER BY t.DDEB)
    FROM #temp AS t
    WHERE NOT EXISTS (
        SELECT 1
        FROM #temp AS prev
        WHERE prev.CETA = t.CETA
          AND prev.AIDNEBHNPRAFI = t.AIDNEBHNPRAFI
          AND prev.DFIN = DATEADD(SECOND, -1, t.DDEB)
    )
),
-- Finishes: rows whose DFIN is not followed (one second later) by the DDEB of
-- another row with the same CETA and id, numbered by DFIN within that key.
Finishes AS (
    SELECT
        t.DFIN,
        t.CETA,
        t.AIDNEBHNPRAFI,
        RowSeq = ROW_NUMBER() OVER (PARTITION BY t.CETA, t.AIDNEBHNPRAFI ORDER BY t.DFIN)
    FROM #temp AS t
    WHERE NOT EXISTS (
        SELECT 1
        FROM #temp AS nxt
        WHERE nxt.CETA = t.CETA
          AND nxt.AIDNEBHNPRAFI = t.AIDNEBHNPRAFI
          AND t.DFIN = DATEADD(SECOND, -1, nxt.DDEB)
    )
)
-- The n-th unmatched start and the n-th unmatched finish of a key delimit the
-- n-th merged interval of that key.
SELECT
    Starts.DDEB,
    Finishes.DFIN,
    Starts.CETA,
    Starts.AIDNEBHNPRAFI
FROM Starts
INNER JOIN Finishes
    ON Starts.AIDNEBHNPRAFI = Finishes.AIDNEBHNPRAFI
   AND Starts.CETA = Finishes.CETA
   AND Starts.RowSeq = Finishes.RowSeq
-- Extra sort keys only break ties between rows that share the same DDEB.
ORDER BY Starts.DDEB, Starts.AIDNEBHNPRAFI, Starts.CETA;
结果:
DDEB DFIN CETA AIDNEBHNPRAFI
2006-03-01 00:00:00.000 2008-08-31 23:59:59.000 1 231272
2008-09-01 00:00:00.000 2008-10-31 23:59:59.000 2 231272
2008-11-01 00:00:00.000 2010-01-31 23:59:59.000 1 231272
2010-02-01 00:00:00.000 9999-12-31 23:59:59.000 14 231272
Wes H 的回答让我找到了正确的方向。我的一位同事仔细研究了那段代码,并给出了下面这个方案。它在 4 分 8 秒内处理了 960 万行。
-- Merges overlapping or back-to-back validity intervals per
-- (AIDNEBHNPRAFI, CETA) and stores the merged intervals in
-- dbo.TBYDWHTEBHNPRAFIVRS_CTE (SELECT ... INTO creates that table).
--
-- CTETest: source intervals with the end moved one second forward, so that an
-- interval ending at hh:mm:59 touches one starting at the next second.
-- The 9999-12-31 23:59:59 sentinel is left unshifted. The sentinel literal
-- uses the ISO 8601 'T' form so it is read the same way under every
-- SET LANGUAGE / SET DATEFORMAT setting.
WITH CTETest (StartDate, EndDate, value, ID) AS (
    SELECT
        [DDEB],
        CASE
            WHEN DFIN = '9999-12-31T23:59:59.000' THEN DFIN
            ELSE DATEADD(SECOND, 1, [DFIN])
        END,
        [CETA],
        [AIDNEBHNPRAFI]
    FROM [dbo].[TBYDWHTEBHNPRAFIVRS]
    WHERE DFIN > DDEB
      AND BRRDDEL <> 1
      AND CSTUVRS = 1
),
-- T1: one event row per interval boundary (start event or end event).
T1 (ID, value, isStartTS, isEndTS, TS) AS (
    SELECT ID, value, 1, 0, StartDate
    FROM CTETest
    UNION ALL
    SELECT ID, value, 0, 1, EndDate
    FROM CTETest
),
-- T2: running count of open intervals including the current event (cr_ttl)
-- and up to the previous event (prv_ttl, NULL on the first event of a key).
-- Start events sort before end events at the same timestamp, so touching
-- intervals never let the count drop to zero between them.
-- SUM(isStartTS - isEndTS) equals SUM(isStartTS) - SUM(isEndTS) over the
-- same frame, with half the window aggregates.
T2 (ID, value, TS, cr_ttl, prv_ttl) AS (
    SELECT
        ID,
        value,
        TS,
        SUM(isStartTS - isEndTS) OVER (
            PARTITION BY ID, value
            ORDER BY TS, isEndTS
            ROWS UNBOUNDED PRECEDING
        ),
        SUM(isStartTS - isEndTS) OVER (
            PARTITION BY ID, value
            ORDER BY TS, isEndTS
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        )
    FROM T1
),
-- T3: keep only events that open a merged interval (nothing open before) or
-- close one (nothing open afterwards).
T3 (ID, value, TS, cr_ttl, prv_ttl) AS (
    SELECT ID, value, TS, cr_ttl, prv_ttl
    FROM T2
    WHERE cr_ttl = 0
       OR prv_ttl IS NULL
       OR prv_ttl = 0
),
-- T4: pair each kept event with the later of itself and the next kept event;
-- for an opening event that is the timestamp of the matching closing event.
T4 (ID, value, StartDate, EndDate, prv_ttl) AS (
    SELECT
        ID,
        value,
        TS,
        MAX(TS) OVER (
            PARTITION BY ID, value
            ORDER BY TS
            ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING
        ),
        prv_ttl
    FROM T3
)
-- Keep the opening events only and undo the one-second shift on the end,
-- again leaving the sentinel value untouched.
SELECT
    ID AS [AIDNEBHNPRAFI],
    value AS CETA,
    StartDate AS DDEB,
    CASE
        WHEN EndDate = '9999-12-31T23:59:59.000' THEN EndDate
        ELSE DATEADD(SECOND, -1, EndDate)
    END AS DFIN
INTO [dbo].[TBYDWHTEBHNPRAFIVRS_CTE]
FROM T4
WHERE prv_ttl IS NULL
   OR prv_ttl = 0;
这段 SQL 代码会遍历一张表,表中的每条记录都带有开始日期和结束日期,类似于缓慢变化维度(slowly changing dimension)。在检查该维度时,我们发现有时某条记录的开始日期并没有紧接在上一条记录的结束日期之后。因此我们循环遍历该表并重新生成 start/end 日期,以确保 start/end 日期确实是可用日期的 min/max。但是处理 900 万行数据耗时太长。我很想改用 while 循环,但不知道如何保留 min/max 值并检测 ID 的切换。
-- Collapses runs of consecutive source rows that share the same
-- (AIDNEBHNPRAFI, CETA) into a single row holding the smallest DDEB and the
-- largest DFIN of the run, and writes the result to
-- dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE.
-- "Consecutive" is defined by the cursor order: AIDNEBHNPRAFI, DDEB, CETA, CECV.
-- NOTE(review): this is row-by-row processing; on millions of rows a set-based
-- query is usually much faster -- confirm with a measurement.

-- Current row read from the cursor.
DECLARE @DDEB DATETIME;
DECLARE @DFIN DATETIME;
DECLARE @CETA FLOAT;
DECLARE @AIDNEBHNPRAFI INT;

-- State of the run being accumulated.
DECLARE @MINDDEB DATETIME;
DECLARE @MAXDFIN DATETIME;
DECLARE @OLDCETA FLOAT;
DECLARE @OLDAIDNEBHNPRAFI INT;

-- Recreate the output table; the existence check keeps the first run from
-- failing when the table has never been created.
IF OBJECT_ID('dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE', 'U') IS NOT NULL
    DROP TABLE dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE;

-- Clone the column definitions of the source table.
-- NOTE(review): the filter presumably matches no rows (-1 used as an
-- impossible id) so the new table starts empty -- confirm.
SELECT DDEB, DFIN, CETA, AIDNEBHNPRAFI
INTO dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE
FROM dbo.TBYDWHTEBHNPRAFIVRS
WHERE AIDNEBHNPRAFI = -1;

-- LOCAL keeps the cursor name private to this batch; FAST_FORWARD is enough
-- because the loop only ever issues FETCH NEXT and never updates through it.
DECLARE T2_CURSOR CURSOR LOCAL FAST_FORWARD FOR
    SELECT DDEB, DFIN, CETA, AIDNEBHNPRAFI
    FROM dbo.TBYDWHTEBHNPRAFIVRS
    WHERE DFIN > DDEB
      AND BRRDDEL <> 1
      AND CSTUVRS = 1
    ORDER BY AIDNEBHNPRAFI, DDEB, CETA, CECV;

OPEN T2_CURSOR;

FETCH NEXT FROM T2_CURSOR
INTO @DDEB, @DFIN, @CETA, @AIDNEBHNPRAFI;

-- Only do any work (including the final flush) when at least one row exists,
-- so an empty source does not produce a bogus summary row.
IF @@FETCH_STATUS = 0
BEGIN
    -- Seed the run from the first row. The previous sentinels were written as
    -- 9999-12-31 and 1835-12-31 without quotes, which T-SQL evaluates as
    -- integer subtraction and then converts to unrelated dates.
    SET @OLDAIDNEBHNPRAFI = @AIDNEBHNPRAFI;
    SET @OLDCETA = @CETA;
    SET @MINDDEB = @DDEB;
    SET @MAXDFIN = @DFIN;

    WHILE @@FETCH_STATUS = 0
    BEGIN
        -- Current row belongs to the run: widen the run's bounds.
        -- NOTE(review): a NULL CETA or id satisfies neither this test nor the
        -- "<>" test below, so such rows are skipped -- confirm the columns
        -- are NOT NULL.
        IF (@OLDCETA = @CETA AND @OLDAIDNEBHNPRAFI = @AIDNEBHNPRAFI)
        BEGIN
            IF (@MINDDEB > @DDEB)
                SET @MINDDEB = @DDEB;

            IF (@MAXDFIN < @DFIN)
                SET @MAXDFIN = @DFIN;
        END

        FETCH NEXT FROM T2_CURSOR
        INTO @DDEB, @DFIN, @CETA, @AIDNEBHNPRAFI;

        -- The freshly fetched row starts a new run: flush the finished run and
        -- restart the accumulators from the new row. After a failed fetch the
        -- variables keep their old values, so this test is false and the last
        -- run is flushed after the loop instead.
        IF (@OLDCETA <> @CETA OR @OLDAIDNEBHNPRAFI <> @AIDNEBHNPRAFI)
        BEGIN
            INSERT INTO dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE (DDEB, DFIN, CETA, AIDNEBHNPRAFI)
            VALUES (@MINDDEB, @MAXDFIN, @OLDCETA, @OLDAIDNEBHNPRAFI);

            SET @OLDAIDNEBHNPRAFI = @AIDNEBHNPRAFI;
            SET @OLDCETA = @CETA;
            SET @MINDDEB = @DDEB;
            SET @MAXDFIN = @DFIN;
        END
    END

    -- Flush the last run, which no change of key ever terminates.
    INSERT INTO dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE (DDEB, DFIN, CETA, AIDNEBHNPRAFI)
    VALUES (@MINDDEB, @MAXDFIN, @OLDCETA, @OLDAIDNEBHNPRAFI);
END

CLOSE T2_CURSOR;
DEALLOCATE T2_CURSOR;
这是源数据
DDEB DFIN CETA AIDNEBHNPRAFI
2006-03-01 00:00:00.000 2006-04-30 23:59:59.000 1 231272
2006-05-01 00:00:00.000 2006-11-30 23:59:59.000 1 231272
2006-12-01 00:00:00.000 2007-04-30 23:59:59.000 1 231272
2007-05-01 00:00:00.000 2008-04-30 23:59:59.000 1 231272
2008-05-01 00:00:00.000 2008-08-31 23:59:59.000 1 231272
2008-09-01 00:00:00.000 2008-10-31 23:59:59.000 2 231272
2008-11-01 00:00:00.000 2009-04-30 23:59:59.000 1 231272
2009-05-01 00:00:00.000 2010-01-31 23:59:59.000 1 231272
2010-02-01 00:00:00.000 9999-12-31 23:59:59.000 14 231272
这是我们用游标得到的结果
DDEB DFIN CETA AIDNEBHNPRAFI
2006-03-01 00:00:00.000 2008-08-31 23:59:59.000 1 231272
2008-09-01 00:00:00.000 2008-10-31 23:59:59.000 2 231272
2008-11-01 00:00:00.000 2010-01-31 23:59:59.000 1 231272
2010-02-01 00:00:00.000 9999-12-31 23:59:59.000 14 231272
下面是一种尝试。它返回的结果与期望一致,但您需要用更多数据进行测试。我还对数据类型做了一些假设;如果您的数据类型不同,则需要相应地修改。这两个 CTE 分别找出没有相邻衔接记录的开始时间和结束时间(相邻衔接是指上一条记录的结束时间与下一条记录的开始时间恰好相差 1 秒)。然后为这些时间分配序号,并按序号把开始时间和结束时间连接配对。
具体来说,2006 年 3 月 1 日这个开始时间之前没有紧邻的结束时间,因此它被选中,并且是其分组中的第一个开始序号。2008 年 11 月 1 日满足同样的条件,是其分组中的下一个开始序号。2008 年 8 月 31 日是第一个其后没有紧邻开始时间的结束时间,因此是其分组中的第一个结束序号。由于 2006 年 3 月 1 日和 2008 年 8 月 31 日在各自的序列中序号都是 1,它们就标记了该分组中第一个连续区间的开始和结束。
-- Self-contained demo: loads sample validity intervals into #temp and merges
-- back-to-back intervals (previous DFIN exactly one second before next DDEB)
-- that share the same CETA and AIDNEBHNPRAFI.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp;
GO

CREATE TABLE #temp (
    DDEB          DATETIME,
    DFIN          DATETIME,
    CETA          INT,
    AIDNEBHNPRAFI INT
);

-- Datetime literals use the ISO 8601 'T' form, which is read the same way
-- under every SET LANGUAGE / SET DATEFORMAT setting; INT columns get numeric
-- literals instead of strings that rely on implicit conversion.
INSERT INTO #temp (DDEB, DFIN, CETA, AIDNEBHNPRAFI)
VALUES
    ('2006-03-01T00:00:00.000', '2006-04-30T23:59:59.000', 1,  231272),
    ('2006-05-01T00:00:00.000', '2006-11-30T23:59:59.000', 1,  231272),
    ('2006-12-01T00:00:00.000', '2007-04-30T23:59:59.000', 1,  231272),
    ('2007-05-01T00:00:00.000', '2008-04-30T23:59:59.000', 1,  231272),
    ('2008-05-01T00:00:00.000', '2008-08-31T23:59:59.000', 1,  231272),
    ('2008-09-01T00:00:00.000', '2008-10-31T23:59:59.000', 2,  231272),
    ('2008-11-01T00:00:00.000', '2009-04-30T23:59:59.000', 1,  231272),
    ('2009-05-01T00:00:00.000', '2010-01-31T23:59:59.000', 1,  231272),
    ('2010-02-01T00:00:00.000', '9999-12-31T23:59:59.000', 14, 231272);

-- Starts: rows whose DDEB is not preceded (one second earlier) by the DFIN of
-- another row with the same CETA and id, numbered by DDEB within that key.
WITH Starts AS (
    SELECT
        t.DDEB,
        t.CETA,
        t.AIDNEBHNPRAFI,
        RowSeq = ROW_NUMBER() OVER (PARTITION BY t.CETA, t.AIDNEBHNPRAFI ORDER BY t.DDEB)
    FROM #temp AS t
    WHERE NOT EXISTS (
        SELECT 1
        FROM #temp AS prev
        WHERE prev.CETA = t.CETA
          AND prev.AIDNEBHNPRAFI = t.AIDNEBHNPRAFI
          AND prev.DFIN = DATEADD(SECOND, -1, t.DDEB)
    )
),
-- Finishes: rows whose DFIN is not followed (one second later) by the DDEB of
-- another row with the same CETA and id, numbered by DFIN within that key.
Finishes AS (
    SELECT
        t.DFIN,
        t.CETA,
        t.AIDNEBHNPRAFI,
        RowSeq = ROW_NUMBER() OVER (PARTITION BY t.CETA, t.AIDNEBHNPRAFI ORDER BY t.DFIN)
    FROM #temp AS t
    WHERE NOT EXISTS (
        SELECT 1
        FROM #temp AS nxt
        WHERE nxt.CETA = t.CETA
          AND nxt.AIDNEBHNPRAFI = t.AIDNEBHNPRAFI
          AND t.DFIN = DATEADD(SECOND, -1, nxt.DDEB)
    )
)
-- The n-th unmatched start and the n-th unmatched finish of a key delimit the
-- n-th merged interval of that key.
SELECT
    Starts.DDEB,
    Finishes.DFIN,
    Starts.CETA,
    Starts.AIDNEBHNPRAFI
FROM Starts
INNER JOIN Finishes
    ON Starts.AIDNEBHNPRAFI = Finishes.AIDNEBHNPRAFI
   AND Starts.CETA = Finishes.CETA
   AND Starts.RowSeq = Finishes.RowSeq
-- Extra sort keys only break ties between rows that share the same DDEB.
ORDER BY Starts.DDEB, Starts.AIDNEBHNPRAFI, Starts.CETA;
结果:
DDEB DFIN CETA AIDNEBHNPRAFI
2006-03-01 00:00:00.000 2008-08-31 23:59:59.000 1 231272
2008-09-01 00:00:00.000 2008-10-31 23:59:59.000 2 231272
2008-11-01 00:00:00.000 2010-01-31 23:59:59.000 1 231272
2010-02-01 00:00:00.000 9999-12-31 23:59:59.000 14 231272
Wes H 的回答让我找到了正确的方向。我的一位同事仔细研究了那段代码,并给出了下面这个方案。它在 4 分 8 秒内处理了 960 万行。
-- Merges overlapping or back-to-back validity intervals per
-- (AIDNEBHNPRAFI, CETA) and stores the merged intervals in
-- dbo.TBYDWHTEBHNPRAFIVRS_CTE (SELECT ... INTO creates that table).
--
-- CTETest: source intervals with the end moved one second forward, so that an
-- interval ending at hh:mm:59 touches one starting at the next second.
-- The 9999-12-31 23:59:59 sentinel is left unshifted. The sentinel literal
-- uses the ISO 8601 'T' form so it is read the same way under every
-- SET LANGUAGE / SET DATEFORMAT setting.
WITH CTETest (StartDate, EndDate, value, ID) AS (
    SELECT
        [DDEB],
        CASE
            WHEN DFIN = '9999-12-31T23:59:59.000' THEN DFIN
            ELSE DATEADD(SECOND, 1, [DFIN])
        END,
        [CETA],
        [AIDNEBHNPRAFI]
    FROM [dbo].[TBYDWHTEBHNPRAFIVRS]
    WHERE DFIN > DDEB
      AND BRRDDEL <> 1
      AND CSTUVRS = 1
),
-- T1: one event row per interval boundary (start event or end event).
T1 (ID, value, isStartTS, isEndTS, TS) AS (
    SELECT ID, value, 1, 0, StartDate
    FROM CTETest
    UNION ALL
    SELECT ID, value, 0, 1, EndDate
    FROM CTETest
),
-- T2: running count of open intervals including the current event (cr_ttl)
-- and up to the previous event (prv_ttl, NULL on the first event of a key).
-- Start events sort before end events at the same timestamp, so touching
-- intervals never let the count drop to zero between them.
-- SUM(isStartTS - isEndTS) equals SUM(isStartTS) - SUM(isEndTS) over the
-- same frame, with half the window aggregates.
T2 (ID, value, TS, cr_ttl, prv_ttl) AS (
    SELECT
        ID,
        value,
        TS,
        SUM(isStartTS - isEndTS) OVER (
            PARTITION BY ID, value
            ORDER BY TS, isEndTS
            ROWS UNBOUNDED PRECEDING
        ),
        SUM(isStartTS - isEndTS) OVER (
            PARTITION BY ID, value
            ORDER BY TS, isEndTS
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        )
    FROM T1
),
-- T3: keep only events that open a merged interval (nothing open before) or
-- close one (nothing open afterwards).
T3 (ID, value, TS, cr_ttl, prv_ttl) AS (
    SELECT ID, value, TS, cr_ttl, prv_ttl
    FROM T2
    WHERE cr_ttl = 0
       OR prv_ttl IS NULL
       OR prv_ttl = 0
),
-- T4: pair each kept event with the later of itself and the next kept event;
-- for an opening event that is the timestamp of the matching closing event.
T4 (ID, value, StartDate, EndDate, prv_ttl) AS (
    SELECT
        ID,
        value,
        TS,
        MAX(TS) OVER (
            PARTITION BY ID, value
            ORDER BY TS
            ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING
        ),
        prv_ttl
    FROM T3
)
-- Keep the opening events only and undo the one-second shift on the end,
-- again leaving the sentinel value untouched.
SELECT
    ID AS [AIDNEBHNPRAFI],
    value AS CETA,
    StartDate AS DDEB,
    CASE
        WHEN EndDate = '9999-12-31T23:59:59.000' THEN EndDate
        ELSE DATEADD(SECOND, -1, EndDate)
    END AS DFIN
INTO [dbo].[TBYDWHTEBHNPRAFIVRS_CTE]
FROM T4
WHERE prv_ttl IS NULL
   OR prv_ttl = 0;