删除匹配集
Removing matching sets
这是我的模式和数据的示例:
-- Sample schema and data. Rows of one groupNumber that share an effectiveDate
-- (here an 'Upper'/'Lower' pair) form one "set".
DECLARE @temp TABLE (
    rowid         int IDENTITY(1,1),
    groupNumber   int,
    typeName      varchar(10),
    valueA        int,
    valueB        int,
    effectiveDate datetime,
    expiredDate   datetime
);

-- Explicit column lists keep the inserts valid if the table gains columns, and
-- ISO 8601 literals are read the same way under any SET DATEFORMAT / SET LANGUAGE
-- (the original m/d/yy literals were not). One INSERT per row keeps rowid order.
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900, '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900, '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 800, '2018-06-01T10:32:00', NULL);
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-06-01T10:32:00', NULL);

SELECT rowid, groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate
FROM @temp;
每个组号(groupNumber)有多个集合;一个集合由生效日期定义,并且其生效日期总是等于上一个集合的到期日期。所以示例中有 5 个集合,我想做的是删除多余的集合,即第 3/4 行和第 5/6 行。只有当 valueA 或 valueB 发生变化时,我才关心新的集合。最终目标是逐个组循环处理,清理生产数据库中的这些数据,因为大约 60% 的行所反映的情况是:从一个集合到下一个集合,在任何重要方面(即 typeName、valueA、valueB)都完全没有变化。
需要注意的是,删除这 4 行之后,我还需要把前两行的到期日期设置为第 7 行和第 8 行的生效日期,因为相邻集合的日期必须始终首尾衔接。
另一个问题是,我想运行一个循环遍历该表的脚本(可能通过 SQL Server 代理作业;在生产环境中它显然不是表变量),为每个 groupNumber 开启一个新事务来删除行并更新 expiredDate。如果我在作业完成之前将其停止,而它此时正处于某个事务之中(这很有可能),有没有办法让该事务自动回滚?
您可以使用窗口函数生成两个排名,一个按升序、另一个按降序,从而识别出(生效日期)值最大和最小的行。
那么您可以只过滤排名第一的值:
-- Rank every row's effectiveDate from both ends of its group: rows tied on the
-- earliest date get rk_min = 1, rows tied on the latest date get rk_max = 1.
-- The final filter keeps only those two extreme sets.
;WITH ranked AS (
    SELECT
        t.rowid,
        t.groupNumber,
        t.typeName,
        t.valueA,
        t.valueB,
        t.effectiveDate,
        t.expiredDate,
        RANK() OVER (PARTITION BY t.groupNumber ORDER BY t.effectiveDate ASC)  AS rk_min,
        RANK() OVER (PARTITION BY t.groupNumber ORDER BY t.effectiveDate DESC) AS rk_max
    FROM @temp AS t
)
SELECT
    r.rowid,
    r.groupNumber,
    r.typeName,
    r.valueA,
    r.valueB,
    r.effectiveDate,
    r.expiredDate
FROM ranked AS r
WHERE 1 IN (r.rk_min, r.rk_max)
ORDER BY r.rowid
结果:
如果你想识别两个极端之间的中间行,只需将 WHERE 条件改为 WHERE s.rk_min > 1 and s.rk_max > 1:
-- Rank every row's effectiveDate from both ends of its group, then keep only
-- the rows that belong to neither the earliest nor the latest set.
;WITH ranked AS (
    SELECT
        t.rowid,
        t.groupNumber,
        t.typeName,
        t.valueA,
        t.valueB,
        t.effectiveDate,
        t.expiredDate,
        RANK() OVER (PARTITION BY t.groupNumber ORDER BY t.effectiveDate ASC)  AS rk_min,
        RANK() OVER (PARTITION BY t.groupNumber ORDER BY t.effectiveDate DESC) AS rk_max
    FROM @temp AS t
)
SELECT
    r.rowid,
    r.groupNumber,
    r.typeName,
    r.valueA,
    r.valueB,
    r.effectiveDate,
    r.expiredDate
FROM ranked AS r
-- RANK() is never NULL and never below 1, so this equals rk_min > 1 AND rk_max > 1.
WHERE NOT (r.rk_min = 1 OR r.rk_max = 1)
ORDER BY r.rowid
结果:
这不是最终脚本,因为存在一些疑问。
@疑问1:什么是多余的行/集合?为什么第 3/4 行和第 5/6 行是多余的?答案应该涵盖所有可能的情况。
@疑问2:前两行expiredDate将更新为最后两行中哪一行的expiredDate。更新时前两行和后两行之间的关系是什么?
-- Demo script: keep only the first and the last set (two rows each) and make
-- the first set expire exactly when the last set becomes effective.
CREATE TABLE #temp (
    rowid         int IDENTITY(1,1),
    groupNumber   int,
    typeName      varchar(10),
    valueA        int,
    valueB        int,
    effectiveDate datetime,
    expiredDate   datetime,
    isLineup      int DEFAULT (0)   -- 1 once the row has been processed
);

-- ISO 8601 literals do not depend on SET DATEFORMAT / SET LANGUAGE.
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 1000, '2018-01-01T11:31:00', '2018-02-01T22:01:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0, '2018-01-01T11:31:00', '2018-02-01T22:01:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 1000, '2018-02-01T22:01:00', '2018-04-15T05:39:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0, '2018-02-01T22:01:00', '2018-04-15T05:39:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 900, '2018-04-15T05:39:00', '2018-06-01T10:32:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0, '2018-04-15T05:39:00', '2018-06-01T10:32:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 900, '2018-06-01T10:32:00', NULL, 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0, '2018-06-01T10:32:00', NULL, 0);

-- Rows to keep: Flag = 0 marks the first set, Flag = 1 marks the last set.
CREATE TABLE #temp1 (rowid int, effectiveDate datetime, Flag int);

-- Main script
BEGIN TRY
    BEGIN TRANSACTION;

    -- Criteria to decide superfluous rows: everything except the two lowest
    -- and the two highest unprocessed rowids.
    INSERT INTO #temp1 (rowid, effectiveDate, Flag)
    SELECT TOP (2) rowid, effectiveDate, 0
    FROM #temp
    WHERE isLineup = 0
    ORDER BY rowid;

    INSERT INTO #temp1 (rowid, effectiveDate, Flag)
    SELECT TOP (2) rowid, effectiveDate, 1
    FROM #temp
    WHERE isLineup = 0
    ORDER BY rowid DESC;

    DELETE FROM #temp
    WHERE NOT EXISTS (SELECT 1 FROM #temp1 AS k WHERE k.rowid = #temp.rowid);

    -- Only the first set is re-dated; the last set keeps its own expiredDate
    -- (NULL for the open-ended set) instead of being overwritten with its own
    -- effectiveDate.
    UPDATE c
    SET expiredDate = CASE
                          WHEN EXISTS (SELECT 1
                                       FROM #temp1 AS k
                                       WHERE k.rowid = c.rowid
                                         AND k.Flag = 1)
                              THEN c.expiredDate
                          ELSE last_set.effectiveDate
                      END,
        isLineup = 1
    FROM #temp AS c
    CROSS APPLY (
        -- ORDER BY makes the pick deterministic: the highest rowid is the
        -- final row of the last set.
        SELECT TOP (1) k1.effectiveDate
        FROM #temp1 AS k1
        WHERE k1.Flag = 1
        ORDER BY k1.rowid DESC
    ) AS last_set
    WHERE c.isLineup = 0;

    COMMIT TRANSACTION;
END TRY
BEGIN CATCH
    IF @@TRANCOUNT > 0
        ROLLBACK TRANSACTION;
    -- Surface the failure instead of swallowing it; the cleanup below still runs.
    PRINT 'Cleanup failed and was rolled back: ' + ERROR_MESSAGE();
END CATCH;
-- End main

SELECT rowid, groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup
FROM #temp;

SELECT rowid, effectiveDate, Flag
FROM #temp1;

DROP TABLE #temp;
DROP TABLE #temp1;
我找到答案了:
-- Removes, one groupNumber per transaction, every set that repeats the set
-- immediately before it, then re-dates the surviving set so expiredDate still
-- equals the next surviving set's effectiveDate.
-- Sample data lives in a table variable; note that SQL Server does not roll
-- back changes to table variables, so the rollback behaviour below only
-- matters against a real table.
DECLARE @temp TABLE (
    rowid         int IDENTITY(1,1),
    groupNumber   int,
    typeName      varchar(10),
    valueA        int,
    valueB        int,
    effectiveDate datetime,
    expiredDate   datetime
);

-- ISO 8601 literals do not depend on SET DATEFORMAT / SET LANGUAGE.
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900, '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900, '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 800, '2018-06-01T10:32:00', NULL);
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-06-01T10:32:00', NULL);

SELECT rowid, groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate
FROM @temp;

-- Any run-time error, or a cancelled batch/stopped job, rolls the open
-- transaction back automatically.
SET XACT_ABORT ON;

DECLARE @groupNumber int;

-- LOCAL: the cursor goes away with the batch even when an error aborts the loop.
DECLARE MY_CURSOR CURSOR LOCAL STATIC
FOR SELECT DISTINCT groupNumber FROM @temp;

OPEN MY_CURSOR;
FETCH NEXT FROM MY_CURSOR INTO @groupNumber;

-- Loop only while a row was actually fetched (0); the original "<> -1" test
-- would also have continued on status -2.
WHILE @@FETCH_STATUS = 0
BEGIN
    IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp;

    -- One summary row per set (groupNumber + effectiveDate) of the current group:
    -- its position in time, value totals and the sorted list of type names.
    SELECT
        RANK() OVER (PARTITION BY rp2.groupNumber ORDER BY rp2.effectiveDate) AS TheRank,
        rp2.groupNumber,
        rp2.effectiveDate,
        TotalvalueA = SUM(rp2.valueA),
        ChecksumTotal = SUM(ISNULL(rp2.valueA, 0) + ISNULL(rp2.valueB, 0)), --assumes valueA and valueB can never be reversed
        (
            -- Full type names: truncating them to two characters could make
            -- two different sets of types compare as equal.
            SELECT rp.typeName + ',' AS [text()]
            FROM @temp AS rp
            WHERE rp.groupNumber = rp2.groupNumber
              AND rp.groupNumber = @groupNumber
              AND rp.effectiveDate = rp2.effectiveDate
            GROUP BY rp.typeName
            ORDER BY MIN(rp.typeName)
            FOR XML PATH ('')
        ) AS typesXML,
        DeleteSet = 0
    INTO #temp
    FROM @temp AS rp2
    WHERE rp2.groupNumber = @groupNumber
    GROUP BY rp2.groupNumber, rp2.effectiveDate;

    -- Flag a set for deletion when it matches the set just before it. The
    -- latest set is never flagged. (The WHERE clause compares both sides, so
    -- the original LEFT JOIN already behaved as an inner join.)
    UPDATE nxt
    SET DeleteSet = 1
    FROM #temp AS prev
    INNER JOIN #temp AS nxt
        ON prev.TheRank = nxt.TheRank - 1
    WHERE prev.TotalvalueA = nxt.TotalvalueA
      AND prev.ChecksumTotal = nxt.ChecksumTotal
      AND prev.typesXML = nxt.typesXML
      AND nxt.TheRank <> (SELECT MAX(TheRank) FROM #temp);

    BEGIN TRY
        BEGIN TRANSACTION;

        -- Remove the rows of every flagged set.
        DELETE rp
        FROM @temp AS rp
        INNER JOIN #temp AS t
            ON t.groupNumber = rp.groupNumber
           AND t.effectiveDate = rp.effectiveDate
           AND t.DeleteSet = 1;

        -- A surviving set that was directly followed by a deleted set now
        -- expires when the next surviving set becomes effective.
        UPDATE rp
        SET expiredDate = kept.NewExpiredDate
        FROM @temp AS rp
        INNER JOIN (
            SELECT groupNumber,
                   effectiveDate,
                   TheRank,
                   NewExpiredDate = LEAD(effectiveDate) OVER (ORDER BY TheRank)
            FROM #temp
            WHERE DeleteSet = 0
        ) AS kept
            ON kept.groupNumber = rp.groupNumber
           AND kept.effectiveDate = rp.effectiveDate
        INNER JOIN #temp AS removed
            ON removed.TheRank = kept.TheRank + 1
        WHERE rp.groupNumber = @groupNumber
          AND kept.NewExpiredDate IS NOT NULL
          AND rp.expiredDate <> kept.NewExpiredDate
          AND removed.DeleteSet = 1;

        PRINT 'No Errors ... Committing changes for ' + CAST(@groupNumber as varchar(15));
        COMMIT TRANSACTION;
    END TRY
    BEGIN CATCH
        -- Undo the current group and stop the script with the original error
        -- (no sysadmin-only severity-20 RAISERROR needed).
        IF @@TRANCOUNT > 0
            ROLLBACK TRANSACTION;
        THROW;
    END CATCH;

    -- Short pause between groups.
    WAITFOR DELAY '00:00:00:005';
    FETCH NEXT FROM MY_CURSOR INTO @groupNumber;
END

CLOSE MY_CURSOR;
DEALLOCATE MY_CURSOR;
GO
这是我的模式和数据的示例:
-- Sample schema and data. Rows of one groupNumber that share an effectiveDate
-- (here an 'Upper'/'Lower' pair) form one "set".
DECLARE @temp TABLE (
    rowid         int IDENTITY(1,1),
    groupNumber   int,
    typeName      varchar(10),
    valueA        int,
    valueB        int,
    effectiveDate datetime,
    expiredDate   datetime
);

-- Explicit column lists keep the inserts valid if the table gains columns, and
-- ISO 8601 literals are read the same way under any SET DATEFORMAT / SET LANGUAGE
-- (the original m/d/yy literals were not). One INSERT per row keeps rowid order.
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900, '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900, '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 800, '2018-06-01T10:32:00', NULL);
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-06-01T10:32:00', NULL);

SELECT rowid, groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate
FROM @temp;
每个组号(groupNumber)有多个集合;一个集合由生效日期定义,并且其生效日期总是等于上一个集合的到期日期。所以示例中有 5 个集合,我想做的是删除多余的集合,即第 3/4 行和第 5/6 行。只有当 valueA 或 valueB 发生变化时,我才关心新的集合。最终目标是逐个组循环处理,清理生产数据库中的这些数据,因为大约 60% 的行所反映的情况是:从一个集合到下一个集合,在任何重要方面(即 typeName、valueA、valueB)都完全没有变化。
需要注意的是,删除这 4 行之后,我还需要把前两行的到期日期设置为第 7 行和第 8 行的生效日期,因为相邻集合的日期必须始终首尾衔接。
另一个问题是,我想运行一个循环遍历该表的脚本(可能通过 SQL Server 代理作业;在生产环境中它显然不是表变量),为每个 groupNumber 开启一个新事务来删除行并更新 expiredDate。如果我在作业完成之前将其停止,而它此时正处于某个事务之中(这很有可能),有没有办法让该事务自动回滚?
您可以使用窗口函数生成两个排名,一个按升序、另一个按降序,从而识别出(生效日期)值最大和最小的行。
那么您可以只过滤排名第一的值:
-- Rank every row's effectiveDate from both ends of its group: rows tied on the
-- earliest date get rk_min = 1, rows tied on the latest date get rk_max = 1.
-- The final filter keeps only those two extreme sets.
;WITH ranked AS (
    SELECT
        t.rowid,
        t.groupNumber,
        t.typeName,
        t.valueA,
        t.valueB,
        t.effectiveDate,
        t.expiredDate,
        RANK() OVER (PARTITION BY t.groupNumber ORDER BY t.effectiveDate ASC)  AS rk_min,
        RANK() OVER (PARTITION BY t.groupNumber ORDER BY t.effectiveDate DESC) AS rk_max
    FROM @temp AS t
)
SELECT
    r.rowid,
    r.groupNumber,
    r.typeName,
    r.valueA,
    r.valueB,
    r.effectiveDate,
    r.expiredDate
FROM ranked AS r
WHERE 1 IN (r.rk_min, r.rk_max)
ORDER BY r.rowid
结果:
如果你想识别两个极端之间的中间行,只需将 WHERE 条件改为 WHERE s.rk_min > 1 and s.rk_max > 1:
-- Rank every row's effectiveDate from both ends of its group, then keep only
-- the rows that belong to neither the earliest nor the latest set.
;WITH ranked AS (
    SELECT
        t.rowid,
        t.groupNumber,
        t.typeName,
        t.valueA,
        t.valueB,
        t.effectiveDate,
        t.expiredDate,
        RANK() OVER (PARTITION BY t.groupNumber ORDER BY t.effectiveDate ASC)  AS rk_min,
        RANK() OVER (PARTITION BY t.groupNumber ORDER BY t.effectiveDate DESC) AS rk_max
    FROM @temp AS t
)
SELECT
    r.rowid,
    r.groupNumber,
    r.typeName,
    r.valueA,
    r.valueB,
    r.effectiveDate,
    r.expiredDate
FROM ranked AS r
-- RANK() is never NULL and never below 1, so this equals rk_min > 1 AND rk_max > 1.
WHERE NOT (r.rk_min = 1 OR r.rk_max = 1)
ORDER BY r.rowid
结果:
这不是最终脚本,因为存在一些疑问。
@疑问1:什么是多余的行/集合?为什么第 3/4 行和第 5/6 行是多余的?答案应该涵盖所有可能的情况。
@疑问2:前两行expiredDate将更新为最后两行中哪一行的expiredDate。更新时前两行和后两行之间的关系是什么?
-- Demo script: keep only the first and the last set (two rows each) and make
-- the first set expire exactly when the last set becomes effective.
CREATE TABLE #temp (
    rowid         int IDENTITY(1,1),
    groupNumber   int,
    typeName      varchar(10),
    valueA        int,
    valueB        int,
    effectiveDate datetime,
    expiredDate   datetime,
    isLineup      int DEFAULT (0)   -- 1 once the row has been processed
);

-- ISO 8601 literals do not depend on SET DATEFORMAT / SET LANGUAGE.
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 1000, '2018-01-01T11:31:00', '2018-02-01T22:01:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0, '2018-01-01T11:31:00', '2018-02-01T22:01:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 1000, '2018-02-01T22:01:00', '2018-04-15T05:39:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0, '2018-02-01T22:01:00', '2018-04-15T05:39:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 900, '2018-04-15T05:39:00', '2018-06-01T10:32:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0, '2018-04-15T05:39:00', '2018-06-01T10:32:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 900, '2018-06-01T10:32:00', NULL, 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0, '2018-06-01T10:32:00', NULL, 0);

-- Rows to keep: Flag = 0 marks the first set, Flag = 1 marks the last set.
CREATE TABLE #temp1 (rowid int, effectiveDate datetime, Flag int);

-- Main script
BEGIN TRY
    BEGIN TRANSACTION;

    -- Criteria to decide superfluous rows: everything except the two lowest
    -- and the two highest unprocessed rowids.
    INSERT INTO #temp1 (rowid, effectiveDate, Flag)
    SELECT TOP (2) rowid, effectiveDate, 0
    FROM #temp
    WHERE isLineup = 0
    ORDER BY rowid;

    INSERT INTO #temp1 (rowid, effectiveDate, Flag)
    SELECT TOP (2) rowid, effectiveDate, 1
    FROM #temp
    WHERE isLineup = 0
    ORDER BY rowid DESC;

    DELETE FROM #temp
    WHERE NOT EXISTS (SELECT 1 FROM #temp1 AS k WHERE k.rowid = #temp.rowid);

    -- Only the first set is re-dated; the last set keeps its own expiredDate
    -- (NULL for the open-ended set) instead of being overwritten with its own
    -- effectiveDate.
    UPDATE c
    SET expiredDate = CASE
                          WHEN EXISTS (SELECT 1
                                       FROM #temp1 AS k
                                       WHERE k.rowid = c.rowid
                                         AND k.Flag = 1)
                              THEN c.expiredDate
                          ELSE last_set.effectiveDate
                      END,
        isLineup = 1
    FROM #temp AS c
    CROSS APPLY (
        -- ORDER BY makes the pick deterministic: the highest rowid is the
        -- final row of the last set.
        SELECT TOP (1) k1.effectiveDate
        FROM #temp1 AS k1
        WHERE k1.Flag = 1
        ORDER BY k1.rowid DESC
    ) AS last_set
    WHERE c.isLineup = 0;

    COMMIT TRANSACTION;
END TRY
BEGIN CATCH
    IF @@TRANCOUNT > 0
        ROLLBACK TRANSACTION;
    -- Surface the failure instead of swallowing it; the cleanup below still runs.
    PRINT 'Cleanup failed and was rolled back: ' + ERROR_MESSAGE();
END CATCH;
-- End main

SELECT rowid, groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup
FROM #temp;

SELECT rowid, effectiveDate, Flag
FROM #temp1;

DROP TABLE #temp;
DROP TABLE #temp1;
我找到答案了:
-- Removes, one groupNumber per transaction, every set that repeats the set
-- immediately before it, then re-dates the surviving set so expiredDate still
-- equals the next surviving set's effectiveDate.
-- Sample data lives in a table variable; note that SQL Server does not roll
-- back changes to table variables, so the rollback behaviour below only
-- matters against a real table.
DECLARE @temp TABLE (
    rowid         int IDENTITY(1,1),
    groupNumber   int,
    typeName      varchar(10),
    valueA        int,
    valueB        int,
    effectiveDate datetime,
    expiredDate   datetime
);

-- ISO 8601 literals do not depend on SET DATEFORMAT / SET LANGUAGE.
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900, '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900, '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 800, '2018-06-01T10:32:00', NULL);
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0, '2018-06-01T10:32:00', NULL);

SELECT rowid, groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate
FROM @temp;

-- Any run-time error, or a cancelled batch/stopped job, rolls the open
-- transaction back automatically.
SET XACT_ABORT ON;

DECLARE @groupNumber int;

-- LOCAL: the cursor goes away with the batch even when an error aborts the loop.
DECLARE MY_CURSOR CURSOR LOCAL STATIC
FOR SELECT DISTINCT groupNumber FROM @temp;

OPEN MY_CURSOR;
FETCH NEXT FROM MY_CURSOR INTO @groupNumber;

-- Loop only while a row was actually fetched (0); the original "<> -1" test
-- would also have continued on status -2.
WHILE @@FETCH_STATUS = 0
BEGIN
    IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp;

    -- One summary row per set (groupNumber + effectiveDate) of the current group:
    -- its position in time, value totals and the sorted list of type names.
    SELECT
        RANK() OVER (PARTITION BY rp2.groupNumber ORDER BY rp2.effectiveDate) AS TheRank,
        rp2.groupNumber,
        rp2.effectiveDate,
        TotalvalueA = SUM(rp2.valueA),
        ChecksumTotal = SUM(ISNULL(rp2.valueA, 0) + ISNULL(rp2.valueB, 0)), --assumes valueA and valueB can never be reversed
        (
            -- Full type names: truncating them to two characters could make
            -- two different sets of types compare as equal.
            SELECT rp.typeName + ',' AS [text()]
            FROM @temp AS rp
            WHERE rp.groupNumber = rp2.groupNumber
              AND rp.groupNumber = @groupNumber
              AND rp.effectiveDate = rp2.effectiveDate
            GROUP BY rp.typeName
            ORDER BY MIN(rp.typeName)
            FOR XML PATH ('')
        ) AS typesXML,
        DeleteSet = 0
    INTO #temp
    FROM @temp AS rp2
    WHERE rp2.groupNumber = @groupNumber
    GROUP BY rp2.groupNumber, rp2.effectiveDate;

    -- Flag a set for deletion when it matches the set just before it. The
    -- latest set is never flagged. (The WHERE clause compares both sides, so
    -- the original LEFT JOIN already behaved as an inner join.)
    UPDATE nxt
    SET DeleteSet = 1
    FROM #temp AS prev
    INNER JOIN #temp AS nxt
        ON prev.TheRank = nxt.TheRank - 1
    WHERE prev.TotalvalueA = nxt.TotalvalueA
      AND prev.ChecksumTotal = nxt.ChecksumTotal
      AND prev.typesXML = nxt.typesXML
      AND nxt.TheRank <> (SELECT MAX(TheRank) FROM #temp);

    BEGIN TRY
        BEGIN TRANSACTION;

        -- Remove the rows of every flagged set.
        DELETE rp
        FROM @temp AS rp
        INNER JOIN #temp AS t
            ON t.groupNumber = rp.groupNumber
           AND t.effectiveDate = rp.effectiveDate
           AND t.DeleteSet = 1;

        -- A surviving set that was directly followed by a deleted set now
        -- expires when the next surviving set becomes effective.
        UPDATE rp
        SET expiredDate = kept.NewExpiredDate
        FROM @temp AS rp
        INNER JOIN (
            SELECT groupNumber,
                   effectiveDate,
                   TheRank,
                   NewExpiredDate = LEAD(effectiveDate) OVER (ORDER BY TheRank)
            FROM #temp
            WHERE DeleteSet = 0
        ) AS kept
            ON kept.groupNumber = rp.groupNumber
           AND kept.effectiveDate = rp.effectiveDate
        INNER JOIN #temp AS removed
            ON removed.TheRank = kept.TheRank + 1
        WHERE rp.groupNumber = @groupNumber
          AND kept.NewExpiredDate IS NOT NULL
          AND rp.expiredDate <> kept.NewExpiredDate
          AND removed.DeleteSet = 1;

        PRINT 'No Errors ... Committing changes for ' + CAST(@groupNumber as varchar(15));
        COMMIT TRANSACTION;
    END TRY
    BEGIN CATCH
        -- Undo the current group and stop the script with the original error
        -- (no sysadmin-only severity-20 RAISERROR needed).
        IF @@TRANCOUNT > 0
            ROLLBACK TRANSACTION;
        THROW;
    END CATCH;

    -- Short pause between groups.
    WAITFOR DELAY '00:00:00:005';
    FETCH NEXT FROM MY_CURSOR INTO @groupNumber;
END

CLOSE MY_CURSOR;
DEALLOCATE MY_CURSOR;
GO