删除匹配集

Removing matching sets

这是我的模式和数据的示例:

-- Sample fixture: a single group (234545) holding five consecutive "sets".
-- A set is the pair of rows (Upper/Lower) that share one effectiveDate.
-- Date literals use the ISO 8601 form 'YYYY-MM-DDThh:mm:ss' so they parse the
-- same way regardless of the session's DATEFORMAT / LANGUAGE settings (the
-- m/d/yy form fails or silently swaps day and month under DATEFORMAT dmy).
DECLARE @temp TABLE (
    rowid         INT IDENTITY(1,1),
    groupNumber   INT,
    typeName      VARCHAR(10),
    valueA        INT,
    valueB        INT,
    effectiveDate DATETIME,
    expiredDate   DATETIME   -- NULL marks the currently open set
);

-- One INSERT per row keeps the identity values (rowid 1..10) in the listed order.
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900,  '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900,  '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 800,  '2018-06-01T10:32:00', NULL);
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-06-01T10:32:00', NULL);

-- Show the fixture; the column list matches the table's declared column order.
SELECT
    rowid,
    groupNumber,
    typeName,
    valueA,
    valueB,
    effectiveDate,
    expiredDate
FROM @temp;

每个组号(groupNumber)下有多个集合,一个集合由生效日期(effectiveDate)定义,并且其生效日期总是等于上一个集合的到期日期(expiredDate)。因此示例中共有 5 个集合,我想做的是删除多余的集合,即第 3/4 行和第 5/6 行。只有当 valueA 或 valueB 发生变化时,我才需要保留新的集合。我的最终目标是逐个分组循环处理,清理生产数据库中的这类数据,因为大约 60% 的行所反映的相邻两个集合之间,在任何重要方面(即 typeName、valueA、valueB)都完全没有变化。

需要注意的是,删除这 4 行之后,我还需要把前两行的到期日期设置为第 7 行和第 8 行的生效日期,因为相邻集合的日期必须始终首尾衔接。

另一个问题是:我想运行一个脚本(可能通过 SQL Server 代理作业)循环遍历该表(在生产环境中显然不是表变量),为每个 groupNumber 开启一个新事务来删除行并更新 expiredDate。如果我在作业完成之前将其停止,而此时它正处于某个事务之中(这种情况很可能发生),有没有办法让它自动回滚?

您可以使用窗口函数(window functions)生成两个排名,一个按升序、另一个按降序,从而识别出值最小和值最大的行。

那么您可以只过滤排名第一的值:

-- Return only the rows of the earliest and the latest set in each group.
-- RANK() gives tied effectiveDate values the same rank, so every row of the
-- first set has rk_min = 1 and every row of the last set has rk_max = 1.
;WITH summary AS (
    SELECT
        p.rowid,
        p.groupNumber,
        p.typeName,
        p.valueA,
        p.valueB,
        p.effectiveDate,
        p.expiredDate,
        RANK() OVER (PARTITION BY p.groupNumber ORDER BY p.effectiveDate ASC)  AS rk_min,
        RANK() OVER (PARTITION BY p.groupNumber ORDER BY p.effectiveDate DESC) AS rk_max
    FROM @temp AS p
)
SELECT
    s.rowid,
    s.groupNumber,
    s.typeName,
    s.valueA,
    s.valueB,
    s.effectiveDate,
    s.expiredDate
FROM summary AS s
WHERE s.rk_min = 1
   OR s.rk_max = 1
ORDER BY s.rowid;

结果:

如果想找出位于首尾两个集合之间的中间行,只需将 where 条件改为 WHERE s.rk_min > 1 and s.rk_max > 1:

-- Return the interior rows of each group: everything that belongs neither to
-- the earliest set (rk_min = 1) nor to the latest set (rk_max = 1).
;WITH summary AS (
    SELECT
        p.rowid,
        p.groupNumber,
        p.typeName,
        p.valueA,
        p.valueB,
        p.effectiveDate,
        p.expiredDate,
        RANK() OVER (PARTITION BY p.groupNumber ORDER BY p.effectiveDate ASC)  AS rk_min,
        RANK() OVER (PARTITION BY p.groupNumber ORDER BY p.effectiveDate DESC) AS rk_max
    FROM @temp AS p
)
SELECT
    s.rowid,
    s.groupNumber,
    s.typeName,
    s.valueA,
    s.valueB,
    s.effectiveDate,
    s.expiredDate
FROM summary AS s
WHERE s.rk_min > 1
  AND s.rk_max > 1
ORDER BY s.rowid;

结果:

这不是最终脚本,因为存在一些疑问。

@疑问1:什么是多余的行/集合(rows/sets)?为什么第 3/4 行和第 5/6 行是多余的?答案应该涵盖所有可能的情况。

@疑问2:前两行的 expiredDate 应该更新为最后两行中哪一行的 expiredDate?更新时,前两行和后两行之间是什么关系?

-- Draft clean-up on a temp table: keep the first two and the last two
-- unprocessed rows (by rowid), delete everything else, then close the first
-- set at the effectiveDate of the last set.
CREATE TABLE #temp (
    rowid         INT IDENTITY(1,1),
    groupNumber   INT,
    typeName      VARCHAR(10),
    valueA        INT,
    valueB        INT,
    effectiveDate DATETIME,
    expiredDate   DATETIME,
    isLineup      INT DEFAULT (0)   -- set to 1 by the UPDATE below once a row has been handled
);

-- ISO 8601 literals parse identically under every DATEFORMAT / LANGUAGE setting.
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 1000, '2018-01-01T11:31:00', '2018-02-01T22:01:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0,    '2018-01-01T11:31:00', '2018-02-01T22:01:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 1000, '2018-02-01T22:01:00', '2018-04-15T05:39:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0,    '2018-02-01T22:01:00', '2018-04-15T05:39:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 900,  '2018-04-15T05:39:00', '2018-06-01T10:32:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0,    '2018-04-15T05:39:00', '2018-06-01T10:32:00', 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Upper', 1, 900,  '2018-06-01T10:32:00', NULL, 0);
INSERT INTO #temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup) VALUES (234545, 'Lower', 2, 0,    '2018-06-01T10:32:00', NULL, 0);

-- Rows that must survive the clean-up. Flag = 0: first two rows; Flag = 1: last two rows.
CREATE TABLE #temp1 (
    rowid         INT,
    effectiveDate DATETIME,
    Flag          INT
);

-- Main Script

BEGIN TRY
    BEGIN TRANSACTION;

    -- Criteria to decide superfluous rows: anything that is neither among the
    -- first two nor among the last two unprocessed rows.
    INSERT INTO #temp1 (rowid, effectiveDate, Flag)
    SELECT TOP (2) rowid, effectiveDate, 0
    FROM #temp
    WHERE isLineup = 0
    ORDER BY rowid;

    INSERT INTO #temp1 (rowid, effectiveDate, Flag)
    SELECT TOP (2) rowid, effectiveDate, 1
    FROM #temp
    WHERE isLineup = 0
    ORDER BY rowid DESC;
    --- End

    DELETE FROM #temp
    WHERE NOT EXISTS (SELECT 1 FROM #temp1 AS k WHERE k.rowid = #temp.rowid);

    -- Mark every survivor as processed, but move expiredDate only on rows that
    -- belong to the first set and not to the last one. The last set keeps its
    -- own expiredDate (NULL = still open); previously it was overwritten with
    -- its own effectiveDate.
    UPDATE c
    SET expiredDate = CASE
                          WHEN k.inLastSet = 0 THEN lastSet.effectiveDate
                          ELSE c.expiredDate
                      END,
        isLineup = 1
    FROM #temp AS c
    INNER JOIN (
        -- inLastSet = 1 when the row was captured with Flag = 1 (possibly in
        -- addition to Flag = 0 when fewer than four rows exist).
        SELECT rowid, MAX(Flag) AS inLastSet
        FROM #temp1
        GROUP BY rowid
    ) AS k
        ON k.rowid = c.rowid
    CROSS APPLY (
        -- ORDER BY makes the pick deterministic: the highest rowid of the last set.
        SELECT TOP (1) f.effectiveDate
        FROM #temp1 AS f
        WHERE f.Flag = 1
        ORDER BY f.rowid DESC
    ) AS lastSet
    WHERE c.isLineup = 0;

    COMMIT TRANSACTION;
END TRY
BEGIN CATCH
    IF @@TRANCOUNT > 0
        ROLLBACK TRANSACTION;

    -- Surface the failure instead of swallowing it; replace with real logging as needed.
    PRINT 'Clean-up rolled back: ' + ERROR_MESSAGE();
END CATCH;

-- End Main

SELECT rowid, groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate, isLineup
FROM #temp;

SELECT rowid, effectiveDate, Flag
FROM #temp1;

DROP TABLE #temp;
DROP TABLE #temp1;

我找到答案了:

-- Per-group clean-up of redundant sets.
-- For every groupNumber: build a one-row-per-set summary, mark each set that is
-- identical to the set immediately before it (never the last set), delete the
-- marked rows and extend the expiredDate of the preceding kept set so the dates
-- still line up. Each group is processed in its own transaction.

-- Roll the open transaction back automatically on a run-time error.
-- NOTE(review): per the SET XACT_ABORT documentation this also covers a
-- cancelled batch (e.g. a stopped job); confirm for your environment.
-- The setting stays in effect for the rest of the session.
SET XACT_ABORT ON;

-- Demo data. NOTE(review): a table variable is not restored by ROLLBACK;
-- in production this is expected to be a real table.
DECLARE @temp TABLE (
    rowid         INT IDENTITY(1,1),
    groupNumber   INT,
    typeName      VARCHAR(10),
    valueA        INT,
    valueB        INT,
    effectiveDate DATETIME,
    expiredDate   DATETIME
);

-- ISO 8601 literals parse identically under every DATEFORMAT / LANGUAGE setting.
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-01-01T11:31:00', '2018-02-01T22:01:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 1000, '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-02-01T22:01:00', '2018-04-15T05:39:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900,  '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-04-15T05:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 900,  '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-04-15T06:39:00', '2018-06-01T10:32:00');
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Upper', 1, 800,  '2018-06-01T10:32:00', NULL);
INSERT INTO @temp (groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate) VALUES (234545, 'Lower', 2, 0,    '2018-06-01T10:32:00', NULL);

SELECT rowid, groupNumber, typeName, valueA, valueB, effectiveDate, expiredDate
FROM @temp;

DECLARE @groupNumber INT;

-- LOCAL keeps the cursor scoped to this batch, so it is released even when the
-- batch is aborted; STATIC iterates over a snapshot of the group list.
DECLARE MY_CURSOR CURSOR LOCAL STATIC FOR
    SELECT DISTINCT groupNumber
    FROM @temp;

OPEN MY_CURSOR;
FETCH NEXT FROM MY_CURSOR INTO @groupNumber;

WHILE @@FETCH_STATUS = 0
BEGIN
    IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp;

    -- One row per set (effectiveDate) of the current group, with a signature
    -- used to compare neighbouring sets:
    --   TotalvalueA / ChecksumTotal : sums over the set (assumes valueA and
    --                                 valueB can never be reversed between rows)
    --   typesXML                    : sorted, comma-terminated list of the
    --                                 distinct type names in the set. The full
    --                                 typeName is used; truncating it could make
    --                                 different types look identical.
    SELECT
        RANK() OVER (PARTITION BY rp2.groupNumber ORDER BY rp2.effectiveDate) AS TheRank,
        rp2.groupNumber,
        rp2.effectiveDate,
        SUM(rp2.valueA) AS TotalvalueA,
        SUM(ISNULL(rp2.valueA, 0) + ISNULL(rp2.valueB, 0)) AS ChecksumTotal,
        (
            SELECT rp.typeName + ',' AS [text()]
            FROM @temp AS rp
            WHERE rp.groupNumber = rp2.groupNumber
              AND rp.groupNumber = @groupNumber
              AND rp.effectiveDate = rp2.effectiveDate
            GROUP BY rp.typeName
            ORDER BY MIN(rp.typeName)
            FOR XML PATH ('')
        ) AS typesXML,
        0 AS DeleteSet
    INTO #temp
    FROM @temp AS rp2
    WHERE rp2.groupNumber = @groupNumber
    GROUP BY rp2.groupNumber, rp2.effectiveDate;

    -- Mark a set for deletion when it matches the set right before it.
    -- The last set of the group is never marked, so it always survives.
    UPDATE nxt
    SET DeleteSet = 1
    FROM #temp AS prev
    INNER JOIN #temp AS nxt
        ON nxt.TheRank = prev.TheRank + 1
    WHERE prev.TotalvalueA = nxt.TotalvalueA
      AND prev.ChecksumTotal = nxt.ChecksumTotal
      AND prev.typesXML = nxt.typesXML
      AND nxt.TheRank <> (SELECT MAX(TheRank) FROM #temp);

    BEGIN TRY
        BEGIN TRANSACTION;

        -- Remove every row that belongs to a marked set.
        DELETE rp
        FROM @temp AS rp
        INNER JOIN #temp AS t
            ON t.groupNumber = rp.groupNumber
           AND t.effectiveDate = rp.effectiveDate
           AND t.DeleteSet = 1;

        -- For each kept set that was directly followed by a deleted set, move
        -- expiredDate forward to the effectiveDate of the next kept set.
        UPDATE rp
        SET expiredDate = kept.NewExpiredDate
        FROM @temp AS rp
        INNER JOIN (
            SELECT
                groupNumber,
                effectiveDate,
                TheRank,
                LEAD(effectiveDate) OVER (ORDER BY TheRank) AS NewExpiredDate
            FROM #temp
            WHERE DeleteSet = 0
        ) AS kept
            ON kept.groupNumber = rp.groupNumber
           AND kept.effectiveDate = rp.effectiveDate
        INNER JOIN #temp AS removed
            ON removed.TheRank = kept.TheRank + 1
        WHERE rp.groupNumber = @groupNumber
          AND kept.NewExpiredDate IS NOT NULL
          AND rp.expiredDate <> kept.NewExpiredDate
          AND removed.DeleteSet = 1;

        PRINT 'No Errors ... Committing changes for ' + CAST(@groupNumber AS varchar(15));
        COMMIT TRANSACTION;
    END TRY
    BEGIN CATCH
        -- Undo the partial work for this group, then re-raise the original
        -- error so the batch (or job step) stops and reports the failure.
        IF @@TRANCOUNT > 0
            ROLLBACK TRANSACTION;
        THROW;
    END CATCH;

    -- Brief pause between groups.
    WAITFOR DELAY '00:00:00:005';
    FETCH NEXT FROM MY_CURSOR INTO @groupNumber;
END

CLOSE MY_CURSOR;
DEALLOCATE MY_CURSOR;

IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp;
GO