如何删除复杂 SQL 查询中的某些重复项
How do I remove certain duplicates in a complex SQL query
我正在编写一个查询,需要它来删除 a.GenUserID 的所有重复项,同时保留最近的登录日期(即 b.LogDateTime),但该日期必须早于 6 个月。如果有更晚的日期,则必须将其删除。
我希望这是有道理的。
SELECT DISTINCT
a.GenUserID,
c.DeletionDate,
b.LogDateTime,
(CASE c.Disabled WHEN 0 THEN 'NO' else 'YES - ARCHIVED' end)
FROM RioReport.dbo.GenUser a
LEFT JOIN dbo.GenUserArchive c on a.GenUserID = c.GenUserID
LEFT JOIN dbo.GenUserAccessHistory b on a.GenUserID = b.ExtraInfo
WHERE(a.Disabled=0 or c.Disabled=0)
AND c.DeletionDate IS NOT NULL
AND ((DateAdd(MM, -6, GetDate()) > b.LogDateTime or b.LogDateTime IS NULL))
ORDER BY a.GenUserID, b.LogDateTime desc
使用cte和window函数
;with ctr as (
select a.GenUserID, a.DeletionDate, a.LogDateTime
row_number()over(partition by a.GenUserID order by b.LogDateTime desc) rnk
from RioReport.dbo.GenUser a )
select a.GenUserID, a.DeletionDate, a.LogDateTime,
CASE WHEN DATEDIFF(mm,LogDateTime,getdate())<6 THEN 'NO' else 'YES - ARCHIVED' end)
from ctr a where a.rnk=1
您可以将 row_number()
信息添加到您的查询中,并将该查询包装到一个外部查询中,该查询仅从该结果中获取编号为 1 的记录:
select *
from (
select a.GenUserID,
c.DeletionDate,
b.LogDateTime,
case c.Disabled when 0 then 'NO' else 'YES - ARCHIVED' end as diabled,
row_number() over (partition by a.GenUserID
order by b.LogDateTime desc) as rn
from RioReport.dbo.GenUser a
inner join dbo.GenUserArchive c
on a.GenUserID = c.GenUserID
left join dbo.GenUserAccessHistory b
on a.GenUserID = b.ExtraInfo
where (a.Disabled=0 or c.Disabled=0)
and c.DeletionDate is not null
and (DateAdd(MM, -6, GetDate()) > b.LogDateTime or b.LogDateTime is null)
)
where rn = 1
order by a.GenUserID
请注意,您可以将第一个 left join
变成 inner join
而无需对结果集进行任何更改,因为您对其其中一个字段进行了非空检查。 inner join
是首选,可能会提高性能。
如果 GenUserAccessHistory.LogDateTime
始终为非空,则可以通过将 DateAdd(MM, -6, GetDate()) > b.LogDateTime
条件移动到适当的连接 on
子句来避免测试 or b.LogDateTime is null
。
生成的行号将按照 LogDateTime
值的降序排列,并为每个不同的用户从 1 重新开始。
没有 window 功能的替代方案
自 SQL Server 2008 起支持 row_number()
和其他 window 函数。在您写的评论中您不能使用它。如果是这种情况,这里有一个使用通用 table 表达式的替代方法(自 SQL Server 2005 起支持):
;with cte as (
select a.GenUserID,
c.DeletionDate,
b.LogDateTime,
case c.Disabled when 0 then 'NO' else 'YES - ARCHIVED' end as disabled,
from RioReport.dbo.GenUser a
inner join dbo.GenUserArchive c
on a.GenUserID = c.GenUserID
left join dbo.GenUserAccessHistory b
on a.GenUserID = b.ExtraInfo
where (a.Disabled=0 or c.Disabled=0)
and c.DeletionDate is not null
and (DateAdd(MM, -6, GetDate()) > b.LogDateTime or b.LogDateTime is null)
)
select *
from cte main
where LogDateTime is null
or not exists (select 1
from cte sub
where sub.GenUserID = main.GenUserID
and sub.LogDateTime > main.LogDateTime)
order by GenUserID
尝试使用以下查询。
;WITH CTE_Group
AS(
SELECT
ROW_NUMBER() OVER (PARTITION BY a.GenUserID ORDER BY b.LogDateTime DESC) as RNO,
a.GenUserID,
c.DeletionDate,
b.LogDateTime,
(CASE c.Disabled WHEN 0 THEN 'NO' else 'YES - ARCHIVED' end) IsArchived
FROM RioReport.dbo.GenUser a
LEFT JOIN dbo.GenUserArchive c on a.GenUserID = c.GenUserID
LEFT JOIN dbo.GenUserAccessHistory b on a.GenUserID = b.ExtraInfo
WHERE(a.Disabled=0 or c.Disabled=0)
AND c.DeletionDate IS NOT NULL
AND ((DateAdd(MM, -6, GetDate()) > b.LogDateTime or b.LogDateTime IS NULL)))
SELECT GenUserID,
DeletionDate,
LogDateTime,
IsArchived
FROM WITH_CTE_Group
WHERE RNO=1
我正在编写一个查询,需要它来删除 a.GenUserID 的所有重复项,同时保留最近的登录日期(即 b.LogDateTime),但该日期必须早于 6 个月。如果有更晚的日期,则必须将其删除。 我希望这是有道理的。
SELECT DISTINCT
a.GenUserID,
c.DeletionDate,
b.LogDateTime,
(CASE c.Disabled WHEN 0 THEN 'NO' else 'YES - ARCHIVED' end)
FROM RioReport.dbo.GenUser a
LEFT JOIN dbo.GenUserArchive c on a.GenUserID = c.GenUserID
LEFT JOIN dbo.GenUserAccessHistory b on a.GenUserID = b.ExtraInfo
WHERE(a.Disabled=0 or c.Disabled=0)
AND c.DeletionDate IS NOT NULL
AND ((DateAdd(MM, -6, GetDate()) > b.LogDateTime or b.LogDateTime IS NULL))
ORDER BY a.GenUserID, b.LogDateTime desc
使用cte和window函数
;with ctr as (
select a.GenUserID, a.DeletionDate, a.LogDateTime
row_number()over(partition by a.GenUserID order by b.LogDateTime desc) rnk
from RioReport.dbo.GenUser a )
select a.GenUserID, a.DeletionDate, a.LogDateTime,
CASE WHEN DATEDIFF(mm,LogDateTime,getdate())<6 THEN 'NO' else 'YES - ARCHIVED' end)
from ctr a where a.rnk=1
您可以将 row_number()
信息添加到您的查询中,并将该查询包装到一个外部查询中,该查询仅从该结果中获取编号为 1 的记录:
select *
from (
select a.GenUserID,
c.DeletionDate,
b.LogDateTime,
case c.Disabled when 0 then 'NO' else 'YES - ARCHIVED' end as diabled,
row_number() over (partition by a.GenUserID
order by b.LogDateTime desc) as rn
from RioReport.dbo.GenUser a
inner join dbo.GenUserArchive c
on a.GenUserID = c.GenUserID
left join dbo.GenUserAccessHistory b
on a.GenUserID = b.ExtraInfo
where (a.Disabled=0 or c.Disabled=0)
and c.DeletionDate is not null
and (DateAdd(MM, -6, GetDate()) > b.LogDateTime or b.LogDateTime is null)
)
where rn = 1
order by a.GenUserID
请注意,您可以将第一个 left join
变成 inner join
而无需对结果集进行任何更改,因为您对其其中一个字段进行了非空检查。 inner join
是首选,可能会提高性能。
如果 GenUserAccessHistory.LogDateTime
始终为非空,则可以通过将 DateAdd(MM, -6, GetDate()) > b.LogDateTime
条件移动到适当的连接 on
子句来避免测试 or b.LogDateTime is null
。
生成的行号将按照 LogDateTime
值的降序排列,并为每个不同的用户从 1 重新开始。
没有 window 功能的替代方案
自 SQL Server 2008 起支持row_number()
和其他 window 函数。在您写的评论中您不能使用它。如果是这种情况,这里有一个使用通用 table 表达式的替代方法(自 SQL Server 2005 起支持):
;with cte as (
select a.GenUserID,
c.DeletionDate,
b.LogDateTime,
case c.Disabled when 0 then 'NO' else 'YES - ARCHIVED' end as disabled,
from RioReport.dbo.GenUser a
inner join dbo.GenUserArchive c
on a.GenUserID = c.GenUserID
left join dbo.GenUserAccessHistory b
on a.GenUserID = b.ExtraInfo
where (a.Disabled=0 or c.Disabled=0)
and c.DeletionDate is not null
and (DateAdd(MM, -6, GetDate()) > b.LogDateTime or b.LogDateTime is null)
)
select *
from cte main
where LogDateTime is null
or not exists (select 1
from cte sub
where sub.GenUserID = main.GenUserID
and sub.LogDateTime > main.LogDateTime)
order by GenUserID
尝试使用以下查询。
;WITH CTE_Group
AS(
SELECT
ROW_NUMBER() OVER (PARTITION BY a.GenUserID ORDER BY b.LogDateTime DESC) as RNO,
a.GenUserID,
c.DeletionDate,
b.LogDateTime,
(CASE c.Disabled WHEN 0 THEN 'NO' else 'YES - ARCHIVED' end) IsArchived
FROM RioReport.dbo.GenUser a
LEFT JOIN dbo.GenUserArchive c on a.GenUserID = c.GenUserID
LEFT JOIN dbo.GenUserAccessHistory b on a.GenUserID = b.ExtraInfo
WHERE(a.Disabled=0 or c.Disabled=0)
AND c.DeletionDate IS NOT NULL
AND ((DateAdd(MM, -6, GetDate()) > b.LogDateTime or b.LogDateTime IS NULL)))
SELECT GenUserID,
DeletionDate,
LogDateTime,
IsArchived
FROM WITH_CTE_Group
WHERE RNO=1