查找最近 1 小时内的重复数据
Find duplicate data in last 1 hour
我正在寻找一个 SQL 脚本来查找在过去 1 小时内有超过 2 个条目的数据。
我有一个表,其中有 user_id 和 event_time 列。我想要一种方法来确定某个 user_id 在过去 1 小时内是否有超过 1 个条目。
到目前为止,我已经尝试了以下方法:
- 创建临时表 (temp table) 以存放所有重复条目:
-- Copies every row of Table whose userid occurs more than once into #temp.
SELECT a.*
INTO #temp
FROM Table a
JOIN (
-- Row count per userid over the whole table; no time filter is applied here.
SELECT USERID, COUNT(*) AS Duplicates
FROM Table
GROUP BY userid
HAVING count(*) > 1
) AS b ON a.userid = b.USERID
- 运行自联接 (self join),获取时间差小于或等于 1 小时的记录:
-- Self-join of #temp on userid: each row of a is paired with every row of the
-- same user in b, so a row of a is returned once per b row that passes the filter.
SELECT a.*
FROM #temp a
INNER JOIN #temp b ON a.userid = b.USERID
-- DATEDIFF(hour, ...) counts the hour boundaries crossed going from a to b, so
-- "= 1" keeps pairs exactly one boundary apart, not pairs at most 60 minutes apart.
WHERE DATEDIFF(hour, a.EVENTTIME, b.EVENTTIME) = 1
运行第一个脚本后,得到大约 800 多行重复数据。但是运行第二个脚本之后,我得到了数以千计的行。
有人可以帮忙吗?
按 user_id 分组,并按 event_time 与当前时间相差的小时数排序,为每组中的行编上行号。记得只保留 event_time 在最近 1 小时内的行。
查询
-- Returns the rows of the last hour that belong to a user with more than one
-- row in that hour.
-- The cutoff is computed once so the CTE and the outer query use the same
-- instant. A plain range predicate on [event_time] is exact and index-friendly;
-- datediff(hour, ...) < 2 counts hour boundaries and would admit rows that are
-- almost two hours old.
declare @since datetime = dateadd(hour, -1, getdate());

with recent as (
    -- Number each user's rows inside the one-hour window; rn > 1 can only
    -- exist for a user that has at least two rows in the window.
    select
        [user_id],
        [rn] = row_number() over (
            partition by [user_id]
            order by [event_time]
        )
    from [your_table_name]
    where [event_time] >= @since
)
select [t1].*
from [your_table_name] as [t1]
where [t1].[event_time] >= @since   -- keep only the recent rows, not the user's full history
  and exists (
        select 1
        from recent as [t2]
        where [t2].[user_id] = [t1].[user_id]
          and [t2].[rn] > 1
  );
可以使用 cross apply 根据您的条件获取每个事件的所有相关事件,如下所示:
-- For every event, lists the other events of the same user that lie within
-- 60 minutes before or after it.
With CTE As (
    -- ID lets an event be told apart from itself inside the Cross Apply.
    Select USERID, EVENTTIME, Row_Number() Over (Order by USERID, EVENTTIME) As ID
    From Tbl
)
Select a.ID, a.USERID, a.EVENTTIME, T.ID, T.USERID, T.EVENTTIME
From CTE As a
Cross Apply (
    Select e.ID, e.USERID, e.EVENTTIME
    From CTE As e
    Where e.USERID = a.USERID
      And e.ID <> a.ID
      -- Exact +/- 60 minute window. DateDiff(minute, ...) counts minute
      -- boundaries crossed, so it would also accept events up to 60m59s apart.
      And e.EVENTTIME >= DateAdd(minute, -60, a.EVENTTIME)
      And e.EVENTTIME <= DateAdd(minute, 60, a.EVENTTIME)
) As T
Order by a.ID, a.USERID, a.EVENTTIME, T.ID, T.USERID, T.EVENTTIME
或者您可以在不绑定特定事件的情况下获取事件列表:
-- Lists each distinct (USERID, EVENTTIME) that has at least one other event of
-- the same user within 60 minutes before or after it.
With CTE As (
    -- ID lets an event be told apart from itself inside the Cross Apply.
    Select USERID, EVENTTIME, Row_Number() Over (Order by USERID, EVENTTIME) As ID
    From Tbl
)
Select T.USERID, T.EVENTTIME
From CTE As a
Cross Apply (
    Select e.USERID, e.EVENTTIME
    From CTE As e
    Where e.USERID = a.USERID
      And e.ID <> a.ID
      -- Exact +/- 60 minute window. DateDiff(minute, ...) counts minute
      -- boundaries crossed, so it would also accept events up to 60m59s apart.
      And e.EVENTTIME >= DateAdd(minute, -60, a.EVENTTIME)
      And e.EVENTTIME <= DateAdd(minute, 60, a.EVENTTIME)
) As T
-- Collapses the one-row-per-pair output of the Cross Apply to one row per event.
Group by T.USERID, T.EVENTTIME
要仅获取最近一小时内的事件,可以在 CTE 的 Where 子句中添加相应的筛选条件:
-- Same event list as the unfiltered variant, but the CTE only keeps events
-- from the 60 minutes up to the current time.
With recent_events As (
    Select
        USERID,
        EVENTTIME,
        Row_Number() Over (Order by USERID, EVENTTIME) As ID
    From Tbl
    Where EVENTTIME >= DateAdd(minute, -60, GetDate())
      And EVENTTIME <= GetDate()
)
Select
    other.USERID,
    other.EVENTTIME
From recent_events As cur
Cross Apply (
    -- Every other recent event of the same user within 60 minute boundaries.
    Select nb.USERID, nb.EVENTTIME
    From recent_events As nb
    Where nb.USERID = cur.USERID
      And nb.ID <> cur.ID
      And Abs(DateDiff(minute, cur.EVENTTIME, nb.EVENTTIME)) <= 60
) As other
Group by
    other.USERID,
    other.EVENTTIME
我正在寻找一个 SQL 脚本来查找在过去 1 小时内有超过 2 个条目的数据。 我有一个表,其中有 user_id 和 event_time 列。我想要一种方法来确定某个 user_id 在过去 1 小时内是否有超过 1 个条目。
到目前为止,我已经尝试了以下方法:
- 创建临时表 (temp table) 以存放所有重复条目:
-- Copies every row of Table whose userid occurs more than once into #temp.
SELECT a.*
INTO #temp
FROM Table a
JOIN (
-- Row count per userid over the whole table; no time filter is applied here.
SELECT USERID, COUNT(*) AS Duplicates
FROM Table
GROUP BY userid
HAVING count(*) > 1
) AS b ON a.userid = b.USERID
- 运行自联接 (self join),获取时间差小于或等于 1 小时的记录:
-- Self-join of #temp on userid: each row of a is paired with every row of the
-- same user in b, so a row of a is returned once per b row that passes the filter.
SELECT a.*
FROM #temp a
INNER JOIN #temp b ON a.userid = b.USERID
-- DATEDIFF(hour, ...) counts the hour boundaries crossed going from a to b, so
-- "= 1" keeps pairs exactly one boundary apart, not pairs at most 60 minutes apart.
WHERE DATEDIFF(hour, a.EVENTTIME, b.EVENTTIME) = 1
运行第一个脚本后,得到大约 800 多行重复数据。但是运行第二个脚本之后,我得到了数以千计的行。 有人可以帮忙吗?
按 user_id 分组,并按 event_time 与当前时间相差的小时数排序,为每组中的行编上行号。记得只保留 event_time 在最近 1 小时内的行。
查询
-- Returns the rows of the last hour that belong to a user with more than one
-- row in that hour.
-- The cutoff is computed once so the CTE and the outer query use the same
-- instant. A plain range predicate on [event_time] is exact and index-friendly;
-- datediff(hour, ...) < 2 counts hour boundaries and would admit rows that are
-- almost two hours old.
declare @since datetime = dateadd(hour, -1, getdate());

with recent as (
    -- Number each user's rows inside the one-hour window; rn > 1 can only
    -- exist for a user that has at least two rows in the window.
    select
        [user_id],
        [rn] = row_number() over (
            partition by [user_id]
            order by [event_time]
        )
    from [your_table_name]
    where [event_time] >= @since
)
select [t1].*
from [your_table_name] as [t1]
where [t1].[event_time] >= @since   -- keep only the recent rows, not the user's full history
  and exists (
        select 1
        from recent as [t2]
        where [t2].[user_id] = [t1].[user_id]
          and [t2].[rn] > 1
  );
可以使用 cross apply 根据您的条件获取每个事件的所有相关事件,如下所示:
-- For every event, lists the other events of the same user that lie within
-- 60 minutes before or after it.
With CTE As (
    -- ID lets an event be told apart from itself inside the Cross Apply.
    Select USERID, EVENTTIME, Row_Number() Over (Order by USERID, EVENTTIME) As ID
    From Tbl
)
Select a.ID, a.USERID, a.EVENTTIME, T.ID, T.USERID, T.EVENTTIME
From CTE As a
Cross Apply (
    Select e.ID, e.USERID, e.EVENTTIME
    From CTE As e
    Where e.USERID = a.USERID
      And e.ID <> a.ID
      -- Exact +/- 60 minute window. DateDiff(minute, ...) counts minute
      -- boundaries crossed, so it would also accept events up to 60m59s apart.
      And e.EVENTTIME >= DateAdd(minute, -60, a.EVENTTIME)
      And e.EVENTTIME <= DateAdd(minute, 60, a.EVENTTIME)
) As T
Order by a.ID, a.USERID, a.EVENTTIME, T.ID, T.USERID, T.EVENTTIME
或者您可以在不绑定特定事件的情况下获取事件列表:
-- Lists each distinct (USERID, EVENTTIME) that has at least one other event of
-- the same user within 60 minutes before or after it.
With CTE As (
    -- ID lets an event be told apart from itself inside the Cross Apply.
    Select USERID, EVENTTIME, Row_Number() Over (Order by USERID, EVENTTIME) As ID
    From Tbl
)
Select T.USERID, T.EVENTTIME
From CTE As a
Cross Apply (
    Select e.USERID, e.EVENTTIME
    From CTE As e
    Where e.USERID = a.USERID
      And e.ID <> a.ID
      -- Exact +/- 60 minute window. DateDiff(minute, ...) counts minute
      -- boundaries crossed, so it would also accept events up to 60m59s apart.
      And e.EVENTTIME >= DateAdd(minute, -60, a.EVENTTIME)
      And e.EVENTTIME <= DateAdd(minute, 60, a.EVENTTIME)
) As T
-- Collapses the one-row-per-pair output of the Cross Apply to one row per event.
Group by T.USERID, T.EVENTTIME
要仅获取最近一小时内的事件,可以在 CTE 的 Where 子句中添加相应的筛选条件:
-- Same event list as the unfiltered variant, but the CTE only keeps events
-- from the 60 minutes up to the current time.
With recent_events As (
    Select
        USERID,
        EVENTTIME,
        Row_Number() Over (Order by USERID, EVENTTIME) As ID
    From Tbl
    Where EVENTTIME >= DateAdd(minute, -60, GetDate())
      And EVENTTIME <= GetDate()
)
Select
    other.USERID,
    other.EVENTTIME
From recent_events As cur
Cross Apply (
    -- Every other recent event of the same user within 60 minute boundaries.
    Select nb.USERID, nb.EVENTTIME
    From recent_events As nb
    Where nb.USERID = cur.USERID
      And nb.ID <> cur.ID
      And Abs(DateDiff(minute, cur.EVENTTIME, nb.EVENTTIME)) <= 60
) As other
Group by
    other.USERID,
    other.EVENTTIME