SQL 查找不完整的记录集
SQL Find An Incomplete Set of Records
"Not possible" 是一个有效的答案...
给定表的字段(非真实字段名称):
DateTime, Message, User
给定查询(返回所有集合):
-- Return every logged record of one user, newest first.
-- [User] is bracketed on purpose: USER is a reserved keyword in T-SQL (and a
-- built-in function returning the database user name), so an unbracketed
-- "User" in the predicate would not refer to the column.
SELECT [DateTime], [Message], [User]
FROM tbl
WHERE [User] = 'mfamoso'
ORDER BY [DateTime] DESC;
一组中的给定数据(3 个预期记录):
+-------------------------+------------------------------------------------------------------------------------------+---------+
| DateTime | Message | User |
+-------------------------+------------------------------------------------------------------------------------------+---------+
| 2019-03-11 12:55:00.097 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."} | mfamoso |
| 2019-03-11 12:55:00.270 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"} | mfamoso |
| 2019-03-11 12:55:08.543 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"CleanUp ran successfully"} | mfamoso |
+-------------------------+------------------------------------------------------------------------------------------+---------+
问题:如果某个集合不完整(例如缺少记录 "CleanUp ran successfully"),有没有办法得到如下所示的查询结果?
主程序作为服务每 10 分钟运行一次,所以表中会有多个集合;运行成功时,每个集合都应有 3 条记录。
我需要一个查询,返回那些没有凑齐全部 3 条记录的集合(即某次运行没有完整执行),例如:
+-------------------------+------------------------------------------------------------------------------------------+---------+
| DateTime | Message | User |
+-------------------------+------------------------------------------------------------------------------------------+---------+
| 2019-03-11 12:55:00.097 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."} | mfamoso |
| 2019-03-11 12:55:00.270 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"} | mfamoso |
+-------------------------+------------------------------------------------------------------------------------------+---------+
我猜查询会用计数来确认是否有 3 条记录,如果不是 3 条就把它们显示出来——但是如何确定哪些记录属于同一个集合?创建这些记录的过程每次运行大约需要 10 秒,并且每 10 分钟运行一次。这可能需要用存储过程来实现。
DateTime 会变化,Message 不变。
每组只有10毫秒?
那么对于同一用户的一组记录,应该有 3 条记录的时间在四舍五入到最近的分钟后相同。
这个查询使用了这个想法。
-- List one user's records whose set does not contain exactly 3 rows.
-- A "set" is every row of the same user whose DateTime, shifted by 30 seconds
-- and truncated to the minute (i.e. rounded to the nearest minute), is equal.
;WITH bucketed AS (
    SELECT
        src.[User],
        src.[DateTime],
        src.Message,
        COUNT(*) OVER (
            PARTITION BY
                src.[User],
                DATEADD(minute, DATEDIFF(minute, 0, DATEADD(second, 30, src.[DateTime])), 0)
        ) AS RowsInBucket
    FROM tbl AS src
    WHERE src.[User] = 'mfamoso'
)
SELECT
    bucketed.[User],
    bucketed.[DateTime],
    bucketed.Message
FROM bucketed
WHERE bucketed.RowsInBucket <> 3
ORDER BY bucketed.[DateTime] DESC;
只是这个查询并不检查同一组内的 3 条消息是否互不相同。
我会尝试下面的方法。它需要使用临时表来添加字段(标识字段 UniqueID、PositionInSet 和 SetNum)。基本上,用 CHARINDEX 识别三种可能的记录类型(导出 Exporting、印记 Imprint、清理 CleanUp),再用 row_number() 根据它们在每个集合中的预期位置(第一、第二或第三)把它们分到各个集合中。最后,返回所有记录数 <> 3 的集合。
/*
  Demo: detect incomplete sets of log records with a temp table.
  A complete set is three consecutive records: Exporting, Imprint, CleanUp.
  The script classifies every record, assigns it a set number and finally
  returns the records of every set that does not hold exactly 3 records.
*/
create table #t (
    UniqueID int identity(1,1),
    DateTime datetime,
    Message varchar(500),
    PositionInSet int,
    SetNum int
)

-- Sample data: set 1 complete, set 2 without its CleanUp record, set 3 complete.
insert into #t (DateTime, Message)
select '2019-03-11 12:55:00.097', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."}'
union all
select '2019-03-11 12:55:00.270', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"}'
union all
select '2019-03-11 12:55:08.543', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"CleanUp ran successfully"}'
union all
select '2019-03-11 12:55:00.097', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."}'
union all
select '2019-03-11 12:55:00.270', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"}'
union all
select '2019-03-11 12:55:00.097', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."}'
union all
select '2019-03-11 12:55:00.270', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"}'
union all
select '2019-03-11 12:55:08.543', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"CleanUp ran successfully"}'
/***********************************/
-- Classify each record by the position it is expected to have inside a set.
-- The message is lower-cased first so the lowercase search terms also match
-- 'Exporting' / 'CleanUp' when the column uses a case-sensitive collation.
update #t
set PositionInSet = case
        when CHARINDEX('exporting', lower(Message)) <> 0 then 1
        when CHARINDEX('imprint', lower(Message)) <> 0 then 2
        when CHARINDEX('cleanup', lower(Message)) <> 0 then 3
    end

-- Assign set numbers. Only the records that open a set (PositionInSet = 1) are
-- numbered with row_number(); every record then takes the number of the latest
-- set start at or before it. Numbering each message type independently would
-- pair the n-th CleanUp with the n-th Exporting, so one missing record would
-- shift all later records of that type into the wrong set.
-- Records that precede the first set start get SetNum 0.
-- NOTE(review): this relies on UniqueID reflecting the order in which the
-- records were logged -- confirm when loading real data.
update t
set t.SetNum = isnull((
        select max(starts.SetNum)
        from (
            select
                UniqueID,
                row_number() over (order by UniqueID) as SetNum
            from #t
            where PositionInSet = 1
        ) starts
        where starts.UniqueID <= t.UniqueID
    ), 0)
from #t t

-- Return every record that belongs to a set without exactly 3 records.
select * from #t
inner join (
    select
        COUNT(*) as Count,
        SetNum
    from #t
    group by SetNum
    having COUNT(*) <> 3
) grp on #t.SetNum = grp.SetNum
/***********************************/
基本上,这种情况的难点在于如何对您的记录集合进行分组。由于集合没有标识符,我们只能依赖 DateTime 在 10 分钟内的接近程度(或者更短的时间,因为该过程只需不到一分钟,大约 10 秒)。如果消息之间的间隔在一分钟以内,我们就可以把它们归为一组。当然,总会有这样的情况:您在一分钟内停止并重新启动了该过程,这会打乱查询结果。
现在考虑到该分组,然后您可以查询那些不在一组 3 条消息中的消息。
编辑
对不起。我忘了这是 SQL Server 2008。您必须在此处使用自连接。
-- SQL Server 2008 variant: group records into sets by time proximity and
-- return the records of every set that has fewer than 3 messages.
-- NOTE(review): records of all users share one timeline here; add a [User]
-- filter if sets must be evaluated per user -- confirm against requirements.
;with
t0(dt,m,u,dtt) as (
    -- pair every record with the closest earlier DateTime (NULL for the first
    -- record); LAG() is not available in SQL Server 2008
    select a.[DateTime], a.[Message], a.[User], c.[DateTime]
    from tbl a
    outer apply (
        select top 1 b.[DateTime]
        from tbl b
        where b.[DateTime] < a.[DateTime]
        order by b.[DateTime] desc
    ) c
),
t1(dt,m,u,nxt) as (
    -- flag a record with 1 when it is 60 seconds or more after the previous
    -- one, i.e. when it starts a new set; the first record is flagged 0
    select dt, m, u, case when isnull(datediff(ss,dtt,dt),0)<60 then 0 else 1 end
    from t0
),
t2(dt,m,u,groupid) as (
    -- turn the flags into a group id with a running total; SQL Server 2008
    -- does not accept ORDER BY in an aggregate OVER clause, so the total is
    -- computed with a correlated subquery instead
    select cur.dt, cur.m, cur.u,
           (select sum(prev.nxt) from t1 prev where prev.dt <= cur.dt)
    from t1 cur
),
t3(groupid) as (
    -- keep only the group ids of sets with fewer than 3 messages
    select groupid
    from t2
    group by groupid
    having count(*)<3
)
select t2.*
from t2
join t3 on t2.groupid=t3.groupid
原始回答
对于 SQL Server 2012 及更高版本:
-- SQL Server 2012+ variant: group records into sets by time proximity and
-- return the records of every set that has fewer than 3 messages.
;WITH flagged (dt, m, u, nxt) AS (
    -- 1 when the record is 60 seconds or more after the previous one,
    -- otherwise 0 (the first record has no predecessor and gets 0)
    SELECT
        [DateTime],
        [Message],
        [User],
        CASE
            WHEN DATEDIFF(second, LAG([DateTime]) OVER (ORDER BY [DateTime]), [DateTime]) >= 60 THEN 1
            ELSE 0
        END
    FROM tbl
),
grouped (dt, m, u, groupid) AS (
    -- the running total of the flags serves as the group id
    SELECT
        dt,
        m,
        u,
        SUM(nxt) OVER (ORDER BY dt)
    FROM flagged
),
incomplete (groupid) AS (
    -- group ids of the sets holding fewer than 3 messages
    SELECT g.groupid
    FROM grouped AS g
    GROUP BY g.groupid
    HAVING COUNT(*) < 3
)
SELECT grouped.*
FROM grouped
INNER JOIN incomplete
    ON incomplete.groupid = grouped.groupid
"Not possible" 是一个有效的答案...
给定表的字段(非真实字段名称):
DateTime, Message, User
给定查询(返回所有集合):
-- Return every logged record of one user, newest first.
-- [User] is bracketed on purpose: USER is a reserved keyword in T-SQL (and a
-- built-in function returning the database user name), so an unbracketed
-- "User" in the predicate would not refer to the column.
SELECT [DateTime], [Message], [User]
FROM tbl
WHERE [User] = 'mfamoso'
ORDER BY [DateTime] DESC;
一组中的给定数据(3 个预期记录):
+-------------------------+------------------------------------------------------------------------------------------+---------+
| DateTime | Message | User |
+-------------------------+------------------------------------------------------------------------------------------+---------+
| 2019-03-11 12:55:00.097 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."} | mfamoso |
| 2019-03-11 12:55:00.270 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"} | mfamoso |
| 2019-03-11 12:55:08.543 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"CleanUp ran successfully"} | mfamoso |
+-------------------------+------------------------------------------------------------------------------------------+---------+
问题:如果某个集合不完整(例如缺少记录 "CleanUp ran successfully"),有没有办法得到如下所示的查询结果?
主程序作为服务每 10 分钟运行一次,所以表中会有多个集合;运行成功时,每个集合都应有 3 条记录。
我需要一个查询,返回那些没有凑齐全部 3 条记录的集合(即某次运行没有完整执行),例如:
+-------------------------+------------------------------------------------------------------------------------------+---------+
| DateTime | Message | User |
+-------------------------+------------------------------------------------------------------------------------------+---------+
| 2019-03-11 12:55:00.097 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."} | mfamoso |
| 2019-03-11 12:55:00.270 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"} | mfamoso |
+-------------------------+------------------------------------------------------------------------------------------+---------+
我猜查询会用计数来确认是否有 3 条记录,如果不是 3 条就把它们显示出来——但是如何确定哪些记录属于同一个集合?创建这些记录的过程每次运行大约需要 10 秒,并且每 10 分钟运行一次。这可能需要用存储过程来实现。
DateTime 会变化,Message 不变。
每组只有10毫秒?
那么对于同一用户的一组记录,应该有 3 条记录的时间在四舍五入到最近的分钟后相同。
这个查询使用了这个想法。
-- List one user's records whose set does not contain exactly 3 rows.
-- A "set" is every row of the same user whose DateTime, shifted by 30 seconds
-- and truncated to the minute (i.e. rounded to the nearest minute), is equal.
;WITH bucketed AS (
    SELECT
        src.[User],
        src.[DateTime],
        src.Message,
        COUNT(*) OVER (
            PARTITION BY
                src.[User],
                DATEADD(minute, DATEDIFF(minute, 0, DATEADD(second, 30, src.[DateTime])), 0)
        ) AS RowsInBucket
    FROM tbl AS src
    WHERE src.[User] = 'mfamoso'
)
SELECT
    bucketed.[User],
    bucketed.[DateTime],
    bucketed.Message
FROM bucketed
WHERE bucketed.RowsInBucket <> 3
ORDER BY bucketed.[DateTime] DESC;
只是这个查询并不检查同一组内的 3 条消息是否互不相同。
我会尝试下面的方法。它需要使用临时表来添加字段(标识字段 UniqueID、PositionInSet 和 SetNum)。基本上,用 CHARINDEX 识别三种可能的记录类型(导出 Exporting、印记 Imprint、清理 CleanUp),再用 row_number() 根据它们在每个集合中的预期位置(第一、第二或第三)把它们分到各个集合中。最后,返回所有记录数 <> 3 的集合。
/*
  Demo: detect incomplete sets of log records with a temp table.
  A complete set is three consecutive records: Exporting, Imprint, CleanUp.
  The script classifies every record, assigns it a set number and finally
  returns the records of every set that does not hold exactly 3 records.
*/
create table #t (
    UniqueID int identity(1,1),
    DateTime datetime,
    Message varchar(500),
    PositionInSet int,
    SetNum int
)

-- Sample data: set 1 complete, set 2 without its CleanUp record, set 3 complete.
insert into #t (DateTime, Message)
select '2019-03-11 12:55:00.097', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."}'
union all
select '2019-03-11 12:55:00.270', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"}'
union all
select '2019-03-11 12:55:08.543', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"CleanUp ran successfully"}'
union all
select '2019-03-11 12:55:00.097', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."}'
union all
select '2019-03-11 12:55:00.270', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"}'
union all
select '2019-03-11 12:55:00.097', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."}'
union all
select '2019-03-11 12:55:00.270', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"}'
union all
select '2019-03-11 12:55:08.543', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"CleanUp ran successfully"}'
/***********************************/
-- Classify each record by the position it is expected to have inside a set.
-- The message is lower-cased first so the lowercase search terms also match
-- 'Exporting' / 'CleanUp' when the column uses a case-sensitive collation.
update #t
set PositionInSet = case
        when CHARINDEX('exporting', lower(Message)) <> 0 then 1
        when CHARINDEX('imprint', lower(Message)) <> 0 then 2
        when CHARINDEX('cleanup', lower(Message)) <> 0 then 3
    end

-- Assign set numbers. Only the records that open a set (PositionInSet = 1) are
-- numbered with row_number(); every record then takes the number of the latest
-- set start at or before it. Numbering each message type independently would
-- pair the n-th CleanUp with the n-th Exporting, so one missing record would
-- shift all later records of that type into the wrong set.
-- Records that precede the first set start get SetNum 0.
-- NOTE(review): this relies on UniqueID reflecting the order in which the
-- records were logged -- confirm when loading real data.
update t
set t.SetNum = isnull((
        select max(starts.SetNum)
        from (
            select
                UniqueID,
                row_number() over (order by UniqueID) as SetNum
            from #t
            where PositionInSet = 1
        ) starts
        where starts.UniqueID <= t.UniqueID
    ), 0)
from #t t

-- Return every record that belongs to a set without exactly 3 records.
select * from #t
inner join (
    select
        COUNT(*) as Count,
        SetNum
    from #t
    group by SetNum
    having COUNT(*) <> 3
) grp on #t.SetNum = grp.SetNum
/***********************************/
基本上,这种情况的难点在于如何对您的记录集合进行分组。由于集合没有标识符,我们只能依赖 DateTime 在 10 分钟内的接近程度(或者更短的时间,因为该过程只需不到一分钟,大约 10 秒)。如果消息之间的间隔在一分钟以内,我们就可以把它们归为一组。当然,总会有这样的情况:您在一分钟内停止并重新启动了该过程,这会打乱查询结果。
现在考虑到该分组,然后您可以查询那些不在一组 3 条消息中的消息。
编辑
对不起。我忘了这是 SQL Server 2008。您必须在此处使用自连接。
-- SQL Server 2008 variant: group records into sets by time proximity and
-- return the records of every set that has fewer than 3 messages.
-- NOTE(review): records of all users share one timeline here; add a [User]
-- filter if sets must be evaluated per user -- confirm against requirements.
;with
t0(dt,m,u,dtt) as (
    -- pair every record with the closest earlier DateTime (NULL for the first
    -- record); LAG() is not available in SQL Server 2008
    select a.[DateTime], a.[Message], a.[User], c.[DateTime]
    from tbl a
    outer apply (
        select top 1 b.[DateTime]
        from tbl b
        where b.[DateTime] < a.[DateTime]
        order by b.[DateTime] desc
    ) c
),
t1(dt,m,u,nxt) as (
    -- flag a record with 1 when it is 60 seconds or more after the previous
    -- one, i.e. when it starts a new set; the first record is flagged 0
    select dt, m, u, case when isnull(datediff(ss,dtt,dt),0)<60 then 0 else 1 end
    from t0
),
t2(dt,m,u,groupid) as (
    -- turn the flags into a group id with a running total; SQL Server 2008
    -- does not accept ORDER BY in an aggregate OVER clause, so the total is
    -- computed with a correlated subquery instead
    select cur.dt, cur.m, cur.u,
           (select sum(prev.nxt) from t1 prev where prev.dt <= cur.dt)
    from t1 cur
),
t3(groupid) as (
    -- keep only the group ids of sets with fewer than 3 messages
    select groupid
    from t2
    group by groupid
    having count(*)<3
)
select t2.*
from t2
join t3 on t2.groupid=t3.groupid
原始回答
对于 SQL Server 2012 及更高版本:
-- SQL Server 2012+ variant: group records into sets by time proximity and
-- return the records of every set that has fewer than 3 messages.
;WITH flagged (dt, m, u, nxt) AS (
    -- 1 when the record is 60 seconds or more after the previous one,
    -- otherwise 0 (the first record has no predecessor and gets 0)
    SELECT
        [DateTime],
        [Message],
        [User],
        CASE
            WHEN DATEDIFF(second, LAG([DateTime]) OVER (ORDER BY [DateTime]), [DateTime]) >= 60 THEN 1
            ELSE 0
        END
    FROM tbl
),
grouped (dt, m, u, groupid) AS (
    -- the running total of the flags serves as the group id
    SELECT
        dt,
        m,
        u,
        SUM(nxt) OVER (ORDER BY dt)
    FROM flagged
),
incomplete (groupid) AS (
    -- group ids of the sets holding fewer than 3 messages
    SELECT g.groupid
    FROM grouped AS g
    GROUP BY g.groupid
    HAVING COUNT(*) < 3
)
SELECT grouped.*
FROM grouped
INNER JOIN incomplete
    ON incomplete.groupid = grouped.groupid