SQL 查找不完整的记录集

Question

"Not possible" 是一个有效的答案...

给定表的字段（非真实字段名称）：

DateTime, Message, User

给定查询（返回所有集合）：

-- Return every logged record for one user, newest first.
-- [User] must be bracketed: a bare USER is the T-SQL niladic function that
-- yields the current database user name, not the column of that name.
SELECT *
FROM tbl
WHERE [User] = 'mfamoso'
ORDER BY [DateTime] DESC

一组中的给定数据（3 个预期记录）：

+-------------------------+------------------------------------------------------------------------------------------+---------+
|        DateTime         |                                         Message                                          |  User   |
+-------------------------+------------------------------------------------------------------------------------------+---------+
| 2019-03-11 12:55:00.097 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."}                           | mfamoso |
| 2019-03-11 12:55:00.270 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"} | mfamoso |
| 2019-03-11 12:55:08.543 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"CleanUp ran successfully"}                | mfamoso |
+-------------------------+------------------------------------------------------------------------------------------+---------+

问题：如果部分集合缺失（记录"CleanUp ran successfully"缺失），有没有办法得到如下查询结果，例如：

如果主查询运行成功，我可以有多个集合，因为主查询作为服务每 10 分钟运行一次。所有集合都应该有 3 条记录。我需要一个查询，运行后将返回未包含全部 3 条记录的集合：

+-------------------------+------------------------------------------------------------------------------------------+---------+
|        DateTime         |                                         Message                                          |  User   |
+-------------------------+------------------------------------------------------------------------------------------+---------+
| 2019-03-11 12:55:00.097 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."}                           | mfamoso |
| 2019-03-11 12:55:00.270 | {"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"} | mfamoso |
+-------------------------+------------------------------------------------------------------------------------------+---------+

我猜查询会使用计数来确保有 3 条记录，如果没有 3 条则显示它们 - 但是如何确定它们是集合的一部分？创建记录的过程大约需要 10 秒才能运行完，并且每 10 分钟运行一次。这可能需要存储过程。

DateTime变化，Message不变。

Answer 1

每组只有10毫秒？

那么对于某个用户的一组记录，应该有 3 条记录在舍入到最近的分钟后具有相同的时间。

这个查询使用了这个想法。

-- For each record of the user, count how many of that user's records round
-- to the same minute, then list the records whose bucket does not hold 3.
;WITH bucketed AS (
    SELECT
        src.[User],
        src.[DateTime],
        src.Message,
        -- Adding 30 seconds and truncating to the minute rounds the
        -- timestamp to the nearest minute; that value is the bucket key.
        COUNT(*) OVER (
            PARTITION BY
                src.[User],
                DATEADD(minute, DATEDIFF(minute, 0, DATEADD(second, 30, src.[DateTime])), 0)
        ) AS Cnt
    FROM tbl AS src
    WHERE src.[User] = 'mfamoso'
)
SELECT
    b.[User],
    b.[DateTime],
    b.Message
FROM bucketed AS b
WHERE b.Cnt <> 3
ORDER BY b.[DateTime] DESC;

只是这种查询没有检查1组的3条消息是否不同。

Answer 2

我会尝试下面的方法。它需要使用临时表来添加字段（标识字段、PositionInSet 和 SetNum）。基本上，CHARINDEX 用于识别三种潜在记录类型（导出、印记、清理）中的每一种，并使用 row_number() 根据它们在每个集合中的预期位置（第一、第二或第三）将它们分组在一起。最后，返回所有记录数 <> 3 的集合。

-- Scratch table for the worked example. PositionInSet and SetNum are left
-- empty by the insert and are filled in by the UPDATE statements that follow.
CREATE TABLE #t
(
    UniqueID      INT IDENTITY(1, 1),  -- auto-numbered in insertion order
    [DateTime]    DATETIME,
    Message       VARCHAR(500),
    PositionInSet INT,
    SetNum        INT
)

-- Sample data: three runs of the process, listed in the order they are to be
-- numbered. The second run has no "CleanUp ran successfully" record.
INSERT INTO #t ([DateTime], Message)
VALUES
    -- first run: all three records
    ('2019-03-11 12:55:00.097', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."}'),
    ('2019-03-11 12:55:00.270', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"}'),
    ('2019-03-11 12:55:08.543', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"CleanUp ran successfully"}'),
    -- second run: CleanUp record absent
    ('2019-03-11 12:55:00.097', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."}'),
    ('2019-03-11 12:55:00.270', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"}'),
    -- third run: all three records
    ('2019-03-11 12:55:00.097', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"Exporting...."}'),
    ('2019-03-11 12:55:00.270', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"PreExportImprintHeader ran successfully"}'),
    ('2019-03-11 12:55:08.543', '{"V":"1.0.0","In":"Program.Main()","E":"","D":"CleanUp ran successfully"}')


/***********************************/

-- Classify every row by the kind of message it carries:
--   1 = export started, 2 = imprint header step, 3 = clean-up step.
-- Rows matching none of the keywords keep PositionInSet = NULL.
-- The message is lower-cased before searching so that the lower-case
-- keywords also match under a case-sensitive collation.
-- No WHERE clause on purpose: every row of #t is classified.
update #t
set PositionInSet = case
    when CHARINDEX('exporting', LOWER(Message)) <> 0 then 1
    when CHARINDEX('imprint', LOWER(Message)) <> 0 then 2
    when CHARINDEX('cleanup', LOWER(Message)) <> 0 then 3
end


-- Assign each row to a set: a set starts at every 'exporting' row
-- (PositionInSet = 1), so a row's set number is the count of such rows
-- at or before it in UniqueID order.
-- This replaces numbering each message type independently with row_number():
-- that pairs the n-th record of every type, so one missing record shifts all
-- later records of that type into the wrong set and a different set is
-- reported as incomplete.
-- Assumes sets are stored one after another in UniqueID order; rows that
-- precede the first 'exporting' row get SetNum = 0.
update t
set t.SetNum = (
    select count(*)
    from #t s
    where s.PositionInSet = 1
      and s.UniqueID <= t.UniqueID
)
from #t t


-- List every row that belongs to a set whose row count is not exactly 3,
-- together with that set's row count.
;with incomplete_sets as (
    select
        COUNT(*) as [Count],
        SetNum
    from #t
    group by SetNum
    having COUNT(*) <> 3
)
select
    rec.UniqueID,
    rec.[DateTime],
    rec.Message,
    rec.PositionInSet,
    rec.SetNum,
    grp.[Count],
    grp.SetNum
from #t as rec
inner join incomplete_sets as grp
    on rec.SetNum = grp.SetNum

/***********************************/

Answer 3

基本上，这种情况的挑战在于尝试对您的集合进行分组。由于您没有该集合的标识符，我们将不得不依赖 10 分钟内 DateTime 的接近度（或更短时间，因为该过程需要不到一分钟或大约 10 秒）。如果您在一分钟内收到消息，我们可以将其分组。当然，总有这样一种情况，您可以在一分钟内停止并启动该过程，这会弄乱您的查询。

现在考虑到该分组，然后您可以查询那些不在一组 3 条消息中的消息。

编辑

对不起。我忘了这是 SQL Server 2008。您必须在此处使用自连接。

-- SQL Server 2008 version: split the log into sets wherever two consecutive
-- records are 60 seconds or more apart, then return the records of every set
-- that holds fewer than 3 records.
-- Rows of all users are grouped together; there is no filter on [User].
;with
t0(dt,m,u,dtt) as (
    -- Pair each row with the DateTime of the closest earlier row (NULL for the
    -- earliest row). LAG/LEAD do not exist in SQL Server 2008, hence the lookup.
    select a.[DateTime], a.[Message], a.[User], c.[DateTime]
    from tbl a
    outer apply (
        select top 1 b.[DateTime]
        from tbl b
        where b.[DateTime] < a.[DateTime]
        order by b.[DateTime] desc
    ) c
),
t1(dt,m,u,nxt) as (
    -- nxt = 1 when the gap to the previous row is 60 seconds or more;
    -- the earliest row (no previous row) gets 0.
    select dt, m, u, case when isnull(datediff(ss,dtt,dt),0)<60 then 0 else 1 end
    from t0
),
t2(dt,m,u,groupid) as (
    -- Turn the flags into a group id: the running total of nxt up to and
    -- including this row. SUM() OVER (ORDER BY ...) is only available from
    -- SQL Server 2012, so the running total is a correlated subquery.
    select a.dt, a.m, a.u,
           (select sum(b.nxt) from t1 b where b.dt <= a.dt)
    from t1 a
),
t3(groupid) as (
    -- keep only the sets with fewer than 3 messages
    select groupid
    from t2
    group by groupid
    having count(*)<3
)
select t2.*
from t2
join t3 on t2.groupid=t3.groupid

原始回答

对于 SQL Server 2012 及更高版本：

-- SQL Server 2012+ version: start a new set wherever two consecutive records
-- are 60 seconds or more apart, then return the records of every set that
-- holds fewer than 3 records.
;with flagged(dt, m, u, nxt) as (
    -- nxt = 1 when the gap to the previous record is 60 seconds or more.
    -- The earliest record has no previous one: the NULL gap fails the
    -- comparison and falls through to 0.
    select
        [DateTime],
        [Message],
        [User],
        case
            when datediff(ss, lag([DateTime]) over (order by [DateTime]), [DateTime]) >= 60 then 1
            else 0
        end
    from tbl
),
grouped(dt, m, u, groupid) as (
    -- running total of the flags = id of the set the record belongs to
    select
        dt,
        m,
        u,
        sum(nxt) over (order by dt)
    from flagged
),
short_sets(groupid) as (
    -- ids of the sets holding fewer than 3 records
    select groupid
    from grouped
    group by groupid
    having count(*) < 3
)
select
    g.dt,
    g.m,
    g.u,
    g.groupid
from grouped as g
inner join short_sets as s
    on s.groupid = g.groupid

SQL 查找不完整的记录集

SQL Find An Incomplete Set of Records

sql

sql-server

sql-server-2008