SQL Server - 获取每个 ID 的值变化次数的最快方法
Sql Server - Fastest way to get number of times value changed for each ID
我正在编写一个 SQL 查询,需要根据以下需求返回不同的结果:
- 显示在给定时间范围内,值从 1 变为 0 的次数最多的记录
- 显示在给定时间范围内,值从 1 变为 0 的次数最少的记录
- 显示在给定时间范围内,值从 1 变为 0 的次数最多的前 10 条记录
示例数据:
+----------+-------------+-------------+
| DeviceId | CaptureTime | SensorValue |
+----------+-------------+-------------+
| DC001 | 02/01/2017 | 0 |
| DC001 | 02/02/2017 | 1 |
| DC001 | 02/03/2017 | 0 |
| DC001 | 02/04/2017 | 1 |
| DC001 | 02/05/2017 | 0 |
| DC001 | 02/07/2017 | 1 |
| DC001 | 02/08/2017 | 0 |
| DC001 | 02/10/2017 | 1 |
| DC001 | 02/01/2017 | 0 |
| DC001 | 02/01/2017 | 0 |
| DC002 | 02/02/2017 | 1 |
| DC002 | 02/02/2017 | 0 |
| DC002 | 02/02/2017 | 1 |
| DC002 | 02/02/2017 | 1 |
| DC002 | 02/02/2017 | 1 |
| DC002 | 02/03/2017 | 1 |
| DC002 | 02/03/2017 | 0 |
| DC002 | 02/03/2017 | 0 |
| DC002 | 02/03/2017 | 1 |
| DC002 | 02/03/2017 | 1 |
| DC003 | 02/03/2017 | 1 |
| DC003 | 02/03/2017 | 1 |
| DC003 | 02/03/2017 | 0 |
| DC003 | 02/03/2017 | 1 |
| DC003 | 02/03/2017 | 1 |
| DC003 | 02/04/2017 | 1 |
| DC003 | 02/05/2017 | 1 |
| DC003 | 02/06/2017 | 1 |
| DC003 | 02/07/2017 | 1 |
| DC003 | 02/08/2017 | 1 |
| DC004 | 02/09/2017 | 0 |
| DC004 | 02/10/2017 | 0 |
| DC004 | 02/11/2017 | 1 |
| DC004 | 02/12/2017 | 0 |
| DC004 | 02/12/2017 | 1 |
| DC004 | 02/12/2017 | 1 |
| DC004 | 02/12/2017 | 1 |
| DC004 | 02/12/2017 | 1 |
| DC004 | 02/12/2017 | 1 |
| DC004 | 02/12/2017 | 1 |
| DC005 | 02/12/2017 | 0 |
| DC005 | 02/12/2017 | 0 |
| DC005 | 02/12/2017 | 0 |
| DC005 | 02/12/2017 | 0 |
| DC005 | 02/14/2017 | 0 |
| DC005 | 02/14/2017 | 0 |
| DC005 | 02/14/2017 | 0 |
| DC005 | 02/14/2017 | 0 |
| DC005 | 02/14/2017 | 0 |
| DC005 | 02/14/2017 | 0 |
+----------+-------------+-------------+
我为上述三个需求编写了下面这个通用查询:
<pre><code>DECLARE @HoursBack INT
-- The three variables declared here parameterise the query below; this snippet
-- declares them without assigning values.
--   @HoursBack : look-back window in hours, used in the DATEADD filter
--   @MinMax    : 'Max' or 'Min'; decides which ORDER BY key is active
--   @TopRows   : number of rows kept by TOP
, @MinMax VARCHAR(3)
, @TopRows INT
-- Counts, per DeviceId, the rows whose SensorValue is 0 and differs from the
-- SensorValue of the row with the closest earlier CaptureTime.
-- NOTE(review): ItemName is neither aggregated nor listed in GROUP BY; per the
-- inline comment, the table it came from was dropped from this simplified version.
SELECT TOP (@TopRows) COUNT(TD1.DeviceId) PickedNoOfTimes, ItemName -- I have removed table to get ItemName to simplify this query
FROM tTrayDetails AS TD1
WHERE TD1.SensorValue = 0
-- Only rows captured within the last @HoursBack hours are counted.
AND TD1.CaptureTime > DATEADD(HOUR, -@HoursBack, GETDATE())
-- Correlated subquery: evaluated for each candidate row to fetch the previous reading.
-- NOTE(review): it has no DeviceId predicate, so the "previous" row is taken across
-- all devices - presumably unintended given the per-device grouping; confirm.
AND TD1.SensorValue <> (
SELECT TOP 1 SensorValue
FROM tTrayDetails TD2
WHERE TD2.CaptureTime < TD1.CaptureTime
ORDER BY TD2.CaptureTime DESC
)
GROUP BY TD1.DeviceId
-- A CASE without ELSE yields NULL, so only one of the two keys below is non-NULL
-- for a given @MinMax: 'Max' sorts by the count descending, 'Min' ascending.
ORDER BY CASE WHEN @MinMax = 'Max' THEN COUNT(TD1.DeviceId) END DESC
, CASE WHEN @MinMax = 'Min' THEN COUNT(TD1.DeviceId) END ASC
此查询适用于所有三个要求,只需设置不同的@HoursBack、@MinMax 和@TopRows 变量值即可。
这是我的三个要求 :
- @HoursBack = 24,@MinMax='Max',@TopRows=1
- @HoursBack = 24,@MinMax='Min',@TopRows=1
- @HoursBack = 24,@MinMax='Max',@TopRows=10
现在的问题是:测试环境中只有 14K 条记录,这个查询就需要大约 40 秒才能执行完。
在生产环境中,每天会新增 2-4K 条记录,因此查询的执行时间还会继续增加。
如何修改这个查询,使其在数据量很大时运行得更快?
这将仅计算 SensorValue 从 1 变为 0 的那些行:
-- Per-device count of readings where SensorValue dropped from 1 to 0.
-- Devices without any such drop do not appear in the result.
WITH flagged_rows AS
(
    SELECT
        t.DeviceId,
        -- 1 when this reading is 0 and the same device's preceding reading
        -- (ordered by CaptureTime) was 1; otherwise 0
        CASE
            WHEN t.SensorValue = 0
             AND LAG(t.SensorValue) OVER (PARTITION BY t.DeviceId
                                          ORDER BY t.CaptureTime) = 1
                THEN 1
            ELSE 0
        END AS ChangeFlag
    FROM tTrayDetails AS t
    -- placeholder kept as written: the row filter goes here
    WHERE ....
)
SELECT
    f.DeviceId,
    COUNT(*)
FROM flagged_rows AS f
WHERE f.ChangeFlag = 1
GROUP BY f.DeviceId
现在再加上你的 TOP / ORDER BY 即可……
可以这样写:
-- Returns the @topRows devices with the most (or fewest) 1 -> 0 SensorValue
-- transitions among the rows captured in the last @hoursBack hours.
--   @topRows   : number of devices to return
--   @minMax    : 'min' (any letter case) ranks the fewest transitions first;
--                any other value ranks the most transitions first
--   @hoursBack : size of the look-back window, in hours
--   @now       : reference time, read once so the cutoff is a single fixed value
declare
    @topRows int = 2,
    @minMax nvarchar(3) = N'max',
    @hoursBack int = 1000,
    @now datetime = getdate();

-- _raw: one row per in-window reading, flagged 1 when the reading is 0 and the
-- same device's previous reading (by CaptureTime) is 1.
with _raw
as (
    select
        DeviceId,
        case
            when SensorValue = 0
                 and lag(SensorValue) over (partition by DeviceId order by CaptureTime) = 1
            then 1
            else 0
        end as Val
    from tTrayDetails
    -- The filter is applied before lag() is evaluated, so the first in-window
    -- reading of each device has no previous row and is never flagged.
    where
        CaptureTime > dateadd(hour, -@hoursBack, @now)
)
-- _combined: one row per device with its transition count (Val) and a sort key
-- (Ord) whose sign encodes the requested direction.
, _combined
as (
    select
        DeviceId,
        sum(Val) as Val,
        -- lower() keeps 'Min' / 'MIN' working under a case-sensitive collation;
        -- without it those values would silently be ranked as 'max'.
        (case when lower(@minMax) = N'min' then 1 else -1 end) * sum(Val) as Ord
    from _raw
    group by
        DeviceId
)
select top (@topRows)
    DeviceId,
    Val
from _combined
order by
    Ord,
    DeviceId;  -- tiebreaker so devices with equal counts come back in a stable order
下面是同一查询,并附带测试脚本:
-- Self-contained test script: builds a small #tTrayDetails fixture, runs the
-- transition-ranking query against it, then drops the fixture.
-- Expected result with the fixture and parameters below:
--   DC001 | 2
--   DC002 | 1

-- Guard so the script can be re-run in the same session after an aborted run.
if object_id('tempdb..#tTrayDetails') is not null
    drop table #tTrayDetails;

create table #tTrayDetails
(
    DeviceId nvarchar(128),
    CaptureTime datetime not null,
    SensorValue int not null
);

-- ISO 8601 literals (with 'T') are parsed the same way regardless of the
-- session's language / DATEFORMAT settings.
insert into #tTrayDetails (DeviceId, CaptureTime, SensorValue) values
    ('DC001', '2017-01-01T01:00:00', 0),
    ('DC001', '2017-01-01T02:00:00', 1),
    ('DC001', '2017-01-02T01:00:00', 0),
    ('DC001', '2017-01-03T01:00:00', 1),
    ('DC001', '2017-01-04T01:00:00', 0),
    ('DC002', '2017-01-01T01:00:00', 0),
    ('DC002', '2017-01-01T02:00:00', 0),
    ('DC002', '2017-01-01T03:00:00', 1),
    ('DC002', '2017-01-01T04:00:00', 1),
    ('DC002', '2017-01-01T05:00:00', 1),
    ('DC002', '2017-01-01T06:00:00', 0),
    ('DC003', '2017-01-01T06:00:00', 0);

--   @topRows   : number of devices to return
--   @minMax    : 'min' (any letter case) ranks the fewest transitions first;
--                any other value ranks the most transitions first
--   @hoursBack : size of the look-back window, in hours
--   @now       : reference time; pinned to a fixed instant just after the
--                fixture data so the result does not depend on when the script
--                is run (with getdate() the 2017 rows fall outside the window)
declare
    @topRows int = 2,
    @minMax nvarchar(3) = N'max',
    @hoursBack int = 1000,
    @now datetime = '2017-01-05T00:00:00';

-- _raw: one row per in-window reading, flagged 1 when the reading is 0 and the
-- same device's previous reading (by CaptureTime) is 1.
with _raw
as (
    select
        DeviceId,
        case
            when SensorValue = 0
                 and lag(SensorValue) over (partition by DeviceId order by CaptureTime) = 1
            then 1
            else 0
        end as Val
    from #tTrayDetails
    where
        CaptureTime > dateadd(hour, -@hoursBack, @now)
)
-- _combined: one row per device with its transition count (Val) and a sort key
-- (Ord) whose sign encodes the requested direction.
, _combined
as (
    select
        DeviceId,
        sum(Val) as Val,
        -- lower() keeps 'Min' / 'MIN' working under a case-sensitive collation.
        (case when lower(@minMax) = N'min' then 1 else -1 end) * sum(Val) as Ord
    from _raw
    group by
        DeviceId
)
select top (@topRows)
    DeviceId,
    Val
from _combined
order by
    Ord,
    DeviceId;  -- tiebreaker so devices with equal counts come back in a stable order

drop table #tTrayDetails;
我正在编写一个 SQL 查询,需要根据以下需求返回不同的结果:
- 显示在给定时间范围内,值从 1 变为 0 的次数最多的记录
- 显示在给定时间范围内,值从 1 变为 0 的次数最少的记录
- 显示在给定时间范围内,值从 1 变为 0 的次数最多的前 10 条记录
示例数据:
+----------+-------------+-------------+
| DeviceId | CaptureTime | SensorValue |
+----------+-------------+-------------+
| DC001 | 02/01/2017 | 0 |
| DC001 | 02/02/2017 | 1 |
| DC001 | 02/03/2017 | 0 |
| DC001 | 02/04/2017 | 1 |
| DC001 | 02/05/2017 | 0 |
| DC001 | 02/07/2017 | 1 |
| DC001 | 02/08/2017 | 0 |
| DC001 | 02/10/2017 | 1 |
| DC001 | 02/01/2017 | 0 |
| DC001 | 02/01/2017 | 0 |
| DC002 | 02/02/2017 | 1 |
| DC002 | 02/02/2017 | 0 |
| DC002 | 02/02/2017 | 1 |
| DC002 | 02/02/2017 | 1 |
| DC002 | 02/02/2017 | 1 |
| DC002 | 02/03/2017 | 1 |
| DC002 | 02/03/2017 | 0 |
| DC002 | 02/03/2017 | 0 |
| DC002 | 02/03/2017 | 1 |
| DC002 | 02/03/2017 | 1 |
| DC003 | 02/03/2017 | 1 |
| DC003 | 02/03/2017 | 1 |
| DC003 | 02/03/2017 | 0 |
| DC003 | 02/03/2017 | 1 |
| DC003 | 02/03/2017 | 1 |
| DC003 | 02/04/2017 | 1 |
| DC003 | 02/05/2017 | 1 |
| DC003 | 02/06/2017 | 1 |
| DC003 | 02/07/2017 | 1 |
| DC003 | 02/08/2017 | 1 |
| DC004 | 02/09/2017 | 0 |
| DC004 | 02/10/2017 | 0 |
| DC004 | 02/11/2017 | 1 |
| DC004 | 02/12/2017 | 0 |
| DC004 | 02/12/2017 | 1 |
| DC004 | 02/12/2017 | 1 |
| DC004 | 02/12/2017 | 1 |
| DC004 | 02/12/2017 | 1 |
| DC004 | 02/12/2017 | 1 |
| DC004 | 02/12/2017 | 1 |
| DC005 | 02/12/2017 | 0 |
| DC005 | 02/12/2017 | 0 |
| DC005 | 02/12/2017 | 0 |
| DC005 | 02/12/2017 | 0 |
| DC005 | 02/14/2017 | 0 |
| DC005 | 02/14/2017 | 0 |
| DC005 | 02/14/2017 | 0 |
| DC005 | 02/14/2017 | 0 |
| DC005 | 02/14/2017 | 0 |
| DC005 | 02/14/2017 | 0 |
+----------+-------------+-------------+
我为上述三个需求编写了下面这个通用查询:
<pre><code>DECLARE @HoursBack INT
-- The three variables declared here parameterise the query below; this snippet
-- declares them without assigning values.
--   @HoursBack : look-back window in hours, used in the DATEADD filter
--   @MinMax    : 'Max' or 'Min'; decides which ORDER BY key is active
--   @TopRows   : number of rows kept by TOP
, @MinMax VARCHAR(3)
, @TopRows INT
-- Counts, per DeviceId, the rows whose SensorValue is 0 and differs from the
-- SensorValue of the row with the closest earlier CaptureTime.
-- NOTE(review): ItemName is neither aggregated nor listed in GROUP BY; per the
-- inline comment, the table it came from was dropped from this simplified version.
SELECT TOP (@TopRows) COUNT(TD1.DeviceId) PickedNoOfTimes, ItemName -- I have removed table to get ItemName to simplify this query
FROM tTrayDetails AS TD1
WHERE TD1.SensorValue = 0
-- Only rows captured within the last @HoursBack hours are counted.
AND TD1.CaptureTime > DATEADD(HOUR, -@HoursBack, GETDATE())
-- Correlated subquery: evaluated for each candidate row to fetch the previous reading.
-- NOTE(review): it has no DeviceId predicate, so the "previous" row is taken across
-- all devices - presumably unintended given the per-device grouping; confirm.
AND TD1.SensorValue <> (
SELECT TOP 1 SensorValue
FROM tTrayDetails TD2
WHERE TD2.CaptureTime < TD1.CaptureTime
ORDER BY TD2.CaptureTime DESC
)
GROUP BY TD1.DeviceId
-- A CASE without ELSE yields NULL, so only one of the two keys below is non-NULL
-- for a given @MinMax: 'Max' sorts by the count descending, 'Min' ascending.
ORDER BY CASE WHEN @MinMax = 'Max' THEN COUNT(TD1.DeviceId) END DESC
, CASE WHEN @MinMax = 'Min' THEN COUNT(TD1.DeviceId) END ASC
此查询适用于所有三个要求,只需设置不同的@HoursBack、@MinMax 和@TopRows 变量值即可。
这是我的三个要求 :
- @HoursBack = 24,@MinMax='Max',@TopRows=1
- @HoursBack = 24,@MinMax='Min',@TopRows=1
- @HoursBack = 24,@MinMax='Max',@TopRows=10
现在的问题是:测试环境中只有 14K 条记录,这个查询就需要大约 40 秒才能执行完。
在生产环境中,每天会新增 2-4K 条记录,因此查询的执行时间还会继续增加。
如何修改这个查询,使其在数据量很大时运行得更快?
这将仅计算 SensorValue 从 1 变为 0 的那些行:
-- Per-device count of readings where SensorValue dropped from 1 to 0.
-- Devices without any such drop do not appear in the result.
WITH flagged_rows AS
(
    SELECT
        t.DeviceId,
        -- 1 when this reading is 0 and the same device's preceding reading
        -- (ordered by CaptureTime) was 1; otherwise 0
        CASE
            WHEN t.SensorValue = 0
             AND LAG(t.SensorValue) OVER (PARTITION BY t.DeviceId
                                          ORDER BY t.CaptureTime) = 1
                THEN 1
            ELSE 0
        END AS ChangeFlag
    FROM tTrayDetails AS t
    -- placeholder kept as written: the row filter goes here
    WHERE ....
)
SELECT
    f.DeviceId,
    COUNT(*)
FROM flagged_rows AS f
WHERE f.ChangeFlag = 1
GROUP BY f.DeviceId
现在再加上你的 TOP / ORDER BY 即可……
可以这样写:
-- Returns the @topRows devices with the most (or fewest) 1 -> 0 SensorValue
-- transitions among the rows captured in the last @hoursBack hours.
--   @topRows   : number of devices to return
--   @minMax    : 'min' (any letter case) ranks the fewest transitions first;
--                any other value ranks the most transitions first
--   @hoursBack : size of the look-back window, in hours
--   @now       : reference time, read once so the cutoff is a single fixed value
declare
    @topRows int = 2,
    @minMax nvarchar(3) = N'max',
    @hoursBack int = 1000,
    @now datetime = getdate();

-- _raw: one row per in-window reading, flagged 1 when the reading is 0 and the
-- same device's previous reading (by CaptureTime) is 1.
with _raw
as (
    select
        DeviceId,
        case
            when SensorValue = 0
                 and lag(SensorValue) over (partition by DeviceId order by CaptureTime) = 1
            then 1
            else 0
        end as Val
    from tTrayDetails
    -- The filter is applied before lag() is evaluated, so the first in-window
    -- reading of each device has no previous row and is never flagged.
    where
        CaptureTime > dateadd(hour, -@hoursBack, @now)
)
-- _combined: one row per device with its transition count (Val) and a sort key
-- (Ord) whose sign encodes the requested direction.
, _combined
as (
    select
        DeviceId,
        sum(Val) as Val,
        -- lower() keeps 'Min' / 'MIN' working under a case-sensitive collation;
        -- without it those values would silently be ranked as 'max'.
        (case when lower(@minMax) = N'min' then 1 else -1 end) * sum(Val) as Ord
    from _raw
    group by
        DeviceId
)
select top (@topRows)
    DeviceId,
    Val
from _combined
order by
    Ord,
    DeviceId;  -- tiebreaker so devices with equal counts come back in a stable order
下面是同一查询,并附带测试脚本:
-- Self-contained test script: builds a small #tTrayDetails fixture, runs the
-- transition-ranking query against it, then drops the fixture.
-- Expected result with the fixture and parameters below:
--   DC001 | 2
--   DC002 | 1

-- Guard so the script can be re-run in the same session after an aborted run.
if object_id('tempdb..#tTrayDetails') is not null
    drop table #tTrayDetails;

create table #tTrayDetails
(
    DeviceId nvarchar(128),
    CaptureTime datetime not null,
    SensorValue int not null
);

-- ISO 8601 literals (with 'T') are parsed the same way regardless of the
-- session's language / DATEFORMAT settings.
insert into #tTrayDetails (DeviceId, CaptureTime, SensorValue) values
    ('DC001', '2017-01-01T01:00:00', 0),
    ('DC001', '2017-01-01T02:00:00', 1),
    ('DC001', '2017-01-02T01:00:00', 0),
    ('DC001', '2017-01-03T01:00:00', 1),
    ('DC001', '2017-01-04T01:00:00', 0),
    ('DC002', '2017-01-01T01:00:00', 0),
    ('DC002', '2017-01-01T02:00:00', 0),
    ('DC002', '2017-01-01T03:00:00', 1),
    ('DC002', '2017-01-01T04:00:00', 1),
    ('DC002', '2017-01-01T05:00:00', 1),
    ('DC002', '2017-01-01T06:00:00', 0),
    ('DC003', '2017-01-01T06:00:00', 0);

--   @topRows   : number of devices to return
--   @minMax    : 'min' (any letter case) ranks the fewest transitions first;
--                any other value ranks the most transitions first
--   @hoursBack : size of the look-back window, in hours
--   @now       : reference time; pinned to a fixed instant just after the
--                fixture data so the result does not depend on when the script
--                is run (with getdate() the 2017 rows fall outside the window)
declare
    @topRows int = 2,
    @minMax nvarchar(3) = N'max',
    @hoursBack int = 1000,
    @now datetime = '2017-01-05T00:00:00';

-- _raw: one row per in-window reading, flagged 1 when the reading is 0 and the
-- same device's previous reading (by CaptureTime) is 1.
with _raw
as (
    select
        DeviceId,
        case
            when SensorValue = 0
                 and lag(SensorValue) over (partition by DeviceId order by CaptureTime) = 1
            then 1
            else 0
        end as Val
    from #tTrayDetails
    where
        CaptureTime > dateadd(hour, -@hoursBack, @now)
)
-- _combined: one row per device with its transition count (Val) and a sort key
-- (Ord) whose sign encodes the requested direction.
, _combined
as (
    select
        DeviceId,
        sum(Val) as Val,
        -- lower() keeps 'Min' / 'MIN' working under a case-sensitive collation.
        (case when lower(@minMax) = N'min' then 1 else -1 end) * sum(Val) as Ord
    from _raw
    group by
        DeviceId
)
select top (@topRows)
    DeviceId,
    Val
from _combined
order by
    Ord,
    DeviceId;  -- tiebreaker so devices with equal counts come back in a stable order

drop table #tTrayDetails;