SQL Server 2008 R2:调整查询
SQL Server 2008 R2: Tuning query
我有下面这张表,其中有 10 亿条记录。
-- Table queried by every statement in this thread.
-- No keys, constraints or indexes are declared here.
CREATE TABLE PfTest (
    cola INT,
    colb INT,
    colc DATE,
    cold VARCHAR(10),
    ID   INT
);
现在我想显示特定日期和非特定日期的记录。
为此我使用了以下两种类型的查询:
查询 1:
-- Distinct rows dated 2014-01-01 whose cold value is not among the cold
-- values of ID = 1 rows dated 2014-01-02 through 2014-01-06.
-- Note: cold is nullable, and NOT IN yields no rows at all if the subquery
-- returns a NULL.
SELECT DISTINCT
    p.cola,
    p.colb,
    p.colc,
    p.cold,
    p.ID
FROM PfTest AS p
WHERE p.colc = '2014-01-01'
  AND p.cold NOT IN (
        SELECT ex.cold
        FROM PfTest AS ex
        WHERE ex.ID = 1
          AND ex.colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                          '2014-01-05', '2014-01-06')
      );
查询 2:
-- The CTE keeps distinct rows whose cold value is not among the cold values
-- of ID = 1 rows dated 2014-01-02 through 2014-01-06; the outer query then
-- keeps only the rows dated 2014-01-01.
WITH not_excluded AS (
    SELECT DISTINCT
        p.cola,
        p.colb,
        p.colc,
        p.cold,
        p.ID
    FROM PfTest AS p
    WHERE p.cold NOT IN (
            SELECT ex.cold
            FROM PfTest AS ex
            WHERE ex.ID = 1
              AND ex.colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                              '2014-01-05', '2014-01-06')
          )
)
SELECT
    cola,
    colb,
    colc,
    cold,
    ID
FROM not_excluded
WHERE colc = '2014-01-01';
以上两个查询的执行计划相同,而且都需要很长时间才能执行完。针对这种情况,我能写出更好的查询吗?
这是您的查询,没有 DISTINCT
(这似乎是不必要的):
-- Rows dated 2014-01-01 whose cold value is not among the cold values of
-- ID = 1 rows dated 2014-01-02 through 2014-01-06. No DISTINCT is applied,
-- so duplicate rows are returned as stored.
SELECT
    p.cola,
    p.colb,
    p.colc,
    p.cold,
    p.ID
FROM PfTest AS p
WHERE p.colc = '2014-01-01'
  AND p.cold NOT IN (
        SELECT ex.cold
        FROM PfTest AS ex
        WHERE ex.ID = 1
          AND ex.colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                          '2014-01-05', '2014-01-06')
      );
我将从索引开始。 PFTest(colc, cold)
和 PFTest(id, colc, cold)
.
如果子查询返回大量数据(比如数百万行),那么您最好使用临时表。我会先尝试索引;如果这不起作用,一个在 cold 列上建有索引的临时表
可能会起作用。此外,虽然它对性能影响不大,但我会使用 NOT EXISTS
而不是 NOT IN
:
来表达查询
-- Rows dated 2014-01-01 for which no ID = 1 row dated 2014-01-02 through
-- 2014-01-06 carries the same cold value.
-- The outer table is aliased t1 so that the correlated predicate
-- t2.cold = t1.cold binds; previously the alias was t while the predicate
-- referenced t1, which fails to resolve.
SELECT
    t1.cola,
    t1.colb,
    t1.colc,
    t1.cold,
    t1.ID
FROM PfTest AS t1
WHERE t1.colc IN ('2014-01-01')
  AND NOT EXISTS (
        SELECT 1
        FROM PfTest AS t2
        WHERE t2.cold = t1.cold
          AND t2.ID = 1
          AND t2.colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                          '2014-01-05', '2014-01-06')
      );
对于这个版本,最好的索引是PfTest(cold, id, colc)
。
当匹配列具有 NULL
值时,NOT EXISTS
具有更直观的行为。
首先
-- Distinct rows dated 2014-01-01 whose cold value is not among the cold
-- values of ID = 1 rows dated 2014-01-02 through 2014-01-06.
SELECT DISTINCT
    p.cola,
    p.colb,
    p.colc,
    p.cold,
    p.ID
FROM PfTest AS p
WHERE p.colc = '2014-01-01'
  AND p.cold NOT IN (
        SELECT ex.cold
        FROM PfTest AS ex
        WHERE ex.ID = 1
          AND ex.colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                          '2014-01-05', '2014-01-06')
      );
与
相同
-- Distinct rows dated 2014-01-01, further filtered on the same row's colc
-- and ID; no subquery is involved.
-- NOTE(review): this never compares cold against other rows, so it does not
-- appear equivalent to a "cold NOT IN (subquery)" filter. For example, a row
-- dated 2014-01-01 with ID = 1 is dropped here even when its cold value is
-- absent from every ID = 1 row dated 2014-01-02..06. The colc NOT IN test is
-- always true once colc = '2014-01-01'. Confirm before substituting.
select DISTINCT cola, colb, colc, cold, ID
from PfTest
WHERE colc In ('2014-01-01')
AND colc NOT IN ('2014-01-02', '2014-01-03',
'2014-01-04', '2014-01-05', '2014-01-06')
AND NOT(ID =1);
因为内外table是一样的。
由于您不想反复扫描这张表(因为它包含十亿行),更好的做法是先把数据拉到临时表中,然后在其上创建合适的索引。
-- Step 1: copy PfTest into the session temp table #PfTest.
-- There is no WHERE clause, so every row is copied.
select cola, colb, colc, cold, ID
INTO #PfTest
FROM PfTest
-- Step 2: three nonclustered indexes keyed on id, colc and cold; each one
-- INCLUDEs the remaining four columns.
CREATE NONCLUSTERED INDEX IX_PFTEST1 ON #PfTest(id) INCLUDE (cola, colb, colc, cold)
CREATE NONCLUSTERED INDEX IX_PFTEST2 ON #PfTest(colc) INCLUDE (cola, colb, id, cold)
CREATE NONCLUSTERED INDEX IX_PFTEST3 ON #PfTest(cold) INCLUDE (cola, colb, id, colc)
-- Step 3: rows dated 2014-01-01 ...
select cola, colb, colc, cold, ID
from #PfTest
WHERE colc In ('2014-01-01')
-- ... intersected with derived table A (all rows minus the EXCEPT branch).
INTERSECT
select cola, colb, colc, cold, id
from
(select cola, colb, colc, cold, ID
from #PfTest
EXCEPT
-- "1 id" projects the constant 1 under the column name id; it does not
-- filter on ID = 1.
-- NOTE(review): EXCEPT compares whole rows including colc, and every row on
-- this side is dated 2014-01-02..06, so it cannot remove any row dated
-- 2014-01-01. The final result therefore looks like just the distinct
-- 2014-01-01 rows, regardless of cold. Verify against the intended
-- "cold NOT IN (subquery)" semantics before use.
SELECT cola, colb, colc, cold, 1 id FROM #PfTest
where
colc IN('2014-01-02', '2014-01-03',
'2014-01-04', '2014-01-05', '2014-01-06'))A
使用 EXCEPT
代替 NOT IN
以稍微提高性能。
我有下面这张表,其中有 10 亿条记录。
-- Table queried by every statement in this thread.
-- No keys, constraints or indexes are declared here.
CREATE TABLE PfTest (
    cola INT,
    colb INT,
    colc DATE,
    cold VARCHAR(10),
    ID   INT
);
现在我想显示特定日期和非特定日期的记录。
为此我使用了以下两种类型的查询:
查询 1:
-- Distinct rows dated 2014-01-01 whose cold value is not among the cold
-- values of ID = 1 rows dated 2014-01-02 through 2014-01-06.
-- Note: cold is nullable, and NOT IN yields no rows at all if the subquery
-- returns a NULL.
SELECT DISTINCT
    p.cola,
    p.colb,
    p.colc,
    p.cold,
    p.ID
FROM PfTest AS p
WHERE p.colc = '2014-01-01'
  AND p.cold NOT IN (
        SELECT ex.cold
        FROM PfTest AS ex
        WHERE ex.ID = 1
          AND ex.colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                          '2014-01-05', '2014-01-06')
      );
查询 2:
-- The CTE keeps distinct rows whose cold value is not among the cold values
-- of ID = 1 rows dated 2014-01-02 through 2014-01-06; the outer query then
-- keeps only the rows dated 2014-01-01.
WITH not_excluded AS (
    SELECT DISTINCT
        p.cola,
        p.colb,
        p.colc,
        p.cold,
        p.ID
    FROM PfTest AS p
    WHERE p.cold NOT IN (
            SELECT ex.cold
            FROM PfTest AS ex
            WHERE ex.ID = 1
              AND ex.colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                              '2014-01-05', '2014-01-06')
          )
)
SELECT
    cola,
    colb,
    colc,
    cold,
    ID
FROM not_excluded
WHERE colc = '2014-01-01';
以上两个查询的执行计划相同,而且都需要很长时间才能执行完。针对这种情况,我能写出更好的查询吗?
这是您的查询,没有 DISTINCT
(这似乎是不必要的):
-- Rows dated 2014-01-01 whose cold value is not among the cold values of
-- ID = 1 rows dated 2014-01-02 through 2014-01-06. No DISTINCT is applied,
-- so duplicate rows are returned as stored.
SELECT
    p.cola,
    p.colb,
    p.colc,
    p.cold,
    p.ID
FROM PfTest AS p
WHERE p.colc = '2014-01-01'
  AND p.cold NOT IN (
        SELECT ex.cold
        FROM PfTest AS ex
        WHERE ex.ID = 1
          AND ex.colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                          '2014-01-05', '2014-01-06')
      );
我将从索引开始。 PFTest(colc, cold)
和 PFTest(id, colc, cold)
.
如果子查询返回大量数据(比如数百万行),那么您最好使用临时表。我会先尝试索引;如果这不起作用,一个在 cold 列上建有索引的临时表
可能会起作用。此外,虽然它对性能影响不大,但我会使用 NOT EXISTS
而不是 NOT IN
:
-- Rows dated 2014-01-01 for which no ID = 1 row dated 2014-01-02 through
-- 2014-01-06 carries the same cold value.
-- The outer table is aliased t1 so that the correlated predicate
-- t2.cold = t1.cold binds; previously the alias was t while the predicate
-- referenced t1, which fails to resolve.
SELECT
    t1.cola,
    t1.colb,
    t1.colc,
    t1.cold,
    t1.ID
FROM PfTest AS t1
WHERE t1.colc IN ('2014-01-01')
  AND NOT EXISTS (
        SELECT 1
        FROM PfTest AS t2
        WHERE t2.cold = t1.cold
          AND t2.ID = 1
          AND t2.colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                          '2014-01-05', '2014-01-06')
      );
对于这个版本,最好的索引是PfTest(cold, id, colc)
。
当匹配列具有 NULL
值时,NOT EXISTS
具有更直观的行为。
首先
-- Distinct rows dated 2014-01-01 whose cold value is not among the cold
-- values of ID = 1 rows dated 2014-01-02 through 2014-01-06.
SELECT DISTINCT
    p.cola,
    p.colb,
    p.colc,
    p.cold,
    p.ID
FROM PfTest AS p
WHERE p.colc = '2014-01-01'
  AND p.cold NOT IN (
        SELECT ex.cold
        FROM PfTest AS ex
        WHERE ex.ID = 1
          AND ex.colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                          '2014-01-05', '2014-01-06')
      );
与
相同
select DISTINCT cola, colb, colc, cold, ID
from PfTest
WHERE colc In ('2014-01-01')
AND colc NOT IN ('2014-01-02', '2014-01-03',
'2014-01-04', '2014-01-05', '2014-01-06')
AND NOT(ID =1);
因为内外table是一样的。
由于您不想反复扫描这张表(因为它包含十亿行),更好的做法是先把数据拉到临时表中,然后在其上创建合适的索引。
-- Step 1: copy PfTest into the session temp table #PfTest.
-- There is no WHERE clause, so every row is copied.
select cola, colb, colc, cold, ID
INTO #PfTest
FROM PfTest
-- Step 2: three nonclustered indexes keyed on id, colc and cold; each one
-- INCLUDEs the remaining four columns.
CREATE NONCLUSTERED INDEX IX_PFTEST1 ON #PfTest(id) INCLUDE (cola, colb, colc, cold)
CREATE NONCLUSTERED INDEX IX_PFTEST2 ON #PfTest(colc) INCLUDE (cola, colb, id, cold)
CREATE NONCLUSTERED INDEX IX_PFTEST3 ON #PfTest(cold) INCLUDE (cola, colb, id, colc)
-- Step 3: rows dated 2014-01-01 ...
select cola, colb, colc, cold, ID
from #PfTest
WHERE colc In ('2014-01-01')
-- ... intersected with derived table A (all rows minus the EXCEPT branch).
INTERSECT
select cola, colb, colc, cold, id
from
(select cola, colb, colc, cold, ID
from #PfTest
EXCEPT
-- "1 id" projects the constant 1 under the column name id; it does not
-- filter on ID = 1.
-- NOTE(review): EXCEPT compares whole rows including colc, and every row on
-- this side is dated 2014-01-02..06, so it cannot remove any row dated
-- 2014-01-01. The final result therefore looks like just the distinct
-- 2014-01-01 rows, regardless of cold. Verify against the intended
-- "cold NOT IN (subquery)" semantics before use.
SELECT cola, colb, colc, cold, 1 id FROM #PfTest
where
colc IN('2014-01-02', '2014-01-03',
'2014-01-04', '2014-01-05', '2014-01-06'))A
使用 EXCEPT
代替 NOT IN
以稍微提高性能。