SQL Server 2008 R2:查询调优

SQL Server 2008 R2: Tuning query

我有下面这张表,其中包含 10 亿条记录。

-- Test table used by every query below: three integer-like keys/values,
-- one DATE column (colc) and one short string column (cold).
CREATE TABLE PfTest (
    cola INT,
    colb INT,
    colc DATE,
    cold VARCHAR(10),
    ID   INT
);

现在我想显示属于某个特定日期、并且不属于另外一些特定日期的记录。

为此我使用了以下两种类型的查询:

查询 1:

-- Distinct rows dated 2014-01-01 whose cold value is not among the cold
-- values of ID = 1 rows dated 2014-01-02 through 2014-01-06.
SELECT DISTINCT
    cola,
    colb,
    colc,
    cold,
    ID
FROM PfTest
WHERE colc = '2014-01-01'
  AND cold NOT IN (
        SELECT cold
        FROM PfTest
        WHERE ID = 1
          AND colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                       '2014-01-05', '2014-01-06')
      );

查询 2:

-- Same filter as Query 1, but the NOT IN exclusion is applied inside a CTE
-- and the 2014-01-01 date filter is applied to the CTE's output.
WITH not_excluded AS (
    SELECT DISTINCT
        cola,
        colb,
        colc,
        cold,
        ID
    FROM PfTest
    WHERE cold NOT IN (
            SELECT cold
            FROM PfTest
            WHERE ID = 1
              AND colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                           '2014-01-05', '2014-01-06')
          )
)
SELECT
    cola,
    colb,
    colc,
    cold,
    ID
FROM not_excluded
WHERE colc = '2014-01-01';

以上两个查询的执行计划相同,而且都需要花费大量时间来执行。对于这种情况,我能写出更好的查询吗?

这是去掉 DISTINCT 之后的查询(DISTINCT 似乎没有必要):

-- Query 1 without DISTINCT: rows dated 2014-01-01 whose cold value is not
-- returned by the ID = 1 / 2014-01-02..2014-01-06 subquery.
SELECT
    cola,
    colb,
    colc,
    cold,
    ID
FROM PfTest
WHERE colc = '2014-01-01'
  AND cold NOT IN (
        SELECT cold
        FROM PfTest
        WHERE ID = 1
          AND colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                       '2014-01-05', '2014-01-06')
      );

我会先从索引入手:PFTest(colc, cold) 和 PFTest(id, colc, cold)。

如果子查询返回大量数据(比如数百万行),那么最好使用临时表。我会先尝试索引;如果索引不起作用,一个在 cold 上建有索引的临时表可能会有效。此外,虽然对性能影响不大,但我会使用 NOT EXISTS 而不是 NOT IN 来表达这个查询:
-- Rows dated 2014-01-01 for which no ID = 1 row dated 2014-01-02 through
-- 2014-01-06 shares the same cold value.
-- The outer table must be aliased t1: the correlated predicate below refers
-- to t1.cold, and an unmatched alias makes the statement fail to bind.
-- A NULL cold never satisfies t2.cold = t1.cold, so such rows are returned.
SELECT
    t1.cola,
    t1.colb,
    t1.colc,
    t1.cold,
    t1.ID
FROM PfTest AS t1
WHERE t1.colc = '2014-01-01'
  AND NOT EXISTS (
        SELECT 1
        FROM PfTest AS t2
        WHERE t2.cold = t1.cold
          AND t2.ID = 1
          AND t2.colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                          '2014-01-05', '2014-01-06')
      );

对于这个版本,最好的索引是 PfTest(cold, id, colc)。

当匹配列中含有 NULL 值时,NOT EXISTS 的行为更加直观。

首先

-- The original query: distinct rows dated 2014-01-01 whose cold value is not
-- among the cold values of ID = 1 rows dated 2014-01-02 through 2014-01-06.
SELECT DISTINCT
    cola,
    colb,
    colc,
    cold,
    ID
FROM PfTest
WHERE colc = '2014-01-01'
  AND cold NOT IN (
        SELECT cold
        FROM PfTest
        WHERE ID = 1
          AND colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                       '2014-01-05', '2014-01-06')
      );

相同
-- NOTE(review): this row-level form does not return the same rows as the
-- NOT IN (subquery) form. Every predicate here is evaluated against the
-- current row only:
--   * colc NOT IN (...) is always true once colc = '2014-01-01';
--   * NOT(ID = 1) drops every 2014-01-01 row whose own ID is 1.
-- The subquery form instead drops a 2014-01-01 row when its cold value appears
-- on some OTHER row with ID = 1 dated 2014-01-02..2014-01-06, whatever the
-- current row's ID is.
select DISTINCT cola, colb, colc, cold, ID
from PfTest
WHERE colc In ('2014-01-01') 
  AND colc NOT IN ('2014-01-02', '2014-01-03', 
                                  '2014-01-04', '2014-01-05', '2014-01-06')

AND NOT(ID =1);

因为内层查询和外层查询访问的是同一张表。

由于您不希望反复扫描这张表(它包含十亿行),更好的做法是先把数据拉到临时表中,然后在临时表上创建合适的索引。

-- Stage the working set in a temp table.
-- Only the six dates read by the follow-up query are copied, instead of every
-- row of PfTest; extend the date list if other dates are queried later.
SELECT
    cola,
    colb,
    colc,
    cold,
    ID
INTO #PfTest
FROM PfTest
WHERE colc IN ('2014-01-01', '2014-01-02', '2014-01-03',
               '2014-01-04', '2014-01-05', '2014-01-06');

-- Three nonclustered indexes on #PfTest, keyed on id, colc and cold in turn;
-- each one INCLUDEs the other four columns.
CREATE NONCLUSTERED INDEX IX_PFTEST1
    ON #PfTest (id)
    INCLUDE (cola, colb, colc, cold);

CREATE NONCLUSTERED INDEX IX_PFTEST2
    ON #PfTest (colc)
    INCLUDE (cola, colb, id, cold);

CREATE NONCLUSTERED INDEX IX_PFTEST3
    ON #PfTest (cold)
    INCLUDE (cola, colb, id, colc);

-- Distinct rows dated 2014-01-01 whose cold value does not appear on any
-- ID = 1 row dated 2014-01-02 through 2014-01-06.
-- EXCEPT compares whole rows, so it is applied to cold alone: with colc in the
-- compared column list, a row dated 2014-01-02..06 can never equal (and so can
-- never remove) a row dated 2014-01-01.
-- Rows whose cold is NULL are not returned, because the join equality
-- kept.cold = p.cold never matches NULL.
SELECT DISTINCT
    p.cola,
    p.colb,
    p.colc,
    p.cold,
    p.ID
FROM #PfTest AS p
INNER JOIN (
    -- cold values seen on the target date ...
    SELECT cold
    FROM #PfTest
    WHERE colc = '2014-01-01'

    EXCEPT

    -- ... minus the cold values that must be excluded.
    SELECT cold
    FROM #PfTest
    WHERE ID = 1
      AND colc IN ('2014-01-02', '2014-01-03', '2014-01-04',
                   '2014-01-05', '2014-01-06')
) AS kept
    ON kept.cold = p.cold
WHERE p.colc = '2014-01-01';

使用 EXCEPT 代替 NOT IN 以稍微提高性能。