如何提高 sql 脚本性能

How to improve sql script performance

以下脚本运行起来非常慢。

我不知道如何提高脚本的性能。 即使有一个视图也需要花费很多分钟。 有什么想法请分享给我。

-- Original (slow) query from the question, kept exactly as posted.
-- Returns the de-duplicated set of ids produced by five UNION-ed branches.
-- UNION already removes duplicates, so the outer SELECT DISTINCT and the
-- DISTINCT inside every branch do not change the result.
SELECT DISTINCT
        ( id )
-- Branch 1: Contact ids whose Customer_ids.customer_id appears in
-- Transaction_Header with actual_transaction_date > '20120218'.
-- The WHERE clause filters on hnci (the right side of the LEFT JOIN), which
-- rejects the NULL-extended rows, so this LEFT JOIN behaves as an inner join.
FROM    ( SELECT DISTINCT
                    ct.id AS id
          FROM      [Customer].[dbo].[Contact] ct
                    LEFT JOIN [Customer].[dbo].[Customer_ids] hnci ON ct.id = hnci.contact_id
          WHERE     hnci.customer_id IN (
                    SELECT DISTINCT
                            ( [Customer_ID] )
                    FROM    [Transactions].[dbo].[Transaction_Header]
                    WHERE   actual_transaction_date > '20120218' )
          UNION
          -- Branch 2: Restaurant_Attendance rows created or modified after
          -- 2012-02-18 00:00:00.000 with at least one of the three venue flags = 1.
          SELECT DISTINCT
                    contact_id AS id
          FROM      [Customer].[dbo].[Restaurant_Attendance]
          WHERE     ( created > '2012-02-18 00:00:00.000'
                      OR modified > '2012-02-18 00:00:00.000'
                    )
                    AND ( [Fifth_Floor_London] = 1
                          OR [Fourth_Floor_Leeds] = 1
                          OR [Second_Floor_Bristol] = 1
                        )
          UNION
          -- Branch 3: Contact ids that match a Wifinity_Devices row
          -- (wifinity_uniqueID = CustomerUniqueID) with startconnection > '2012-02-17'.
          SELECT DISTINCT
                    ( ct.id )
          FROM      [Customer].[dbo].[Contact] ct
                    INNER JOIN [Customer].[dbo].[Wifinity_Devices] wfd ON ct.wifinity_uniqueID = wfd.[CustomerUniqueID]
                                                              AND startconnection > '2012-02-17'
          UNION
          -- Branch 4: Complete_dataset ids with no Aggregate_Spend_Counts row
          -- (LEFT JOIN + IS NULL anti-join), where at least one opt-out column is
          -- NULL or not 1, and at least one of address_1 / email / mobile is present.
          SELECT DISTINCT
                    comdt.id AS id
          FROM      [Customer].[dbo].[Complete_dataset] comdt
                    LEFT JOIN [Customer].[dbo].[Aggregate_Spend_Counts] agsc ON comdt.id = agsc.contact_id
          WHERE     agsc.contact_id IS NULL
                    AND ( opt_out_Mail <> 1
                          OR opt_out_email <> 1
                          OR opt_out_SMS <> 1
                          OR opt_out_Mail IS NULL
                          OR opt_out_email IS NULL
                          OR opt_out_SMS IS NULL
                        )
                    AND ( address_1 IS NOT NULL
                          OR email IS NOT NULL
                          OR mobile IS NOT NULL
                        )
          UNION
          -- Branch 5: contact ids of VIP_Card_Holders rows that have a card number.
          SELECT DISTINCT
                    ( contact_id ) AS id
          FROM      [Customer].[dbo].[VIP_Card_Holders]
          WHERE     VIP_Card_number IS NOT NULL
        ) AS tbl

试试这个,temptable 应该能帮到你:

    -- Collects the qualifying contact ids into #temp1 one criterion at a time,
    -- so each statement can be timed and tuned separately, then returns the
    -- de-duplicated ids.
    --
    -- The temp table (and the tempdb database name) is spelled the same way
    -- everywhere. With a case-sensitive server/tempdb collation, '#Temp1' and
    -- '#temp1' are different names: the existence check would never match and a
    -- second run in the same session would fail at SELECT ... INTO #temp1.
    IF OBJECT_ID('tempdb..#temp1') IS NOT NULL
        DROP TABLE #temp1;

    -- Criterion 1: contacts linked through Customer_ids to a customer that has
    -- a transaction with actual_transaction_date > '20120218'.
    -- The original form was
    --     Contact LEFT JOIN Customer_ids ... WHERE hnci.customer_id IN ( subquery )
    -- The WHERE on hnci.customer_id discards the NULL-extended rows of the
    -- LEFT JOIN, so two inner joins return exactly the same ids; stating that
    -- directly usually gives the optimizer a better plan.
    SELECT DISTINCT
            ct.id AS id
    INTO    #temp1
    FROM    [Customer].[dbo].[Contact] ct
            INNER JOIN [Customer].[dbo].[Customer_ids] hnci ON ct.id = hnci.contact_id
            INNER JOIN ( SELECT DISTINCT
                                [Customer_ID]
                         FROM   [Transactions].[dbo].[Transaction_Header]
                         WHERE  actual_transaction_date > '20120218'
                       ) T ON hnci.customer_id = T.[Customer_ID];

    -- Criterion 2: restaurant attendance created or modified after
    -- 2012-02-18 00:00:00.000 at one of the three flagged venues.
    INSERT  INTO #temp1
            ( id
            )
            SELECT DISTINCT
                    contact_id AS id
            FROM    [Customer].[dbo].[Restaurant_Attendance]
            WHERE   ( created > '2012-02-18 00:00:00.000'
                      OR modified > '2012-02-18 00:00:00.000'
                    )
                    AND ( [Fifth_Floor_London] = 1
                          OR [Fourth_Floor_Leeds] = 1
                          OR [Second_Floor_Bristol] = 1
                        );

    -- Criterion 3: contacts with a Wifinity device row whose
    -- startconnection > '2012-02-17'.
    INSERT  INTO #temp1
            ( id
            )
            SELECT DISTINCT
                    ct.id
            FROM    [Customer].[dbo].[Contact] ct
                    INNER JOIN [Customer].[dbo].[Wifinity_Devices] wfd ON ct.wifinity_uniqueID = wfd.[CustomerUniqueID]
                                                                  AND startconnection > '2012-02-17';

    -- Criterion 4: Complete_dataset ids with no Aggregate_Spend_Counts row
    -- (anti-join), at least one opt-out column NULL or not 1, and at least one
    -- of address_1 / email / mobile present.
    INSERT  INTO #temp1
            ( id
            )
            SELECT DISTINCT
                    comdt.id AS id
            FROM    [Customer].[dbo].[Complete_dataset] comdt
                    LEFT JOIN [Customer].[dbo].[Aggregate_Spend_Counts] agsc ON comdt.id = agsc.contact_id
            WHERE   agsc.contact_id IS NULL
                    AND ( opt_out_Mail <> 1
                          OR opt_out_email <> 1
                          OR opt_out_SMS <> 1
                          OR opt_out_Mail IS NULL
                          OR opt_out_email IS NULL
                          OR opt_out_SMS IS NULL
                        )
                    AND ( address_1 IS NOT NULL
                          OR email IS NOT NULL
                          OR mobile IS NOT NULL
                        );

    -- Criterion 5: VIP card holders that have a card number.
    INSERT  INTO #temp1
            ( id
            )
            SELECT DISTINCT
                    contact_id AS id
            FROM    [Customer].[dbo].[VIP_Card_Holders]
            WHERE   VIP_Card_number IS NOT NULL;

    -- The same id can be inserted by several criteria, so de-duplicate here.
    SELECT DISTINCT
            id
    FROM    #temp1 AS T;

如评论中所述,一次优化一个。看看哪个花费的时间最长,然后专注于那个。

union 将删除重复项,因此您不需要对单个查询使用 distinct

你先试试这个:

由于 WHERE hnci.customer_id IN (...) 条件作用于右表,左联接实际上已失效(等同于内联接),因此不如直接使用内联接。

子查询效率不高,因为不能在 IN 上使用索引。
查询优化器不知道 ( select .. ) 会返回什么,因此无法优化索引的使用。

-- Contact ids reachable through Customer_ids from a Transaction_Header row
-- with actual_transaction_date > '20120218'.
-- A contact is returned once per matching transaction; when this is used as
-- a branch of a UNION, the UNION collapses those repeats.
SELECT
    ct.id AS id
FROM [Customer].[dbo].[Contact] AS ct
INNER JOIN [Customer].[dbo].[Customer_ids] AS hnci
    ON hnci.contact_id = ct.id
INNER JOIN [Transactions].[dbo].[Transaction_Header] AS th
    ON th.[Customer_ID] = hnci.customer_id
   AND th.actual_transaction_date > '20120218'

在第二个联接中,查询优化器可以自行选择先应用哪个条件。假设 [Customer].[dbo].[Customer_ids].[customer_id] 和 [Transactions].[dbo].[Transaction_Header].[Customer_ID] 各自都有索引,查询优化器就可以选择先应用这个 ID 联接条件,再应用 [actual_transaction_date] 上的筛选。如果 [actual_transaction_date] 没有索引,那么它肯定会先执行 ID 联接。

而在 ( select ... ) 写法中,查询优化器别无选择,只能先应用 actual_transaction_date > '20120218'。当然,有时查询优化器足够聪明,能够让外层的索引在子查询内部发挥作用,但何必为难查询优化器呢?我发现,如果您让决策变得更容易,查询优化器就会做出更好的决策。

联接到子查询也有同样的问题:这样做会剥夺查询优化器的选择余地。请给查询优化器留一些发挥的空间。

Where exists 通常也比 in 快。

OR 条件通常也较慢,请改用多个 UNION 语句。另外请学会正确使用左联接:如果对左联接右侧的表设置了 where 条件(“id 为 null”这类条件除外),该联接就会变成内联接。如果这不是您想要的,那么您的代码当前返回的就是不正确的结果集。

有关如何修复的说明,请参阅 http://wiki.lessthandot.com/index.php/WHERE_conditions_on_a_LEFT_JOIN

哇,从哪里开始...

-- Distinct contact ids that satisfy any of the five criteria below.
-- UNION already removes duplicates, so neither an outer SELECT DISTINCT
-- wrapper nor a DISTINCT inside each branch is needed.

-- 1. Contacts linked through Customer_ids to a customer that has a transaction
--    with actual_transaction_date > '20120218'.
--    The branch must return the contact id: Transaction_Header.Customer_ID is a
--    customer id and has to be mapped through Customer_ids first.
--    The original LEFT JOIN was filtered on its right-hand table, so an inner
--    join returns the same rows; EXISTS keeps one row per Customer_ids match
--    instead of one per transaction.
SELECT ct.id AS id
FROM   [Customer].[dbo].[Contact] ct
       INNER JOIN [Customer].[dbo].[Customer_ids] hnci ON ct.id = hnci.contact_id
WHERE  EXISTS ( SELECT 1
                FROM   [Transactions].[dbo].[Transaction_Header] th
                WHERE  th.[Customer_ID] = hnci.customer_id
                       AND th.actual_transaction_date > '20120218' )
UNION
-- 2. Restaurant attendance at one of the three flagged venues.
--    Check the date range: '>' keeps everything on the 18th except rows stamped
--    exactly at midnight. Use >= '2012-02-18 00:00:00.000' for "the 18th or
--    later", or >= '2012-02-19 00:00:00.000' for "the 19th or later".
SELECT contact_id AS id
FROM   [Customer].[dbo].[Restaurant_Attendance]
WHERE  ( created > '2012-02-18 00:00:00.000'
         OR modified > '2012-02-18 00:00:00.000'
       )
       AND ( [Fifth_Floor_London] = 1
             OR [Fourth_Floor_Leeds] = 1
             OR [Second_Floor_Bristol] = 1
           )
UNION
-- 3. Contacts with a Wifinity device row whose startconnection > '2012-02-17'.
--    This branch is required: branch 1 only returns contacts with recent
--    transactions, not every row of Contact.
SELECT ct.id AS id
FROM   [Customer].[dbo].[Contact] ct
       INNER JOIN [Customer].[dbo].[Wifinity_Devices] wfd ON ct.wifinity_uniqueID = wfd.[CustomerUniqueID]
                                                             AND startconnection > '2012-02-17'
UNION
-- 4. Complete_dataset ids with no Aggregate_Spend_Counts row (anti-join).
--    ISNULL(x, 0) <> 1 is equivalent to "x <> 1 OR x IS NULL"; COALESCE(...)
--    IS NOT NULL is equivalent to "at least one of the columns is not NULL".
SELECT comdt.id AS id
FROM   [Customer].[dbo].[Complete_dataset] comdt
       LEFT JOIN [Customer].[dbo].[Aggregate_Spend_Counts] agsc ON comdt.id = agsc.contact_id
WHERE  agsc.contact_id IS NULL
       AND ( ISNULL(opt_out_Mail, 0) <> 1
             OR ISNULL(opt_out_email, 0) <> 1
             OR ISNULL(opt_out_SMS, 0) <> 1
           )
       AND COALESCE(address_1, email, mobile) IS NOT NULL
UNION
-- 5. VIP card holders that have a card number.
SELECT contact_id AS id
FROM   [Customer].[dbo].[VIP_Card_Holders]
WHERE  VIP_Card_number IS NOT NULL