为什么我的 CTE 连接更新比我的 Table 变量连接慢得多?

Why does my CTE join update so much slower than my Table variable join?

我看过几个类似的帖子,但它们似乎都与海量数据库有关。今天早上在一个小型实时数据库中看到这个问题后,我创建了一个虚拟数据库来演示这个问题。

此数据的基础如下:一家公司跟踪其 100 个客户的股票投资组合。 1000 只股票中的每只都有一份每日记录,其中列出了拥有它的四位投资者及其百分比。不幸的是,它有一个小故障,允许所有者多次出现。该程序解析数据并将记录分开,因此每天每只股票有 4 条记录,然后它会将每个所有者的投资组合总数相加。但是,由于有多个记录,这可能会夸大该所有者的价值。因此,将插入一个标志来识别这些重复项中的任何一个。在代码的后面,每行的值乘以该标志,重复为 0,否则为 1。

我有五种更新该标志的方法。方法 0 只是用带 SELECT 语句的 CTE 找出重复项,作为基线,大约需要 0.07 秒。方法 1 通过 JOIN 到 CTE 来更新表,大约需要 48 秒。方法 2 用嵌套的 SELECT 语句代替 CTE,大约需要 48 秒。方法 3 先把该 CTE 的结果存入表变量,再联接该表变量,大约需要 0.13 秒。方法 4 我原以为效率最低,因为它使用计数循环、一次只更新一行,但只用了 0.17 秒。方法 5 使用 CASE 语句更新所有行,并联接到 CTE,大约需要 48 秒。

-- Wide input rows: one row per stock and trade date, carrying four owner
-- slots (owner ID plus that owner's percentage).
DECLARE @OwnRec TABLE (
      StockID    INT
    , TradeDate  DATE
    , Shares     DECIMAL(4,0)
    , Price      DECIMAL(4,2)
    , Owner1     INT,  Owner1Pct  DECIMAL(3,2)
    , Owner2     INT,  Owner2Pct  DECIMAL(3,2)
    , Owner3     INT,  Owner3Pct  DECIMAL(3,2)
    , Owner4     INT,  Owner4Pct  DECIMAL(3,2)
    );

-- Narrow rows: one row per stock, trade date and owner slot.
-- Column order matters: this table is also filled positionally further down.
DECLARE @OwnRec2 TABLE (
      RecID      INT IDENTITY   -- generated row identifier
    , StockID    INT
    , TradeDate  DATE
    , Shares     DECIMAL(4,0)
    , Price      DECIMAL(4,2)
    , Owner0     INT            -- owner ID taken from one of the four slots
    , Owner0Pct  DECIMAL(3,2)
    , OwnerNum   INT            -- which slot (1-4) the owner came from
    , DupeOwner  TINYINT        -- 1 = count this row, 0 = repeated owner
    );

-- Work list of @OwnRec2.RecID values flagged as duplicates; ID gives each
-- entry a sequence number.
DECLARE @CullDupe TABLE (
      ID         INT IDENTITY
    , RecID      INT
    );

DECLARE @Method    INT;        -- which update variant to run
DECLARE @Counter1  INT = 0;    -- general-purpose loop counter
DECLARE @StartTime DATETIME;   -- stopwatch start for the timed section

--Populate tables with dummy data
-- Day one: insert 1000 synthetic rows, one per StockID 1..1000, dated
-- 2016-09-26. Relies on @Counter1 holding 0 on entry (its declared initial
-- value).
WHILE @Counter1 < 1000
    BEGIN
        SET @Counter1 = @Counter1 + 1

        INSERT INTO @OwnRec (
              StockID, TradeDate, Shares, Price
            , Owner1, Owner1Pct
            , Owner2, Owner2Pct
            , Owner3, Owner3Pct
            , Owner4, Owner4Pct
            )
        SELECT
              @Counter1                                               AS StockID
            , '2016-09-26'                                            AS TradeDate
            -- random share count rounded to a multiple of 25
            , ROUND((RAND() * 1000 + 500)/25,0)*25                    AS Shares
            -- random price rounded to two decimals
            , ROUND((RAND() * 30 + 20),2)                             AS Price
            -- each owner: a random whole-number ID and a random percentage
            -- that is a multiple of .05
            , ROUND((RAND() * 100 + .5),0)                            AS Owner1
            , CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))    AS Owner1Pct
            , ROUND((RAND() * 100 + .5),0)                            AS Owner2
            , CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))    AS Owner2Pct
            , ROUND((RAND() * 100 + .5),0)                            AS Owner3
            , CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))    AS Owner3Pct
            , ROUND((RAND() * 100 + .5),0)                            AS Owner4
            , CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))    AS Owner4Pct
    END

SET @Counter1 = 0

-- Day two: for each day-one row (StockID 1..1000) insert a follow-up row
-- dated 2016-09-27 with the same shares and owners and a slightly moved price.
-- NOTE(review): the new rows are stored under StockID + 1000, i.e. as separate
-- stock IDs rather than a second trade date for the same stock - confirm
-- that this is intended.
WHILE @Counter1 < 1000
    BEGIN
        SET @Counter1 += 1
        INSERT INTO @OwnRec (
              StockID
            , TradeDate
            , Shares
            , Price
            , Owner1
            , Owner1Pct
            , Owner2
            , Owner2Pct
            , Owner3
            , Owner3Pct
            , Owner4
            , Owner4Pct
            )
        SELECT @Counter1 + 1000
            , '2016-09-27'
            , Shares
            -- Price moves by a random factor of 0.96 .. 1.05. The factor is
            -- parenthesised: multiplication binds tighter than addition, so
            -- the unparenthesised form computed (Price * k * .01) + .95 and
            -- collapsed every price to roughly 1 - 6.
            , ROUND(Price * (ROUND(RAND()*10 + .5,0)*.01+.95),2)
            , Owner1
            , Owner1Pct
            , Owner2
            , Owner2Pct
            , Owner3
            , Owner3Pct
            , Owner4
            , Owner4Pct
            FROM @OwnRec WHERE StockID = @Counter1
    END

-- Where the same owner ID occupies two slots of a row, copy the earlier
-- slot's percentage onto the later slot. The statements run in this order on
-- purpose: later ones read percentages that earlier ones may have rewritten.

-- Slot 1 repeated in slots 2, 3, 4
UPDATE @OwnRec SET Owner2Pct = Owner1Pct WHERE Owner1 = Owner2;
UPDATE @OwnRec SET Owner3Pct = Owner1Pct WHERE Owner1 = Owner3;
UPDATE @OwnRec SET Owner4Pct = Owner1Pct WHERE Owner1 = Owner4;

-- Slot 2 repeated in slots 3, 4
UPDATE @OwnRec SET Owner3Pct = Owner2Pct WHERE Owner2 = Owner3;
UPDATE @OwnRec SET Owner4Pct = Owner2Pct WHERE Owner2 = Owner4;

-- Slot 3 repeated in slot 4
UPDATE @OwnRec SET Owner4Pct = Owner3Pct WHERE Owner3 = Owner4;

-- Unpivot every wide @OwnRec row into four @OwnRec2 rows, one per owner slot.
-- OwnerNum records the slot; DupeOwner starts at 1 (count the row) for all.
--
-- Changes from the previous form:
--   * explicit target column list, so the insert no longer depends on the
--     physical column order of @OwnRec2;
--   * the constant columns are aliased as what they really are (OwnerNum,
--     DupeOwner) instead of the misleading "Owner0Pct";
--   * UNION ALL instead of UNION: the four branches carry different OwnerNum
--     constants and StockID is unique within @OwnRec, so there is nothing to
--     de-duplicate and the implicit DISTINCT was wasted work. RecID values
--     may be handed out in a different order; the row set is the same.
INSERT INTO @OwnRec2 (
      StockID
    , TradeDate
    , Shares
    , Price
    , Owner0
    , Owner0Pct
    , OwnerNum
    , DupeOwner
    )
    SELECT StockID, TradeDate, Shares, Price, Owner1 AS Owner0, Owner1Pct AS Owner0Pct, 1 AS OwnerNum, 1 AS DupeOwner
        FROM @OwnRec
    UNION ALL
    SELECT StockID, TradeDate, Shares, Price, Owner2 AS Owner0, Owner2Pct AS Owner0Pct, 2 AS OwnerNum, 1 AS DupeOwner
        FROM @OwnRec
    UNION ALL
    SELECT StockID, TradeDate, Shares, Price, Owner3 AS Owner0, Owner3Pct AS Owner0Pct, 3 AS OwnerNum, 1 AS DupeOwner
        FROM @OwnRec
    UNION ALL
    SELECT StockID, TradeDate, Shares, Price, Owner4 AS Owner0, Owner4Pct AS Owner0Pct, 4 AS OwnerNum, 1 AS DupeOwner
        FROM @OwnRec
--END Populate tables with dummy data

-- Start the stopwatch and select the variant under test in one assignment.
SELECT
      @StartTime = GETDATE()
    , @Method    = 5 -- Choose which method to test


--CASE 0: Just identify duplicates
-- Baseline: number the rows within each (StockID, TradeDate, Owner0) group by
-- OwnerNum and return every row after the first. Nothing is updated.

IF @Method = 0
    BEGIN
        ; WITH RankedOwners AS (
            SELECT
                  src.RecID
                , ROW_NUMBER() OVER (
                      PARTITION BY src.StockID, src.TradeDate, src.Owner0
                      ORDER BY src.OwnerNum
                  ) AS rn
            FROM @OwnRec2 AS src
        )
        SELECT ranked.RecID, ranked.rn
        FROM RankedOwners AS ranked
        WHERE ranked.rn > 1
    END


--CASE 1: Update on JOIN to CTE
-- Clears DupeOwner on every repeat of an owner within a
-- (StockID, TradeDate, Owner0) group by joining @OwnRec2 back to a
-- row-numbering CTE over the same table variable.
-- Kept in its original join shape on purpose: this is one of the slow
-- variants the surrounding write-up is about. That write-up suggests
-- OPTION (RECOMPILE) or a #temp table as the remedy.

IF @Method = 1
    BEGIN
        ; WITH RankedOwners AS (
            SELECT
                  src.RecID
                , ROW_NUMBER() OVER (
                      PARTITION BY src.StockID, src.TradeDate, src.Owner0
                      ORDER BY src.OwnerNum
                  ) AS rn
            FROM @OwnRec2 AS src
        )
        UPDATE tgt
        SET DupeOwner = 0
        FROM @OwnRec2 AS tgt
        INNER JOIN RankedOwners AS ranked
            ON tgt.RecID = ranked.RecID
        WHERE ranked.rn > 1
    END


--CASE 2: Update on JOIN to nested SELECT
-- Same update as the CTE variant, but the row-numbering query is written
-- inline as a derived table instead of a CTE.

IF @Method = 2
    BEGIN
        UPDATE tgt
        SET DupeOwner = 0
        FROM @OwnRec2 AS tgt
        INNER JOIN (
            SELECT
                  src.RecID
                , ROW_NUMBER() OVER (
                      PARTITION BY src.StockID, src.TradeDate, src.Owner0
                      ORDER BY src.OwnerNum
                  ) AS rn
            FROM @OwnRec2 AS src
        ) AS ranked
            ON tgt.RecID = ranked.RecID
        WHERE ranked.rn > 1
    END


--CASE 3: Update on JOIN to temp table
-- Two steps: first store the RecIDs of the repeated-owner rows in @CullDupe,
-- then clear DupeOwner by joining @OwnRec2 to that stored list.

IF @Method = 3
    BEGIN
        -- Step 1: capture the duplicate RecIDs (ID is generated by IDENTITY).
        ; WITH RankedOwners AS (
            SELECT
                  src.RecID
                , ROW_NUMBER() OVER (
                      PARTITION BY src.StockID, src.TradeDate, src.Owner0
                      ORDER BY src.OwnerNum
                  ) AS rn
            FROM @OwnRec2 AS src
        )
        INSERT INTO @CullDupe (RecID)
        SELECT ranked.RecID
        FROM RankedOwners AS ranked
        WHERE ranked.rn > 1

        -- Step 2: flag those rows.
        UPDATE tgt
        SET DupeOwner = 0
        FROM @OwnRec2 AS tgt
        INNER JOIN @CullDupe AS dupes
            ON tgt.RecID = dupes.RecID
    END


--CASE 4: Update using counted loop
-- Stores the duplicate RecIDs in @CullDupe, then walks that list by its
-- IDENTITY value and clears DupeOwner one row per iteration.

IF @Method = 4
    BEGIN
        -- Capture the duplicate RecIDs.
        ; WITH RankedOwners AS (
            SELECT
                  src.RecID
                , ROW_NUMBER() OVER (
                      PARTITION BY src.StockID, src.TradeDate, src.Owner0
                      ORDER BY src.OwnerNum
                  ) AS rn
            FROM @OwnRec2 AS src
        )
        INSERT INTO @CullDupe (RecID)
        SELECT ranked.RecID
        FROM RankedOwners AS ranked
        WHERE ranked.rn > 1

        SET @Counter1 = 0

        -- The upper bound is re-read from @CullDupe on every pass, as before.
        -- With an empty list MAX(ID) is NULL and the loop body never runs.
        WHILE @Counter1 < (SELECT MAX(dupes.ID) FROM @CullDupe AS dupes)
            BEGIN
                SET @Counter1 = @Counter1 + 1

                UPDATE @OwnRec2
                SET DupeOwner = 0
                WHERE RecID = (SELECT dupes.RecID FROM @CullDupe AS dupes WHERE dupes.ID = @Counter1)
            END
    END


--CASE 5: Update using JOIN to CTE, but updating all rows (CASE to identify)
-- Joins @OwnRec2 to the row-numbering CTE and rewrites DupeOwner on every
-- row: 1 for the first row of each (StockID, TradeDate, Owner0) group, 0 for
-- the repeats. ROW_NUMBER() starts at 1, so "rn = 1" and "not rn > 1" select
-- the same rows.

IF @Method = 5
    BEGIN
        ; WITH RankedOwners AS (
            SELECT
                  src.RecID
                , ROW_NUMBER() OVER (
                      PARTITION BY src.StockID, src.TradeDate, src.Owner0
                      ORDER BY src.OwnerNum
                  ) AS rn
            FROM @OwnRec2 AS src
        )
        UPDATE tgt
        SET DupeOwner = CASE ranked.rn WHEN 1 THEN 1 ELSE 0 END
        FROM @OwnRec2 AS tgt
        INNER JOIN RankedOwners AS ranked
            ON tgt.RecID = ranked.RecID
    END

-- Report which method ran and the elapsed time since @StartTime was set.
SELECT 'Method ' + CONVERT(NVARCHAR(1), @Method) + ': '
     + CONVERT(NVARCHAR(10), DATEDIFF(MILLISECOND, @StartTime, GETDATE()))
     + ' milliseconds'

这是表变量的常见问题。

引用表变量的语句,其执行计划在批处理开始执行之前就已编译,也就是说,在那些插入语句执行之前就已编译。

如果您选中其中一个有问题的执行计划并查看“属性”窗口,您会看到表基数为 0。

尽管如此,优化器仍然假设这个空表会输出 1 行,因为在大多数情况下,这是执行计划中叶运算符的最小行数估计。嵌套循环内侧的子树会针对驱动表的每一行执行一次。由于驱动表估计只有 1 行,下面突出显示的子树被估计为只执行一次。而实际上,整个子树将被执行 8,000 次(包括昂贵的表扫描和排序运算符)。

当您将行编号的结果物化到表变量中时,您保存了该子树的结果,从而确保它只被计算一次(尽管使用它的计划仍然以次优的嵌套循环联接到新的表变量)。

针对单行估计的常见解决方案有:在有问题的语句上添加 OPTION (RECOMPILE),以便优化器能够考虑语句执行时的表基数;或者使用跟踪标志 2453(可在基数变化后触发自动重新编译);或者改用 #temp 表(它可以触发自动重新编译,并且还能受益于列统计信息)。

有关其中一些内容的更多详细信息,请参见我在此处的回答。