为什么我的 CTE 连接更新比我的 Table 变量连接慢得多?
Why does my CTE join update so much slower than my Table variable join?
我看过几个类似的帖子,但它们似乎都与海量数据库有关。今天早上在一个小型实时数据库中看到这个问题后,我创建了一个虚拟数据库来演示这个问题。
此数据的基础如下:一家公司跟踪其 100 个客户的股票投资组合。 1000 只股票中的每只都有一份每日记录,其中列出了拥有它的四位投资者及其百分比。不幸的是,它有一个小故障,允许所有者多次出现。该程序解析数据并将记录分开,因此每天每只股票有 4 条记录,然后它会将每个所有者的投资组合总数相加。但是,由于有多个记录,这可能会夸大该所有者的价值。因此,将插入一个标志来识别这些重复项中的任何一个。在代码的后面,每行的值乘以该标志,重复为 0,否则为 1。
我有五种更新该标志的方法。我从 0 开始,这只是使用带有 SELECT 语句的 CTE 作为基线;大约需要 0.07 秒。 1 使用带有 JOIN 的 CTE 来更新 table,大约需要 48 秒。 2 使用嵌套的 select 语句而不是 CTE,大约需要 48 秒。 3 将该 CTE 转储到 table 变量并加入该变量,大约需要 0.13 秒。 4 我原以为会是效率最低的,因为它使用了一个计数器循环,一次更新一行,但只用了0.17秒。 5 使用 CASE 语句更新所有行,加入 CTE,大约需要 48 秒。
DECLARE @OwnRec TABLE (
StockID INT
, TradeDate DATE
, Shares DECIMAL(4,0)
, Price DECIMAL(4,2)
, Owner1 INT
, Owner1Pct DECIMAL(3,2)
, Owner2 INT
, Owner2Pct DECIMAL(3,2)
, Owner3 INT
, Owner3Pct DECIMAL(3,2)
, Owner4 INT
, Owner4Pct DECIMAL(3,2)
)
DECLARE @OwnRec2 TABLE (
RecID INT IDENTITY
, StockID INT
, TradeDate DATE
, Shares DECIMAL(4,0)
, Price DECIMAL(4,2)
, Owner0 INT
, Owner0Pct DECIMAL(3,2)
, OwnerNum INT
, DupeOwner TINYINT
)
DECLARE @CullDupe TABLE (
ID INT IDENTITY
, RecID INT
)
DECLARE @Method INT
, @Counter1 INT = 0
, @StartTime DATETIME
--Populate tables with dummy data
WHILE @Counter1 < 1000
BEGIN
SET @Counter1 += 1
INSERT INTO @OwnRec (
StockID
, TradeDate
, Shares
, Price
, Owner1
, Owner1Pct
, Owner2
, Owner2Pct
, Owner3
, Owner3Pct
, Owner4
, Owner4Pct
)
SELECT @Counter1
, '2016-09-26'
, ROUND((RAND() * 1000 + 500)/25,0)*25
, ROUND((RAND() * 30 + 20),2)
, ROUND((RAND() * 100 + .5),0)
, CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
, ROUND((RAND() * 100 + .5),0)
, CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
, ROUND((RAND() * 100 + .5),0)
, CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
, ROUND((RAND() * 100 + .5),0)
, CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
END
SET @Counter1 = 0
WHILE @Counter1 < 1000
BEGIN
SET @Counter1 += 1
INSERT INTO @OwnRec (
StockID
, TradeDate
, Shares
, Price
, Owner1
, Owner1Pct
, Owner2
, Owner2Pct
, Owner3
, Owner3Pct
, Owner4
, Owner4Pct
)
SELECT @Counter1 + 1000
, '2016-09-27'
, Shares
, ROUND(Price * ROUND(RAND()*10 + .5,0)*.01+.95,2)
, Owner1
, Owner1Pct
, Owner2
, Owner2Pct
, Owner3
, Owner3Pct
, Owner4
, Owner4Pct
FROM @OwnRec WHERE StockID = @Counter1
END
UPDATE orx
SET Owner2Pct = Owner1Pct
FROM @OwnRec orx
WHERE Owner1 = Owner2
UPDATE orx
SET Owner3Pct = Owner1Pct
FROM @OwnRec orx
WHERE Owner1 = Owner3
UPDATE orx
SET Owner4Pct = Owner1Pct
FROM @OwnRec orx
WHERE Owner1 = Owner4
UPDATE orx
SET Owner3Pct = Owner2Pct
FROM @OwnRec orx
WHERE Owner2 = Owner3
UPDATE orx
SET Owner4Pct = Owner2Pct
FROM @OwnRec orx
WHERE Owner2 = Owner4
UPDATE orx
SET Owner4Pct = Owner3Pct
FROM @OwnRec orx
WHERE Owner3 = Owner4
INSERT INTO @OwnRec2
SELECT StockID, TradeDate, Shares, Price, Owner1 AS Owner0, Owner1Pct, 1, 1 AS Owner0Pct
FROM @OwnRec
UNION
SELECT StockID, TradeDate, Shares, Price, Owner2 AS Owner0, Owner2Pct, 2, 1 AS Owner0Pct
FROM @OwnRec
UNION
SELECT StockID, TradeDate, Shares, Price, Owner3 AS Owner0, Owner3Pct, 3, 1 AS Owner0Pct
FROM @OwnRec
UNION
SELECT StockID, TradeDate, Shares, Price, Owner4 AS Owner0, Owner4Pct, 4, 1 AS Owner0Pct
FROM @OwnRec
--END Populate tables with dummy data
SET @StartTime = GETDATE()
SET @Method = 5 -- Choose which method to test
--CASE 0: Just identify duplicates
IF @Method = 0
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
SELECT * FROM CullDupe WHERE rn > 1
END
--CASE 1: Update on JOIN to CTE
IF @Method = 1
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
UPDATE OR2
SET DupeOwner = 0
FROM @OwnRec2 OR2
JOIN CullDupe cd
ON OR2.RecID = cd.RecID
WHERE rn > 1
END
--CASE 2: Update on JOIN to nested SELECT
IF @Method = 2
BEGIN
UPDATE OR2
SET DupeOwner = 0
FROM @OwnRec2 OR2
JOIN (SELECT RecID, ROW_NUMBER() OVER
(PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2) cd
ON OR2.RecID = cd.RecID
WHERE rn > 1
END
--CASE 3: Update on JOIN to temp table
IF @Method = 3
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
INSERT INTO @CullDupe SELECT RecID FROM CullDupe WHERE rn > 1
UPDATE OR2
SET DupeOwner = 0
FROM @OwnRec2 OR2
JOIN @CullDupe cd
ON OR2.RecID = cd.RecID
END
--CASE 4: Update using counted loop
IF @Method = 4
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
INSERT INTO @CullDupe SELECT RecID FROM CullDupe WHERE rn > 1
SET @Counter1 = 0
WHILE @Counter1 < (SELECT MAX(ID) FROM @CullDupe)
BEGIN
SET @Counter1 += 1
UPDATE OR2
SET DupeOwner = 0
FROM @OwnRec2 OR2
WHERE RecID = (SELECT RecID FROM @CullDupe WHERE ID = @Counter1)
END
END
--CASE 5: Update using JOIN to CTE, but updating all rows (CASE to identify)
IF @Method = 5
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
UPDATE OR2
SET DupeOwner = CASE WHEN rn > 1 THEN 0 ELSE 1 END
FROM @OwnRec2 OR2
JOIN CullDupe cd
ON OR2.RecID = cd.RecID
END
SELECT 'Method ' + CAST(@Method AS NVARCHAR(1)) + ': ' + CAST(DATEDIFF(ms,@StartTime,GETDATE()) AS NVARCHAR(10)) + ' milliseconds'
这是 table 变量的常见问题。
引用它们的语句的执行计划甚至在批处理开始执行之前编译,因此在插入语句执行之前编译。
如果您 select 您的问题执行计划之一并查看属性 window,您将看到 table 基数为 0。
尽管如此,它仍然假设 1 行将从空 table 中发出,因为这是大多数情况下执行计划中叶运算符的最小行估计。嵌套循环内部的子树对来自驱动 table 的每一行执行一次。由于这估计是 1 行,下面突出显示的子树估计会执行一次。事实上,整个子树将被执行 8,000 次(包括昂贵的 table 扫描和排序运算符)。
当您将行编号的结果具体化为 table 变量时,您存储该子树的结果,从而确保它只计算一次(尽管使用它的计划仍然有一个次优嵌套循环加入新的 table 变量)。
单行估计的常见解决方案是将 OPTION (RECOMPILE)
添加到问题语句中,以便可以考虑语句执行时的 table 基数,或者使用跟踪标志 2453(可以触发基数更改后自动重新编译)
或使用 #temp table 代替(它可以触发自动重新编译并额外受益于列统计信息)
有关其中一些内容的更多详细信息,请参见 in my answer here。
我看过几个类似的帖子,但它们似乎都与海量数据库有关。今天早上在一个小型实时数据库中看到这个问题后,我创建了一个虚拟数据库来演示这个问题。
此数据的基础如下:一家公司跟踪其 100 个客户的股票投资组合。 1000 只股票中的每只都有一份每日记录,其中列出了拥有它的四位投资者及其百分比。不幸的是,它有一个小故障,允许所有者多次出现。该程序解析数据并将记录分开,因此每天每只股票有 4 条记录,然后它会将每个所有者的投资组合总数相加。但是,由于有多个记录,这可能会夸大该所有者的价值。因此,将插入一个标志来识别这些重复项中的任何一个。在代码的后面,每行的值乘以该标志,重复为 0,否则为 1。
我有五种更新该标志的方法。我从 0 开始,这只是使用带有 SELECT 语句的 CTE 作为基线;大约需要 0.07 秒。 1 使用带有 JOIN 的 CTE 来更新 table,大约需要 48 秒。 2 使用嵌套的 select 语句而不是 CTE,大约需要 48 秒。 3 将该 CTE 转储到 table 变量并加入该变量,大约需要 0.13 秒。 4 我原以为会是效率最低的,因为它使用了一个计数器循环,一次更新一行,但只用了0.17秒。 5 使用 CASE 语句更新所有行,加入 CTE,大约需要 48 秒。
DECLARE @OwnRec TABLE (
StockID INT
, TradeDate DATE
, Shares DECIMAL(4,0)
, Price DECIMAL(4,2)
, Owner1 INT
, Owner1Pct DECIMAL(3,2)
, Owner2 INT
, Owner2Pct DECIMAL(3,2)
, Owner3 INT
, Owner3Pct DECIMAL(3,2)
, Owner4 INT
, Owner4Pct DECIMAL(3,2)
)
DECLARE @OwnRec2 TABLE (
RecID INT IDENTITY
, StockID INT
, TradeDate DATE
, Shares DECIMAL(4,0)
, Price DECIMAL(4,2)
, Owner0 INT
, Owner0Pct DECIMAL(3,2)
, OwnerNum INT
, DupeOwner TINYINT
)
DECLARE @CullDupe TABLE (
ID INT IDENTITY
, RecID INT
)
DECLARE @Method INT
, @Counter1 INT = 0
, @StartTime DATETIME
--Populate tables with dummy data
WHILE @Counter1 < 1000
BEGIN
SET @Counter1 += 1
INSERT INTO @OwnRec (
StockID
, TradeDate
, Shares
, Price
, Owner1
, Owner1Pct
, Owner2
, Owner2Pct
, Owner3
, Owner3Pct
, Owner4
, Owner4Pct
)
SELECT @Counter1
, '2016-09-26'
, ROUND((RAND() * 1000 + 500)/25,0)*25
, ROUND((RAND() * 30 + 20),2)
, ROUND((RAND() * 100 + .5),0)
, CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
, ROUND((RAND() * 100 + .5),0)
, CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
, ROUND((RAND() * 100 + .5),0)
, CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
, ROUND((RAND() * 100 + .5),0)
, CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
END
SET @Counter1 = 0
WHILE @Counter1 < 1000
BEGIN
SET @Counter1 += 1
INSERT INTO @OwnRec (
StockID
, TradeDate
, Shares
, Price
, Owner1
, Owner1Pct
, Owner2
, Owner2Pct
, Owner3
, Owner3Pct
, Owner4
, Owner4Pct
)
SELECT @Counter1 + 1000
, '2016-09-27'
, Shares
, ROUND(Price * ROUND(RAND()*10 + .5,0)*.01+.95,2)
, Owner1
, Owner1Pct
, Owner2
, Owner2Pct
, Owner3
, Owner3Pct
, Owner4
, Owner4Pct
FROM @OwnRec WHERE StockID = @Counter1
END
UPDATE orx
SET Owner2Pct = Owner1Pct
FROM @OwnRec orx
WHERE Owner1 = Owner2
UPDATE orx
SET Owner3Pct = Owner1Pct
FROM @OwnRec orx
WHERE Owner1 = Owner3
UPDATE orx
SET Owner4Pct = Owner1Pct
FROM @OwnRec orx
WHERE Owner1 = Owner4
UPDATE orx
SET Owner3Pct = Owner2Pct
FROM @OwnRec orx
WHERE Owner2 = Owner3
UPDATE orx
SET Owner4Pct = Owner2Pct
FROM @OwnRec orx
WHERE Owner2 = Owner4
UPDATE orx
SET Owner4Pct = Owner3Pct
FROM @OwnRec orx
WHERE Owner3 = Owner4
INSERT INTO @OwnRec2
SELECT StockID, TradeDate, Shares, Price, Owner1 AS Owner0, Owner1Pct, 1, 1 AS Owner0Pct
FROM @OwnRec
UNION
SELECT StockID, TradeDate, Shares, Price, Owner2 AS Owner0, Owner2Pct, 2, 1 AS Owner0Pct
FROM @OwnRec
UNION
SELECT StockID, TradeDate, Shares, Price, Owner3 AS Owner0, Owner3Pct, 3, 1 AS Owner0Pct
FROM @OwnRec
UNION
SELECT StockID, TradeDate, Shares, Price, Owner4 AS Owner0, Owner4Pct, 4, 1 AS Owner0Pct
FROM @OwnRec
--END Populate tables with dummy data
SET @StartTime = GETDATE()
SET @Method = 5 -- Choose which method to test
--CASE 0: Just identify duplicates
IF @Method = 0
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
SELECT * FROM CullDupe WHERE rn > 1
END
--CASE 1: Update on JOIN to CTE
IF @Method = 1
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
UPDATE OR2
SET DupeOwner = 0
FROM @OwnRec2 OR2
JOIN CullDupe cd
ON OR2.RecID = cd.RecID
WHERE rn > 1
END
--CASE 2: Update on JOIN to nested SELECT
IF @Method = 2
BEGIN
UPDATE OR2
SET DupeOwner = 0
FROM @OwnRec2 OR2
JOIN (SELECT RecID, ROW_NUMBER() OVER
(PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2) cd
ON OR2.RecID = cd.RecID
WHERE rn > 1
END
--CASE 3: Update on JOIN to temp table
IF @Method = 3
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
INSERT INTO @CullDupe SELECT RecID FROM CullDupe WHERE rn > 1
UPDATE OR2
SET DupeOwner = 0
FROM @OwnRec2 OR2
JOIN @CullDupe cd
ON OR2.RecID = cd.RecID
END
--CASE 4: Update using counted loop
IF @Method = 4
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
INSERT INTO @CullDupe SELECT RecID FROM CullDupe WHERE rn > 1
SET @Counter1 = 0
WHILE @Counter1 < (SELECT MAX(ID) FROM @CullDupe)
BEGIN
SET @Counter1 += 1
UPDATE OR2
SET DupeOwner = 0
FROM @OwnRec2 OR2
WHERE RecID = (SELECT RecID FROM @CullDupe WHERE ID = @Counter1)
END
END
--CASE 5: Update using JOIN to CTE, but updating all rows (CASE to identify)
IF @Method = 5
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
UPDATE OR2
SET DupeOwner = CASE WHEN rn > 1 THEN 0 ELSE 1 END
FROM @OwnRec2 OR2
JOIN CullDupe cd
ON OR2.RecID = cd.RecID
END
SELECT 'Method ' + CAST(@Method AS NVARCHAR(1)) + ': ' + CAST(DATEDIFF(ms,@StartTime,GETDATE()) AS NVARCHAR(10)) + ' milliseconds'
这是 table 变量的常见问题。
引用它们的语句的执行计划甚至在批处理开始执行之前编译,因此在插入语句执行之前编译。
如果您 select 您的问题执行计划之一并查看属性 window,您将看到 table 基数为 0。
尽管如此,它仍然假设 1 行将从空 table 中发出,因为这是大多数情况下执行计划中叶运算符的最小行估计。嵌套循环内部的子树对来自驱动 table 的每一行执行一次。由于这估计是 1 行,下面突出显示的子树估计会执行一次。事实上,整个子树将被执行 8,000 次(包括昂贵的 table 扫描和排序运算符)。
当您将行编号的结果具体化为 table 变量时,您存储该子树的结果,从而确保它只计算一次(尽管使用它的计划仍然有一个次优嵌套循环加入新的 table 变量)。
单行估计的常见解决方案是将 OPTION (RECOMPILE)
添加到问题语句中,以便可以考虑语句执行时的 table 基数,或者使用跟踪标志 2453(可以触发基数更改后自动重新编译)
或使用 #temp table 代替(它可以触发自动重新编译并额外受益于列统计信息)
有关其中一些内容的更多详细信息,请参见 in my answer here。