希望删除 CTE 上丑陋的自连接
Hoping to remove an ugly self-join on a CTE
我有一个创建排序字典的查询(排序是因为有一个增量 id
来识别键的相对位置).
然后我想知道,对于每一行,value
是否作为 key
存在于字典中靠后的任何其他行中。我目前使用 CROSS APPLY
中的相关子查询来做到这一点,这实际上是对 CTE 的自连接。
据我了解,这意味着表示 Dictionary 的 CTE 必须计算两次?
除了使用表变量 (在函数内部),还有其他建议吗?
-- Sample ordered dictionary: [id] gives the relative position of each [key].
WITH dictionary ([id], [key], [val]) AS
(
    SELECT 1, 'a', 'b' UNION ALL
    SELECT 2, 'b', 'c' UNION ALL
    SELECT 3, 'c', 'a' UNION ALL
    SELECT 4, 'x', 'w' UNION ALL
    SELECT 5, 'y', 'x' UNION ALL
    SELECT 6, 'z', 'y'
)
SELECT
    dict.[id],
    dict.[key],
    dict.[val],
    lookup.hits,
    map.[from],
    map.[to]
FROM
    dictionary AS dict
    -- Count the later rows whose [key] equals this row's [val].
    -- This is the second reference to the CTE, i.e. the self-join in question.
    CROSS APPLY
    (
        SELECT COUNT(*)
        FROM dictionary
        WHERE dictionary.[key] = dict.[val]
          AND dictionary.id > dict.id
    ) AS lookup (hits)
    -- Emit a single (1, 3) row when there is no later match,
    -- otherwise the two rows (1, 2) and (2, 3).
    CROSS APPLY
    (
        SELECT 1, 3 WHERE lookup.hits = 0
        UNION ALL
        SELECT 1, 2 WHERE lookup.hits > 0
        UNION ALL
        SELECT 2, 3 WHERE lookup.hits > 0
    ) AS map ([from], [to])
-- [key]s 'c', 'x', 'y' and 'z' should each produce only one output row
-- It's "acceptable" for only 'z' to have just one output row IFF a self join can be avoided
我能想到的其他选项都是自连接的变体...
-- Variant of the FROM clause: pre-aggregate the highest id per [key], then
-- join it to each row's [val]. Still references the dictionary CTE twice.
-- [key] is bracketed because KEY is a reserved keyword in T-SQL.
dictionary dict
LEFT JOIN
(
SELECT [key], MAX(id) AS id FROM dictionary GROUP BY [key]
)
lookup
ON lookup.[key] = dict.[val]
AND lookup.id > dict.id
或者...
-- Variant of the FROM clause: lookup.hits is 1 when a later row uses this
-- row's [val] as its [key], and NULL otherwise. Still a self-join on the CTE.
-- [key] is bracketed because KEY is a reserved keyword in T-SQL.
dictionary dict
OUTER APPLY
(
SELECT 1 WHERE EXISTS (SELECT * FROM dictionary WHERE dictionary.id > dict.id AND dictionary.[key] = dict.[val])
)
lookup(hits)
但是,我正在尝试避免 CTE 的自连接,可能是我没有想到的窗口函数?只是为了避免 CTE 被计算两次...
(如果能够避免自连接,忽略 lookup.id > dict.id
这一条件也是可以接受的...)
编辑: 添加了更完整的示例,还有一个 SQL Fiddle,感谢@MartinSmith指出一些不一致之处...
http://sqlfiddle.com/#!6/9eecb7db59d16c80417c72d1e1f4fbf1/17407
这是您可以使用窗口函数的一种方法。
首先将行逆透视(unpivot),使键和值都成为通用的 term,
然后使用 MAX ... OVER (PARTITION BY term)
查找该 term 作为键出现的各行中最大的 id。
在此示例中,它然后设置一个标志并丢弃由逆透视添加的重复行(保留该对中的 context = 'v'
行,因为这是具有标志所需信息的行)。
然后您可以将其与包含 map
值的表值构造函数(table value constructor)进行连接。
-- Sample ordered dictionary: id gives the relative position of each [key].
WITH dictionary (id, [key], value) AS
(
    SELECT 1, 'a', 'b'
    UNION ALL SELECT 2, 'b', 'c'
    UNION ALL SELECT 3, 'c', 'a'
    UNION ALL SELECT 4, 'x', 'w'
    UNION ALL SELECT 5, 'y', 'x'
    UNION ALL SELECT 6, 'z', 'y'
),
-- Unpivot every row into a 'k' row (term = key) and a 'v' row (term = value),
-- then compute, per term, the highest id at which that term is used as a key.
-- The CASE yields NULL for 'v' rows, so MAX only considers 'k' rows.
t1 AS
(
    SELECT dict.id,
           dict.[key],
           dict.value,
           v.context,
           highest_id_where_term_is_key = MAX(CASE
                                                WHEN v.context = 'k'
                                                  THEN v.id
                                              END) OVER (PARTITION BY v.term)
    FROM dictionary dict
         CROSS APPLY (VALUES(dict.id, dict.[key], 'k'),
                            (dict.id, dict.value, 'v')) v(id, term, context)
),
-- Keep only the 'v' row of each unpivoted pair and flag whether the row's
-- value appears as a key on a later row. A NULL highest id (term never used
-- as a key) makes the comparison unknown, so the flag falls through to 0.
t2 AS
(
    SELECT id,
           [key],
           value,
           highest_id_where_term_is_key,
           val_in_later_key = CASE
                                WHEN id < highest_id_where_term_is_key
                                  THEN 1
                                ELSE 0
                              END
    FROM t1
    WHERE context = 'v'
    -- Discard duplicate row from the unpivot - only want the "value" row
)
SELECT t2.id,
       t2.[key],
       t2.value,
       t2.highest_id_where_term_is_key,
       map.[from],
       map.[to]
FROM t2
     INNER JOIN (VALUES (1, 3, 0),
                        (1, 2, 1),
                        (2, 3, 1) ) map([from], [to], [flg])
       ON map.flg = t2.val_in_later_key
-- map.[from] breaks ties between the two map rows of the same id, so the
-- row order is deterministic.
ORDER BY t2.id,
         map.[from]
返回结果:
+----+-----+-------+------------------------------+------+----+
| id | key | value | highest_id_where_term_is_key | from | to |
+----+-----+-------+------------------------------+------+----+
| 1 | a | b | 2 | 1 | 2 |
| 1 | a | b | 2 | 2 | 3 |
| 2 | b | c | 3 | 1 | 2 |
| 2 | b | c | 3 | 2 | 3 |
| 3 | c | a | 1 | 1 | 3 |
| 4 | x | w | NULL | 1 | 3 |
| 5 | y | x | 4 | 1 | 3 |
| 6 | z | y | 5 | 1 | 3 |
+----+-----+-------+------------------------------+------+----+
我有一个创建排序字典的查询(排序是因为有一个增量 id
来识别键的相对位置).
然后我想知道,对于每一行,value
是否作为 key
存在于字典中靠后的任何其他行中。我目前使用 CROSS APPLY
中的相关子查询来做到这一点,这实际上是对 CTE 的自连接。
据我了解,这意味着表示 Dictionary 的 CTE 必须计算两次?
除了使用表变量 (在函数内部),还有其他建议吗?
-- Sample ordered dictionary: [id] gives the relative position of each [key].
WITH dictionary ([id], [key], [val]) AS
(
    SELECT 1, 'a', 'b' UNION ALL
    SELECT 2, 'b', 'c' UNION ALL
    SELECT 3, 'c', 'a' UNION ALL
    SELECT 4, 'x', 'w' UNION ALL
    SELECT 5, 'y', 'x' UNION ALL
    SELECT 6, 'z', 'y'
)
SELECT
    dict.[id],
    dict.[key],
    dict.[val],
    lookup.hits,
    map.[from],
    map.[to]
FROM
    dictionary AS dict
    -- Count the later rows whose [key] equals this row's [val].
    -- This is the second reference to the CTE, i.e. the self-join in question.
    CROSS APPLY
    (
        SELECT COUNT(*)
        FROM dictionary
        WHERE dictionary.[key] = dict.[val]
          AND dictionary.id > dict.id
    ) AS lookup (hits)
    -- Emit a single (1, 3) row when there is no later match,
    -- otherwise the two rows (1, 2) and (2, 3).
    CROSS APPLY
    (
        SELECT 1, 3 WHERE lookup.hits = 0
        UNION ALL
        SELECT 1, 2 WHERE lookup.hits > 0
        UNION ALL
        SELECT 2, 3 WHERE lookup.hits > 0
    ) AS map ([from], [to])
-- [key]s 'c', 'x', 'y' and 'z' should each produce only one output row
-- It's "acceptable" for only 'z' to have just one output row IFF a self join can be avoided
我能想到的其他选项都是自连接的变体...
-- Variant of the FROM clause: pre-aggregate the highest id per [key], then
-- join it to each row's [val]. Still references the dictionary CTE twice.
-- [key] is bracketed because KEY is a reserved keyword in T-SQL.
dictionary dict
LEFT JOIN
(
SELECT [key], MAX(id) AS id FROM dictionary GROUP BY [key]
)
lookup
ON lookup.[key] = dict.[val]
AND lookup.id > dict.id
或者...
-- Variant of the FROM clause: lookup.hits is 1 when a later row uses this
-- row's [val] as its [key], and NULL otherwise. Still a self-join on the CTE.
-- [key] is bracketed because KEY is a reserved keyword in T-SQL.
dictionary dict
OUTER APPLY
(
SELECT 1 WHERE EXISTS (SELECT * FROM dictionary WHERE dictionary.id > dict.id AND dictionary.[key] = dict.[val])
)
lookup(hits)
但是,我正在尝试避免 CTE 的自连接,可能是我没有想到的窗口函数?只是为了避免 CTE 被计算两次...
(如果能够避免自连接,忽略 lookup.id > dict.id
这一条件也是可以接受的...)
编辑: 添加了更完整的示例,还有一个 SQL Fiddle,感谢@MartinSmith指出一些不一致之处...
http://sqlfiddle.com/#!6/9eecb7db59d16c80417c72d1e1f4fbf1/17407
这是您可以使用窗口函数的一种方法。
首先将行逆透视(unpivot),使键和值都成为通用的 term,
然后使用 MAX ... OVER (PARTITION BY term)
查找该 term 作为键出现的各行中最大的 id。
在此示例中,它然后设置一个标志并丢弃由逆透视添加的重复行(保留该对中的 context = 'v'
行,因为这是具有标志所需信息的行)。
然后您可以将其与包含 map
值的表值构造函数(table value constructor)进行连接。
-- Sample ordered dictionary: id gives the relative position of each [key].
WITH dictionary (id, [key], value) AS
(
    SELECT 1, 'a', 'b'
    UNION ALL SELECT 2, 'b', 'c'
    UNION ALL SELECT 3, 'c', 'a'
    UNION ALL SELECT 4, 'x', 'w'
    UNION ALL SELECT 5, 'y', 'x'
    UNION ALL SELECT 6, 'z', 'y'
),
-- Unpivot every row into a 'k' row (term = key) and a 'v' row (term = value),
-- then compute, per term, the highest id at which that term is used as a key.
-- The CASE yields NULL for 'v' rows, so MAX only considers 'k' rows.
t1 AS
(
    SELECT dict.id,
           dict.[key],
           dict.value,
           v.context,
           highest_id_where_term_is_key = MAX(CASE
                                                WHEN v.context = 'k'
                                                  THEN v.id
                                              END) OVER (PARTITION BY v.term)
    FROM dictionary dict
         CROSS APPLY (VALUES(dict.id, dict.[key], 'k'),
                            (dict.id, dict.value, 'v')) v(id, term, context)
),
-- Keep only the 'v' row of each unpivoted pair and flag whether the row's
-- value appears as a key on a later row. A NULL highest id (term never used
-- as a key) makes the comparison unknown, so the flag falls through to 0.
t2 AS
(
    SELECT id,
           [key],
           value,
           highest_id_where_term_is_key,
           val_in_later_key = CASE
                                WHEN id < highest_id_where_term_is_key
                                  THEN 1
                                ELSE 0
                              END
    FROM t1
    WHERE context = 'v'
    -- Discard duplicate row from the unpivot - only want the "value" row
)
SELECT t2.id,
       t2.[key],
       t2.value,
       t2.highest_id_where_term_is_key,
       map.[from],
       map.[to]
FROM t2
     INNER JOIN (VALUES (1, 3, 0),
                        (1, 2, 1),
                        (2, 3, 1) ) map([from], [to], [flg])
       ON map.flg = t2.val_in_later_key
-- map.[from] breaks ties between the two map rows of the same id, so the
-- row order is deterministic.
ORDER BY t2.id,
         map.[from]
返回结果:
+----+-----+-------+------------------------------+------+----+
| id | key | value | highest_id_where_term_is_key | from | to |
+----+-----+-------+------------------------------+------+----+
| 1 | a | b | 2 | 1 | 2 |
| 1 | a | b | 2 | 2 | 3 |
| 2 | b | c | 3 | 1 | 2 |
| 2 | b | c | 3 | 2 | 3 |
| 3 | c | a | 1 | 1 | 3 |
| 4 | x | w | NULL | 1 | 3 |
| 5 | y | x | 4 | 1 | 3 |
| 6 | z | y | 5 | 1 | 3 |
+----+-----+-------+------------------------------+------+----+