将查询重写为更简单的形式
Rewriting a query into simpler form
-- Original nested-subquery form of the query.
-- Innermost level: distinct joined rows for one date, with everything from the
--   ';;;;;' marker to the end of item_detail removed.
-- Middle level: one row per (ID, ORD_NBR, PRD_QTY) carrying the aggregated PRD_NO.
-- Outer level: builds Test_1 only for rows whose PRD_QTY equals 'TRUE'.
SELECT
    ID,
    PRD_QTY,
    CASE
        WHEN PRD_QTY = 'TRUE' THEN CONCAT(PRD_NO, ':', 'high')
    END AS Test_1
FROM (
    SELECT
        ID,
        ORD_NBR,
        PRD_QTY,
        CASE
            -- The aggregated list is '' only when no non-empty item_detail exists in the group.
            WHEN LISTAGG(DISTINCT item_detail, ',') WITHIN GROUP (ORDER BY item_detail) = ''
                THEN 'null'
            ELSE
                LISTAGG(DISTINCT split(item_detail, ';')[1]::text, ',')
                    WITHIN GROUP (ORDER BY split(item_detail, ';')[1]::text)
        END AS PRD_NO
    FROM (
        SELECT DISTINCT
            ID,
            A.ORD_NBR,
            C.PRD_QTY,
            REGEXP_REPLACE(item_detail, '\;;;;;.*$', '') AS item_detail
        FROM Table_1 A
        INNER JOIN Table_2 B
            ON A.ST_ID = B.ST_ID
        INNER JOIN Table_3 C
            ON C.ST_ID = B.ST_ID
        WHERE DATE_COL = '2022-02-22'
    )
    GROUP BY
        ID,
        ORD_NBR,
        PRD_QTY
)
是否可以将此查询重写为更简单的形式?我不想使用多个子查询。
可以使用公用表表达式 (CTE) 重写:
-- Same logic as the nested query, with each subquery lifted into its own CTE.
WITH cte AS (
    -- Distinct joined rows for the target date; item_detail is cut at the ';;;;;' marker.
    SELECT DISTINCT
        ID,
        A.ORD_NBR,
        C.PRD_QTY,
        REGEXP_REPLACE(item_detail, '\;;;;;.*$', '') AS item_detail
    FROM Table_1 A
    INNER JOIN Table_2 B
        ON A.ST_ID = B.ST_ID
    INNER JOIN Table_3 C
        ON C.ST_ID = B.ST_ID
    WHERE DATE_COL = '2022-02-22'
),
cte2 AS (
    -- One row per (ID, ORD_NBR, PRD_QTY): the literal 'null' when the aggregated
    -- item_detail list is empty, otherwise the list of element [1] of each item_detail.
    SELECT
        ID,
        ORD_NBR,
        PRD_QTY,
        CASE
            WHEN LISTAGG(DISTINCT item_detail, ',') WITHIN GROUP (ORDER BY item_detail) = ''
                THEN 'null'
            ELSE
                LISTAGG(DISTINCT split(item_detail, ';')[1]::text, ',')
                    WITHIN GROUP (ORDER BY split(item_detail, ';')[1]::text)
        END AS PRD_NO
    FROM cte
    GROUP BY ID, ORD_NBR, PRD_QTY
)
SELECT
    ID,
    PRD_QTY,
    CASE
        WHEN PRD_QTY = 'TRUE' THEN CONCAT(PRD_NO, ':', 'high')
    END AS Test_1
FROM cte2;
此代码的一个问题是:您在 cte 的 select 中使用了 DISTINCT(沿用 Lukasz 的写法),而 DISTINCT 与 GROUP BY 等价。
因此 cte2 中的 GROUP BY ID, ORD_NBR, PRD_QTY 再加上对 item_detail 的聚合处理,实际上只是重复了前面 DISTINCT 已经完成的工作。
现在出于性能原因,分两步执行此操作可能会有所帮助,但这不能作为 Stack overflow 上的建议,因为它 100% 与您的数据相关。
然后在 cte2 中,您把若干项聚合成一个列表,再将其与空字符串进行比较。这种做法相当奇怪:从逻辑上讲,只有当组内的 item_detail 全为 null、全为空字符串(或两者同时出现)时,结果才会是空字符串;只要存在任何其他值,聚合结果就不会是空字符串。
-- Demo: LISTAGG output for groups built from empty strings, NULLs and real values,
-- and whether that output compares equal to ''.
SELECT
    column1,
    LISTAGG(column2) AS la,
    la = '' AS was_empty   -- refers to the alias defined on the previous line
FROM VALUES
    (1, ''),        -- solo empty
    (2, null),      -- solo null
    (3, null),      -- both null and empty
    (3, ''),
    (4, 'pot'),     -- both pot and empty
    (4, ''),
    (5, 'pot'),     -- both pot and null
    (5, null),
    (6, 'pot'),     -- two valid strings
    (6, 'kettle')
GROUP BY column1
ORDER BY column1;
COLUMN1 | LA | WAS_EMPTY |
---|---|---|
1 | | TRUE |
2 | | TRUE |
3 | | TRUE |
4 | pot | FALSE |
5 | pot | FALSE |
6 | potkettle | FALSE |
第一个 LISTAGG 的输出只用于这一个判断,既然不需要从中得到任何其他信息,其中的 ORDER BY 就完全是浪费。这段代码真正想判断的是“是否存在非空字符串”。
count_if(item_detail <> '') > 0
比下面的写法更具可读性,也更切题:
LISTAGG(DISTINCT item_detail, ',') WITHIN GROUP (ORDER BY item_detail) = ''
另外,既然后面还要对 item_detail 做拆分 (SPLIT),就应该把拆分提前到上游的 cte 中完成。
因此 cte 和 cte2 可以写成:
-- Two-step rewrite: extract the wanted field early, then aggregate it.
WITH cte AS (
    -- Joined rows for the target date; sub_value is element [1] of item_detail
    -- after everything from the ';;;;;' marker onward has been removed.
    SELECT
        id,
        a.ord_nbr,
        c.prd_qty,
        SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text AS sub_value
    FROM table_1 AS a
    INNER JOIN table_2 AS b
        ON a.st_id = b.st_id
    INNER JOIN table_3 AS c
        ON c.st_id = b.st_id
    WHERE date_col = '2022-02-22'
),
cte2 AS (
    -- Distinct sub_values per group as a comma-separated list; an empty list becomes NULL.
    SELECT
        id,
        ord_nbr,
        prd_qty,
        NULLIF(
            LISTAGG(DISTINCT sub_value, ',') WITHIN GROUP (ORDER BY sub_value),
            ''
        ) AS prd_no
    FROM cte
    GROUP BY
        id,
        ord_nbr,
        prd_qty
)
SELECT
    id,
    prd_qty,
    IFF(prd_qty = 'TRUE', CONCAT(prd_no, ':', 'high'), NULL) AS test_1
FROM cte2;
这是因为 SPLIT()[1]::text 在输入为 null 或拆分后元素个数不足时会输出 null:
-- Demo: element [1] of SPLIT(...) for NULL, empty, one-, two- and three-part inputs.
-- The expression text is left untouched so the generated column name stays the same.
SELECT
    column1,
    SPLIT(column1, ';')[1]::text
FROM VALUES
    (null),
    (''),
    ('A'),
    ('A;B'),
    ('A;B;C');
给出:
COLUMN1 | SPLIT(COLUMN1, ';')[1]::TEXT |
---|---|
null | null |
"empty string" | null |
A | null |
A;B | B |
A;B;C | B |
但是,如果您不介意把 sub_value 的表达式重复写两次,SQL 可以合并为一个 CTE:
-- Single-CTE variant: the split expression is repeated inside LISTAGG and its ORDER BY.
WITH cte AS (
    SELECT
        id,
        a.ord_nbr,
        c.prd_qty,
        -- Comma-separated distinct element-[1] values; an empty list becomes NULL.
        NULLIF(
            LISTAGG(
                DISTINCT SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text,
                ','
            ) WITHIN GROUP (
                ORDER BY SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text
            ),
            ''
        ) AS prd_no
    FROM table_1 AS a
    INNER JOIN table_2 AS b
        ON a.st_id = b.st_id
    INNER JOIN table_3 AS c
        ON c.st_id = b.st_id
    WHERE date_col = '2022-02-22'
    GROUP BY
        id,
        a.ord_nbr,
        c.prd_qty
)
SELECT
    id,
    prd_qty,
    IFF(prd_qty = 'TRUE', CONCAT(prd_no, ':', 'high'), NULL) AS test_1
FROM cte;
但是如果我们回到 LISTAGG 示例:
-- Demo: LISTAGG without a delimiter next to COUNT_IF over the same groups.
-- The unnamed count_if expression keeps its exact text so its column name is unchanged.
SELECT
    column1,
    --,listagg(nullif(column2,''), ',') as la
    LISTAGG(column2) AS la,
    la = '' AS was_empty,   -- refers to the alias defined on the previous line
    count_if(column2 <> '')
FROM VALUES
    (1, ''),        -- solo empty
    (2, null),      -- solo null
    (3, null),      -- both null and empty
    (3, ''),
    (4, 'pot'),     -- both pot and empty
    (4, ''),
    (5, 'pot'),     -- both pot and null
    (5, null),
    (6, 'pot'),     -- two valid strings
    (6, 'kettle')
GROUP BY column1
ORDER BY column1;
然后我们将分隔符放入:
listagg(column2, ',')
对于第 4 组的值,我们得到的结果是 pot,(末尾多出了一个分隔符),因此 sub_value 在生成时实际上应该写成 NULLIF(<value>, ''):
因此,较长的形式为:
-- Long form: empty sub_values are turned into NULL before aggregation.
WITH cte AS (
    -- DISTINCT de-duplicates (id, ord_nbr, prd_qty, sub_value) here, so the
    -- LISTAGG in cte2 does not need its own DISTINCT.
    SELECT DISTINCT
        id,
        a.ord_nbr,
        c.prd_qty,
        NULLIF(
            SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text,
            ''
        ) AS sub_value
    FROM table_1 AS a
    INNER JOIN table_2 AS b
        ON a.st_id = b.st_id
    INNER JOIN table_3 AS c
        ON c.st_id = b.st_id
    WHERE date_col = '2022-02-22'
),
cte2 AS (
    -- Comma-separated sub_values per group; an empty list becomes NULL.
    SELECT
        id,
        ord_nbr,
        prd_qty,
        NULLIF(
            LISTAGG(sub_value, ',') WITHIN GROUP (ORDER BY sub_value),
            ''
        ) AS prd_no
    FROM cte
    GROUP BY
        id,
        ord_nbr,
        prd_qty
)
SELECT
    id,
    prd_qty,
    IFF(prd_qty = 'TRUE', CONCAT(prd_no, ':', 'high'), NULL) AS test_1
FROM cte2;
或更短的形式:
-- Short form: split, empty-to-NULL conversion and aggregation in a single CTE.
-- Fix: LISTAGG was never closed and had lost its ',' delimiter, and the closing
-- parenthesis of WITHIN GROUP was misplaced, so the statement did not parse.
WITH cte AS (
    SELECT
        id,
        a.ord_nbr,
        c.prd_qty,
        -- Inner NULLIF drops empty sub-values before aggregation;
        -- outer NULLIF turns an empty aggregated list into NULL.
        NULLIF(
            LISTAGG(
                DISTINCT NULLIF(SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text, ''),
                ','
            ) WITHIN GROUP (
                ORDER BY NULLIF(SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text, '')
            ),
            ''
        ) AS prd_no
    FROM table_1 AS a
    INNER JOIN table_2 AS b
        ON a.st_id = b.st_id
    INNER JOIN table_3 AS c
        ON c.st_id = b.st_id
    WHERE date_col = '2022-02-22'
    GROUP BY
        id,
        a.ord_nbr,
        c.prd_qty
)
SELECT
    id,
    prd_qty,
    IFF(prd_qty = 'TRUE', CONCAT(prd_no, ':', 'high'), NULL) AS test_1
FROM cte;
哦是的:
CONCAT(prd_no, ':', 'high')
等价于:
CONCAT(prd_no, ':high')
另外,其实可以把最后的 IFF/CASE 直接放进执行分组的同一个查询中:
-- Demo: aggregate, empty-to-NULL conversion and the final IFF in one grouped SELECT.
-- Each later column refers to the alias defined just above it.
SELECT
    column1,
    LISTAGG(DISTINCT NULLIF(column2, ''), ',')
        WITHIN GROUP (ORDER BY NULLIF(column2, '')) AS la,
    NULLIF(la, '') AS was_empty,
    IFF(column1 % 2 = 0, CONCAT(was_empty, ':', 'high'), NULL) AS test_1
FROM VALUES
    (1, ''),        -- solo empty
    (2, null),      -- solo null
    (3, null),      -- both null and empty
    (3, ''),
    (4, 'pot'),     -- both pot and empty
    (4, ''),
    (5, 'pot'),     -- both pot and null
    (5, null),
    (6, 'pot'),     -- two valid strings
    (6, 'kettle')
GROUP BY column1
ORDER BY column1;
真正的代码可以是:
-- Final single-statement form: aggregation and the 'high' label in one grouped SELECT.
-- Fix: LISTAGG was never closed and had lost its ',' delimiter, and the closing
-- parenthesis of WITHIN GROUP was misplaced, so the statement did not parse.
SELECT
    id,
    a.ord_nbr,
    c.prd_qty,
    -- Inner NULLIF drops empty sub-values before aggregation;
    -- outer NULLIF turns an empty aggregated list into NULL.
    NULLIF(
        LISTAGG(
            DISTINCT NULLIF(SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text, ''),
            ','
        ) WITHIN GROUP (
            ORDER BY NULLIF(SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text, '')
        ),
        ''
    ) AS prd_no,
    -- prd_no here is the alias defined just above.
    IFF(prd_qty = 'TRUE', CONCAT(prd_no, ':', 'high'), NULL) AS test_1
FROM table_1 AS a
INNER JOIN table_2 AS b
    ON a.st_id = b.st_id
INNER JOIN table_3 AS c
    ON c.st_id = b.st_id
WHERE date_col = '2022-02-22'
GROUP BY
    id,
    a.ord_nbr,
    c.prd_qty;
-- Original nested-subquery form of the query.
-- Innermost level: distinct joined rows for one date, with everything from the
--   ';;;;;' marker to the end of item_detail removed.
-- Middle level: one row per (ID, ORD_NBR, PRD_QTY) carrying the aggregated PRD_NO.
-- Outer level: builds Test_1 only for rows whose PRD_QTY equals 'TRUE'.
SELECT
    ID,
    PRD_QTY,
    CASE
        WHEN PRD_QTY = 'TRUE' THEN CONCAT(PRD_NO, ':', 'high')
    END AS Test_1
FROM (
    SELECT
        ID,
        ORD_NBR,
        PRD_QTY,
        CASE
            -- The aggregated list is '' only when no non-empty item_detail exists in the group.
            WHEN LISTAGG(DISTINCT item_detail, ',') WITHIN GROUP (ORDER BY item_detail) = ''
                THEN 'null'
            ELSE
                LISTAGG(DISTINCT split(item_detail, ';')[1]::text, ',')
                    WITHIN GROUP (ORDER BY split(item_detail, ';')[1]::text)
        END AS PRD_NO
    FROM (
        SELECT DISTINCT
            ID,
            A.ORD_NBR,
            C.PRD_QTY,
            REGEXP_REPLACE(item_detail, '\;;;;;.*$', '') AS item_detail
        FROM Table_1 A
        INNER JOIN Table_2 B
            ON A.ST_ID = B.ST_ID
        INNER JOIN Table_3 C
            ON C.ST_ID = B.ST_ID
        WHERE DATE_COL = '2022-02-22'
    )
    GROUP BY
        ID,
        ORD_NBR,
        PRD_QTY
)
是否可以将此查询重写为更简单的形式?我不想使用多个子查询。
可以使用公用表表达式 (CTE) 重写:
-- Same logic as the nested query, with each subquery lifted into its own CTE.
WITH cte AS (
    -- Distinct joined rows for the target date; item_detail is cut at the ';;;;;' marker.
    SELECT DISTINCT
        ID,
        A.ORD_NBR,
        C.PRD_QTY,
        REGEXP_REPLACE(item_detail, '\;;;;;.*$', '') AS item_detail
    FROM Table_1 A
    INNER JOIN Table_2 B
        ON A.ST_ID = B.ST_ID
    INNER JOIN Table_3 C
        ON C.ST_ID = B.ST_ID
    WHERE DATE_COL = '2022-02-22'
),
cte2 AS (
    -- One row per (ID, ORD_NBR, PRD_QTY): the literal 'null' when the aggregated
    -- item_detail list is empty, otherwise the list of element [1] of each item_detail.
    SELECT
        ID,
        ORD_NBR,
        PRD_QTY,
        CASE
            WHEN LISTAGG(DISTINCT item_detail, ',') WITHIN GROUP (ORDER BY item_detail) = ''
                THEN 'null'
            ELSE
                LISTAGG(DISTINCT split(item_detail, ';')[1]::text, ',')
                    WITHIN GROUP (ORDER BY split(item_detail, ';')[1]::text)
        END AS PRD_NO
    FROM cte
    GROUP BY ID, ORD_NBR, PRD_QTY
)
SELECT
    ID,
    PRD_QTY,
    CASE
        WHEN PRD_QTY = 'TRUE' THEN CONCAT(PRD_NO, ':', 'high')
    END AS Test_1
FROM cte2;
此代码的一个问题是:您在 cte 的 select 中使用了 DISTINCT(沿用 Lukasz 的写法),而 DISTINCT 与 GROUP BY 等价。
因此 cte2 中的 GROUP BY ID, ORD_NBR, PRD_QTY 再加上对 item_detail 的聚合处理,实际上只是重复了前面 DISTINCT 已经完成的工作。
现在出于性能原因,分两步执行此操作可能会有所帮助,但这不能作为 Stack overflow 上的建议,因为它 100% 与您的数据相关。
然后在 cte2 中,您把若干项聚合成一个列表,再将其与空字符串进行比较。这种做法相当奇怪:从逻辑上讲,只有当组内的 item_detail 全为 null、全为空字符串(或两者同时出现)时,结果才会是空字符串;只要存在任何其他值,聚合结果就不会是空字符串。
-- Demo: LISTAGG output for groups built from empty strings, NULLs and real values,
-- and whether that output compares equal to ''.
SELECT
    column1,
    LISTAGG(column2) AS la,
    la = '' AS was_empty   -- refers to the alias defined on the previous line
FROM VALUES
    (1, ''),        -- solo empty
    (2, null),      -- solo null
    (3, null),      -- both null and empty
    (3, ''),
    (4, 'pot'),     -- both pot and empty
    (4, ''),
    (5, 'pot'),     -- both pot and null
    (5, null),
    (6, 'pot'),     -- two valid strings
    (6, 'kettle')
GROUP BY column1
ORDER BY column1;
COLUMN1 | LA | WAS_EMPTY |
---|---|---|
1 | | TRUE |
2 | | TRUE |
3 | | TRUE |
4 | pot | FALSE |
5 | pot | FALSE |
6 | potkettle | FALSE |
第一个 LISTAGG 的输出只用于这一个判断,既然不需要从中得到任何其他信息,其中的 ORDER BY 就完全是浪费。这段代码真正想判断的是“是否存在非空字符串”。
count_if(item_detail <> '') > 0
比下面的写法更具可读性,也更切题:
LISTAGG(DISTINCT item_detail, ',') WITHIN GROUP (ORDER BY item_detail) = ''
另外,既然后面还要对 item_detail 做拆分 (SPLIT),就应该把拆分提前到上游的 cte 中完成。
因此 cte 和 cte2 可以写成:
-- Two-step rewrite: extract the wanted field early, then aggregate it.
WITH cte AS (
    -- Joined rows for the target date; sub_value is element [1] of item_detail
    -- after everything from the ';;;;;' marker onward has been removed.
    SELECT
        id,
        a.ord_nbr,
        c.prd_qty,
        SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text AS sub_value
    FROM table_1 AS a
    INNER JOIN table_2 AS b
        ON a.st_id = b.st_id
    INNER JOIN table_3 AS c
        ON c.st_id = b.st_id
    WHERE date_col = '2022-02-22'
),
cte2 AS (
    -- Distinct sub_values per group as a comma-separated list; an empty list becomes NULL.
    SELECT
        id,
        ord_nbr,
        prd_qty,
        NULLIF(
            LISTAGG(DISTINCT sub_value, ',') WITHIN GROUP (ORDER BY sub_value),
            ''
        ) AS prd_no
    FROM cte
    GROUP BY
        id,
        ord_nbr,
        prd_qty
)
SELECT
    id,
    prd_qty,
    IFF(prd_qty = 'TRUE', CONCAT(prd_no, ':', 'high'), NULL) AS test_1
FROM cte2;
这是因为 SPLIT()[1]::text 在输入为 null 或拆分后元素个数不足时会输出 null:
-- Demo: element [1] of SPLIT(...) for NULL, empty, one-, two- and three-part inputs.
-- The expression text is left untouched so the generated column name stays the same.
SELECT
    column1,
    SPLIT(column1, ';')[1]::text
FROM VALUES
    (null),
    (''),
    ('A'),
    ('A;B'),
    ('A;B;C');
给出:
COLUMN1 | SPLIT(COLUMN1, ';')[1]::TEXT |
---|---|
null | null |
"empty string" | null |
A | null |
A;B | B |
A;B;C | B |
但是,如果您不介意把 sub_value 的表达式重复写两次,SQL 可以合并为一个 CTE:
-- Single-CTE variant: the split expression is repeated inside LISTAGG and its ORDER BY.
WITH cte AS (
    SELECT
        id,
        a.ord_nbr,
        c.prd_qty,
        -- Comma-separated distinct element-[1] values; an empty list becomes NULL.
        NULLIF(
            LISTAGG(
                DISTINCT SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text,
                ','
            ) WITHIN GROUP (
                ORDER BY SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text
            ),
            ''
        ) AS prd_no
    FROM table_1 AS a
    INNER JOIN table_2 AS b
        ON a.st_id = b.st_id
    INNER JOIN table_3 AS c
        ON c.st_id = b.st_id
    WHERE date_col = '2022-02-22'
    GROUP BY
        id,
        a.ord_nbr,
        c.prd_qty
)
SELECT
    id,
    prd_qty,
    IFF(prd_qty = 'TRUE', CONCAT(prd_no, ':', 'high'), NULL) AS test_1
FROM cte;
但是如果我们回到 LISTAGG 示例:
-- Demo: LISTAGG without a delimiter next to COUNT_IF over the same groups.
-- The unnamed count_if expression keeps its exact text so its column name is unchanged.
SELECT
    column1,
    --,listagg(nullif(column2,''), ',') as la
    LISTAGG(column2) AS la,
    la = '' AS was_empty,   -- refers to the alias defined on the previous line
    count_if(column2 <> '')
FROM VALUES
    (1, ''),        -- solo empty
    (2, null),      -- solo null
    (3, null),      -- both null and empty
    (3, ''),
    (4, 'pot'),     -- both pot and empty
    (4, ''),
    (5, 'pot'),     -- both pot and null
    (5, null),
    (6, 'pot'),     -- two valid strings
    (6, 'kettle')
GROUP BY column1
ORDER BY column1;
然后我们将分隔符放入:
listagg(column2, ',')
对于第 4 组的值,我们得到的结果是 pot,(末尾多出了一个分隔符),因此 sub_value 在生成时实际上应该写成 NULLIF(<value>, ''):
因此,较长的形式为:
-- Long form: empty sub_values are turned into NULL before aggregation.
WITH cte AS (
    -- DISTINCT de-duplicates (id, ord_nbr, prd_qty, sub_value) here, so the
    -- LISTAGG in cte2 does not need its own DISTINCT.
    SELECT DISTINCT
        id,
        a.ord_nbr,
        c.prd_qty,
        NULLIF(
            SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text,
            ''
        ) AS sub_value
    FROM table_1 AS a
    INNER JOIN table_2 AS b
        ON a.st_id = b.st_id
    INNER JOIN table_3 AS c
        ON c.st_id = b.st_id
    WHERE date_col = '2022-02-22'
),
cte2 AS (
    -- Comma-separated sub_values per group; an empty list becomes NULL.
    SELECT
        id,
        ord_nbr,
        prd_qty,
        NULLIF(
            LISTAGG(sub_value, ',') WITHIN GROUP (ORDER BY sub_value),
            ''
        ) AS prd_no
    FROM cte
    GROUP BY
        id,
        ord_nbr,
        prd_qty
)
SELECT
    id,
    prd_qty,
    IFF(prd_qty = 'TRUE', CONCAT(prd_no, ':', 'high'), NULL) AS test_1
FROM cte2;
或更短的形式:
-- Short form: split, empty-to-NULL conversion and aggregation in a single CTE.
-- Fix: LISTAGG was never closed and had lost its ',' delimiter, and the closing
-- parenthesis of WITHIN GROUP was misplaced, so the statement did not parse.
WITH cte AS (
    SELECT
        id,
        a.ord_nbr,
        c.prd_qty,
        -- Inner NULLIF drops empty sub-values before aggregation;
        -- outer NULLIF turns an empty aggregated list into NULL.
        NULLIF(
            LISTAGG(
                DISTINCT NULLIF(SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text, ''),
                ','
            ) WITHIN GROUP (
                ORDER BY NULLIF(SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text, '')
            ),
            ''
        ) AS prd_no
    FROM table_1 AS a
    INNER JOIN table_2 AS b
        ON a.st_id = b.st_id
    INNER JOIN table_3 AS c
        ON c.st_id = b.st_id
    WHERE date_col = '2022-02-22'
    GROUP BY
        id,
        a.ord_nbr,
        c.prd_qty
)
SELECT
    id,
    prd_qty,
    IFF(prd_qty = 'TRUE', CONCAT(prd_no, ':', 'high'), NULL) AS test_1
FROM cte;
哦是的:
CONCAT(prd_no, ':', 'high')
等价于:
CONCAT(prd_no, ':high')
另外,其实可以把最后的 IFF/CASE 直接放进执行分组的同一个查询中:
-- Demo: aggregate, empty-to-NULL conversion and the final IFF in one grouped SELECT.
-- Each later column refers to the alias defined just above it.
SELECT
    column1,
    LISTAGG(DISTINCT NULLIF(column2, ''), ',')
        WITHIN GROUP (ORDER BY NULLIF(column2, '')) AS la,
    NULLIF(la, '') AS was_empty,
    IFF(column1 % 2 = 0, CONCAT(was_empty, ':', 'high'), NULL) AS test_1
FROM VALUES
    (1, ''),        -- solo empty
    (2, null),      -- solo null
    (3, null),      -- both null and empty
    (3, ''),
    (4, 'pot'),     -- both pot and empty
    (4, ''),
    (5, 'pot'),     -- both pot and null
    (5, null),
    (6, 'pot'),     -- two valid strings
    (6, 'kettle')
GROUP BY column1
ORDER BY column1;
真正的代码可以是:
-- Final single-statement form: aggregation and the 'high' label in one grouped SELECT.
-- Fix: LISTAGG was never closed and had lost its ',' delimiter, and the closing
-- parenthesis of WITHIN GROUP was misplaced, so the statement did not parse.
SELECT
    id,
    a.ord_nbr,
    c.prd_qty,
    -- Inner NULLIF drops empty sub-values before aggregation;
    -- outer NULLIF turns an empty aggregated list into NULL.
    NULLIF(
        LISTAGG(
            DISTINCT NULLIF(SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text, ''),
            ','
        ) WITHIN GROUP (
            ORDER BY NULLIF(SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text, '')
        ),
        ''
    ) AS prd_no,
    -- prd_no here is the alias defined just above.
    IFF(prd_qty = 'TRUE', CONCAT(prd_no, ':', 'high'), NULL) AS test_1
FROM table_1 AS a
INNER JOIN table_2 AS b
    ON a.st_id = b.st_id
INNER JOIN table_3 AS c
    ON c.st_id = b.st_id
WHERE date_col = '2022-02-22'
GROUP BY
    id,
    a.ord_nbr,
    c.prd_qty;