将查询重写为更简单的形式

Question

-- Outer level: for rows whose PRD_QTY equals 'TRUE', build "<PRD_NO>:high";
-- the CASE has no ELSE, so every other row gets NULL in Test_1.
SELECT 
    ID,
    PRD_QTY,
    CASE 
        WHEN PRD_QTY = 'TRUE' 
            THEN CONCAT(PRD_NO, ':', 'high') 
    END AS Test_1 
FROM 
    -- Middle level: one row per (ID, ORD_NBR, PRD_QTY), with PRD_NO built by LISTAGG.
    (SELECT 
         ID, ORD_NBR, PRD_QTY,
         CASE 
             -- When the aggregated item_detail list is the empty string, use the literal text 'null' ...
             WHEN listagg(DISTINCT item_detail, ',') WITHIN GROUP (ORDER BY item_detail) = '' 
                 THEN 'null' 
                 -- ... otherwise aggregate element [1] of each item_detail after splitting it on ';'.
                 ELSE LISTAGG(DISTINCT split(item_detail, ';')[1]::text,',') WITHIN GROUP (ORDER BY split(item_detail, ';')[1]::text) 
         END AS PRD_NO
     FROM 
         -- Inner level: de-duplicated join rows for one DATE_COL value;
         -- REGEXP_REPLACE removes the ';;;;;' marker and the text matched after it.
         (SELECT DISTINCT
              ID, A.ORD_NBR, C.PRD_QTY,
              REGEXP_REPLACE(item_detail,'\;;;;;.*$','') AS item_detail 
          FROM 
              Table_1 A 
          JOIN 
              Table_2 B ON A.ST_ID = B.ST_ID
          JOIN 
              Table_3 C ON C.ST_ID = B.ST_ID
          WHERE 
              DATE_COL = '2022-02-22')
     GROUP BY 
         ID, ORD_NBR, PRD_QTY)

是否可以将此查询重写为更简单的形式？我不想使用多个子查询。

Answer 1

可以使用公用表表达式 (CTE) 重写：

-- Same logic as the nested query, flattened into two chained CTEs.
WITH cte AS (
    -- De-duplicated join rows for the chosen date, with item_detail cut at the ';;;;;' marker.
    SELECT DISTINCT
        ID,
        A.ORD_NBR,
        C.PRD_QTY,
        REGEXP_REPLACE(item_detail, '\;;;;;.*$', '') AS item_detail
    FROM Table_1 AS A
    INNER JOIN Table_2 AS B
        ON A.ST_ID = B.ST_ID
    INNER JOIN Table_3 AS C
        ON C.ST_ID = B.ST_ID
    WHERE DATE_COL = '2022-02-22'
),
cte2 AS (
    -- One row per (ID, ORD_NBR, PRD_QTY); PRD_NO is the literal 'null' when the
    -- aggregated item_detail list is empty, else the list of [1] split elements.
    SELECT
        ID,
        ORD_NBR,
        PRD_QTY,
        CASE
            WHEN LISTAGG(DISTINCT item_detail, ',')
                     WITHIN GROUP (ORDER BY item_detail) = ''
                THEN 'null'
            ELSE LISTAGG(DISTINCT SPLIT(item_detail, ';')[1]::text, ',')
                     WITHIN GROUP (ORDER BY SPLIT(item_detail, ';')[1]::text)
        END AS PRD_NO
    FROM cte
    GROUP BY
        ID,
        ORD_NBR,
        PRD_QTY
)
-- Final projection: tag PRD_NO with ':high' only where PRD_QTY = 'TRUE'.
SELECT
    ID,
    PRD_QTY,
    CASE
        WHEN PRD_QTY = 'TRUE' THEN CONCAT(PRD_NO, ':', 'high')
    END AS Test_1
FROM cte2;

Answer 2

这段代码的一个问题是：你在 cte（沿用 Lukasz 答案中的命名）的 SELECT 里使用了 DISTINCT，而 DISTINCT 的效果与 GROUP BY 相同。cte2 中又按 ID, ORD_NBR, PRD_QTY 做了 GROUP BY，并对 item_detail 做了聚合处理，这与前面 DISTINCT 所做的事情是重复的。

现在出于性能原因，分两步执行此操作可能会有所帮助，但这不能作为 Stack overflow 上的建议，因为它 100% 与您的数据相关。

然后在 cte2 中，你把若干项聚合成一个列表，再把它与空字符串进行比较。这种写法相当迂回。因为从逻辑上讲，结果为空字符串的情况只有：item_detail 为 null，或者为空字符串（或者两者同时出现）；其他任何值都不会聚合成空字符串。

-- Demo: result of LISTAGG (no delimiter) per group, and whether it equals ''.
SELECT
    column1,
    LISTAGG(column2) AS la,
    la = '' AS was_empty   -- reuses the alias defined on the previous line
FROM VALUES
    (1, ''),        -- solo empty
    (2, NULL),      -- solo null
    (3, NULL),      -- both null and empty
    (3, ''),
    (4, 'pot'),     -- both pot and empty
    (4, ''),
    (5, 'pot'),     -- both pot and null
    (5, NULL),
    (6, 'pot'),     -- two valid strings
    (6, 'kettle')
GROUP BY column1
ORDER BY column1;

COLUMN1	LA	WAS_EMPTY
1		TRUE
2		TRUE
3		TRUE
4	pot	FALSE
5	pot	FALSE
6	potkettle	FALSE

第一个 LISTAGG 的输出只用于这项判断，因此即便别的都不提，其中的 ORDER BY 也 100% 是浪费。而这段代码真正想判断的是“是否存在非空字符串”。

-- TRUE exactly when no row in the group has a non-empty item_detail
-- (only NULLs and/or empty strings), i.e. the same groups for which
-- the LISTAGG(...) = '' comparison is TRUE.
count_if(item_detail <> '') = 0

比下面的写法更具可读性，也更切题：

LISTAGG(DISTINCT item_detail, ',') WITHIN GROUP (ORDER BY item_detail) = ''

还有一个问题：你对 item_detail 做了拆分，这一步应该上移到更靠前的位置（放进 cte 中）。

所以 cte 和 cte2 可以写成：

WITH cte AS (
    -- Join rows for the chosen date; the split on ';' is done here, once per row,
    -- so the aggregation below only has to reference sub_value.
    SELECT
        id,
        a.ord_nbr,
        c.prd_qty,
        SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text AS sub_value
    FROM table_1 AS a
    INNER JOIN table_2 AS b
        ON a.st_id = b.st_id
    INNER JOIN table_3 AS c
        ON c.st_id = b.st_id
    WHERE date_col = '2022-02-22'
),
cte2 AS (
    -- One row per (id, ord_nbr, prd_qty); an empty aggregate becomes NULL.
    SELECT
        id,
        ord_nbr,
        prd_qty,
        NULLIF(
            LISTAGG(DISTINCT sub_value, ',') WITHIN GROUP (ORDER BY sub_value),
            ''
        ) AS prd_no
    FROM cte
    GROUP BY
        id,
        ord_nbr,
        prd_qty
)
SELECT
    id,
    prd_qty,
    CASE
        WHEN prd_qty = 'TRUE' THEN CONCAT(prd_no, ':', 'high')
    END AS test_1
FROM cte2;

因为对 null 或元素个数不足的数据执行 SPLIT()[1]::text 时，输出为 null：

-- Demo: element [1] of SPLIT(column1, ';') for NULL, empty, and 1/2/3-part inputs.
-- Index [1] is the second element: 'A;B' yields B in the result listed after this query.
SELECT column1
    ,SPLIT(column1, ';')[1]::text
FROM VALUES
    (null),     -- NULL input
    (''),       -- empty string
    ('A'),      -- one part only, so nothing at index 1
    ('A;B'),    -- two parts
    ('A;B;C');  -- three parts

给出：

COLUMN1	SPLIT(COLUMN1, ';')[1]::TEXT
null	null
"empty string"	null
A	null
A;B	B
A;B;C	B

但是如果你不介意把 sub_value 的表达式重复写两遍，SQL 可以写成：

-- Single-CTE variant: join, split and aggregation happen in one step, at the
-- cost of writing the split expression twice (once in LISTAGG, once in ORDER BY).
WITH cte AS (
    SELECT
        id,
        a.ord_nbr,
        c.prd_qty,
        NULLIF(
            LISTAGG(DISTINCT SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text, ',')
                WITHIN GROUP (ORDER BY SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text),
            ''
        ) AS prd_no
    FROM table_1 AS a
    INNER JOIN table_2 AS b
        ON a.st_id = b.st_id
    INNER JOIN table_3 AS c
        ON c.st_id = b.st_id
    WHERE date_col = '2022-02-22'
    GROUP BY
        id,
        a.ord_nbr,
        c.prd_qty
)
SELECT
    id,
    prd_qty,
    CASE
        WHEN prd_qty = 'TRUE' THEN CONCAT(prd_no, ':', 'high')
    END AS test_1
FROM cte;

但是如果我们回到 LISTAGG 示例：

-- Demo: per group, the delimiter-less LISTAGG result, whether it equals '',
-- and how many rows carry a non-empty string.
SELECT column1
    -- Disabled alternative for la: NULL out '' values and join with ','.
    --,listagg(nullif(column2,''), ',') as la
    ,listagg(column2) as la
    ,la = '' as was_empty -- reuses the la alias defined on the previous line
    ,count_if(column2 <> '') -- number of rows in the group where column2 <> '' is true
FROM VALUES
    (1, ''),-- solo empty
    (2, null), -- solo null
    (3, null), -- both null and empty 
    (3, ''),
    (4, 'pot'), -- both pot and empty 
    (4, ''),
    (5, 'pot'), -- both pot and null 
    (5, null),
    (6, 'pot'), -- two valid strings
    (6, 'kettle')
GROUP BY 1
ORDER BY 1;

然后我们将分隔符放入：

listagg(column2, ',')

对于第 4 组，我们得到的结果是 `pot,`（末尾多出一个逗号），因此 sub_value 在生成时实际上应该写成 NULLIF(<value>, '')：

因此，较长的形式为：

WITH cte AS (
    -- De-duplicated join rows; an empty-string split element is turned into NULL
    -- here so it cannot contribute an empty entry to the list built in cte2.
    SELECT DISTINCT
        id,
        a.ord_nbr,
        c.prd_qty,
        NULLIF(
            SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text,
            ''
        ) AS sub_value
    FROM table_1 AS a
    INNER JOIN table_2 AS b
        ON a.st_id = b.st_id
    INNER JOIN table_3 AS c
        ON c.st_id = b.st_id
    WHERE date_col = '2022-02-22'
),
cte2 AS (
    -- Rows are already distinct, so LISTAGG needs no DISTINCT of its own.
    SELECT
        id,
        ord_nbr,
        prd_qty,
        NULLIF(
            LISTAGG(sub_value, ',') WITHIN GROUP (ORDER BY sub_value),
            ''
        ) AS prd_no
    FROM cte
    GROUP BY
        id,
        ord_nbr,
        prd_qty
)
SELECT
    id,
    prd_qty,
    CASE
        WHEN prd_qty = 'TRUE' THEN CONCAT(prd_no, ':', 'high')
    END AS test_1
FROM cte2;

或更短的形式：

-- Shorter form: a single CTE performs the join, the split and the aggregation.
WITH cte AS (
    SELECT
        id, 
        a.ord_nbr, 
        c.prd_qty,
        -- Inner NULLIF: turn an empty-string split element into NULL before aggregating.
        -- Outer NULLIF: turn an empty aggregate into NULL.
        -- LISTAGG is closed with its ',' delimiter BEFORE the WITHIN GROUP clause;
        -- the ORDER BY repeats the aggregated expression (it is paired with DISTINCT).
        NULLIF(
            LISTAGG(DISTINCT NULLIF(SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text, ''), ',')
                WITHIN GROUP (ORDER BY NULLIF(SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text, '')),
            ''
        ) AS prd_no
    FROM table_1 AS a 
    JOIN table_2 AS b 
        ON a.st_id = b.st_id
    JOIN table_3 AS c 
        on c.st_id = b.st_id
    WHERE date_col = '2022-02-22'
    GROUP BY 1,2,3
)
-- Tag prd_no with ':high' only where prd_qty = 'TRUE'; NULL otherwise.
SELECT id, prd_qty,
  IFF( prd_qty = 'TRUE', CONCAT(prd_no, ':', 'high'), null) AS test_1 
FROM cte;

哦是的：

CONCAT(prd_no, ':', 'high')

与下面的写法等价：

CONCAT(prd_no, ':high')

假设您可以将最后一个 IFF/CASE 推入分组发生的同一代码中：

-- Demo: the aggregate, its NULLIF wrapper and the final IFF all live in the
-- same SELECT, each one reusing the alias defined just before it.
SELECT
    column1,
    LISTAGG(DISTINCT NULLIF(column2, ''), ',')
        WITHIN GROUP (ORDER BY NULLIF(column2, '')) AS la,
    NULLIF(la, '') AS was_empty,
    IFF(column1 % 2 = 0, CONCAT(was_empty, ':', 'high'), NULL) AS test_1
FROM VALUES
    (1, ''),        -- solo empty
    (2, NULL),      -- solo null
    (3, NULL),      -- both null and empty
    (3, ''),
    (4, 'pot'),     -- both pot and empty
    (4, ''),
    (5, 'pot'),     -- both pot and null
    (5, NULL),
    (6, 'pot'),     -- two valid strings
    (6, 'kettle')
GROUP BY column1
ORDER BY column1;

真正的代码可以是：

-- Final form: no CTE or subquery; join, split, aggregation and the ':high'
-- tagging all happen in one grouped SELECT.
SELECT
    id, 
    a.ord_nbr, 
    c.prd_qty,
    -- Inner NULLIF: turn an empty-string split element into NULL before aggregating.
    -- Outer NULLIF: turn an empty aggregate into NULL.
    -- LISTAGG is closed with its ',' delimiter BEFORE the WITHIN GROUP clause.
    NULLIF(
        LISTAGG(DISTINCT NULLIF(SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text, ''), ',')
            WITHIN GROUP (ORDER BY NULLIF(SPLIT(REGEXP_REPLACE(item_detail, '\;;;;;.*$', ''), ';')[1]::text, '')),
        ''
    ) AS prd_no,
    -- Reuses the prd_no alias defined just above.
    IFF( prd_qty = 'TRUE', CONCAT(prd_no, ':', 'high'), null) AS test_1 
FROM table_1 AS a 
JOIN table_2 AS b 
    ON a.st_id = b.st_id
JOIN table_3 AS c 
    on c.st_id = b.st_id
WHERE date_col = '2022-02-22'
GROUP BY 1,2,3;

将查询重写为更简单的形式

Rewriting a query into simpler form

snowflake-schema

snowflake-cloud-data-platform