每个 ID 的数据透视 table 摘要
Summary from pivot table for each ID
我在 SNOWFLAKE 数据库中工作。我有这样一张表(透视(pivot)操作的结果):
我需要每个 CUSTOMER_ID 只保留一行,并包含各个 LAG 值,像这样:
我该怎么做?
我试过:
-- Asker's attempt: collapse the pivoted rows to one row per customer by
-- summing each lag column within the customer's group.
SELECT
    CUSTOMER_ID,
    SUM(LAG_1) AS LAG_0_1,
    SUM(LAG_2) AS LAG_1_2,
    SUM(LAG_3) AS LAG_2_3,
    SUM(LAG_4) AS LAG_3_4,
    SUM(LAG_5) AS LAG_4_5
FROM TEMP_TABLE
GROUP BY CUSTOMER_ID
但输出与我的预期完全不同。谢谢。
如果 lag_1、lag_2 实际上是这样计算出来的:
-- Each offset gets its own alias: offset 1 -> lag_1, offset 2 -> lag_2.
-- (The window specification is a placeholder in this fragment.)
,LAG(column,1) OVER (blar blar blar) AS lag_1
,LAG(column,2) OVER (blar blar blar) AS lag_2
并且您想避免空值,
然后使用 IGNORE NULLS 获取最近的非空行:
-- IGNORE NULLS makes LAG skip NULL rows, so it returns the most recent
-- non-NULL value instead of a NULL from the immediately preceding row.
,LAG(column) IGNORE NULLS OVER (blar blar blar) AS lag
或者,如果您想按不同的顺序取值,可以分别计算 n 个 lag,再用 COALESCE 按您偏好的顺序合并结果:
-- Returns the first non-NULL lag, checked in the listed order of preference.
COALESCE(lag_5, lag_3, lag_1, lag_4, lag_2) AS awesome_lag_order
因此,如果您的数据如题中所示,并且想要题中所示的输出,则可以使用 FIRST_VALUE(或反转 ORDER BY 并使用 LAST_VALUE),然后用 DISTINCT 去除重复行,像这样(为了简单起见,我添加了一个排序列 order_id):
-- One row per customer: for every lag column, take the non-NULL value that
-- belongs to the row with the highest order_id.
-- The window frame is spelled out so that every row of a partition sees the
-- same value. Snowflake already defaults FIRST_VALUE to the whole partition,
-- but the ANSI default frame stops at the current row, so being explicit
-- keeps the DISTINCT collapse correct and the intent visible.
SELECT DISTINCT
    customer_id
    ,FIRST_VALUE(lag_1) IGNORE NULLS OVER (
        PARTITION BY customer_id ORDER BY order_id DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS lag_1
    ,FIRST_VALUE(lag_2) IGNORE NULLS OVER (
        PARTITION BY customer_id ORDER BY order_id DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS lag_2
    ,FIRST_VALUE(lag_3) IGNORE NULLS OVER (
        PARTITION BY customer_id ORDER BY order_id DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS lag_3
    ,FIRST_VALUE(lag_4) IGNORE NULLS OVER (
        PARTITION BY customer_id ORDER BY order_id DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS lag_4
    ,FIRST_VALUE(lag_5) IGNORE NULLS OVER (
        PARTITION BY customer_id ORDER BY order_id DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS lag_5
FROM VALUES
    (10001, 1, null, 3, null, null, null),
    (10001, 2, null, null, 12, null, null),
    (10001, 3, null, null, null, 5, null),
    (10001, 4, null, null, null, null, 27),
    (10002, 1, null, 7, null, null, null),
    (10002, 2, null, null, 3, null, null),
    (10002, 3, null, null, null, 4, null),
    (10002, 4, null, null, null, null, 12)
    v(customer_id, order_id, lag_1, lag_2, lag_3, lag_4, lag_5)
-- After DISTINCT there is exactly one row per customer, so one sort key suffices.
ORDER BY customer_id;
将得到:
CUSTOMER_ID | LAG_1 | LAG_2 | LAG_3 | LAG_4 | LAG_5 |
---|---|---|---|---|---|
10,001 | NULL | 3 | 12 | 5 | 27 |
10,002 | NULL | 7 | 3 | 4 | 12 |
FIRST_VALUE 允许您控制取值的顺序。例如,假设您的数据中有一行优先级较低(order_id 较小)但数值较大:
-- Sample rows for the comparison: the first row for customer 10001 has the
-- lowest second value (0) yet carries the largest lag values (100 / 1000).
FROM VALUES
(10001, 0, null, 100, 100, 100, 1000),
(10001, 1, null, 3, null, null, null),
(10001, 2, null, null, 12, null, null),
(10001, 3, null, null, null, 5, null),
(10001, 4, null, null, null, null, 27),
(10002, 1, null, 7, null, null, null),
(10002, 2, null, null, 3, null, null),
(10002, 3, null, null, null, 4, null),
(10002, 4, null, null, null, null, 12)
那么使用 MAX 将得到那个较大的值:
-- Per-customer maximum of every lag column. MAX looks only at the values,
-- so the largest non-NULL entry wins no matter which order_id it came from.
SELECT
    customer_id
    ,MAX(lag_1) AS lag_1
    ,MAX(lag_2) AS lag_2
    ,MAX(lag_3) AS lag_3
    ,MAX(lag_4) AS lag_4
    ,MAX(lag_5) AS lag_5
FROM VALUES
    (10001, 0, NULL, 100, 100, 100, 1000),
    (10001, 1, NULL, 3, NULL, NULL, NULL),
    (10001, 2, NULL, NULL, 12, NULL, NULL),
    (10001, 3, NULL, NULL, NULL, 5, NULL),
    (10001, 4, NULL, NULL, NULL, NULL, 27),
    (10002, 1, NULL, 7, NULL, NULL, NULL),
    (10002, 2, NULL, NULL, 3, NULL, NULL),
    (10002, 3, NULL, NULL, NULL, 4, NULL),
    (10002, 4, NULL, NULL, NULL, NULL, 12)
    v(customer_id, order_id, lag_1, lag_2, lag_3, lag_4, lag_5)
GROUP BY customer_id
-- The grouping yields one row per customer, so customer_id alone fixes the order.
ORDER BY customer_id;
给出:
CUSTOMER_ID | LAG_1 | LAG_2 | LAG_3 | LAG_4 | LAG_5 |
---|---|---|---|---|---|
10,001 | NULL | 100 | 100 | 100 | 1,000 |
10,002 | NULL | 7 | 3 | 4 | 12 |
你可能想要哪个..目前还不清楚。
另一种选择是,如果您本来就想使用 GROUP BY 子句并避免 DISTINCT,则可以使用 ARRAY_AGG 并返回数组的第一个值:
-- GROUP BY alternative to the DISTINCT approach: collect each lag column into
-- an array ordered by order_id descending and keep element 0 of that array.
SELECT
    customer_id
    ,GET(ARRAY_AGG(lag_1) WITHIN GROUP (ORDER BY order_id DESC), 0) AS lag_1
    ,GET(ARRAY_AGG(lag_2) WITHIN GROUP (ORDER BY order_id DESC), 0) AS lag_2
    ,GET(ARRAY_AGG(lag_3) WITHIN GROUP (ORDER BY order_id DESC), 0) AS lag_3
    ,GET(ARRAY_AGG(lag_4) WITHIN GROUP (ORDER BY order_id DESC), 0) AS lag_4
    ,GET(ARRAY_AGG(lag_5) WITHIN GROUP (ORDER BY order_id DESC), 0) AS lag_5
FROM VALUES
    (10001, 0, NULL, 100, 100, 100, 1000),
    (10001, 1, NULL, 3, NULL, NULL, NULL),
    (10001, 2, NULL, NULL, 12, NULL, NULL),
    (10001, 3, NULL, NULL, NULL, 5, NULL),
    (10001, 4, NULL, NULL, NULL, NULL, 27),
    (10002, 1, NULL, 7, NULL, NULL, NULL),
    (10002, 2, NULL, NULL, 3, NULL, NULL),
    (10002, 3, NULL, NULL, NULL, 4, NULL),
    (10002, 4, NULL, NULL, NULL, NULL, 12)
    v(customer_id, order_id, lag_1, lag_2, lag_3, lag_4, lag_5)
GROUP BY customer_id
ORDER BY customer_id;
给出:
CUSTOMER_ID | LAG_1 | LAG_2 | LAG_3 | LAG_4 | LAG_5 |
---|---|---|---|---|---|
10,001 | NULL | 3 | 12 | 5 | 27 |
10,002 | NULL | 7 | 3 | 4 | 12 |
我在 SNOWFLAKE 数据库中工作。我有这样一张表(透视(pivot)操作的结果):
我需要每个 CUSTOMER_ID 只保留一行,并包含各个 LAG 值,像这样:
我该怎么做? 我试过:
-- Asker's attempt: collapse the pivoted rows to one row per customer by
-- summing each lag column within the customer's group.
SELECT
    CUSTOMER_ID,
    SUM(LAG_1) AS LAG_0_1,
    SUM(LAG_2) AS LAG_1_2,
    SUM(LAG_3) AS LAG_2_3,
    SUM(LAG_4) AS LAG_3_4,
    SUM(LAG_5) AS LAG_4_5
FROM TEMP_TABLE
GROUP BY CUSTOMER_ID
但输出与我的预期完全不同。谢谢。
如果 lag_1、lag_2 实际上是这样计算出来的:
-- Each offset gets its own alias: offset 1 -> lag_1, offset 2 -> lag_2.
-- (The window specification is a placeholder in this fragment.)
,LAG(column,1) OVER (blar blar blar) AS lag_1
,LAG(column,2) OVER (blar blar blar) AS lag_2
并且您想避免空值,
然后使用 IGNORE NULLS 获取最近的非空行:
-- IGNORE NULLS makes LAG skip NULL rows, so it returns the most recent
-- non-NULL value instead of a NULL from the immediately preceding row.
,LAG(column) IGNORE NULLS OVER (blar blar blar) AS lag
或者,如果您想按不同的顺序取值,可以分别计算 n 个 lag,再用 COALESCE 按您偏好的顺序合并结果:
-- Returns the first non-NULL lag, checked in the listed order of preference.
COALESCE(lag_5, lag_3, lag_1, lag_4, lag_2) AS awesome_lag_order
因此,如果您的数据如题中所示,并且想要题中所示的输出,则可以使用 FIRST_VALUE(或反转 ORDER BY 并使用 LAST_VALUE),然后用 DISTINCT 去除重复行,像这样(为了简单起见,我添加了一个排序列 order_id):
-- One row per customer: for every lag column, take the non-NULL value that
-- belongs to the row with the highest order_id.
-- The window frame is spelled out so that every row of a partition sees the
-- same value. Snowflake already defaults FIRST_VALUE to the whole partition,
-- but the ANSI default frame stops at the current row, so being explicit
-- keeps the DISTINCT collapse correct and the intent visible.
SELECT DISTINCT
    customer_id
    ,FIRST_VALUE(lag_1) IGNORE NULLS OVER (
        PARTITION BY customer_id ORDER BY order_id DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS lag_1
    ,FIRST_VALUE(lag_2) IGNORE NULLS OVER (
        PARTITION BY customer_id ORDER BY order_id DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS lag_2
    ,FIRST_VALUE(lag_3) IGNORE NULLS OVER (
        PARTITION BY customer_id ORDER BY order_id DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS lag_3
    ,FIRST_VALUE(lag_4) IGNORE NULLS OVER (
        PARTITION BY customer_id ORDER BY order_id DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS lag_4
    ,FIRST_VALUE(lag_5) IGNORE NULLS OVER (
        PARTITION BY customer_id ORDER BY order_id DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS lag_5
FROM VALUES
    (10001, 1, null, 3, null, null, null),
    (10001, 2, null, null, 12, null, null),
    (10001, 3, null, null, null, 5, null),
    (10001, 4, null, null, null, null, 27),
    (10002, 1, null, 7, null, null, null),
    (10002, 2, null, null, 3, null, null),
    (10002, 3, null, null, null, 4, null),
    (10002, 4, null, null, null, null, 12)
    v(customer_id, order_id, lag_1, lag_2, lag_3, lag_4, lag_5)
-- After DISTINCT there is exactly one row per customer, so one sort key suffices.
ORDER BY customer_id;
将得到:
CUSTOMER_ID | LAG_1 | LAG_2 | LAG_3 | LAG_4 | LAG_5 |
---|---|---|---|---|---|
10,001 | NULL | 3 | 12 | 5 | 27 |
10,002 | NULL | 7 | 3 | 4 | 12 |
FIRST_VALUE 允许您控制取值的顺序。例如,假设您的数据中有一行优先级较低(order_id 较小)但数值较大:
-- Sample rows for the comparison: the first row for customer 10001 has the
-- lowest second value (0) yet carries the largest lag values (100 / 1000).
FROM VALUES
(10001, 0, null, 100, 100, 100, 1000),
(10001, 1, null, 3, null, null, null),
(10001, 2, null, null, 12, null, null),
(10001, 3, null, null, null, 5, null),
(10001, 4, null, null, null, null, 27),
(10002, 1, null, 7, null, null, null),
(10002, 2, null, null, 3, null, null),
(10002, 3, null, null, null, 4, null),
(10002, 4, null, null, null, null, 12)
那么使用 MAX 将得到那个较大的值:
-- Per-customer maximum of every lag column. MAX looks only at the values,
-- so the largest non-NULL entry wins no matter which order_id it came from.
SELECT
    customer_id
    ,MAX(lag_1) AS lag_1
    ,MAX(lag_2) AS lag_2
    ,MAX(lag_3) AS lag_3
    ,MAX(lag_4) AS lag_4
    ,MAX(lag_5) AS lag_5
FROM VALUES
    (10001, 0, NULL, 100, 100, 100, 1000),
    (10001, 1, NULL, 3, NULL, NULL, NULL),
    (10001, 2, NULL, NULL, 12, NULL, NULL),
    (10001, 3, NULL, NULL, NULL, 5, NULL),
    (10001, 4, NULL, NULL, NULL, NULL, 27),
    (10002, 1, NULL, 7, NULL, NULL, NULL),
    (10002, 2, NULL, NULL, 3, NULL, NULL),
    (10002, 3, NULL, NULL, NULL, 4, NULL),
    (10002, 4, NULL, NULL, NULL, NULL, 12)
    v(customer_id, order_id, lag_1, lag_2, lag_3, lag_4, lag_5)
GROUP BY customer_id
-- The grouping yields one row per customer, so customer_id alone fixes the order.
ORDER BY customer_id;
给出:
CUSTOMER_ID | LAG_1 | LAG_2 | LAG_3 | LAG_4 | LAG_5 |
---|---|---|---|---|---|
10,001 | NULL | 100 | 100 | 100 | 1,000 |
10,002 | NULL | 7 | 3 | 4 | 12 |
你可能想要哪个..目前还不清楚。
另一种选择是,如果您本来就想使用 GROUP BY 子句并避免 DISTINCT,则可以使用 ARRAY_AGG 并返回数组的第一个值:
-- GROUP BY alternative to the DISTINCT approach: collect each lag column into
-- an array ordered by order_id descending and keep element 0 of that array.
SELECT
    customer_id
    ,GET(ARRAY_AGG(lag_1) WITHIN GROUP (ORDER BY order_id DESC), 0) AS lag_1
    ,GET(ARRAY_AGG(lag_2) WITHIN GROUP (ORDER BY order_id DESC), 0) AS lag_2
    ,GET(ARRAY_AGG(lag_3) WITHIN GROUP (ORDER BY order_id DESC), 0) AS lag_3
    ,GET(ARRAY_AGG(lag_4) WITHIN GROUP (ORDER BY order_id DESC), 0) AS lag_4
    ,GET(ARRAY_AGG(lag_5) WITHIN GROUP (ORDER BY order_id DESC), 0) AS lag_5
FROM VALUES
    (10001, 0, NULL, 100, 100, 100, 1000),
    (10001, 1, NULL, 3, NULL, NULL, NULL),
    (10001, 2, NULL, NULL, 12, NULL, NULL),
    (10001, 3, NULL, NULL, NULL, 5, NULL),
    (10001, 4, NULL, NULL, NULL, NULL, 27),
    (10002, 1, NULL, 7, NULL, NULL, NULL),
    (10002, 2, NULL, NULL, 3, NULL, NULL),
    (10002, 3, NULL, NULL, NULL, 4, NULL),
    (10002, 4, NULL, NULL, NULL, NULL, 12)
    v(customer_id, order_id, lag_1, lag_2, lag_3, lag_4, lag_5)
GROUP BY customer_id
ORDER BY customer_id;
给出:
CUSTOMER_ID | LAG_1 | LAG_2 | LAG_3 | LAG_4 | LAG_5 |
---|---|---|---|---|---|
10,001 | NULL | 3 | 12 | 5 | 27 |
10,002 | NULL | 7 | 3 | 4 | 12 |