当前行之前和完全匹配之后的行之间的行
ROWS BETWEEN CURRENT ROW PRECEDING AND EXACT MATCH FOLLOWING
考虑以下虚拟数据集
ID EVENT VALUE SORT_KEY
1 submitted 10 1
1 action 20 2
1 closed 30 3
1 action 30 4
2 submitted 10 1
2 action 10 2
2 action 10 3
2 closed 10 4
2 action 10 5
3 action 29 1
3 submitted 20 2
3 action 10 3
3 closed 10 4
3 action 10 5
4 action 10 1
我想对每个 ID 汇总从 submitted(含)到 closed(含)之间所有事件的 VALUE。我不关心这些边界之外的事件。
我想知道是否可以构建一个按 id 分区的窗口函数,使其窗口范围从当前行一直向后延伸到满足某个表达式的行。
想要的结果:
ID EVENT VALUE_SUM
1 submitted 60
2 submitted 40
3 submitted 40
计算这个的查询如下所示:
-- Hypothetical (wished-for) query from the question: the frame clause below is
-- illustrative only. Window frame bounds accept UNBOUNDED, CURRENT ROW or a
-- numeric offset, so "CURRENT ROW PRECEDING" and "event='closed' FOLLOWING"
-- will not parse as written. Intent: sum value from each 'submitted' row up to
-- the following 'closed' row of the same id.
SELECT
id
, event
, SUM(value) OVER (PARTITION BY id ROWS BETWEEN CURRENT ROW PRECEDING AND event='closed' FOLLOWING) as value_sum
FROM my_table
WHERE event = 'submitted'
我知道可以通过多次自连接来做到这一点,但出于数据量和优化方面的考虑,我想知道是否可以用窗口函数来实现。谢谢。
以下是 BigQuery SQL 的写法,
没有使用 JOIN,但这只是一个快速草稿,
因此可能仍有重构/优化的空间:
#standardSQL
-- Per ID, total VALUE over the rows whose running boundary sum (grp) is 1,
-- plus the 'closed' row on which grp is 2.
WITH flagged AS (
  -- boundary: running count inside each (ID, EVENT) partition, counting only
  -- 'submitted' / 'closed' rows; rows with any other EVENT get 0.
  SELECT
    *,
    COUNTIF(EVENT IN ('submitted', 'closed'))
      OVER (PARTITION BY ID, EVENT ORDER BY SORT_KEY) AS boundary
  FROM `project.dataset.table`
),
numbered AS (
  -- grp: running sum of boundary per ID in SORT_KEY order.
  SELECT
    id,
    EVENT,
    VALUE,
    SUM(boundary) OVER (
      PARTITION BY ID
      ORDER BY SORT_KEY
      ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS grp
  FROM flagged
)
SELECT
  id,
  SUM(VALUE) AS val
FROM numbered
WHERE grp = 1
  OR (grp = 2 AND EVENT = 'closed')
GROUP BY ID
您可以使用问题中的虚拟数据来测试/试用它:
#standardSQL
-- Self-contained demo: the first CTE stands in for the real table using the
-- dummy rows from the question, so the query can be run as-is.
WITH `project.dataset.table` AS (
  SELECT 1 AS ID, 'submitted' AS EVENT, 10 AS VALUE, 1 AS SORT_KEY UNION ALL
  SELECT 1, 'action',    20, 2 UNION ALL
  SELECT 1, 'closed',    30, 3 UNION ALL
  SELECT 1, 'action',    30, 4 UNION ALL
  SELECT 2, 'submitted', 10, 1 UNION ALL
  SELECT 2, 'action',    10, 2 UNION ALL
  SELECT 2, 'action',    10, 3 UNION ALL
  SELECT 2, 'closed',    10, 4 UNION ALL
  SELECT 2, 'action',    10, 5 UNION ALL
  SELECT 3, 'action',    29, 1 UNION ALL
  SELECT 3, 'submitted', 20, 2 UNION ALL
  SELECT 3, 'action',    10, 3 UNION ALL
  SELECT 3, 'closed',    10, 4 UNION ALL
  SELECT 3, 'action',    10, 5 UNION ALL
  SELECT 4, 'action',    10, 1
),
flagged AS (
  -- boundary: running count inside each (ID, EVENT) partition, counting only
  -- 'submitted' / 'closed' rows; rows with any other EVENT get 0.
  SELECT
    *,
    COUNTIF(EVENT IN ('submitted', 'closed'))
      OVER (PARTITION BY ID, EVENT ORDER BY SORT_KEY) AS boundary
  FROM `project.dataset.table`
),
numbered AS (
  -- grp: running sum of boundary per ID in SORT_KEY order.
  SELECT
    id,
    EVENT,
    VALUE,
    SUM(boundary) OVER (
      PARTITION BY ID
      ORDER BY SORT_KEY
      ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS grp
  FROM flagged
)
SELECT
  id,
  SUM(VALUE) AS val
FROM numbered
WHERE grp = 1
  OR (grp = 2 AND EVENT = 'closed')
GROUP BY ID
ORDER BY ID
结果是
Row id val
1 1 60
2 2 40
3 3 40
考虑以下虚拟数据集
ID EVENT VALUE SORT_KEY
1 submitted 10 1
1 action 20 2
1 closed 30 3
1 action 30 4
2 submitted 10 1
2 action 10 2
2 action 10 3
2 closed 10 4
2 action 10 5
3 action 29 1
3 submitted 20 2
3 action 10 3
3 closed 10 4
3 action 10 5
4 action 10 1
我想对每个 ID 汇总从 submitted(含)到 closed(含)之间所有事件的 VALUE。我不关心这些边界之外的事件。 我想知道是否可以构建一个按 id 分区的窗口函数,使其窗口范围从当前行一直向后延伸到满足某个表达式的行。
想要的结果:
ID EVENT VALUE_SUM
1 submitted 60
2 submitted 40
3 submitted 40
计算这个的查询如下所示:
-- Hypothetical (wished-for) query from the question: the frame clause below is
-- illustrative only. Window frame bounds accept UNBOUNDED, CURRENT ROW or a
-- numeric offset, so "CURRENT ROW PRECEDING" and "event='closed' FOLLOWING"
-- will not parse as written. Intent: sum value from each 'submitted' row up to
-- the following 'closed' row of the same id.
SELECT
id
, event
, SUM(value) OVER (PARTITION BY id ROWS BETWEEN CURRENT ROW PRECEDING AND event='closed' FOLLOWING) as value_sum
FROM my_table
WHERE event = 'submitted'
我知道可以通过多次自连接来做到这一点,但出于数据量和优化方面的考虑,我想知道是否可以用窗口函数来实现。谢谢。
以下是 BigQuery SQL 的写法,没有使用 JOIN,但这只是一个快速草稿,因此可能仍有重构/优化的空间:
#standardSQL
-- Per ID, total VALUE over the rows whose running boundary sum (grp) is 1,
-- plus the 'closed' row on which grp is 2.
WITH flagged AS (
  -- boundary: running count inside each (ID, EVENT) partition, counting only
  -- 'submitted' / 'closed' rows; rows with any other EVENT get 0.
  SELECT
    *,
    COUNTIF(EVENT IN ('submitted', 'closed'))
      OVER (PARTITION BY ID, EVENT ORDER BY SORT_KEY) AS boundary
  FROM `project.dataset.table`
),
numbered AS (
  -- grp: running sum of boundary per ID in SORT_KEY order.
  SELECT
    id,
    EVENT,
    VALUE,
    SUM(boundary) OVER (
      PARTITION BY ID
      ORDER BY SORT_KEY
      ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS grp
  FROM flagged
)
SELECT
  id,
  SUM(VALUE) AS val
FROM numbered
WHERE grp = 1
  OR (grp = 2 AND EVENT = 'closed')
GROUP BY ID
您可以使用问题中的虚拟数据来测试/试用它:
#standardSQL
-- Self-contained demo: the first CTE stands in for the real table using the
-- dummy rows from the question, so the query can be run as-is.
WITH `project.dataset.table` AS (
  SELECT 1 AS ID, 'submitted' AS EVENT, 10 AS VALUE, 1 AS SORT_KEY UNION ALL
  SELECT 1, 'action',    20, 2 UNION ALL
  SELECT 1, 'closed',    30, 3 UNION ALL
  SELECT 1, 'action',    30, 4 UNION ALL
  SELECT 2, 'submitted', 10, 1 UNION ALL
  SELECT 2, 'action',    10, 2 UNION ALL
  SELECT 2, 'action',    10, 3 UNION ALL
  SELECT 2, 'closed',    10, 4 UNION ALL
  SELECT 2, 'action',    10, 5 UNION ALL
  SELECT 3, 'action',    29, 1 UNION ALL
  SELECT 3, 'submitted', 20, 2 UNION ALL
  SELECT 3, 'action',    10, 3 UNION ALL
  SELECT 3, 'closed',    10, 4 UNION ALL
  SELECT 3, 'action',    10, 5 UNION ALL
  SELECT 4, 'action',    10, 1
),
flagged AS (
  -- boundary: running count inside each (ID, EVENT) partition, counting only
  -- 'submitted' / 'closed' rows; rows with any other EVENT get 0.
  SELECT
    *,
    COUNTIF(EVENT IN ('submitted', 'closed'))
      OVER (PARTITION BY ID, EVENT ORDER BY SORT_KEY) AS boundary
  FROM `project.dataset.table`
),
numbered AS (
  -- grp: running sum of boundary per ID in SORT_KEY order.
  SELECT
    id,
    EVENT,
    VALUE,
    SUM(boundary) OVER (
      PARTITION BY ID
      ORDER BY SORT_KEY
      ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS grp
  FROM flagged
)
SELECT
  id,
  SUM(VALUE) AS val
FROM numbered
WHERE grp = 1
  OR (grp = 2 AND EVENT = 'closed')
GROUP BY ID
ORDER BY ID
结果是
Row id val
1 1 60
2 2 40
3 3 40