BigQuery - Select 一列的最小值取决于另一列中的值
BigQuery - Select the minimum value of a column dependant upon the value in another column
假设我有一个 table 这样的
userId eventType timing
647 'jump' 32.7
123 'skip' 13.1
647 'skip' 24.4
433 'jump' 12.7
433 'skip' 53.6
647 'jump' 2.4
647 'jump' 64.4
123 'skip' 14.0
433 'jump' 4.3
123 'jump' 18.6
我想输出一个 table,每个 userId 一行,列作为 userId,eventType 为该 userId 的最小时间 'skip' 和 eventType 为 [=25 的最小时间=] 对于同一个 userId。像这样。
userID first_skip first_jump
647 24.4 2.4
123 13.1 18.6
433 53.6 4.3
我意识到我可以通过连接来做到这一点。
#standardSQL
-- Earliest 'skip' and earliest 'jump' timing for every user, one row per user.
--
-- Uses conditional aggregation instead of self-joins: the table is scanned
-- once and each MIN only sees the timings of its own event type (rows of any
-- other type yield NULL, which MIN ignores). Joining the table to itself on
-- userId multiplies each user's rows (all rows x skips x jumps) before
-- aggregating, which is what makes the join-based version slow on large data.
--
-- Users that have no 'skip' (or no 'jump') rows are kept and get NULL in the
-- corresponding column. To drop such users instead, append:
--   HAVING first_skip IS NOT NULL AND first_jump IS NOT NULL
-- Further event types only need one more MIN(IF(...)) line each.
WITH `project.dataset.table` AS (
  -- Inline sample rows standing in for the real table.
  SELECT 647 AS userId, 'jump' AS eventType, 32.7 AS timing UNION ALL
  SELECT 123, 'skip', 13.1 UNION ALL
  SELECT 647, 'skip', 24.4 UNION ALL
  SELECT 433, 'jump', 12.7 UNION ALL
  SELECT 433, 'skip', 53.6 UNION ALL
  SELECT 647, 'jump', 2.4 UNION ALL
  SELECT 647, 'jump', 64.4 UNION ALL
  SELECT 123, 'skip', 14.0 UNION ALL
  SELECT 433, 'jump', 4.3 UNION ALL
  SELECT 123, 'jump', 18.6
)
SELECT
  userId AS userID,
  MIN(IF(eventType = 'skip', timing, NULL)) AS first_skip,
  MIN(IF(eventType = 'jump', timing, NULL)) AS first_jump
FROM `project.dataset.table`
GROUP BY userId
但是,我的实际数据非常大,而且还有一些 eventType 类别,这意味着查询需要永远处理。我想知道是否有更好、更有效的方法来做到这一点而不使用连接。也许使用 window
或 partition
?
使用条件聚合:
-- One output row per userid. Each MIN only considers the timings of a single
-- event type: rows of any other type are mapped to NULL, which MIN skips.
SELECT
  userid,
  MIN(CASE WHEN eventtype = 'skip' THEN timing ELSE NULL END) AS first_skip,
  MIN(CASE WHEN eventtype = 'jump' THEN timing ELSE NULL END) AS first_jump
FROM mytable
GROUP BY userid
您可以使用条件聚合:
-- Smallest timing per user for the 'skip' and 'jump' event types, computed in
-- a single grouped scan. A CASE without ELSE yields NULL for non-matching
-- rows, and MIN ignores NULLs.
-- NOTE(review): the question's sample table spells the key column userId,
-- not user_id -- confirm the column name before running.
SELECT
  user_id,
  MIN(CASE eventtype WHEN 'skip' THEN timing END) AS skip,
  MIN(CASE eventtype WHEN 'jump' THEN timing END) AS jump
FROM t
GROUP BY user_id;
假设我有一个 table 这样的
userId eventType timing
647 'jump' 32.7
123 'skip' 13.1
647 'skip' 24.4
433 'jump' 12.7
433 'skip' 53.6
647 'jump' 2.4
647 'jump' 64.4
123 'skip' 14.0
433 'jump' 4.3
123 'jump' 18.6
我想输出一个 table,每个 userId 一行,列作为 userId,eventType 为该 userId 的最小时间 'skip' 和 eventType 为 [=25 的最小时间=] 对于同一个 userId。像这样。
userID first_skip first_jump
647 24.4 2.4
123 13.1 18.6
433 53.6 4.3
我意识到我可以通过连接来做到这一点。
#standardSQL
-- Earliest 'skip' and earliest 'jump' timing for every user, one row per user.
--
-- Uses conditional aggregation instead of self-joins: the table is scanned
-- once and each MIN only sees the timings of its own event type (rows of any
-- other type yield NULL, which MIN ignores). Joining the table to itself on
-- userId multiplies each user's rows (all rows x skips x jumps) before
-- aggregating, which is what makes the join-based version slow on large data.
--
-- Users that have no 'skip' (or no 'jump') rows are kept and get NULL in the
-- corresponding column. To drop such users instead, append:
--   HAVING first_skip IS NOT NULL AND first_jump IS NOT NULL
-- Further event types only need one more MIN(IF(...)) line each.
WITH `project.dataset.table` AS (
  -- Inline sample rows standing in for the real table.
  SELECT 647 AS userId, 'jump' AS eventType, 32.7 AS timing UNION ALL
  SELECT 123, 'skip', 13.1 UNION ALL
  SELECT 647, 'skip', 24.4 UNION ALL
  SELECT 433, 'jump', 12.7 UNION ALL
  SELECT 433, 'skip', 53.6 UNION ALL
  SELECT 647, 'jump', 2.4 UNION ALL
  SELECT 647, 'jump', 64.4 UNION ALL
  SELECT 123, 'skip', 14.0 UNION ALL
  SELECT 433, 'jump', 4.3 UNION ALL
  SELECT 123, 'jump', 18.6
)
SELECT
  userId AS userID,
  MIN(IF(eventType = 'skip', timing, NULL)) AS first_skip,
  MIN(IF(eventType = 'jump', timing, NULL)) AS first_jump
FROM `project.dataset.table`
GROUP BY userId
但是,我的实际数据非常大,而且还有一些 eventType 类别,这意味着查询需要永远处理。我想知道是否有更好、更有效的方法来做到这一点而不使用连接。也许使用 window
或 partition
?
使用条件聚合:
-- One output row per userid. Each MIN only considers the timings of a single
-- event type: rows of any other type are mapped to NULL, which MIN skips.
SELECT
  userid,
  MIN(CASE WHEN eventtype = 'skip' THEN timing ELSE NULL END) AS first_skip,
  MIN(CASE WHEN eventtype = 'jump' THEN timing ELSE NULL END) AS first_jump
FROM mytable
GROUP BY userid
您可以使用条件聚合:
-- Smallest timing per user for the 'skip' and 'jump' event types, computed in
-- a single grouped scan. A CASE without ELSE yields NULL for non-matching
-- rows, and MIN ignores NULLs.
-- NOTE(review): the question's sample table spells the key column userId,
-- not user_id -- confirm the column name before running.
SELECT
  user_id,
  MIN(CASE eventtype WHEN 'skip' THEN timing END) AS skip,
  MIN(CASE eventtype WHEN 'jump' THEN timing END) AS jump
FROM t
GROUP BY user_id;