在 BigQuery 中查找众数

Find the mode in BigQuery

众数是一组中出现次数最多的值。

我想要这样的东西:

-- Illustrative sketch of the desired query, not a working one: MODE() stands
-- in for the per-group "most frequent value" aggregate being asked for.
SELECT
    t.id as t_id,
    -- value_list is shown only to illustrate what each group contains.
    GROUP_CONCAT(t.value) as value_list,
    -- Wished-for aggregate: the value that occurs most often within each t_id.
    MODE(t.value) AS value_mode
FROM dataset.table as t
GROUP BY t_id

例如:

t_id    value_list     value_mode
1       2,2,2,3,6,6    2

这该如何实现?

编辑:value_list 只是为了说明目的,实际只需要众数。

对于你的例子,我会这样解决:

-- Mode of one comma-separated list (BigQuery legacy SQL).
-- Reading inside-out: the innermost query supplies the sample list, SPLIT()
-- yields one value per list element, the middle query counts each distinct
-- value and ranks the counts, and the outer query keeps the top-ranked value.
SELECT
  x,
  w AS mode
FROM (
  SELECT
    w,
    FIRST(x) AS x,  -- carry the original list along for display
    COUNT(*) AS occurrences,
    ROW_NUMBER() OVER (ORDER BY occurrences DESC) AS freq_rank  -- 1 = most frequent
  FROM (
    SELECT
      SPLIT(x) AS w,
      x
    FROM (
      SELECT "2,2,2,3,6,6" AS x
    )
  )
  GROUP BY w
)
WHERE freq_rank = 1

如果同时还要输出 GROUP_CONCAT 的结果,可以这样写:

-- Mode of one list, returned next to the re-concatenated list (BigQuery legacy SQL).
-- FLATTEN() expands the repeated field produced by SPLIT() into one row per
-- value so that GROUP_CONCAT() can run as a window function over all rows.
SELECT
  gc,
  w AS mode
FROM (
  SELECT
    w,
    FIRST(gc) AS gc,  -- gc is identical on every row; any one will do
    COUNT(*) AS occurrences,
    ROW_NUMBER() OVER (ORDER BY occurrences DESC) AS freq_rank  -- 1 = most frequent
  FROM (
    SELECT
      GROUP_CONCAT(w) OVER () AS gc,
      w
    FROM (
      FLATTEN((
        SELECT
          SPLIT(x) AS w,
          x
        FROM (
          SELECT "2,2,2,3,6,6" AS x
        )
      ), w)
    )
  )
  GROUP BY w
)
WHERE freq_rank = 1

再加上按 tid 分区的处理:

-- Mode per tid, with the concatenated list of each tid (BigQuery legacy SQL).
-- Same pipeline as the single-list version, but both window functions are
-- partitioned by tid, so each tid gets its own value_list and its own ranking.
SELECT
  tid,
  gc AS value_list,
  w AS value_mode
FROM (
  SELECT
    tid,
    w,
    FIRST(gc) AS gc,  -- gc is constant within a tid; any one will do
    COUNT(*) AS occurrences,
    ROW_NUMBER() OVER (PARTITION BY tid ORDER BY occurrences DESC) AS freq_rank  -- 1 = most frequent in this tid
  FROM (
    SELECT
      tid,
      GROUP_CONCAT(w) OVER (PARTITION BY tid) AS gc,
      w
    FROM (
      FLATTEN((
        SELECT
          1 AS tid,
          SPLIT(x) AS w,
          x
        FROM (
          SELECT "2,2,2,3,6,6" AS x
        )
      ), w)
    )
  )
  GROUP BY tid, w
)
WHERE freq_rank = 1
-- Alternative approach: mode per id, read straight from a table
-- (BigQuery legacy SQL).
-- NOTE(review): presumably dataset.table.value holds a delimited list that
-- SPLIT() breaks into individual values - confirm against the real schema.
SELECT
  id,
  value AS value_list,
  v AS value_mode
FROM (
  SELECT
    id,
    value,
    v,
    COUNT(1) AS occurrences,
    ROW_NUMBER() OVER (PARTITION BY id ORDER BY occurrences DESC) AS freq_rank  -- 1 = most frequent for this id
  FROM (
    SELECT
      id,
      value,
      SPLIT(value) AS v
    FROM dataset.table
  )
  GROUP BY id, value, v
)
WHERE freq_rank = 1

我经常需要按不同的分组(例如长度和安培数)求价格的众数,以便过滤掉促销价等非常规价格。我通常使用两种方法,二者都是先把价格聚合成数组,再按出现频率排序后取消嵌套(UNNEST)。一种方法使用 LIMIT,另一种使用 [OFFSET(0)]——后者便于在需要时取第 N 个值。

两者都包含在下面:

-- Most and least frequent price per (length, amps) group (BigQuery Standard SQL).
--
-- Prices of each group are collected into an array; each output column then
-- unnests that array, groups equal prices, and orders them by frequency.
-- Two equivalent ways of picking the winner are shown:
--   * LIMIT 1      - scalar subquery returning only the top row;
--   * [OFFSET(0)]  - ordered array indexed by position; change the offset to
--                    fetch the Nth most / least frequent price instead.
-- Every ORDER BY uses the price itself as a secondary key, so that a tie in
-- frequency always resolves to the lowest price rather than an arbitrary one.
WITH t AS (
  -- Sample data: two prices per group, one of them repeated.
  SELECT 18 AS length, 'HIGH' AS amps, 99.95 AS price UNION ALL
  SELECT 18, 'HIGH', 99.95 UNION ALL
  SELECT 18, 'HIGH', 5.95 UNION ALL
  SELECT 18, 'LOW', 33.95 UNION ALL
  SELECT 18, 'LOW', 33.95 UNION ALL
  SELECT 18, 'LOW', 4.5 UNION ALL
  SELECT 3, 'HIGH', 77.95 UNION ALL
  SELECT 3, 'HIGH', 77.95 UNION ALL
  SELECT 3, 'HIGH', 9.99 UNION ALL
  SELECT 3, 'LOW', 44.95 UNION ALL
  SELECT 3, 'LOW', 44.95 UNION ALL
  SELECT 3, 'LOW', 5.65
),

-- One row per (length, amps) with every observed price in an array.
prices_by_group AS (
  SELECT
    length,
    amps,
    ARRAY_AGG(price) AS price_array
  FROM t
  GROUP BY length, amps
)

SELECT
  length,
  amps,

  -- By LIMIT
  (
    SELECT x
    FROM UNNEST(price_array) AS x
    GROUP BY x
    ORDER BY COUNT(*) DESC, x
    LIMIT 1
  ) AS most_freq_price,
  (
    SELECT x
    FROM UNNEST(price_array) AS x
    GROUP BY x
    ORDER BY COUNT(*) ASC, x
    LIMIT 1
  ) AS least_freq_price,

  -- By OFFSET
  ARRAY(
    SELECT x
    FROM UNNEST(price_array) AS x
    GROUP BY x
    ORDER BY COUNT(*) DESC, x
  )[OFFSET(0)] AS most_freq_price_offset,
  ARRAY(
    SELECT x
    FROM UNNEST(price_array) AS x
    GROUP BY x
    ORDER BY COUNT(*) ASC, x
  )[OFFSET(0)] AS least_freq_price_offset

FROM prices_by_group