如何用group by查询group by?
how to query group by with group by?
我的table:
bucket | bytes | url
-----
a | 111 | url_1
a | 222 | url_1
a | 222 | url_2
a | 222 | url_3
a | 222 | url_4
a | 222 | url_5
a | 222 | url_6
a | 222 | url_7
a | 222 | url_8
a | 222 | url_9
a | 222 | url_10
a | 222 | url_11
b | 444 | url_4
b | 555 | url_4
c | 444 | url_5
我要查询:
- 按桶分组,例如
select bucket from table group by bucket
.
- 对于每个桶,按 url 分组,如
select sum(bytes) from table where bucket = <every_one_bucket_in_the_group_by> group by url order by sum(bytes) desc limit 10
。对于每个桶,它最多有 10 个结果行。
如何组合这 2 个子查询?
结果应该是这样的:
bucket | url | sum_bytes
----
a | url_1 | 333
a | url_2 | 222
a | url_3 | 222
a | url_4 | 222
a | url_5 | 222
a | url_6 | 222
a | url_7 | 222
a | url_8 | 222
a | url_9 | 222
a | url_10 | 222
b | url_4 | 999
c | url_5 | 444
你找到下面一个了吗
select bucket,url,sum(bytes) as sumbyte from table
group by bucket,url
order by sumbyte desc
您需要使用 limit by-子句:
select bucket, url, sum(bytes) sum_bytes
from (
/* emulate test dataset */
select data.1 bucket, data.2 bytes, data.3 url
from (
select arrayJoin([
('a', 111, 'url_01'),
('a', 222, 'url_01'),
('a', 222, 'url_02'),
('a', 222, 'url_03'),
('a', 222, 'url_04'),
('a', 222, 'url_05'),
('a', 222, 'url_06'),
('a', 222, 'url_07'),
('a', 222, 'url_08'),
('a', 222, 'url_09'),
('a', 222, 'url_10'),
('a', 222, 'url_11'),
('b', 444, 'url_04'),
('b', 555, 'url_04'),
('c', 444, 'url_05')]) data)
)
group by bucket, url
order by bucket, url
limit 10 by bucket
/*
┌─bucket─┬─url────┬─sum_bytes─┐
│ a │ url_01 │ 333 │
│ a │ url_02 │ 222 │
│ a │ url_03 │ 222 │
│ a │ url_04 │ 222 │
│ a │ url_05 │ 222 │
│ a │ url_06 │ 222 │
│ a │ url_07 │ 222 │
│ a │ url_08 │ 222 │
│ a │ url_09 │ 222 │
│ a │ url_10 │ 222 │
│ b │ url_04 │ 999 │
│ c │ url_05 │ 444 │
└────────┴────────┴───────────┘
*/
我的table:
bucket | bytes | url
-----
a | 111 | url_1
a | 222 | url_1
a | 222 | url_2
a | 222 | url_3
a | 222 | url_4
a | 222 | url_5
a | 222 | url_6
a | 222 | url_7
a | 222 | url_8
a | 222 | url_9
a | 222 | url_10
a | 222 | url_11
b | 444 | url_4
b | 555 | url_4
c | 444 | url_5
我要查询:
- 按桶分组,例如
select bucket from table group by bucket
. - 对于每个桶,按 url 分组,如
select sum(bytes) from table where bucket = <every_one_bucket_in_the_group_by> group by url order by sum(bytes) desc limit 10
。对于每个桶,它最多有 10 个结果行。
如何组合这 2 个子查询?
结果应该是这样的:
bucket | url | sum_bytes
----
a | url_1 | 333
a | url_2 | 222
a | url_3 | 222
a | url_4 | 222
a | url_5 | 222
a | url_6 | 222
a | url_7 | 222
a | url_8 | 222
a | url_9 | 222
a | url_10 | 222
b | url_4 | 999
c | url_5 | 444
你找到下面一个了吗
select bucket,url,sum(bytes) as sumbyte from table
group by bucket,url
order by sumbyte desc
您需要使用 limit by-子句:
select bucket, url, sum(bytes) sum_bytes
from (
/* emulate test dataset */
select data.1 bucket, data.2 bytes, data.3 url
from (
select arrayJoin([
('a', 111, 'url_01'),
('a', 222, 'url_01'),
('a', 222, 'url_02'),
('a', 222, 'url_03'),
('a', 222, 'url_04'),
('a', 222, 'url_05'),
('a', 222, 'url_06'),
('a', 222, 'url_07'),
('a', 222, 'url_08'),
('a', 222, 'url_09'),
('a', 222, 'url_10'),
('a', 222, 'url_11'),
('b', 444, 'url_04'),
('b', 555, 'url_04'),
('c', 444, 'url_05')]) data)
)
group by bucket, url
order by bucket, url
limit 10 by bucket
/*
┌─bucket─┬─url────┬─sum_bytes─┐
│ a │ url_01 │ 333 │
│ a │ url_02 │ 222 │
│ a │ url_03 │ 222 │
│ a │ url_04 │ 222 │
│ a │ url_05 │ 222 │
│ a │ url_06 │ 222 │
│ a │ url_07 │ 222 │
│ a │ url_08 │ 222 │
│ a │ url_09 │ 222 │
│ a │ url_10 │ 222 │
│ b │ url_04 │ 999 │
│ c │ url_05 │ 444 │
└────────┴────────┴───────────┘
*/