期望分布式表返回每个分片的结果，而不是聚合值
Expect the distributed table to return the results of each shard, not the aggregated value
有一个用户标签表 table_tag，对应的分布式表是 table_tag_all，集群中有 6 个分片，sharding_key 为 intHash64(user_id)。
通过设置参数 distributed_product_mode='local' 和 distributed_group_by_no_merge=1，使返回的结果是 6 个单独分片的值，而不是聚合值。
以下是两个测试。
测试 1 得到正确的结果(count_1 的 6 条记录),但测试 2 是聚合的(只有 2 条记录)。
如何让测试 2 返回 6 个分片各自的结果？
/* Local user-tag table, created on every node of cluster c1.
   Fixes over the original DDL:
     - `dt` had no data type;
     - the settings clause was spelled `setting` instead of `SETTINGS`;
     - no storage engine was declared. */
CREATE TABLE database.table_tag ON CLUSTER c1
(
    user_id String,
    tag1 String,
    /* String, because the queries filter with dt IN ('202102', '202101'). */
    dt String
)
/* NOTE(review): engine inferred from the MergeTree-family clauses below
   (PARTITION BY / ORDER BY / index_granularity); use ReplicatedMergeTree
   instead if the shards are replicated -- confirm against the cluster. */
ENGINE = MergeTree
PARTITION BY dt
ORDER BY user_id
SETTINGS index_granularity = 128;
/* test 1: number of rows tagged '1', computed with shard results left
   unmerged (the question reports 6 rows for this query). */
SELECT sum(CASE WHEN tag1 = '1' THEN 1 ELSE 0 END) AS count_1
FROM database.table_tag_all
WHERE dt IN ('202102', '202101')
SETTINGS distributed_group_by_no_merge = 1;
/* test 2: how many users have each per-user count of tag '1'.
   Fix: the original had a `;` after the inner SETTINGS clause, which ended
   the statement in the middle of the subquery (syntax error). */
SELECT
    count_1,
    count(1)
FROM
(
    /* One row per user_id with that user's number of '1' tags. */
    SELECT
        user_id,
        sum(CASE WHEN tag1 = '1' THEN 1 ELSE 0 END) AS count_1
    FROM database.table_tag_all
    WHERE dt IN ('202102', '202101')
    GROUP BY user_id
    SETTINGS distributed_group_by_no_merge = 1
) AS t1
GROUP BY count_1
SETTINGS distributed_group_by_no_merge = 1;
/*
Result reported in the question (merged across shards, only 2 rows):
count_1 count()
0 1222122
1 33821
*/
试试这个查询:
/* Suggested query: label every per-user row with the shard it came from
   (_shard_num), then group the outer query by (shard_number, count_1) so the
   final counts are reported separately for each shard. */
SELECT shard_number, count_1, count()
FROM
(
    SELECT
        any(_shard_num)     AS shard_number,
        countIf(tag1 = '1') AS count_1
    FROM database.table_tag_all
    WHERE dt IN ('202102', '202101')
    GROUP BY user_id
) AS per_user
GROUP BY shard_number, count_1
SETTINGS distributed_group_by_no_merge = 1
有一个用户标签表 table_tag，对应的分布式表是 table_tag_all，集群中有 6 个分片，sharding_key 为 intHash64(user_id)。
通过设置参数 distributed_product_mode='local' 和 distributed_group_by_no_merge=1，使返回的结果是 6 个单独分片的值，而不是聚合值。
以下是两个测试。测试 1 得到正确的结果（count_1 的 6 条记录），但测试 2 是聚合的（只有 2 条记录）。如何让测试 2 返回 6 个分片各自的结果？
/* Local user-tag table, created on every node of cluster c1.
   Fixes over the original DDL:
     - `dt` had no data type;
     - the settings clause was spelled `setting` instead of `SETTINGS`;
     - no storage engine was declared. */
CREATE TABLE database.table_tag ON CLUSTER c1
(
    user_id String,
    tag1 String,
    /* String, because the queries filter with dt IN ('202102', '202101'). */
    dt String
)
/* NOTE(review): engine inferred from the MergeTree-family clauses below
   (PARTITION BY / ORDER BY / index_granularity); use ReplicatedMergeTree
   instead if the shards are replicated -- confirm against the cluster. */
ENGINE = MergeTree
PARTITION BY dt
ORDER BY user_id
SETTINGS index_granularity = 128;
/* test 1: number of rows tagged '1', computed with shard results left
   unmerged (the question reports 6 rows for this query). */
SELECT sum(CASE WHEN tag1 = '1' THEN 1 ELSE 0 END) AS count_1
FROM database.table_tag_all
WHERE dt IN ('202102', '202101')
SETTINGS distributed_group_by_no_merge = 1;
/* test 2: how many users have each per-user count of tag '1'.
   Fix: the original had a `;` after the inner SETTINGS clause, which ended
   the statement in the middle of the subquery (syntax error). */
SELECT
    count_1,
    count(1)
FROM
(
    /* One row per user_id with that user's number of '1' tags. */
    SELECT
        user_id,
        sum(CASE WHEN tag1 = '1' THEN 1 ELSE 0 END) AS count_1
    FROM database.table_tag_all
    WHERE dt IN ('202102', '202101')
    GROUP BY user_id
    SETTINGS distributed_group_by_no_merge = 1
) AS t1
GROUP BY count_1
SETTINGS distributed_group_by_no_merge = 1;
/*
Result reported in the question (merged across shards, only 2 rows):
count_1 count()
0 1222122
1 33821
*/
试试这个查询:
/* Suggested query: label every per-user row with the shard it came from
   (_shard_num), then group the outer query by (shard_number, count_1) so the
   final counts are reported separately for each shard. */
SELECT shard_number, count_1, count()
FROM
(
    SELECT
        any(_shard_num)     AS shard_number,
        countIf(tag1 = '1') AS count_1
    FROM database.table_tag_all
    WHERE dt IN ('202102', '202101')
    GROUP BY user_id
) AS per_user
GROUP BY shard_number, count_1
SETTINGS distributed_group_by_no_merge = 1