Data Block Trigger Timing of Materialized Views
We know that materialized views are triggered by inserts. A large insert is split into multiple blocks, and each block triggers one MV select, according to this doc.
Does the MV select fire only after all rows of a block have been received, or does that depend on the select? For example, if the select counts the inserted rows, is it held back until all the data has arrived? And if the select only does data replication / an inverted index, is it triggered as soon as the first record of a block arrives?
The MatView is triggered after a block has been inserted into the main table, so the insert simply passes a pointer to the block of rows (in memory) into the MatView.
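To make this concrete, here is a minimal sketch (the src / src_mv names are hypothetical, separate from the setup below): an aggregate such as count() in the MV select is computed over the rows of the inserted block only, never over the whole source table.
-- a minimal sketch: the MV select runs once per inserted block
-- and sees only that block's rows
create table src(x Int) engine=Null;
create materialized view src_mv engine=MergeTree order by tuple() as
select count() c from src;
insert into src values (1),(2),(3); -- one block of 3 rows
select c from src_mv;               -- returns 3: count() saw just this block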
-- by default clickhouse-client forms blocks of max_insert_block_size = 1048545 rows
-- clickhouse-client itself parses the input stream and inserts into
-- the server in Native format
select value from system.settings where name = 'max_insert_block_size';
1048545
-- here is a setup which traces sizes of blocks in rows
create table test(a Int, b String) engine=Null;
create materialized view test_mv
engine=MergeTree order by ts as
select now() ts, count() rows from test;
-- create file with 100mil rows in TSV format
clickhouse-client -q "select number, 'x' from numbers(100000000) format TSV" >test.tsv
clickhouse-client -q "insert into test format TSV" <test.tsv
select max(rows), min(rows), count() from test_mv;
┌─max(rows)─┬─min(rows)─┬─count()─┐
│ 1947598 │ 32 │ 65 │
└───────────┴───────────┴─────────┘
-- 1947598 <- several blocks were squashed into a single block
-- because of parallel parsing
-- 65 blocks were passed
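A hedged aside on why the sizes are irregular: parallel parsing splits the input stream into byte-sized chunks before parsing, and parsed chunks may later be squashed together, so the row counts per block vary. The chunk size is controlled by the min_chunk_bytes_for_parallel_parsing setting:
-- chunk size (in bytes) used to split the stream for parallel parsing
select value from system.settings where name = 'min_chunk_bytes_for_parallel_parsing';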
truncate test_mv;
-- let's disable parallel parsing
clickhouse-client --input_format_parallel_parsing=0 -q "insert into test format TSV" <test.tsv
select max(rows), min(rows), count() from test_mv;
┌─max(rows)─┬─min(rows)─┬─count()─┐
│ 1048545 │ 388225 │ 96 │
└───────────┴───────────┴─────────┘
-- got 1048545 = max_insert_block_size
-- 96 blocks were passed:
-- 95 full blocks of max_insert_block_size rows plus one remaining block of
-- 100000000 - 95 * 1048545 = 388225 rows
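As a quick sanity check, the traced block sizes should add up to the row count of the input file:
select sum(rows) from test_mv; -- expect 100000000 (run before the truncate below)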
truncate test_mv;
-- let's grow max_insert_block_size
clickhouse-client --max_insert_block_size=10000000000 --input_format_parallel_parsing=0 -q "insert into test format TSV" <test.tsv
select max(rows), min(rows), count() from test_mv;
┌─max(rows)─┬─min(rows)─┬─count()─┐
│ 100000000 │ 100000000 │ 1 │
└───────────┴───────────┴─────────┘
-- 1 block == 100 mil rows
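One caveat worth noting: with a single 100-million-row block, clickhouse-client buffers the whole insert in memory before sending it. If query_log is enabled, the peak memory of the insert can be inspected there (the LIKE pattern is just an illustration):
select memory_usage from system.query_log
where type = 'QueryFinish' and query like 'insert into test%'
order by event_time desc limit 1;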
More: https://kb.altinity.com/altinity-kb-queries-and-syntax/atomic-insert/