物化视图的数据块触发时序

Data block trigger timing of Materialized View

我们知道物化视图（MV）是由插入触发的。根据文档，一次插入的大量数据会被分成多个块，每个块触发一次 MV 的 select。

MV 的 select 是在收到一个块的所有行之后才触发，还是取决于 select 本身？比如说，如果 select 要计算插入数据的行数，是否要等接收完所有数据之后才会触发？而如果 select 只是做数据复制（replication）或构建反向索引（reversed index），是否收到块中的一条记录就会触发？

块先被插入到主表（table），然后由这次插入触发物化视图（MatView）。所以插入只是把指向（内存中）行块的指针传递给 MatView；也就是说，MV 的 select 每次处理的是一个完整的块，而不是逐行触发。

-- clickhouse-client parses the input stream itself and sends the data to the
-- server in Native format; by default it forms blocks of 1048545 rows,
-- which is the value of max_insert_block_size:

SELECT value
FROM system.settings
WHERE name = 'max_insert_block_size';
1048545


-- Setup that records the size (in rows) of every block passed to the MV:
-- the source table uses the Null engine, and test_mv stores a timestamp
-- together with the row count computed by its select.
CREATE TABLE test (a Int, b String) ENGINE = Null;

CREATE MATERIALIZED VIEW test_mv
ENGINE = MergeTree
ORDER BY ts
AS
SELECT
    now() AS ts,
    count() AS rows
FROM test;

-- Produce a TSV file holding 100 million rows
clickhouse-client --query "select number, 'x' from numbers(100000000) format TSV" > test.tsv

-- First run: insert the file with no extra client options
clickhouse-client --query "insert into test format TSV" < test.tsv

-- test_mv holds one row per block: largest block, smallest block, block count
SELECT
    max(rows),
    min(rows),
    count()
FROM test_mv;
┌─max(rows)─┬─min(rows)─┬─count()─┐
│   1947598 │        32 │      65 │
└───────────┴───────────┴─────────┘

-- 1947598 <- several blocks were squashed into a single block because
-- of parallel parsing
-- 65 blocks were passed

TRUNCATE TABLE test_mv;

-- Second run: repeat the insert with parallel parsing disabled
clickhouse-client --input_format_parallel_parsing=0 --query "insert into test format TSV" < test.tsv

SELECT
    max(rows),
    min(rows),
    count()
FROM test_mv;
┌─max(rows)─┬─min(rows)─┬─count()─┐
│   1048545 │    388225 │      96 │
└───────────┴───────────┴─────────┘

-- got 1048545 = max_insert_block_size
-- 96 blocks were passed
-- 100000000 - 95 * 1048545 = 388225
-- (95 blocks of max_insert_block_size rows and 1 remaining block of 388225 rows)



TRUNCATE TABLE test_mv;

-- Third run: parallel parsing still disabled, and max_insert_block_size
-- raised far above the number of rows in the file
clickhouse-client --max_insert_block_size=10000000000 --input_format_parallel_parsing=0 --query "insert into test format TSV" < test.tsv

SELECT
    max(rows),
    min(rows),
    count()
FROM test_mv;
┌─max(rows)─┬─min(rows)─┬─count()─┐
│ 100000000 │ 100000000 │       1 │
└───────────┴───────────┴─────────┘

-- 1 block == 100 mil rows

更多信息：https://kb.altinity.com/altinity-kb-queries-and-syntax/atomic-insert/