clickhouse

Data block trigger timing of Materialized View


We know that a materialized view is triggered by insertion. A large amount of data from one insertion is divided into multiple blocks, and each block triggers one MV select, according to this doc.

Is the MV select triggered only after all the rows of a block have been received? Or does it depend on the select? Say, if the select counts the rows of the inserted data, is it not triggered until all the data has been received? And if the select is just doing data replication / reverse indexing, is it triggered as soon as the first record of the block is received?


Solution

  • Insert triggers the MatView after the block is inserted into the main table, so the insert just passes a pointer to the block of rows (in memory) to the MatView. The MV select therefore always runs on a complete block, never on partially received rows; an aggregate such as count() is computed per block, as the sketch below and the trace that follows show.
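
    -- A hedged sketch of the per-block consequence for aggregating MVs.
    -- Table and view names here are hypothetical, not from the question.
    -- Each insert block writes its own partial counts into the MV target,
    -- so the reader must re-aggregate at query time.
    create table hits(user_id Int64, url String) engine=MergeTree order by user_id;
    
    create materialized view hits_per_user_mv
    engine=SummingMergeTree order by user_id as
    select user_id, count() as cnt from hits group by user_id;
    
    -- always sum the partial per-block counts when reading:
    select user_id, sum(cnt) as cnt from hits_per_user_mv group by user_id;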

    -- by default clickhouse-client forms blocks of 1048545 rows
    -- clickhouse-client itself parses the input stream and inserts into the
    -- server in Native format
    
    select value from system.settings where name = 'max_insert_block_size';
    1048545
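
    -- (side note, a hedged sketch: block formation is also affected by the
    -- squashing settings below; their defaults vary by ClickHouse version)
    select name, value from system.settings
    where name in ('min_insert_block_size_rows', 'min_insert_block_size_bytes');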
    
    
    -- here is a setup that traces the size of each block, in rows;
    -- the Null engine stores nothing, but attached MVs are still triggered
    create table test(a Int, b String) engine=Null;
    create materialized view test_mv
    engine=MergeTree order by ts as
    select now() ts, count() rows from test;
    
    -- create a file with 100 million rows in TSV format
    clickhouse-client -q "select number, 'x' from numbers(100000000) format TSV" >test.tsv
    
    clickhouse-client -q "insert into test format TSV" <test.tsv
    
    select max(rows), min(rows), count() from test_mv;
    ┌─max(rows)─┬─min(rows)─┬─count()─┐
    │   1947598 │        32 │      65 │
    └───────────┴───────────┴─────────┘
    
    -- 1947598 <- several blocks were squashed into a single one because of
    -- parallel parsing
    -- 65 blocks were passed to the MV
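
    -- (a hedged aside: parallel parsing splits the input stream into chunks
    -- that are parsed concurrently and squashed back into blocks; the chunk
    -- size is governed by this setting, whose default varies by version)
    select value from system.settings where name = 'min_chunk_bytes_for_parallel_parsing';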
    
    truncate test_mv;
    
    
    
    -- let's disable parallel parsing
    clickhouse-client --input_format_parallel_parsing=0 -q "insert into test format TSV" <test.tsv
    
    select max(rows), min(rows), count() from test_mv;
    ┌─max(rows)─┬─min(rows)─┬─count()─┐
    │   1048545 │    388225 │      96 │
    └───────────┴───────────┴─────────┘
    
    -- got 1048545 = max_insert_block_size
    -- 96 blocks were passed
    -- 100000000 - 95 * 1048545 = 388225
    -- (95 blocks of max_insert_block_size rows plus 1 remaining block of 388225 rows)
    
    
    
    truncate test_mv;
    
    
    
    -- let's grow max_insert_block_size
    clickhouse-client --max_insert_block_size=10000000000  --input_format_parallel_parsing=0 -q "insert into test format TSV" <test.tsv
    
    select max(rows), min(rows), count() from test_mv;
    ┌─max(rows)─┬─min(rows)─┬─count()─┐
    │ 100000000 │ 100000000 │       1 │
    └───────────┴───────────┴─────────┘
    
    -- 1 block == 100 million rows
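
    -- (another hedged sketch: for INSERT ... SELECT the server forms the
    -- blocks itself, so sizes follow max_block_size and the
    -- min_insert_block_size_* squashing settings rather than
    -- clickhouse-client's max_insert_block_size; reusing the trace tables:)
    truncate test_mv;
    insert into test select number, 'x' from numbers(100000000);
    select max(rows), min(rows), count() from test_mv;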
    

    More: https://kb.altinity.com/altinity-kb-queries-and-syntax/atomic-insert/