【发布时间】:2022-01-18 13:42:42
【问题描述】:
我们知道物化视图是由插入触发的。根据这篇文档,一次插入的大量数据会被拆分成多个 block,每个 block 都会触发一次物化视图(MV)的 SELECT。
MV 的 SELECT 是在收到一个 block 的全部行之后才触发吗?还是取决于 SELECT 本身?比如,如果 SELECT 要统计插入数据的行数,是否要等收到所有数据之后才会触发?而如果 SELECT 只是做数据复制/反向索引,是否收到 block 中的一条记录就会触发?
【问题讨论】:
标签: clickhouse
我们知道物化视图是由插入触发的。根据这篇文档,一次插入的大量数据会被拆分成多个 block,每个 block 都会触发一次物化视图(MV)的 SELECT。
MV 的 SELECT 是在收到一个 block 的全部行之后才触发吗?还是取决于 SELECT 本身?比如,如果 SELECT 要统计插入数据的行数,是否要等收到所有数据之后才会触发?而如果 SELECT 只是做数据复制/反向索引,是否收到 block 中的一条记录就会触发?
【问题讨论】:
标签: clickhouse
一个 block 写入主表之后,INSERT 才会触发物化视图(MatView)。因此 INSERT 只是把指向该行块(位于内存中)的指针传递给物化视图。
-- By default clickhouse-client forms blocks of 1048545 rows
-- (the max_insert_block_size value returned by the query below).
-- clickhouse-client parses the input stream by itself and inserts the
-- data into the server in Native format.
select value from system.settings where name = 'max_insert_block_size';
1048545
-- Setup that traces the size (in rows) of every block passed to the view.
-- Each run of the view's select stores the current time (ts) together with
-- the number of rows it saw (rows).
create table test
(
    a Int,
    b String
)
engine = Null;

create materialized view test_mv
engine = MergeTree
order by ts
as
select
    now() as ts,
    count() as rows
from test;
-- Create a file with 100 million rows in TSV format
clickhouse-client -q "select number, 'x' from numbers(100000000) format TSV" >test.tsv
-- Insert the file without overriding any client settings
clickhouse-client -q "insert into test format TSV" <test.tsv
select max(rows), min(rows), count() from test_mv;
┌─max(rows)─┬─min(rows)─┬─count()─┐
│ 1947598 │ 32 │ 65 │
└───────────┴───────────┴─────────┘
-- max(rows) = 1947598: several blocks were squashed into a single block
-- because of parallel parsing
-- count() = 65: 65 blocks were passed
-- Clear the block sizes recorded by the previous run
truncate test_mv;
-- Let's disable parallel parsing
clickhouse-client --input_format_parallel_parsing=0 -q "insert into test format TSV" <test.tsv
select max(rows), min(rows), count() from test_mv;
┌─max(rows)─┬─min(rows)─┬─count()─┐
│ 1048545 │ 388225 │ 96 │
└───────────┴───────────┴─────────┘
-- max(rows) = 1048545 = max_insert_block_size
-- count() = 96: 96 blocks were passed
-- min(rows): 100000000 - 95 * 1048545 = 388225
-- (95 blocks of max_insert_block_size rows plus 1 remaining block of 388225 rows)
-- Clear the block sizes recorded by the previous run
truncate test_mv;
-- Let's raise max_insert_block_size above the number of rows in the file,
-- still with parallel parsing disabled
clickhouse-client --max_insert_block_size=10000000000 --input_format_parallel_parsing=0 -q "insert into test format TSV" <test.tsv
select max(rows), min(rows), count() from test_mv;
┌─max(rows)─┬─min(rows)─┬─count()─┐
│ 100000000 │ 100000000 │ 1 │
└───────────┴───────────┴─────────┘
-- 1 block == 100 million rows: the whole file was passed as a single block
更多信息请参见:https://kb.altinity.com/altinity-kb-queries-and-syntax/atomic-insert/
【讨论】: