实体化视图的填充不正确
Incorrect populating of materialized view
'test_sessions'table
CREATE TABLE IF NOT EXISTS test_sessions (
id UInt64,
name String,
created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;
'test_sessions'table数据
INSERT INTO test_sessions(id, name, created_at) VALUES
(1, 'start', now()),
(1, 'stop', now() + INTERVAL 1 day),
(2, 'start', now() + INTERVAL 1 HOUR );
+----+-------+---------------------+
| id | name | created_at |
+----+-------+---------------------+
| 1 | start | 2020-11-10 07:58:19 |
+----+-------+---------------------+
| 2 | start | 2020-11-10 08:58:19 |
+----+-------+---------------------+
| 1 | stop | 2020-11-11 07:58:19 |
+----+-------+---------------------+
'finished_sessions'物化视图
CREATE MATERIALIZED VIEW finished_sessions (
id UInt64,
start_at DateTime,
end_at DateTime
)
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(start_at)
ORDER BY (id)
POPULATE AS
SELECT
id,
minIf(created_at, name = 'start') AS start_at,
maxIf(created_at, name = 'stop') AS end_at
FROM test_sessions
GROUP BY id
HAVING end_at <> '1970-01-01 00:00:00';
'finished_sessions'物化视图数据
SELECT * FROM finished_sessions;
+----+---------------------+---------------------+
| id | start_at | end_at |
+----+---------------------+---------------------+
| 1 | 2020-11-10 07:58:19 | 2020-11-11 07:58:19 |
+----+---------------------+---------------------+
直到这一刻,一切正常:只有 1 个关闭的会话
第二场结束后
INSERT INTO test_sessions(id, name, created_at) VALUES
(2, 'stop', now())
发生错误填充
SELECT * from finished_sessions ORDER BY id;
+----+-------------------------------+---------------------+
| id | start_at | end_at |
+----+-------------------------------+---------------------+
| 1 | 2020-11-10 07:58:19 | 2020-11-11 07:58:19 |
+----+-------------------------------+---------------------+
| 2 | ---> 1970-01-01 00:00:00 <--- | 2020-11-10 08:06:24 |
+----+-------------------------------+---------------------+
如何解决?
你应该使用 AggregateFunction 或更好的 SimpleAggregateFunction
无法通过 AggregateFunction 划分 table。因为 AggregateFunction 是在合并期间计算的,并且合并是在分区上执行的。
MV 是一个插入触发器。 https://youtu.be/ckChUkC3Pns?list=PLO3lfQbpDVI-hyw4MyqxEk3rDHw95SzxJ https://den-crane.github.io/Everything_you_should_know_about_materialized_views_commented.pdf
CREATE TABLE IF NOT EXISTS test_sessions (
id UInt64,
name String,
created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;
INSERT INTO test_sessions(id, name, created_at) VALUES
(1, 'start', now()),
(1, 'stop', now() + INTERVAL 1 day),
(2, 'start', now() + INTERVAL 1 HOUR );
CREATE MATERIALIZED VIEW finished_sessions
ENGINE = AggregatingMergeTree
ORDER BY (id)
POPULATE AS
SELECT
id,
minStateIf(created_at, name = 'start') AS start_at,
maxStateIf(created_at, name = 'stop') AS end_at
FROM test_sessions
GROUP BY id
INSERT INTO test_sessions(id, name, created_at) VALUES
(2, 'stop', now());
SELECT
id,
minMerge(start_at),
maxMerge(end_at)
FROM finished_sessions
GROUP BY id
Query id: d797eee4-6088-40b8-aa12-b10da62b60c5
┌─id─┬──minMerge(start_at)─┬────maxMerge(end_at)─┐
│ 2 │ 2020-11-10 15:18:19 │ 2020-11-10 14:21:54 │
│ 1 │ 2020-11-10 14:18:19 │ 2020-11-11 14:18:19 │
└────┴─────────────────────┴─────────────────────┘
CREATE TABLE IF NOT EXISTS test_sessions (
id UInt64,
name String,
created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;
INSERT INTO test_sessions(id, name, created_at) VALUES
(1, 'start', now()),
(1, 'stop', now() + INTERVAL 1 day),
(2, 'start', now() + INTERVAL 1 HOUR );
CREATE MATERIALIZED VIEW finished_sessions
(
id UInt64,
start_at SimpleAggregateFunction(min,DateTime),
end_at SimpleAggregateFunction(max,DateTime)
)
ENGINE = AggregatingMergeTree
ORDER BY (id)
POPULATE AS
SELECT
id,
minIf(created_at, name = 'start') AS start_at,
maxIf(created_at, name = 'stop') AS end_at
FROM test_sessions
GROUP BY id;
INSERT INTO test_sessions(id, name, created_at) VALUES
(2, 'stop', now())
optimize table finished_sessions final;
SELECT
id,
min(start_at),
max(end_at)
FROM finished_sessions
GROUP BY id
┌─id─┬───────min(start_at)─┬─────────max(end_at)─┐
│ 2 │ 1970-01-01 00:00:00 │ 2020-11-10 14:29:30 │
│ 1 │ 2020-11-10 14:29:15 │ 2020-11-11 14:29:15 │
└────┴─────────────────────┴─────────────────────┘
'test_sessions'table
CREATE TABLE IF NOT EXISTS test_sessions (
id UInt64,
name String,
created_at DateTime
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(created_at)
ORDER BY name;
'test_sessions'table数据
INSERT INTO test_sessions(id, name, created_at) VALUES
(1, 'start', now()),
(1, 'stop', now() + INTERVAL 1 day),
(2, 'start', now() + INTERVAL 1 HOUR );
+----+-------+---------------------+
| id | name | created_at |
+----+-------+---------------------+
| 1 | start | 2020-11-10 07:58:19 |
+----+-------+---------------------+
| 2 | start | 2020-11-10 08:58:19 |
+----+-------+---------------------+
| 1 | stop | 2020-11-11 07:58:19 |
+----+-------+---------------------+
'finished_sessions'物化视图
CREATE MATERIALIZED VIEW finished_sessions (
id UInt64,
start_at DateTime,
end_at DateTime
)
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMM(start_at)
ORDER BY (id)
POPULATE AS
SELECT
id,
minIf(created_at, name = 'start') AS start_at,
maxIf(created_at, name = 'stop') AS end_at
FROM test_sessions
GROUP BY id
HAVING end_at <> '1970-01-01 00:00:00';
'finished_sessions'物化视图数据
SELECT * FROM finished_sessions;
+----+---------------------+---------------------+
| id | start_at | end_at |
+----+---------------------+---------------------+
| 1 | 2020-11-10 07:58:19 | 2020-11-11 07:58:19 |
+----+---------------------+---------------------+
直到这一刻,一切正常:只有 1 个关闭的会话
第二场结束后
INSERT INTO test_sessions(id, name, created_at) VALUES
(2, 'stop', now())
发生错误填充
SELECT * from finished_sessions ORDER BY id;
+----+-------------------------------+---------------------+
| id | start_at | end_at |
+----+-------------------------------+---------------------+
| 1 | 2020-11-10 07:58:19 | 2020-11-11 07:58:19 |
+----+-------------------------------+---------------------+
| 2 | ---> 1970-01-01 00:00:00 <--- | 2020-11-10 08:06:24 |
+----+-------------------------------+---------------------+
如何解决?
你应该使用 AggregateFunction 或更好的 SimpleAggregateFunction
无法通过 AggregateFunction 划分 table。因为 AggregateFunction 是在合并期间计算的,并且合并是在分区上执行的。
MV 是一个插入触发器。 https://youtu.be/ckChUkC3Pns?list=PLO3lfQbpDVI-hyw4MyqxEk3rDHw95SzxJ https://den-crane.github.io/Everything_you_should_know_about_materialized_views_commented.pdf
CREATE TABLE IF NOT EXISTS test_sessions ( id UInt64, name String, created_at DateTime ) ENGINE = MergeTree() PARTITION BY toYYYYMM(created_at) ORDER BY name; INSERT INTO test_sessions(id, name, created_at) VALUES (1, 'start', now()), (1, 'stop', now() + INTERVAL 1 day), (2, 'start', now() + INTERVAL 1 HOUR ); CREATE MATERIALIZED VIEW finished_sessions ENGINE = AggregatingMergeTree ORDER BY (id) POPULATE AS SELECT id, minStateIf(created_at, name = 'start') AS start_at, maxStateIf(created_at, name = 'stop') AS end_at FROM test_sessions GROUP BY id INSERT INTO test_sessions(id, name, created_at) VALUES (2, 'stop', now()); SELECT id, minMerge(start_at), maxMerge(end_at) FROM finished_sessions GROUP BY id Query id: d797eee4-6088-40b8-aa12-b10da62b60c5 ┌─id─┬──minMerge(start_at)─┬────maxMerge(end_at)─┐ │ 2 │ 2020-11-10 15:18:19 │ 2020-11-10 14:21:54 │ │ 1 │ 2020-11-10 14:18:19 │ 2020-11-11 14:18:19 │ └────┴─────────────────────┴─────────────────────┘
CREATE TABLE IF NOT EXISTS test_sessions ( id UInt64, name String, created_at DateTime ) ENGINE = MergeTree() PARTITION BY toYYYYMM(created_at) ORDER BY name; INSERT INTO test_sessions(id, name, created_at) VALUES (1, 'start', now()), (1, 'stop', now() + INTERVAL 1 day), (2, 'start', now() + INTERVAL 1 HOUR ); CREATE MATERIALIZED VIEW finished_sessions ( id UInt64, start_at SimpleAggregateFunction(min,DateTime), end_at SimpleAggregateFunction(max,DateTime) ) ENGINE = AggregatingMergeTree ORDER BY (id) POPULATE AS SELECT id, minIf(created_at, name = 'start') AS start_at, maxIf(created_at, name = 'stop') AS end_at FROM test_sessions GROUP BY id; INSERT INTO test_sessions(id, name, created_at) VALUES (2, 'stop', now()) optimize table finished_sessions final; SELECT id, min(start_at), max(end_at) FROM finished_sessions GROUP BY id ┌─id─┬───────min(start_at)─┬─────────max(end_at)─┐ │ 2 │ 1970-01-01 00:00:00 │ 2020-11-10 14:29:30 │ │ 1 │ 2020-11-10 14:29:15 │ 2020-11-11 14:29:15 │ └────┴─────────────────────┴─────────────────────┘