基于另一个 table 的时间序列查询

Time series query based on another table

初始数据

-- Sample table A: one timestamped row per id.
CREATE TABLE a_table
(
    id         UInt8,
    created_at DateTime
)
ENGINE = MergeTree
-- Empty tuple as the partition key: no column is used for partitioning.
PARTITION BY tuple()
ORDER BY (id);

-- Sample table B: one [started_at, stopped_at] time interval per id.
CREATE TABLE b_table
(
    id         UInt8,
    started_at DateTime,
    stopped_at DateTime
)
ENGINE = MergeTree
-- Empty tuple as the partition key: no column is used for partitioning.
PARTITION BY tuple()
ORDER BY (id);

-- Seed a_table: ids 1-3, created at midnight on three consecutive days.
INSERT INTO a_table (id, created_at)
VALUES
    (1, '2020-01-01 00:00:00'),
    (2, '2020-01-02 00:00:00'),
    (3, '2020-01-03 00:00:00');

-- Seed b_table: each interval runs from 00:00:00 to 23:59:59 of a single day.
-- The interval for id 3 is on 2020-01-04, one day after a_table's id 3 row.
INSERT INTO b_table (id, started_at, stopped_at)
VALUES
    (1, '2020-01-01 00:00:00', '2020-01-01 23:59:59'),
    (2, '2020-01-02 00:00:00', '2020-01-02 23:59:59'),
    (3, '2020-01-04 00:00:00', '2020-01-04 23:59:59');

预期结果:'a_table' 中满足以下条件的行

b_table.started_at >= a_table.created_at AND
b_table.stopped_at <= a_table.created_at
+----+---------------------+
| id | created_at          |
+----+---------------------+
| 1  | 2020-01-01 00:00:00 |
+----+---------------------+
| 2  | 2020-01-02 00:00:00 |
+----+---------------------+

我尝试了什么

-- Attempt 1: plain INNER JOIN on id, with the range check in WHERE.
-- Runs without errors but returns an empty result: as written, the filter
-- asks for stopped_at <= created_at <= started_at.
SELECT a.*
FROM a_table AS a
INNER JOIN b_table AS b
    ON b.id = a.id
WHERE b.started_at >= a.created_at
  AND b.stopped_at <= a.created_at
;

-- Attempt 2: ASOF join against b_table, matching on id plus one inequality
-- on started_at.
SELECT a_table.*
FROM a_table
ASOF INNER JOIN
(
    SELECT
        id,
        started_at,
        stopped_at
    FROM b_table
) AS q
    ON q.id = a_table.id
    AND q.started_at >= a_table.created_at
    -- Enabling the second bound below fails with:
    --   Invalid expression for JOIN ON.
    --   ASOF JOIN expects exactly one inequality in ON section,
    --   unexpected stopped_at <= created_at.
    -- AND q.stopped_at <= a_table.created_at
;

WHERE b_table.started_at >= a_table.created_at ANd b_table.stopped_at <= a_table.created_at

条件写反了:`>=` 和 `<=` 应当互换,即 started_at <= created_at 且 stopped_at >= created_at。

ClickHouse 版本 20.8.7.15:

-- Corrected query: keep the pairs where a_table.created_at lies inside the
-- closed interval [b_table.started_at, b_table.stopped_at] of the b_table row
-- with the same id.
-- Tables are intentionally left unaliased so the duplicated id column keeps
-- its b_table.id name in the result header.
SELECT
    a_table.*,
    b_table.*
FROM a_table
INNER JOIN b_table
    ON b_table.id = a_table.id
WHERE b_table.started_at <= a_table.created_at
  AND b_table.stopped_at >= a_table.created_at
-- Without ORDER BY the row order is not guaranteed; sort by id so the output
-- is reproducible.
ORDER BY a_table.id
;

┌─id─┬──────────created_at─┬─b_table.id─┬──────────started_at─┬──────────stopped_at─┐
│  1 │ 2020-01-01 00:00:00 │          1 │ 2020-01-01 00:00:00 │ 2020-01-01 23:59:59 │
│  2 │ 2020-01-02 00:00:00 │          2 │ 2020-01-02 00:00:00 │ 2020-01-02 23:59:59 │
└────┴─────────────────────┴────────────┴─────────────────────┴─────────────────────┘

在实际生产环境中,这样的查询是行不通的,因为 JOIN 很慢。

需要重新设计。具体该怎么改很难说,因为不清楚为什么会有第二个 table。我可能会使用 range_hashed 布局的外部字典。