Clickhouse:计算多个数组列的平均值
Clickhouse: calculating average on multiple array columns
我有一个 table,其中包含遥测信息,例如冷却风扇速度、温度等。
遥测数据存储在数组中,因为冷却风扇或其他项目的数量是动态的,可能会不时发生变化,并且最大大小未知。
我正在尝试编写查询,以获取某一时间段内(例如 5 分钟)的平均风扇速度、温度和功耗。
Table 看起来像这样:
-- Telemetry table. The array columns hold one entry per monitored item, so
-- their length may differ between rows (per the surrounding description).
-- NOTE(review): units of temp / fan / power are not stated here — confirm.
CREATE TABLE stats
(
    `datetime`      DateTime('UTC') DEFAULT now(),  -- UTC timestamp; defaults to insert time
    `worker_id`     UInt64,
    `project_id`    UInt64,
    `platform_type` UInt8,
    `temp`          Array(Int32),                   -- temperature readings
    `fan`           Array(Int32),                   -- fan readings
    `units`         Int32,
    `power`         Array(Float32),                 -- power readings
    `power_total`   Float32
)
ENGINE = MergeTree()
-- one partition per calendar month
PARTITION BY toYYYYMM(datetime)
-- sorting key: rows of one worker are stored together in time order
ORDER BY (worker_id, datetime);
数据看起来像这样:
┌────────────datetime─┬─worker_id─┬─temp─────────────────────────────────────┬─power─────────────────────────────────────┬─fan──────────────────────────────────────┐
│ 2021-07-26 16:37:00 │ 3081 │ [51,55,53,51,56,62,58,57,55,64,63,55,61] │ [120,91,60,91,60,90,90,60,90,89,90,60,89] │ [70,70,70,70,70,70,70,70,70,80,80,70,70] │
│ 2021-07-26 16:37:10 │ 3081 │ [51,56,54,52,56,63,58,58,55,64,63,56,62] │ [120,89,60,90,60,90,89,60,88,88,90,60,90] │ [70,70,70,70,70,70,70,70,70,80,80,70,70] │
│ 2021-07-26 16:37:20 │ 3081 │ [51,56,54,52,56,63,58,58,55,64,63,56,62] │ [120,91,60,92,60,90,91,60,88,88,90,60,89] │ [70,70,70,70,70,70,70,70,70,80,80,70,70] │
│ 2021-07-26 16:37:30 │ 3081 │ [51,56,54,52,57,62,58,58,56,64,63,57,62] │ [119,90,60,92,60,90,91,60,91,88,90,60,89] │ [70,70,70,69,70,70,70,70,70,80,80,70,70] │
│ 2021-07-26 16:37:40 │ 3081 │ [51,56,55,52,57,63,58,58,56,64,63,56,62] │ [119,91,60,88,60,90,90,60,90,90,89,60,89] │ [70,70,70,70,70,70,70,70,68,80,80,70,70] │
└─────────────────────┴───────────┴──────────────────────────────────────────┴───────────────────────────────────────────┴──────────────────────────────────────────┘
我能够编写一个查询来获取单个列的平均值,但很难为其余列编写一个最佳查询。
-- Average temperature per array position for one worker over a time window.
-- ARRAY JOIN unrolls each `temp` array into one row per element, and
-- arrayEnumerate supplies the 1-based position so that readings from the same
-- slot are averaged together.
-- The inner query only filters rows: it carries no ORDER BY (avg does not
-- depend on row order) and selects only the columns the outer query reads.
SELECT
    worker_id,
    round(avg(temp_value), 1) AS temp_avg,
    temp_index
FROM
(
    SELECT
        worker_id,
        temp
    FROM stats
    WHERE `worker_id` = 3081
      -- BETWEEN is inclusive on both ends of the window
      AND `datetime` BETWEEN toDateTime('2021-07-26 19:40:00', 'UTC')
                         AND toDateTime('2021-07-26 19:45:00', 'UTC')
)
ARRAY JOIN
    temp AS temp_value,
    arrayEnumerate(temp) AS temp_index
GROUP BY worker_id, temp_index
ORDER BY temp_index ASC;
你能建议我一个最佳查询来计算 temp、fan、power 和 power_total 的平均值吗?
考虑使用 ForEach 聚合函数组合器:
-- Element-wise averages using the -ForEach combinator: avgForEach aggregates
-- the items at the same array position across the grouped rows and returns
-- an array of results.
SELECT
    worker_id,
    avgForEach(temp)  AS temp_avg,
    avgForEach(power) AS power_avg,
    avgForEach(fan)   AS fan_avg
FROM
(
    /* emulate the test dataset: unpack each sample tuple into named columns */
    SELECT
        sample.1 AS datetime,
        sample.2 AS worker_id,
        sample.3 AS temp,
        sample.4 AS power,
        sample.5 AS fan
    FROM
    (
        SELECT arrayJoin([
            ('2021-07-26 16:37:00', 3081, [51,55,53,51,56,62,58,57,55,64,63,55,61], [120,91,60,91,60,90,90,60,90,89,90,60,89], [70,70,70,70,70,70,70,70,70,80,80,70,70]),
            ('2021-07-26 16:37:10', 3081, [51,56,54,52,56,63,58,58,55,64,63,56,62], [120,89,60,90,60,90,89,60,88,88,90,60,90], [70,70,70,70,70,70,70,70,70,80,80,70,70]),
            ('2021-07-26 16:37:20', 3081, [51,56,54,52,56,63,58,58,55,64,63,56,62], [120,91,60,92,60,90,91,60,88,88,90,60,89], [70,70,70,70,70,70,70,70,70,80,80,70,70]),
            ('2021-07-26 16:37:30', 3081, [51,56,54,52,57,62,58,58,56,64,63,57,62], [119,90,60,92,60,90,91,60,91,88,90,60,89], [70,70,70,69,70,70,70,70,70,80,80,70,70]),
            ('2021-07-26 16:37:40', 3081, [51,56,55,52,57,63,58,58,56,64,63,56,62], [119,91,60,88,60,90,90,60,90,90,89,60,89], [70,70,70,70,70,70,70,70,68,80,80,70,70])
        ]) AS sample
    )
)
/*WHERE {timerange condition}*/
GROUP BY worker_id
/*
┌─worker_id─┬─temp_avg───────────────────────────────────────────────┬─power_avg─────────────────────────────────────────────────┬─fan_avg──────────────────────────────────────┐
│ 3081 │ [51,55.8,54,51.8,56.4,62.6,58,57.8,55.4,64,63,56,61.8] │ [119.6,90.4,60,90.6,60,90,90.2,60,89.4,88.6,89.8,60,89.2] │ [70,70,70,69.8,70,70,70,70,69.6,80,80,70,70] │
└───────────┴────────────────────────────────────────────────────────┴───────────────────────────────────────────────────────────┴──────────────────────────────────────────────┘
*/
-- Per-position averages of temp, fan and power for one worker over a time
-- window. A single ARRAY JOIN unrolls the three arrays in lockstep, so by
-- default they must have the same length in every selected row (ClickHouse
-- rejects mismatched sizes unless enable_unaligned_array_join is set).
-- temp_index is the 1-based position shared by all three value columns.
-- The inner query only filters rows: it carries no ORDER BY (avg does not
-- depend on row order) and selects only the columns the outer query reads.
SELECT
    worker_id,
    round(avg(temp_value), 1)  AS temp_avg,
    round(avg(fan_value), 1)   AS fan_avg,
    round(avg(power_value), 1) AS power_avg,
    temp_index
FROM
(
    SELECT
        worker_id,
        temp,
        fan,
        power
    FROM stats
    WHERE `worker_id` = 3081
      -- BETWEEN is inclusive on both ends of the window
      AND `datetime` BETWEEN toDateTime('2021-07-26 19:40:00', 'UTC')
                         AND toDateTime('2021-07-26 19:45:00', 'UTC')
)
ARRAY JOIN
    temp AS temp_value,
    fan AS fan_value,
    power AS power_value,
    arrayEnumerate(temp) AS temp_index
GROUP BY worker_id, temp_index
ORDER BY temp_index ASC;
-- Averages for one worker over a time window, one result row per worker:
--   * avgForEach(...) averages the array columns position by position and
--     returns an array per column;
--   * avg(power_total) is the plain average of the scalar column, which the
--     original request also asked for.
SELECT
    worker_id,
    avgForEach(temp),
    avgForEach(fan),
    avgForEach(power),
    avg(power_total)
FROM stats
WHERE `worker_id` = 3081
  -- BETWEEN is inclusive on both ends of the window
  AND `datetime` BETWEEN toDateTime('2021-07-26 19:40:00', 'UTC')
                     AND toDateTime('2021-07-26 19:45:00', 'UTC')
GROUP BY worker_id;
我有一个 table,其中包含遥测信息,例如冷却风扇速度、温度等。
遥测数据存储在数组中,因为冷却风扇或其他项目的数量是动态的,可能会不时发生变化,并且最大大小未知。
我正在尝试编写查询,以获取某一时间段内(例如 5 分钟)的平均风扇速度、温度和功耗。
Table 看起来像这样:
-- Telemetry table. The array columns hold one entry per monitored item, so
-- their length may differ between rows (per the surrounding description).
-- NOTE(review): units of temp / fan / power are not stated here — confirm.
CREATE TABLE stats
(
    `datetime`      DateTime('UTC') DEFAULT now(),  -- UTC timestamp; defaults to insert time
    `worker_id`     UInt64,
    `project_id`    UInt64,
    `platform_type` UInt8,
    `temp`          Array(Int32),                   -- temperature readings
    `fan`           Array(Int32),                   -- fan readings
    `units`         Int32,
    `power`         Array(Float32),                 -- power readings
    `power_total`   Float32
)
ENGINE = MergeTree()
-- one partition per calendar month
PARTITION BY toYYYYMM(datetime)
-- sorting key: rows of one worker are stored together in time order
ORDER BY (worker_id, datetime);
数据看起来像这样:
┌────────────datetime─┬─worker_id─┬─temp─────────────────────────────────────┬─power─────────────────────────────────────┬─fan──────────────────────────────────────┐
│ 2021-07-26 16:37:00 │ 3081 │ [51,55,53,51,56,62,58,57,55,64,63,55,61] │ [120,91,60,91,60,90,90,60,90,89,90,60,89] │ [70,70,70,70,70,70,70,70,70,80,80,70,70] │
│ 2021-07-26 16:37:10 │ 3081 │ [51,56,54,52,56,63,58,58,55,64,63,56,62] │ [120,89,60,90,60,90,89,60,88,88,90,60,90] │ [70,70,70,70,70,70,70,70,70,80,80,70,70] │
│ 2021-07-26 16:37:20 │ 3081 │ [51,56,54,52,56,63,58,58,55,64,63,56,62] │ [120,91,60,92,60,90,91,60,88,88,90,60,89] │ [70,70,70,70,70,70,70,70,70,80,80,70,70] │
│ 2021-07-26 16:37:30 │ 3081 │ [51,56,54,52,57,62,58,58,56,64,63,57,62] │ [119,90,60,92,60,90,91,60,91,88,90,60,89] │ [70,70,70,69,70,70,70,70,70,80,80,70,70] │
│ 2021-07-26 16:37:40 │ 3081 │ [51,56,55,52,57,63,58,58,56,64,63,56,62] │ [119,91,60,88,60,90,90,60,90,90,89,60,89] │ [70,70,70,70,70,70,70,70,68,80,80,70,70] │
└─────────────────────┴───────────┴──────────────────────────────────────────┴───────────────────────────────────────────┴──────────────────────────────────────────┘
我能够编写一个查询来获取单个列的平均值,但很难为其余列编写一个最佳查询。
-- Average temperature per array position for one worker over a time window.
-- ARRAY JOIN unrolls each `temp` array into one row per element, and
-- arrayEnumerate supplies the 1-based position so that readings from the same
-- slot are averaged together.
-- The inner query only filters rows: it carries no ORDER BY (avg does not
-- depend on row order) and selects only the columns the outer query reads.
SELECT
    worker_id,
    round(avg(temp_value), 1) AS temp_avg,
    temp_index
FROM
(
    SELECT
        worker_id,
        temp
    FROM stats
    WHERE `worker_id` = 3081
      -- BETWEEN is inclusive on both ends of the window
      AND `datetime` BETWEEN toDateTime('2021-07-26 19:40:00', 'UTC')
                         AND toDateTime('2021-07-26 19:45:00', 'UTC')
)
ARRAY JOIN
    temp AS temp_value,
    arrayEnumerate(temp) AS temp_index
GROUP BY worker_id, temp_index
ORDER BY temp_index ASC;
你能建议我一个最佳查询来计算 temp、fan、power 和 power_total 的平均值吗?
考虑使用 ForEach 聚合函数组合器:
-- Element-wise averages using the -ForEach combinator: avgForEach aggregates
-- the items at the same array position across the grouped rows and returns
-- an array of results.
SELECT
    worker_id,
    avgForEach(temp)  AS temp_avg,
    avgForEach(power) AS power_avg,
    avgForEach(fan)   AS fan_avg
FROM
(
    /* emulate the test dataset: unpack each sample tuple into named columns */
    SELECT
        sample.1 AS datetime,
        sample.2 AS worker_id,
        sample.3 AS temp,
        sample.4 AS power,
        sample.5 AS fan
    FROM
    (
        SELECT arrayJoin([
            ('2021-07-26 16:37:00', 3081, [51,55,53,51,56,62,58,57,55,64,63,55,61], [120,91,60,91,60,90,90,60,90,89,90,60,89], [70,70,70,70,70,70,70,70,70,80,80,70,70]),
            ('2021-07-26 16:37:10', 3081, [51,56,54,52,56,63,58,58,55,64,63,56,62], [120,89,60,90,60,90,89,60,88,88,90,60,90], [70,70,70,70,70,70,70,70,70,80,80,70,70]),
            ('2021-07-26 16:37:20', 3081, [51,56,54,52,56,63,58,58,55,64,63,56,62], [120,91,60,92,60,90,91,60,88,88,90,60,89], [70,70,70,70,70,70,70,70,70,80,80,70,70]),
            ('2021-07-26 16:37:30', 3081, [51,56,54,52,57,62,58,58,56,64,63,57,62], [119,90,60,92,60,90,91,60,91,88,90,60,89], [70,70,70,69,70,70,70,70,70,80,80,70,70]),
            ('2021-07-26 16:37:40', 3081, [51,56,55,52,57,63,58,58,56,64,63,56,62], [119,91,60,88,60,90,90,60,90,90,89,60,89], [70,70,70,70,70,70,70,70,68,80,80,70,70])
        ]) AS sample
    )
)
/*WHERE {timerange condition}*/
GROUP BY worker_id
/*
┌─worker_id─┬─temp_avg───────────────────────────────────────────────┬─power_avg─────────────────────────────────────────────────┬─fan_avg──────────────────────────────────────┐
│ 3081 │ [51,55.8,54,51.8,56.4,62.6,58,57.8,55.4,64,63,56,61.8] │ [119.6,90.4,60,90.6,60,90,90.2,60,89.4,88.6,89.8,60,89.2] │ [70,70,70,69.8,70,70,70,70,69.6,80,80,70,70] │
└───────────┴────────────────────────────────────────────────────────┴───────────────────────────────────────────────────────────┴──────────────────────────────────────────────┘
*/
-- Per-position averages of temp, fan and power for one worker over a time
-- window. A single ARRAY JOIN unrolls the three arrays in lockstep, so by
-- default they must have the same length in every selected row (ClickHouse
-- rejects mismatched sizes unless enable_unaligned_array_join is set).
-- temp_index is the 1-based position shared by all three value columns.
-- The inner query only filters rows: it carries no ORDER BY (avg does not
-- depend on row order) and selects only the columns the outer query reads.
SELECT
    worker_id,
    round(avg(temp_value), 1)  AS temp_avg,
    round(avg(fan_value), 1)   AS fan_avg,
    round(avg(power_value), 1) AS power_avg,
    temp_index
FROM
(
    SELECT
        worker_id,
        temp,
        fan,
        power
    FROM stats
    WHERE `worker_id` = 3081
      -- BETWEEN is inclusive on both ends of the window
      AND `datetime` BETWEEN toDateTime('2021-07-26 19:40:00', 'UTC')
                         AND toDateTime('2021-07-26 19:45:00', 'UTC')
)
ARRAY JOIN
    temp AS temp_value,
    fan AS fan_value,
    power AS power_value,
    arrayEnumerate(temp) AS temp_index
GROUP BY worker_id, temp_index
ORDER BY temp_index ASC;
-- Averages for one worker over a time window, one result row per worker:
--   * avgForEach(...) averages the array columns position by position and
--     returns an array per column;
--   * avg(power_total) is the plain average of the scalar column, which the
--     original request also asked for.
SELECT
    worker_id,
    avgForEach(temp),
    avgForEach(fan),
    avgForEach(power),
    avg(power_total)
FROM stats
WHERE `worker_id` = 3081
  -- BETWEEN is inclusive on both ends of the window
  AND `datetime` BETWEEN toDateTime('2021-07-26 19:40:00', 'UTC')
                     AND toDateTime('2021-07-26 19:45:00', 'UTC')
GROUP BY worker_id;