在 PostgreSQL 中计算连续段(run)的长度
Calculating the length of a run in postgresql
我有一个来自日志记录应用程序的数据集。它记录时间以及我的小部件是否正常:
-- Sample log data: one row per time tick. In these rows ok is 1 when the
-- widget was ok and NULL when it was not.
CREATE TABLE runs (time int, ok int);

-- Explicit column list so the rows do not depend on the table's column order.
INSERT INTO runs (time, ok) VALUES
    (1, NULL),
    (2, 1),
    (3, 1),
    (4, 1),
    (5, NULL),
    (6, NULL),
    (7, 1),
    (8, 1),
    (9, NULL),
    (10, 1);
我想(大概需要)使用窗口函数来确定这些连续 "ok" 段的长度。最终的结果集应该是这样的:
time | ok_length
----------------
2 | 3
7 | 2
10 | 1
我目前写到这里:
-- Asker's attempt. For each row whose previous row (by time) has a non-NULL ok,
-- it returns the running total of ok from the first row up to the current one;
-- every other row gets NULL. The running total never restarts at a NULL, so
-- this does not produce per-run lengths.
SELECT
    time,
    ok,
    CASE
        WHEN lag(ok) OVER by_time IS NOT NULL THEN sum(ok) OVER by_time
    END
FROM runs
WINDOW by_time AS (ORDER BY time)
ORDER BY time
但这是完全错误的。谁能帮忙?也许我需要调整窗口函数的帧(frame)结束位置,但这个帧必须带有一个条件:遇到 NULL 时就停止。
这是我正在使用的 SQL fiddle:http://sqlfiddle.com/#!17/98bf4/3
我认为应该有办法进一步简化,不过这类基于值的分段计数查询总是有点冗长。主要的组成部分有:
group_start_cte
- 使用 LAG 标记出每个逻辑分组的起始行。
group_cte
- 对起始标记做累计求和,为每一行分配组 ID。
group_cnt
- 按组 ID 统计每个分组的行数。
first_time_for_group
- 获取每个分组的开始时间。
最后我们将 group_cnt
和 first_time_for_group
放在一起:
-- Length of every run of consecutive rows with a non-NULL ok, reported with
-- the time of the row that opened the run.
WITH
-- Flag each row whose ok differs from the previous row's ok. The comparison is
-- NULL-safe (IS DISTINCT FROM). lag() yields NULL for the first row, so a
-- leading row with ok = NULL is not flagged.
group_start_cte AS (
    SELECT
        time,
        ok,
        (lag(ok) OVER by_time IS DISTINCT FROM ok) AS group_start
    FROM runs
    WINDOW by_time AS (ORDER BY time)
),
-- Running total of the start flags: every row gets the id of the group it is in.
group_cte AS (
    SELECT
        time,
        ok,
        group_start,
        sum(CAST(group_start AS int)) OVER (ORDER BY time) AS grp_id
    FROM group_start_cte
),
-- One row per group: the time of the row that started it.
first_time_for_group AS (
    SELECT
        time,
        grp_id
    FROM group_cte
    WHERE group_start
),
-- Row count per group, restricted to rows where ok is set.
group_cnt AS (
    SELECT
        grp_id,
        count(*) AS ok_length
    FROM group_cte
    WHERE ok IS NOT NULL
    GROUP BY grp_id
)
SELECT
    starts.time,
    lengths.ok_length
FROM group_cnt AS lengths
LEFT JOIN first_time_for_group AS starts
    ON starts.grp_id = lengths.grp_id
ORDER BY starts.time ASC
;
这里有一个不那么冗长的解决方案:
-- Gaps-and-islands: among the rows where ok is set, time minus the row's
-- ordinal position stays constant while time advances by exactly 1 per row,
-- so each distinct (ok, gp) pair identifies one run.
-- NOTE(review): this relies on time increasing by exactly 1 between
-- consecutive rows; a missing time value splits a run.
SELECT
    min(time) AS time,       -- first time of the run
    count(*) AS ok_length    -- rows in the run (independent of the value stored in ok)
FROM (
    SELECT
        time,
        ok,
        time - row_number() OVER (PARTITION BY ok ORDER BY time ASC) AS gp
    FROM runs
    WHERE ok IS NOT NULL
) AS rs
GROUP BY ok, gp
ORDER BY time;
我有一个来自日志记录应用程序的数据集。它记录时间以及我的小部件是否正常:
-- Sample log data: one row per time tick. In these rows ok is 1 when the
-- widget was ok and NULL when it was not.
CREATE TABLE runs (time int, ok int);

-- Explicit column list so the rows do not depend on the table's column order.
INSERT INTO runs (time, ok) VALUES
    (1, NULL),
    (2, 1),
    (3, 1),
    (4, 1),
    (5, NULL),
    (6, NULL),
    (7, 1),
    (8, 1),
    (9, NULL),
    (10, 1);
我想(大概需要)使用窗口函数来确定这些连续 "ok" 段的长度。最终的结果集应该是这样的:
time | ok_length
----------------
2 | 3
7 | 2
10 | 1
我目前写到这里:
-- Asker's attempt. For each row whose previous row (by time) has a non-NULL ok,
-- it returns the running total of ok from the first row up to the current one;
-- every other row gets NULL. The running total never restarts at a NULL, so
-- this does not produce per-run lengths.
SELECT
    time,
    ok,
    CASE
        WHEN lag(ok) OVER by_time IS NOT NULL THEN sum(ok) OVER by_time
    END
FROM runs
WINDOW by_time AS (ORDER BY time)
ORDER BY time
但这是完全错误的。谁能帮忙?也许我需要调整窗口函数的帧(frame)结束位置,但这个帧必须带有一个条件:遇到 NULL 时就停止。
这是我正在使用的 SQL fiddle:http://sqlfiddle.com/#!17/98bf4/3
我认为应该有办法进一步简化,不过这类基于值的分段计数查询总是有点冗长。主要的组成部分有:
group_start_cte
- 使用 LAG 标记出每个逻辑分组的起始行。
group_cte
- 对起始标记做累计求和,为每一行分配组 ID。
group_cnt
- 按组 ID 统计每个分组的行数。
first_time_for_group
- 获取每个分组的开始时间。
最后我们将 group_cnt
和 first_time_for_group
放在一起:
-- Length of every run of consecutive rows with a non-NULL ok, reported with
-- the time of the row that opened the run.
WITH
-- Flag each row whose ok differs from the previous row's ok. The comparison is
-- NULL-safe (IS DISTINCT FROM). lag() yields NULL for the first row, so a
-- leading row with ok = NULL is not flagged.
group_start_cte AS (
    SELECT
        time,
        ok,
        (lag(ok) OVER by_time IS DISTINCT FROM ok) AS group_start
    FROM runs
    WINDOW by_time AS (ORDER BY time)
),
-- Running total of the start flags: every row gets the id of the group it is in.
group_cte AS (
    SELECT
        time,
        ok,
        group_start,
        sum(CAST(group_start AS int)) OVER (ORDER BY time) AS grp_id
    FROM group_start_cte
),
-- One row per group: the time of the row that started it.
first_time_for_group AS (
    SELECT
        time,
        grp_id
    FROM group_cte
    WHERE group_start
),
-- Row count per group, restricted to rows where ok is set.
group_cnt AS (
    SELECT
        grp_id,
        count(*) AS ok_length
    FROM group_cte
    WHERE ok IS NOT NULL
    GROUP BY grp_id
)
SELECT
    starts.time,
    lengths.ok_length
FROM group_cnt AS lengths
LEFT JOIN first_time_for_group AS starts
    ON starts.grp_id = lengths.grp_id
ORDER BY starts.time ASC
;
这里有一个不那么冗长的解决方案:
-- Gaps-and-islands: among the rows where ok is set, time minus the row's
-- ordinal position stays constant while time advances by exactly 1 per row,
-- so each distinct (ok, gp) pair identifies one run.
-- NOTE(review): this relies on time increasing by exactly 1 between
-- consecutive rows; a missing time value splits a run.
SELECT
    min(time) AS time,       -- first time of the run
    count(*) AS ok_length    -- rows in the run (independent of the value stored in ok)
FROM (
    SELECT
        time,
        ok,
        time - row_number() OVER (PARTITION BY ok ORDER BY time ASC) AS gp
    FROM runs
    WHERE ok IS NOT NULL
) AS rs
GROUP BY ok, gp
ORDER BY time;