如何将前向填充作为 PL/PGSQL 函数
How to do forward fill as a PL/PGSQL function
我正在尝试创建一个等同于 pandas 'ffill' 函数的 PL/pgSQL 函数。该函数应前向填充空值。在下面的示例中,我可以完成前向填充,但当我尝试把这段查询改写成函数时却出现了错误。该函数看起来准确地对应了上面的查询,但我在 `... as $1` 部分遇到了语法错误。
为什么?我应该阅读什么来澄清?
-- Forward fill experiment
-- Builds a four-row scratch table containing NULL gaps, shows it, and then
-- fills each NULL from the immediately preceding row with lag().
DROP TABLE IF EXISTS example;

CREATE TEMPORARY TABLE example (id int, str text, val integer);

INSERT INTO example (id, str, val) VALUES
    (1, 'a', NULL),
    (1, NULL, 1),
    (2, 'b', 2),
    (2, NULL, NULL);

-- Raw rows before filling. Each statement needs its own terminator; without
-- it this SELECT and the next one are parsed as a single invalid statement.
SELECT id, str, val
FROM example;

-- lag(x, 1) looks back exactly one row, so only single-row gaps are filled.
-- NOTE(review): ORDER BY id is not a total order here (ids repeat), so which
-- row counts as "previous" among equal ids is not guaranteed.
SELECT
    (CASE
        WHEN str IS NULL
            THEN lag(str, 1) OVER (ORDER BY id)
        ELSE str
    END) AS str,
    (CASE
        WHEN val IS NULL
            THEN lag(val, 1) OVER (ORDER BY id)
        ELSE val
    END) AS val
FROM example;
-- Forward fill function
-- $1 = column to fill, $2 = table, $3 = ordering column.
-- NOTE(review): this is the attempt that raises the syntax error at "as $1".
-- PL/pgSQL parameters can only stand in for data values, never for column or
-- table names, so "AS $1" and "FROM $2" cannot be parsed. Passing identifiers
-- in as text requires dynamic SQL (EXECUTE with format() and %I).
create or replace function ffill(text, text, text) -- takes column to fill, the table, and the ordering column
returns text as $$
begin
    select (case
            when $1 is null
            then lag($1, 1) over (order by $3)
            else $1
            end) as $1
    from $2;
end;
$$ LANGUAGE plpgsql;
更新 1:我用不同的方法做了一些额外的实验。代码如下。它使用与上面相同的示例 table。
-- State transition function used by the GapFill aggregate.
--   s: state carried over from the rows processed so far (NULL at the start)
--   v: value of the current input row
-- Returns v when v is non-NULL, otherwise s (which may itself be NULL).
-- Emits NOTICE messages with the inputs and the result for tracing.
CREATE OR REPLACE FUNCTION GapFillInternal(
    s anyelement,
    v anyelement) RETURNS anyelement AS
$$
DECLARE
    -- $0 is the variable PL/pgSQL supplies for the result of a function with
    -- a polymorphic return type; it has the resolved type and starts as NULL.
    temp ALIAS FOR $0;
BEGIN
    RAISE NOTICE 's= %, v= %', s, v;
    IF v IS NULL AND s IS NOT NULL THEN
        temp := s;          -- gap: keep the last known value
    ELSIF s IS NULL AND v IS NOT NULL THEN
        temp := v;          -- first non-NULL value seen
    ELSIF s IS NOT NULL AND v IS NOT NULL THEN
        temp := v;          -- a new value replaces the old state
    ELSE
        temp := NULL;       -- nothing known yet
    END IF;
    RAISE NOTICE 'temp= %', temp;
    RETURN temp;
END;
$$ LANGUAGE plpgsql;
-- Aggregate whose running state is folded through GapFillInternal; the state
-- has the same polymorphic type as the input.
CREATE AGGREGATE GapFill (anyelement) (
    sfunc = GapFillInternal,
    stype = anyelement
);
-- Applies GapFill as a window aggregate over rows ordered by id.
-- With ORDER BY and no explicit frame, the default frame runs through the
-- current row's peers (all rows sharing the same id), not just the row itself.
SELECT
    id,
    str,
    val,
    GapFill(val) OVER (ORDER BY id) AS valx
FROM example;
结果表如下所示:
我不明白 valx 列第一行的“1”是从哪里来的。根据 RAISE NOTICE 的输出,该值应该是 NULL,而且按照 CREATE AGGREGATE 文档,这似乎也是合理的预期。
正确调用
您给出的查询本身就不正确,只是测试用例过于简化,未能暴露出问题。
假设您想按 id 分区进行“前向填充”,就必须明确地写出来:
-- Fill str and val with gap_fill inside each id, visiting rows in row_num order.
SELECT
    row_num,
    id,
    str,
    gap_fill(str) OVER w AS strx,
    val,
    gap_fill(val) OVER w AS valx
FROM example
WINDOW w AS (PARTITION BY id ORDER BY row_num);  -- PARTITION BY id is the essential part
WINDOW 子句只是一种语法上的便利,用来避免重复书写相同的窗口定义。真正重要的是新增的 PARTITION BY 子句。
更简单的函数
实际上可以简单得多:
-- Transition function for gap_fill.
--   s: state from the previous rows
--   v: value of the current row
-- Returns v unless it is NULL, in which case the previous state s is kept.
CREATE OR REPLACE FUNCTION gap_fill_internal(s anyelement, v anyelement)
    RETURNS anyelement
AS $body$
BEGIN
    RETURN COALESCE(v, s);
END;
$body$ LANGUAGE plpgsql;
-- Aggregate that folds its input through gap_fill_internal; the state has the
-- same polymorphic type as the input.
CREATE AGGREGATE gap_fill (anyelement) (
    sfunc = gap_fill_internal,
    stype = anyelement
);
在快速测试中速度稍快。
标准SQL
没有自定义函数:
-- Forward fill with built-in window functions only.
-- count(col) skips NULLs, so the running count stays flat across a run of
-- NULLs; partitioning by that count groups each non-NULL value with the NULL
-- rows after it, and first_value() returns the value that opens the group.
WITH counted AS (
    SELECT
        *,
        count(str) OVER w AS ct_str,
        count(val) OVER w AS ct_val
    FROM example
    WINDOW w AS (PARTITION BY id ORDER BY row_num)
)
SELECT
    row_num,
    id,
    str,
    first_value(str) OVER (PARTITION BY id, ct_str ORDER BY row_num) AS strx,
    val,
    first_value(val) OVER (PARTITION BY id, ct_val ORDER BY row_num) AS valx
FROM counted;
使用子查询查询变得更加复杂。性能相似。快速测试中速度稍慢。
这些相关答案中的更多解释:
- Carry over long sequence of missing values with Postgres
- Retrieve last known value for each column of a row
db<>fiddle here - 显示所有扩展测试用例
我正在尝试创建一个等同于 pandas 'ffill' 函数的 PL/pgSQL 函数。该函数应前向填充空值。在下面的示例中,我可以完成前向填充,但当我尝试把这段查询改写成函数时却出现了错误。该函数看起来准确地对应了上面的查询,但我在 `... as $1` 部分遇到了语法错误。
为什么?我应该阅读什么来澄清?
-- Forward fill experiment
-- Builds a four-row scratch table containing NULL gaps, shows it, and then
-- fills each NULL from the immediately preceding row with lag().
DROP TABLE IF EXISTS example;

CREATE TEMPORARY TABLE example (id int, str text, val integer);

INSERT INTO example (id, str, val) VALUES
    (1, 'a', NULL),
    (1, NULL, 1),
    (2, 'b', 2),
    (2, NULL, NULL);

-- Raw rows before filling. Each statement needs its own terminator; without
-- it this SELECT and the next one are parsed as a single invalid statement.
SELECT id, str, val
FROM example;

-- lag(x, 1) looks back exactly one row, so only single-row gaps are filled.
-- NOTE(review): ORDER BY id is not a total order here (ids repeat), so which
-- row counts as "previous" among equal ids is not guaranteed.
SELECT
    (CASE
        WHEN str IS NULL
            THEN lag(str, 1) OVER (ORDER BY id)
        ELSE str
    END) AS str,
    (CASE
        WHEN val IS NULL
            THEN lag(val, 1) OVER (ORDER BY id)
        ELSE val
    END) AS val
FROM example;
-- Forward fill function
-- $1 = column to fill, $2 = table, $3 = ordering column.
-- NOTE(review): this is the attempt that raises the syntax error at "as $1".
-- PL/pgSQL parameters can only stand in for data values, never for column or
-- table names, so "AS $1" and "FROM $2" cannot be parsed. Passing identifiers
-- in as text requires dynamic SQL (EXECUTE with format() and %I).
create or replace function ffill(text, text, text) -- takes column to fill, the table, and the ordering column
returns text as $$
begin
    select (case
            when $1 is null
            then lag($1, 1) over (order by $3)
            else $1
            end) as $1
    from $2;
end;
$$ LANGUAGE plpgsql;
更新 1:我用不同的方法做了一些额外的实验。代码如下。它使用与上面相同的示例 table。
-- State transition function used by the GapFill aggregate.
--   s: state carried over from the rows processed so far (NULL at the start)
--   v: value of the current input row
-- Returns v when v is non-NULL, otherwise s (which may itself be NULL).
-- Emits NOTICE messages with the inputs and the result for tracing.
CREATE OR REPLACE FUNCTION GapFillInternal(
    s anyelement,
    v anyelement) RETURNS anyelement AS
$$
DECLARE
    -- $0 is the variable PL/pgSQL supplies for the result of a function with
    -- a polymorphic return type; it has the resolved type and starts as NULL.
    temp ALIAS FOR $0;
BEGIN
    RAISE NOTICE 's= %, v= %', s, v;
    IF v IS NULL AND s IS NOT NULL THEN
        temp := s;          -- gap: keep the last known value
    ELSIF s IS NULL AND v IS NOT NULL THEN
        temp := v;          -- first non-NULL value seen
    ELSIF s IS NOT NULL AND v IS NOT NULL THEN
        temp := v;          -- a new value replaces the old state
    ELSE
        temp := NULL;       -- nothing known yet
    END IF;
    RAISE NOTICE 'temp= %', temp;
    RETURN temp;
END;
$$ LANGUAGE plpgsql;
-- Aggregate whose running state is folded through GapFillInternal; the state
-- has the same polymorphic type as the input.
CREATE AGGREGATE GapFill (anyelement) (
    sfunc = GapFillInternal,
    stype = anyelement
);
-- Applies GapFill as a window aggregate over rows ordered by id.
-- With ORDER BY and no explicit frame, the default frame runs through the
-- current row's peers (all rows sharing the same id), not just the row itself.
SELECT
    id,
    str,
    val,
    GapFill(val) OVER (ORDER BY id) AS valx
FROM example;
结果表如下所示:
我不明白 valx 列第一行的“1”是从哪里来的。根据 RAISE NOTICE 的输出,该值应该是 NULL,而且按照 CREATE AGGREGATE 文档,这似乎也是合理的预期。
正确调用
您给出的查询本身就不正确,只是测试用例过于简化,未能暴露出问题。
假设您想按 id 分区进行“前向填充”,就必须明确地写出来:
-- Fill str and val with gap_fill inside each id, visiting rows in row_num order.
SELECT
    row_num,
    id,
    str,
    gap_fill(str) OVER w AS strx,
    val,
    gap_fill(val) OVER w AS valx
FROM example
WINDOW w AS (PARTITION BY id ORDER BY row_num);  -- PARTITION BY id is the essential part
WINDOW 子句只是一种语法上的便利,用来避免重复书写相同的窗口定义。真正重要的是新增的 PARTITION BY 子句。
更简单的函数
实际上可以简单得多:
-- Transition function for gap_fill.
--   s: state from the previous rows
--   v: value of the current row
-- Returns v unless it is NULL, in which case the previous state s is kept.
CREATE OR REPLACE FUNCTION gap_fill_internal(s anyelement, v anyelement)
    RETURNS anyelement
AS $body$
BEGIN
    RETURN COALESCE(v, s);
END;
$body$ LANGUAGE plpgsql;
-- Aggregate that folds its input through gap_fill_internal; the state has the
-- same polymorphic type as the input.
CREATE AGGREGATE gap_fill (anyelement) (
    sfunc = gap_fill_internal,
    stype = anyelement
);
在快速测试中速度稍快。
标准SQL
没有自定义函数:
-- Forward fill with built-in window functions only.
-- count(col) skips NULLs, so the running count stays flat across a run of
-- NULLs; partitioning by that count groups each non-NULL value with the NULL
-- rows after it, and first_value() returns the value that opens the group.
WITH counted AS (
    SELECT
        *,
        count(str) OVER w AS ct_str,
        count(val) OVER w AS ct_val
    FROM example
    WINDOW w AS (PARTITION BY id ORDER BY row_num)
)
SELECT
    row_num,
    id,
    str,
    first_value(str) OVER (PARTITION BY id, ct_str ORDER BY row_num) AS strx,
    val,
    first_value(val) OVER (PARTITION BY id, ct_val ORDER BY row_num) AS valx
FROM counted;
使用子查询查询变得更加复杂。性能相似。快速测试中速度稍慢。
这些相关答案中的更多解释:
- Carry over long sequence of missing values with Postgres
- Retrieve last known value for each column of a row
db<>fiddle here - 显示所有扩展测试用例