使用 LAG 获取自上一个空值之后的第一个非空值
lag to get first non null value since the previous null value
下面是我试图在 Redshift 数据库中实现的示例。
我有一个列 current_value,我想创建一个新列 value_desired,规则如下:
- 如果前一行为空,则与 current_value 相同
- 如果前一行为非空值,则等于当前连续非空块中的第一个值(即上一个空值之后的第一个非空值)
这听起来很容易,但我还没有找到实现它的方法。
row_numb current_value value_desired
1
2
3 47 47
4
5 45 45
6
7
8 42 42
9 41 42
10 40 42
11 39 42
12 38 42
13
14 36 36
15
16
17 33 33
18 32 33
我试过使用 LAG() 函数,但我只能得到前一行的值(而不是 "non-null" 块中的第一个值),这是我的尝试:
-- Asker's attempt, made runnable: LAG() is a window function and must carry an
-- OVER (ORDER BY ...) clause.
-- NOTE(review): the ordering column is assumed to be row_numb, as in the sample
-- table header -- confirm against the real schema of test1.
-- This still returns only the immediately preceding value, not the first value
-- of the current non-null run, so it does not yet produce the desired column.
SELECT *
     , CASE
           -- Previous row is NULL (or absent): the run starts here.
           WHEN current_value IS NOT NULL
                AND LAG(current_value) OVER (ORDER BY row_numb) IS NULL
               THEN current_value
           -- Previous row is populated: copy it (only one step back).
           WHEN current_value IS NOT NULL
                AND LAG(current_value) OVER (ORDER BY row_numb) IS NOT NULL
               THEN LAG(current_value) OVER (ORDER BY row_numb)
           ELSE NULL
       END AS value_desired
FROM test1
非常感谢任何帮助,谢谢。
使用 FIRST_VALUE() 而不是 LAG()
这是正确的答案,给出了正确的结果。
这里有一些巧妙的技巧,我建议你仔细看看,让我知道需要澄清的地方
根据您的问题创建测试数据。
-- Fixture: rebuild the sample table used by the queries that follow.
DROP TABLE IF EXISTS test_table;

CREATE TABLE test_table (
    row_num       INT,  -- used as the ordering key by the queries
    current_value INT   -- NULL rows separate the runs of populated values
);

-- Same 18 rows as in the question, grouped by run for readability.
INSERT INTO test_table (row_num, current_value)
VALUES
    (1, NULL), (2, NULL),
    (3, 47),
    (4, NULL),
    (5, 45),
    (6, NULL), (7, NULL),
    (8, 42), (9, 41), (10, 40), (11, 39), (12, 38),
    (13, NULL),
    (14, 36),
    (15, NULL), (16, NULL),
    (17, 33), (18, 32);
然后运行这个代码
-- Pair every row with all run-start rows at or before it, then keep the most
-- recent run start. DISTINCT collapses the one-row-per-matching-start fan-out.
WITH with_prev AS (
    -- Each row together with the value of the row just before it.
    SELECT
        row_num,
        current_value,
        LAG(current_value, 1) OVER (ORDER BY row_num) AS prev_cval
    FROM test_table
),
run_starts AS (
    -- Rows that open a run: populated, and preceded by a NULL (or by nothing).
    SELECT
        row_num,
        current_value
    FROM with_prev
    WHERE current_value IS NOT NULL
      AND prev_cval IS NULL
)
SELECT DISTINCT
    src.row_num,
    CASE
        WHEN src.current_value IS NOT NULL
            THEN LAST_VALUE(rs.current_value) OVER (
                     PARTITION BY src.row_num
                     ORDER BY rs.row_num
                     ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
                 )
        -- NULL input rows stay NULL (no ELSE branch).
    END AS value_desired
FROM test_table AS src
LEFT JOIN run_starts AS rs
    ON rs.row_num <= src.row_num
ORDER BY src.row_num;
我们的测试数据
-- Fixture: drop and recreate the sample table, then load the 18 question rows.
DROP TABLE IF EXISTS test_table;

CREATE TABLE test_table (row_num INT, current_value INT);

INSERT INTO test_table (row_num, current_value)
VALUES
    -- leading gap
    (1, NULL), (2, NULL),
    -- single-row runs
    (3, 47), (4, NULL), (5, 45),
    (6, NULL), (7, NULL),
    -- five-row run starting at 42
    (8, 42), (9, 41), (10, 40), (11, 39), (12, 38),
    (13, NULL),
    (14, 36),
    (15, NULL), (16, NULL),
    -- two-row run starting at 33
    (17, 33), (18, 32);
我们知道的:
- 当current_value不为空且前面的current_value为空时,desired_value等于current_value
- 让我们将其称为 first_desired_value,根据下面的 q_first_desired_value 子查询
- 当那个first_desired_value不为null时,就是我们的desired_value
- 当 current_value 不为 null 时,我们只需要将 first_desired_value 传播到其他行
- 当 current_value 不为空时,期望值是当前行之前所有行(不含当前行)中最后一个 first_desired_value,并用 IGNORE NULLS 排除窗口帧中的 NULL 值
综上所述,查询如下
-- Step 1: flag the first row of every run of populated values.
-- first_desired_value is current_value when the previous row (by row_num) is
-- NULL or absent, and NULL otherwise.
WITH q_first_desired_value AS
(
    SELECT
        row_num,
        current_value,
        CASE WHEN LAG(current_value, 1)
                  OVER (
                      ORDER BY row_num ) IS NULL
            THEN current_value
            ELSE NULL END AS first_desired_value
    FROM test_table
)
-- Step 2: carry each run's first value forward across the rest of the run.
SELECT
    row_num,
    current_value,
    CASE
        -- The row opens a run: its own value is the answer.
        WHEN first_desired_value IS NOT NULL
            THEN first_desired_value
        -- Later row in a run: take the most recent run start among the rows
        -- before this one, skipping the NULL markers.
        WHEN current_value IS NOT NULL
            THEN LAST_VALUE(first_desired_value) IGNORE NULLS
                 OVER (
                     ORDER BY row_num ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING )
    END AS desired_value
FROM q_first_desired_value
-- Ordering belongs on the outer query: an ORDER BY inside the CTE does not
-- determine the order of the final result.
ORDER BY row_num;
下面是我试图在 Redshift 数据库中实现的示例。
我有一个列 current_value,我想创建一个新列 value_desired,规则如下:
- 如果前一行为空,则与 current_value 相同
- 如果前一行为非空值,则等于当前连续非空块中的第一个值(即上一个空值之后的第一个非空值)
这听起来很容易,但我还没有找到实现它的方法。
row_numb current_value value_desired
1
2
3 47 47
4
5 45 45
6
7
8 42 42
9 41 42
10 40 42
11 39 42
12 38 42
13
14 36 36
15
16
17 33 33
18 32 33
我试过使用 LAG() 函数,但我只能得到前一行的值(而不是 "non-null" 块中的第一个值),这是我的尝试:
-- Asker's attempt, made runnable: LAG() is a window function and must carry an
-- OVER (ORDER BY ...) clause.
-- NOTE(review): the ordering column is assumed to be row_numb, as in the sample
-- table header -- confirm against the real schema of test1.
-- This still returns only the immediately preceding value, not the first value
-- of the current non-null run, so it does not yet produce the desired column.
SELECT *
     , CASE
           -- Previous row is NULL (or absent): the run starts here.
           WHEN current_value IS NOT NULL
                AND LAG(current_value) OVER (ORDER BY row_numb) IS NULL
               THEN current_value
           -- Previous row is populated: copy it (only one step back).
           WHEN current_value IS NOT NULL
                AND LAG(current_value) OVER (ORDER BY row_numb) IS NOT NULL
               THEN LAG(current_value) OVER (ORDER BY row_numb)
           ELSE NULL
       END AS value_desired
FROM test1
非常感谢任何帮助,谢谢。
使用 FIRST_VALUE() 而不是 LAG()
这是正确的答案,给出了正确的结果。 这里有一些巧妙的技巧,我建议你仔细看看,让我知道需要澄清的地方
根据您的问题创建测试数据。
-- Fixture: rebuild the sample table used by the queries that follow.
DROP TABLE IF EXISTS test_table;

CREATE TABLE test_table (
    row_num       INT,  -- used as the ordering key by the queries
    current_value INT   -- NULL rows separate the runs of populated values
);

-- Same 18 rows as in the question, grouped by run for readability.
INSERT INTO test_table (row_num, current_value)
VALUES
    (1, NULL), (2, NULL),
    (3, 47),
    (4, NULL),
    (5, 45),
    (6, NULL), (7, NULL),
    (8, 42), (9, 41), (10, 40), (11, 39), (12, 38),
    (13, NULL),
    (14, 36),
    (15, NULL), (16, NULL),
    (17, 33), (18, 32);
然后运行这个代码
-- Pair every row with all run-start rows at or before it, then keep the most
-- recent run start. DISTINCT collapses the one-row-per-matching-start fan-out.
WITH with_prev AS (
    -- Each row together with the value of the row just before it.
    SELECT
        row_num,
        current_value,
        LAG(current_value, 1) OVER (ORDER BY row_num) AS prev_cval
    FROM test_table
),
run_starts AS (
    -- Rows that open a run: populated, and preceded by a NULL (or by nothing).
    SELECT
        row_num,
        current_value
    FROM with_prev
    WHERE current_value IS NOT NULL
      AND prev_cval IS NULL
)
SELECT DISTINCT
    src.row_num,
    CASE
        WHEN src.current_value IS NOT NULL
            THEN LAST_VALUE(rs.current_value) OVER (
                     PARTITION BY src.row_num
                     ORDER BY rs.row_num
                     ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
                 )
        -- NULL input rows stay NULL (no ELSE branch).
    END AS value_desired
FROM test_table AS src
LEFT JOIN run_starts AS rs
    ON rs.row_num <= src.row_num
ORDER BY src.row_num;
我们的测试数据
-- Fixture: drop and recreate the sample table, then load the 18 question rows.
DROP TABLE IF EXISTS test_table;

CREATE TABLE test_table (row_num INT, current_value INT);

INSERT INTO test_table (row_num, current_value)
VALUES
    -- leading gap
    (1, NULL), (2, NULL),
    -- single-row runs
    (3, 47), (4, NULL), (5, 45),
    (6, NULL), (7, NULL),
    -- five-row run starting at 42
    (8, 42), (9, 41), (10, 40), (11, 39), (12, 38),
    (13, NULL),
    (14, 36),
    (15, NULL), (16, NULL),
    -- two-row run starting at 33
    (17, 33), (18, 32);
我们知道的:
- 当current_value不为空且前面的current_value为空时,desired_value等于current_value
- 让我们将其称为 first_desired_value,根据下面的 q_first_desired_value 子查询
- 当那个first_desired_value不为null时,就是我们的desired_value
- 当 current_value 不为 null 时,我们只需要将 first_desired_value 传播到其他行
- 当 current_value 不为空时,期望值是当前行之前所有行(不含当前行)中最后一个 first_desired_value,并用 IGNORE NULLS 排除窗口帧中的 NULL 值
综上所述,查询如下
-- Step 1: flag the first row of every run of populated values.
-- first_desired_value is current_value when the previous row (by row_num) is
-- NULL or absent, and NULL otherwise.
WITH q_first_desired_value AS
(
    SELECT
        row_num,
        current_value,
        CASE WHEN LAG(current_value, 1)
                  OVER (
                      ORDER BY row_num ) IS NULL
            THEN current_value
            ELSE NULL END AS first_desired_value
    FROM test_table
)
-- Step 2: carry each run's first value forward across the rest of the run.
SELECT
    row_num,
    current_value,
    CASE
        -- The row opens a run: its own value is the answer.
        WHEN first_desired_value IS NOT NULL
            THEN first_desired_value
        -- Later row in a run: take the most recent run start among the rows
        -- before this one, skipping the NULL markers.
        WHEN current_value IS NOT NULL
            THEN LAST_VALUE(first_desired_value) IGNORE NULLS
                 OVER (
                     ORDER BY row_num ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING )
    END AS desired_value
FROM q_first_desired_value
-- Ordering belongs on the outer query: an ORDER BY inside the CTE does not
-- determine the order of the final result.
ORDER BY row_num;