Snowflake - 如何创建包含唯一记录的摘要 table

Snowflake - How to create summary table containing unique records

我正在寻找有关如何生成摘要 table 或从现有 table 查看的一些 Snowflake 语法帮助。我的摘要 table 应该有来自现有 table 的每个唯一 ID 的 1 行以及指示是否已达到各种里程碑(根据摘要列名称)的布尔值。由于我是 Snowflake 新手,因此感谢您的帮助。谢谢

Existing Table

Desired Summary Table/View

试试这个,看看这是否有助于获得您想要的东西。

-- Summary of tab1 with one row per ID and one boolean flag per pivoted stage.
--
-- The pivot input is narrowed to (id, pipeline_id, stage_id). PIVOT groups by
-- every input column it does not consume, so feeding it SELECT * (which also
-- carries UPDATED) yields one row per (ID, UPDATED) instead of one row per ID.
--
-- NOTE(review): the pivot is keyed on stage_id, so hit_pipeline_a/b/c are TRUE
-- when stage '1'/'2'/'3' was recorded for the ID, not when pipeline_id is
-- 'A'/'B'/'C'. Confirm this matches the intended milestone definitions.
SELECT
    id,
    hit_pipeline_a IS NOT NULL AS hit_pipeline_a,
    hit_pipeline_b IS NOT NULL AS hit_pipeline_b,
    hit_pipeline_c IS NOT NULL AS hit_pipeline_c,
    hit_stage_4 IS NOT NULL AS hit_stage_4,
    hit_stage_5 IS NOT NULL AS hit_stage_5,
    hit_stage_6 IS NOT NULL AS hit_stage_6
FROM (
    -- SELECT * is intentional here: the pivoted column names are generated by
    -- PIVOT and are renamed through the column list on alias p.
    SELECT *
    FROM (
        SELECT
            id,
            pipeline_id,
            stage_id
        FROM tab1
    )
    -- MAX(pipeline_id) is only used as a presence marker: it is NULL when the
    -- ID has no row for that stage_id.
    PIVOT (MAX(pipeline_id) FOR stage_id IN ('1', '2', '3', '4', '5', '6'))
        AS p (id, hit_pipeline_a, hit_pipeline_b, hit_pipeline_c, hit_stage_4, hit_stage_5, hit_stage_6)
)
ORDER BY id;

                                        
 
  
-- Sample table and data used by the summary queries.
-- One row per (id, pipeline_id, stage_id) event.
CREATE OR REPLACE TABLE tab1 (
    id          VARCHAR(100),
    -- DATE holds only the calendar day, so the time of day in the sample
    -- strings below is parsed and then dropped. Switch to a TIMESTAMP type if
    -- the time of day matters.
    updated     DATE,
    pipeline_id VARCHAR(100),
    stage_id    VARCHAR(10)
);

-- Explicit column list so the load does not depend on column order.
-- Format elements: mm = month, hh24 = hour (00-23), mi = minute, ss = second.
-- The previous 'hh:mm:ss' put the month element (mm) in the minutes position.
INSERT INTO tab1 (id, updated, pipeline_id, stage_id)
SELECT
    column1,
    TO_DATE(column2, 'mm/dd/yyyy hh24:mi:ss'),
    column3,
    column4
FROM VALUES
    -- id is passed as a string literal because tab1.id is a VARCHAR column.
    ('1111', '02/01/2022 09:01:00', 'A', '1'),
    ('1111', '02/01/2022 10:01:00', 'A', '2'),
    ('1111', '02/01/2022 11:01:00', 'B', '5'),

    ('2222', '02/02/2022 13:01:00', 'A', '1'),
    ('2222', '02/03/2022 18:01:00', 'B', '5'),
    ('2222', '02/04/2022 07:01:00', 'B', '6'),

    ('3333', '02/02/2022 14:01:00', 'A', '1'),
    ('3333', '02/03/2022 18:01:00', 'A', '2'),
    ('3333', '02/03/2022 07:01:00', 'C', '7'),
    ('3333', '02/03/2022 21:01:00', 'C', '8'),
    ('3333', '02/05/2022 17:01:00', 'C', '9');

所以使用Himanshu的数据,谢谢:

-- Inline sample data exposed as the CTE fake_data(id, updated, pipeline_id, stage_id),
-- so the query can run without creating a table.
-- Format elements: mm = month, hh24 = hour (00-23), mi = minute, ss = second.
-- The previous 'hh:mm:ss' put the month element (mm) in the minutes position.
-- TO_DATE returns a DATE, so the time of day is parsed and then dropped.
-- No terminating semicolon: this WITH clause must be followed by the SELECT
-- that reads from fake_data.
WITH fake_data (id, updated, pipeline_id, stage_id) AS (
    SELECT
        column1,
        TO_DATE(column2, 'mm/dd/yyyy hh24:mi:ss'),
        column3,
        column4
    FROM VALUES
        (1111, '02/01/2022 09:01:00', 'A', '1'),
        (1111, '02/01/2022 10:01:00', 'A', '2'),
        (1111, '02/01/2022 11:01:00', 'B', '5'),
        (2222, '02/02/2022 13:01:00', 'A', '1'),
        (2222, '02/03/2022 18:01:00', 'B', '5'),
        (2222, '02/04/2022 07:01:00', 'B', '6'),
        (3333, '02/02/2022 14:01:00', 'A', '1'),
        (3333, '02/03/2022 18:01:00', 'A', '2'),
        (3333, '02/03/2022 07:01:00', 'C', '7'),
        (3333, '02/03/2022 21:01:00', 'C', '8'),
        (3333, '02/05/2022 17:01:00', 'C', '9')
)

我们正在对每个 id 进行聚合,我们想使用 COUNT_IF 来查看有多少行符合条件,如果是 >0 我们很高兴

-- One row per id from fake_data; each flag is TRUE when at least one of the
-- id's rows satisfies the condition (COUNT_IF counts the matching rows).
-- Note: hit_stage_a/b/c test pipeline_id ('A'/'B'/'C'), while hit_stage_4/5/6
-- test stage_id, despite the shared hit_stage_ prefix.
SELECT
    id,
    COUNT_IF(pipeline_id = 'A') > 0 AS hit_stage_a,
    COUNT_IF(pipeline_id = 'B') > 0 AS hit_stage_b,
    COUNT_IF(pipeline_id = 'C') > 0 AS hit_stage_c,
    COUNT_IF(stage_id = '4') > 0 AS hit_stage_4,
    COUNT_IF(stage_id = '5') > 0 AS hit_stage_5,
    COUNT_IF(stage_id = '6') > 0 AS hit_stage_6
FROM fake_data
GROUP BY id
ORDER BY id;

给出:

ID HIT_STAGE_A HIT_STAGE_B HIT_STAGE_C HIT_STAGE_4 HIT_STAGE_5 HIT_STAGE_6
1111 TRUE TRUE FALSE FALSE TRUE FALSE
2222 TRUE TRUE FALSE FALSE TRUE TRUE
3333 TRUE FALSE TRUE FALSE FALSE FALSE