根据虚拟列和顺序日期查找 Min/Max 日期范围

Find Min/Max Date Range based on Dummy Columns and sequential dates

我有以下表。我需要针对每个 "Seq" 列,找出其值为 1 的单独一行或连续多行所对应的最小/最大日期。每行只能有一个 "1"。

Animal  Calendar_Date   SeqA    SeqB    SeqC    SeqD    SeqE
Cat     2/5/2017        0       0       0       1       0
Cat     2/6/2017        1       0       0       0       0
Cat     2/7/2017        1       0       0       0       0
Cat     2/8/2017        1       0       0       0       0
Cat     2/9/2017        1       0       0       0       0
Cat     2/10/2017       0       0       0       0       1
Cat     2/11/2017       0       0       0       0       1
Cat     2/12/2017       0       0       0       0       1
Cat     2/13/2017       0       0       0       0       1
Dog     2/5/2017        1       0       0       0       0
Dog     2/6/2017        1       0       0       0       0
Dog     2/7/2017        0       1       0       0       0
Dog     2/8/2017        0       1       0       0       0
Dog     2/9/2017        1       0       0       0       0
Dog     2/10/2017       1       0       0       0       0
Dog     2/11/2017       0       0       0       1       0
Dog     2/12/2017       0       0       0       1       0
Dog     2/13/2017       0       0       0       1       0

期望的结果如下。我已经尝试过使用 row_number、rank 和 dense_rank,但仍未能解决这个问题。

Animal  Sequence    min         Max
Cat     D           2/5/2017    2/5/2017
Cat     A           2/6/2017    2/9/2017
Cat     E           2/10/2017   2/13/2017
Dog     A           2/5/2017    2/6/2017
Dog     B           2/7/2017    2/8/2017
Dog     A           2/9/2017    2/10/2017
Dog     D           2/11/2017   2/13/2017

dnoeth 的答案(见下文)有效。但是,在我的数据集中,有些 Animal/Calendar_Date 组合有多个 Seq 列同时为 1,也就是说每个 Animal 的每个 Calendar_Date 并不只对应唯一的 Seq。因此我不得不修改代码:针对每个 Seq 列各运行一次(共五次),然后用 UNION 将结果合并,如下所示。

-- Date ranges of consecutive rows (per Animal, ordered by Calendar_Date)
-- in which SeqA = 1. Each Seq column is handled by its own branch because a
-- single row may have more than one Seq column set to 1.
SELECT Animal,
   -- every row of a run carries the same label, so Min() just picks it;
   -- the cast fixes the column type for the branches that are unioned on
   Cast(Min(Seq) AS VARCHAR(8)) AS POS_Type,
   Min(Calendar_Date) AS Min_Date,
   Max(Calendar_Date) AS Max_Date
FROM
 (
   SELECT Animal, Calendar_Date, Seq,
      -- running total of the start-of-run flags = one group number per run
      Sum(flag)
      Over (PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding) AS grp
   FROM
    (
      SELECT Animal, Calendar_Date,
         -- label the rows that belong to this sequence; all others get NULL
         CASE WHEN SeqA = 1 THEN 'SeqA' END AS Seq,
         -- 0 when the previous row of the same Animal carries the same label,
         -- otherwise 1 (first row, or the previous row is unlabelled)
         CASE WHEN Min(Seq) -- or LAG in TD16.10
              Over (PARTITION BY Animal
                    ORDER BY Calendar_Date
                    ROWS BETWEEN 1 Preceding AND 1 Preceding) = Seq
              THEN 0
              ELSE 1
         END AS flag
      FROM   Table_A
    ) AS flagged
   -- filter only here, after the flag was computed over ALL rows, so that an
   -- unlabelled row in between still starts a new run
   WHERE Seq IS NOT NULL
 ) AS runs

GROUP BY Animal, grp

-- UNION ALL: this branch is the only one emitting the label 'SeqB', so there
-- is nothing to de-duplicate against the other branches.
UNION ALL

-- Date ranges of consecutive rows (per Animal, ordered by Calendar_Date)
-- in which SeqB = 1.
SELECT Animal,
   -- every row of a run carries the same label, so Min() just picks it
   Min(Seq) AS POS_Type,
   Min(Calendar_Date) AS Min_Date,
   Max(Calendar_Date) AS Max_Date
FROM
 (
   SELECT Animal, Calendar_Date, Seq,
      -- running total of the start-of-run flags = one group number per run
      Sum(flag)
      Over (PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding) AS grp
   FROM
    (
      SELECT Animal, Calendar_Date,
         -- label the rows that belong to this sequence; all others get NULL
         CASE WHEN SeqB = 1 THEN 'SeqB' END AS Seq,
         -- 0 when the previous row of the same Animal carries the same label,
         -- otherwise 1 (first row, or the previous row is unlabelled)
         CASE WHEN Min(Seq) -- or LAG in TD16.10
              Over (PARTITION BY Animal
                    ORDER BY Calendar_Date
                    ROWS BETWEEN 1 Preceding AND 1 Preceding) = Seq
              THEN 0
              ELSE 1
         END AS flag
      FROM   Table_A
    ) AS flagged
   -- filter only here, after the flag was computed over ALL rows, so that an
   -- unlabelled row in between still starts a new run
   WHERE Seq IS NOT NULL
 ) AS runs

GROUP BY Animal, grp

-- UNION ALL: this branch is the only one emitting the label 'SeqC', so there
-- is nothing to de-duplicate against the other branches.
UNION ALL

-- Date ranges of consecutive rows (per Animal, ordered by Calendar_Date)
-- in which SeqC = 1.
SELECT Animal,
   -- every row of a run carries the same label, so Min() just picks it
   Min(Seq) AS POS_Type,
   Min(Calendar_Date) AS Min_Date,
   Max(Calendar_Date) AS Max_Date
FROM
 (
   SELECT Animal, Calendar_Date, Seq,
      -- running total of the start-of-run flags = one group number per run
      Sum(flag)
      Over (PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding) AS grp
   FROM
    (
      SELECT Animal, Calendar_Date,
         -- label the rows that belong to this sequence; all others get NULL
         CASE WHEN SeqC = 1 THEN 'SeqC' END AS Seq,
         -- 0 when the previous row of the same Animal carries the same label,
         -- otherwise 1 (first row, or the previous row is unlabelled)
         CASE WHEN Min(Seq) -- or LAG in TD16.10
              Over (PARTITION BY Animal
                    ORDER BY Calendar_Date
                    ROWS BETWEEN 1 Preceding AND 1 Preceding) = Seq
              THEN 0
              ELSE 1
         END AS flag
      FROM   Table_A
    ) AS flagged
   -- filter only here, after the flag was computed over ALL rows, so that an
   -- unlabelled row in between still starts a new run
   WHERE Seq IS NOT NULL
 ) AS runs

GROUP BY Animal, grp

-- UNION ALL: this branch is the only one emitting the label 'SeqD', so there
-- is nothing to de-duplicate against the other branches.
UNION ALL

-- Date ranges of consecutive rows (per Animal, ordered by Calendar_Date)
-- in which SeqD = 1.
SELECT Animal,
   -- every row of a run carries the same label, so Min() just picks it
   Min(Seq) AS POS_Type,
   Min(Calendar_Date) AS Min_Date,
   Max(Calendar_Date) AS Max_Date
FROM
 (
   SELECT Animal, Calendar_Date, Seq,
      -- running total of the start-of-run flags = one group number per run
      Sum(flag)
      Over (PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding) AS grp
   FROM
    (
      SELECT Animal, Calendar_Date,
         -- label the rows that belong to this sequence; all others get NULL
         CASE WHEN SeqD = 1 THEN 'SeqD' END AS Seq,
         -- 0 when the previous row of the same Animal carries the same label,
         -- otherwise 1 (first row, or the previous row is unlabelled)
         CASE WHEN Min(Seq) -- or LAG in TD16.10
              Over (PARTITION BY Animal
                    ORDER BY Calendar_Date
                    ROWS BETWEEN 1 Preceding AND 1 Preceding) = Seq
              THEN 0
              ELSE 1
         END AS flag
      FROM   Table_A
    ) AS flagged
   -- filter only here, after the flag was computed over ALL rows, so that an
   -- unlabelled row in between still starts a new run
   WHERE Seq IS NOT NULL
 ) AS runs

GROUP BY Animal, grp

-- UNION ALL: this branch is the only one emitting the label 'SeqE', so there
-- is nothing to de-duplicate against the other branches.
UNION ALL

-- Date ranges of consecutive rows (per Animal, ordered by Calendar_Date)
-- in which SeqE = 1.
SELECT Animal,
   -- every row of a run carries the same label, so Min() just picks it
   Min(Seq) AS POS_Type,
   Min(Calendar_Date) AS Min_Date,
   Max(Calendar_Date) AS Max_Date
FROM
 (
   SELECT Animal, Calendar_Date, Seq,
      -- running total of the start-of-run flags = one group number per run
      Sum(flag)
      Over (PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding) AS grp
   FROM
    (
      SELECT Animal, Calendar_Date,
         -- label the rows that belong to this sequence; all others get NULL
         CASE WHEN SeqE = 1 THEN 'SeqE' END AS Seq,
         -- 0 when the previous row of the same Animal carries the same label,
         -- otherwise 1 (first row, or the previous row is unlabelled)
         CASE WHEN Min(Seq) -- or LAG in TD16.10
              Over (PARTITION BY Animal
                    ORDER BY Calendar_Date
                    ROWS BETWEEN 1 Preceding AND 1 Preceding) = Seq
              THEN 0
              ELSE 1
         END AS flag
      FROM   Table_A
    ) AS flagged
   -- filter only here, after the flag was computed over ALL rows, so that an
   -- unlabelled row in between still starts a new run
   WHERE Seq IS NOT NULL
 ) AS runs

GROUP BY Animal, grp

由于您只有五列,一种方法是手动将它们逆透视(unpivot,列转行),然后执行简单的 GROUP BY 以获得结果:

-- One output row per run of consecutive rows (per Animal, ordered by
-- Calendar_Date) in which a given Seq column equals 1.
-- Grouping by (Animal, Sequence) alone would merge separate runs of the same
-- sequence into one range, so a run number (grp) is added to the grouping.
SELECT Animal, Sequence, MIN(Calendar_Date), MAX(Calendar_Date)
FROM (
    SELECT Animal, Sequence, Calendar_Date, is_set,
           -- position among all rows of this Animal/Sequence minus position
           -- among the rows sharing the same flag value: the difference is
           -- constant inside a run and differs between runs of that value
           ROW_NUMBER() OVER (PARTITION BY Animal, Sequence
                              ORDER BY Calendar_Date)
         - ROW_NUMBER() OVER (PARTITION BY Animal, Sequence, is_set
                              ORDER BY Calendar_Date) AS grp
    FROM (
        -- manual unpivot: one row per source row and Seq column; rows with
        -- the flag not set are kept so that gaps can be detected
        SELECT Animal, 'A' AS Sequence, Calendar_Date, SeqA AS is_set FROM MyTable
        UNION ALL
        SELECT Animal, 'B' AS Sequence, Calendar_Date, SeqB AS is_set FROM MyTable
        UNION ALL
        SELECT Animal, 'C' AS Sequence, Calendar_Date, SeqC AS is_set FROM MyTable
        UNION ALL
        SELECT Animal, 'D' AS Sequence, Calendar_Date, SeqD AS is_set FROM MyTable
        UNION ALL
        SELECT Animal, 'E' AS Sequence, Calendar_Date, SeqE AS is_set FROM MyTable
    ) unpivoted
) runs
-- filter after the run numbers are computed; only the "1" runs are reported
WHERE is_set = 1
GROUP BY Animal, Sequence, grp

根据您的数据,这可能是最高效的做法:

-- One output row per run of consecutive rows (per animal, ordered by
-- calendar_date) that share the same combination of seq flags.
-- Relies on at most one seq column being 1 per row; the CASE then names it.
select animal,
       (case when seqA = 1 then 'A'
             when seqB = 1 then 'B'
             when seqC = 1 then 'C'
             when seqD = 1 then 'D'
             when seqE = 1 then 'E'
        end) as sequence,
       min(calendar_date), max(calendar_date)
from (select animal, calendar_date, seqA, seqB, seqC, seqD, seqE,
             -- position within the animal minus position within the animal's
             -- rows having the same flags: constant inside a run, different
             -- for separate runs of the same flags
             row_number() over (partition by animal
                                order by calendar_date)
           - row_number() over (partition by animal, seqA, seqB, seqC, seqD, seqE
                                order by calendar_date) as grp
      from t
     ) numbered
-- animal must be part of the grouping: it is selected, and without it the
-- ranges of different animals would be merged
group by animal, seqA, seqB, seqC, seqD, seqE, grp;

之所以有效,是因为每一行中只有一个 "seq" 列的值为 1。

老实说,我建议向表中添加一个计算列并使用它:

-- Add a computed column "sequence" holding the letter of the first seq column
-- that is 1 (NULL when none is). The table is t, the same table the queries
-- read from, so that they can use the new column.
-- NOTE: computed columns are not available on every engine (Teradata has
-- none); a view exposing the same CASE expression is the alternative there.
alter table t
    add sequence as (case when seqA = 1 then 'A'
                          when seqB = 1 then 'B'
                          when seqC = 1 then 'C'
                          when seqD = 1 then 'D'
                          when seqE = 1 then 'E'
                     end);

那么你可以这样做:

-- One output row per run of consecutive rows (per animal, ordered by
-- calendar_date) with the same sequence value. Grouping by (animal, sequence)
-- alone would merge separate runs of one sequence, hence the run number grp.
select animal, sequence, min(calendar_date), max(calendar_date)
from (select animal, sequence, calendar_date,
             -- position within the animal minus position within the animal's
             -- rows of this sequence: constant inside a run, different for
             -- separate runs of the same sequence
             row_number() over (partition by animal
                                order by calendar_date)
           - row_number() over (partition by animal, sequence
                                order by calendar_date) as grp
      from t
     ) numbered
group by animal, sequence, grp;

遗憾的是,Teradata 不支持计算列。 Teradata 标签是在我最初回答后添加的。但是,您可以使用视图来获得大致相同的效果。

您需要嵌套的 OLAP 函数:

-- Collapse each Animal's rows into runs of consecutive rows (ordered by
-- Calendar_Date) that share the same combination of Seq flags, and return the
-- run's label together with its first and last date.
SELECT
    Animal,
    MIN(TRIM(BOTH ',' FROM Seq)),   -- strip leading/trailing ',' placeholders
    MIN(Calendar_Date),
    MAX(Calendar_Date)
FROM (
    SELECT
        Animal,
        Calendar_Date,
        Seq,
        -- running total of the run-start flags = run number within the Animal
        SUM(flag) OVER (
            PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS UNBOUNDED PRECEDING
        ) AS grp
    FROM (
        SELECT
            Animal,
            Calendar_Date,
            -- one character per Seq column: its letter when set, ',' otherwise
            CASE WHEN SeqA = 1 THEN 'A' ELSE ',' END
                || CASE WHEN SeqB = 1 THEN 'B' ELSE ',' END
                || CASE WHEN SeqC = 1 THEN 'C' ELSE ',' END
                || CASE WHEN SeqD = 1 THEN 'D' ELSE ',' END
                || CASE WHEN SeqE = 1 THEN 'E' ELSE ',' END
                AS Seq,
            -- 0 when the previous row of the Animal has the same Seq string,
            -- 1 otherwise (including the Animal's first row)
            CASE
                WHEN MIN(Seq) OVER (   -- previous row's value; or LAG in TD16.10
                         PARTITION BY Animal
                         ORDER BY Calendar_Date
                         ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING
                     ) = Seq
                THEN 0
                ELSE 1
            END AS flag
        FROM tab
    ) AS flagged
) AS runs
GROUP BY Animal, grp