Search code examples
sqldateteradatagaps-and-islands

create time range with 2 columns date_time


The problem I am facing is how to find distinct time periods from multiple time periods with overlap in Teradata ANSI SQL.

For example, the attached tables contain multiple overlapping time periods, how can I combine those time periods into 3 unique time periods in Teradata SQL???

I think I can do it in python with the loop function, but not sure how to do it in SQL

ID Start Date End Date
001 2005-01-01 2006-01-01
001 2005-01-01 2007-01-01
001 2008-01-01 2008-06-01
001 2008-04-01 2008-12-01
001 2010-01-01 2010-05-01
001 2010-04-01 2010-12-01
001 2010-11-01 2012-01-01

My expected result is:

ID start_Date end_date
001 2005-01-01 2007-01-01
001 2008-01-01 2008-12-01
001 2010-01-01 2012-01-01

Solution

  • From Oracle 12, you can use MATCH_RECOGNIZE to perform a row-by-row comparison:

    SELECT *
    FROM   table_name
    MATCH_RECOGNIZE(
      PARTITION BY id
      ORDER BY start_date
      MEASURES
        FIRST(start_date) AS start_date,
        MAX(end_date) AS end_date
      ONE ROW PER MATCH
      PATTERN (overlapping_ranges* last_range)
      DEFINE overlapping_ranges AS NEXT(start_date) <= MAX(end_date)
    )
    

    Which, for the sample data:

    CREATE TABLE table_name (ID, Start_Date, End_Date) AS
    SELECT '001', DATE '2005-01-01', DATE '2006-01-01' FROM DUAL UNION ALL
    SELECT '001', DATE '2005-01-01', DATE '2007-01-01' FROM DUAL UNION ALL
    SELECT '001', DATE '2008-01-01', DATE '2008-06-01' FROM DUAL UNION ALL
    SELECT '001', DATE '2008-04-01', DATE '2008-12-01' FROM DUAL UNION ALL
    SELECT '001', DATE '2010-01-01', DATE '2010-05-01' FROM DUAL UNION ALL
    SELECT '001', DATE '2010-04-01', DATE '2010-12-01' FROM DUAL UNION ALL
    SELECT '001', DATE '2010-11-01', DATE '2012-01-01' FROM DUAL;
    

    Outputs:

    ID START_DATE END_DATE
    001 2005-01-01 00:00:00 2007-01-01 00:00:00
    001 2008-01-01 00:00:00 2008-12-01 00:00:00
    001 2010-01-01 00:00:00 2012-01-01 00:00:00

    db<>fiddle here


    Update: Alternative query

    SELECT id,
           start_date,
           end_date
    FROM   (
      SELECT id,
             dt,
             SUM(cnt) OVER (PARTITION BY id ORDER BY dt) AS grp,
             cnt
      FROM   (
        SELECT ID,
               dt,
               SUM(type) OVER (PARTITION BY id ORDER BY dt, ROWNUM) * type AS cnt
        FROM   table_name
        UNPIVOT (dt FOR type IN (start_date AS 1, end_date AS -1))
      )
      WHERE  cnt IN (1,0)
    )
    PIVOT (MAX(dt) FOR cnt IN (1 AS start_date, 0 AS end_date))
    

    Or, an equivalent that does not use UNPIVOT, PIVOT or ROWNUM and works in both Oracle and PostgreSQL:

    SELECT id,
           MAX(CASE cnt WHEN 1 THEN dt END) AS start_date,
           MAX(CASE cnt WHEN 0 THEN dt END) AS end_date
    FROM   (
      SELECT id,
             dt,
             SUM(cnt) OVER (PARTITION BY id ORDER BY dt) AS grp,
             cnt
      FROM   (
        SELECT ID,
               dt,
               SUM(type) OVER (PARTITION BY id ORDER BY dt, rn) * type AS cnt
        FROM   (
          SELECT r.*,
                 ROW_NUMBER() OVER (PARTITION BY id ORDER BY dt ASC, type DESC) AS rn
          FROM   (
            SELECT id, 1 AS type, start_date AS dt FROM table_name
            UNION ALL
            SELECT id, -1 AS type, end_date AS dt FROM table_name
          ) r
        ) p
      ) s
      WHERE  cnt IN (1,0)
    ) t
    GROUP BY id, grp
    

    Update 2: Another Alternative

    SELECT id,
           MIN(start_date) AS start_date,
           MAX(end_Date) AS end_date
    FROM   (
      SELECT t.*,
             SUM(CASE WHEN start_date <= prev_max THEN 0 ELSE 1 END)
               OVER (PARTITION BY id ORDER BY start_date) AS grp
      FROM   (
        SELECT t.*,
               MAX(end_date) OVER (
                 PARTITION BY id ORDER BY start_date
                 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
               ) AS prev_max
        FROM   table_name t
      ) t
    ) t
    GROUP BY id, grp
    

    db<>fiddle Oracle PostgreSQL