Search code examples
sqloracle-databasegaps-and-islands

Oracle SQL - How to return common date periods and "divide" when there are gaps between periods


I'm trying to return common date periods (per id) from below data, but I cannot find a way to handle case when date periods have a gap between common periods. Can anyone help?

|id|code_id|code|date_from|date_to|
|--|--|--|--|--|
|10|100|    1000    |02/02/2022             |03/02/2022 23:57:00|
|10|100|    1000    |07/02/2022 01:00:00    |08/02/2022         |
|10|100|    2000    |02/02/2022             |02/02/2022 23:00:00|
|10|100|    2000    |07/02/2022 03:00:00    |08/02/2022         |
|10|200|    2000    |02/02/2022 02:14:00    |04/02/2022 21:37:00|
|20|100|    1000    |01/02/2022 05:00:00    |03/02/2022         |
|30|100|    2000    |02/02/2022             |02/02/2022 23:00:00|
|30|200|    2000    |02/02/2022 02:14:00    |04/02/2022         |
|40|100|    2000    |07/02/2022 03:00:00    |08/02/2022 23:10:00|
|50|200|    2000    |04/02/2022             |04/02/2022 21:37:00|
|50|200|    3000    |04/02/2022 02:12:00    |05/02/2022 23:31:00|

Below simple query works fine, but only for ids which have one common period (with no gaps).

I would expect for id = 10 to return two rows (as there is a gap between dates) for periods which are:

I) 02/02/2022 00:00:00 <-> 04/02/2022 21:37:00

II) 07/02/2022 01:00:00 <-> 08/02/2022 00:00:00

 SELECT id
       ,MIN(date_from) date_from
       ,MAX(date_to)   date_to
  FROM my_gtt 
  GROUP BY id
  ORDER BY id

Current results (but id = 10 is incorrect):

|id|date_from|date_to|
|--|--|--|
|10|    02/02/2022          |08/02/2022         |
|20|    01/02/2022 05:00:00 |03/02/2022         |
|30|    02/02/2022          |04/02/2022         |
|40|    07/02/2022 03:00:00 |08/02/2022 23:10:00|
|50|    04/02/2022          |05/02/2022 23:31:00|

Data and table creation:

CREATE GLOBAL TEMPORARY TABLE my_gtt
(
  id         NUMBER(10),
  code_id    NUMBER(10),
  code       NUMBER(10),
  date_from  DATE,
  date_to    DATE
)
ON COMMIT PRESERVE ROWS;

INSERT INTO my_gtt VALUES (10, 100, 1000, TO_DATE('02-02-2022', 'dd-mm-yyyy'), TO_DATE('03-02-2022 23:57:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt VALUES (10, 100, 1000, TO_DATE('07-02-2022 01:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('08-02-2022', 'dd-mm-yyyy'));
INSERT INTO my_gtt VALUES (10, 100, 2000, TO_DATE('02-02-2022', 'dd-mm-yyyy'), TO_DATE('02-02-2022 23:00:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt VALUES (10, 100, 2000, TO_DATE('07-02-2022 03:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('08-02-2022', 'dd-mm-yyyy'));
INSERT INTO my_gtt VALUES (10, 200, 2000, TO_DATE('02-02-2022 02:14:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('04-02-2022 21:37:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt VALUES (20, 100, 1000, TO_DATE('01-02-2022 05:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('03-02-2022', 'dd-mm-yyyy'));
INSERT INTO my_gtt VALUES (30, 100, 2000, TO_DATE('02-02-2022', 'dd-mm-yyyy'), TO_DATE('02-02-2022 23:00:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt VALUES (30, 200, 2000, TO_DATE('02-02-2022 02:14:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('04-02-2022', 'dd-mm-yyyy'));
INSERT INTO my_gtt VALUES (40, 100, 2000, TO_DATE('07-02-2022 03:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('08-02-2022 23:10:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt VALUES (50, 200, 2000, TO_DATE('04-02-2022', 'dd-mm-yyyy'), TO_DATE('04-02-2022 21:37:00', 'dd-mm-yyyy hh24:mi:ss'));
INSERT INTO my_gtt VALUES (50, 200, 3000, TO_DATE('04-02-2022 02:12:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('05-02-2022 23:31:00', 'dd-mm-yyyy hh24:mi:ss'));

Solution

  • From Oracle 12, MATCH_RECOGNIZE is the simplest solution:

    SELECT *
    FROM   my_gtt
    MATCH_RECOGNIZE (
      PARTITION BY id
      ORDER     BY date_from, date_to
      MEASURES
        MIN(date_from) AS start_date,
        MAX(date_to)   AS end_date
      PATTERN (overlap* last_row)
      DEFINE
        overlap AS MAX(date_to) >= NEXT(date_from)
    );
    

    However, if you are on an earlier version you can find the output using:

    SELECT id,
           MIN(dt) AS date_from,
           MAX(dt) AS date_to
    FROM   (
      SELECT id,
             dt,
             SUM(value) OVER (PARTITION BY id ORDER BY dt, ROWNUM) AS match_no
      FROM   (
        SELECT id,
               dt,
               type * SUM(type) OVER (PARTITION BY id ORDER BY dt, ROWNUM) AS value
        FROM   my_gtt
        UNPIVOT (dt FOR type IN (date_from AS 1, date_to AS -1))
      )
      WHERE  value IN (1,0)
    )
    GROUP BY id, match_no
    

    Which, for the sample data:

    INSERT INTO my_gtt VALUES (10, 100, 1000, TO_DATE('02-02-2022', 'dd-mm-yyyy'), TO_DATE('03-02-2022 23:57:00', 'dd-mm-yyyy hh24:mi:ss'));
    INSERT INTO my_gtt VALUES (10, 100, 1000, TO_DATE('07-02-2022 01:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('08-02-2022', 'dd-mm-yyyy'));
    INSERT INTO my_gtt VALUES (10, 100, 2000, TO_DATE('02-02-2022', 'dd-mm-yyyy'), TO_DATE('02-02-2022 23:00:00', 'dd-mm-yyyy hh24:mi:ss'));
    INSERT INTO my_gtt VALUES (10, 100, 2000, TO_DATE('07-02-2022 03:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('08-02-2022', 'dd-mm-yyyy'));
    INSERT INTO my_gtt VALUES (10, 200, 2000, TO_DATE('02-02-2022 02:14:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('04-02-2022 21:37:00', 'dd-mm-yyyy hh24:mi:ss'));
    INSERT INTO my_gtt VALUES (20, 100, 1000, TO_DATE('01-02-2022 05:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('03-02-2022', 'dd-mm-yyyy'));
    INSERT INTO my_gtt VALUES (30, 100, 2000, TO_DATE('02-02-2022', 'dd-mm-yyyy'), TO_DATE('02-02-2022 23:00:00', 'dd-mm-yyyy hh24:mi:ss'));
    INSERT INTO my_gtt VALUES (30, 200, 2000, TO_DATE('02-02-2022 02:14:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('04-02-2022', 'dd-mm-yyyy'));
    INSERT INTO my_gtt VALUES (40, 100, 2000, TO_DATE('07-02-2022 03:00:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('08-02-2022 23:10:00', 'dd-mm-yyyy hh24:mi:ss'));
    INSERT INTO my_gtt VALUES (50, 200, 2000, TO_DATE('04-02-2022', 'dd-mm-yyyy'), TO_DATE('04-02-2022 21:37:00', 'dd-mm-yyyy hh24:mi:ss'));
    INSERT INTO my_gtt VALUES (50, 200, 3000, TO_DATE('04-02-2022 02:12:00', 'dd-mm-yyyy hh24:mi:ss'), TO_DATE('05-02-2022 23:31:00', 'dd-mm-yyyy hh24:mi:ss'));
    INSERT INTO my_gtt VALUES (60, 200, 3000, DATE '2022-01-01', DATE '2022-01-10');
    INSERT INTO my_gtt VALUES (60, 200, 3000, DATE '2022-01-02', DATE '2022-01-04');
    INSERT INTO my_gtt VALUES (60, 200, 3000, DATE '2022-01-06', DATE '2022-01-11');
    INSERT INTO my_gtt VALUES (60, 200, 3000, DATE '2022-01-13', DATE '2022-01-16');
    INSERT INTO my_gtt VALUES (60, 200, 3000, DATE '2022-01-14', DATE '2022-01-15');
    

    Both output:

    ID START_DATE END_DATE
    10 2022-02-02 00:00:00 2022-02-04 21:37:00
    10 2022-02-07 01:00:00 2022-02-08 00:00:00
    20 2022-02-01 05:00:00 2022-02-03 00:00:00
    30 2022-02-02 00:00:00 2022-02-04 00:00:00
    40 2022-02-07 03:00:00 2022-02-08 23:10:00
    50 2022-02-04 00:00:00 2022-02-05 23:31:00
    60 2022-01-01 00:00:00 2022-01-11 00:00:00
    60 2022-01-13 00:00:00 2022-01-16 00:00:00

    db<>fiddle here