Search code examples
sqloraclecommon-table-expression

Aggregate Path elements with Timecodes / Timeranges


I have the following code. Finally, I would like to summarize my original data. The overlapping periods should be summarized and the min/max dates should be output.

The following code gives the longest PATH for each ID. So far so good. Unfortunately, I don't know how to use the ID to specify the longest overall PATH depending on the group.

Original table:

ID DATEN BEGINN_ZEITSTEMPEL ENDE_ZEITSTEMPEL GRUPPE
1 Datensatz1A 2023-05-01 00:00:00 2023-06-01 00:00:00 1
2 Datensatz2A 2023-04-01 00:00:00 2023-05-15 00:00:00 1
3 Datensatz3A 2023-03-01 00:00:00 2023-05-30 00:00:00 1
4 Datensatz4A 2023-02-01 00:00:00 2023-05-29 00:00:00 1
5 Datensatz5A 2023-05-14 00:00:00 2023-05-15 00:00:00 2
6 Datensatz1B 2023-05-29 00:00:00 2023-05-30 00:00:00 2
7 Datensatz2B 2023-05-01 00:00:00 2023-08-01 00:00:00 3
8 Datensatz3B 2023-05-01 00:00:00 2023-09-01 00:00:00 3
9 Datensatz4B 2023-05-01 00:00:00 2023-06-01 00:00:00 3
10 Datensatz5B 2021-05-01 00:00:00 2022-06-01 00:00:00 3

Final Table:

START_ID DATEN BEGINN_ZEITSTEMPEL ENDE_ZEITSTEMPEL GRUPPE PATH
1 Datensatz1A 2023-02-01 00:00:00 2023-06-01 00:00:00 1 2 -> 4 -> 3 -> 1
5 Datensatz5A 2023-05-14 00:00:00 2023-05-15 00:00:00 2 5
6 Datensatz1B 2023-05-29 00:00:00 2023-05-30 00:00:00 2 6
7 Datensatz3B 2023-05-01 00:00:00 2023-09-01 00:00:00 3 9 -> 7 -> 8
10 Datensatz5B 2021-05-01 00:00:00 2022-06-01 00:00:00 3 10
    WITH CTE as (
            Select 1 as ID, 'Datensatz1A' as Daten, TO_DATE('2023-05-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Beginn_Zeitstempel, TO_DATE('2023-06-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Ende_Zeitstempel, 1 as Gruppe FROM DUAL
            UNION Select 2 as ID, 'Datensatz2A' as Daten, TO_DATE('2023-04-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Beginn_Zeitstempel, TO_DATE('2023-05-15 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Ende_Zeitstempel, 1 as Gruppe FROM DUAL
            UNION Select 3 as ID, 'Datensatz3A' as Daten, TO_DATE('2023-03-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Beginn_Zeitstempel, TO_DATE('2023-05-30 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Ende_Zeitstempel, 1 as Gruppe FROM DUAL
            UNION Select 4 as ID, 'Datensatz4A' as Daten, TO_DATE('2023-02-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Beginn_Zeitstempel, TO_DATE('2023-05-29 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Ende_Zeitstempel, 1 as Gruppe FROM DUAL
            UNION Select 5 as ID, 'Datensatz5A' as Daten, TO_DATE('2023-05-14 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Beginn_Zeitstempel, TO_DATE('2023-05-15 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Ende_Zeitstempel, 2 as Gruppe FROM DUAL
            UNION Select 6 as ID, 'Datensatz1B' as Daten, TO_DATE('2023-05-29 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Beginn_Zeitstempel, TO_DATE('2023-05-30 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Ende_Zeitstempel, 2 as Gruppe FROM DUAL
            UNION Select 7 as ID, 'Datensatz2B' as Daten, TO_DATE('2023-05-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Beginn_Zeitstempel, TO_DATE('2023-08-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Ende_Zeitstempel, 3 as Gruppe FROM DUAL
            UNION Select 8 as ID, 'Datensatz3B' as Daten, TO_DATE('2023-05-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Beginn_Zeitstempel, TO_DATE('2023-09-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Ende_Zeitstempel, 3 as Gruppe FROM DUAL
            UNION Select 9 as ID, 'Datensatz4B' as Daten, TO_DATE('2023-05-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Beginn_Zeitstempel, TO_DATE('2023-06-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Ende_Zeitstempel, 3 as Gruppe FROM DUAL
            UNION Select 10 as ID, 'Datensatz5B' as Daten, TO_DATE('2021-05-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Beginn_Zeitstempel, TO_DATE('2022-06-01 00:00:00', 'YYYY-MM-DD HH24:MI:SS') as Ende_Zeitstempel, 3 as Gruppe FROM DUAL
    ),
    RecursiveCTE (Start_ID, End_ID, Daten, Beginn_Zeitstempel, Ende_Zeitstempel, Gruppe, Path) AS (
        SELECT
            ID,
            ID,
            Daten,
            Beginn_Zeitstempel,
            Ende_Zeitstempel,
            Gruppe,
            CAST(ID AS VARCHAR2(100)) AS PATH
        FROM CTE
        WHERE NOT EXISTS (
            SELECT 1
            FROM CTE c
            WHERE c.ID = CTE.ID
            AND c.Gruppe = CTE.Gruppe
            AND c.Beginn_Zeitstempel < CTE.Beginn_Zeitstempel
        )
        UNION ALL
        SELECT
            r.Start_ID,
            c.ID,
            c.Daten,
            LEAST(r.Beginn_Zeitstempel, c.Beginn_Zeitstempel),
            GREATEST(r.Ende_Zeitstempel, c.Ende_Zeitstempel),
            c.Gruppe,
            r.Path || ' -> ' || c.ID AS Pfad
            --r.Path || c.ID AS PATH        
        FROM RecursiveCTE r
        JOIN CTE c ON r.End_ID <> c.ID
                    AND r.Gruppe = c.Gruppe
                    AND r.Ende_Zeitstempel >= c.Beginn_Zeitstempel
                    AND r.Ende_Zeitstempel <= c.Ende_Zeitstempel
                    AND INSTR(r.Path, c.ID) = 0
    ),
    -- SELECT    * from RecursiveCTE
    MaxPathCTE AS (
        SELECT
            Start_ID,
            Gruppe,
            MAX(PATH) KEEP (DENSE_RANK LAST ORDER BY LENGTH(PATH)) AS MAX_PATH
        FROM RecursiveCTE
        GROUP BY Start_ID, Gruppe
    )
    SELECT
        r.Start_ID,
        r.Daten,
        r.Beginn_Zeitstempel,
        r.Ende_Zeitstempel,
        r.Gruppe,
        r.Path
    FROM RecursiveCTE r
    INNER JOIN MaxPathCTE m ON r.Start_ID = m.Start_ID AND r.Gruppe = m.Gruppe AND r.Path = m.MAX_PATH
    ORDER BY r.Start_ID;

Solution

  • From Oracle 12, you can use MATCH_RECOGNIZE for row-by-row pattern matching:

    SELECT *
    FROM   table_name
    MATCH_RECOGNIZE (
      PARTITION BY Gruppe
      ORDER     BY Ende_Zeitstempel DESC, Beginn_Zeitstempel DESC
      MEASURES
        FIRST(id) AS start_id,
        LAST(ID) AS last_id,
        COUNT(*) AS num_rows,
        FIRST(daten) AS daten,
        MIN(Beginn_Zeitstempel) AS Beginn_Zeitstempel,
        FIRST(Ende_Zeitstempel) AS Ende_Zeitstempel
      PATTERN (overlapping* non_overlapping)
      DEFINE
        overlapping AS MIN(Beginn_Zeitstempel) <= NEXT(Ende_Zeitstempel)
    );
    

    Which, for the sample data:

    CREATE TABLE table_name (id, daten, Beginn_Zeitstempel, Ende_Zeitstempel, Gruppe) AS
      Select  1, 'Datensatz1A', DATE '2023-05-01', DATE '2023-06-01', 1 FROM DUAL UNION ALL
      Select  2, 'Datensatz2A', DATE '2023-04-01', DATE '2023-05-15', 1 FROM DUAL UNION ALL
      Select  3, 'Datensatz3A', DATE '2023-03-01', DATE '2023-05-30', 1 FROM DUAL UNION ALL
      Select  4, 'Datensatz4A', DATE '2023-02-01', DATE '2023-05-29', 1 FROM DUAL UNION ALL
      Select  5, 'Datensatz5A', DATE '2023-05-14', DATE '2023-05-15', 2 FROM DUAL UNION ALL
      Select  6, 'Datensatz1B', DATE '2023-05-29', DATE '2023-05-30', 2 FROM DUAL UNION ALL
      Select  7, 'Datensatz2B', DATE '2023-05-01', DATE '2023-08-01', 3 FROM DUAL UNION ALL
      Select  8, 'Datensatz3B', DATE '2023-05-01', DATE '2023-09-01', 3 FROM DUAL UNION ALL
      Select  9, 'Datensatz4B', DATE '2023-05-01', DATE '2023-06-01', 3 FROM DUAL UNION ALL
      Select 10, 'Datensatz5B', DATE '2021-05-01', DATE '2022-06-01', 3 FROM DUAL
    

    Outputs:

    GRUPPE START_ID LAST_ID NUM_ROWS DATEN BEGINN_ZEITSTEMPEL ENDE_ZEITSTEMPEL
    1 1 2 4 Datensatz1A 2023-02-01 00:00:00 2023-06-01 00:00:00
    2 6 6 1 Datensatz1B 2023-05-29 00:00:00 2023-05-30 00:00:00
    2 5 5 1 Datensatz5A 2023-05-14 00:00:00 2023-05-15 00:00:00
    3 8 9 3 Datensatz3B 2023-05-01 00:00:00 2023-09-01 00:00:00
    3 10 10 1 Datensatz5B 2021-05-01 00:00:00 2022-06-01 00:00:00

    If you want all the rows with the summary data:

    SELECT m.*,
           COUNT(*) OVER (PARTITION BY gruppe, match_num) AS total_rows
    FROM   table_name
    MATCH_RECOGNIZE (
      PARTITION BY Gruppe
      ORDER     BY Ende_Zeitstempel DESC, Beginn_Zeitstempel DESC
      MEASURES
        FIRST(id) AS start_id,
        MATCH_NUMBER() AS match_num,
        LAST(ID) AS last_id,
        COUNT(*) AS match_index,
        FIRST(daten) AS first_daten,
        MIN(Beginn_Zeitstempel) AS min_Beginn_Zeitstempel,
        FIRST(Ende_Zeitstempel) AS first_Ende_Zeitstempel
      ALL ROWS PER MATCH
      PATTERN (overlapping* non_overlapping)
      DEFINE
        overlapping AS MIN(Beginn_Zeitstempel) <= NEXT(Ende_Zeitstempel)
    ) m
    

    Which outputs:

    GRUPPE ENDE_ZEITSTEMPEL BEGINN_ZEITSTEMPEL START_ID MATCH_NUM LAST_ID MATCH_INDEX FIRST_DATEN MIN_BEGINN_ZEITSTEMPEL FIRST_ENDE_ZEITSTEMPEL ID DATEN TOTAL_ROWS
    1 2023-06-01 00:00:00 2023-05-01 00:00:00 1 1 1 1 Datensatz1A 2023-05-01 00:00:00 2023-06-01 00:00:00 1 Datensatz1A 4
    1 2023-05-30 00:00:00 2023-03-01 00:00:00 1 1 3 2 Datensatz1A 2023-03-01 00:00:00 2023-06-01 00:00:00 3 Datensatz3A 4
    1 2023-05-29 00:00:00 2023-02-01 00:00:00 1 1 4 3 Datensatz1A 2023-02-01 00:00:00 2023-06-01 00:00:00 4 Datensatz4A 4
    1 2023-05-15 00:00:00 2023-04-01 00:00:00 1 1 2 4 Datensatz1A 2023-02-01 00:00:00 2023-06-01 00:00:00 2 Datensatz2A 4
    2 2023-05-30 00:00:00 2023-05-29 00:00:00 6 1 6 1 Datensatz1B 2023-05-29 00:00:00 2023-05-30 00:00:00 6 Datensatz1B 1
    2 2023-05-15 00:00:00 2023-05-14 00:00:00 5 2 5 1 Datensatz5A 2023-05-14 00:00:00 2023-05-15 00:00:00 5 Datensatz5A 1
    3 2023-09-01 00:00:00 2023-05-01 00:00:00 8 1 8 1 Datensatz3B 2023-05-01 00:00:00 2023-09-01 00:00:00 8 Datensatz3B 3
    3 2023-08-01 00:00:00 2023-05-01 00:00:00 8 1 7 2 Datensatz3B 2023-05-01 00:00:00 2023-09-01 00:00:00 7 Datensatz2B 3
    3 2023-06-01 00:00:00 2023-05-01 00:00:00 8 1 9 3 Datensatz3B 2023-05-01 00:00:00 2023-09-01 00:00:00 9 Datensatz4B 3
    3 2022-06-01 00:00:00 2021-05-01 00:00:00 10 2 10 1 Datensatz5B 2021-05-01 00:00:00 2022-06-01 00:00:00 10 Datensatz5B 1

    fiddle