Search code examples
sqldatabasetreevertica

How to get groups from predecessor and successor table with SQL Server or Vertica SQL


I have the following data structure within a SQL database (ideally Vertica).

id_predecessor id_successor
1 3
2 3
3 4
3 5
47 48
48 49
... ...

The table contains relationships between orders by that multiple orders could be connected to a group. For example there is one order group (A) with 5 orders total. The relationship is as follows:

example

And another order group (B) with a simple relationship:

1 — 2 - 3

... and I would like to get the following structure out of it:

net_id id level
A 1 1
A 2 1
A 3 2
A 4 3
A 5 3
B 47 1
B 48 2
B 49 3
... ... ...

For each net that is connected I would like to assign a unique net id. The main problem is, that I can have multiple start nodes and also multiple end nodes in each net, so I do not have a clear starting point, to assign a net id. I do not care about the value format of the net id, it should just be a unique id for each net, to filter for specific nets.

Thank you, hope there is a easy solution.


Solution

  • I had to make it a multi-step process.

    a) Create your input table

    CREATE LOCAL TEMPORARY TABLE indata
    ON COMMIT PRESERVE ROWS AS
    WITH
    indata(id_predecessor,id_successor) AS (
              SELECT 1,3 
    UNION ALL SELECT 2,3 
    UNION ALL SELECT 3,4 
    UNION ALL SELECT 3,5 
    UNION ALL SELECT 47,48
    UNION ALL SELECT 48,49
    )
    SELECT * FROM indata;                                                                                                                                                                                          
    
    

    b) Create the three-node, two-edge, graphs with a recursive query.

    -- 1. create 3-node graphs with a recursive query
    CREATE LOCAL TEMPORARY TABLE graph
    ON COMMIT PRESERVE ROWS AS
    WITH RECURSIVE rec AS (
      SELECT
        id_predecessor
      , id_successor
      , id_successor AS id_middle
      , (id_predecessor::VARCHAR(3)||'>'||id_successor::VARCHAR(3))::VARCHAR(32) AS graph
      , 1 AS lvl
      FROM indata
      WHERE id_predecessor NOT IN (
        SELECT id_successor FROM indata
      )
      UNION ALL
      SELECT
        c.id_predecessor
      , c.id_successor
      , p.id_middle
      , (p.graph ||'>'||c.id_successor::VARCHAR(3))::VARCHAR(32) AS graph
      , p.lvl + 1 AS lvl
      FROM indata c
      JOIN rec    p ON c.id_predecessor = p.id_successor
    )
    SELECT
      *
    FROM rec
    WHERE lvl = 2
    ORDER BY id_middle;
    
    --SELECT * FROM graph;
    -- out  id_predecessor | id_successor | id_middle |  graph   | lvl 
    -- out ----------------+--------------+-----------+----------+-----
    -- out               3 |            4 |         3 | 1>3>4    |   2
    -- out               3 |            4 |         3 | 2>3>4    |   2
    -- out               3 |            5 |         3 | 1>3>5    |   2
    -- out               3 |            5 |         3 | 2>3>5    |   2
    -- out              48 |           49 |        48 | 47>48>49 |   2
    

    Then,

    • get the distinct middle ID-s from the query above
    • using the sort order of the middle-IDs, generate 'A' and 'B' in a lookup table
    • you can use the graph column built here, to get one row per node in the graph, using the EXPLODE() function
    • and join that EXPLODE() query back to the distinct group identifiers, applying a DISTINCT to avoid doublets.
    WITH
    ids_middle AS (
        SELECT DISTINCT
          id_middle
        FROM graph
    )
    ,
    groupids(id_grp,id_middle) AS (
      SELECT
        CHR(ASCII('A')-1+ROW_NUMBER() OVER(ORDER BY id_middle))
      , id_middle
      FROM  ids_middle
      -- SELECT * FROM groupids;
      -- out  id_grp | id_middle
      -- out --------+--------
      -- out  A      |      3
      -- out  B      |     48
    )
    ,
    vertical AS (
      SELECT
        EXPLODE(
          id_middle
        , lvl
        , STRING_TO_ARRAY(graph USING PARAMETERS collection_delimiter='>')
        ) OVER(PARTITION BEST)
      FROM graph
        -- out  id_middle | lvl | position | value 
        -- out -----------+-----+----------+-------
        -- out          3 |   2 |        0 | 1
        -- out          3 |   2 |        1 | 3
        -- out          3 |   2 |        2 | 4
        -- out          3 |   2 |        0 | 2
        -- out          3 |   2 |        1 | 3
        -- out          3 |   2 |        2 | 4
        -- out          3 |   2 |        0 | 1
        -- out          3 |   2 |        1 | 3
        -- out          3 |   2 |        2 | 5
        -- out          3 |   2 |        0 | 2
        -- out          3 |   2 |        1 | 3
        -- out          3 |   2 |        2 | 5
        -- out         48 |   2 |        0 | 47
        -- out         48 |   2 |        1 | 48
        -- out         48 |   2 |        2 | 49
    )
    SELECT DISTINCT
      id_grp
    , value        AS id
    , position + 1 AS level
    FROM vertical
    JOIN groupids USING(id_middle)
    ORDER BY id_grp;
    -- out  id_grp | id | level 
    -- out --------+----+-------
    -- out  A      | 1  |     1
    -- out  A      | 2  |     1
    -- out  A      | 3  |     2
    -- out  A      | 4  |     3
    -- out  A      | 5  |     3
    -- out  B      | 47 |     1
    -- out  B      | 48 |     2
    -- out  B      | 49 |     3