I have the following data structure within a SQL database (ideally Vertica).
id_predecessor | id_successor |
---|---|
1 | 3 |
2 | 3 |
3 | 4 |
3 | 5 |
47 | 48 |
48 | 49 |
... | ... |
The table contains relationships between orders by that multiple orders could be connected to a group. For example there is one order group (A) with 5 orders total. The relationship is as follows:
And another order group (B) with a simple relationship:
1 — 2 - 3
... and I would like to get the following structure out of it:
net_id | id | level |
---|---|---|
A | 1 | 1 |
A | 2 | 1 |
A | 3 | 2 |
A | 4 | 3 |
A | 5 | 3 |
B | 47 | 1 |
B | 48 | 2 |
B | 49 | 3 |
... | ... | ... |
For each net that is connected I would like to assign a unique net id. The main problem is, that I can have multiple start nodes and also multiple end nodes in each net, so I do not have a clear starting point, to assign a net id. I do not care about the value format of the net id, it should just be a unique id for each net, to filter for specific nets.
Thank you, hope there is a easy solution.
I had to make it a multi-step process.
a) Create your input table
CREATE LOCAL TEMPORARY TABLE indata
ON COMMIT PRESERVE ROWS AS
WITH
indata(id_predecessor,id_successor) AS (
SELECT 1,3
UNION ALL SELECT 2,3
UNION ALL SELECT 3,4
UNION ALL SELECT 3,5
UNION ALL SELECT 47,48
UNION ALL SELECT 48,49
)
SELECT * FROM indata;
b) Create the three-node, two-edge, graphs with a recursive query.
-- 1. create 3-node graphs with a recursive query
CREATE LOCAL TEMPORARY TABLE graph
ON COMMIT PRESERVE ROWS AS
WITH RECURSIVE rec AS (
SELECT
id_predecessor
, id_successor
, id_successor AS id_middle
, (id_predecessor::VARCHAR(3)||'>'||id_successor::VARCHAR(3))::VARCHAR(32) AS graph
, 1 AS lvl
FROM indata
WHERE id_predecessor NOT IN (
SELECT id_successor FROM indata
)
UNION ALL
SELECT
c.id_predecessor
, c.id_successor
, p.id_middle
, (p.graph ||'>'||c.id_successor::VARCHAR(3))::VARCHAR(32) AS graph
, p.lvl + 1 AS lvl
FROM indata c
JOIN rec p ON c.id_predecessor = p.id_successor
)
SELECT
*
FROM rec
WHERE lvl = 2
ORDER BY id_middle;
--SELECT * FROM graph;
-- out id_predecessor | id_successor | id_middle | graph | lvl
-- out ----------------+--------------+-----------+----------+-----
-- out 3 | 4 | 3 | 1>3>4 | 2
-- out 3 | 4 | 3 | 2>3>4 | 2
-- out 3 | 5 | 3 | 1>3>5 | 2
-- out 3 | 5 | 3 | 2>3>5 | 2
-- out 48 | 49 | 48 | 47>48>49 | 2
Then,
graph
column built here, to get one row per node in the graph, using the EXPLODE()
functionEXPLODE()
query back to the distinct group identifiers, applying a DISTINCT to avoid doublets.WITH
ids_middle AS (
SELECT DISTINCT
id_middle
FROM graph
)
,
groupids(id_grp,id_middle) AS (
SELECT
CHR(ASCII('A')-1+ROW_NUMBER() OVER(ORDER BY id_middle))
, id_middle
FROM ids_middle
-- SELECT * FROM groupids;
-- out id_grp | id_middle
-- out --------+--------
-- out A | 3
-- out B | 48
)
,
vertical AS (
SELECT
EXPLODE(
id_middle
, lvl
, STRING_TO_ARRAY(graph USING PARAMETERS collection_delimiter='>')
) OVER(PARTITION BEST)
FROM graph
-- out id_middle | lvl | position | value
-- out -----------+-----+----------+-------
-- out 3 | 2 | 0 | 1
-- out 3 | 2 | 1 | 3
-- out 3 | 2 | 2 | 4
-- out 3 | 2 | 0 | 2
-- out 3 | 2 | 1 | 3
-- out 3 | 2 | 2 | 4
-- out 3 | 2 | 0 | 1
-- out 3 | 2 | 1 | 3
-- out 3 | 2 | 2 | 5
-- out 3 | 2 | 0 | 2
-- out 3 | 2 | 1 | 3
-- out 3 | 2 | 2 | 5
-- out 48 | 2 | 0 | 47
-- out 48 | 2 | 1 | 48
-- out 48 | 2 | 2 | 49
)
SELECT DISTINCT
id_grp
, value AS id
, position + 1 AS level
FROM vertical
JOIN groupids USING(id_middle)
ORDER BY id_grp;
-- out id_grp | id | level
-- out --------+----+-------
-- out A | 1 | 1
-- out A | 2 | 1
-- out A | 3 | 2
-- out A | 4 | 3
-- out A | 5 | 3
-- out B | 47 | 1
-- out B | 48 | 2
-- out B | 49 | 3