I have an entity table that list row per entity, the rows are ordered (using id column) to show an implied hierarchy. Note for row 'B2', 'C1' does not get filled down because that is different second level entity. The same with 'A3' last row.
I need to transform, Table_Entity (see sql definition below):
id | Level_1 | Level_2 | Level_3 | Level_4 | |
---|---|---|---|---|---|
0 | 1 | A1 | null | null | null |
1 | 2 | null | B1 | null | null |
2 | 3 | null | null | C1 | null |
3 | 4 | null | B2 | null | null |
4 | 5 | A2 | null | null | null |
5 | 6 | null | B3 | null | null |
6 | 7 | null | null | C2 | null |
7 | 8 | null | null | C3 | null |
8 | 9 | null | null | null | D1 |
9 | 10 | A3 | null | null | null |
to, Table_Entity_OUTPUT (see SQL definition below):
id | Level_1 | Level_2 | Level_3 | Level_4 | |
---|---|---|---|---|---|
0 | 1 | A1 | null | null | null |
1 | 2 | A1 | B1 | null | null |
2 | 3 | A1 | B1 | C1 | null |
3 | 4 | A1 | B2 | null | null |
4 | 5 | A2 | null | null | null |
5 | 6 | A2 | B3 | null | null |
6 | 7 | A2 | B3 | C2 | null |
7 | 8 | A2 | B3 | C3 | null |
8 | 9 | A2 | B3 | C3 | D1 |
9 | 10 | A3 | null | null | null |
SQL code for input and expected output tables
CREATE TABLE Table_Entity (
id int,
Level_1 Varchar(10),
Level_2 Varchar(10),
Level_3 Varchar(10),
Level_4 Varchar(10)
);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (1,'A1', null, null, null);
INSERT INTO Table_Entity(id,Level_1, Level_2, Level_3, Level_4) VALUES (2, null, 'B1', null, null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (3, null, null, 'C1', null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (4, null, 'B2', null, null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (5, 'A2', null, null, null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (6, null, 'B3', null, null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (7, null, null, 'C2', null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (8, null, null, 'C3', null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (9, null, null, null, 'D1');
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (10, 'A3', null, null, null);
CREATE TABLE Table_Entity_OUTPUT (
id int,
Level_1 Varchar(10),
Level_2 Varchar(10),
Level_3 Varchar(10),
Level_4 Varchar(10)
);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (1, 'A1', null, null, null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (2, 'A1', 'B1', null, null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (3, 'A1', 'B1', 'C1', null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (4, 'A1', 'B2', null, null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (5, 'A2', null, null, null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (6, 'A2', 'B3', null, null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (7, 'A2', 'B3', 'C2', null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (8, 'A2', 'B3', 'C3', null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (9, 'A2', 'B3', 'C3', 'D1');
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (10, 'A3', null, null, null);
In the absense of SKIP NULLS / IGNORE NULLS, my first thought is to create groups based on incremental changes in the relevant column AND its parent columns. Then use MAX(value) OVER (PARTITION BY group_id)
.
WITH
ids AS
(
SELECT
*,
COUNT(Level_1) OVER (ORDER BY id) AS Level_1_id,
COUNT(Level_2) OVER (ORDER BY id) AS Level_2_id,
COUNT(Level_3) OVER (ORDER BY id) AS Level_3_id,
COUNT(Level_4) OVER (ORDER BY id) AS Level_4_id
FROM
Table_Entity
)
SELECT
*,
MAX(Level_1) OVER (PARTITION BY Level_1_id),
MAX(Level_2) OVER (PARTITION BY Level_1_id, Level_2_id),
MAX(Level_3) OVER (PARTITION BY Level_1_id, Level_2_id, Level_3_id),
MAX(Level_4) OVER (PARTITION BY Level_1_id, Level_2_id, Level_3_id, Level_4_id)
FROM
ids
id | Level_1 | Level_2 | Level_3 | Level_4 | Level_1_id | Level_2_id | Level_3_id | Level_4_id | (No column name) | (No column name) | (No column name) | (No column name) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | A1 | null | null | null | 1 | 0 | 0 | 0 | A1 | null | null | null |
2 | null | B1 | null | null | 1 | 1 | 0 | 0 | A1 | B1 | null | null |
3 | null | null | C1 | null | 1 | 1 | 1 | 0 | A1 | B1 | C1 | null |
4 | null | B2 | null | null | 1 | 2 | 1 | 0 | A1 | B2 | null | null |
5 | A2 | null | null | null | 2 | 2 | 1 | 0 | A2 | null | null | null |
6 | null | B3 | null | null | 2 | 3 | 1 | 0 | A2 | B3 | null | null |
7 | null | null | C2 | null | 2 | 3 | 2 | 0 | A2 | B3 | C2 | null |
8 | null | null | C3 | null | 2 | 3 | 3 | 0 | A2 | B3 | C3 | null |
9 | null | null | null | D1 | 2 | 3 | 3 | 1 | A2 | B3 | C3 | D1 |
10 | A3 | null | null | null | 3 | 3 | 3 | 1 | A3 | null | null | null |