Search code examples
sqlsql-servert-sqlsql-server-2012

SQL Hierarchy Fill Down


I have an entity table that list row per entity, the rows are ordered (using id column) to show an implied hierarchy. Note for row 'B2', 'C1' does not get filled down because that is different second level entity. The same with 'A3' last row.

I need to transform, Table_Entity (see sql definition below):

id Level_1 Level_2 Level_3 Level_4
0 1 A1 null null null
1 2 null B1 null null
2 3 null null C1 null
3 4 null B2 null null
4 5 A2 null null null
5 6 null B3 null null
6 7 null null C2 null
7 8 null null C3 null
8 9 null null null D1
9 10 A3 null null null

to, Table_Entity_OUTPUT (see SQL definition below):

id Level_1 Level_2 Level_3 Level_4
0 1 A1 null null null
1 2 A1 B1 null null
2 3 A1 B1 C1 null
3 4 A1 B2 null null
4 5 A2 null null null
5 6 A2 B3 null null
6 7 A2 B3 C2 null
7 8 A2 B3 C3 null
8 9 A2 B3 C3 D1
9 10 A3 null null null

SQL code for input and expected output tables

CREATE TABLE Table_Entity (
  id int,
  Level_1 Varchar(10),
  Level_2 Varchar(10),
  Level_3 Varchar(10),
  Level_4 Varchar(10)
);

INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (1,'A1', null, null, null);
INSERT INTO Table_Entity(id,Level_1, Level_2, Level_3, Level_4) VALUES (2, null, 'B1', null, null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (3, null, null, 'C1', null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (4, null, 'B2', null, null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (5, 'A2', null, null, null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (6, null, 'B3', null, null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (7, null, null, 'C2', null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (8, null, null, 'C3', null);
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (9, null, null, null, 'D1');
INSERT INTO Table_Entity(id, Level_1, Level_2, Level_3, Level_4) VALUES (10, 'A3', null, null, null);

CREATE TABLE Table_Entity_OUTPUT (
  id int, 
  Level_1 Varchar(10),
  Level_2 Varchar(10),
  Level_3 Varchar(10),
  Level_4 Varchar(10)
);

INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (1, 'A1', null, null, null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (2, 'A1', 'B1', null, null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (3, 'A1', 'B1', 'C1', null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (4, 'A1', 'B2', null, null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (5, 'A2', null, null, null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (6, 'A2', 'B3', null, null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (7, 'A2', 'B3', 'C2', null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (8, 'A2', 'B3', 'C3', null);
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (9, 'A2', 'B3', 'C3', 'D1');
INSERT INTO Table_Entity_OUTPUT(id, Level_1, Level_2, Level_3, Level_4) VALUES (10, 'A3', null, null, null);

Solution

  • In the absense of SKIP NULLS / IGNORE NULLS, my first thought is to create groups based on incremental changes in the relevant column AND its parent columns. Then use MAX(value) OVER (PARTITION BY group_id).

    WITH
      ids AS
    (
      SELECT
        *,
        COUNT(Level_1) OVER (ORDER BY id)   AS Level_1_id,
        COUNT(Level_2) OVER (ORDER BY id)   AS Level_2_id,
        COUNT(Level_3) OVER (ORDER BY id)   AS Level_3_id,
        COUNT(Level_4) OVER (ORDER BY id)   AS Level_4_id
      FROM
        Table_Entity
    )
    SELECT
      *,
      MAX(Level_1) OVER (PARTITION BY Level_1_id),
      MAX(Level_2) OVER (PARTITION BY Level_1_id, Level_2_id),
      MAX(Level_3) OVER (PARTITION BY Level_1_id, Level_2_id, Level_3_id),
      MAX(Level_4) OVER (PARTITION BY Level_1_id, Level_2_id, Level_3_id, Level_4_id)
    FROM
      ids
    
    
    id Level_1 Level_2 Level_3 Level_4 Level_1_id Level_2_id Level_3_id Level_4_id (No column name) (No column name) (No column name) (No column name)
    1 A1 null null null 1 0 0 0 A1 null null null
    2 null B1 null null 1 1 0 0 A1 B1 null null
    3 null null C1 null 1 1 1 0 A1 B1 C1 null
    4 null B2 null null 1 2 1 0 A1 B2 null null
    5 A2 null null null 2 2 1 0 A2 null null null
    6 null B3 null null 2 3 1 0 A2 B3 null null
    7 null null C2 null 2 3 2 0 A2 B3 C2 null
    8 null null C3 null 2 3 3 0 A2 B3 C3 null
    9 null null null D1 2 3 3 1 A2 B3 C3 D1
    10 A3 null null null 3 3 3 1 A3 null null null

    fiddle