Search code examples
viewdb2teradatadatabase-normalizationdenormalization

SQL: ?Create a VIEW from 3NF tables to look like a single row Repeating Group


We have a legacy table, containing repeating groups.

We want to swap to a normalised parent child, and then create a view that looks exactly like the old table. We can upgrade the system progressively with no impact.

I'm having some trouble getting my head around how to write the view efficiently - there's a lot of data behind it (100s of millions of rows). We can do it in DB2 and/or Teradata

Sample data

OLD_TABLE

 PID 
 CID_COUNT -- count of values used 
 CID_1
 CVALUE_1
 CID_2
 CVALUE_2

NEW_PARENT_TABLE

PID
CID_COUNT

NEW_CHILD_TABLE

PID
CID
CVALUE

Solution

  • Assuming you are using Db2 11.1.0.0 or above, or Db2 Warehouse, this code will give you the answer you want.

    DROP TABLE NEW_PARENT_TABLE;
    DROP TABLE NEW_CHILD_TABLE;
    
    CREATE  OR REPLACE VIEW OLD_TABLE (PID, NAME, CID_COUNT, CID_1, CVALUE_1,  CID_2, CVALUE_2)
    AS (VALUES 
        (123,'FRED',2,1   ,'Stilton',2  ,'Cheddar')
    ,   (124,'MARY',1,1   ,'Butter' ,null,null)
    ,   (125,'BOB', 0,null,null     ,null,null)
    )
    ;
    CREATE TABLE NEW_PARENT_TABLE (
          PID INT NOT NULL PRIMARY KEY
        , NAME CHAR(4) NOT NULl UNIQUE
        , CID_COUNT SMALLINT NOT NULL
        ) ORGANIZE BY ROW
    ;
    INSERT INTO NEW_PARENT_TABLE
    SELECT DISTINCT PID, NAME, CID_COUNT FROM OLD_TABLE
    ;
    CREATE TABLE NEW_CHILD_TABLE (
          PID INT NOT NULL REFERENCES NEW_PARENT_TABLE
        , CID INT NOT NULL
        , CVALUE VARCHAR(16) NOT NULL
        , PRIMARY KEY ( PID, CID )
        ) ORGANIZE BY ROW
    ;
    INSERT INTO NEW_CHILD_TABLE
    SELECT N.*
    FROM OLD_TABLE O
    ,   TABLE(VALUES (O.PID, O.CID_1, O.CVALUE_1)
                ,    (O.PID, O.CID_2, O.CVALUE_2)
                ) AS N (PID, CID, CVALUE )
    WHERE
        N.CID IS NOT NULL;
    
    CREATE OR REPLACE VIEW NEW_VIEW AS
    SELECT PID, CID_COUNT
    ,      MAX(CASE WHEN CID = 1 THEN CID    END) AS CID_1
    ,      MAX(CASE WHEN CID = 1 THEN CVALUE END) AS CVALUE_1
    ,      MAX(CASE WHEN CID = 2 THEN CID    END) AS CID_2
    ,      MAX(CASE WHEN CID = 2 THEN CVALUE END) AS CVALUE_2
     FROM NEW_PARENT_TABLE
     LEFT JOIN NEW_CHILD_TABLE USING (PID)
     GROUP BY PID, CID_COUNT
     ;
     SELECT * FROM NEW_VIEW
    
     PID CID_COUNT CID_1 CVALUE_1 CID_2 CVALUE_2
     --- --------- ----- -------- ----- --------
     123         2     1 Stilton      2 Cheddar
     124         1     1 Butter    NULL NULL
     125         0  NULL NULL      NULL NULL
    

    If on DB2 (for LUW) 10.5, use ON not USING in the NEW_VIEW and qualify the columns. If on DB2 10.1 or less, also remove the ORGANIZE BY ROW