Search code examples
sqldb2pivotdynamic-pivot

dynamic pivot SQL Query in DB2


I have table like below in DB2 where all these 3 columns are VARCHAR data type

DATE_F    FILE_NM   ROW_ID  COLUMN_NM   COLUMN_VAL 
20200131  ABC.TXT     1      ABC_COL1     123
20200131  ABC.TXT     1      ABC_COL2     XYZ
20201231  ABC.TXT     2      ABC_COL1     456
20201231  ABC.TXT     2      ABC_COL2     XY1
20200630  XYZ.TXT     1      XYZ_COL1     PQR
20200630  XYZ.TXT     1      XYZ_COL2     567
20200630  XYZ.TXT     1      XYZ_COL3     MNO

I need a dynamic PIVOT query which should select values for ABC_COL1 and ABC_COL2 in row format when FILE_NM filter is used as ABC.TXT

The same query should select the values for XYZ_COL1, XYZ_COL2 and XYZ_COL3 in row format when FILE_NM filter is used as XYZ.TXT

output should be like below when FILE_NM=ABC.TXT

DATE_F    ROW_ID   ABC_COL1   ABC_COL2    
20200131    1        123       XYZ
20201231    2        456       XY1

output should be like below when FILE_NM=XYZ.TXT

 DATE_F    ROW_ID     XYZ_COL1     XYZ_COL2     XYZ_COL3
20200630    1           PQR         567           MNO

So, one one query will be able to extract the data. Can any function or equivalent can do the task in this case ?


Solution

  • You may try the following generic Stored Procedure doing pivoting.

    --#SET TERMINATOR @
    create or replace procedure pivot
    (
      in  sel_stmt varchar(4000)
    , in  row_cols varchar(200)
    , in  col_col  varchar(128)
    , in  agg_col  varchar(128)
    , in  agg_fn   varchar(10)
    , in  tmp_tbl  varchar(128)
    , in  null_ind varchar(10)
    , out rc       int
    , out msg      varchar(128)
    , out stmt     varchar(4000)
    )
    LANGUAGE SQL
    DYNAMIC RESULT sets 1
    BEGIN
     declare QUOT1        char(1) default '''';
     declare QUOT2        char(1) default '"';
     declare SQLCODE      int default 0;
     declare SQLTYPE_ID   int;
     declare SQLTYPE      varchar(128);
     declare SQLLENGTH    int;
     declare SQLSCALE     int;
     declare SQLNAME_DATA varchar(128);
     declare SQLTYPEF     varchar(128);
     declare col_val      varchar(4000);
     declare apo          varchar(1);
     declare l1 RESULT_set_LOCATOR VARYING;
    
     declare c2 cursor for s2;
     declare c_out cursor with return for s_out;
    
     declare EXIT HANDLER FOR SQLEXCEPTION
     BEGIN
       GET DIAGNOSTICS EXCEPTION 1 MSG = MESSAGE_TEXT;
       set RC = SQLCODE;
     END;
    
     set col_col=upper(col_col);
     set agg_col=upper(agg_col);
     -- insert result of select statement into temp table
     set stmt = 'describe '||sel_stmt;
     call SYSPROC.ADMIN_CMD(stmt);
     set stmt = '';
     associate result set locator (l1)
     with procedure SYSPROC.ADMIN_CMD;
     allocate c1 cursor for result set l1;
     --open c1;
     fetch c1 into SQLTYPE_ID, SQLTYPE, SQLLENGTH, SQLSCALE, SQLNAME_DATA;
     while (SQLCODE!=100) do
       set SQLTYPEF = SQLTYPE
       ||case 
          when SQLTYPE IN ('DECIMAL', 'DECFLOAT', 'CHARACTER', 'VARCHAR') then 
             '('||RTRIM(CHAR(SQLLENGTH))
           ||case when SQLTYPE='DECIMAL' then ','||RTRIM(CHAR(SQLSCALE)) else '' end
           ||')'                                
          else ''
         end;
       if (col_col=SQLNAME_DATA) then
         set apo = 
          case 
           when SQLTYPE in ('DECIMAL', 'DECFLOAT', 'INTEGER', 'SMALLINT', 'BIGINT', 'REAL', 'DOUBLE') then '' 
           else QUOT1 
          end;
       end if;
       set stmt = stmt||', '||SQLNAME_DATA||' '||SQLTYPEF;
       fetch c1 into SQLTYPE_ID, SQLTYPE, SQLLENGTH, SQLSCALE, SQLNAME_DATA;
     end while;
     close c1;
     set stmt = 
       'declare global temporary table '||tmp_tbl||'('||substr(stmt, 3)
     ||') with replace on commit preserve rows not logged';
     execute immediate stmt;
     set stmt = 'insert into '||tmp_tbl||' '||sel_stmt;
     execute immediate stmt;
    
     -- construct select statement
     set stmt = 'select distinct rtrim(char('||col_col||')) from '||tmp_tbl||' order by 1';
     prepare s2 from stmt;
     set stmt='';
     open c2;
     fetch c2 into col_val;
     while (SQLCODE!=100) do
       set stmt = 
         stmt||', '||agg_fn||'('
       ||'case when '||col_col||' '
       ||case when col_val is null then 'IS NULL' else ('='||apo||replace(col_val, QUOT1, QUOT1||QUOT1)||apo) end
       ||' then '||agg_col||' end) as '||QUOT2||coalesce(replace(col_val, QUOT2, QUOT2||QUOT2), null_ind)||QUOT2;
       fetch c2 into col_val;
     end while;
     close c2;
     -- add to the select statement groups
     set row_cols = nullif(row_cols, '');
     set stmt = 
       'select '||case when row_cols is not null then row_cols||',' else coalesce(row_cols, '') end
     ||substr(stmt, 2)||' from '||tmp_tbl||' '
     ||case when row_cols is not null then ('group by '||row_cols||' order by '||row_cols) else '' end;
     -- execute this statement
     prepare s_out from stmt;
     open c_out;
    END@
    

    Parameter description:

    PARM DESC
    sel_stmt Any valid SELECT statement for source data generation
    row_cols Comma separated list of column names used in the GROUP BY of final statement
    col_col A column name to pivot
    agg_col A column name to aggregate
    agg_fn Any valid Db2 aggregation function for the column name in "agg_col" parameter
    tmp_tbl DGTT name for intermediate result
    null_ind Null indicator
    rc Return code (OUT)
    msg Message text (OUT)
    stmt The final SELECT generated (OUT)

    Brief description:
    DGTT passed in tmp_tbl is created and populated with the result of a SELECT statement passed in sel_stmt. All the column names specified in row_cols, col_col, agg_col must be in the SELECT list of this statement.
    The final SELECT statement on this DGTT is dynamically generated using the following rules:

    • for each distinct value Vx in the column name passed in col_col an additional column expression is generated like: , <agg_fn> (case when <col_col> = Vx then <agg_col> end) as "Vx" (, <agg_fn> (case when <col_col> IS NULL then <agg_col> end) as "<null_ind>" is used for Vx IS NULL), where <parameter> means string value passed with this parameter.
    • row_cols column list is used in the GROUP BY, if it's not NULL.

    Use for your case:

    DECLARE GLOBAL TEMPORARY TABLE SESSION.MYTAB 
    (DATE_F, FILE_NM, ROW_ID, COLUMN_NM, COLUMN_VAL)
    AS
    (
    VALUES
      ('20200131', 'ABC.TXT', 1, 'ABC_COL1', '123')
    , ('20200131', 'ABC.TXT', 1, 'ABC_COL2', 'XYZ')
    , ('20201231', 'ABC.TXT', 2, 'ABC_COL1', '456')
    , ('20201231', 'ABC.TXT', 2, 'ABC_COL2', 'XY1')
    , ('20200630', 'XYZ.TXT', 1, 'XYZ_COL1', 'PQR')
    , ('20200630', 'XYZ.TXT', 1, 'XYZ_COL2', '567')
    , ('20200630', 'XYZ.TXT', 1, 'XYZ_COL3', 'MNO')
    ) WITH DATA WITH REPLACE ON COMMIT PRESERVE ROWS NOT LOGGED;
    

    1-st call:

    call pivot
    (
      'select * from session.mytab where file_nm = ''ABC.TXT'''
    , 'DATE_F, ROW_ID'
    , 'COLUMN_NM'
    , 'COLUMN_VAL'
    , 'max'
    , 'session.tab1'
    , '-'
    , ?, ?, ?
    );
    
    DATE_F ROW_ID ABC_COL1 ABC_COL2
    20200131 1 123 XYZ
    20201231 2 456 XY1

    2-nd call:

    call pivot
    (
      'select * from session.mytab where file_nm = ''XYZ.TXT'''
    , 'DATE_F, ROW_ID'
    , 'COLUMN_NM'
    , 'COLUMN_VAL'
    , 'max'
    , 'session.tab1'
    , '-'
    , ?, ?, ?
    );
    
    DATE_F ROW_ID XYZ_COL1 XYZ_COL2 XYZ_COL3
    20200630 1 PQR 567 MNO