Search code examples
oracle-database, merge, rows

Combine two rows in one row using CASE in Oracle


enter image description here

Here is my table. I want to merge the two rows so that NUM_FILES and TOTAL_SIZE appear together on a single row, like this: "TABLE_NAME","TBL_ID","PART_ID","TABLE_TYPE","TABLE_LOCATION","TABLE_OWNER","DATABASE_NAME","NUM_FILES","TOTAL_SIZE" products_partitioned,2,2,EXTERNAL_TABLE,hdfs://sandbox-hdp.hortonworks.com:8020/HIVE_ROVER_IT/bikestores/products,hive,rovertesting,"3",4563

My full query is:

-- Asker's original query: reports NUM_FILES and TOTAL_SIZE per table partition,
-- but returns them on two separate rows (one per PARAM_KEY) instead of one.
SELECT DISTINCT tbl.tbl_name TABLE_NAME, tbl.TBL_ID TBL_ID, pp.PART_ID,
                tbl.tbl_type TABLE_TYPE,
                sds.location TABLE_LOCATION,
                tbl.OWNER TABLE_OWNER,
                --tbl.LAST_ACCESS_TIME ASSET_DATE_LAST_MODIFIED,
                dbs.name DATABASE_NAME,
                -- Each CASE yields PARAM_VALUE only on the row whose PARAM_KEY
                -- matches; on the other key's row the column is NULL.
                CASE pp.PARAM_KEY 
                    WHEN 'numFiles' THEN pp.PARAM_VALUE
                END AS NUM_FILES,   
                CASE pp.PARAM_KEY 
                    WHEN 'totalSize' THEN pp.PARAM_VALUE
                END AS TOTAL_SIZE
              FROM TBLS tbl
              INNER JOIN SDS ON tbl.tbl_id = sds.cd_id
              INNER JOIN DBS ON dbs.db_id = tbl.db_id
              -- NOTE: the INNER JOIN on PARTITION_PARAMS and the WHERE filter on pp
              -- discard rows with no matching partition, so this LEFT JOIN
              -- behaves like an inner join.
              LEFT JOIN PARTITIONS ON tbl.TBL_ID = PARTITIONS.TBL_ID 
              INNER JOIN PARTITION_PARAMS pp ON pp.PART_ID = PARTITIONS.PART_ID
              WHERE pp.PARAM_KEY IN ('totalSize', 'numFiles') AND tbl.tbl_type IN ('MANAGED_TABLE','EXTERNAL_TABLE')
              -- Grouping by pp.PARAM_KEY and pp.PARAM_VALUE keeps one group per
              -- parameter row, which is why the two values are never combined.
              GROUP BY (tbl.tbl_name, tbl.TBL_ID, pp.PART_ID, tbl.tbl_type, sds.location, tbl.OWNER, dbs.name, pp.PARAM_KEY, pp.PARAM_VALUE)
              ORDER BY TBL_ID, PART_ID ; 
             

Solution

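  • From my point of view, you should sum those values up and then group by the rest of the non-aggregated columns (leaving PARAM_KEY and PARAM_VALUE out of the GROUP BY, so that both parameter rows of a partition collapse into one):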

      -- Returns one row per (table, partition) with the 'numFiles' and
      -- 'totalSize' rows of partition_params pivoted into two columns.
      select tbl.tbl_name    table_name,
             tbl.tbl_id      tbl_id,
             pp.part_id,
             tbl.tbl_type    table_type,
             sds.location    table_location,
             tbl.owner       table_owner,
             dbs.name        database_name,
             -- Each CASE is NULL on the other key's row, and sum() ignores NULLs,
             -- so the two parameter rows collapse into one output row.
             -- param_value looks like text (the sample output quotes it), so it
             -- is converted explicitly rather than implicitly inside sum().
             sum (case pp.param_key when 'numFiles' then to_number (pp.param_value) end)  num_files,
             sum (case pp.param_key when 'totalSize' then to_number (pp.param_value) end) total_size
        from tbls tbl
             -- NOTE(review): join key tbl_id = sds.cd_id is kept as in the
             -- question; confirm it is the intended relationship.
             inner join sds on tbl.tbl_id = sds.cd_id
             inner join dbs on dbs.db_id = tbl.db_id
             -- Inner join on purpose: a left join here would have no effect,
             -- because the inner join to partition_params and the filter on
             -- pp.param_key below discard tables without partitions anyway.
             inner join partitions on tbl.tbl_id = partitions.tbl_id
             inner join partition_params pp on pp.part_id = partitions.part_id
       where     pp.param_key in ('totalSize', 'numFiles')
             and tbl.tbl_type in ('MANAGED_TABLE', 'EXTERNAL_TABLE')
    group by tbl.tbl_name,
             tbl.tbl_id,
             pp.part_id,
             tbl.tbl_type,
             sds.location,
             tbl.owner,
             dbs.name
    -- Same ordering as the question's query, so output is deterministic.
    order by tbl.tbl_id,
             pp.part_id;