Search code examples
sqloracle-databasewith-statementsql-tuning

sql fine tuning


In a Table, I need to combine 2 records into one record, based on columns (in this case column 1.WORK_ORDER_NUM, 2.ESN 3.PLANT 4. REMD_PART_NUM 5. REMD_PART_SERIAL). If these 5 columns are equal then I have to use Aggregate function in other column to make it into one record.[In this case the columns are LLP_TRACKD_PART_IND,REMD_PART_TSN and REMD_PART_CSN].

This is what I've tried:

SELECT decode (PLANT ,'ECL','CELMA','EDS','CELMA',PLANT)PLANT,
 COUNT(*) RECORD_COUNT,
 COUNT(DISTINCT OFF.REMD_PART_NUM) REMD_PART_NUM_COUNT,
 COUNT(DISTINCT OFF.REMD_PART_SERIAL) REMD_PART_SER_NUM_COUNT,
  COUNT(DECODE(LLP_TRACKD_PART_IND,'LL',LLP_TRACKD_PART_IND,NULL)) LL_COUNT,
 COUNT(DECODE(LLP_TRACKD_PART_IND,'LR',LLP_TRACKD_PART_IND,NULL)) LR_COUNT,
 COUNT(DECODE(LLP_TRACKD_PART_IND,'TR',LLP_TRACKD_PART_IND,NULL)) TR_COUNT,
 SUM(OFF.REMD_PART_QTY) TOTAL_REMD_PART_QTY, 
  SUM(decode(LLP_TRACKD_PART_IND,null,0,
     CASE
       WHEN REGEXP_LIKE(REMD_PART_TSN, '^-?\d+(\.\d+)?$')
            THEN CAST(REMD_PART_TSN AS NUMBER)
        ELSE 0
    END
  )) TOTAL_TSN,
 SUM(decode(LLP_TRACKD_PART_IND,null,0,
    CASE
        WHEN REGEXP_LIKE(REMD_PART_CSN, '^-?\d+(\.\d+)?$')
            THEN CAST(REMD_PART_CSN AS NUMBER)
        ELSE 0
    END
 )) TOTAL_CSN
FROM (with t as ( SELECT distinct PLANT,
WORK_ORDER_NUM,ESN,REMD_PART_NUM,REMD_PART_SERIAL,REMD_PART_IIN,
LLP_TRACKD_PART_IND,
REMD_PART_QTY,REMD_PART_TSN,REMD_PART_CSN,REMD_PART_TSO,REMD_PART_CSO
,REMD_PART_TSC,REMD_PART_CSC,REMD_CYCLE_REMAIN          
FROM <TABLE1> 
WHERE 
REMD_PART_NUM is not null 
)
select DISTINCT PLANT,WORK_ORDER_NUM,ESN,REMD_PART_NUM,REMD_PART_SERIAL 
,REMD_PART_IIN
 ,(select max(LLP_TRACKD_PART_IND) from t bb  where aa.PLANT=bb.PLANT and 
aa.WORK_ORDER_NUM=bb.WORK_ORDER_NUM 
 and aa.ESN=bb.ESN 
  and aa.REMD_PART_NUM=bb.REMD_PART_NUM
  and aa.REMD_PART_SERIAL=bb.REMD_PART_SERIAL) LLP_TRACKD_PART_IND
 ,REMD_PART_QTY
 ,(select max(REMD_PART_TSN) from t bb  where aa.PLANT=bb.PLANT and 
aa.WORK_ORDER_NUM=bb.WORK_ORDER_NUM 
 and aa.ESN=bb.ESN 
 and aa.REMD_PART_NUM=bb.REMD_PART_NUM
 and aa.REMD_PART_SERIAL=bb.REMD_PART_SERIAL) REMD_PART_TSN
 ,
(select max(REMD_PART_CSN) from t bb  where aa.PLANT=bb.PLANT and             
aa.WORK_ORDER_NUM=bb.WORK_ORDER_NUM 
  and aa.ESN=bb.ESN 
  and aa.REMD_PART_NUM=bb.REMD_PART_NUM
  and aa.REMD_PART_SERIAL=bb.REMD_PART_SERIAL) REMD_PART_CSN
  from t aa) OFF
WHERE 
REMD_PART_NUM is not null
GROUP BY decode (PLANT ,'ECL','CELMA','EDS','CELMA',PLANT) 

And it takes around 8 hours to complete. Is there any other way to complete it faster. It took 8 hours to complete


Solution

  • Start with a better formatting of the query, it will allow you to easily understand the code and notice repeated patterns:

    SELECT decode (PLANT ,'ECL','CELMA','EDS','CELMA',PLANT)PLANT,
        COUNT(*) RECORD_COUNT,
        COUNT(DISTINCT OFF.REMD_PART_NUM) REMD_PART_NUM_COUNT,
        COUNT(DISTINCT OFF.REMD_PART_SERIAL) REMD_PART_SER_NUM_COUNT,
        COUNT(DECODE(LLP_TRACKD_PART_IND,'LL',LLP_TRACKD_PART_IND,NULL)) LL_COUNT,
        COUNT(DECODE(LLP_TRACKD_PART_IND,'LR',LLP_TRACKD_PART_IND,NULL)) LR_COUNT,
        COUNT(DECODE(LLP_TRACKD_PART_IND,'TR',LLP_TRACKD_PART_IND,NULL)) TR_COUNT,
        SUM(OFF.REMD_PART_QTY) TOTAL_REMD_PART_QTY, 
        SUM(decode(LLP_TRACKD_PART_IND,null,0,
                CASE
                    WHEN REGEXP_LIKE(REMD_PART_TSN, '^-?\d+(\.\d+)?$')
                    THEN CAST(REMD_PART_TSN AS NUMBER)
                    ELSE 0
                END
            )) TOTAL_TSN,
        SUM(decode(LLP_TRACKD_PART_IND,null,0,
                CASE
                    WHEN REGEXP_LIKE(REMD_PART_CSN, '^-?\d+(\.\d+)?$')
                    THEN CAST(REMD_PART_CSN AS NUMBER)
                    ELSE 0
                END
            )) TOTAL_CSN
    FROM (
        with t as ( 
            SELECT distinct PLANT, WORK_ORDER_NUM,ESN,REMD_PART_NUM,REMD_PART_SERIAL,REMD_PART_IIN,
                    LLP_TRACKD_PART_IND,
                    REMD_PART_QTY,REMD_PART_TSN,REMD_PART_CSN,REMD_PART_TSO,REMD_PART_CSO
                    ,REMD_PART_TSC,REMD_PART_CSC,REMD_CYCLE_REMAIN          
            FROM <TABLE1> 
            WHERE REMD_PART_NUM is not null 
        )
        select DISTINCT PLANT,WORK_ORDER_NUM,ESN,REMD_PART_NUM,REMD_PART_SERIAL 
                ,REMD_PART_IIN
                ,(  select max(LLP_TRACKD_PART_IND) 
                    from t bb  
                    where aa.PLANT=bb.PLANT 
                        and aa.WORK_ORDER_NUM=bb.WORK_ORDER_NUM 
                        and aa.ESN=bb.ESN 
                        and aa.REMD_PART_NUM=bb.REMD_PART_NUM
                        and aa.REMD_PART_SERIAL=bb.REMD_PART_SERIAL
                ) LLP_TRACKD_PART_IND
                ,REMD_PART_QTY
                ,(  select max(REMD_PART_TSN) from t bb  
                    where aa.PLANT=bb.PLANT 
                        and aa.WORK_ORDER_NUM=bb.WORK_ORDER_NUM 
                        and aa.ESN=bb.ESN 
                        and aa.REMD_PART_NUM=bb.REMD_PART_NUM
                        and aa.REMD_PART_SERIAL=bb.REMD_PART_SERIAL
                ) REMD_PART_TSN
                ,
                (   select max(REMD_PART_CSN) from t bb  
                    where aa.PLANT=bb.PLANT 
                        and aa.WORK_ORDER_NUM=bb.WORK_ORDER_NUM 
                        and aa.ESN=bb.ESN 
                        and aa.REMD_PART_NUM=bb.REMD_PART_NUM
                        and aa.REMD_PART_SERIAL=bb.REMD_PART_SERIAL
                ) REMD_PART_CSN
        from t aa
    ) OFF
    WHERE REMD_PART_NUM is not null
    GROUP BY decode (PLANT ,'ECL','CELMA','EDS','CELMA',PLANT)
    ;
    

    You will see that the below pattern is repeated 3 times (3 almost identical subqueries):

    select max( some_field ) 
    from t bb  
    where aa.PLANT=bb.PLANT 
      and aa.WORK_ORDER_NUM=bb.WORK_ORDER_NUM 
      and aa.ESN=bb.ESN 
      and aa.REMD_PART_NUM=bb.REMD_PART_NUM
      and aa.REMD_PART_SERIAL=bb.REMD_PART_SERIAL
    

    You will also easily see that there are 3 very expensive sort operations in this query - one DISTINCT in the innermost subquery, then another DISTINCT in another subquery, then finally GROUP BY operation (a kind of DISTINCT) in the top level query.


    Looking only on your query one can easily eliminate one sort (DISTINCT) using analytic functions in this way:

    SELECT * FROM (
        SELECT PLANT,WORK_ORDER_NUM,ESN,REMD_PART_NUM,REMD_PART_SERIAL,
               REMD_PART_IIN, REMD_PART_QTY,
               max( LLP_TRACKD_PART_IND ) over 
                  (partition by PLANT, WORK_ORDER_NUM, ESN, REMD_PART_NUM, REMD_PART_SERIAL) 
               as LLP_TRACKD_PART_IND,
               max( REMD_PART_TSN )       over 
                  (partition by PLANT, WORK_ORDER_NUM, ESN, REMD_PART_NUM, REMD_PART_SERIAL) 
               as REMD_PART_TSN,
               max( REMD_PART_CSN )       over 
                  (partition by PLANT, WORK_ORDER_NUM, ESN, REMD_PART_NUM, REMD_PART_SERIAL) 
               as REMD_PART_CSN,
               row_number() over 
                  (partition by PLANT, WORK_ORDER_NUM, ESN, REMD_PART_NUM, REMD_PART_SERIAL, REMD_PART_IIN, REMD_PART_QTY 
                       order by PLANT) as Rn
        FROM TABLE1
        WHERE REMD_PART_NUM is not null 
    )
    WHERE rn = 1
    

    so the final query migh be:

    SELECT decode (PLANT ,'ECL','CELMA','EDS','CELMA',PLANT)PLANT,
        COUNT(*) RECORD_COUNT,
        COUNT(DISTINCT OFF.REMD_PART_NUM) REMD_PART_NUM_COUNT,
        COUNT(DISTINCT OFF.REMD_PART_SERIAL) REMD_PART_SER_NUM_COUNT,
        COUNT(DECODE(LLP_TRACKD_PART_IND,'LL',LLP_TRACKD_PART_IND,NULL)) LL_COUNT,
        COUNT(DECODE(LLP_TRACKD_PART_IND,'LR',LLP_TRACKD_PART_IND,NULL)) LR_COUNT,
        COUNT(DECODE(LLP_TRACKD_PART_IND,'TR',LLP_TRACKD_PART_IND,NULL)) TR_COUNT,
        SUM(OFF.REMD_PART_QTY) TOTAL_REMD_PART_QTY, 
        SUM(decode(LLP_TRACKD_PART_IND,null,0,
                CASE
                    WHEN REGEXP_LIKE(REMD_PART_TSN, '^-?\d+(\.\d+)?$')
                    THEN CAST(REMD_PART_TSN AS NUMBER)
                    ELSE 0
                END
            )) TOTAL_TSN,
        SUM(decode(LLP_TRACKD_PART_IND,null,0,
                CASE
                    WHEN REGEXP_LIKE(REMD_PART_CSN, '^-?\d+(\.\d+)?$')
                    THEN CAST(REMD_PART_CSN AS NUMBER)
                    ELSE 0
                END
            )) TOTAL_CSN
    FROM (
        SELECT PLANT,WORK_ORDER_NUM,ESN,REMD_PART_NUM,REMD_PART_SERIAL,REMD_PART_IIN, REMD_PART_QTY,
               max( LLP_TRACKD_PART_IND ) over (partition by PLANT, WORK_ORDER_NUM, ESN, REMD_PART_NUM, REMD_PART_SERIAL) as LLP_TRACKD_PART_IND,
               max( REMD_PART_TSN )       over (partition by PLANT, WORK_ORDER_NUM, ESN, REMD_PART_NUM, REMD_PART_SERIAL) as REMD_PART_TSN,
               max( REMD_PART_CSN )       over (partition by PLANT, WORK_ORDER_NUM, ESN, REMD_PART_NUM, REMD_PART_SERIAL) as REMD_PART_CSN,
               row_number()               over (partition by PLANT, WORK_ORDER_NUM, ESN, REMD_PART_NUM, REMD_PART_SERIAL, REMD_PART_IIN, REMD_PART_QTY 
                                                order by PLANT) as Rn
        FROM TABLE1
        WHERE REMD_PART_NUM is not null 
    )
    WHERE rn = 1
    GROUP BY decode (PLANT ,'ECL','CELMA','EDS','CELMA',PLANT)
    

    I feel that this query could be further optimized, but it would require insight into the structure of the tables and a knowledge of business requirements.


    There are still some microoptimalizations possible, though.
    This pattern:

    SUM(decode(LLP_TRACKD_PART_IND,null,0,
            CASE
                WHEN REGEXP_LIKE(REMD_PART_TSN, '^-?\d+(\.\d+)?$')
                THEN CAST(REMD_PART_TSN AS NUMBER)
                ELSE 0
            END
        )) TOTAL_TSN,
    

    can be replaced with this one:

    coalesce( 
        SUM( 
            CASE
                WHEN REGEXP_LIKE(REMD_PART_TSN, '^-?\d+(\.\d+)?$')
                THEN CAST(REMD_PART_TSN AS NUMBER)
                ELSE 0
            END
        ), 0
    )
    

    since SUM ignores nulls, then checking for NULL value for each record is a waste. For a small number of records ( < 1 million) it does not matter, but for hundreds of millions of records you can achieve the effect of scale - say 0.05 ms for checking each record multiplied by 10.000.000 records can give 500 seconds.