Search code examples
sqlhivequery-optimizationhiveqlapache-tez

An exception is thrown while running an HQL query in Hive


I want to run a SELECT statement and write its result into a table. I'm sure it is not a syntax error.

HQL:

-- Overwrites datalake_rci.MID_DealerVehicleOutputValue with rows read back from
-- the same table, replacing DIMOutputValueID with 1 for every row whose dealer
-- has a match in subquery "a" and keeping the stored value otherwise.
-- The target table is referenced three times below (b, the count subquery, y).
INSERT overwrite table datalake_rci.MID_DealerVehicleOutputValue
--MIDDealerVehicleOutputValueID int,
select
b.MIDDealerVehicleOutputValueID
,b.DealerID --string,
,b.CHASSIS --string,
,b.DIMDealerID --bigint,
,b.DIMVehicleID --bigint,

-- a.DIMDealerID is non-null only when the left join below found a match.
,case when a.DIMDealerID is not null
      then 1 else b.DIMOutputValueID
end DIMOutputValueID  --int     

,b.OutputValueName --string,
,b.OutputValueName_CN --string,
,b.OutputValueCode --varchar(50),
,b.OutputValueOrder --int
from datalake_rci.MID_DealerVehicleOutputValue b
left outer join
(
    -- a: per dealer, the rows whose rank (by OutputValueOrder descending) does
    -- not exceed that dealer's threshold "low".
    select w.low,w.DIMDealerID, w.DIMVehicleID,w.OutputValueOrder,w.row_num from (
        -- w: numbers each dealer's rows, highest OutputValueOrder first.
        select z.low,z.DIMDealerID, z.DIMVehicleID, z.OutputValueOrder,
               row_number() over(partition by z.DIMDealerID order by z.OutputValueOrder desc) row_num
        from
        (
            -- z: attaches the per-dealer threshold to every row of that dealer.
            select t1.low,y.DIMDealerID, y.DIMVehicleID, y.OutputValueOrder
           from
            (
                -- t1: low = (row count of the dealer) * (Rate of the 'Low' output value).
                select b.DIMDealerID, b.cnt*l.Rate low
                from
                    (select DIMDealerID, count(*) cnt
                        from datalake_rci.MID_DealerVehicleOutputValue
                        group by DIMDealerID) b
                    cross join
                        -- presumably a single row; every extra 'Low' row would duplicate t1 rows -- TODO confirm
                        (select Rate from datalake_rci.DIM_OutputValue where OutputValueCode = 'Low') l
            ) t1
            inner join
            (select DIMDealerID, DIMVehicleID, OutputValueOrder
                from datalake_rci.MID_DealerVehicleOutputValue) y
            on t1.DIMDealerID = y.DIMDealerID
        ) z
    ) w
    where w.row_num <= w.low
-- NOTE(review): the join key is DIMDealerID only, while "a" can hold several rows
-- per dealer (every row with row_num <= low), so each row of b may be emitted once
-- per matching row of "a" -- confirm this row multiplication is intended.
) a on b.DIMDealerID = a.DIMDealerID;

and then I got the output below:

--------------------------------------------------------------------------------
        VERTICES      STATUS  TOTAL  COMPLETED  RUNNING  PENDING  FAILED  KILLED
--------------------------------------------------------------------------------
Map 1 .........      RUNNING    207        200        0        7       0       0
Map 6 ..........   SUCCEEDED      1          1        0        0       0       0
Map 7 .........      RUNNING    207        198        0        9       0       0
Map 8 .........      RUNNING    207        201        0        6       0       0
Reducer 2 .....      RUNNING     40         37        0        3       0       0
Reducer 3 .....      RUNNING     98         94        0        4       0       0
Reducer 4 .....      RUNNING     44         41        0        3       0       0
Reducer 5 ...        RUNNING    746        376        0      370      35     233
--------------------------------------------------------------------------------
VERTICES: 01/08  [===================>>-------] 74%   ELAPSED TIME: 5795.98 s  
--------------------------------------------------------------------------------
ERROR : Status: Failed
ERROR : Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00
ERROR : Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03
ERROR : Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01
ERROR : Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05
ERROR : Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06
ERROR : Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04
ERROR : Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00
ERROR : Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01
ERROR : Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03
ERROR : Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04
ERROR : Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05
ERROR : Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06
ERROR : Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00
ERROR : Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03
ERROR : Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05
ERROR : Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01
ERROR : Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06
ERROR : Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04
ERROR : Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00
ERROR : Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03
ERROR : Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01
ERROR : Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04
ERROR : Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05
ERROR : Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06
ERROR : Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00
ERROR : Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03
ERROR : Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01
ERROR : Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04
ERROR : Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05
ERROR : Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06
ERROR : Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00
ERROR : Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01
ERROR : Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04
ERROR : Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05
ERROR : Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06
ERROR : Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00
ERROR : Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05
ERROR : Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03
ERROR : Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01
ERROR : Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06
ERROR : Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04
ERROR : Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00
ERROR : Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03
ERROR : Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00
ERROR : Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04
ERROR : Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05
ERROR : Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01
ERROR : Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00
ERROR : Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03
ERROR : Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01
ERROR : Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04
ERROR : Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05
ERROR : Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00
ERROR : Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04
ERROR : Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05
ERROR : Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06
ERROR : Vertex failed, vertexName=Reducer 5, vertexId=vertex_1549678950511_24672_1_07, diagnostics=[Task failed, taskId=task_1549678950511_24672_1_07_000375, diagnostics=[TaskAttempt 0 failed, info=[Container container_1549678950511_24672_01_000184 finished with diagnostics set to [Container failed, exitCode=-100. Container released on a *lost* node]], TaskAttempt 1 failed, info=[Error: exceptionThrown=org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$ShuffleError: error in shuffle in fetcher {Reducer_4} #2
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.callInternal(Shuffle.java:382)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.callInternal(Shuffle.java:334)
              at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
              at java.util.concurrent.FutureTask.run(FutureTask.java:266)
              at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
              at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NullPointerException
              at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1601)
              at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1492)
              at org.apache.tez.runtime.library.common.shuffle.HttpConnection.getInputStream(HttpConnection.java:253)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.setupConnection(FetcherOrderedGrouped.java:356)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.copyFromHost(FetcherOrderedGrouped.java:264)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.fetchNext(FetcherOrderedGrouped.java:176)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.run(FetcherOrderedGrouped.java:191)
, errorMessage=Shuffle Runner Failed:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$ShuffleError: error in shuffle in fetcher {Reducer_4} #2
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.callInternal(Shuffle.java:382)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.callInternal(Shuffle.java:334)
              at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
              at java.util.concurrent.FutureTask.run(FutureTask.java:266)
              at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
              at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NullPointerException
              at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1601)
              at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1492)
              at org.apache.tez.runtime.library.common.shuffle.HttpConnection.getInputStream(HttpConnection.java:253)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.setupConnection(FetcherOrderedGrouped.java:356)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.copyFromHost(FetcherOrderedGrouped.java:264)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.fetchNext(FetcherOrderedGrouped.java:176)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.run(FetcherOrderedGrouped.java:191)
], TaskAttempt 2 failed, info=[Container container_1549678950511_24672_01_000292 finished with diagnostics set to [Container failed, exitCode=-100. Container released on a *lost* node]], TaskAttempt 3 failed, info=[Container container_1549678950511_24672_01_000312 finished with diagnostics set to [Container failed, exitCode=-100. Container released on a *lost* node]]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 killedTasks:369, Vertex vertex_1549678950511_24672_1_07 [Reducer 5] killed/failed due to:OWN_TASK_FAILURE]
ERROR : Vertex killed, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06, diagnostics=[Vertex received Kill while in RUNNING state., Vertex did not succeed due to OTHER_VERTEX_FAILURE, failedTasks:0 killedTasks:3, Vertex vertex_1549678950511_24672_1_06 [Reducer 4] killed/failed due to:OTHER_VERTEX_FAILURE]
ERROR : Vertex killed, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05, diagnostics=[Vertex received Kill while in RUNNING state., Vertex did not succeed due to OTHER_VERTEX_FAILURE, failedTasks:0 killedTasks:4, Vertex vertex_1549678950511_24672_1_05 [Reducer 3] killed/failed due to:OTHER_VERTEX_FAILURE]
ERROR : Vertex killed, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04, diagnostics=[Vertex received Kill while in RUNNING state., Vertex did not succeed due to OTHER_VERTEX_FAILURE, failedTasks:0 killedTasks:3, Vertex vertex_1549678950511_24672_1_04 [Reducer 2] killed/failed due to:OTHER_VERTEX_FAILURE]
ERROR : Vertex killed, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03, diagnostics=[Vertex received Kill while in RUNNING state., Vertex did not succeed due to OTHER_VERTEX_FAILURE, failedTasks:0 killedTasks:7, Vertex vertex_1549678950511_24672_1_03 [Map 1] killed/failed due to:OTHER_VERTEX_FAILURE]
ERROR : Vertex killed, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01, diagnostics=[Vertex received Kill while in RUNNING state., Vertex did not succeed due to OTHER_VERTEX_FAILURE, failedTasks:0 killedTasks:9, Vertex vertex_1549678950511_24672_1_01 [Map 7] killed/failed due to:OTHER_VERTEX_FAILURE]
ERROR : Vertex killed, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00, diagnostics=[Vertex received Kill while in RUNNING state., Vertex did not succeed due to OTHER_VERTEX_FAILURE, failedTasks:0 killedTasks:6, Vertex vertex_1549678950511_24672_1_00 [Map 8] killed/failed due to:OTHER_VERTEX_FAILURE]
ERROR : DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:6
Error: Error while processing statement: FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.tez.TezTask. Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05Vertex re-running, vertexName=Reducer 4, 
vertexId=vertex_1549678950511_24672_1_06Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00Vertex re-running, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00Vertex re-running, vertexName=Map 1, 
vertexId=vertex_1549678950511_24672_1_03Vertex re-running, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05Vertex re-running, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00Vertex re-running, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04Vertex re-running, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05Vertex re-running, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06Vertex failed, vertexName=Reducer 5, vertexId=vertex_1549678950511_24672_1_07, diagnostics=[Task failed, taskId=task_1549678950511_24672_1_07_000375, diagnostics=[TaskAttempt 0 failed, info=[Container container_1549678950511_24672_01_000184 finished with diagnostics set to [Container failed, exitCode=-100. Container released on a *lost* node]], TaskAttempt 1 failed, info=[Error: exceptionThrown=org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$ShuffleError: error in shuffle in fetcher {Reducer_4} #2
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.callInternal(Shuffle.java:382)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.callInternal(Shuffle.java:334)
              at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
              at java.util.concurrent.FutureTask.run(FutureTask.java:266)
              at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
              at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NullPointerException
              at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1601)
              at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1492)
              at org.apache.tez.runtime.library.common.shuffle.HttpConnection.getInputStream(HttpConnection.java:253)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.setupConnection(FetcherOrderedGrouped.java:356)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.copyFromHost(FetcherOrderedGrouped.java:264)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.fetchNext(FetcherOrderedGrouped.java:176)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.run(FetcherOrderedGrouped.java:191)
, errorMessage=Shuffle Runner Failed:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$ShuffleError: error in shuffle in fetcher {Reducer_4} #2
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.callInternal(Shuffle.java:382)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.Shuffle$RunShuffleCallable.callInternal(Shuffle.java:334)
              at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
              at java.util.concurrent.FutureTask.run(FutureTask.java:266)
              at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
              at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
              at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NullPointerException
              at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1601)
              at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1492)
              at org.apache.tez.runtime.library.common.shuffle.HttpConnection.getInputStream(HttpConnection.java:253)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.setupConnection(FetcherOrderedGrouped.java:356)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.copyFromHost(FetcherOrderedGrouped.java:264)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.fetchNext(FetcherOrderedGrouped.java:176)
              at org.apache.tez.runtime.library.common.shuffle.orderedgrouped.FetcherOrderedGrouped.run(FetcherOrderedGrouped.java:191)
], TaskAttempt 2 failed, info=[Container container_1549678950511_24672_01_000292 finished with diagnostics set to [Container failed, exitCode=-100. Container released on a *lost* node]], TaskAttempt 3 failed, info=[Container container_1549678950511_24672_01_000312 finished with diagnostics set to [Container failed, exitCode=-100. Container released on a *lost* node]]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 killedTasks:369, Vertex vertex_1549678950511_24672_1_07 [Reducer 5] killed/failed due to:OWN_TASK_FAILURE]Vertex killed, vertexName=Reducer 4, vertexId=vertex_1549678950511_24672_1_06, diagnostics=[Vertex received Kill while in RUNNING state., Vertex did not succeed due to OTHER_VERTEX_FAILURE, failedTasks:0 killedTasks:3, Vertex vertex_1549678950511_24672_1_06 [Reducer 4] killed/failed due to:OTHER_VERTEX_FAILURE]Vertex killed, vertexName=Reducer 3, vertexId=vertex_1549678950511_24672_1_05, diagnostics=[Vertex received Kill while in RUNNING state., Vertex did not succeed due to OTHER_VERTEX_FAILURE, failedTasks:0 killedTasks:4, Vertex vertex_1549678950511_24672_1_05 [Reducer 3] killed/failed due to:OTHER_VERTEX_FAILURE]Vertex killed, vertexName=Reducer 2, vertexId=vertex_1549678950511_24672_1_04, diagnostics=[Vertex received Kill while in RUNNING state., Vertex did not succeed due to OTHER_VERTEX_FAILURE, failedTasks:0 killedTasks:3, Vertex vertex_1549678950511_24672_1_04 [Reducer 2] killed/failed due to:OTHER_VERTEX_FAILURE]Vertex killed, vertexName=Map 1, vertexId=vertex_1549678950511_24672_1_03, diagnostics=[Vertex received Kill while in RUNNING state., Vertex did not succeed due to OTHER_VERTEX_FAILURE, failedTasks:0 killedTasks:7, Vertex vertex_1549678950511_24672_1_03 [Map 1] killed/failed due to:OTHER_VERTEX_FAILURE]Vertex killed, vertexName=Map 7, vertexId=vertex_1549678950511_24672_1_01, diagnostics=[Vertex received Kill while in RUNNING state., Vertex did not succeed due to OTHER_VERTEX_FAILURE, failedTasks:0 killedTasks:9, 
Vertex vertex_1549678950511_24672_1_01 [Map 7] killed/failed due to:OTHER_VERTEX_FAILURE]Vertex killed, vertexName=Map 8, vertexId=vertex_1549678950511_24672_1_00, diagnostics=[Vertex received Kill while in RUNNING state., Vertex did not succeed due to OTHER_VERTEX_FAILURE, failedTasks:0 killedTasks:6, Vertex vertex_1549678950511_24672_1_00 [Map 8] killed/failed due to:OTHER_VERTEX_FAILURE]DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:6 (state=08S01,code=2)

I've tried twice and got the same result both times. By the way, the MID_DealerVehicleOutputValue table contains 336258079 rows in total. Could that be causing the error? Some other, similar statements ran successfully before this one, but they did not have as many rows to process.


Solution

  • Before making any memory adjustments, try to rewrite your query in a better way. It does unnecessary work by performing extra joins. First of all, you can greatly simplify it by removing the unnecessary inner join with the subquery in which you calculate count(*). Use the analytic count(*) over(partition by DIMDealerID) instead:

    -- Rewrites datalake_rci.MID_DealerVehicleOutputValue from itself: a row gets
    -- DIMOutputValueID = 1 when its dealer has a match in top_rows, otherwise it
    -- keeps its stored DIMOutputValueID. The per-dealer row count is computed with
    -- an analytic COUNT(*) rather than a GROUP BY subquery joined back to the table.
    INSERT OVERWRITE TABLE datalake_rci.MID_DealerVehicleOutputValue
    SELECT
        tgt.MIDDealerVehicleOutputValueID,   -- int
        tgt.DealerID,                        -- string
        tgt.CHASSIS,                         -- string
        tgt.DIMDealerID,                     -- bigint
        tgt.DIMVehicleID,                    -- bigint
        CASE
            WHEN top_rows.DIMDealerID IS NOT NULL THEN 1
            ELSE tgt.DIMOutputValueID
        END AS DIMOutputValueID,             -- int
        tgt.OutputValueName,                 -- string
        tgt.OutputValueName_CN,              -- string
        tgt.OutputValueCode,                 -- varchar(50)
        tgt.OutputValueOrder                 -- int
    FROM datalake_rci.MID_DealerVehicleOutputValue AS tgt
    LEFT OUTER JOIN
    (
        -- top_rows: rows whose per-dealer rank does not exceed the dealer's threshold.
        SELECT
            ranked.low,
            ranked.DIMDealerID,
            ranked.DIMVehicleID,
            ranked.OutputValueOrder,
            ranked.row_num
        FROM
        (
            -- ranked: numbers each dealer's rows, highest OutputValueOrder first.
            SELECT
                counted.low,
                counted.DIMDealerID,
                counted.DIMVehicleID,
                counted.OutputValueOrder,
                ROW_NUMBER() OVER (PARTITION BY counted.DIMDealerID
                                   ORDER BY counted.OutputValueOrder DESC) AS row_num
            FROM
            (
                -- counted: low = (rows of the dealer) * (Rate of the 'Low' output value).
                SELECT
                    src.DIMDealerID,
                    src.DIMVehicleID,
                    src.OutputValueOrder,
                    COUNT(*) OVER (PARTITION BY src.DIMDealerID) * low_rate.Rate AS low
                FROM datalake_rci.MID_DealerVehicleOutputValue AS src
                CROSS JOIN
                (
                    SELECT Rate
                    FROM datalake_rci.DIM_OutputValue
                    WHERE OutputValueCode = 'Low'
                ) AS low_rate
            ) AS counted
        ) AS ranked
        WHERE ranked.row_num <= ranked.low
    ) AS top_rows
        ON tgt.DIMDealerID = top_rows.DIMDealerID;
    

    As the next step in optimizing your query, try to remove the unnecessary left join, like this:

    -- Rewrites datalake_rci.MID_DealerVehicleOutputValue from a single read of the
    -- table (plus the one-off lookup of the 'Low' rate): both the per-dealer
    -- threshold and the per-dealer rank are window functions, so no self-join is
    -- needed. A row gets DIMOutputValueID = 1 when its rank within the dealer
    -- (by OutputValueOrder descending) does not exceed the dealer's threshold.
    INSERT overwrite table datalake_rci.MID_DealerVehicleOutputValue
    select
     s.MIDDealerVehicleOutputValueID --int
    ,s.DealerID --string
    ,s.CHASSIS --string
    ,s.DIMDealerID --bigint
    ,s.DIMVehicleID --bigint

    ,case when s.row_num <= s.low        --you do not need join to calculate this
          then 1 else s.DIMOutputValueID
      end DIMOutputValueID  --int

    ,s.OutputValueName --string
    ,s.OutputValueName_CN --string
    ,s.OutputValueCode --varchar(50)
    ,s.OutputValueOrder --int
    from
        (
            -- ranked rows: every source column the outer select needs, plus row_num.
            -- NOTE(review): rows of one dealer that share an OutputValueOrder are
            -- numbered in an arbitrary order -- add a tie-breaker if that matters.
            select c.low, c.DIMDealerID, c.DIMVehicleID, c.DIMOutputValueID, c.MIDDealerVehicleOutputValueID,
                   c.DealerID, c.CHASSIS, c.OutputValueName, c.OutputValueName_CN, c.OutputValueCode, c.OutputValueOrder,
                   row_number() over(partition by c.DIMDealerID order by c.OutputValueOrder desc) row_num
            from
            (
               -- counted rows: low = (rows of the dealer) * (Rate of the 'Low' output value).
               -- DIMOutputValueID is carried through because the outer CASE falls back to it.
               select src.DIMDealerID, src.DIMVehicleID, src.DIMOutputValueID, src.MIDDealerVehicleOutputValueID,
                      src.DealerID, src.CHASSIS, src.OutputValueName, src.OutputValueName_CN, src.OutputValueCode, src.OutputValueOrder,
                      count(*) over(partition by src.DIMDealerID) * l.Rate low
                    from datalake_rci.MID_DealerVehicleOutputValue src
                    cross join
                            (select Rate from datalake_rci.DIM_OutputValue where OutputValueCode = 'Low') l

            ) c
        ) s; --one or two subqueries also can be removed
    

    Of course, my query may contain some bugs and should be tested carefully, but I hope you get the idea. It is almost always possible to eliminate self-joins. In the end, your query will read each table only once, and many other heavy steps will be eliminated. I expect you will get rid of at least two reducer vertices and two mapper vertices.

    I'd also suggest increasing mapper parallelism. Tune these settings: try reducing the figures until you get more mappers running:

    -- Tune mapper parallelism: these are starting values; reduce them until more
    -- mappers are running.
    set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
    -- 67108864 = 64 * 1024 * 1024; presumably a size in bytes (64 MiB) -- confirm against the Tez docs.
    set tez.grouping.max-size=67108864;
    -- 32000000: presumably bytes as well (about 32 MB) -- confirm against the Tez docs.
    set tez.grouping.min-size=32000000;