Tags: mapreduce, hadoop-yarn, tachyon

mapreduce job failed with "org.apache.commons.codec.binary.Base64.encodeBase64String([B)Ljava/lang/String;"


I ran an MR job on my 3-node cluster; each node has 128 GB of RAM and 40 cores.

Cluster info:

10.8.12.16: namenode (active), datanode, alluxio master, alluxio worker
10.8.12.17: namenode (standby), datanode, alluxio master (standby), alluxio worker
10.8.12.18: datanode, alluxio worker

Version info:

OS: Red Hat EL6
Alluxio: 1.2.0
Hadoop: 2.7.2
JDK: 1.8

I compiled Alluxio against Hadoop 2.7.2.

I have configured both HDFS and Alluxio for HA, and both start successfully with no errors in their logs.

When I ran my MapReduce job without using Alluxio at all, it failed with this error:

16/09/29 21:10:30 INFO mapreduce.Job: Job job_1475154019111_0005 failed with state FAILED due to: Application application_1475154019111_0005 failed 2 times due to AM Container for appattempt_1475154019111_0005_000002 exited with  exitCode: -1
For more detailed output, check application tracking page:http://sq-hbase1.800best.com:8088/cluster/app/application_1475154019111_0005Then, click on links to logs of each attempt.
Diagnostics: org.apache.commons.codec.binary.Base64.encodeBase64String([B)Ljava/lang/String;
Failing this attempt. Failing the application.
16/09/29 21:10:30 INFO mapreduce.Job: Counters: 0
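
The Diagnostics line looks like the message of a NoSuchMethodError: the Base64 class that the ApplicationMaster loads has no encodeBase64String(byte[]) method, which only exists in commons-codec 1.4 and later, so an older commons-codec jar is apparently being picked up first. Below is a minimal diagnostic sketch (the CodecCheck class is hypothetical, not part of the job above) that can be run on a cluster node, e.g. with hadoop jar, to see which jar the class is loaded from and whether the method exists there:

import java.lang.reflect.Method;

public class CodecCheck {
    public static void main(String[] args) throws Exception {
        Class<?> base64 = Class.forName("org.apache.commons.codec.binary.Base64");
        // The code source is the jar that won on the classpath
        System.out.println("Base64 loaded from: "
                + base64.getProtectionDomain().getCodeSource().getLocation());
        try {
            // encodeBase64String(byte[]) exists only in commons-codec 1.4+
            Method m = base64.getMethod("encodeBase64String", byte[].class);
            System.out.println("encodeBase64String is present: " + m);
        } catch (NoSuchMethodException e) {
            System.out.println("encodeBase64String is missing: a commons-codec older than 1.4 is on the classpath");
        }
    }
}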

My MR line count job source code:

/**
 * MR job that counts the number of lines in a file
 * @author Wan Kaiming on 2016/9/9
 * @version 1.0
 */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class MRLineCount {

    // Mapper: emits one (key, 1) pair per input line so the reducer can sum up the total line count
    public static class LineMapper
            extends Mapper<Object, Text, Text, IntWritable>{

        // Each line contributes a count of 1
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text("Total line count: ");

        public void map(Object key, Text value, Context context
        ) throws IOException, InterruptedException {

            // With TextInputFormat, the key is the byte offset of each record within its split,
            // and the Text value holds the content of one line.

            // Every record goes to the reducer under the same fixed key with value 1 (one line),
            // so the reducer can sum everything under a single key.
            context.write(word, one);
        }
    }

    // Reducer: sums the per-line counts to produce the total number of lines
    public static class IntSumReducer
            extends Reducer<Text,IntWritable,Text,IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values,
                           Context context
        ) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "appadmin");

        Configuration conf = new Configuration();

        // Set the HDFS and YARN addresses
        // Use Alluxio
        //conf.set("fs.defaultFS", "alluxio://10.8.12.16:19998");
        // Use HDFS
        conf.set("fs.defaultFS", "hdfs://10.8.12.17:9000");
        conf.set("yarn.resourcemanager.hostname","10.8.12.16");

        System.out.println(" codec location "+org.apache.commons.codec.binary.Base64.class.getProtectionDomain().getCodeSource());

        // Create a Job instance with the default configuration
        Job job = Job.getInstance(conf, "linecount");

        job.setJar("E:\\JavaProjects\\Learning\\out\\artifacts\\hadoop_test_jar\\hadoop-test.jar");
        job.setJarByClass(MRLineCount.class);

        // Set the mapper, combiner and reducer
        job.setMapperClass(LineMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        // Set the output key and value classes
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Set the input and output paths
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at


    http://www.apache.org/licenses/LICENSE-2.0


  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->


<!-- Put site-specific property overrides in this file. -->






<configuration>
 <!-- Replication factor; defaults to 3. Only needs to be configured on the datanodes -->
        <property>
        <name>dfs.replication</name>
                <value>2</value>
        </property>


 <!-- Local filesystem directory where the NameNode stores namespace and edit-log metadata -->
        <property>
        <name>dfs.namenode.name.dir</name>
                <value>file:/home/appadmin/hadoop-2.7.2/hdfs/namenode</value>
        </property>


<!-- Local filesystem directory where the DataNode stores its block data -->
        <property>
                <name>dfs.namenode.data.dir</name>
                <value>file:/home/appadmin/hadoop-2.7.2/hdfs/datanode</value>
        </property>


  <!-- Set the HDFS nameservice to ns; must match the value in core-site.xml -->
    <property>    
            <name>dfs.nameservices</name>    
            <value>ns</value>    
    </property>  

<!-- The ns nameservice has two NameNodes, nn1 and nn2; the names are arbitrary, but at most two can be configured -->
    <property>
           <name>dfs.ha.namenodes.ns</name>
           <value>nn1,nn2</value>
    </property>



<!-- RPC address of nn1 -->
    <property>
           <name>dfs.namenode.rpc-address.ns.nn1</name>
           <value>10.8.12.16:9000</value>
    </property>

<!-- HTTP address of nn1 -->
    <property>
            <name>dfs.namenode.http-address.ns.nn1</name>
            <value>10.8.12.16:50070</value>
    </property>

<!-- RPC address of nn2 -->
    <property>
            <name>dfs.namenode.rpc-address.ns.nn2</name>
            <value>10.8.12.17:9000</value>
    </property>

<!-- HTTP address of nn2 -->
    <property>
            <name>dfs.namenode.http-address.ns.nn2</name>
            <value>10.8.12.17:50070</value>
    </property>

<!-- JournalNodes on which the NameNode edit log is stored, and where; the last path segment is the journal id, usually the cluster's nameservice name -->
    <property>
             <name>dfs.namenode.shared.edits.dir</name>
             <value>qjournal://10.8.12.16:8485;10.8.12.17:8485;10.8.12.18:8485/ns</value>
    </property>


<!-- Local directory where the JournalNodes store their data -->
    <property>
              <name>dfs.journalnode.edits.dir</name>
              <value>/home/appadmin/hadoop-2.7.2/journal</value>
    </property>

<!-- Enable automatic failover when the active NameNode fails -->
    <property>
              <name>dfs.ha.automatic-failover.enabled</name>
              <value>true</value>
    </property>

<!-- Failover proxy provider used for automatic failover -->
    <property>
                <name>dfs.client.failover.proxy.provider.ns</name>
                <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>

<!-- Fencing configuration -->
<!--
This parameter matters: it implements fencing during an active/standby switchover. Whenever an HA architecture is used to remove a single point of failure, split-brain must be handled carefully, i.e. two masters serving clients at the same time, leaving the system in an inconsistent state and potentially losing data. In HDFS HA the JournalNodes allow only one NameNode to write, so there can never be two active NameNodes committing edits; however, during a switchover the previous active NameNode may still be serving client RPC requests, so a fencing mechanism is needed to kill it. HDFS allows multiple fencing methods to be configured; on failover they are tried in order until one succeeds. Hadoop 2.0 ships two built-in fencing methods: shell and sshfence.
-->
    <property>
                 <name>dfs.ha.fencing.methods</name>
                 <value>sshfence</value>
    </property>



<!-- sshfence requires passwordless SSH -->
    <property>
                <name>dfs.ha.fencing.ssh.private-key-files</name>
                <value>/home/appadmin/.ssh/id_rsa</value>
    </property>



<!-- Enable WebHDFS (REST API) on the NameNodes and DataNodes; optional -->
    <property>    
           <name>dfs.webhdfs.enabled</name>    
           <value>true</value>    
    </property>    




    <property>
        <name>fs.alluxio.impl</name>
        <value>alluxio.hadoop.FileSystem</value>
        <description>The Alluxio FileSystem (Hadoop 1.x and 2.x)</description>
    </property>
    <property>
        <name>fs.alluxio-ft.impl</name>
        <value>alluxio.hadoop.FaultTolerantFileSystem</value>
        <description>The Alluxio FileSystem (Hadoop 1.x and 2.x) with fault tolerant support</description>
    </property>
    <property>
        <name>fs.AbstractFileSystem.alluxio.impl</name>
        <value>alluxio.hadoop.AlluxioFileSystem</value>
        <description>The Alluxio AbstractFileSystem (Hadoop 2.x)</description>
    </property>



</configuration>

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at


    http://www.apache.org/licenses/LICENSE-2.0


  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->


<!-- Put site-specific property overrides in this file. -->


<configuration>
 <property>
 <name>hadoop.tmp.dir</name> 
 <value>/home/appadmin/hadoop-2.7.2/tmp</value>
 </property>
 <!-- HA configuration: uses the nameservice defined in hdfs-site.xml -->
 <property>
 <name>fs.defaultFS</name>
 <value>hdfs://ns</value>
 </property>
 <!-- ZooKeeper quorum used for active/standby NameNode failover -->
 <property>
        <name>ha.zookeeper.quorum</name>
        <value>10.8.12.16:2181,10.8.12.17:2181,10.8.12.18:2181</value>
 </property>




    <property>
        <name>fs.alluxio.impl</name>
        <value>alluxio.hadoop.FileSystem</value>
        <description>The Alluxio FileSystem (Hadoop 1.x and 2.x)</description>
    </property>
    <property>
        <name>fs.alluxio-ft.impl</name>
        <value>alluxio.hadoop.FaultTolerantFileSystem</value>
        <description>The Alluxio FileSystem (Hadoop 1.x and 2.x) with fault tolerant support</description>
    </property>
    <property>
        <name>fs.AbstractFileSystem.alluxio.impl</name>
        <value>alluxio.hadoop.AlluxioFileSystem</value>
        <description>The Alluxio AbstractFileSystem (Hadoop 2.x)</description>
    </property>


</configuration>

yarn-site.xml

<configuration>


 <property>
 <name>yarn.resourcemanager.address</name>
 <value>10.8.12.16:8032</value>
 </property>
 <property>
 <name>yarn.resourcemanager.scheduler.address</name>
 <value>10.8.12.16:8030</value>
 </property>
 <property>
 <name>yarn.resourcemanager.resource-tracker.address</name>
 <value>10.8.12.16:8031</value>
 </property>
 <property>
 <name>yarn.resourcemanager.admin.address</name>
 <value>10.8.12.16:8033</value>
 </property>
 <property>
 <name>yarn.resourcemanager.webapp.address</name>
 <value>10.8.12.16:8088</value>
 </property>
 <property>
 <name>mapreduce.framework.name</name>
 <value>yarn</value>
 </property>
 <!-- Needed so MR jobs can be submitted to and run on YARN -->
 <property>
 <name>yarn.nodemanager.aux-services</name>
 <value>mapreduce_shuffle</value>
 </property>



 <property>
 <name>yarn.nodemanager.resource.memory-mb</name>
 <value>81408</value>
 </property>


 <property>
 <name>yarn.scheduler.minimum-allocation-mb</name>
 <value>27136</value>
 </property>


 <property>
 <name>yarn.scheduler.maximum-allocation-mb</name>
 <value>81408</value>
 </property>


 <property>
 <name>yarn.app.mapreduce.am.resource.mb</name>
 <value>27136</value>
 </property>


 <property>
 <name>yarn.app.mapreduce.am.command-opts</name>
 <value>-Xmx21708m</value>
 </property>




 <property>
 <name>yarn.nodemanager.pmem-check-enabled</name>
 <value>false</value>
 </property>


 <property>
 <name>yarn.nodemanager.vmem-check-enabled</name>
 <value>false</value>
 </property>


</configuration>

mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at


    http://www.apache.org/licenses/LICENSE-2.0


  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->


<!-- Put site-specific property overrides in this file. -->


<configuration>
 <property>
 <name>mapreduce.framework.name</name>
 <value>yarn</value>
 </property>




 <property>
 <name>mapreduce.map.memory.mb</name>
 <value>27136</value>
 </property>


 <property>
 <name>mapreduce.map.java.opts</name>
 <value>-Xmx21708m</value>
 </property>



 <property>
 <name>mapreduce.reduce.memory.mb</name>
 <value>27136</value>
 </property>


 <property>
 <name>mapreduce.reduce.java.opts</name>
 <value>-Xmx21708m</value>
 </property>
</configuration>

hadoop-env.sh

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Set Hadoop-specific environment variables here.


# The only required environment variable is JAVA_HOME.  All others are
# optional.  When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.


# The java implementation to use.
export JAVA_HOME=${JAVA_HOME}


# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol.  Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}


export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}


# Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done


# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""


# Extra Java runtime options.  Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"


# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"


export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"


export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"


# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"


# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol.  This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}


# Where log files are stored.  $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER


# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}


###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""


###
# Advanced Users Only!
###


# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by 
#       the user that will run the hadoop daemons.  Otherwise there is the
#       potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_HOME}/pids
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}


# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER


export HADOOP_CLASSPATH=/home/appadmin/alluxio-1.2.0/core/client/target/alluxio-core-client-1.2.0-jar-with-dependencies.jar:${HADOOP_CLASSPATH}

PS: I have copied alluxio-core-client-1.2.0-jar-with-dependencies.jar to $HADOOP_HOME/share/hadoop/common/lib on every node.
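
If that jar-with-dependencies bundles its own (older) copy of commons-codec, it could shadow the commons-codec jar that ships with Hadoop in the same lib directory. Here is a small sketch (FindDuplicateBase64 is a hypothetical helper, only for illustration) that lists every classpath entry containing the Base64 class, so duplicates become visible:

import java.net.URL;
import java.util.Enumeration;

public class FindDuplicateBase64 {
    public static void main(String[] args) throws Exception {
        // Every jar (or directory) on the classpath that ships its own copy of the class;
        // more than one hit means it is duplicated, and the first entry usually wins.
        Enumeration<URL> copies = FindDuplicateBase64.class.getClassLoader()
                .getResources("org/apache/commons/codec/binary/Base64.class");
        while (copies.hasMoreElements()) {
            System.out.println(copies.nextElement());
        }
    }
}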


Solution

  • There was a dependency issue in the pom files for Alluxio 1.2 that pulled in the wrong version of commons-codec (the library behind the Base64 NoSuchMethodError above). The issue is fixed in Alluxio 1.3.0.