Search code examples
eclipse · hadoop · mapreduce · distributed-caching · distributed-cache

Map Reduce Distributed Cache


I am not able to compile my driver class at the job.waitForCompletion(boolean) call: the compiler reports an unhandled ClassNotFoundException. If I catch the exception instead, the run method fails to compile with an error saying it expects an int return value. I am using the new MapReduce API. Could anyone suggest what the issue is?

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;


public class Dist_Driver extends Configured implements Tool {

    public int run(String args[]) throws IOException, InterruptedException {

        // Configuration phase
        // Configuration conf=new Configuration();
        Job job = new Job(new Configuration());
        job.setJarByClass(Dist_Driver.class);

        // Mapper Reducer InputFormat
        job.setInputFormatClass(FileInputFormat.class);
        // Mapper and Reducer Class
        job.setMapperClass(Dist_Mapper.class);
        job.setReducerClass(DistCache_Reducer.class);

        job.setOutputFormatClass(TextOutputFormat.class);
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        // set FileInputOutput
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // setting number of reduce tasks and submit it
        job.setNumReduceTasks(2);

        // Lets check if the file exist
        File f1 = new File("/home/hdfs/trials_mapreduce_progams/emp_id");
        if (f1.exists())
            System.out.println("The Files Exists");
        else
            System.out.println("The File doesnot exist");
        URI path1;
        try {
            path1 = new URI(
                    "/home/hdfs/trials_mapreduce_progams/emp_lookup.txt");
            DistributedCache.addCacheFile(path1, job.getConfiguration());
        } catch (URISyntaxException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        if (job.waitForCompletion(true))
            return 0;
        else
            return 1;

    }

    public static void main(String[] args) throws Exception {

        int exitcode = ToolRunner.run(new Dist_Driver(), args);
        System.exit(exitcode);

    }

}

Solution

  • Just add ClassNotFoundException to the throws clause of the run method's signature:

    public int run(String args[]) throws IOException, 
                                         InterruptedException, 
                                         ClassNotFoundException {
    

    The reason you get an error when you wrap the call in try/catch instead is that, if a ClassNotFoundException is thrown during execution, the catch path has no return statement, and the method has to return an int on every path.

    If you really want to catch it, just return 1 in the catch clause; a non-zero value is the error exit code.