Search code examples
javahadoopmapreducehbase

Read a text file from the file system into an HBase table with MapReduce


I need to load data from a text file into HBase using MapReduce. I have searched the web, but I haven't found a solution that fits my task.

Is there any method or class that reads a text/CSV file from the file system and stores the data into an HBase table?


Solution

  • To read from a text file, the file must first be in HDFS. You then need to specify the input format and output format for the job:

    // The input file must already be in HDFS; TextInputFormat feeds the mapper
    // one (byte offset, line) pair per line of the file.
    Job job = Job.getInstance(conf, "example"); // new Job(conf, ...) is deprecated
    FileInputFormat.addInputPath(job, new Path("PATH to text file"));
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(YourMapper.class);
    // These must match the mapper's output key/value types exactly.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // Wires the reducer to the HBase table: sets TableOutputFormat and the
    // table name so the reducer's Puts land in "hbase_table_name".
    TableMapReduceUtil.initTableReducerJob("hbase_table_name", YourReducer.class, job);
    job.waitForCompletion(true);
    

    YourReducer should extend org.apache.hadoop.hbase.mapreduce.TableReducer&lt;Text, Text, Text&gt;

    Sample reducer code

    /**
     * Reducer that turns each (key, value) pair into an HBase Put:
     * row key = the map output key, one cell in column family "colName"
     * (qualifier "colName") carrying the value text.
     */
    public class YourReducer extends TableReducer<Text, Text, Text> {
        // Column family the output cells are written to.
        private byte[] rawUpdateColumnFamily = Bytes.toBytes("colName");

        /**
         * Called once at the beginning of the task.
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // something that needs to be done at the start of the reducer
        }

        @Override
        public void reduce(Text keyin, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Write one Put per value; later Puts for the same row key get a
            // newer timestamp, so all versions are retained up to MAX_VERSIONS.
            for (Text val : values) {
                // Use Bytes.toBytes(...) instead of String.getBytes(): the
                // latter uses the platform default charset, HBase expects UTF-8.
                Put put = new Put(Bytes.toBytes(keyin.toString()));
                long explicitTimeInMs = System.currentTimeMillis();
                // NOTE: Put.add(family, qualifier, ts, value) is deprecated in
                // HBase 1.0+ in favor of addColumn(...).
                put.add(rawUpdateColumnFamily, Bytes.toBytes("colName"), explicitTimeInMs, Bytes.toBytes(val.toString()));
                context.write(keyin, put);
            }
        }
    }
    

    Sample mapper class

    public static class YourMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            word.set(tokenizer.nextToken());
            context.write(word, one);
            }
        }
    }