Search code examples
javahadoophbasehadoop2

doing a ValueFilter and Count values on hbase shell


I am working with HBase Shell and was wondering if it is possible to count the values which the following scan command filters?

scan 'table', { COLUMNS => 'cf:c', FILTER => "ValueFilter( =, 'substring:myvalue' )" }

It should display the sum on the shell. Any ideas?

Thank you for your help.


Solution

  • The count command doesn't support filters; only scan does.

    AFAIK, combining a filter with count is not possible in the HBase shell.

    You can do the below for small number of rows.

    For Small data :

    So I'd suggest doing something like this with the HBase Java client:

    scan with your value filter here ....
    
    for (Result rs = scanner.next(); rs != null; rs = scanner.next()) {
        count++;
    }
    

    For huge data (for speed and parallelism we need to use MapReduce or some other distributed framework here):

    I would suggest a MapReduce program to count the number of rows. In the driver, set your value filter on the Scan object, as in the example below.

    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.filter.*;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.hbase.mapreduce.TableMapper;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    
    /**
     * Map-only MapReduce job that counts the rows of an HBase table returned by a
     * filtered scan.
     *
     * <p>The scan carries a {@link ValueFilter}, so only rows for which the scan
     * returns a result reach the mapper. Each such row increments the
     * {@code ROWS} job counter, which is printed once the job succeeds.
     *
     * <p>Usage: {@code SimpleRowCounter <tablename>}
     */
    public class SimpleRowCounter extends Configured implements Tool {

      /** Mapper that emits nothing and increments the {@code ROWS} counter once per input row. */
      static class RowCounterMapper extends TableMapper<ImmutableBytesWritable, Result> {
        /** Job counters maintained by this mapper. */
        public static enum Counters { ROWS }

        @Override
        public void map(ImmutableBytesWritable row, Result value, Context context) {
          context.getCounter(Counters.ROWS).increment(1);
        }
      }

      /**
       * Configures and runs the counting job.
       *
       * @param args exactly one element: the name of the table to scan
       * @return {@code -1} on a usage error, {@code 0} if the job succeeded, {@code 1} if it failed
       * @throws Exception if the job cannot be set up or submitted
       */
      @Override
      public int run(String[] args) throws Exception {
        if (args.length != 1) {
          System.err.println("Usage: SimpleRowCounter <tablename>");
          return -1;
        }
        String tableName = args[0];

        // Example filter: keep cells whose value is >= "1500".
        // NOTE: BinaryComparator compares raw bytes lexicographically, so this is a
        // string comparison, not a numeric one. Substitute the filter you need here,
        // e.g. a SubstringComparator for a substring match.
        Scan scan = new Scan();
        Filter valFilter = new ValueFilter(CompareFilter.CompareOp.GREATER_OR_EQUAL,
            new BinaryComparator(Bytes.toBytes("1500")));
        scan.setFilter(valFilter);

        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(getConf(), getClass().getSimpleName());
        job.setJarByClass(getClass());
        TableMapReduceUtil.initTableMapperJob(tableName, scan,
            RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
        // Counting happens entirely in the mappers: no reducers and no output files.
        job.setNumReduceTasks(0);
        job.setOutputFormatClass(NullOutputFormat.class);

        if (!job.waitForCompletion(true)) {
          return 1;
        }

        // Report the total explicitly; guard against counters being unavailable.
        if (job.getCounters() != null) {
          long matchedRows =
              job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue();
          System.out.println("Rows matching filter: " + matchedRows);
        }
        return 0;
      }

      /**
       * Command-line entry point; the process exit code is the value returned by
       * {@link #run(String[])}.
       */
      public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(HBaseConfiguration.create(),
            new SimpleRowCounter(), args);
        System.exit(exitCode);
      }
    }