In the Python module happybase, I can retrieve all rows that have a row key starting with a given string (i.e., search using a partial row key).
Let's say I have a row key in the format (ID|TYPE|DATE). I would be able to find all rows with an ID of 1 and a TYPE of A with:
import happybase
connection = happybase.Connection('hmaster-host.com')
table = connection.table('table_name')
for key, data in table.scan(row_prefix="1|A|"):
print key, data
This is what I have so far as a totally client side Java program for anyone trying to do the basics using the Java HBase API, but I can only search for a row using the full row key:
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
//class foo {
/**
 * Looks up the row stored under the key {@code "1|A|2014-01-01 00:00"} in
 * {@code table_name} and prints it via {@code printRow}.
 *
 * <p>Configuration is read from the two site XML files added as resources below.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the table cannot be opened, read, or closed
 */
public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    conf.addResource(new Path("C:\\core-site.xml"));
    conf.addResource(new Path("C:\\hbase-site.xml"));

    HTable table = new HTable(conf, "table_name");
    try {
        Result row = table.get(new Get(Bytes.toBytes("1|A|2014-01-01 00:00")));
        printRow(row);
    } finally {
        // Always release the table handle, even if the lookup fails.
        table.close();
    }
}
/**
 * Prints the {@code id}, {@code type} and {@code date} cells of column family
 * {@code cf} from one row as a single comma-separated line on standard output.
 *
 * @param result the row whose cells are printed
 */
public static void printRow(Result result) {
    byte[] family = Bytes.toBytes("cf");
    String id = Bytes.toString(result.getValue(family, Bytes.toBytes("id")));
    String type = Bytes.toString(result.getValue(family, Bytes.toBytes("type")));
    String date = Bytes.toString(result.getValue(family, Bytes.toBytes("date")));
    System.out.println(id + ", " + type + ", " + date);
}
//}
Where "cf" is the name of the column family.
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;
//class foo {
/**
 * Scans {@code table_name} for every row whose key starts with {@code "1|A|"}
 * and prints each matching row via {@code printRows}.
 *
 * <p>Configuration is read from the two site XML files added as resources below.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the table cannot be opened, scanned, or closed
 */
public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    conf.addResource(new Path("C:\\core-site.xml"));
    conf.addResource(new Path("C:\\hbase-site.xml"));

    HTable table = new HTable(conf, "table_name");
    try {
        byte[] prefix = Bytes.toBytes("1|A|");
        // The prefix is used twice: as the scan's start row, and in the filter
        // that restricts results to rows whose key begins with it.
        Scan scan = new Scan(prefix);
        Filter prefixFilter = new PrefixFilter(prefix);
        scan.setFilter(prefixFilter);

        ResultScanner resultScanner = table.getScanner(scan);
        try {
            printRows(resultScanner);
        } finally {
            // Close the scanner explicitly once iteration is done or has failed.
            resultScanner.close();
        }
    } finally {
        // Always release the table handle, even if the scan fails.
        table.close();
    }
}
public static void printRows(ResultScanner resultScanner) {
for (Iterator<Result> iterator = results.iterator(); iterator.hasNext();) {
printRow(iterator.next();
}
}
/**
 * Writes one row to standard output as {@code id, type, date}, reading each
 * value from the like-named qualifier in column family {@code cf}.
 *
 * @param result the row whose cells are printed
 */
public static void printRow(Result result) {
    final byte[] family = Bytes.toBytes("cf");
    final String[] qualifiers = {"id", "type", "date"};

    StringBuilder line = new StringBuilder();
    for (int i = 0; i < qualifiers.length; i++) {
        if (i > 0) {
            line.append(", ");
        }
        line.append(Bytes.toString(result.getValue(family, Bytes.toBytes(qualifiers[i]))));
    }
    System.out.println(line.toString());
}
//}
Note that I use the `setFilter` method, whereas the answer below uses the `addFilter` method, on account of us using different APIs.
You are using the HTable `get` operation, so you're only getting back one row (note that you can specify a prefix here as well and you don't have to give the complete key).
If you want to get back multiple rows, you should use a `Scan`:
// Encode the row-key prefix once; it is passed to both the Scan constructor and the filter.
byte[] prefix=Bytes.toBytes("1|A|");
Scan scan = new Scan(prefix);
PrefixFilter prefixFilter = new PrefixFilter(prefix);
// NOTE(review): the standard HBase Java client's Scan appears to expose setFilter(Filter),
// not addFilter - confirm against the client version in use before relying on this call.
scan.addFilter(prefixFilter);
// `table` is not defined in this fragment; presumably an already-open HTable - verify at the call site.
ResultScanner resultScanner = table.getScanner(scan);