http://student-lp.iteye.com/blog/2309841
一、hbase查询方式
hbase的查询实现只提供两种方式:
-
按指定rowkey获取唯一一条记录:get方法。
-
按指定条件获取一批记录:scan方法。
实现条件查询功能使用的就是scan方式,scan在使用时有以下几点值得注意:
-
scan可以通过setCaching与setBatch方法提高速度(以空间换时间)
-
scan可以通过setStartRow与setEndRow来限定范围。范围越小,性能越高。
-
scan可以通过setFilter方法添加过滤器,这也是分页、多条件查询的基础。
二、RowFilter使用
operator
|
description
|
less
|
小于
|
less_or_equal
|
小于等于
|
equal
|
等于
|
not_equal
|
不等于
|
greater_or_equal
|
大于等于
|
greater
|
大于
|
no_op
|
排除所有
|
Comparator
|
description
|
BinaryComparator
|
使用Bytes.compareTo()比较
|
BinaryPrefixComparator
|
和BinaryComparator差不多,从前面开始比较
|
NullComparator
|
|
BitComparator
|
|
RegexStringComparator
|
正则表达式
|
SubstringComparator
|
把数据当成字符串,用contains()来判断
|
-
import java.io.IOException;
-
-
import org.apache.hadoop.conf.Configuration;
-
import org.apache.hadoop.hbase.HBaseConfiguration;
-
import org.apache.hadoop.hbase.HColumnDescriptor;
-
import org.apache.hadoop.hbase.HTableDescriptor;
-
import org.apache.hadoop.hbase.client.HBaseAdmin;
-
import org.apache.hadoop.hbase.client.HTable;
-
import org.apache.hadoop.hbase.client.Put;
-
import org.apache.hadoop.hbase.client.Result;
-
import org.apache.hadoop.hbase.client.ResultScanner;
-
import org.apache.hadoop.hbase.client.Scan;
-
import org.apache.hadoop.hbase.filter.BinaryComparator;
-
import org.apache.hadoop.hbase.filter.BinaryPrefixComparator;
-
import org.apache.hadoop.hbase.filter.CompareFilter;
-
import org.apache.hadoop.hbase.filter.Filter;
-
import org.apache.hadoop.hbase.filter.RegexStringComparator;
-
import org.apache.hadoop.hbase.filter.RowFilter;
-
import org.apache.hadoop.hbase.filter.SubstringComparator;
-
-
public class TestHbaseRowFilter {
-
String tableName = "test_row_filter";
-
Configuration config = HBaseConfiguration.create();
-
-
-
-
-
-
public void testRowFilter() throws IOException {
-
-
HTable table = new HTable(config, tableName);
-
Scan scan = new Scan();
-
-
System.out.println("小于等于row010的行");
-
Filter filter1 = new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,
-
new BinaryComparator("row010".getBytes()));
-
scan.setFilter(filter1);
-
ResultScanner scanner1 = table.getScanner(scan);
-
for (Result res : scanner1) {
-
System.out.println(res);
-
}
-
scanner1.close();
-
-
System.out.println("正则获取结尾为5的行");
-
Filter filter2 = new RowFilter(CompareFilter.CompareOp.EQUAL,
-
new RegexStringComparator(".*5[ DISCUZ_CODE_0 ]quot;));
-
scan.setFilter(filter2);
-
ResultScanner scanner2 = table.getScanner(scan);
-
for (Result res : scanner2) {
-
System.out.println(res);
-
}
-
scanner2.close();
-
-
System.out.println("包含有5的行");
-
Filter filter3 = new RowFilter(CompareFilter.CompareOp.EQUAL,
-
new SubstringComparator("5"));
-
scan.setFilter(filter3);
-
ResultScanner scanner3 = table.getScanner(scan);
-
for (Result res : scanner3) {
-
System.out.println(res);
-
}
-
scanner3.close();
-
-
System.out.println("开头是row01的");
-
Filter filter4 = new RowFilter(CompareFilter.CompareOp.EQUAL,
-
new BinaryPrefixComparator("row01".getBytes()));
-
scan.setFilter(filter4);
-
ResultScanner scanner4 = table.getScanner(scan);
-
for (Result res : scanner4) {
-
System.out.println(res);
-
}
-
scanner3.close();
-
}
-
-
-
-
-
public void init() {
-
-
try {
-
HBaseAdmin admin = new HBaseAdmin(config);
-
if (!admin.tableExists(tableName)) {
-
HTableDescriptor htd = new HTableDescriptor(tableName);
-
HColumnDescriptor hcd1 = new HColumnDescriptor("data");
-
htd.addFamily(hcd1);
-
HColumnDescriptor hcd2 = new HColumnDescriptor("url");
-
htd.addFamily(hcd2);
-
-
admin.createTable(htd);
-
}
-
-
HTable table = new HTable(config, tableName);
-
-
table.setAutoFlush(false);
-
int count = 50;
-
for (int i = 1; i <= count; ++i) {
-
Put p = new Put(String.format("row%03d", i).getBytes());
-
p.add("data".getBytes(), String.format("col%01d", i % 10)
-
.getBytes(), String.format("data%03d", i).getBytes());
-
p.add("url".getBytes(), String.format("col%01d", i % 10)
-
.getBytes(), String.format("url%03d", i).getBytes());
-
table.put(p);
-
}
-
table.close();
-
-
} catch (IOException e) {
-
e.printStackTrace();
-
}
-
}
-
-
-
-
-
-
public static void main(String[] args) throws IOException {
-
TestHbaseRowFilter test = new TestHbaseRowFilter();
-
test.init();
-
test.testRowFilter();
-
}
-
-
}
-
小于等于row010的行
-
keyvalues={row001/data:col1/1364133382268/Put/vlen=7, row001/url:col1/1364133382268/Put/vlen=6}
-
keyvalues={row002/data:col2/1364133382268/Put/vlen=7, row002/url:col2/1364133382268/Put/vlen=6}
-
keyvalues={row003/data:col3/1364133382268/Put/vlen=7, row003/url:col3/1364133382268/Put/vlen=6}
-
keyvalues={row004/data:col4/1364133382268/Put/vlen=7, row004/url:col4/1364133382268/Put/vlen=6}
-
keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}
-
keyvalues={row006/data:col6/1364133382268/Put/vlen=7, row006/url:col6/1364133382268/Put/vlen=6}
-
keyvalues={row007/data:col7/1364133382268/Put/vlen=7, row007/url:col7/1364133382268/Put/vlen=6}
-
keyvalues={row008/data:col8/1364133382268/Put/vlen=7, row008/url:col8/1364133382268/Put/vlen=6}
-
keyvalues={row009/data:col9/1364133382268/Put/vlen=7, row009/url:col9/1364133382268/Put/vlen=6}
-
keyvalues={row010/data:col0/1364133382268/Put/vlen=7, row010/url:col0/1364133382268/Put/vlen=6}
-
正则获取结尾为5的行
-
keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}
-
keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}
-
keyvalues={row025/data:col5/1364133382268/Put/vlen=7, row025/url:col5/1364133382268/Put/vlen=6}
-
keyvalues={row035/data:col5/1364133382268/Put/vlen=7, row035/url:col5/1364133382268/Put/vlen=6}
-
keyvalues={row045/data:col5/1364133382268/Put/vlen=7, row045/url:col5/1364133382268/Put/vlen=6}
-
包含有5的行
-
keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}
-
keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}
-
keyvalues={row025/data:col5/1364133382268/Put/vlen=7, row025/url:col5/1364133382268/Put/vlen=6}
-
keyvalues={row035/data:col5/1364133382268/Put/vlen=7, row035/url:col5/1364133382268/Put/vlen=6}
-
keyvalues={row045/data:col5/1364133382268/Put/vlen=7, row045/url:col5/1364133382268/Put/vlen=6}
-
keyvalues={row050/data:col0/1364133382268/Put/vlen=7, row050/url:col0/1364133382268/Put/vlen=6}
-
开头是row01的
-
keyvalues={row010/data:col0/1364133382268/Put/vlen=7, row010/url:col0/1364133382268/Put/vlen=6}
-
keyvalues={row011/data:col1/1364133382268/Put/vlen=7, row011/url:col1/1364133382268/Put/vlen=6}
-
keyvalues={row012/data:col2/1364133382268/Put/vlen=7, row012/url:col2/1364133382268/Put/vlen=6}
-
keyvalues={row013/data:col3/1364133382268/Put/vlen=7, row013/url:col3/1364133382268/Put/vlen=6}
-
keyvalues={row014/data:col4/1364133382268/Put/vlen=7, row014/url:col4/1364133382268/Put/vlen=6}
-
keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}
-
keyvalues={row016/data:col6/1364133382268/Put/vlen=7, row016/url:col6/1364133382268/Put/vlen=6}
-
keyvalues={row017/data:col7/1364133382268/Put/vlen=7, row017/url:col7/1364133382268/Put/vlen=6}
-
keyvalues={row018/data:col8/1364133382268/Put/vlen=7, row018/url:col8/1364133382268/Put/vlen=6}
-
keyvalues={row019/data:col9/1364133382268/Put/vlen=7, row019/url:col9/1364133382268/Put/vlen=6}
阅读(924) | 评论(0) | 转发(0) |