Chinaunix首页 | 论坛 | 博客
  • 博客访问: 4608486
  • 博文数量: 1214
  • 博客积分: 13195
  • 博客等级: 上将
  • 技术积分: 9105
  • 用 户 组: 普通用户
  • 注册时间: 2007-01-19 14:41
个人简介

C++,python,热爱算法和机器学习

文章分类

全部博文(1214)

文章存档

2021年(13)

2020年(49)

2019年(14)

2018年(27)

2017年(69)

2016年(100)

2015年(106)

2014年(240)

2013年(5)

2012年(193)

2011年(155)

2010年(93)

2009年(62)

2008年(51)

2007年(37)

分类: 大数据

2017-11-20 11:05:27

http://student-lp.iteye.com/blog/2309841

一、hbase查询方式

    hbase的查询实现只提供两种方式:

  • 按指定rowkey获取唯一一条记录:get方法。
  • 按指定条件获取一批记录:scan方法。

    实现条件查询功能使用的就是scan方式,scan在使用时有以下几点值得注意:

  • scan可以通过setCaching与setBatch方法提高速度(以空间换时间)
  • scan可以通过setStartRow与setStopRow来限定范围。范围越小,性能越高。
  • scan可以通过setFilter方法添加过滤器,这也是分页、多条件查询的基础。

二、RowFilter使用

operator description

less  小于
less_or_equal  小于等于
equal  等于
not_equal  不等于
greater_or_equal  大于等于
greater  大于
no_op  排除所有
Comparator description
BinaryComparator 使用Bytes.compareTo()比较
BinaryPrefixComparator 和BinaryComparator差不多,从前面开始比较
NullComparator  
BitComparator  
RegexStringComparator 正则表达式
SubstringComparator 把数字当成字符串,用contains()来判断
Java代码  收藏代码
  1. import java.io.IOException;  
  2.    
  3. import org.apache.hadoop.conf.Configuration;  
  4. import org.apache.hadoop.hbase.HBaseConfiguration;  
  5. import org.apache.hadoop.hbase.HColumnDescriptor;  
  6. import org.apache.hadoop.hbase.HTableDescriptor;  
  7. import org.apache.hadoop.hbase.client.HBaseAdmin;  
  8. import org.apache.hadoop.hbase.client.HTable;  
  9. import org.apache.hadoop.hbase.client.Put;  
  10. import org.apache.hadoop.hbase.client.Result;  
  11. import org.apache.hadoop.hbase.client.ResultScanner;  
  12. import org.apache.hadoop.hbase.client.Scan;  
  13. import org.apache.hadoop.hbase.filter.BinaryComparator;  
  14. import org.apache.hadoop.hbase.filter.BinaryPrefixComparator;  
  15. import org.apache.hadoop.hbase.filter.CompareFilter;  
  16. import org.apache.hadoop.hbase.filter.Filter;  
  17. import org.apache.hadoop.hbase.filter.RegexStringComparator;  
  18. import org.apache.hadoop.hbase.filter.RowFilter;  
  19. import org.apache.hadoop.hbase.filter.SubstringComparator;  
  20.    
  21. public class TestHbaseRowFilter {  
  22.         String tableName = "test_row_filter";  
  23.         Configuration config = HBaseConfiguration.create();  
  24.    
  25.         /** 
  26.          * 部分代码来自hbase权威指南 
  27.          * @throws IOException 
  28.          */  
  29.         public void testRowFilter() throws IOException {  
  30.    
  31.                 HTable table = new HTable(config, tableName);  
  32.                 Scan scan = new Scan();  
  33.    
  34.                 System.out.println("小于等于row010的行");  
  35.                 Filter filter1 = new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,  
  36.                                 new BinaryComparator("row010".getBytes()));  
  37.                 scan.setFilter(filter1);  
  38.                 ResultScanner scanner1 = table.getScanner(scan);  
  39.                 for (Result res : scanner1) {  
  40.                         System.out.println(res);  
  41.                 }  
  42.                 scanner1.close();  
  43.    
  44.                 System.out.println("正则获取结尾为5的行");  
  45.                 Filter filter2 = new RowFilter(CompareFilter.CompareOp.EQUAL,  
  46.                                 new RegexStringComparator(".*5[        DISCUZ_CODE_0        ]quot;));  
  47.                 scan.setFilter(filter2);  
  48.                 ResultScanner scanner2 = table.getScanner(scan);  
  49.                 for (Result res : scanner2) {  
  50.                         System.out.println(res);  
  51.                 }  
  52.                 scanner2.close();  
  53.    
  54.                 System.out.println("包含有5的行");  
  55.                 Filter filter3 = new RowFilter(CompareFilter.CompareOp.EQUAL,  
  56.                                 new SubstringComparator("5"));  
  57.                 scan.setFilter(filter3);  
  58.                 ResultScanner scanner3 = table.getScanner(scan);  
  59.                 for (Result res : scanner3) {  
  60.                         System.out.println(res);  
  61.                 }  
  62.                 scanner3.close();  
  63.    
  64.                 System.out.println("开头是row01的");  
  65.                 Filter filter4 = new RowFilter(CompareFilter.CompareOp.EQUAL,  
  66.                                 new BinaryPrefixComparator("row01".getBytes()));  
  67.                 scan.setFilter(filter4);  
  68.                 ResultScanner scanner4 = table.getScanner(scan);  
  69.                 for (Result res : scanner4) {  
  70.                         System.out.println(res);  
  71.                 }  
  72.                 scanner3.close();  
  73.         }  
  74.    
  75.         /** 
  76.          * 初始化数据 
  77.          */  
  78.         public void init() {  
  79.                 // 创建表和初始化数据  
  80.                 try {  
  81.                         HBaseAdmin admin = new HBaseAdmin(config);  
  82.                         if (!admin.tableExists(tableName)) {  
  83.                                 HTableDescriptor htd = new HTableDescriptor(tableName);  
  84.                                 HColumnDescriptor hcd1 = new HColumnDescriptor("data");  
  85.                                 htd.addFamily(hcd1);  
  86.                                 HColumnDescriptor hcd2 = new HColumnDescriptor("url");  
  87.                                 htd.addFamily(hcd2);  
  88.    
  89.                                 admin.createTable(htd);  
  90.                         }  
  91.    
  92.                         HTable table = new HTable(config, tableName);  
  93.    
  94.                         table.setAutoFlush(false);  
  95.                         int count = 50;  
  96.                         for (int i = 1; i <= count; ++i) {  
  97.                                 Put p = new Put(String.format("row%03d", i).getBytes());  
  98.                                 p.add("data".getBytes(), String.format("col%01d", i % 10)  
  99.                                                 .getBytes(), String.format("data%03d", i).getBytes());  
  100.                                 p.add("url".getBytes(), String.format("col%01d", i % 10)  
  101.                                                 .getBytes(), String.format("url%03d", i).getBytes());  
  102.                                 table.put(p);  
  103.                         }  
  104.                         table.close();  
  105.    
  106.                 } catch (IOException e) {  
  107.                         e.printStackTrace();  
  108.                 }  
  109.         }  
  110.    
  111.         /** 
  112.          * @param args 
  113.          * @throws IOException 
  114.          */  
  115.         public static void main(String[] args) throws IOException {  
  116.                 TestHbaseRowFilter test = new TestHbaseRowFilter();  
  117.                 test.init();  
  118.                 test.testRowFilter();  
  119.         }  
  120.    
  121. }  
Java代码  收藏代码
  1. 小于等于row010的行  
  2. keyvalues={row001/data:col1/1364133382268/Put/vlen=7, row001/url:col1/1364133382268/Put/vlen=6}  
  3. keyvalues={row002/data:col2/1364133382268/Put/vlen=7, row002/url:col2/1364133382268/Put/vlen=6}  
  4. keyvalues={row003/data:col3/1364133382268/Put/vlen=7, row003/url:col3/1364133382268/Put/vlen=6}  
  5. keyvalues={row004/data:col4/1364133382268/Put/vlen=7, row004/url:col4/1364133382268/Put/vlen=6}  
  6. keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}  
  7. keyvalues={row006/data:col6/1364133382268/Put/vlen=7, row006/url:col6/1364133382268/Put/vlen=6}  
  8. keyvalues={row007/data:col7/1364133382268/Put/vlen=7, row007/url:col7/1364133382268/Put/vlen=6}  
  9. keyvalues={row008/data:col8/1364133382268/Put/vlen=7, row008/url:col8/1364133382268/Put/vlen=6}  
  10. keyvalues={row009/data:col9/1364133382268/Put/vlen=7, row009/url:col9/1364133382268/Put/vlen=6}  
  11. keyvalues={row010/data:col0/1364133382268/Put/vlen=7, row010/url:col0/1364133382268/Put/vlen=6}  
  12. 正则获取结尾为5的行  
  13. keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}  
  14. keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}  
  15. keyvalues={row025/data:col5/1364133382268/Put/vlen=7, row025/url:col5/1364133382268/Put/vlen=6}  
  16. keyvalues={row035/data:col5/1364133382268/Put/vlen=7, row035/url:col5/1364133382268/Put/vlen=6}  
  17. keyvalues={row045/data:col5/1364133382268/Put/vlen=7, row045/url:col5/1364133382268/Put/vlen=6}  
  18. 包含有5的行  
  19. keyvalues={row005/data:col5/1364133382268/Put/vlen=7, row005/url:col5/1364133382268/Put/vlen=6}  
  20. keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}  
  21. keyvalues={row025/data:col5/1364133382268/Put/vlen=7, row025/url:col5/1364133382268/Put/vlen=6}  
  22. keyvalues={row035/data:col5/1364133382268/Put/vlen=7, row035/url:col5/1364133382268/Put/vlen=6}  
  23. keyvalues={row045/data:col5/1364133382268/Put/vlen=7, row045/url:col5/1364133382268/Put/vlen=6}  
  24. keyvalues={row050/data:col0/1364133382268/Put/vlen=7, row050/url:col0/1364133382268/Put/vlen=6}  
  25. 开头是row01的  
  26. keyvalues={row010/data:col0/1364133382268/Put/vlen=7, row010/url:col0/1364133382268/Put/vlen=6}  
  27. keyvalues={row011/data:col1/1364133382268/Put/vlen=7, row011/url:col1/1364133382268/Put/vlen=6}  
  28. keyvalues={row012/data:col2/1364133382268/Put/vlen=7, row012/url:col2/1364133382268/Put/vlen=6}  
  29. keyvalues={row013/data:col3/1364133382268/Put/vlen=7, row013/url:col3/1364133382268/Put/vlen=6}  
  30. keyvalues={row014/data:col4/1364133382268/Put/vlen=7, row014/url:col4/1364133382268/Put/vlen=6}  
  31. keyvalues={row015/data:col5/1364133382268/Put/vlen=7, row015/url:col5/1364133382268/Put/vlen=6}  
  32. keyvalues={row016/data:col6/1364133382268/Put/vlen=7, row016/url:col6/1364133382268/Put/vlen=6}  
  33. keyvalues={row017/data:col7/1364133382268/Put/vlen=7, row017/url:col7/1364133382268/Put/vlen=6}  
  34. keyvalues={row018/data:col8/1364133382268/Put/vlen=7, row018/url:col8/1364133382268/Put/vlen=6}  
  35. keyvalues={row019/data:col9/1364133382268/Put/vlen=7, row019/url:col9/1364133382268/Put/vlen=6}  
阅读(931) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~