Chinaunix首页 | 论坛 | 博客
  • 博客访问: 81653
  • 博文数量: 29
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 225
  • 用 户 组: 普通用户
  • 注册时间: 2014-03-06 15:31
文章分类

全部博文(29)

文章存档

2015年(18)

2014年(11)

我的朋友

分类: HADOOP

2014-09-03 11:06:30

转自http://a123159521.iteye.com/blog/1226924

点击(此处)折叠或打开

  1. package org.frame.base.hbase.hadoop;

  2. import java.io.IOException;
  3. import java.util.StringTokenizer;

  4. import org.apache.hadoop.conf.Configuration;
  5. import org.apache.hadoop.fs.Path;
  6. import org.apache.hadoop.io.IntWritable;
  7. import org.apache.hadoop.io.Text;
  8. import org.apache.hadoop.mapreduce.Job;
  9. import org.apache.hadoop.mapreduce.Mapper;
  10. import org.apache.hadoop.mapreduce.Reducer;
  11. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  12. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  13. import org.apache.hadoop.util.GenericOptionsParser;

  14. public class WordCount {

  15.   /**
  16.    * TokenizerMapper 继续自 Mapper
  17.    *
  18.    * [一个文件就一个map,两个文件就会有两个map]
  19.    * map[这里读入输入文件内容 以" \t\n\r\f" 进行分割,然后设置 word ==> one 的key/value对]
  20.    *
  21.    * @param Object Input key Type:
  22.    * @param Text Input value Type:
  23.    * @param Text Output key Type:
  24.    * @param IntWritable Output value Type:
  25.    *
  26.    * Writable的主要特点是它使得Hadoop框架知道对一个Writable类型的对象怎样进行serialize以及deserialize.
  27.    * WritableComparable在Writable的基础上增加了compareT接口,使得Hadoop框架知道怎样对WritableComparable类型的对象进行排序。
  28.    *
  29.    * @author yangchunlong.tw
  30.    *
  31.    */
  32.   public static class TokenizerMapper
  33.        extends Mapper<Object, Text, Text, IntWritable>{

  34.     private final static IntWritable one = new IntWritable(1);
  35.     private Text word = new Text();
  36.     public void map(Object key, Text value, Context context
  37.                     ) throws IOException, InterruptedException {
  38.       StringTokenizer itr = new StringTokenizer(value.toString());
  39.       while (itr.hasMoreTokens()) {
  40.         word.set(itr.nextToken());
  41.         context.write(word, one);
  42.       }
  43.     }
  44.   }

  45.   /**
  46.    * IntSumReducer 继承自 Reducer
  47.    *
  48.    * [不管几个Map,都只有一个Reduce,这是一个汇总]
  49.    * reduce[循环所有的map值,把word ==> one 的key/value对进行汇总]
  50.    *
  51.    * 这里的key为Mapper设置的word[每一个key/value都会有一次reduce]
  52.    *
  53.    * 当循环结束后,最后的确context就是最后的结果.
  54.    *
  55.    * @author yangchunlong.tw
  56.    *
  57.    */
  58.   public static class IntSumReducer
  59.        extends Reducer<Text,IntWritable,Text,IntWritable> {
  60.     private IntWritable result = new IntWritable();

  61.     public void reduce(Text key, Iterable<IntWritable> values,
  62.                        Context context
  63.                        ) throws IOException, InterruptedException {
  64.       int sum = 0;
  65.       for (IntWritable val : values) {
  66.         sum += val.get();
  67.       }
  68.       result.set(sum);
  69.       context.write(key, result);
  70.     }
  71.   }

  72.   public static void main(String[] args) throws Exception {
  73.     Configuration conf = new Configuration();
  74.     String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  75.     /**
  76.      * 这里必须有输入/输出
  77.      */
  78.     if (otherArgs.length != 2) {
  79.       System.err.println("Usage: wordcount ");
  80.       System.exit(2);
  81.     }
  82.     Job job = new Job(conf, "word count");
  83.     job.setJarByClass(WordCount.class);//主类
  84.     job.setMapperClass(TokenizerMapper.class);//mapper
  85.     job.setCombinerClass(IntSumReducer.class);//作业合成类
  86.     job.setReducerClass(IntSumReducer.class);//reducer
  87.     job.setOutputKeyClass(Text.class);//设置作业输出数据的关键类
  88.     job.setOutputValueClass(IntWritable.class);//设置作业输出值类
  89.     FileInputFormat.addInputPath(job, new Path(otherArgs[0]));//文件输入
  90.     FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));//文件输出
  91.     System.exit(job.waitForCompletion(true) ? 0 : 1);//等待完成退出.
  92.   }
  93. }

阅读(1364) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~