Chinaunix首页 | 论坛 | 博客
  • 博客访问: 916887
  • 博文数量: 119
  • 博客积分: 2493
  • 博客等级: 大尉
  • 技术积分: 2363
  • 用 户 组: 普通用户
  • 注册时间: 2012-06-03 14:00
文章分类

全部博文(119)

文章存档

2013年(19)

2012年(100)

分类: Windows平台

2013-05-26 21:00:38

1.建立测试程序 WordCount.java

  1. import java.io.IOException;
  2. import java.util.ArrayList;
  3. import java.util.Iterator;
  4. import java.util.List;
  5. import java.util.StringTokenizer;

  6. import org.apache.hadoop.conf.Configuration;
  7. import org.apache.hadoop.conf.Configured;
  8. import org.apache.hadoop.fs.Path;
  9. import org.apache.hadoop.io.IntWritable;
  10. import org.apache.hadoop.io.LongWritable;
  11. import org.apache.hadoop.io.Text;
  12. import org.apache.hadoop.mapred.FileInputFormat;
  13. import org.apache.hadoop.mapred.FileOutputFormat;
  14. import org.apache.hadoop.mapred.JobClient;
  15. import org.apache.hadoop.mapred.JobConf;
  16. import org.apache.hadoop.mapred.MapReduceBase;
  17. import org.apache.hadoop.mapred.Mapper;
  18. import org.apache.hadoop.mapred.OutputCollector;
  19. import org.apache.hadoop.mapred.Reducer;
  20. import org.apache.hadoop.mapred.Reporter;
  21. import org.apache.hadoop.util.Tool;
  22. import org.apache.hadoop.util.ToolRunner;

  23. public class WordCount extends Configured implements Tool {

  24.     public static class MapClass extends MapReduceBase implements
  25.             Mapper<LongWritable, Text, Text, IntWritable> {

  26.         private final static IntWritable one = new IntWritable(1);
  27.         private Text word = new Text();

  28.         public void map(LongWritable key, Text value,
  29.                 OutputCollector<Text, IntWritable> output, Reporter reporter)
  30.                 throws IOException {
  31.             String line = value.toString();
  32.             StringTokenizer itr = new StringTokenizer(line);
  33.             while (itr.hasMoreTokens()) {
  34.                 word.set(itr.nextToken());
  35.                 output.collect(word, one);
  36.             }
  37.         }
  38.     }

  39.     /**
  40.      * A reducer class that just emits the sum of the input values.
  41.      */
  42.     public static class Reduce extends MapReduceBase implements
  43.             Reducer<Text, IntWritable, Text, IntWritable> {

  44.         public void reduce(Text key, Iterator<IntWritable> values,
  45.                 OutputCollector<Text, IntWritable> output, Reporter reporter)
  46.                 throws IOException {
  47.             int sum = 0;
  48.             while (values.hasNext()) {
  49.                 sum += values.next().get();
  50.             }
  51.             output.collect(key, new IntWritable(sum));
  52.         }
  53.     }

  54.     static int printUsage() {
  55.         System.out.println("wordcount [-m ] [-r ] ");
  56.         ToolRunner.printGenericCommandUsage(System.out);
  57.         return -1;
  58.     }

  59.     /**
  60.      * The main driver for word count map/reduce program. Invoke this method to
  61.      * submit the map/reduce job.
  62.      *
  63.      * @throws IOException
  64.      * When there is communication problems with the job tracker.
  65.      */
  66.     public int run(String[] args) throws Exception {
  67.         JobConf conf = new JobConf(getConf(), WordCount.class);
  68.         conf.setJobName("wordcount");

  69.         // the keys are words (strings)
  70.         conf.setOutputKeyClass(Text.class);
  71.         // the values are counts (ints)
  72.         conf.setOutputValueClass(IntWritable.class);

  73.         conf.setMapperClass(MapClass.class);
  74.         conf.setCombinerClass(Reduce.class);
  75.         conf.setReducerClass(Reduce.class);

  76.         List<String> other_args = new ArrayList<String>();
  77.         for (int i = 0; i < args.length; ++i) {
  78.             try {
  79.                 if ("-m".equals(args[i])) {
  80.                     conf.setNumMapTasks(Integer.parseInt(args[++i]));
  81.                 } else if ("-r".equals(args[i])) {
  82.                     conf.setNumReduceTasks(Integer.parseInt(args[++i]));
  83.                 } else {
  84.                     other_args.add(args[i]);
  85.                 }
  86.             } catch (NumberFormatException except) {
  87.                 System.out.println("ERROR: Integer expected instead of "
  88.                         + args[i]);
  89.                 return printUsage();
  90.             } catch (ArrayIndexOutOfBoundsException except) {
  91.                 System.out.println("ERROR: Required parameter missing from "
  92.                         + args[i - 1]);
  93.                 return printUsage();
  94.             }
  95.         }

  96.         // Make sure there are exactly 2 parameters left.
  97.         if (other_args.size() != 2) {
  98.             System.out.println("ERROR: Wrong number of parameters: "
  99.                     + other_args.size() + " instead of 2.");
  100.             return printUsage();
  101.         }
  102.         FileInputFormat.setInputPaths(conf, other_args.get(0));
  103.         FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

  104.         JobClient.runJob(conf);
  105.         return 0;
  106.     }

  107.     public static void main(String[] args) throws Exception {
  108.         int res = ToolRunner.run(new Configuration(), new WordCount(), args);
  109.         System.exit(res);
  110.     }

  111. }

编译上述程序（编译时需要把 Hadoop 的核心 jar 包加入 classpath），然后用下面的命令将编译得到的 class 文件打包成 jar 包：

 jar cvf  WordCount.jar *.class 

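接下来的操作和运行 Hadoop 源码包中自带的例子一样：把输入文件上传到 HDFS 后，用 hadoop jar 命令提交作业即可，例如 hadoop jar WordCount.jar WordCount <输入路径> <输出路径>（可选参数 -m、-r 分别指定 map 和 reduce 任务数），这里不再赘述。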



转载地址:http://blog.csdn.net/aspirinxp/article/details/6590468

 

阅读(5101) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~