
Implementing WordCount in Hadoop

 

I wrote this example while auditing the Distributed Storage and Cloud Computing course at Peking University.

“WordCount.java”

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;

public class WordCount
{
    public static void main(String[] args)
    {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        // Types of the key/value pairs emitted by the mapper and reducer
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // Input and output paths are directories, not single files
        FileInputFormat.setInputPaths(conf, new Path("input"));
        FileOutputFormat.setOutputPath(conf, new Path("output"));

        conf.setMapperClass(Map.class);
        // Reduce also serves as a combiner, pre-aggregating counts on the map side
        conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);

        try
        {
            JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
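
The driver above uses the classic org.apache.hadoop.mapred API. From Hadoop 0.20 onward there is also a newer org.apache.hadoop.mapreduce API; as a point of comparison, here is a minimal sketch of the same job written against it. This assumes Hadoop 0.20 or later, and the class names (WordCountNewApi, TokenMapper, SumReducer) are my own, not part of Hadoop.

“WordCountNewApi.java”

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountNewApi
{
    // New-API mapper: extends a base class instead of implementing an interface
    public static class TokenMapper
            extends Mapper<LongWritable, Text, Text, IntWritable>
    {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException
        {
            StringTokenizer itr = new StringTokenizer(value.toString().toLowerCase());
            while (itr.hasMoreTokens())
            {
                word.set(itr.nextToken());
                context.write(word, one); // Context replaces OutputCollector/Reporter
            }
        }
    }

    public static class SumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable>
    {
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException
        {
            int sum = 0;
            for (IntWritable value : values)
                sum += value.get();
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception
    {
        Job job = new Job(); // Job replaces JobConf/JobClient
        job.setJarByClass(WordCountNewApi.class);
        job.setJobName("wordcount");

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(TokenMapper.class);
        job.setCombinerClass(SumReducer.class);
        job.setReducerClass(SumReducer.class);

        FileInputFormat.addInputPath(job, new Path("input"));
        FileOutputFormat.setOutputPath(job, new Path("output"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}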

“Map.java”

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable>
{
    // Reused across calls to avoid allocating a new object per token
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    // With the default TextInputFormat, key is the byte offset of the line
    // and value is the line itself
    public void map(LongWritable key, Text value,
            OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException
    {
        String line = value.toString();
        StringTokenizer itr = new StringTokenizer(line.toLowerCase());
        while (itr.hasMoreTokens())
        {
            word.set(itr.nextToken());
            output.collect(word, one); // emit (word, 1) for every token
        }
    }
}
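
For example, given the input line “Hello Hadoop hello”, this mapper emits (hello, 1), (hadoop, 1), (hello, 1); the combiner and the reducer below then fold these into (hadoop, 1) and (hello, 2).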

“Reduce.java”

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text key, Iterator<IntWritable> values,
            OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException
    {
        // Sum the partial counts for this word; since the same class also runs
        // as a combiner, the incoming values may already be partial sums
        int sum = 0;
        while (values.hasNext())
        {
            sum += values.next().get();
        }
        output.collect(key, new IntWritable(sum));
    }
}
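
To run the job, compile the three classes against the Hadoop jar, package them, and launch something like "hadoop jar wordcount.jar WordCount" (the jar name here is a placeholder). The job reads every file under the input directory and writes its results as part files under output; note that Hadoop refuses to start the job if the output directory already exists.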
