This example was written back when I was auditing the Distributed Storage and Cloud Computing course at Peking University.
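It is the classic word-count job written against the old org.apache.hadoop.mapred API, split across three files: the driver (WordCount.java), the mapper (Map.java), and the reducer (Reduce.java).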
"WordCount.java"
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
public class WordCount
{
    public static void main(String[] args)
    {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        // Output key/value types produced by the job
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        // Input and output are directories, not single files
        FileInputFormat.setInputPaths(conf, new Path("input"));   // input directory
        FileOutputFormat.setOutputPath(conf, new Path("output")); // output directory (must not already exist)

        // Mapper, combiner and reducer classes
        conf.setMapperClass(Map.class);
        conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);

        try
        {
            JobClient.runJob(conf);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }
}
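Two notes on the driver: "input" and "output" name directories (on HDFS when the job runs on a cluster), and the output directory must not exist before the job is submitted. Reduce is registered both as the combiner and as the reducer; reusing the reducer as a combiner is safe here because summing counts is associative and commutative.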
"Map.java"
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
public class Map extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable>
{
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(LongWritable key, Text value,
            OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException
    {
        // Split the line into lowercased tokens and emit (word, 1) for each token
        String line = value.toString();
        StringTokenizer itr = new StringTokenizer(line.toLowerCase());
        while (itr.hasMoreTokens())
        {
            word.set(itr.nextToken());
            output.collect(word, one);
        }
    }
}
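For every input line the mapper emits one (word, 1) pair per lowercased token; the framework then groups these pairs by word, so the reducer sees each distinct word together with all of its counts.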
"Reduce.java"
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class Reduce extends MapReduceBase
        implements Reducer<Text, IntWritable, Text, IntWritable>
{
    public void reduce(Text key, Iterator<IntWritable> values,
            OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException
    {
        // Sum all the 1s emitted for this word and output (word, total)
        int sum = 0;
        while (values.hasNext())
        {
            sum += values.next().get();
        }
        output.collect(key, new IntWritable(sum));
    }
}
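One way to try it out (assuming the three classes are compiled against the Hadoop jars and packed into a jar, here called wordcount.jar as a placeholder, and that an "input" directory with some text files already exists) is to run something like: hadoop jar wordcount.jar WordCount. The per-word counts then land in the "output" directory, typically in a file named part-00000.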