1.建立测试程序 WordCount.java
-
import java.io.IOException;
-
import java.util.ArrayList;
-
import java.util.Iterator;
-
import java.util.List;
-
import java.util.StringTokenizer;
-
-
import org.apache.hadoop.conf.Configuration;
-
import org.apache.hadoop.conf.Configured;
-
import org.apache.hadoop.fs.Path;
-
import org.apache.hadoop.io.IntWritable;
-
import org.apache.hadoop.io.LongWritable;
-
import org.apache.hadoop.io.Text;
-
import org.apache.hadoop.mapred.FileInputFormat;
-
import org.apache.hadoop.mapred.FileOutputFormat;
-
import org.apache.hadoop.mapred.JobClient;
-
import org.apache.hadoop.mapred.JobConf;
-
import org.apache.hadoop.mapred.MapReduceBase;
-
import org.apache.hadoop.mapred.Mapper;
-
import org.apache.hadoop.mapred.OutputCollector;
-
import org.apache.hadoop.mapred.Reducer;
-
import org.apache.hadoop.mapred.Reporter;
-
import org.apache.hadoop.util.Tool;
-
import org.apache.hadoop.util.ToolRunner;
-
-
public class WordCount extends Configured implements Tool {
-
-
public static class MapClass extends MapReduceBase implements
-
Mapper<LongWritable, Text, Text, IntWritable> {
-
-
private final static IntWritable one = new IntWritable(1);
-
private Text word = new Text();
-
-
public void map(LongWritable key, Text value,
-
OutputCollector<Text, IntWritable> output, Reporter reporter)
-
throws IOException {
-
String line = value.toString();
-
StringTokenizer itr = new StringTokenizer(line);
-
while (itr.hasMoreTokens()) {
-
word.set(itr.nextToken());
-
output.collect(word, one);
-
}
-
}
-
}
-
-
/**
-
* A reducer class that just emits the sum of the input values.
-
*/
-
public static class Reduce extends MapReduceBase implements
-
Reducer<Text, IntWritable, Text, IntWritable> {
-
-
public void reduce(Text key, Iterator<IntWritable> values,
-
OutputCollector<Text, IntWritable> output, Reporter reporter)
-
throws IOException {
-
int sum = 0;
-
while (values.hasNext()) {
-
sum += values.next().get();
-
}
-
output.collect(key, new IntWritable(sum));
-
}
-
}
-
-
static int printUsage() {
-
System.out.println("wordcount [-m ] [-r ] );
-
ToolRunner.printGenericCommandUsage(System.out);
-
return -1;
-
}
-
-
/**
-
* The main driver for word count map/reduce program. Invoke this method to
-
* submit the map/reduce job.
-
*
-
* @throws IOException
-
* When there is communication problems with the job tracker.
-
*/
-
public int run(String[] args) throws Exception {
-
JobConf conf = new JobConf(getConf(), WordCount.class);
-
conf.setJobName("wordcount");
-
-
// the keys are words (strings)
-
conf.setOutputKeyClass(Text.class);
-
// the values are counts (ints)
-
conf.setOutputValueClass(IntWritable.class);
-
-
conf.setMapperClass(MapClass.class);
-
conf.setCombinerClass(Reduce.class);
-
conf.setReducerClass(Reduce.class);
-
-
List<String> other_args = new ArrayList<String>();
-
for (int i = 0; i < args.length; ++i) {
-
try {
-
if ("-m".equals(args[i])) {
-
conf.setNumMapTasks(Integer.parseInt(args[++i]));
-
} else if ("-r".equals(args[i])) {
-
conf.setNumReduceTasks(Integer.parseInt(args[++i]));
-
} else {
-
other_args.add(args[i]);
-
}
-
} catch (NumberFormatException except) {
-
System.out.println("ERROR: Integer expected instead of "
-
+ args[i]);
-
return printUsage();
-
} catch (ArrayIndexOutOfBoundsException except) {
-
System.out.println("ERROR: Required parameter missing from "
-
+ args[i - 1]);
-
return printUsage();
-
}
-
}
-
-
// Make sure there are exactly 2 parameters left.
-
if (other_args.size() != 2) {
-
System.out.println("ERROR: Wrong number of parameters: "
-
+ other_args.size() + " instead of 2.");
-
return printUsage();
-
}
-
FileInputFormat.setInputPaths(conf, other_args.get(0));
-
FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
-
-
JobClient.runJob(conf);
-
return 0;
-
}
-
-
public static void main(String[] args) throws Exception {
-
int res = ToolRunner.run(new Configuration(), new WordCount(), args);
-
System.exit(res);
-
}
-
-
}
编译上述程序,然后利用下面的 jar 命令将编译所得的 class 文件打包成 jar 包(下面的 hadoop fs -cat 命令用于在作业运行结束后查看输出结果):
$ bin/hadoop fs -cat /tmp/output/part-00000
jar cvf WordCount.jar *.class
接下来的操作和运行源码包中例子一样。就不多说了。
转载地址:http://blog.csdn.net/aspirinxp/article/details/6590468
阅读(5101) | 评论(0) | 转发(0) |