Submitting Hadoop jobs using the old Java API

Copyright notice: this is an original article by the blogger and may not be reproduced without permission. https://blog.csdn.net/qq1010885678/article/details/43735491

We will again use the word count example from before.
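For reference, the mapper below splits each line on tab characters, so the input is assumed to be a tab-separated word file, for example:

hello	world
hello	hadoop

After the job runs, the output (with TextOutputFormat's default tab separator between key and value) would be:

hadoop	1
hello	2
world	1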


Custom Mapper class

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

//A custom Mapper in the old API must extend MapReduceBase and implement the Mapper interface
public class JMapper extends MapReduceBase implements
		Mapper<LongWritable, Text, Text, LongWritable> {

	@Override
	public void map(LongWritable key, Text value,
			OutputCollector<Text, LongWritable> collector, Reporter reporter)
			throws IOException {
		String[] ss = value.toString().split("\t");
		for (String s : ss) {
			//Use collector.collect instead of context.write
			collector.collect(new Text(s), new LongWritable(1));
		}
	}

}
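For comparison, here is a minimal sketch of the same mapper written against the new API under org.apache.hadoop.mapreduce (the class name JMapperNew is illustrative):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

//In the new API, Mapper is an abstract class to extend, not an interface
public class JMapperNew extends Mapper<LongWritable, Text, Text, LongWritable> {

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		for (String s : value.toString().split("\t")) {
			//context.write replaces collector.collect
			context.write(new Text(s), new LongWritable(1));
		}
	}

}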

Custom Reducer class

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

//A custom Reducer in the old API must extend MapReduceBase and implement the Reducer interface
public class JReducer extends MapReduceBase implements
		Reducer<Text, LongWritable, Text, LongWritable> {

	@Override
	public void reduce(Text key, Iterator<LongWritable> values,
			OutputCollector<Text, LongWritable> collector, Reporter reporter)
			throws IOException {
		long sum = 0;
		//In the old API the values arrive as an Iterator, not an Iterable,
		//so a for-each loop cannot be used; iterate with while instead
		while (values.hasNext()) {
			sum += values.next().get();
		}
		collector.collect(key, new LongWritable(sum));
	}

}
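For comparison, a minimal sketch of the same reducer in the new API; because the values arrive as an Iterable rather than an Iterator, a for-each loop works here (the class name JReducerNew is illustrative):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class JReducerNew extends Reducer<Text, LongWritable, Text, LongWritable> {

	@Override
	protected void reduce(Text key, Iterable<LongWritable> values, Context context)
			throws IOException, InterruptedException {
		long sum = 0;
		//Iterable allows a for-each loop, unlike the old API's Iterator
		for (LongWritable v : values) {
			sum += v.get();
		}
		context.write(key, new LongWritable(sum));
	}

}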

The JSubmit class that configures and submits the job

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class JSubmit {
	public static void main(String[] args) throws IOException,
			URISyntaxException, InterruptedException, ClassNotFoundException {
		Path outPath = new Path("hdfs://localhost:9000/out");
		Path inPath = new Path("/home/hadoop/word");
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000"), conf);
		// Delete the output path if it already exists,
		// otherwise the job will fail on startup
		if (fs.exists(outPath)) {
			fs.delete(outPath, true);
		}
		// Use JobConf instead of Job
		JobConf job = new JobConf(conf, JSubmit.class);
		FileInputFormat.setInputPaths(job, inPath);
		job.setInputFormat(TextInputFormat.class);
		job.setMapperClass(JMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(LongWritable.class);
		job.setReducerClass(JReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);
		FileOutputFormat.setOutputPath(job, outPath);
		job.setOutputFormat(TextOutputFormat.class);
		// Use JobClient.runJob instead of job.waitForCompletion
		JobClient.runJob(job);
	}
}
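For comparison, a minimal sketch of the equivalent driver in the new API (Hadoop 2.x style; the class names ending in New are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JSubmitNew {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		// Job replaces JobConf
		Job job = Job.getInstance(conf, "word count");
		job.setJarByClass(JSubmitNew.class);
		job.setMapperClass(JMapperNew.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(LongWritable.class);
		job.setReducerClass(JReducerNew.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);
		// Note: these FileInputFormat/FileOutputFormat come from the
		// mapreduce.lib packages, not from mapred
		FileInputFormat.addInputPath(job, new Path("/home/hadoop/word"));
		FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/out"));
		// job.waitForCompletion replaces JobClient.runJob
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}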

As you can see, the old API is not very different from the new one; only a few classes are replaced.

Note that the class names in the old API are largely the same as those in the new API, but the packages differ: the old classes all live under the mapred package, while the new ones live under mapreduce.
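The difference is visible directly in the imports; a short side-by-side comparison:

// Old API: Mapper is an interface under org.apache.hadoop.mapred
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.JobConf;

// New API: the counterparts live under org.apache.hadoop.mapreduce
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Job;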



