We again use the word count example from the previous post, this time written against the old MapReduce API.
Custom Mapper class
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// The custom Mapper class must inherit MapReduceBase and implement the Mapper interface
public class JMapper extends MapReduceBase implements
        Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<Text, LongWritable> collector, Reporter reporter)
            throws IOException {
        String[] ss = value.toString().split("\t");
        for (String s : ss) {
            // Use collector.collect instead of context.write
            collector.collect(new Text(s), new LongWritable(1));
        }
    }
}
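For comparison, here is a rough sketch of the same mapper written against the new API (the class name NewApiMapper is just an illustrative choice, not code from the original post). The key difference is that output is written through a Context object via context.write rather than through an OutputCollector.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Sketch: the same word count mapper using the new (mapreduce) API
public class NewApiMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] ss = value.toString().split("\t");
        for (String s : ss) {
            // The new API writes output through the Context
            context.write(new Text(s), new LongWritable(1));
        }
    }
}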
Custom Reducer class
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// The custom Reducer class must inherit MapReduceBase and implement the Reducer interface
public class JReducer extends MapReduceBase implements
        Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    public void reduce(Text key, Iterator<LongWritable> value,
            OutputCollector<Text, LongWritable> collector, Reporter reporter)
            throws IOException {
        long sum = 0;
        // The values arrive as an Iterator rather than an Iterable,
        // so a for-each loop cannot be used; iterate with while instead
        while (value.hasNext()) {
            sum += value.next().get();
        }
        collector.collect(key, new LongWritable(sum));
    }
}
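Again for comparison, a minimal sketch of the equivalent reducer in the new API (NewApiReducer is an illustrative name). Here the values are handed to reduce as an Iterable, so a for-each loop works directly.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Sketch: the same word count reducer using the new (mapreduce) API
public class NewApiReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        // Iterable allows a for-each loop, unlike the old API's Iterator
        for (LongWritable v : values) {
            sum += v.get();
        }
        context.write(key, new LongWritable(sum));
    }
}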
The JSubmit class that configures, submits, and runs the job
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class JSubmit {
    public static void main(String[] args) throws IOException,
            URISyntaxException, InterruptedException, ClassNotFoundException {
        Path outPath = new Path("hdfs://localhost:9000/out");
        Path inPath = new Path("/home/hadoop/word");
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000"), conf);
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }
        // Use JobConf instead of Job
        JobConf job = new JobConf(conf, JSubmit.class);
        FileInputFormat.setInputPaths(job, inPath);
        job.setInputFormat(TextInputFormat.class);
        job.setMapperClass(JMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setReducerClass(JReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileOutputFormat.setOutputPath(job, outPath);
        job.setOutputFormat(TextOutputFormat.class);
        // Use JobClient.runJob instead of job.waitForCompletion
        JobClient.runJob(job);
    }
}
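For reference, a minimal sketch of the corresponding driver in the new API, assuming Hadoop 2.x where Job.getInstance is available (the class name NewApiSubmit and the job name "word count" are illustrative). The new API configures a Job object and submits it with waitForCompletion instead of JobClient.runJob.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Sketch: the same driver using the new (mapreduce) API
public class NewApiSubmit {
    public static void main(String[] args) throws Exception {
        Path outPath = new Path("hdfs://localhost:9000/out");
        Path inPath = new Path("/home/hadoop/word");

        Configuration conf = new Configuration();
        // Remove the output directory if it already exists
        FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000"), conf);
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }

        // Job replaces JobConf
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(NewApiSubmit.class);

        FileInputFormat.setInputPaths(job, inPath);
        job.setMapperClass(NewApiMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setReducerClass(NewApiReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileOutputFormat.setOutputPath(job, outPath);

        // waitForCompletion replaces JobClient.runJob
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}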
As you can see, the old API is not actually very different from the new one; only a few classes have been swapped out.
Note that the class names in the old API are almost identical to those in the new API, but the packages differ: the old classes all live under org.apache.hadoop.mapred, while the new ones live under org.apache.hadoop.mapreduce.
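To make the package difference concrete, compare the import lines for the Mapper type in the two APIs:

// old API: an interface, used together with OutputCollector and Reporter
import org.apache.hadoop.mapred.Mapper;

// new API: an abstract class, used together with a Context object
import org.apache.hadoop.mapreduce.Mapper;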