Java Word-Count Example 2
Using the same Map and Reduce classes as the first example, this second example adds pattern filtering to the code. You can find the file, wc-ex2.java, in the Apache Software Foundation Hadoop 1.2.1 MapReduce tutorial on the Hadoop website (hadoop.apache.org/docs/r1.2.1/). Here's the complete listing:
package org.myorg;

import java.io.*;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

public class WordCount extends Configured implements Tool
{

  /*------------------------------------------------------------------*/
  public static class Map extends MapReduceBase
      implements Mapper<LongWritable, Text, Text, IntWritable>
  {

    // Custom counter, incremented once for every input word processed.
    static enum Counters
    {
      INPUT_WORDS
    }

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    // Per-task state, populated from the job configuration in configure().
    private boolean caseSensitive = true;
    private Set<String> patternsToSkip = new HashSet<String>();

    private long numRecords = 0;
    private String inputFile;

    /*---------------------------------------------------------------*/
    public void configure(JobConf job)
    {
      caseSensitive = job.getBoolean("wordcount.case.sensitive", true);
      inputFile = job.get("map.input.file");

      // When pattern skipping is enabled, fetch the patterns files that
      // the job driver shipped through the DistributedCache.
      if (job.getBoolean("wordcount.skip.patterns", false))
      {
        Path[] patternsFiles = new Path[0];
        try
        {
          patternsFiles = DistributedCache.getLocalCacheFiles(job);
        }
        catch (IOException ioe)
        {
          System.err.println("Caught exception while getting cached files: "
              + StringUtils.stringifyException(ioe));
        }
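In the tutorial, the listing continues past this point with parseSkipFile(), the map() method, the Reduce class, and the run() driver. Note that configure() only reads its settings; the values must be placed in the job configuration before the mapper tasks start. As a quick illustration of that driver-side contract, here is a minimal sketch. It is an assumption for illustration, not part of the tutorial listing: the class name WordCountDriver, the argument layout (input directory, output directory, patterns file), and the hard-coded option values are all invented here.

package org.myorg;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical stand-alone driver; the tutorial folds this logic into
// WordCount.run() instead.
public class WordCountDriver extends Configured implements Tool
{
  public int run(String[] args) throws Exception
  {
    JobConf conf = new JobConf(getConf(), WordCountDriver.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(WordCount.Map.class);
    // The Reduce class from the listing would also be set here, as the
    // combiner and the reducer, once it is in scope.

    // These are the keys that Map.configure() reads back on each task.
    conf.setBoolean("wordcount.case.sensitive", false); // fold case before counting
    conf.setBoolean("wordcount.skip.patterns", true);   // enable pattern filtering

    // Ship the patterns file (args[2]) to every task node.
    DistributedCache.addCacheFile(new Path(args[2]).toUri(), conf);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
  }

  public static void main(String[] args) throws Exception
  {
    System.exit(ToolRunner.run(new WordCountDriver(), args));
  }
}

The patterns file shipped through the DistributedCache is a plain text file with one regular expression per line; in the tutorial it holds entries such as \., \,, \!, and to, each of which the mapper strips from the input before tokenizing.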
 