Database Reference
In-Depth Information
48 catch (IOException ioe)
49 {
50 System.err.println("Caught exception while getting cached
51 files: " + StringUtils.stringifyException(ioe));
52 }
53 for (Path patternsFile: patternsFiles)
54 {
55 parseSkipFile(patternsFile);
56 }
57 }
58 }
59 /*----------------------------------------------------------------*/
60 private void parseSkipFile(Path patternsFile)
61 {
62 try
63 {
64 BufferedReader fis = new BufferedReader(new
65 FileReader(patternsFile.toString()));
66 String pattern = null;
67 while ((pattern = fis.readLine()) != null)
68 {
69 patternsToSkip.add(pattern);
70 }
71 }
72 catch (IOException ioe)
73 {
74 System.err.println("Caught exception while parsing cached file '"
75 + patternsFile + "' : " + StringUtils.stringifyException(ioe));
76 }
77 }
78 /*----------------------------------------------------------------*/
79 public void map(LongWritable key, Text value, OutputCollector < Text,
80 IntWritable > output, Reporter reporter) throws IOException
81 {
82 String line = (caseSensitive) ? value.toString() :
83 value.toString().toLowerCase();
84
85 for (String pattern: patternsToSkip)
86 {
87 line = line.replaceAll(pattern, "");
88 }
89
90 StringTokenizer tokenizer = new StringTokenizer(line);
91 while (tokenizer.hasMoreTokens())
92 {
93 word.set(tokenizer.nextToken());
94 output.collect(word, one);
95 reporter.incrCounter(Counters.INPUT_WORDS, 1);
96 }
97
Search WWH ::




Custom Search