Database Reference
In-Depth Information
MapReduce
HBase classes and utilities in the org.apache.hadoop.hbase.mapreduce package facilitate using HBase as a source and/or sink in MapReduce jobs. The TableInputFormat class makes splits on region boundaries so maps are handed a single region to work on. The TableOutputFormat will write the result of the reduce into HBase.
The SimpleRowCounter class in Example 20-2 (a simplified version of RowCounter in the HBase mapreduce package) runs a map task to count rows using TableInputFormat.
Example 20-2. A MapReduce application to count the number of rows in an HBase table
public class
SimpleRowCounter
extends
Configured
implements
Tool
{
/**
 * Mapper that counts invocations through a job counter rather than by
 * emitting key/value pairs: nothing is ever written to the context.
 */
static class RowCounterMapper extends TableMapper<ImmutableBytesWritable, Result> {

  /** Counters maintained by this mapper. */
  public enum Counters { ROWS }

  /**
   * Adds one to {@link Counters#ROWS} each time it is called. The row key and
   * the result are not inspected.
   *
   * @param row     key of the current row (unused)
   * @param value   result for the current row (unused)
   * @param context task context that owns the counter
   */
  @Override
  public void map(ImmutableBytesWritable row, Result value, Context context) {
    context.getCounter(Counters.ROWS).increment(1L);
  }
}
/**
 * Configures and runs a map-only job that counts the rows of a single HBase
 * table using {@code RowCounterMapper}.
 *
 * @param args command-line arguments; exactly one is expected, the table name
 * @return {@code -1} if the argument count is wrong, {@code 0} if the job
 *         completes successfully, {@code 1} otherwise
 * @throws Exception if the job cannot be set up or fails while running
 */
@Override
public int run(String[] args) throws Exception {
  if (args.length != 1) {
    System.err.println("Usage: SimpleRowCounter <tablename>");
    return -1;
  }
  String tableName = args[0];

  Scan scan = new Scan();
  // Counting rows needs no cell data, so fetch as little of each row as the
  // filter allows (per the HBase docs: only the first KeyValue of each row).
  scan.setFilter(new FirstKeyOnlyFilter());
  // A one-pass full-table scan gains nothing from the region servers' block
  // cache and would only evict blocks that other clients are using.
  scan.setCacheBlocks(false);

  // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
  Job job = Job.getInstance(getConf(), getClass().getSimpleName());
  job.setJarByClass(getClass());
  TableMapReduceUtil.initTableMapperJob(
      tableName,
      scan,
      RowCounterMapper.class,
      ImmutableBytesWritable.class,
      Result.class,
      job);

  // Map-only job: the count is reported through a counter, so there are no
  // reducers and the job output is discarded.
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(NullOutputFormat.class);

  return job.waitForCompletion(true) ? 0 : 1;
}