Database Reference
In-Depth Information
A Java Example
Spark is implemented in Scala, which as a JVM-based language has excellent integration
with Java. It is straightforward — albeit verbose — to express the same example in Java
(see
Example 19-2
)
.
[
129
]
Example 19-2. Java application to find the maximum temperature, using Spark
public class
MaxTemperatureSpark
{
public static
void
main
(
String
[]
args
)
throws
Exception
{
if
(
args
.
length
!=
2
) {
System
.
err
.
println
(
"Usage: MaxTemperatureSpark <input path>
<output path>"
);
System
.
exit
(-
1
);
}
SparkConf conf
=
new
SparkConf
();
JavaSparkContext sc
=
new
JavaSparkContext
(
"local"
,
"MaxTemperatureSpark"
,
conf
);
JavaRDD
<
String
>
lines
=
sc
.
textFile
(
args
[
0
]);
JavaRDD
<
String
[]>
records
=
lines
.
map
(
new
Function
<
String
,
String
[]>() {
@Override
public
String
[]
call
(
String s
) {
return
s
.
split
(
"\t"
);
}
});
JavaRDD
<
String
[]>
filtered
=
records
.
filter
(
new
Function
<
String
[],
Boolean
>() {
@Override
public
Boolean call
(
String
[]
rec
) {
return
rec
[
1
] !=
"9999"
&&
rec
[
2
].
matches
(
"[01459]"
);
}
});
JavaPairRDD
<
Integer
,
Integer
>
tuples
=
filtered
.
mapToPair
(
new
PairFunction
<
String
[],
Integer
,
Integer
>() {
@Override
public
Tuple2
<
Integer
,
Integer
>
call
(
String
[]
rec
) {
return new
Tuple2
<
Integer
,
Integer
>(
Integer
.
parseInt
(
rec
[
0
]),
Integer
.
parseInt
(
rec
[
1
]));
}
}
);
JavaPairRDD
<
Integer
,
Integer
>
maxTemps
=
tuples
.
reduceByKey
(
new
Function2
<
Integer
,
Integer
,
Integer
>() {
@Override
public
Integer call
(
Integer i1
,
Integer i2
) {
return
Math
.
max
(
i1
,
i2
);
}