Database Reference
In-Depth Information
A Java Example
Spark is implemented in Scala, which, as a JVM-based language, has excellent integration
with Java. It is straightforward — albeit verbose — to express the same example in Java
(see Example 19-2). [129]
Example 19-2. Java application to find the maximum temperature, using Spark
public class MaxTemperatureSpark {
public static void main ( String [] args ) throws Exception {
if ( args . length != 2 ) {
System . err . println ( "Usage: MaxTemperatureSpark <input path>
<output path>" );
System . exit (- 1 );
}
SparkConf conf = new SparkConf ();
JavaSparkContext sc = new JavaSparkContext ( "local" ,
"MaxTemperatureSpark" , conf );
JavaRDD < String > lines = sc . textFile ( args [ 0 ]);
JavaRDD < String []> records = lines . map ( new Function < String ,
String []>() {
/** Splits one input line into its tab-separated fields. */
@Override public String[] call(String s) {
  final String[] fields = s.split("\t");
  return fields;
}
});
JavaRDD < String []> filtered = records . filter ( new Function < String [],
Boolean >() {
/**
 * Decides whether a tab-split record is kept.
 *
 * @param rec the fields of one input line; field 1 is checked against the "9999"
 *            sentinel (presumably the missing-reading marker) and field 2 against
 *            the accepted single-character codes
 * @return true when field 1 is not "9999" and field 2 is one of 0, 1, 4, 5 or 9
 */
@Override public Boolean call(String[] rec) {
  // Compare by value, not by reference: the fields are produced by String.split, so
  // `rec[1] != "9999"` was effectively always true and never dropped the sentinel.
  return !"9999".equals(rec[1]) && rec[2].matches("[01459]");
}
});
JavaPairRDD < Integer , Integer > tuples = filtered . mapToPair (
new PairFunction < String [], Integer , Integer >() {
/**
 * Builds a pair from a record: field 0 parsed as the integer key and field 1 parsed
 * as the integer value.
 */
@Override public Tuple2<Integer, Integer> call(String[] rec) {
  final Integer key = Integer.parseInt(rec[0]);
  final Integer value = Integer.parseInt(rec[1]);
  return new Tuple2<Integer, Integer>(key, value);
}
}
);
JavaPairRDD < Integer , Integer > maxTemps = tuples . reduceByKey (
new Function2 < Integer , Integer , Integer >() {
/** Returns the larger of the two values being combined. */
@Override public Integer call(Integer i1, Integer i2) {
  final int larger = Math.max(i1, i2);
  return larger;
}
Search WWH ::




Custom Search