Skip to content
Snippets Groups Projects
Commit 06303a62 authored by Matei Zaharia
Browse files

Optimize JavaPageRank to use reduceByKey instead of groupByKey

parent 01f20a94
No related branches found
No related tags found
No related merge requests found
......@@ -23,6 +23,7 @@ import spark.api.java.JavaRDD;
import spark.api.java.JavaSparkContext;
import spark.api.java.function.FlatMapFunction;
import spark.api.java.function.Function;
import spark.api.java.function.Function2;
import spark.api.java.function.PairFlatMapFunction;
import spark.api.java.function.PairFunction;
......@@ -39,12 +40,11 @@ import java.util.ArrayList;
* where URL and their neighbors are separated by space(s).
*/
public class JavaPageRank {
private static double sum(List<Double> numbers) {
double out = 0.0;
for (double number : numbers) {
out += number;
private static class Sum extends Function2<Double, Double, Double> {
@Override
public Double call(Double a, Double b) {
return a + b;
}
return out;
}
public static void main(String[] args) throws Exception {
......@@ -91,16 +91,15 @@ public class JavaPageRank {
for (String n : s._1) {
results.add(new Tuple2<String, Double>(n, s._2 / s._1.size()));
}
return results;
}
});
// Re-calculates URL ranks based on neighbor contributions.
ranks = contribs.groupByKey().mapValues(new Function<List<Double>, Double>() {
ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
@Override
public Double call(List<Double> cs) throws Exception {
return 0.15 + sum(cs) * 0.85;
public Double call(Double sum) throws Exception {
return 0.15 + sum * 0.85;
}
});
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment