diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index 2897272a8b8357b50f851b222fb34456cad01997..1c95bc4bfcaaf99a3f19ef3a71f4f5a2e913ff1f 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -139,9 +139,12 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * math.ceil(numItems * samplingRate) over all key values. */ def sampleByKey(withReplacement: Boolean, - fractions: java.util.Map[K, Double], + fractions: java.util.Map[K, jl.Double], seed: Long): JavaPairRDD[K, V] = - new JavaPairRDD[K, V](rdd.sampleByKey(withReplacement, fractions.asScala, seed)) + new JavaPairRDD[K, V](rdd.sampleByKey( + withReplacement, + fractions.asScala.mapValues(_.toDouble).toMap, // map to Scala Double; toMap to serialize + seed)) /** * Return a subset of this RDD sampled by key (via stratified sampling). @@ -154,7 +157,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * Use Utils.random.nextLong as the default seed for the random number generator. */ def sampleByKey(withReplacement: Boolean, - fractions: java.util.Map[K, Double]): JavaPairRDD[K, V] = + fractions: java.util.Map[K, jl.Double]): JavaPairRDD[K, V] = sampleByKey(withReplacement, fractions, Utils.random.nextLong) /** @@ -168,9 +171,12 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * two additional passes. */ def sampleByKeyExact(withReplacement: Boolean, - fractions: java.util.Map[K, Double], + fractions: java.util.Map[K, jl.Double], seed: Long): JavaPairRDD[K, V] = - new JavaPairRDD[K, V](rdd.sampleByKeyExact(withReplacement, fractions.asScala, seed)) + new JavaPairRDD[K, V](rdd.sampleByKeyExact( + withReplacement, + fractions.asScala.mapValues(_.toDouble).toMap, // map to Scala Double; toMap to serialize + seed)) /** * Return a subset of this RDD sampled by key (via stratified sampling) containing exactly @@ -186,7 +192,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) */ def sampleByKeyExact( withReplacement: Boolean, - fractions: java.util.Map[K, Double]): JavaPairRDD[K, V] = + fractions: java.util.Map[K, jl.Double]): JavaPairRDD[K, V] = sampleByKeyExact(withReplacement, fractions, Utils.random.nextLong) /** diff --git a/core/src/test/java/org/apache/spark/JavaAPISuite.java b/core/src/test/java/org/apache/spark/JavaAPISuite.java index 0f65554516153c2e33b48069ae7354a9436c8102..04f92d60167d8eba3247193a9e5f0208808eff26 100644 --- a/core/src/test/java/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/java/org/apache/spark/JavaAPISuite.java @@ -44,7 +44,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.base.Throwables; import com.google.common.io.Files; import org.apache.hadoop.io.IntWritable; @@ -1644,7 +1643,7 @@ public class JavaAPISuite implements Serializable { return new Tuple2<>(i % 2, 1); } }); - Map<Integer, Object> fractions = Maps.newHashMap(); + Map<Integer, Double> fractions = new HashMap<>(); fractions.put(0, 0.5); fractions.put(1, 1.0); JavaPairRDD<Integer, Integer> wr = rdd2.sampleByKey(true, fractions, 1L); @@ -1670,7 +1669,7 @@ public class JavaAPISuite implements Serializable { return new Tuple2<>(i % 2, 1); } }); - Map<Integer, Object> fractions = Maps.newHashMap(); + Map<Integer, Double> fractions = new HashMap<>(); fractions.put(0, 0.5); fractions.put(1, 1.0); JavaPairRDD<Integer, Integer> wrExact = rdd2.sampleByKeyExact(true, fractions, 1L); diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java index 72bbb2a8fa46458998e719b6a8cacb81257a0fea..286b95cfbc33dd73d0cc15513c2e9512707c51d0 100644 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java @@ -37,22 +37,19 @@ public class JavaStratifiedSamplingExample { @SuppressWarnings("unchecked") // $example on$ - List<Tuple2<Integer, Character>> list = new ArrayList<>( - Arrays.<Tuple2<Integer, Character>>asList( - new Tuple2(1, 'a'), - new Tuple2(1, 'b'), - new Tuple2(2, 'c'), - new Tuple2(2, 'd'), - new Tuple2(2, 'e'), - new Tuple2(3, 'f') - ) + List<Tuple2<Integer, Character>> list = Arrays.asList( + new Tuple2<>(1, 'a'), + new Tuple2<>(1, 'b'), + new Tuple2<>(2, 'c'), + new Tuple2<>(2, 'd'), + new Tuple2<>(2, 'e'), + new Tuple2<>(3, 'f') ); JavaPairRDD<Integer, Character> data = jsc.parallelizePairs(list); - // specify the exact fraction desired from each key Map<K, Object> - ImmutableMap<Integer, Object> fractions = - ImmutableMap.of(1, (Object)0.1, 2, (Object) 0.6, 3, (Object) 0.3); + // specify the exact fraction desired from each key Map<K, Double> + ImmutableMap<Integer, Double> fractions = ImmutableMap.of(1, 0.1, 2, 0.6, 3, 0.3); // Get an approximate sample from each stratum JavaPairRDD<Integer, Character> approxSample = data.sampleByKey(false, fractions);