From 9032f7c0d5f1ae7985a20d54ca04c297201aae85 Mon Sep 17 00:00:00 2001 From: CodingCat <zhunansjtu@gmail.com> Date: Wed, 12 Mar 2014 17:43:12 -0700 Subject: [PATCH] SPARK-1160: Deprecate toArray in RDD https://spark-project.atlassian.net/browse/SPARK-1160 reported by @mateiz: "It's redundant with collect() and the name doesn't make sense in Java, where we return a List (we can't return an array due to the way Java generics work). It's also missing in Python." In this patch, I deprecated the method and changed the source files using it by replacing toArray with collect() directly Author: CodingCat <zhunansjtu@gmail.com> Closes #105 from CodingCat/SPARK-1060 and squashes the following commits: 286f163 [CodingCat] deprecate in JavaRDDLike ee17b4e [CodingCat] add message and since 2ff7319 [CodingCat] deprecate toArray in RDD --- .../main/scala/org/apache/spark/api/java/JavaRDDLike.scala | 1 + .../main/scala/org/apache/spark/rdd/PairRDDFunctions.scala | 2 +- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 1 + core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala | 4 ++-- .../src/main/scala/org/apache/spark/examples/SparkALS.scala | 4 ++-- .../scala/org/apache/spark/examples/mllib/SparkSVD.scala | 2 +- .../src/main/scala/org/apache/spark/mllib/linalg/SVD.scala | 4 ++-- .../test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala | 6 +++--- 8 files changed, 13 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index a89419bbd1..3df68d4ce5 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -283,6 +283,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { /** * Return an array that contains all of the elements in this RDD. */ + @deprecated("use collect", "1.0.0") def toArray(): JList[T] = collect() /** diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index 2384c8f2b6..b20ed99f89 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -423,7 +423,7 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) * Return the key-value pairs in this RDD to the master as a Map. */ def collectAsMap(): Map[K, V] = { - val data = self.toArray() + val data = self.collect() val map = new mutable.HashMap[K, V] map.sizeHint(data.length) data.foreach { case (k, v) => map.put(k, v) } diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 4afa7523dd..b50c9963b9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -658,6 +658,7 @@ abstract class RDD[T: ClassTag]( /** * Return an array that contains all of the elements in this RDD. */ + @deprecated("use collect", "1.0.0") def toArray(): Array[T] = collect() /** diff --git a/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala index b50307cfa4..4ceea557f5 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala @@ -26,13 +26,13 @@ import cern.jet.random.engine.DRand import org.apache.spark.{Partition, TaskContext} -@deprecated("Replaced by PartitionwiseSampledRDDPartition", "1.0") +@deprecated("Replaced by PartitionwiseSampledRDDPartition", "1.0.0") private[spark] class SampledRDDPartition(val prev: Partition, val seed: Int) extends Partition with Serializable { override val index: Int = prev.index } -@deprecated("Replaced by PartitionwiseSampledRDD", "1.0") +@deprecated("Replaced by PartitionwiseSampledRDD", "1.0.0") class SampledRDD[T: ClassTag]( prev: RDD[T], withReplacement: Boolean, diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala index 17bafc2218..ce4b3c8451 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala @@ -128,11 +128,11 @@ object SparkALS { println("Iteration " + iter + ":") ms = sc.parallelize(0 until M, slices) .map(i => update(i, msb.value(i), usb.value, Rc.value)) - .toArray + .collect() msb = sc.broadcast(ms) // Re-broadcast ms because it was updated us = sc.parallelize(0 until U, slices) .map(i => update(i, usb.value(i), msb.value, algebra.transpose(Rc.value))) - .toArray + .collect() usb = sc.broadcast(us) // Re-broadcast us because it was updated println("RMSE = " + rmse(R, ms, us)) println() diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala index 19676fcc1a..ce2b133368 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala @@ -54,6 +54,6 @@ object SparkSVD { val s = decomposed.S.data val v = decomposed.V.data - println("singular values = " + s.toArray.mkString) + println("singular values = " + s.collect().mkString) } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala index 8803c4c1a0..e4a26eeb07 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala @@ -109,7 +109,7 @@ object SVD { // Construct jblas A^T A locally val ata = DoubleMatrix.zeros(n, n) - for (entry <- emits.toArray) { + for (entry <- emits.collect()) { ata.put(entry._1._1, entry._1._2, entry._2) } @@ -178,7 +178,7 @@ object SVD { val s = decomposed.S.data val v = decomposed.V.data - println("Computed " + s.toArray.length + " singular values and vectors") + println("Computed " + s.collect().length + " singular values and vectors") u.saveAsTextFile(output_u) s.saveAsTextFile(output_s) v.saveAsTextFile(output_v) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala index 32f3f141cd..a92386865a 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala @@ -50,7 +50,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll { val m = matrix.m val n = matrix.n val ret = DoubleMatrix.zeros(m, n) - matrix.data.toArray.map(x => ret.put(x.i, x.j, x.mval)) + matrix.data.collect().map(x => ret.put(x.i, x.j, x.mval)) ret } @@ -106,7 +106,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll { val u = decomposed.U val s = decomposed.S val v = decomposed.V - val retrank = s.data.toArray.length + val retrank = s.data.collect().length assert(retrank == 1, "rank returned not one") @@ -139,7 +139,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll { val u = decomposed.U val s = decomposed.S val v = decomposed.V - val retrank = s.data.toArray.length + val retrank = s.data.collect().length val densea = getDenseMatrix(a) val svd = Singular.sparseSVD(densea) -- GitLab