From d0c6e9f63982b1508487d6a283f6aae62f0e36e7 Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@cs.berkeley.edu> Date: Wed, 16 May 2012 14:16:55 -0700 Subject: [PATCH] Made some RDD dependencies transient to reduce the amount of data needed to be serialized in closure serialization. This can significantly reduce the task setup time in Shark when the query involves a large number of (Hive) partitions. --- core/src/main/scala/spark/RDD.scala | 2 +- core/src/main/scala/spark/UnionRDD.scala | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala index 7fe6633f1b..fa53d9be2c 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/spark/RDD.scala @@ -48,7 +48,7 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial // Methods that must be implemented by subclasses def splits: Array[Split] def compute(split: Split): Iterator[T] - val dependencies: List[Dependency[_]] + @transient val dependencies: List[Dependency[_]] // Optionally overridden by subclasses to specify how they are partitioned val partitioner: Option[Partitioner] = None diff --git a/core/src/main/scala/spark/UnionRDD.scala b/core/src/main/scala/spark/UnionRDD.scala index 6fded339ee..4c0f255e6b 100644 --- a/core/src/main/scala/spark/UnionRDD.scala +++ b/core/src/main/scala/spark/UnionRDD.scala @@ -16,7 +16,7 @@ class UnionSplit[T: ClassManifest]( class UnionRDD[T: ClassManifest]( sc: SparkContext, - rdds: Seq[RDD[T]]) + @transient rdds: Seq[RDD[T]]) extends RDD[T](sc) with Serializable { @@ -33,7 +33,7 @@ class UnionRDD[T: ClassManifest]( override def splits = splits_ - override val dependencies = { + @transient override val dependencies = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for ((rdd, index) <- rdds.zipWithIndex) { @@ -47,4 +47,4 @@ class UnionRDD[T: ClassManifest]( override def preferredLocations(s: Split): Seq[String] = s.asInstanceOf[UnionSplit[T]].preferredLocations() -} \ No newline at end of file +} -- GitLab