From d0c6e9f63982b1508487d6a283f6aae62f0e36e7 Mon Sep 17 00:00:00 2001
From: Reynold Xin <rxin@cs.berkeley.edu>
Date: Wed, 16 May 2012 14:16:55 -0700
Subject: [PATCH] Made some RDD dependencies transient to reduce the amount of
 data that needs to be serialized during closure serialization. This can significantly
 reduce the task setup time in Shark when the query involves a large number of
 (Hive) partitions.

---
 core/src/main/scala/spark/RDD.scala      | 2 +-
 core/src/main/scala/spark/UnionRDD.scala | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala
index 7fe6633f1b..fa53d9be2c 100644
--- a/core/src/main/scala/spark/RDD.scala
+++ b/core/src/main/scala/spark/RDD.scala
@@ -48,7 +48,7 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
   // Methods that must be implemented by subclasses
   def splits: Array[Split]
   def compute(split: Split): Iterator[T]
-  val dependencies: List[Dependency[_]]
+  @transient val dependencies: List[Dependency[_]]
   
   // Optionally overridden by subclasses to specify how they are partitioned
   val partitioner: Option[Partitioner] = None
diff --git a/core/src/main/scala/spark/UnionRDD.scala b/core/src/main/scala/spark/UnionRDD.scala
index 6fded339ee..4c0f255e6b 100644
--- a/core/src/main/scala/spark/UnionRDD.scala
+++ b/core/src/main/scala/spark/UnionRDD.scala
@@ -16,7 +16,7 @@ class UnionSplit[T: ClassManifest](
 
 class UnionRDD[T: ClassManifest](
     sc: SparkContext,
-    rdds: Seq[RDD[T]])
+    @transient rdds: Seq[RDD[T]])
   extends RDD[T](sc)
   with Serializable {
   
@@ -33,7 +33,7 @@ class UnionRDD[T: ClassManifest](
 
   override def splits = splits_
 
-  override val dependencies = {
+  @transient override val dependencies = {
     val deps = new ArrayBuffer[Dependency[_]]
     var pos = 0
     for ((rdd, index) <- rdds.zipWithIndex) {
@@ -47,4 +47,4 @@ class UnionRDD[T: ClassManifest](
 
   override def preferredLocations(s: Split): Seq[String] =
     s.asInstanceOf[UnionSplit[T]].preferredLocations()
-}
\ No newline at end of file
+}
-- 
GitLab