From 93b2f3a8826e189f6398c9b30bc00de205a3c64a Mon Sep 17 00:00:00 2001
From: zsxwing <zsxwing@gmail.com>
Date: Mon, 22 Dec 2014 11:20:00 -0800
Subject: [PATCH] [SPARK-4918][Core] Reuse Text in saveAsTextFile

Reuse Text in saveAsTextFile to reduce GC.

/cc rxin

Author: zsxwing <zsxwing@gmail.com>

Closes #3762 from zsxwing/SPARK-4918 and squashes the following commits:

59f03eb [zsxwing] Reuse Text in saveAsTextFile
---
 .../main/scala/org/apache/spark/rdd/RDD.scala    | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index a94206963b..f47c2d1fcd 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -1186,7 +1186,13 @@ abstract class RDD[T: ClassTag](
     // same bytecodes for `saveAsTextFile`.
     val nullWritableClassTag = implicitly[ClassTag[NullWritable]]
     val textClassTag = implicitly[ClassTag[Text]]
-    val r = this.map(x => (NullWritable.get(), new Text(x.toString)))
+    val r = this.mapPartitions { iter =>
+      val text = new Text()
+      iter.map { x =>
+        text.set(x.toString)
+        (NullWritable.get(), text)
+      }
+    }
     RDD.rddToPairRDDFunctions(r)(nullWritableClassTag, textClassTag, null)
       .saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path)
   }
@@ -1198,7 +1204,13 @@ abstract class RDD[T: ClassTag](
     // https://issues.apache.org/jira/browse/SPARK-2075
     val nullWritableClassTag = implicitly[ClassTag[NullWritable]]
     val textClassTag = implicitly[ClassTag[Text]]
-    val r = this.map(x => (NullWritable.get(), new Text(x.toString)))
+    val r = this.mapPartitions { iter =>
+      val text = new Text()
+      iter.map { x =>
+        text.set(x.toString)
+        (NullWritable.get(), text)
+      }
+    }
     RDD.rddToPairRDDFunctions(r)(nullWritableClassTag, textClassTag, null)
       .saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path, codec)
   }
-- 
GitLab