From 2d7e8d8c48e07adea41f255e357aa750b9ddcdc6 Mon Sep 17 00:00:00 2001
From: Ankur Dave <ankurdave@gmail.com>
Date: Fri, 10 Jan 2014 23:46:02 -0800
Subject: [PATCH] Add GC note to GraphLab

---
 .../src/main/scala/org/apache/spark/graphx/GraphLab.scala   | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
index 016bab5d02..2f828ad807 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
@@ -35,6 +35,12 @@ object GraphLab extends Logging {
    * @tparam ED the graph edge attribute type
    * @tparam A the type accumulated during the gather phase
    * @return the resulting graph after the algorithm converges
+   *
+   * @note Unlike [[Pregel]], this implementation of [[GraphLab]] does not unpersist RDDs from
+   * previous iterations. As a result, long-running iterative GraphLab programs will eventually fill
+   * the Spark cache. Although Spark will evict RDDs from old iterations over time, garbage
+   * collection will take longer than necessary since it must examine the entire cache. This will be
+   * fixed in a future update.
    */
   def apply[VD: ClassTag, ED: ClassTag, A: ClassTag]
     (graph: Graph[VD, ED], numIter: Int,
-- 
GitLab