From 2d7e8d8c48e07adea41f255e357aa750b9ddcdc6 Mon Sep 17 00:00:00 2001 From: Ankur Dave <ankurdave@gmail.com> Date: Fri, 10 Jan 2014 23:46:02 -0800 Subject: [PATCH] Add GC note to GraphLab --- .../src/main/scala/org/apache/spark/graphx/GraphLab.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala index 016bab5d02..2f828ad807 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala @@ -35,6 +35,12 @@ object GraphLab extends Logging { * @tparam ED the graph edge attribute type * @tparam A the type accumulated during the gather phase * @return the resulting graph after the algorithm converges + * + * @note Unlike [[Pregel]], this implementation of [[GraphLab]] does not unpersist RDDs from + * previous iterations. As a result, long-running iterative GraphLab programs will eventually fill + * the Spark cache. Though Spark will evict RDDs from old iterations eventually, garbage + * collection will take longer than necessary since it must examine the entire cache. This will be + * fixed in a future update. */ def apply[VD: ClassTag, ED: ClassTag, A: ClassTag] (graph: Graph[VD, ED], numIter: Int, -- GitLab