From aa1837875a3febad2f22b91a294f91749852b42f Mon Sep 17 00:00:00 2001
From: Andrew Or <andrew@databricks.com>
Date: Wed, 13 May 2015 16:29:52 -0700
Subject: [PATCH] [SPARK-7502] DAG visualization: gracefully handle removed
 stages

Old stages are removed without much feedback to the user. This happens very often in streaming. See screenshots below for more detail. zsxwing

**Before**

<img src="https://cloud.githubusercontent.com/assets/2133137/7621031/643cc1e0-f978-11e4-8f42-09decaac44a7.png" width="500px"/>

-------------------------
**After**
<img src="https://cloud.githubusercontent.com/assets/2133137/7621037/6e37348c-f978-11e4-84a5-e44e154f9b13.png" width="400px"/>

Author: Andrew Or <andrew@databricks.com>

Closes #6132 from andrewor14/dag-viz-remove-gracefully and squashes the following commits:

43175cd [Andrew Or] Handle removed jobs and stages gracefully
---
 .../apache/spark/ui/static/spark-dag-viz.css   |  4 ++++
 .../apache/spark/ui/static/spark-dag-viz.js    | 18 +++++++++++++-----
 .../ui/scope/RDDOperationGraphListener.scala   | 11 ++++++++---
 3 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css
index 1846acb742..eedefb44b9 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css
+++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css
@@ -44,6 +44,10 @@
   stroke-width: 1px;
 }
 
+#dag-viz-graph div#empty-dag-viz-message {
+  margin: 15px;
+}
+
 /* Job page specific styles */
 
 #dag-viz-graph svg.job marker#marker-arrow path {
diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js
index e2ec00b9c3..8138eb0d4f 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js
@@ -86,7 +86,7 @@ function toggleDagViz(forJob) {
   $(arrowSelector).toggleClass('arrow-open');
   var shouldShow = $(arrowSelector).hasClass("arrow-open");
   if (shouldShow) {
-    var shouldRender = graphContainer().select("svg").empty();
+    var shouldRender = graphContainer().select("*").empty();
     if (shouldRender) {
       renderDagViz(forJob);
     }
@@ -117,10 +117,18 @@ function renderDagViz(forJob) {
 
   // If there is not a dot file to render, fail fast and report error
   var jobOrStage = forJob ? "job" : "stage";
-  if (metadataContainer().empty()) {
-    graphContainer()
-      .append("div")
-      .text("No visualization information available for this " + jobOrStage);
+  if (metadataContainer().empty() ||
+      metadataContainer().selectAll("div").empty()) {
+    var message =
+      "<b>No visualization information available for this " + jobOrStage + "!</b><br/>" +
+      "If this is an old " + jobOrStage + ", its visualization metadata may have been " +
+      "cleaned up over time.<br/> You may consider increasing the value of ";
+    if (forJob) {
+      message += "<i>spark.ui.retainedJobs</i> and <i>spark.ui.retainedStages</i>.";
+    } else {
+      message += "<i>spark.ui.retainedStages</i>";
+    }
+    graphContainer().append("div").attr("id", "empty-dag-viz-message").html(message);
     return;
   }
 
diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala
index f0f7007d77..3b77a1e12c 100644
--- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala
+++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala
@@ -42,9 +42,14 @@ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListen
 
   /** Return the graph metadata for the given stage, or None if no such information exists. */
   def getOperationGraphForJob(jobId: Int): Seq[RDDOperationGraph] = {
-    jobIdToStageIds.get(jobId)
-      .map { sids => sids.flatMap { sid => stageIdToGraph.get(sid) } }
-      .getOrElse { Seq.empty }
+    val stageIds = jobIdToStageIds.get(jobId).getOrElse { Seq.empty }
+    val graphs = stageIds.flatMap { sid => stageIdToGraph.get(sid) }
+    // If the metadata for some stages have been removed, do not bother rendering this job
+    if (stageIds.size != graphs.size) {
+      Seq.empty
+    } else {
+      graphs
+    }
   }
 
   /** Return the graph metadata for the given stage, or None if no such information exists. */
-- 
GitLab