From 3113da9c7067bbf90639866ae9d946f02cc484ff Mon Sep 17 00:00:00 2001
From: Andrew Or <andrew@databricks.com>
Date: Wed, 13 May 2015 21:04:13 -0700
Subject: [PATCH] [HOT FIX #6125] Do not wait for all stages to start rendering

zsxwing

Author: Andrew Or <andrew@databricks.com>

Closes #6138 from andrewor14/dag-viz-clean-properly and squashes the following commits:

19d4e98 [Andrew Or] Add synchronize
02542d6 [Andrew Or] Rename overloaded variable
d11bee1 [Andrew Or] Don't wait until all stages have started before rendering
---
 .../ui/scope/RDDOperationGraphListener.scala  | 34 ++++++++++---------
 .../RDDOperationGraphListenerSuite.scala      |  1 -
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala
index 3b77a1e12c..aa9c25cb5c 100644
--- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala
+++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala
@@ -41,11 +41,11 @@ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListen
     conf.getInt("spark.ui.retainedStages", SparkUI.DEFAULT_RETAINED_STAGES)
 
   /** Return the graph metadata for the given stage, or None if no such information exists. */
-  def getOperationGraphForJob(jobId: Int): Seq[RDDOperationGraph] = {
-    val stageIds = jobIdToStageIds.get(jobId).getOrElse { Seq.empty }
-    val graphs = stageIds.flatMap { sid => stageIdToGraph.get(sid) }
+  def getOperationGraphForJob(jobId: Int): Seq[RDDOperationGraph] = synchronized {
+    val _stageIds = jobIdToStageIds.get(jobId).getOrElse { Seq.empty }
+    val graphs = _stageIds.flatMap { sid => stageIdToGraph.get(sid) }
     // If the metadata for some stages have been removed, do not bother rendering this job
-    if (stageIds.size != graphs.size) {
+    if (_stageIds.size != graphs.size) {
       Seq.empty
     } else {
       graphs
@@ -53,16 +53,29 @@ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListen
   }
 
   /** Return the graph metadata for the given stage, or None if no such information exists. */
-  def getOperationGraphForStage(stageId: Int): Option[RDDOperationGraph] = {
+  def getOperationGraphForStage(stageId: Int): Option[RDDOperationGraph] = synchronized {
     stageIdToGraph.get(stageId)
   }
 
   /** On job start, construct a RDDOperationGraph for each stage in the job for display later. */
   override def onJobStart(jobStart: SparkListenerJobStart): Unit = synchronized {
     val jobId = jobStart.jobId
+    val stageInfos = jobStart.stageInfos
+
     jobIds += jobId
     jobIdToStageIds(jobId) = jobStart.stageInfos.map(_.stageId).sorted
 
+    stageInfos.foreach { stageInfo =>
+      stageIds += stageInfo.stageId
+      stageIdToGraph(stageInfo.stageId) = RDDOperationGraph.makeOperationGraph(stageInfo)
+      // Remove state for old stages
+      if (stageIds.size >= retainedStages) {
+        val toRemove = math.max(retainedStages / 10, 1)
+        stageIds.take(toRemove).foreach { id => stageIdToGraph.remove(id) }
+        stageIds.trimStart(toRemove)
+      }
+    }
+
     // Remove state for old jobs
     if (jobIds.size >= retainedJobs) {
       val toRemove = math.max(retainedJobs / 10, 1)
@@ -71,15 +84,4 @@ private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListen
     }
   }
 
-  /** Remove graph metadata for old stages */
-  override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = synchronized {
-    val stageInfo = stageSubmitted.stageInfo
-    stageIds += stageInfo.stageId
-    stageIdToGraph(stageInfo.stageId) = RDDOperationGraph.makeOperationGraph(stageInfo)
-    if (stageIds.size >= retainedStages) {
-      val toRemove = math.max(retainedStages / 10, 1)
-      stageIds.take(toRemove).foreach { id => stageIdToGraph.remove(id) }
-      stageIds.trimStart(toRemove)
-    }
-  }
 }
diff --git a/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala b/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala
index 619b38ac02..c659fc1e8b 100644
--- a/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala
@@ -31,7 +31,6 @@ class RDDOperationGraphListenerSuite extends FunSuite {
     assert(numStages > 0, "I will not run a job with 0 stages for you.")
     val stageInfos = (0 until numStages).map { _ =>
       val stageInfo = new StageInfo(stageIdCounter, 0, "s", 0, Seq.empty, Seq.empty, "d")
-      listener.onStageSubmitted(new SparkListenerStageSubmitted(stageInfo))
       stageIdCounter += 1
       stageInfo
     }
--
GitLab
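
For reference, a minimal sketch (not part of the patch) of how the updated listener is exercised, mirroring the revised test suite: a single job-start event is enough for the graphs to become available, with no stage-submitted events. The object name and main method are hypothetical, and the code assumes it is compiled inside Spark's own source tree, since RDDOperationGraphListener is private[ui].

// Illustrative sketch only; object name is made up.
package org.apache.spark.ui.scope

import org.apache.spark.SparkConf
import org.apache.spark.scheduler.{SparkListenerJobStart, StageInfo}

object OnJobStartSketch {
  def main(args: Array[String]): Unit = {
    val listener = new RDDOperationGraphListener(new SparkConf)

    // StageInfo arguments follow the test suite:
    // (stageId, attemptId, name, numTasks, rddInfos, parentIds, details)
    val stageInfos = (0 until 2).map { i =>
      new StageInfo(i, 0, "s", 0, Seq.empty, Seq.empty, "d")
    }

    // Fire only the job-start event for job 0 at time 0; no
    // SparkListenerStageSubmitted events are delivered.
    listener.onJobStart(new SparkListenerJobStart(0, 0, stageInfos))

    // With this patch, graphs are available as soon as the job starts.
    assert(listener.getOperationGraphForJob(0).size == 2)
    assert(listener.getOperationGraphForStage(1).isDefined)
  }
}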