Skip to content
Snippets Groups Projects
Commit b55563c1 authored by Ala Luszczak, committed by Reynold Xin
Browse files

[SPARK-19607] Finding QueryExecution that matches provided executionId

## What changes were proposed in this pull request?

Implementing a mapping between executionId and corresponding QueryExecution in SQLExecution.

## How was this patch tested?

Adds a unit test.

Author: Ala Luszczak <ala@databricks.com>

Closes #16940 from ala/execution-id.
parent 3973403d
No related branches found
No related tags found
No related merge requests found
......@@ -17,6 +17,7 @@
package org.apache.spark.sql.execution
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.atomic.AtomicLong
import org.apache.spark.SparkContext
......@@ -32,6 +33,12 @@ object SQLExecution {
// Hands out the id for a new execution: the value returned by `_nextExecutionId.getAndIncrement`.
// NOTE(review): `_nextExecutionId` is declared outside this view - presumably an AtomicLong; confirm.
private def nextExecutionId: Long = _nextExecutionId.getAndIncrement
// Lookup table from execution id to its QueryExecution. In the code visible here an entry is
// `put` right after an execution id is assigned and `remove`d again in the matching `finally`.
private val executionIdToQueryExecution = new ConcurrentHashMap[Long, QueryExecution]()
/**
 * Looks up the [[QueryExecution]] registered under `executionId`.
 *
 * @param executionId the execution id to look up
 * @return the value stored in `executionIdToQueryExecution` for that id, or `null` when the
 *         map holds no entry for it
 */
def getQueryExecution(executionId: Long): QueryExecution =
  executionIdToQueryExecution.get(executionId)
/**
* Wrap an action that will execute "queryExecution" to track all Spark jobs in the body so that
* we can connect them with an execution.
......@@ -44,6 +51,7 @@ object SQLExecution {
if (oldExecutionId == null) {
val executionId = SQLExecution.nextExecutionId
sc.setLocalProperty(EXECUTION_ID_KEY, executionId.toString)
executionIdToQueryExecution.put(executionId, queryExecution)
val r = try {
// sparkContext.getCallSite() would first try to pick up any call site that was previously
// set, then fall back to Utils.getCallSite(); call Utils.getCallSite() directly on
......@@ -60,6 +68,7 @@ object SQLExecution {
executionId, System.currentTimeMillis()))
}
} finally {
executionIdToQueryExecution.remove(executionId)
sc.setLocalProperty(EXECUTION_ID_KEY, null)
}
r
......
......@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution
import java.util.Properties
import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite}
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
import org.apache.spark.sql.SparkSession
class SQLExecutionSuite extends SparkFunSuite {
......@@ -102,6 +103,33 @@ class SQLExecutionSuite extends SparkFunSuite {
}
}
test("Finding QueryExecution for given executionId") {
  // Start from a known state so the task below really waits for the listener, even if this
  // flag was left set by an earlier run in the same JVM.
  SQLExecutionSuite.canProgress = false

  val spark = SparkSession.builder.master("local[*]").appName("test").getOrCreate()
  try {
    import spark.implicits._

    // Written by the listener callback and read by the test thread once the job has finished,
    // hence volatile.
    @volatile var queryExecution: QueryExecution = null

    spark.sparkContext.addSparkListener(new SparkListener {
      override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
        val executionIdStr = jobStart.properties.getProperty(SQLExecution.EXECUTION_ID_KEY)
        if (executionIdStr != null) {
          // Resolve the id while the execution is still in flight.
          queryExecution = SQLExecution.getQueryExecution(executionIdStr.toLong)
        }
        // Release the task only after the lookup has been attempted.
        SQLExecutionSuite.canProgress = true
      }
    })

    // The task spins until the listener has run, which keeps the execution alive long enough
    // for the lookup above to happen before the job completes.
    val df = spark.range(1).map { x =>
      while (!SQLExecutionSuite.canProgress) {
        Thread.sleep(1)
      }
      x
    }
    df.collect()

    assert(df.queryExecution === queryExecution)
  } finally {
    // Always release the session/context, including when the assertion fails, so that it does
    // not leak into other tests.
    spark.stop()
  }
}
}
/**
......@@ -114,3 +142,7 @@ private class BadSparkContext(conf: SparkConf) extends SparkContext(conf) {
override protected def initialValue(): Properties = new Properties()
}
}
/** Holds the flag shared between the job-start listener and the running task in the suite. */
object SQLExecutionSuite {
  // Set to true from the listener's onJobStart; the mapped task sleeps in a loop until it
  // observes true. Volatile because it is written and read from different threads.
  @volatile var canProgress: Boolean = false
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment