Commit a41b68b9 authored by Nilanjan Raychaudhuri, committed by Andrew Or

[SPARK-12265][MESOS] Spark calls System.exit inside driver instead of throwing exception

This takes over #10729 and makes sure that `spark-shell` fails with a proper error message. There is a slight behavioral change: previously `spark-shell` would exit entirely; now the REPL stays up, but `sc` and `sqlContext` are not defined and the error is visible to the user.

Author: Nilanjan Raychaudhuri <nraychaudhuri@gmail.com>
Author: Iulian Dragos <jaguarul@gmail.com>

Closes #10921 from dragos/pr/10729.
parent 51b03b71
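
For orientation before the diffs: the fix stops the background Mesos driver thread from calling `System.exit(1)` on failure and instead records the error, releases the registration latch via the new `markErr()`, and rethrows on the thread that called `startScheduler`, so the failure is catchable by the caller (for example the REPL building `sc`). Below is a minimal standalone sketch of that latch-plus-volatile-error pattern in plain Scala; everything in it (`LatchErrorPropagation`, `startAndWait`, the fake thread name) is illustrative and not Spark code.

```scala
import java.util.concurrent.CountDownLatch

// Minimal standalone sketch of the pattern the commit introduces (plain Scala,
// not Spark code; names are illustrative). A background thread either succeeds
// or records an error; the calling thread waits on a latch and rethrows the
// error itself, instead of the background thread calling System.exit and
// killing the whole JVM.
object LatchErrorPropagation {

  def startAndWait(succeed: Boolean): Unit = {
    val registerLatch = new CountDownLatch(1)
    @volatile var error: Option[Exception] = None

    val driverThread = new Thread("fake-mesos-driver") {
      setDaemon(true)
      override def run(): Unit = {
        try {
          if (!succeed) {
            throw new RuntimeException("Error starting driver, DRIVER_ABORTED")
          }
          registerLatch.countDown()   // registered: wake the caller, no error
        } catch {
          case e: Exception =>
            error = Some(e)           // record the failure for the caller
            registerLatch.countDown() // wake the caller so it does not hang forever
        }
      }
    }
    driverThread.start()

    registerLatch.await()
    // Propagate any failure on the calling thread, where it can be caught.
    error.foreach(throw _)
  }

  def main(args: Array[String]): Unit = {
    // The failure now surfaces as an ordinary exception on this thread: a
    // caller such as a REPL can catch it and keep running.
    try {
      startAndWait(succeed = false)
    } catch {
      case e: Exception => println(s"Scheduler failed to start: ${e.getMessage}")
    }
    println("JVM still alive after the failure.")
  }
}
```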
@@ -573,6 +573,7 @@ private[spark] class MesosClusterScheduler(
   override def slaveLost(driver: SchedulerDriver, slaveId: SlaveID): Unit = {}
   override def error(driver: SchedulerDriver, error: String): Unit = {
     logError("Error received: " + error)
+    markErr()
   }
 
   /**
@@ -375,6 +375,7 @@ private[spark] class MesosSchedulerBackend(
   override def error(d: SchedulerDriver, message: String) {
     inClassLoader() {
       logError("Mesos error: " + message)
+      markErr()
       scheduler.error(message)
     }
   }
@@ -106,28 +106,37 @@ private[mesos] trait MesosSchedulerUtils extends Logging {
         registerLatch.await()
         return
       }
+      @volatile
+      var error: Option[Exception] = None
+
       // We create a new thread that will block inside `mesosDriver.run`
       // until the scheduler exists
       new Thread(Utils.getFormattedClassName(this) + "-mesos-driver") {
         setDaemon(true)
 
         override def run() {
-          mesosDriver = newDriver
           try {
+            mesosDriver = newDriver
             val ret = mesosDriver.run()
             logInfo("driver.run() returned with code " + ret)
             if (ret != null && ret.equals(Status.DRIVER_ABORTED)) {
-              System.exit(1)
+              error = Some(new SparkException("Error starting driver, DRIVER_ABORTED"))
+              markErr()
             }
           } catch {
             case e: Exception => {
               logError("driver.run() failed", e)
-              System.exit(1)
+              error = Some(e)
+              markErr()
             }
           }
         }
       }.start()
 
       registerLatch.await()
+
+      // propagate any error to the calling thread. This ensures that SparkContext creation fails
+      // without leaving a broken context that won't be able to schedule any tasks
+      error.foreach(throw _)
     }
   }
@@ -144,6 +153,10 @@ private[mesos] trait MesosSchedulerUtils extends Logging {
     registerLatch.countDown()
   }
 
+  protected def markErr(): Unit = {
+    registerLatch.countDown()
+  }
+
   def createResource(name: String, amount: Double, role: Option[String] = None): Resource = {
     val builder = Resource.newBuilder()
       .setName(name)
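
And a hypothetical caller-side view of the behavioral change described above, assuming a Mesos deployment where driver startup fails (the master URL is deliberately bogus and the whole example is illustrative, not taken from the commit): with the error now propagated, `SparkContext` creation throws on the calling thread and the surrounding process, such as `spark-shell`, keeps running with `sc` left undefined.

```scala
import org.apache.spark.{SparkConf, SparkContext}
import scala.util.control.NonFatal

// Hypothetical illustration only: assumes a Mesos environment in which the
// driver fails to start. After this commit the failure reaches this thread as
// an exception instead of a System.exit from a background thread, so the
// process (e.g. the REPL) survives and can report the error.
object MesosStartupFailureDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("mesos-startup-demo")
      .setMaster("mesos://unreachable-master:5050") // deliberately unreachable
    try {
      val sc = new SparkContext(conf)
      sc.stop()
    } catch {
      case NonFatal(e) =>
        // Previously the JVM could exit before we ever got here.
        println(s"SparkContext creation failed, but the JVM is still running: ${e.getMessage}")
    }
  }
}
```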