Skip to content
Snippets Groups Projects
Commit 915f8eeb authored by Marcelo Vanzin's avatar Marcelo Vanzin Committed by Patrick Wendell
Browse files

[SPARK-4584] [yarn] Remove security manager from Yarn AM.

The security manager adds a lot of overhead to the runtime of the
app, and causes a severe performance regression. Even stubbing out
all unneeded methods (all except checkExit()) does not help.

So, instead, penalize users who do an explicit System.exit() by leaving
them in "undefined behavior" territory: if they do that, the Yarn
backend won't be able to report the final app status to the RM.
The result is that the final status of the application might not match
the user's expectations.

One side-effect of the change is that users who do an explicit
System.exit() will lose the AM retry functionality. Since there is
no way to know if the exit was because of success or failure, the
AM right now errs on the side of it being a successful exit.

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #3484 from vanzin/SPARK-4584 and squashes the following commits:

21f2502 [Marcelo Vanzin] Do not retry apps that use System.exit().
4198b3b [Marcelo Vanzin] [SPARK-4584] [yarn] Remove security manager from Yarn AM.
parent e464f0ac
No related branches found
No related tags found
No related merge requests found
......@@ -60,7 +60,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments,
@volatile private var exitCode = 0
@volatile private var unregistered = false
@volatile private var finished = false
@volatile private var finalStatus = FinalApplicationStatus.UNDEFINED
@volatile private var finalStatus = FinalApplicationStatus.SUCCEEDED
@volatile private var finalMsg: String = ""
@volatile private var userClassThread: Thread = _
......@@ -106,10 +106,14 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments,
val isLastAttempt = client.getAttemptId().getAttemptId() >= maxAppAttempts
if (!finished) {
// this shouldn't ever happen, but if it does assume weird failure
finish(FinalApplicationStatus.FAILED,
ApplicationMaster.EXIT_UNCAUGHT_EXCEPTION,
"shutdown hook called without cleanly finishing")
// This happens when the user application calls System.exit(). We have the choice
// of either failing or succeeding at this point. We report success to avoid
// retrying applications that have succeeded (System.exit(0)), which means that
// applications that explicitly exit with a non-zero status will also show up as
// succeeded in the RM UI.
finish(finalStatus,
ApplicationMaster.EXIT_SUCCESS,
"Shutdown hook called before final status was reported.")
}
if (!unregistered) {
......@@ -164,17 +168,18 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments,
final def finish(status: FinalApplicationStatus, code: Int, msg: String = null) = synchronized {
if (!finished) {
val inShutdown = Utils.inShutdown()
logInfo(s"Final app status: ${status}, exitCode: ${code}" +
Option(msg).map(msg => s", (reason: $msg)").getOrElse(""))
exitCode = code
finalStatus = status
finalMsg = msg
finished = true
if (Thread.currentThread() != reporterThread && reporterThread != null) {
if (!inShutdown && Thread.currentThread() != reporterThread && reporterThread != null) {
logDebug("shutting down reporter thread")
reporterThread.interrupt()
}
if (Thread.currentThread() != userClassThread && userClassThread != null) {
if (!inShutdown && Thread.currentThread() != userClassThread && userClassThread != null) {
logDebug("shutting down user thread")
userClassThread.interrupt()
}
......@@ -214,7 +219,6 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments,
private def runDriver(securityMgr: SecurityManager): Unit = {
addAmIpFilter()
setupSystemSecurityManager()
userClassThread = startUserClass()
// This a bit hacky, but we need to wait until the spark.driver.port property has
......@@ -402,46 +406,10 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments,
}
}
/**
* This system security manager applies to the entire process.
* It's main purpose is to handle the case if the user code does a System.exit.
* This allows us to catch that and properly set the YARN application status and
* cleanup if needed.
*/
private def setupSystemSecurityManager(): Unit = {
try {
var stopped = false
System.setSecurityManager(new java.lang.SecurityManager() {
override def checkExit(paramInt: Int) {
if (!stopped) {
logInfo("In securityManager checkExit, exit code: " + paramInt)
if (paramInt == 0) {
finish(FinalApplicationStatus.SUCCEEDED, ApplicationMaster.EXIT_SUCCESS)
} else {
finish(FinalApplicationStatus.FAILED,
paramInt,
"User class exited with non-zero exit code")
}
stopped = true
}
}
// required for the checkExit to work properly
override def checkPermission(perm: java.security.Permission): Unit = {}
})
}
catch {
case e: SecurityException =>
finish(FinalApplicationStatus.FAILED,
ApplicationMaster.EXIT_SECURITY,
"Error in setSecurityManager")
logError("Error in setSecurityManager:", e)
}
}
/**
* Start the user class, which contains the spark driver, in a separate Thread.
* If the main routine exits cleanly or exits with System.exit(0) we
* assume it was successful, for all other cases we assume failure.
* If the main routine exits cleanly or exits with System.exit(N) for any N
* we assume it was successful, for all other cases we assume failure.
*
* Returns the user thread that was started.
*/
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment