Skip to content

Commit 8cec431

Browse files
Marcelo Vanzinpwendell
authored andcommitted
[SPARK-4584] [yarn] Remove security manager from Yarn AM.
The security manager adds a lot of overhead to the runtime of the app, and causes a severe performance regression. Even stubbing out all unneeded methods (all except checkExit()) does not help. So, instead, penalize users who do an explicit System.exit() by leaving them in "undefined behavior" territory: if they do that, the Yarn backend won't be able to report the final app status to the RM. The result is that the final status of the application might not match the user's expectations. One side-effect of the change is that users who do an explicit System.exit() will lose the AM retry functionality. Since there is no way to know if the exit was because of success or failure, the AM right now errs on the side of it being a successful exit. Author: Marcelo Vanzin <[email protected]> Closes #3484 from vanzin/SPARK-4584 and squashes the following commits: 21f2502 [Marcelo Vanzin] Do not retry apps that use System.exit(). 4198b3b [Marcelo Vanzin] [SPARK-4584] [yarn] Remove security manager from Yarn AM. (cherry picked from commit 915f8ee) Signed-off-by: Patrick Wendell <[email protected]>
1 parent 3219834 commit 8cec431

File tree

1 file changed

+14
-46
lines changed

1 file changed

+14
-46
lines changed

yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala

Lines changed: 14 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments,
6060
@volatile private var exitCode = 0
6161
@volatile private var unregistered = false
6262
@volatile private var finished = false
63-
@volatile private var finalStatus = FinalApplicationStatus.UNDEFINED
63+
@volatile private var finalStatus = FinalApplicationStatus.SUCCEEDED
6464
@volatile private var finalMsg: String = ""
6565
@volatile private var userClassThread: Thread = _
6666

@@ -106,10 +106,14 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments,
106106
val isLastAttempt = client.getAttemptId().getAttemptId() >= maxAppAttempts
107107

108108
if (!finished) {
109-
// this shouldn't ever happen, but if it does assume weird failure
110-
finish(FinalApplicationStatus.FAILED,
111-
ApplicationMaster.EXIT_UNCAUGHT_EXCEPTION,
112-
"shutdown hook called without cleanly finishing")
109+
// This happens when the user application calls System.exit(). We have the choice
110+
// of either failing or succeeding at this point. We report success to avoid
111+
// retrying applications that have succeeded (System.exit(0)), which means that
112+
// applications that explicitly exit with a non-zero status will also show up as
113+
// succeeded in the RM UI.
114+
finish(finalStatus,
115+
ApplicationMaster.EXIT_SUCCESS,
116+
"Shutdown hook called before final status was reported.")
113117
}
114118

115119
if (!unregistered) {
@@ -164,17 +168,18 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments,
164168

165169
final def finish(status: FinalApplicationStatus, code: Int, msg: String = null) = synchronized {
166170
if (!finished) {
171+
val inShutdown = Utils.inShutdown()
167172
logInfo(s"Final app status: ${status}, exitCode: ${code}" +
168173
Option(msg).map(msg => s", (reason: $msg)").getOrElse(""))
169174
exitCode = code
170175
finalStatus = status
171176
finalMsg = msg
172177
finished = true
173-
if (Thread.currentThread() != reporterThread && reporterThread != null) {
178+
if (!inShutdown && Thread.currentThread() != reporterThread && reporterThread != null) {
174179
logDebug("shutting down reporter thread")
175180
reporterThread.interrupt()
176181
}
177-
if (Thread.currentThread() != userClassThread && userClassThread != null) {
182+
if (!inShutdown && Thread.currentThread() != userClassThread && userClassThread != null) {
178183
logDebug("shutting down user thread")
179184
userClassThread.interrupt()
180185
}
@@ -214,7 +219,6 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments,
214219

215220
private def runDriver(securityMgr: SecurityManager): Unit = {
216221
addAmIpFilter()
217-
setupSystemSecurityManager()
218222
userClassThread = startUserClass()
219223

220224
// This a bit hacky, but we need to wait until the spark.driver.port property has
@@ -402,46 +406,10 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments,
402406
}
403407
}
404408

405-
/**
406-
* This system security manager applies to the entire process.
407-
* It's main purpose is to handle the case if the user code does a System.exit.
408-
* This allows us to catch that and properly set the YARN application status and
409-
* cleanup if needed.
410-
*/
411-
private def setupSystemSecurityManager(): Unit = {
412-
try {
413-
var stopped = false
414-
System.setSecurityManager(new java.lang.SecurityManager() {
415-
override def checkExit(paramInt: Int) {
416-
if (!stopped) {
417-
logInfo("In securityManager checkExit, exit code: " + paramInt)
418-
if (paramInt == 0) {
419-
finish(FinalApplicationStatus.SUCCEEDED, ApplicationMaster.EXIT_SUCCESS)
420-
} else {
421-
finish(FinalApplicationStatus.FAILED,
422-
paramInt,
423-
"User class exited with non-zero exit code")
424-
}
425-
stopped = true
426-
}
427-
}
428-
// required for the checkExit to work properly
429-
override def checkPermission(perm: java.security.Permission): Unit = {}
430-
})
431-
}
432-
catch {
433-
case e: SecurityException =>
434-
finish(FinalApplicationStatus.FAILED,
435-
ApplicationMaster.EXIT_SECURITY,
436-
"Error in setSecurityManager")
437-
logError("Error in setSecurityManager:", e)
438-
}
439-
}
440-
441409
/**
442410
* Start the user class, which contains the spark driver, in a separate Thread.
443-
* If the main routine exits cleanly or exits with System.exit(0) we
444-
* assume it was successful, for all other cases we assume failure.
411+
* If the main routine exits cleanly or exits with System.exit(N) for any N
412+
* we assume it was successful, for all other cases we assume failure.
445413
*
446414
* Returns the user thread that was started.
447415
*/

0 commit comments

Comments
 (0)