diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index f2641851cbe7c77d262613e788f52dc6eff5e04c..89318712a5777eee2046f7ec1246b471148c666e 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -196,7 +196,7 @@ class SparkContext(
       case "yarn-standalone" =>
         val scheduler = try {
-          val clazz = Class.forName("spark.scheduler.cluster.YarnClusterScheduler")
+          val clazz = Class.forName("org.apache.spark.scheduler.cluster.YarnClusterScheduler")
           val cons = clazz.getConstructor(classOf[SparkContext])
           cons.newInstance(this).asInstanceOf[ClusterScheduler]
         } catch {
diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala
index 478e5a0aaf2ac529c7dd733174cca69c511b5b2f..29968c273c31d59e5b45e1b8237f860c9eb21bdf 100644
--- a/core/src/main/scala/org/apache/spark/SparkEnv.scala
+++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -62,7 +62,7 @@ class SparkEnv (
     val yarnMode = java.lang.Boolean.valueOf(System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE")))
     if(yarnMode) {
       try {
-        Class.forName("spark.deploy.yarn.YarnSparkHadoopUtil").newInstance.asInstanceOf[SparkHadoopUtil]
+        Class.forName("org.apache.spark.deploy.yarn.YarnSparkHadoopUtil").newInstance.asInstanceOf[SparkHadoopUtil]
       } catch {
         case th: Throwable => throw new SparkException("Unable to load YARN support", th)
       }
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
index 3f8a96fc47261fd5700b737ebbb009d858b90b56..9a2cf20de79a9c5de0ad88ae9482ae0f34b3934b 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
@@ -50,7 +50,7 @@ private[spark] class SparkDeploySchedulerBackend(
       "org.apache.spark.executor.StandaloneExecutorBackend", args, sc.executorEnvs)
     val sparkHome = sc.getSparkHome().getOrElse(null)
     val appDesc = new ApplicationDescription(appName, maxCores, executorMemory, command, sparkHome,
-      sc.ui.appHttpUIAddress)
+      "http://" + sc.ui.appUIAddress)
     client = new Client(sc.env.actorSystem, master, appDesc, this)

     client.start()
diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala
index 5d54420e7224b130ffe4c59a743a32556d81f62d..48eb096063510007bc04284d6deb8c3808a65660 100644
--- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala
+++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala
@@ -79,7 +79,6 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging {
     server.foreach(_.stop())
   }

-  private[spark] def appHttpUIAddress = "http://" + appUIAddress
   private[spark] def appUIAddress = host + ":" + boundPort.getOrElse("-1")

 }
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index 9190b8ac2c139c1166aaca303958bb0c0f3c66c9..d3af75630eea919fd3b04cfcdb09d00d69a04c81 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -33,49 +33,38 @@ private[spark] object UIUtils {
     return uiRoot + resource
   }

-  private[spark] val storageStr = addBaseUri("/storage")
-  private[spark] val stagesStr = addBaseUri("/stages")
-  private[spark] val envStr = addBaseUri("/environment")
-  private[spark] val executorsStr = addBaseUri("/executors")
-  private[spark] val bootstrapMinCssStr = addBaseUri("/static/bootstrap.min.css")
-  private[spark] val webuiCssStr = addBaseUri("/static/webui.css")
-  private[spark] val sortTableStr = addBaseUri("/static/sorttable.js")
-  private[spark] val sparkLogoHdStr = addBaseUri("/static/spark-logo-77x50px-hd.png")
-  private[spark] val sparkLogoStr = addBaseUri("/static/spark_logo.png")
-
-
   /** Returns a spark page with correctly formatted headers */
   def headerSparkPage(content: => Seq[Node], sc: SparkContext, title: String, page: Page.Value)
  : Seq[Node] = {
     val jobs = page match {
-      case Stages => <li class="active"><a href={stagesStr}>Stages</a></li>
-      case _ => <li><a href="/stages">Stages</a></li>
+      case Stages => <li class="active"><a href={"%s/stages".format(UIUtils.addBaseUri())}>Stages</a></li>
+      case _ => <li><a href={"%s/stages".format(UIUtils.addBaseUri())}>Stages</a></li>
     }
     val storage = page match {
-      case Storage => <li class="active"><a href={storageStr}>Storage</a></li>
-      case _ => <li><a href={storageStr}>Storage</a></li>
+      case Storage => <li class="active"><a href={"%s/storage".format(UIUtils.addBaseUri())}>Storage</a></li>
+      case _ => <li><a href={"%s/storage".format(UIUtils.addBaseUri())}>Storage</a></li>
     }
     val environment = page match {
-      case Environment => <li class="active"><a href={envStr}>Environment</a></li>
-      case _ => <li><a href={envStr}>Environment</a></li>
+      case Environment => <li class="active"><a href={"%s/environment".format(UIUtils.addBaseUri())}>Environment</a></li>
+      case _ => <li><a href={"%s/environment".format(UIUtils.addBaseUri())}>Environment</a></li>
     }
     val executors = page match {
-      case Executors => <li class="active"><a href={executorsStr}>Executors</a></li>
-      case _ => <li><a href={executorsStr}>Executors</a></li>
+      case Executors => <li class="active"><a href={"%s/executors".format(UIUtils.addBaseUri())}>Executors</a></li>
+      case _ => <li><a href={"%s/executors".format(UIUtils.addBaseUri())}>Executors</a></li>
     }

     <html>
       <head>
         <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
-        <link rel="stylesheet" href={bootstrapMinCssStr} type="text/css" />
-        <link rel="stylesheet" href={webuiCssStr} type="text/css" />
-        <script src={sortTableStr}></script>
+        <link rel="stylesheet" href={"%s/static/bootstrap.min.css".format(UIUtils.addBaseUri())} type="text/css" />
+        <link rel="stylesheet" href={"%s/static/webui.css".format(UIUtils.addBaseUri())} type="text/css" />
+        <script src={"%s/static/sorttable.js".format(UIUtils.addBaseUri())} ></script>
         <title>{sc.appName} - {title}</title>
       </head>
       <body>
         <div class="navbar navbar-static-top">
           <div class="navbar-inner">
-            <a href="/" class="brand"><img src={sparkLogoHdStr} /></a>
+            <a href={"%s/".format(UIUtils.addBaseUri())} class="brand"><img src={"%s/static/spark-logo-77x50px-hd.png".format(UIUtils.addBaseUri())} /></a>
             <ul class="nav">
               {jobs}
               {storage}
@@ -105,9 +94,9 @@ private[spark] object UIUtils {
     <html>
       <head>
         <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
-        <link rel="stylesheet" href={bootstrapMinCssStr} type="text/css" />
-        <link rel="stylesheet" href="/static/webui.css" type="text/css" />
-        <script src={sortTableStr}></script>
+        <link rel="stylesheet" href={"%s/static/bootstrap.min.css".format(UIUtils.addBaseUri())} type="text/css" />
+        <link rel="stylesheet" href={"%s/static/webui.css".format(UIUtils.addBaseUri())} type="text/css" />
+        <script src={"%s/static/sorttable.js".format(UIUtils.addBaseUri())} ></script>
         <title>{title}</title>
       </head>
       <body>
@@ -115,7 +104,7 @@ private[spark] object UIUtils {
         <div class="row-fluid">
           <div class="span12">
             <h3 style="vertical-align: middle; display: inline-block;">
-              <img src="/static/spark-logo-77x50px-hd.png" style="margin-right: 15px;" />
+              <img src={"%s/static/spark-logo-77x50px-hd.png".format(UIUtils.addBaseUri())} style="margin-right: 15px;" />
              {title}
             </h3>
           </div>
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index bb47fc0a2ca3b90c3a26aa94f67e017c3ddfb4c0..468800b2bd474341893da92d6a21d870a132153b 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -613,7 +613,7 @@ private[spark] object Utils extends Logging {
   * A regular expression to match classes of the "core" Spark API that we want to skip when
   * finding the call site of a method.
   */
-  private val SPARK_CLASS_REGEX = """^spark(\.api\.java)?(\.rdd)?\.[A-Z]""".r
+  private val SPARK_CLASS_REGEX = """^org\.apache\.spark(\.api\.java)?(\.util)?(\.rdd)?\.[A-Z]""".r

  private[spark] class CallSiteInfo(val lastSparkMethod: String, val firstUserFile: String,
                                    val firstUserLine: Int, val firstUserClass: String)
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index fded8961852184ef73c0b7208d7469491329a18a..93421efcbc30fce2fd3415d0f34c61379e42af47 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -42,7 +42,7 @@ This would be used to connect to the cluster, write to the dfs and submit jobs t

 The command to launch the YARN Client is as follows:

-    SPARK_JAR=<SPARK_YARN_JAR_FILE> ./spark-class spark.deploy.yarn.Client \
+    SPARK_JAR=<SPARK_YARN_JAR_FILE> ./spark-class org.apache.spark.deploy.yarn.Client \
       --jar <YOUR_APP_JAR_FILE> \
       --class <APP_MAIN_CLASS> \
       --args <APP_MAIN_ARGUMENTS> \
@@ -54,9 +54,9 @@ The command to launch the YARN Client is as follows:

 For example:

-    SPARK_JAR=./yarn/target/spark-yarn-assembly-{{site.SPARK_VERSION}}.jar ./spark-class spark.deploy.yarn.Client \
+    SPARK_JAR=./yarn/target/spark-yarn-assembly-{{site.SPARK_VERSION}}.jar ./spark-class org.apache.spark.deploy.yarn.Client \
       --jar examples/target/scala-{{site.SCALA_VERSION}}/spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}.jar \
-      --class spark.examples.SparkPi \
+      --class org.apache.spark.examples.SparkPi \
       --args yarn-standalone \
       --num-workers 3 \
       --master-memory 4g \
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index bbeca245a844a4af8abaac61ce55f55073607cb9..858b58d338d71491b21d6098f84be8d34d0491f5 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -29,7 +29,8 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.apache.hadoop.yarn.ipc.YarnRPC
 import org.apache.hadoop.yarn.util.{ConverterUtils, Records}
 import scala.collection.JavaConversions._
-import org.apache.spark.{SparkContext, Logging, Utils}
+import org.apache.spark.{SparkContext, Logging}
+import org.apache.spark.util.Utils
 import org.apache.hadoop.security.UserGroupInformation
 import java.security.PrivilegedExceptionAction

@@ -56,10 +57,14 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e
     appAttemptId = getApplicationAttemptId()
     resourceManager = registerWithResourceManager()

-    // TODO: Uncomment when hadoop is on a version which has this fixed.
+    // Workaround until hadoop moves to something which has
+    // https://issues.apache.org/jira/browse/HADOOP-8406 - fixed in (2.0.2-alpha but no 0.23 line)
+    // ignore result
+    // This does not, unfortunately, always work reliably ... but alleviates the bug a lot of times
+    // Hence args.workerCores = numCore disabled above. Any better option ?

+    // Compute number of threads for akka
     //val minimumMemory = appMasterResponse.getMinimumResourceCapability().getMemory()
-
     //if (minimumMemory > 0) {
     //  val mem = args.workerMemory + YarnAllocationHandler.MEMORY_OVERHEAD
     //  val numCore = (mem / minimumMemory) + (if (0 != (mem % minimumMemory)) 1 else 0)
@@ -70,12 +75,6 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e
     //  args.workerCores = numCore
     //  }
     //}
-
-    // Workaround until hadoop moves to something which has
-    // https://issues.apache.org/jira/browse/HADOOP-8406
-    // ignore result
-    // This does not, unfortunately, always work reliably ... but alleviates the bug a lot of times
-    // Hence args.workerCores = numCore disabled above. Any better option ?
     // org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(conf)

     ApplicationMaster.register(this)
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala
index f47e23b63f4eaaae9f2b54213a1e68ef3af8d6a2..f76a5ddd39e90d4998d7712113dd0949823d5e18 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala
@@ -80,7 +80,7 @@ class ApplicationMasterArguments(val args: Array[String]) {
       System.err.println("Unknown/unsupported param " + unknownParam)
     }
     System.err.println(
-      "Usage: spark.deploy.yarn.ApplicationMaster [options] \n" +
+      "Usage: org.apache.spark.deploy.yarn.ApplicationMaster [options] \n" +
       "Options:\n" +
       "  --jar JAR_PATH       Path to your application's JAR file (required)\n" +
       "  --class CLASS_NAME   Name of your application's main class (required)\n" +
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 48e737ed79774a38adc4dd408e38947c5283c74d..844c707834ecf0ba770b2b09bee6b1af9de6cefd 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -33,7 +33,8 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.apache.hadoop.yarn.ipc.YarnRPC
 import scala.collection.mutable.HashMap
 import scala.collection.JavaConversions._
-import org.apache.spark.{Logging, Utils}
+import org.apache.spark.Logging
+import org.apache.spark.util.Utils
 import org.apache.hadoop.yarn.util.{Apps, Records, ConverterUtils}
 import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
 import org.apache.spark.deploy.SparkHadoopUtil
@@ -254,7 +255,7 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl
     val commands = List[String](javaCommand +
       " -server " +
       JAVA_OPTS +
-      " spark.deploy.yarn.ApplicationMaster" +
+      " org.apache.spark.deploy.yarn.ApplicationMaster" +
       " --class " + args.userClass +
       " --jar " + args.userJar +
       userArgsToString(args) +
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
index 6cbfadc23be93d609fb349c8ba135038c9f7ac70..cd651904d27aca52db6f94e7a4f3c2cc697220a2 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
@@ -98,7 +98,7 @@ class ClientArguments(val args: Array[String]) {
       System.err.println("Unknown/unsupported param " + unknownParam)
     }
     System.err.println(
-      "Usage: spark.deploy.yarn.Client [options] \n" +
+      "Usage: org.apache.spark.deploy.yarn.Client [options] \n" +
       "Options:\n" +
       "  --jar JAR_PATH       Path to your application's JAR file (required)\n" +
       "  --class CLASS_NAME   Name of your application's main class (required)\n" +
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala
index 72dcf7178eb304eedea6fe9738fa901d0c13eb78..6229167cb44263bc16c1e951dddadbc550ba17f3 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala
@@ -37,7 +37,8 @@ import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
 import scala.collection.JavaConversions._
 import scala.collection.mutable.HashMap

-import org.apache.spark.{Logging, Utils}
+import org.apache.spark.Logging
+import org.apache.spark.util.Utils

 class WorkerRunnable(container: Container, conf: Configuration, masterAddress: String,
     slaveId: String, hostname: String, workerMemory: Int, workerCores: Int)
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
index 0a3b3abc74773df006efd3b560aba58f346a6eba..6d6ef149cc2d8b6351013bce769c0bf421cd9eab 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
@@ -17,7 +17,8 @@

 package org.apache.spark.deploy.yarn

-import org.apache.spark.{Logging, Utils}
+import org.apache.spark.Logging
+import org.apache.spark.util.Utils
 import org.apache.spark.scheduler.SplitInfo
 import scala.collection
 import org.apache.hadoop.yarn.api.records.{AMResponse, ApplicationAttemptId, ContainerId, Priority, Resource, ResourceRequest, ContainerStatus, Container}
diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala
index bbc96cfef7934922933c4e69cd36587243d4d2f5..29b3f22e13697b38bc501e2f914d8fc0a202d722 100644
--- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala
+++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala
@@ -19,6 +19,7 @@ package org.apache.spark.scheduler.cluster

 import org.apache.spark._
 import org.apache.spark.deploy.yarn.{ApplicationMaster, YarnAllocationHandler}
+import org.apache.spark.util.Utils
 import org.apache.hadoop.conf.Configuration

 /**
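
The hunks most sensitive to the package rename are the ones in `SparkContext` and `SparkEnv`: the YARN classes are looked up by fully qualified name at runtime, so the string literals have to move to `org.apache.spark` along with the classes. The sketch below is not Spark source; it is a minimal, self-contained illustration of that reflection pattern, with made-up names (`Scheduler`, `LocalScheduler`, `loadScheduler`) standing in for the real ones.

```scala
// Minimal sketch of the runtime class-lookup pattern used in the hunks above.
// All names here are illustrative, not Spark's actual API.
object ReflectiveLoadSketch {

  trait Scheduler { def start(): Unit }

  // Stand-in for a scheduler that is always on the classpath.
  class LocalScheduler(appName: String) extends Scheduler {
    def start(): Unit = println(s"local scheduler for $appName")
  }

  // Look the class up by name so this file needs no compile-time dependency
  // on the optional module that defines it.
  def loadScheduler(className: String, appName: String): Scheduler =
    try {
      val clazz = Class.forName(className)
      val cons = clazz.getConstructor(classOf[String])
      cons.newInstance(appName).asInstanceOf[Scheduler]
    } catch {
      // A stale name (e.g. "spark.scheduler.cluster.YarnClusterScheduler"
      // after the move to org.apache.spark) only fails here, at runtime.
      // This sketch falls back; Spark itself rethrows, as in the SparkEnv hunk.
      case _: ClassNotFoundException => new LocalScheduler(appName)
    }

  def main(args: Array[String]): Unit = {
    // The pre-rename name no longer resolves, so the fallback is used.
    loadScheduler("spark.scheduler.cluster.YarnClusterScheduler", "demo").start()
  }
}
```

The trade-off this patch runs into is inherent to the pattern: because `core` never references the YARN classes at compile time, a build without the `yarn` module still links, but a class-name string left on the old `spark.*` package compiles cleanly and only breaks when the job is launched.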