Skip to content
Snippets Groups Projects
Commit 603af51b authored by Prashant Sharma's avatar Prashant Sharma
Browse files

Merge branch 'master' into akka-bug-fix

Conflicts:
	core/pom.xml
	core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
	pom.xml
	project/SparkBuild.scala
	streaming/pom.xml
	yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
parents 17db6a90 d2efe135
No related branches found
No related tags found
No related merge requests found
Showing
with 682 additions and 280 deletions
......@@ -54,7 +54,7 @@ versions without YARN, use:
# Cloudera CDH 4.2.0 with MapReduce v1
$ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt/sbt assembly
For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions
For Apache Hadoop 2.0.X, 2.1.X, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions
with YARN, also set `SPARK_YARN=true`:
# Apache Hadoop 2.0.5-alpha
......@@ -63,8 +63,10 @@ with YARN, also set `SPARK_YARN=true`:
# Cloudera CDH 4.2.0 with MapReduce v2
$ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_YARN=true sbt/sbt assembly
For convenience, these variables may also be set through the `conf/spark-env.sh` file
described below.
When building for Hadoop 2.2.X and newer, you'll need to include the additional `new-yarn` profile:
# Apache Hadoop 2.2.X and newer
$ mvn -Dyarn.version=2.2.0 -Dhadoop.version=2.2.0 -Pnew-yarn
When developing a Spark application, specify the Hadoop version by adding the
"hadoop-client" artifact to your project's dependencies. For example, if you're
......
......@@ -95,11 +95,15 @@
<version>0.3.1</version>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<groupId>${akka.group}</groupId>
<artifactId>akka-actor_2.10</artifactId>
</dependency>
<dependency>
<groupId>${akka.group}</groupId>
<artifactId>akka-remote_2.10</artifactId>
</dependency>
<dependency>
<groupId>com.typesafe.akka</groupId>
<groupId>${akka.group}</groupId>
<artifactId>akka-slf4j_2.10</artifactId>
</dependency>
<dependency>
......@@ -118,10 +122,6 @@
<groupId>colt</groupId>
<artifactId>colt</artifactId>
</dependency>
<dependency>
<groupId>com.github.scala-incubator.io</groupId>
<artifactId>scala-io-file_2.10</artifactId>
</dependency>
<dependency>
<groupId>org.apache.mesos</groupId>
<artifactId>mesos</artifactId>
......
......@@ -99,7 +99,7 @@ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc:
override def ready(atMost: Duration)(implicit permit: CanAwait): SimpleFutureAction.this.type = {
if (!atMost.isFinite()) {
awaitResult()
} else {
} else jobWaiter.synchronized {
val finishTime = System.currentTimeMillis() + atMost.toMillis
while (!isCompleted) {
val time = System.currentTimeMillis()
......
......@@ -246,12 +246,12 @@ private[spark] class MapOutputTrackerMaster extends MapOutputTracker {
case Some(bytes) =>
return bytes
case None =>
statuses = mapStatuses(shuffleId)
statuses = mapStatuses.getOrElse(shuffleId, Array[MapStatus]())
epochGotten = epoch
}
}
// If we got here, we failed to find the serialized locations in the cache, so we pulled
// out a snapshot of the locations as "locs"; let's serialize and return that
// out a snapshot of the locations as "statuses"; let's serialize and return that
val bytes = MapOutputTracker.serializeMapStatuses(statuses)
logInfo("Size of output statuses for shuffle %d is %d bytes".format(shuffleId, bytes.length))
// Add them into the table only if the epoch hasn't changed while we were working
......@@ -276,6 +276,10 @@ private[spark] class MapOutputTrackerMaster extends MapOutputTracker {
/**
 * Accepts an epoch update and deliberately does nothing with it: the body of this
 * override is empty.
 *
 * @param newEpoch the epoch value offered by the caller; unused here
 */
override def updateEpoch(newEpoch: Long) {
// This might be called on the MapOutputTrackerMaster if we're running in local mode.
}
/**
 * Reports whether this tracker holds anything for the given shuffle.
 *
 * The serialized-status cache is consulted first; `mapStatuses` is only checked
 * when the cache has no entry for the shuffle.
 *
 * @param shuffleId id of the shuffle to look up
 * @return true if either the serialized-status cache or `mapStatuses` has an entry
 */
def has(shuffleId: Int): Boolean = {
  val cachedEntry = cachedSerializedStatuses.get(shuffleId)
  cachedEntry.nonEmpty || mapStatuses.contains(shuffleId)
}
}
private[spark] object MapOutputTracker {
......
......@@ -83,7 +83,7 @@ class SparkContext(
val sparkHome: String = null,
val jars: Seq[String] = Nil,
val environment: Map[String, String] = Map(),
// This is used only by yarn for now, but should be relevant to other cluster types (mesos, etc)
// This is used only by YARN for now, but should be relevant to other cluster types (Mesos, etc)
// too. This is typically generated from InputFormatInfo.computePreferredLocations .. host, set
// of data-local splits on host
val preferredNodeLocationData: scala.collection.Map[String, scala.collection.Set[SplitInfo]] =
......@@ -155,123 +155,11 @@ class SparkContext(
executorEnvs("SPARK_USER") = sparkUser
// Create and start the scheduler
private[spark] var taskScheduler: TaskScheduler = {
// Regular expression used for local[N] master format
val LOCAL_N_REGEX = """local\[([0-9]+)\]""".r
// Regular expression for local[N, maxRetries], used in tests with failing tasks
val LOCAL_N_FAILURES_REGEX = """local\[([0-9]+)\s*,\s*([0-9]+)\]""".r
// Regular expression for simulating a Spark cluster of [N, cores, memory] locally
val LOCAL_CLUSTER_REGEX = """local-cluster\[\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*]""".r
// Regular expression for connecting to Spark deploy clusters
val SPARK_REGEX = """spark://(.*)""".r
// Regular expression for connection to Mesos cluster
val MESOS_REGEX = """mesos://(.*)""".r
// Regular expression for connection to Simr cluster
val SIMR_REGEX = """simr://(.*)""".r
master match {
case "local" =>
new LocalScheduler(1, 0, this)
case LOCAL_N_REGEX(threads) =>
new LocalScheduler(threads.toInt, 0, this)
case LOCAL_N_FAILURES_REGEX(threads, maxFailures) =>
new LocalScheduler(threads.toInt, maxFailures.toInt, this)
case SPARK_REGEX(sparkUrl) =>
val scheduler = new ClusterScheduler(this)
val masterUrls = sparkUrl.split(",").map("spark://" + _)
val backend = new SparkDeploySchedulerBackend(scheduler, this, masterUrls, appName)
scheduler.initialize(backend)
scheduler
case SIMR_REGEX(simrUrl) =>
val scheduler = new ClusterScheduler(this)
val backend = new SimrSchedulerBackend(scheduler, this, simrUrl)
scheduler.initialize(backend)
scheduler
case LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, memoryPerSlave) =>
// Check to make sure memory requested <= memoryPerSlave. Otherwise Spark will just hang.
val memoryPerSlaveInt = memoryPerSlave.toInt
if (SparkContext.executorMemoryRequested > memoryPerSlaveInt) {
throw new SparkException(
"Asked to launch cluster with %d MB RAM / worker but requested %d MB/worker".format(
memoryPerSlaveInt, SparkContext.executorMemoryRequested))
}
val scheduler = new ClusterScheduler(this)
val localCluster = new LocalSparkCluster(
numSlaves.toInt, coresPerSlave.toInt, memoryPerSlaveInt)
val masterUrls = localCluster.start()
val backend = new SparkDeploySchedulerBackend(scheduler, this, masterUrls, appName)
scheduler.initialize(backend)
backend.shutdownCallback = (backend: SparkDeploySchedulerBackend) => {
localCluster.stop()
}
scheduler
case "yarn-standalone" =>
val scheduler = try {
val clazz = Class.forName("org.apache.spark.scheduler.cluster.YarnClusterScheduler")
val cons = clazz.getConstructor(classOf[SparkContext])
cons.newInstance(this).asInstanceOf[ClusterScheduler]
} catch {
// TODO: Enumerate the exact reasons why it can fail
// But irrespective of it, it means we cannot proceed !
case th: Throwable => {
throw new SparkException("YARN mode not available ?", th)
}
}
val backend = new CoarseGrainedSchedulerBackend(scheduler, this.env.actorSystem)
scheduler.initialize(backend)
scheduler
case "yarn-client" =>
val scheduler = try {
val clazz = Class.forName("org.apache.spark.scheduler.cluster.YarnClientClusterScheduler")
val cons = clazz.getConstructor(classOf[SparkContext])
cons.newInstance(this).asInstanceOf[ClusterScheduler]
} catch {
case th: Throwable => {
throw new SparkException("YARN mode not available ?", th)
}
}
val backend = try {
val clazz = Class.forName("org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend")
val cons = clazz.getConstructor(classOf[ClusterScheduler], classOf[SparkContext])
cons.newInstance(scheduler, this).asInstanceOf[CoarseGrainedSchedulerBackend]
} catch {
case th: Throwable => {
throw new SparkException("YARN mode not available ?", th)
}
}
scheduler.initialize(backend)
scheduler
case MESOS_REGEX(mesosUrl) =>
MesosNativeLibrary.load()
val scheduler = new ClusterScheduler(this)
val coarseGrained = System.getProperty("spark.mesos.coarse", "false").toBoolean
val backend = if (coarseGrained) {
new CoarseMesosSchedulerBackend(scheduler, this, mesosUrl, appName)
} else {
new MesosSchedulerBackend(scheduler, this, mesosUrl, appName)
}
scheduler.initialize(backend)
scheduler
case _ =>
throw new SparkException("Could not parse Master URL: '" + master + "'")
}
}
private[spark] var taskScheduler = SparkContext.createTaskScheduler(this, master, appName)
taskScheduler.start()
@volatile private[spark] var dagScheduler = new DAGScheduler(taskScheduler)
dagScheduler.start()
ui.start()
......@@ -1138,6 +1026,124 @@ object SparkContext {
.map(Utils.memoryStringToMb)
.getOrElse(512)
}
/**
 * Creates a task scheduler based on a given master URL. Extracted for testing.
 *
 * Accepted master formats:
 *  - `local`                            one thread, no task retries
 *  - `local[N]`                         N threads, no task retries
 *  - `local[N, maxFailures]`            N threads with the given failure limit
 *  - `local-cluster[N, cores, memory]`  in-process cluster started by this method
 *  - `spark://h1:p1,h2:p2`              one or more comma-separated deploy masters
 *  - `yarn-standalone`, `yarn-client`   scheduler classes looked up by name at runtime
 *  - `mesos://...`, `zk://...`          Mesos; `spark.mesos.coarse` selects the backend
 *  - `simr://...`
 *
 * This method does not call `start()` on the scheduler it returns.
 *
 * @param sc      the context the scheduler (and its backend) is created for
 * @param master  the master URL, in one of the formats above
 * @param appName application name handed to the deploy and Mesos backends
 * @return the scheduler matching `master`; cluster schedulers are returned already
 *         initialized with their backend
 * @throws SparkException if `master` matches no known format, if a YARN class cannot be
 *                        loaded or constructed, or if a local-cluster is asked for less
 *                        memory per worker than the executors request
 */
private def createTaskScheduler(sc: SparkContext, master: String, appName: String): TaskScheduler = {
  // Regular expression used for local[N] master format
  val LOCAL_N_REGEX = """local\[([0-9]+)\]""".r
  // Regular expression for local[N, maxRetries], used in tests with failing tasks
  val LOCAL_N_FAILURES_REGEX = """local\[([0-9]+)\s*,\s*([0-9]+)\]""".r
  // Regular expression for simulating a Spark cluster of [N, cores, memory] locally
  val LOCAL_CLUSTER_REGEX = """local-cluster\[\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*]""".r
  // Regular expression for connecting to Spark deploy clusters
  val SPARK_REGEX = """spark://(.*)""".r
  // Regular expression for connection to Mesos cluster by mesos:// or zk:// url
  val MESOS_REGEX = """(mesos|zk)://.*""".r
  // Regular expression for connection to Simr cluster
  val SIMR_REGEX = """simr://(.*)""".r

  // Builds a YARN scheduler by class name through its (SparkContext) constructor.
  // Shared by both YARN modes; the cast stays inside the try so that every failure,
  // including an unexpected class type, surfaces as the same SparkException.
  def newYarnScheduler(className: String): ClusterScheduler = {
    try {
      val clazz = Class.forName(className)
      val cons = clazz.getConstructor(classOf[SparkContext])
      cons.newInstance(sc).asInstanceOf[ClusterScheduler]
    } catch {
      // TODO: Enumerate the exact reasons why it can fail
      // But irrespective of it, it means we cannot proceed !
      case th: Throwable => {
        throw new SparkException("YARN mode not available ?", th)
      }
    }
  }

  master match {
    case "local" =>
      new LocalScheduler(1, 0, sc)

    case LOCAL_N_REGEX(threads) =>
      new LocalScheduler(threads.toInt, 0, sc)

    case LOCAL_N_FAILURES_REGEX(threads, maxFailures) =>
      new LocalScheduler(threads.toInt, maxFailures.toInt, sc)

    case SPARK_REGEX(sparkUrl) =>
      val scheduler = new ClusterScheduler(sc)
      // A comma-separated list names several masters; each gets its own spark:// URL.
      val masterUrls = sparkUrl.split(",").map("spark://" + _)
      val backend = new SparkDeploySchedulerBackend(scheduler, sc, masterUrls, appName)
      scheduler.initialize(backend)
      scheduler

    case LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, memoryPerSlave) =>
      // Check to make sure memory requested <= memoryPerSlave. Otherwise Spark will just hang.
      val memoryPerSlaveInt = memoryPerSlave.toInt
      if (SparkContext.executorMemoryRequested > memoryPerSlaveInt) {
        throw new SparkException(
          "Asked to launch cluster with %d MB RAM / worker but requested %d MB/worker".format(
            memoryPerSlaveInt, SparkContext.executorMemoryRequested))
      }

      val scheduler = new ClusterScheduler(sc)
      val localCluster = new LocalSparkCluster(
        numSlaves.toInt, coresPerSlave.toInt, memoryPerSlaveInt)
      val masterUrls = localCluster.start()
      val backend = new SparkDeploySchedulerBackend(scheduler, sc, masterUrls, appName)
      scheduler.initialize(backend)
      // The cluster was started here, so stop it when the backend shuts down.
      backend.shutdownCallback = (backend: SparkDeploySchedulerBackend) => {
        localCluster.stop()
      }
      scheduler

    case "yarn-standalone" =>
      val scheduler = newYarnScheduler("org.apache.spark.scheduler.cluster.YarnClusterScheduler")
      val backend = new CoarseGrainedSchedulerBackend(scheduler, sc.env.actorSystem)
      scheduler.initialize(backend)
      scheduler

    case "yarn-client" =>
      val scheduler =
        newYarnScheduler("org.apache.spark.scheduler.cluster.YarnClientClusterScheduler")

      // The client-mode backend is also looked up by name at runtime.
      val backend = try {
        val clazz = Class.forName("org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend")
        val cons = clazz.getConstructor(classOf[ClusterScheduler], classOf[SparkContext])
        cons.newInstance(scheduler, sc).asInstanceOf[CoarseGrainedSchedulerBackend]
      } catch {
        case th: Throwable => {
          throw new SparkException("YARN mode not available ?", th)
        }
      }

      scheduler.initialize(backend)
      scheduler

    case mesosUrl @ MESOS_REGEX(_) =>
      MesosNativeLibrary.load()
      val scheduler = new ClusterScheduler(sc)
      val coarseGrained = System.getProperty("spark.mesos.coarse", "false").toBoolean
      val url = mesosUrl.stripPrefix("mesos://") // strip scheme from raw Mesos URLs
      val backend = if (coarseGrained) {
        new CoarseMesosSchedulerBackend(scheduler, sc, url, appName)
      } else {
        new MesosSchedulerBackend(scheduler, sc, url, appName)
      }
      scheduler.initialize(backend)
      scheduler

    case SIMR_REGEX(simrUrl) =>
      val scheduler = new ClusterScheduler(sc)
      val backend = new SimrSchedulerBackend(scheduler, sc, simrUrl)
      scheduler.initialize(backend)
      scheduler

    case _ =>
      throw new SparkException("Could not parse Master URL: '" + master + "'")
  }
}
}
/**
......
......@@ -104,7 +104,7 @@ abstract class RDD[T: ClassTag](
protected def getPreferredLocations(split: Partition): Seq[String] = Nil
/** Optionally overridden by subclasses to specify how they are partitioned. */
val partitioner: Option[Partitioner] = None
@transient val partitioner: Option[Partitioner] = None
// =======================================================================
// Methods and fields available on all RDDs
......@@ -117,7 +117,7 @@ abstract class RDD[T: ClassTag](
val id: Int = sc.newRddId()
/** A friendly name for this RDD */
var name: String = null
@transient var name: String = null
/** Assign a name to this RDD */
def setName(_name: String) = {
......@@ -126,7 +126,7 @@ abstract class RDD[T: ClassTag](
}
/** User-defined generator of this RDD*/
var generator = Utils.getCallSiteInfo.firstUserClass
@transient var generator = Utils.getCallSiteInfo.firstUserClass
/** Reset generator*/
def setGenerator(_generator: String) = {
......@@ -938,7 +938,7 @@ abstract class RDD[T: ClassTag](
private var storageLevel: StorageLevel = StorageLevel.NONE
/** Record user function generating this RDD. */
private[spark] val origin = Utils.formatSparkCallSite
@transient private[spark] val origin = Utils.formatSparkCallSite
private[spark] def elementClassTag: ClassTag[T] = classTag[T]
......@@ -953,7 +953,7 @@ abstract class RDD[T: ClassTag](
def context = sc
// Avoid handling doCheckpoint multiple times to prevent excessive recursion
private var doCheckpointCalled = false
@transient private var doCheckpointCalled = false
/**
* Performs the checkpointing of this RDD by saving this. It is called by the DAGScheduler
......
......@@ -23,7 +23,8 @@ import scala.reflect.ClassTag
private[spark] class ZippedPartitionsPartition(
idx: Int,
@transient rdds: Seq[RDD[_]])
@transient rdds: Seq[RDD[_]],
@transient val preferredLocations: Seq[String])
extends Partition {
override val index: Int = idx
......@@ -48,27 +49,21 @@ abstract class ZippedPartitionsBaseRDD[V: ClassTag](
if (preservesPartitioning) firstParent[Any].partitioner else None
override def getPartitions: Array[Partition] = {
val sizes = rdds.map(x => x.partitions.size)
if (!sizes.forall(x => x == sizes(0))) {
val numParts = rdds.head.partitions.size
if (!rdds.forall(rdd => rdd.partitions.size == numParts)) {
throw new IllegalArgumentException("Can't zip RDDs with unequal numbers of partitions")
}
val array = new Array[Partition](sizes(0))
for (i <- 0 until sizes(0)) {
array(i) = new ZippedPartitionsPartition(i, rdds)
Array.tabulate[Partition](numParts) { i =>
val prefs = rdds.map(rdd => rdd.preferredLocations(rdd.partitions(i)))
// Check whether there are any hosts that match all RDDs; otherwise return the union
val exactMatchLocations = prefs.reduce((x, y) => x.intersect(y))
val locs = if (!exactMatchLocations.isEmpty) exactMatchLocations else prefs.flatten.distinct
new ZippedPartitionsPartition(i, rdds, locs)
}
array
}
override def getPreferredLocations(s: Partition): Seq[String] = {
val parts = s.asInstanceOf[ZippedPartitionsPartition].partitions
val prefs = rdds.zip(parts).map { case (rdd, p) => rdd.preferredLocations(p) }
// Check whether there are any hosts that match all RDDs; otherwise return the union
val exactMatchLocations = prefs.reduce((x, y) => x.intersect(y))
if (!exactMatchLocations.isEmpty) {
exactMatchLocations
} else {
prefs.flatten.distinct
}
s.asInstanceOf[ZippedPartitionsPartition].preferredLocations
}
override def clearDependencies() {
......
......@@ -65,12 +65,13 @@ private[scheduler] case class CompletionEvent(
taskMetrics: TaskMetrics)
extends DAGSchedulerEvent
private[scheduler]
case class ExecutorGained(execId: String, host: String) extends DAGSchedulerEvent
private[scheduler] case class ExecutorGained(execId: String, host: String) extends DAGSchedulerEvent
private[scheduler] case class ExecutorLost(execId: String) extends DAGSchedulerEvent
private[scheduler]
case class TaskSetFailed(taskSet: TaskSet, reason: String) extends DAGSchedulerEvent
private[scheduler] case object ResubmitFailedStages extends DAGSchedulerEvent
private[scheduler] case object StopDAGScheduler extends DAGSchedulerEvent
......@@ -31,6 +31,7 @@ private[spark] class JobWaiter[T](
private var finishedTasks = 0
// Is the job as a whole finished (succeeded or failed)?
@volatile
private var _jobFinished = totalTasks == 0
def jobFinished = _jobFinished
......
......@@ -37,7 +37,7 @@ case class SparkListenerTaskGettingResult(
case class SparkListenerTaskEnd(task: Task[_], reason: TaskEndReason, taskInfo: TaskInfo,
taskMetrics: TaskMetrics) extends SparkListenerEvents
case class SparkListenerJobStart(job: ActiveJob, properties: Properties = null)
case class SparkListenerJobStart(job: ActiveJob, stageIds: Array[Int], properties: Properties = null)
extends SparkListenerEvents
case class SparkListenerJobEnd(job: ActiveJob, jobResult: JobResult)
......
......@@ -99,8 +99,8 @@ private[spark] class ClusterScheduler(val sc: SparkContext)
this.dagScheduler = dagScheduler
}
def initialize(context: SchedulerBackend) {
backend = context
def initialize(backend: SchedulerBackend) {
this.backend = backend
// temporarily set rootPool name to empty
rootPool = new Pool("", schedulingMode, 0, 0)
schedulableBuilder = {
......@@ -172,7 +172,9 @@ private[spark] class ClusterScheduler(val sc: SparkContext)
backend.killTask(tid, execId)
}
}
tsm.error("Stage %d was cancelled".format(stageId))
logInfo("Stage %d was cancelled".format(stageId))
tsm.removeAllRunningTasks()
taskSetFinished(tsm)
}
}
......
......@@ -574,7 +574,7 @@ private[spark] class ClusterTaskSetManager(
runningTasks = runningTasksSet.size
}
private def removeAllRunningTasks() {
private[cluster] def removeAllRunningTasks() {
val numRunningTasks = runningTasksSet.size
runningTasksSet.clear()
if (parent != null) {
......
......@@ -74,7 +74,7 @@ class LocalActor(localScheduler: LocalScheduler, private var freeCores: Int)
}
}
private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: SparkContext)
private[spark] class LocalScheduler(val threads: Int, val maxFailures: Int, val sc: SparkContext)
extends TaskScheduler
with ExecutorBackend
with Logging {
......@@ -144,7 +144,8 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc:
localActor ! KillTask(tid)
}
}
tsm.error("Stage %d was cancelled".format(stageId))
logInfo("Stage %d was cancelled".format(stageId))
taskSetFinished(tsm)
}
}
......@@ -192,17 +193,19 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc:
synchronized {
taskIdToTaskSetId.get(taskId) match {
case Some(taskSetId) =>
val taskSetManager = activeTaskSets(taskSetId)
taskSetTaskIds(taskSetId) -= taskId
state match {
case TaskState.FINISHED =>
taskSetManager.taskEnded(taskId, state, serializedData)
case TaskState.FAILED =>
taskSetManager.taskFailed(taskId, state, serializedData)
case TaskState.KILLED =>
taskSetManager.error("Task %d was killed".format(taskId))
case _ => {}
val taskSetManager = activeTaskSets.get(taskSetId)
taskSetManager.foreach { tsm =>
taskSetTaskIds(taskSetId) -= taskId
state match {
case TaskState.FINISHED =>
tsm.taskEnded(taskId, state, serializedData)
case TaskState.FAILED =>
tsm.taskFailed(taskId, state, serializedData)
case TaskState.KILLED =>
tsm.error("Task %d was killed".format(taskId))
case _ => {}
}
}
case None =>
logInfo("Ignoring update from TID " + taskId + " because its task set is gone")
......
......@@ -62,7 +62,7 @@ class ShuffleBlockManager(blockManager: BlockManager) {
// Turning off shuffle file consolidation causes all shuffle Blocks to get their own file.
// TODO: Remove this once the shuffle file consolidation feature is stable.
val consolidateShuffleFiles =
System.getProperty("spark.shuffle.consolidateFiles", "true").toBoolean
System.getProperty("spark.shuffle.consolidateFiles", "false").toBoolean
private val bufferSize = System.getProperty("spark.shuffle.file.buffer.kb", "100").toInt * 1024
......
......@@ -101,7 +101,7 @@ class StorageLevel private(
var result = ""
result += (if (useDisk) "Disk " else "")
result += (if (useMemory) "Memory " else "")
result += (if (deserialized) "Deserialized " else "Serialized")
result += (if (deserialized) "Deserialized " else "Serialized ")
result += "%sx Replicated".format(replication)
result
}
......
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.storage
import java.util.concurrent.atomic.AtomicLong
......
......@@ -60,11 +60,13 @@ private[spark] class StagePage(parent: JobProgressUI) {
var activeTime = 0L
listener.stageIdToTasksActive(stageId).foreach(activeTime += _.timeRunning(now))
val finishedTasks = listener.stageIdToTaskInfos(stageId).filter(_._1.finished)
val summary =
<div>
<ul class="unstyled">
<li>
<strong>CPU time: </strong>
<strong>Total duration across all tasks: </strong>
{parent.formatDuration(listener.stageIdToTime.getOrElse(stageId, 0L) + activeTime)}
</li>
{if (hasShuffleRead)
......@@ -104,6 +106,33 @@ private[spark] class StagePage(parent: JobProgressUI) {
val serviceQuantiles = "Duration" +: Distribution(serviceTimes).get.getQuantiles().map(
ms => parent.formatDuration(ms.toLong))
val gettingResultTimes = validTasks.map{case (info, metrics, exception) =>
if (info.gettingResultTime > 0) {
(info.finishTime - info.gettingResultTime).toDouble
} else {
0.0
}
}
val gettingResultQuantiles = ("Time spent fetching task results" +:
Distribution(gettingResultTimes).get.getQuantiles().map(
millis => parent.formatDuration(millis.toLong)))
// The scheduler delay includes the network delay to send the task to the worker
// machine and to send back the result (but not the time to fetch the task result,
// if it needed to be fetched from the block manager on the worker).
val schedulerDelays = validTasks.map{case (info, metrics, exception) =>
val totalExecutionTime = {
if (info.gettingResultTime > 0) {
(info.gettingResultTime - info.launchTime).toDouble
} else {
(info.finishTime - info.launchTime).toDouble
}
}
totalExecutionTime - metrics.get.executorRunTime
}
val schedulerDelayQuantiles = ("Scheduler delay" +:
Distribution(schedulerDelays).get.getQuantiles().map(
millis => parent.formatDuration(millis.toLong)))
def getQuantileCols(data: Seq[Double]) =
Distribution(data).get.getQuantiles().map(d => Utils.bytesToString(d.toLong))
......@@ -119,7 +148,10 @@ private[spark] class StagePage(parent: JobProgressUI) {
}
val shuffleWriteQuantiles = "Shuffle Write" +: getQuantileCols(shuffleWriteSizes)
val listings: Seq[Seq[String]] = Seq(serviceQuantiles,
val listings: Seq[Seq[String]] = Seq(
serviceQuantiles,
gettingResultQuantiles,
schedulerDelayQuantiles,
if (hasShuffleRead) shuffleReadQuantiles else Nil,
if (hasShuffleWrite) shuffleWriteQuantiles else Nil)
......@@ -152,21 +184,18 @@ private[spark] class StagePage(parent: JobProgressUI) {
else metrics.map(m => parent.formatDuration(m.executorRunTime)).getOrElse("")
val gcTime = metrics.map(m => m.jvmGCTime).getOrElse(0L)
var shuffleReadSortable: String = ""
var shuffleReadReadable: String = ""
if (shuffleRead) {
shuffleReadSortable = metrics.flatMap{m => m.shuffleReadMetrics}.map{s => s.remoteBytesRead}.toString()
shuffleReadReadable = metrics.flatMap{m => m.shuffleReadMetrics}.map{s =>
Utils.bytesToString(s.remoteBytesRead)}.getOrElse("")
}
val maybeShuffleRead = metrics.flatMap{m => m.shuffleReadMetrics}.map{s => s.remoteBytesRead}
val shuffleReadSortable = maybeShuffleRead.map(_.toString).getOrElse("")
val shuffleReadReadable = maybeShuffleRead.map{Utils.bytesToString(_)}.getOrElse("")
var shuffleWriteSortable: String = ""
var shuffleWriteReadable: String = ""
if (shuffleWrite) {
shuffleWriteSortable = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => s.shuffleBytesWritten}.toString()
shuffleWriteReadable = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s =>
Utils.bytesToString(s.shuffleBytesWritten)}.getOrElse("")
}
val maybeShuffleWrite = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => s.shuffleBytesWritten}
val shuffleWriteSortable = maybeShuffleWrite.map(_.toString).getOrElse("")
val shuffleWriteReadable = maybeShuffleWrite.map{Utils.bytesToString(_)}.getOrElse("")
val maybeWriteTime = metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => s.shuffleWriteTime}
val writeTimeSortable = maybeWriteTime.map(_.toString).getOrElse("")
val writeTimeReadable = maybeWriteTime.map{ t => t / (1000 * 1000)}.map{ ms =>
if (ms == 0) "" else parent.formatDuration(ms)}.getOrElse("")
<tr>
<td>{info.index}</td>
......@@ -187,8 +216,8 @@ private[spark] class StagePage(parent: JobProgressUI) {
</td>
}}
{if (shuffleWrite) {
<td>{metrics.flatMap{m => m.shuffleWriteMetrics}.map{s =>
parent.formatDuration(s.shuffleWriteTime / (1000 * 1000))}.getOrElse("")}
<td sorttable_customkey={writeTimeSortable}>
{writeTimeReadable}
</td>
<td sorttable_customkey={shuffleWriteSortable}>
{shuffleWriteReadable}
......
......@@ -114,7 +114,7 @@ class JobCancellationSuite extends FunSuite with ShouldMatchers with BeforeAndAf
// Once A is cancelled, job B should finish fairly quickly.
assert(jobB.get() === 100)
}
/*
test("two jobs sharing the same stage") {
// sem1: make sure cancel is issued after some tasks are launched
// sem2: make sure the first stage is not finished until cancel is issued
......@@ -148,7 +148,7 @@ class JobCancellationSuite extends FunSuite with ShouldMatchers with BeforeAndAf
intercept[SparkException] { f1.get() }
intercept[SparkException] { f2.get() }
}
*/
def testCount() {
// Cancel before launching any tasks
{
......
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark
import org.scalatest.{FunSuite, PrivateMethodTester}
import org.apache.spark.scheduler.TaskScheduler
import org.apache.spark.scheduler.cluster.{ClusterScheduler, SimrSchedulerBackend, SparkDeploySchedulerBackend}
import org.apache.spark.scheduler.cluster.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend}
import org.apache.spark.scheduler.local.LocalScheduler
/**
 * Checks that the private `SparkContext.createTaskScheduler` picks the expected scheduler
 * (and, for cluster schedulers, the expected backend) for each supported master URL format.
 *
 * The YARN and Mesos cases accept a "not available" outcome, because the YARN classes or
 * the Mesos native library may be missing where the tests run.
 */
class SparkContextSchedulerCreationSuite
  extends FunSuite with PrivateMethodTester with LocalSparkContext with Logging {

  /**
   * Invokes the private `createTaskScheduler` on the `SparkContext` companion object.
   *
   * @param master master URL to hand to the factory
   * @return the scheduler the factory produced (never started by this helper)
   */
  def createTaskScheduler(master: String): TaskScheduler = {
    // Create local SparkContext to setup a SparkEnv. We don't actually want to start() the
    // real schedulers, so we don't want to create a full SparkContext with the desired scheduler.
    sc = new SparkContext("local", "test")
    val createTaskSchedulerMethod = PrivateMethod[TaskScheduler]('createTaskScheduler)
    SparkContext invokePrivate createTaskSchedulerMethod(sc, master, "test")
  }

  /**
   * Runs `body` with the system property `key` set to `value`, then puts the property
   * back the way it was (clearing it if it was previously unset), even if `body` throws.
   * System properties are JVM-global, so leaving one set would leak into later tests.
   */
  private def withSystemProperty(key: String, value: String)(body: => Unit): Unit = {
    val previous = Option(System.getProperty(key))
    System.setProperty(key, value)
    try {
      body
    } finally {
      previous match {
        case Some(old) => System.setProperty(key, old)
        case None => System.clearProperty(key)
      }
    }
  }

  test("bad-master") {
    val e = intercept[SparkException] {
      createTaskScheduler("localhost:1234")
    }
    assert(e.getMessage.contains("Could not parse Master URL"))
  }

  test("local") {
    createTaskScheduler("local") match {
      case s: LocalScheduler =>
        assert(s.threads === 1)
        assert(s.maxFailures === 0)
      case _ => fail()
    }
  }

  test("local-n") {
    createTaskScheduler("local[5]") match {
      case s: LocalScheduler =>
        assert(s.threads === 5)
        assert(s.maxFailures === 0)
      case _ => fail()
    }
  }

  test("local-n-failures") {
    createTaskScheduler("local[4, 2]") match {
      case s: LocalScheduler =>
        assert(s.threads === 4)
        assert(s.maxFailures === 2)
      case _ => fail()
    }
  }

  test("simr") {
    createTaskScheduler("simr://uri") match {
      case s: ClusterScheduler =>
        assert(s.backend.isInstanceOf[SimrSchedulerBackend])
      case _ => fail()
    }
  }

  test("local-cluster") {
    createTaskScheduler("local-cluster[3, 14, 512]") match {
      case s: ClusterScheduler =>
        assert(s.backend.isInstanceOf[SparkDeploySchedulerBackend])
      case _ => fail()
    }
  }

  /**
   * Expects `master` to yield a scheduler of exactly the named class, or to fail with the
   * "YARN mode not available" SparkException; that failure is logged and tolerated.
   */
  def testYarn(master: String, expectedClassName: String) {
    try {
      createTaskScheduler(master) match {
        case s: ClusterScheduler =>
          assert(s.getClass === Class.forName(expectedClassName))
        case _ => fail()
      }
    } catch {
      case e: SparkException =>
        assert(e.getMessage.contains("YARN mode not available"))
        logWarning("YARN not available, could not test actual YARN scheduler creation")
      case e: Throwable => fail(e)
    }
  }

  test("yarn-standalone") {
    testYarn("yarn-standalone", "org.apache.spark.scheduler.cluster.YarnClusterScheduler")
  }

  test("yarn-client") {
    testYarn("yarn-client", "org.apache.spark.scheduler.cluster.YarnClientClusterScheduler")
  }

  /**
   * Expects `master` to yield a ClusterScheduler whose backend is exactly `expectedClass`,
   * or to fail with an UnsatisfiedLinkError mentioning "no mesos in"; that failure is
   * logged and tolerated.
   */
  def testMesos(master: String, expectedClass: Class[_]) {
    try {
      createTaskScheduler(master) match {
        case s: ClusterScheduler =>
          assert(s.backend.getClass === expectedClass)
        case _ => fail()
      }
    } catch {
      case e: UnsatisfiedLinkError =>
        assert(e.getMessage.contains("no mesos in"))
        logWarning("Mesos not available, could not test actual Mesos scheduler creation")
      case e: Throwable => fail(e)
    }
  }

  test("mesos fine-grained") {
    withSystemProperty("spark.mesos.coarse", "false") {
      testMesos("mesos://localhost:1234", classOf[MesosSchedulerBackend])
    }
  }

  test("mesos coarse-grained") {
    withSystemProperty("spark.mesos.coarse", "true") {
      testMesos("mesos://localhost:1234", classOf[CoarseMesosSchedulerBackend])
    }
  }

  test("mesos with zookeeper") {
    withSystemProperty("spark.mesos.coarse", "false") {
      testMesos("zk://localhost:1234,localhost:2345", classOf[MesosSchedulerBackend])
    }
  }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment