Commit d4d7993b authored by Matei Zaharia

Several fixes to the work on logging when no resources can be used by a job.

Fixed some of the messages as well as code style.
parent f33662c1
@@ -205,10 +205,6 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor
         }
       }
     }
-    if (workers.toArray.filter(_.state == WorkerState.ALIVE).size > 0 &&
-        firstApp != None && firstApp.get.executors.size == 0) {
-      logWarning("Could not find any machines with enough memory. Ensure that SPARK_WORKER_MEM > SPARK_MEM.")
-    }
   }
 
   def launchExecutor(worker: WorkerInfo, exec: ExecutorInfo, sparkHome: String) {
@@ -254,6 +250,10 @@ private[spark] class Master(ip: String, port: Int, webUiPort: Int) extends Actor
     if (firstApp == None) {
       firstApp = Some(app)
     }
+    val workersAlive = workers.filter(_.state == WorkerState.ALIVE).toArray
+    if (workersAlive.size > 0 && !workersAlive.exists(_.memoryFree >= desc.memoryPerSlave)) {
+      logWarning("Could not find any workers with enough memory for " + firstApp.get.id)
+    }
     return app
   }
 
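For context, a minimal standalone sketch of the check this hunk adds (not the Master code itself; WorkerStub, the memory figures, and the app ids below are made up for illustration): warn only when at least one worker is alive but none of the alive workers has enough free memory for one executor.

object MemoryCheckSketch {
  // Hypothetical stand-in for the relevant fields of Spark's WorkerInfo.
  case class WorkerStub(alive: Boolean, memoryFree: Int)

  def logWarning(msg: String): Unit = println("WARN " + msg)  // stand-in logger

  // Same shape as the added check: warn only when some worker is alive but none
  // of the alive workers has enough free memory for one executor.
  def warnIfUnderProvisioned(workers: Seq[WorkerStub], memoryPerSlave: Int, appId: String): Unit = {
    val workersAlive = workers.filter(_.alive)
    if (workersAlive.nonEmpty && !workersAlive.exists(_.memoryFree >= memoryPerSlave)) {
      logWarning("Could not find any workers with enough memory for " + appId)
    }
  }

  def main(args: Array[String]): Unit = {
    // Two alive workers with 512 MB free, app asking for 1024 MB per executor: warns.
    warnIfUnderProvisioned(Seq(WorkerStub(true, 512), WorkerStub(true, 512)), 1024, "app-0001")
    // No alive workers at all: stays silent, since there is nothing to compare against yet.
    warnIfUnderProvisioned(Seq(WorkerStub(false, 4096)), 1024, "app-0002")
  }
}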
@@ -24,7 +24,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext)
   // How often to check for speculative tasks
   val SPECULATION_INTERVAL = System.getProperty("spark.speculation.interval", "100").toLong
   // Threshold above which we warn user initial TaskSet may be starved
-  val STARVATION_TIMEOUT = System.getProperty("spark.starvation.timeout", "5000").toLong
+  val STARVATION_TIMEOUT = System.getProperty("spark.starvation.timeout", "15000").toLong
 
   val activeTaskSets = new HashMap[String, TaskSetManager]
   var activeTaskSetsQueue = new ArrayBuffer[TaskSetManager]
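Since STARVATION_TIMEOUT is read through System.getProperty, the new 15-second default can still be overridden per application; a minimal sketch, assuming the property is set on the driver before the scheduler is constructed:

// Hypothetical driver-side override; must run before the scheduler reads the property.
System.setProperty("spark.starvation.timeout", "5000")  // warn after 5 s instead of 15 s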
@@ -106,8 +106,10 @@ private[spark] class ClusterScheduler(val sc: SparkContext)
       starvationTimer.scheduleAtFixedRate(new TimerTask() {
         override def run() {
           if (!hasLaunchedTask) {
-            logWarning("Initial TaskSet has not accepted any offers. " +
-              "Check the scheduler UI to ensure slaves are registered.")
+            logWarning("Initial job has not accepted any resources; " +
+              "check your cluster UI to ensure that workers are registered")
+          } else {
+            this.cancel()
           }
         }
       }, STARVATION_TIMEOUT, STARVATION_TIMEOUT)
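As a standalone illustration of the pattern this hunk edits (a sketch, not the ClusterScheduler itself; logWarning and the sleeps are stand-ins for Spark's logger and real scheduling activity): a repeating TimerTask keeps warning until the first task has launched, then cancels itself.

import java.util.{Timer, TimerTask}

object StarvationTimerSketch {
  // Same property name and new default as in this diff.
  val STARVATION_TIMEOUT = System.getProperty("spark.starvation.timeout", "15000").toLong

  @volatile var hasLaunchedTask = false
  val starvationTimer = new Timer("starvation-timer")

  def logWarning(msg: String): Unit = println("WARN " + msg)  // stand-in logger

  def main(args: Array[String]): Unit = {
    starvationTimer.scheduleAtFixedRate(new TimerTask() {
      override def run(): Unit = {
        if (!hasLaunchedTask) {
          logWarning("Initial job has not accepted any resources; " +
            "check your cluster UI to ensure that workers are registered")
        } else {
          this.cancel()  // first task launched, stop repeating the warning
        }
      }
    }, STARVATION_TIMEOUT, STARVATION_TIMEOUT)

    Thread.sleep(2 * STARVATION_TIMEOUT + 500)  // let the warning fire twice
    hasLaunchedTask = true                      // the next timer run cancels the task
    Thread.sleep(STARVATION_TIMEOUT + 500)
    starvationTimer.cancel()                    // shut the timer down so the JVM can exit
  }
}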
@@ -169,7 +171,9 @@ private[spark] class ClusterScheduler(val sc: SparkContext)
           }
         } while (launchedTask)
       }
-      if (tasks.size > 0) hasLaunchedTask = true
+      if (tasks.size > 0) {
+        hasLaunchedTask = true
+      }
       return tasks
     }
   }