Skip to content
Snippets Groups Projects
Commit a1635741 authored by Akshat Aranya's avatar Akshat Aranya Committed by Josh Rosen
Browse files

[SPARK-7795] [CORE] Speed up task scheduling in standalone mode by reusing serializer

My experiments with scheduling very short tasks in standalone cluster mode indicated that a significant amount of time was being spent in scheduling the tasks (>500ms for 256 tasks).  I found that most of the time was being spent in creating a new instance of serializer for each task.  Changing this to just one serializer brought down the scheduling time to 8ms.

Author: Akshat Aranya <aaranya@quantcast.com>

Closes #6323 from coolfrood/master and squashes the following commits:

12d8c9e [Akshat Aranya] Reduce visibility of serializer
bd4a5dd [Akshat Aranya] Style fix
0b8ca93 [Akshat Aranya] Incorporate review comments
fe530cd [Akshat Aranya] Speed up task scheduling in standalone mode by reusing serializer instead of creating a new one for each task.
parent 63a5ce75
No related branches found
No related tags found
No related merge requests found
......@@ -69,6 +69,11 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
class DriverEndpoint(override val rpcEnv: RpcEnv, sparkProperties: Seq[(String, String)])
extends ThreadSafeRpcEndpoint with Logging {
// If this DriverEndpoint is changed to support multiple threads,
// then this may need to be changed so that we don't share the serializer
// instance across threads
private val ser = SparkEnv.get.closureSerializer.newInstance()
override protected def log = CoarseGrainedSchedulerBackend.this.log
private val addressToExecutorId = new HashMap[RpcAddress, String]
......@@ -163,7 +168,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
}
// Make fake resource offers on all executors
def makeOffers() {
private def makeOffers() {
launchTasks(scheduler.resourceOffers(executorDataMap.map { case (id, executorData) =>
new WorkerOffer(id, executorData.executorHost, executorData.freeCores)
}.toSeq))
......@@ -175,16 +180,15 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
}
// Make fake resource offers on just one executor
def makeOffers(executorId: String) {
private def makeOffers(executorId: String) {
val executorData = executorDataMap(executorId)
launchTasks(scheduler.resourceOffers(
Seq(new WorkerOffer(executorId, executorData.executorHost, executorData.freeCores))))
}
// Launch tasks returned by a set of resource offers
def launchTasks(tasks: Seq[Seq[TaskDescription]]) {
private def launchTasks(tasks: Seq[Seq[TaskDescription]]) {
for (task <- tasks.flatten) {
val ser = SparkEnv.get.closureSerializer.newInstance()
val serializedTask = ser.serialize(task)
if (serializedTask.limit >= akkaFrameSize - AkkaUtils.reservedSizeBytes) {
val taskSetId = scheduler.taskIdToTaskSetId(task.taskId)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment