Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
spark
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
cs525-sp18-g07
spark
Commits
aef9e5b9
Commit
aef9e5b9
authored
14 years ago
by
Matei Zaharia
Browse files
Options
Downloads
Patches
Plain Diff
Renamed ParallelOperation to Job
parent
b6debf5d
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/scala/spark/MesosScheduler.scala
+42
-42
42 additions, 42 deletions
src/scala/spark/MesosScheduler.scala
with
42 additions
and
42 deletions
src/scala/spark/MesosScheduler.scala
+
42
−
42
View file @
aef9e5b9
...
...
@@ -18,9 +18,9 @@ import mesos._
// 1) Right now, the scheduler uses a linear scan through the tasks to find a
// local one for a given node. It would be faster to have a separate list of
// pending tasks for each node.
// 2) Presenting a single slave in
ParallelOperation
.slaveOffer makes it
// 2) Presenting a single slave in
Job
.slaveOffer makes it
// difficult to balance tasks across nodes. It would be better to pass
// all the offers to the
ParallelOperation
and have it load-balance.
// all the offers to the
Job
and have it load-balance.
private
class
MesosScheduler
(
master
:
String
,
frameworkName
:
String
,
execArg
:
Array
[
Byte
])
extends
NScheduler
with
spark
.
Scheduler
with
Logging
...
...
@@ -33,14 +33,14 @@ extends NScheduler with spark.Scheduler with Logging
val
registeredLock
=
new
Object
()
// Current callback object (may be null)
var
active
Op
sQueue
=
new
Queue
[
Int
]
var
active
Op
s
=
new
HashMap
[
Int
,
ParallelOperation
]
private
var
next
Op
Id
=
0
private
[
spark
]
var
taskIdTo
Op
Id
=
new
HashMap
[
Int
,
Int
]
var
active
Job
sQueue
=
new
Queue
[
Int
]
var
active
Job
s
=
new
HashMap
[
Int
,
Job
]
private
var
next
Job
Id
=
0
private
[
spark
]
var
taskIdTo
Job
Id
=
new
HashMap
[
Int
,
Int
]
def
new
Op
Id
()
:
Int
=
{
val
id
=
next
Op
Id
next
Op
Id
+=
1
def
new
Job
Id
()
:
Int
=
{
val
id
=
next
Job
Id
next
Job
Id
+=
1
return
id
}
...
...
@@ -73,31 +73,31 @@ extends NScheduler with spark.Scheduler with Logging
new
ExecutorInfo
(
new
File
(
"spark-executor"
).
getCanonicalPath
(),
execArg
)
override
def
runTasks
[
T:
ClassManifest
](
tasks
:
Array
[
Task
[
T
]])
:
Array
[
T
]
=
{
var
op
Id
=
0
var
job
Id
=
0
waitForRegister
()
this
.
synchronized
{
op
Id
=
new
Op
Id
()
job
Id
=
new
Job
Id
()
}
val
my
Op
=
new
Simple
ParallelOperation
(
this
,
tasks
,
op
Id
)
val
my
Job
=
new
Simple
Job
(
this
,
tasks
,
job
Id
)
try
{
this
.
synchronized
{
this
.
active
Op
s
(
my
Op
.
op
Id
)
=
my
Op
this
.
active
Op
sQueue
+=
my
Op
.
op
Id
this
.
active
Job
s
(
my
Job
.
job
Id
)
=
my
Job
this
.
active
Job
sQueue
+=
my
Job
.
job
Id
}
driver
.
reviveOffers
();
my
Op
.
join
();
my
Job
.
join
();
}
finally
{
this
.
synchronized
{
this
.
active
Op
s
.
remove
(
my
Op
.
op
Id
)
this
.
active
Op
sQueue
.
dequeueAll
(
x
=>
(
x
==
my
Op
.
op
Id
))
this
.
active
Job
s
.
remove
(
my
Job
.
job
Id
)
this
.
active
Job
sQueue
.
dequeueAll
(
x
=>
(
x
==
my
Job
.
job
Id
))
}
}
if
(
my
Op
.
errorHappened
)
throw
new
SparkException
(
my
Op
.
errorMessage
,
my
Op
.
errorCode
)
if
(
my
Job
.
errorHappened
)
throw
new
SparkException
(
my
Job
.
errorMessage
,
my
Job
.
errorCode
)
else
return
my
Op
.
results
return
my
Job
.
results
}
override
def
registered
(
d
:
SchedulerDriver
,
frameworkId
:
String
)
{
...
...
@@ -122,13 +122,13 @@ extends NScheduler with spark.Scheduler with Logging
val
availableCpus
=
offers
.
map
(
_
.
getParams
.
get
(
"cpus"
).
toInt
)
val
availableMem
=
offers
.
map
(
_
.
getParams
.
get
(
"mem"
).
toInt
)
var
launchedTask
=
true
for
(
op
Id
<-
active
Op
sQueue
)
{
for
(
job
Id
<-
active
Job
sQueue
)
{
launchedTask
=
true
while
(
launchedTask
)
{
launchedTask
=
false
for
(
i
<-
0
until
offers
.
size
.
toInt
)
{
try
{
active
Ops
(
op
Id
).
slaveOffer
(
offers
.
get
(
i
),
availableCpus
(
i
),
availableMem
(
i
))
match
{
active
Jobs
(
job
Id
).
slaveOffer
(
offers
.
get
(
i
),
availableCpus
(
i
),
availableMem
(
i
))
match
{
case
Some
(
task
)
=>
tasks
.
add
(
task
)
availableCpus
(
i
)
-=
task
.
getParams
.
get
(
"cpus"
).
toInt
...
...
@@ -151,10 +151,10 @@ extends NScheduler with spark.Scheduler with Logging
override
def
statusUpdate
(
d
:
SchedulerDriver
,
status
:
TaskStatus
)
{
synchronized
{
try
{
taskIdTo
Op
Id
.
get
(
status
.
getTaskId
)
match
{
case
Some
(
op
Id
)
=>
if
(
active
Op
s
.
contains
(
op
Id
))
{
active
Ops
(
op
Id
).
statusUpdate
(
status
)
taskIdTo
Job
Id
.
get
(
status
.
getTaskId
)
match
{
case
Some
(
job
Id
)
=>
if
(
active
Job
s
.
contains
(
job
Id
))
{
active
Jobs
(
job
Id
).
statusUpdate
(
status
)
}
case
None
=>
logInfo
(
"TID "
+
status
.
getTaskId
+
" already finished"
)
...
...
@@ -168,10 +168,10 @@ extends NScheduler with spark.Scheduler with Logging
override
def
error
(
d
:
SchedulerDriver
,
code
:
Int
,
message
:
String
)
{
synchronized
{
if
(
active
Op
s
.
size
>
0
)
{
for
((
op
Id
,
active
Op
)
<-
active
Op
s
)
{
if
(
active
Job
s
.
size
>
0
)
{
for
((
job
Id
,
active
Job
)
<-
active
Job
s
)
{
try
{
active
Op
.
error
(
code
,
message
)
active
Job
.
error
(
code
,
message
)
}
catch
{
case
e
:
Exception
=>
logError
(
"Exception in error callback"
,
e
)
}
...
...
@@ -195,16 +195,16 @@ extends NScheduler with spark.Scheduler with Logging
// Trait representing an object that manages a parallel operation by
// implementing various scheduler callbacks.
trait
ParallelOperation
{
trait
Job
{
def
slaveOffer
(
s
:
SlaveOffer
,
availableCpus
:
Int
,
availableMem
:
Int
)
:
Option
[
TaskDescription
]
def
statusUpdate
(
t
:
TaskStatus
)
:
Unit
def
error
(
code
:
Int
,
message
:
String
)
:
Unit
}
class
Simple
ParallelOperation
[
T:
ClassManifest
](
sched
:
MesosScheduler
,
tasks
:
Array
[
Task
[
T
]],
val
op
Id
:
Int
)
extends
ParallelOperation
with
Logging
class
Simple
Job
[
T:
ClassManifest
](
sched
:
MesosScheduler
,
tasks
:
Array
[
Task
[
T
]],
val
job
Id
:
Int
)
extends
Job
with
Logging
{
// Maximum time to wait to run a task in a preferred location (in ms)
val
LOCALITY_WAIT
=
System
.
getProperty
(
"spark.locality.wait"
,
"3000"
).
toLong
...
...
@@ -258,12 +258,12 @@ extends ParallelOperation with Logging
tasks
(
i
).
preferredLocations
.
isEmpty
))
{
val
taskId
=
sched
.
newTaskId
()
sched
.
taskIdTo
Op
Id
(
taskId
)
=
op
Id
sched
.
taskIdTo
Job
Id
(
taskId
)
=
job
Id
tidToIndex
(
taskId
)
=
i
val
preferred
=
if
(
checkPref
)
"preferred"
else
"non-preferred"
val
message
=
"Starting task %d as
op
Id %d, TID %s on slave %s: %s (%s)"
.
format
(
i
,
op
Id
,
taskId
,
offer
.
getSlaveId
,
offer
.
getHost
,
preferred
)
"Starting task %d as
job
Id %d, TID %s on slave %s: %s (%s)"
.
format
(
i
,
job
Id
,
taskId
,
offer
.
getSlaveId
,
offer
.
getHost
,
preferred
)
logInfo
(
message
)
tasks
(
i
).
markStarted
(
offer
)
launched
(
i
)
=
true
...
...
@@ -302,8 +302,8 @@ extends ParallelOperation with Logging
val
index
=
tidToIndex
(
tid
)
if
(!
finished
(
index
))
{
tasksFinished
+=
1
logInfo
(
"Finished
opId
%d TID %d (progress: %d/%d)"
.
format
(
op
Id
,
tid
,
tasksFinished
,
numTasks
))
logInfo
(
"Finished
job
%d TID %d (progress: %d/%d)"
.
format
(
job
Id
,
tid
,
tasksFinished
,
numTasks
))
// Deserialize task result
val
result
=
Utils
.
deserialize
[
TaskResult
[
T
]](
status
.
getData
)
results
(
index
)
=
result
.
value
...
...
@@ -311,8 +311,8 @@ extends ParallelOperation with Logging
Accumulators
.
add
(
callingThread
,
result
.
accumUpdates
)
// Mark finished and stop if we've finished all the tasks
finished
(
index
)
=
true
// Remove TID ->
op
Id mapping from sched
sched
.
taskIdTo
Op
Id
.
remove
(
tid
)
// Remove TID ->
job
Id mapping from sched
sched
.
taskIdTo
Job
Id
.
remove
(
tid
)
if
(
tasksFinished
==
numTasks
)
setAllFinished
()
}
else
{
...
...
@@ -325,9 +325,9 @@ extends ParallelOperation with Logging
val
tid
=
status
.
getTaskId
val
index
=
tidToIndex
(
tid
)
if
(!
finished
(
index
))
{
logInfo
(
"Lost
opId
"
+
op
Id
+
" TID "
+
tid
)
logInfo
(
"Lost
job
"
+
job
Id
+
" TID "
+
tid
)
launched
(
index
)
=
false
sched
.
taskIdTo
Op
Id
.
remove
(
tid
)
sched
.
taskIdTo
Job
Id
.
remove
(
tid
)
tasksLaunched
-=
1
}
else
{
logInfo
(
"Ignoring task-lost event for TID "
+
tid
+
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment