Commit ac480fd9
Authored 12 years ago by Shivaram Venkataraman

Clean up variables and counters in BlockFetcherIterator
Parent: cb2f5046
Showing 1 changed file with 31 additions and 23 deletions

core/src/main/scala/spark/storage/BlockFetcherIterator.scala (+31, −23)
@@ -67,12 +67,20 @@ object BlockFetcherIterator {
       throw new IllegalArgumentException("BlocksByAddress is null")
     }
 
-    protected var _totalBlocks = blocksByAddress.map(_._2.size).sum
-    logDebug("Getting " + _totalBlocks + " blocks")
+    // Total number blocks fetched (local + remote). Also number of FetchResults expected
+    protected var _numBlocksToFetch = 0
+
     protected var startTime = System.currentTimeMillis
-    protected val localBlockIds = new ArrayBuffer[String]()
-    protected val localNonZeroBlocks = new ArrayBuffer[String]()
-    protected val remoteBlockIds = new HashSet[String]()
+
+    // This represents the number of local blocks, also counting zero-sized blocks
+    private var numLocal = 0
+    // BlockIds for local blocks that need to be fetched. Excludes zero-sized blocks
+    protected val localBlocksToFetch = new ArrayBuffer[String]()
+
+    // This represents the number of remote blocks, also counting zero-sized blocks
+    private var numRemote = 0
+    // BlockIds for remote blocks that need to be fetched. Excludes zero-sized blocks
+    protected val remoteBlocksToFetch = new HashSet[String]()
 
     // A queue to hold our results.
     protected val results = new LinkedBlockingQueue[FetchResult]
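
For context, a minimal standalone sketch (not part of the commit) of how the new counters relate: numLocal and numRemote count every block on each side, including zero-sized ones, while _numBlocksToFetch counts only the blocks that will actually produce a FetchResult. The block IDs and sizes below are invented for illustration.

object CounterSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical per-block sizes; one local and one remote block are zero-sized.
    val localSizes  = Map("local_0" -> 100L, "local_1" -> 0L)
    val remoteSizes = Map("remote_0" -> 50L, "remote_1" -> 0L, "remote_2" -> 75L)

    val numLocal  = localSizes.size                                   // counts zero-sized blocks too
    val numRemote = remoteSizes.size
    val localBlocksToFetch  = localSizes.filter(_._2 != 0).keys.toSeq // zero-sized blocks excluded
    val remoteBlocksToFetch = remoteSizes.filter(_._2 != 0).keys.toSet
    val numBlocksToFetch    = localBlocksToFetch.size + remoteBlocksToFetch.size

    println(s"totalBlocks      = ${numLocal + numRemote}") // 5
    println(s"numBlocksToFetch = $numBlocksToFetch")       // 3 FetchResults expected
  }
}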
@@ -125,15 +133,15 @@ object BlockFetcherIterator {
     protected def splitLocalRemoteBlocks(): ArrayBuffer[FetchRequest] = {
       // Split local and remote blocks. Remote blocks are further split into FetchRequests of size
       // at most maxBytesInFlight in order to limit the amount of data in flight.
-      val originalTotalBlocks = _totalBlocks
       val remoteRequests = new ArrayBuffer[FetchRequest]
       for ((address, blockInfos) <- blocksByAddress) {
         if (address == blockManagerId) {
-          localBlockIds ++= blockInfos.map(_._1)
-          localNonZeroBlocks ++= blockInfos.filter(_._2 != 0).map(_._1)
-          _totalBlocks -= (localBlockIds.size - localNonZeroBlocks.size)
+          numLocal = blockInfos.size
+          // Filter out zero-sized blocks
+          localBlocksToFetch ++= blockInfos.filter(_._2 != 0).map(_._1)
+          _numBlocksToFetch += localBlocksToFetch.size
         } else {
-          remoteBlockIds ++= blockInfos.map(_._1)
+          numRemote += blockInfos.size
           // Make our requests at least maxBytesInFlight / 5 in length; the reason to keep them
           // smaller than maxBytesInFlight is to allow multiple, parallel fetches from up to 5
           // nodes, rather than blocking on reading output from one node.
@@ -147,10 +155,10 @@ object BlockFetcherIterator {
           // Skip empty blocks
           if (size > 0) {
             curBlocks += ((blockId, size))
+            remoteBlocksToFetch += blockId
+            _numBlocksToFetch += 1
             curRequestSize += size
-          } else if (size == 0) {
-            _totalBlocks -= 1
-          } else {
+          } else if (size < 0) {
             throw new BlockException(blockId, "Negative block size " + size)
           }
           if (curRequestSize >= minRequestSize) {
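
The two hunks above together rework splitLocalRemoteBlocks: zero-sized blocks are now simply skipped instead of being subtracted from a running _totalBlocks, and the remaining remote blocks are grouped into FetchRequests of roughly maxBytesInFlight / 5 bytes so that up to five fetches can run in parallel. A self-contained sketch of that grouping step follows; the FetchRequest type and the exception used here are stand-ins for illustration, not Spark's own.

import scala.collection.mutable.ArrayBuffer

object GroupingSketch {
  case class FetchRequest(blocks: Seq[(String, Long)]) {
    def size: Long = blocks.map(_._2).sum
  }

  // Group (blockId, size) pairs into requests of at least maxBytesInFlight / 5 bytes,
  // skipping zero-sized blocks and rejecting negative sizes.
  def group(blockInfos: Seq[(String, Long)], maxBytesInFlight: Long): ArrayBuffer[FetchRequest] = {
    val minRequestSize = math.max(maxBytesInFlight / 5, 1L)
    val requests = new ArrayBuffer[FetchRequest]
    var curBlocks = new ArrayBuffer[(String, Long)]
    var curRequestSize = 0L
    for ((blockId, size) <- blockInfos) {
      if (size > 0) {
        curBlocks += ((blockId, size))
        curRequestSize += size
      } else if (size < 0) {
        throw new IllegalArgumentException("Negative block size " + size + " for " + blockId)
      }
      if (curRequestSize >= minRequestSize) {
        requests += FetchRequest(curBlocks.toSeq)
        curBlocks = new ArrayBuffer[(String, Long)]
        curRequestSize = 0L
      }
    }
    if (curBlocks.nonEmpty) requests += FetchRequest(curBlocks.toSeq)
    requests
  }

  def main(args: Array[String]): Unit = {
    val blocks = Seq("b0" -> 40L, "b1" -> 0L, "b2" -> 30L, "b3" -> 25L, "b4" -> 10L)
    for (r <- group(blocks, maxBytesInFlight = 100L)) {
      println("request of " + r.size + " bytes: " + r.blocks.map(_._1).mkString(", "))
    }
  }
}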
@@ -166,8 +174,8 @@ object BlockFetcherIterator {
           }
         }
       }
-      logInfo("Getting " + _totalBlocks + " non-zero-bytes blocks out of " +
-        originalTotalBlocks + " blocks")
+      logInfo("Getting " + _numBlocksToFetch + " non-zero-bytes blocks out of " +
+        totalBlocks + " blocks")
       remoteRequests
     }
@@ -175,7 +183,7 @@ object BlockFetcherIterator {
       // Get the local blocks while remote blocks are being fetched. Note that it's okay to do
       // these all at once because they will just memory-map some files, so they won't consume
       // any memory that might exceed our maxBytesInFlight
-      for (id <- localNonZeroBlocks) {
+      for (id <- localBlocksToFetch) {
         getLocalFromDisk(id, serializer) match {
           case Some(iter) => {
             // Pass 0 as size since it's not in flight
@@ -201,7 +209,7 @@ object BlockFetcherIterator {
         sendRequest(fetchRequests.dequeue())
       }
-      val numGets = remoteBlockIds.size - fetchRequests.size
+      val numGets = remoteRequests.size - fetchRequests.size
       logInfo("Started " + numGets + " remote gets in " + Utils.getUsedTimeMs(startTime))
 
       // Get Local Blocks
@@ -213,7 +221,7 @@ object BlockFetcherIterator {
     //an iterator that will read fetched blocks off the queue as they arrive.
     @volatile protected var resultsGotten = 0
 
-    override def hasNext: Boolean = resultsGotten < _totalBlocks
+    override def hasNext: Boolean = resultsGotten < _numBlocksToFetch
 
     override def next(): (String, Option[Iterator[Any]]) = {
       resultsGotten += 1
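
A small sketch (with stand-in types) of the consumption contract this hunk adjusts: hasNext is driven purely by a counter of expected results, now _numBlocksToFetch rather than _totalBlocks, while next() blocks on the results queue until the corresponding FetchResult arrives.

import java.util.concurrent.LinkedBlockingQueue

object IteratorSketch {
  // Stand-in for Spark's FetchResult; only the fields needed for the sketch.
  case class FetchResult(blockId: String, failed: Boolean)

  class ResultIterator(numBlocksToFetch: Int, results: LinkedBlockingQueue[FetchResult])
      extends Iterator[(String, Boolean)] {
    private var resultsGotten = 0
    // hasNext counts handed-out results against the number of expected FetchResults.
    override def hasNext: Boolean = resultsGotten < numBlocksToFetch
    override def next(): (String, Boolean) = {
      resultsGotten += 1
      val result = results.take() // blocks until a result is available
      (result.blockId, result.failed)
    }
  }

  def main(args: Array[String]): Unit = {
    val queue = new LinkedBlockingQueue[FetchResult]()
    queue.put(FetchResult("b0", failed = false))
    queue.put(FetchResult("b1", failed = true))
    val it = new ResultIterator(numBlocksToFetch = 2, results = queue)
    while (it.hasNext) println(it.next()) // prints (b0,false) then (b1,true)
  }
}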
@@ -230,9 +238,9 @@ object BlockFetcherIterator {
     }
 
     // Implementing BlockFetchTracker trait.
-    override def totalBlocks: Int = _totalBlocks
-    override def numLocalBlocks: Int = localBlockIds.size
-    override def numRemoteBlocks: Int = remoteBlockIds.size
+    override def totalBlocks: Int = numLocal + numRemote
+    override def numLocalBlocks: Int = numLocal
+    override def numRemoteBlocks: Int = numRemote
     override def remoteFetchTime: Long = _remoteFetchTime
     override def fetchWaitTime: Long = _fetchWaitTime
     override def remoteBytesRead: Long = _remoteBytesRead
@@ -294,7 +302,7 @@ object BlockFetcherIterator {
     private var copiers: List[_ <: Thread] = null
 
     override def initialize() {
-      // Split Local Remote Blocks and adjust totalBlocks to include only the non 0-byte blocks
+      // Split Local Remote Blocks and set numBlocksToFetch
      val remoteRequests = splitLocalRemoteBlocks()
       // Add the remote requests into our queue in a random order
       for (request <- Utils.randomize(remoteRequests)) {
@@ -316,7 +324,7 @@ object BlockFetcherIterator {
       val result = results.take()
       // if all the results has been retrieved, shutdown the copiers
       // NO need to stop the copiers if we got all the blocks ?
-      // if (resultsGotten == _totalBlocks && copiers != null) {
+      // if (resultsGotten == _numBlocksToFetch && copiers != null) {
       //   stopCopiers()
       // }
       (result.blockId, if (result.failed) None else Some(result.deserialize()))