Skip to content
Snippets Groups Projects
Commit cb976dfb authored by Reynold Xin
Browse files

Merge pull request #209 from pwendell/better-docs

Improve docs for shuffle instrumentation
parents 18d6df0e 297c09d4
No related branches found
No related tags found
No related merge requests found
...@@ -61,50 +61,53 @@ object TaskMetrics { ...@@ -61,50 +61,53 @@ object TaskMetrics {
class ShuffleReadMetrics extends Serializable { class ShuffleReadMetrics extends Serializable {
/** /**
* Time when shuffle finishs * Absolute time when this task finished reading shuffle data
*/ */
var shuffleFinishTime: Long = _ var shuffleFinishTime: Long = _
/** /**
* Total number of blocks fetched in a shuffle (remote or local) * Number of blocks fetched in this shuffle by this task (remote or local)
*/ */
var totalBlocksFetched: Int = _ var totalBlocksFetched: Int = _
/** /**
* Number of remote blocks fetched in a shuffle * Number of remote blocks fetched in this shuffle by this task
*/ */
var remoteBlocksFetched: Int = _ var remoteBlocksFetched: Int = _
/** /**
* Local blocks fetched in a shuffle * Number of local blocks fetched in this shuffle by this task
*/ */
var localBlocksFetched: Int = _ var localBlocksFetched: Int = _
/** /**
* Total time that is spent blocked waiting for shuffle to fetch data * Time the task spent waiting for remote shuffle blocks. This only includes the time
* blocking on shuffle input data. For instance if block B is being fetched while the task is
* still not finished processing block A, it is not considered to be blocking on block B.
*/ */
var fetchWaitTime: Long = _ var fetchWaitTime: Long = _
/** /**
* The total amount of time for all the shuffle fetches. This adds up time from overlapping * Total time spent fetching remote shuffle blocks. This aggregates the time spent fetching all
* shuffles, so can be longer than task time * input blocks. Since block fetches are both pipelined and parallelized, this can
* exceed fetchWaitTime and executorRunTime.
*/ */
var remoteFetchTime: Long = _ var remoteFetchTime: Long = _
/** /**
* Total number of remote bytes read from a shuffle * Total number of remote bytes read from the shuffle by this task
*/ */
var remoteBytesRead: Long = _ var remoteBytesRead: Long = _
} }
class ShuffleWriteMetrics extends Serializable { class ShuffleWriteMetrics extends Serializable {
/** /**
* Number of bytes written for a shuffle * Number of bytes written for the shuffle by this task
*/ */
var shuffleBytesWritten: Long = _ var shuffleBytesWritten: Long = _
/** /**
* Time spent blocking on writes to disk or buffer cache, in nanoseconds. * Time the task spent blocking on writes to disk or buffer cache, in nanoseconds
*/ */
var shuffleWriteTime: Long = _ var shuffleWriteTime: Long = _
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment