From 062c336d06a0bd4e740a18d2349e03e311509243 Mon Sep 17 00:00:00 2001
From: jinxing <jinxing6042@126.com>
Date: Sun, 9 Jul 2017 00:27:58 +0800
Subject: [PATCH] [SPARK-21343] Refine the document for spark.reducer.maxReqSizeShuffleToMem.

## What changes were proposed in this pull request?

In the current code, the reducer can break the old shuffle service when
`spark.reducer.maxReqSizeShuffleToMem` is enabled. Let's refine the document.

Author: jinxing <jinxing6042@126.com>

Closes #18566 from jinxing64/SPARK-21343.
---
 .../org/apache/spark/internal/config/package.scala |  6 ++++--
 docs/configuration.md                              | 10 ++++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index a629810bf0..512d539ee9 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -323,9 +323,11 @@ package object config {
 
   private[spark] val REDUCER_MAX_REQ_SIZE_SHUFFLE_TO_MEM =
     ConfigBuilder("spark.reducer.maxReqSizeShuffleToMem")
-      .internal()
       .doc("The blocks of a shuffle request will be fetched to disk when size of the request is " +
-        "above this threshold. This is to avoid a giant request takes too much memory.")
+        "above this threshold. This is to avoid a giant request taking too much memory. We can " +
+        "enable this config by setting a specific value (e.g. 200m). Note that this config can " +
+        "be enabled only when the external shuffle service is newer than Spark 2.2, or when " +
+        "the external shuffle service is disabled.")
       .bytesConf(ByteUnit.BYTE)
       .createWithDefault(Long.MaxValue)
 
diff --git a/docs/configuration.md b/docs/configuration.md
index 7dc23e441a..6ca84240c1 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -528,6 +528,16 @@ Apart from these, the following properties are also available, and may be useful
     By allowing it to limit the number of fetch requests, this scenario can be mitigated.
   </td>
 </tr>
+<tr>
+  <td><code>spark.reducer.maxReqSizeShuffleToMem</code></td>
+  <td>Long.MaxValue</td>
+  <td>
+    The blocks of a shuffle request will be fetched to disk when the size of the request is
+    above this threshold. This is to avoid a giant request taking too much memory. We can enable
+    this config by setting a specific value (e.g. 200m). Note that this config can be enabled
+    only when the external shuffle service is newer than Spark 2.2, or when it is disabled.
+  </td>
+</tr>
 <tr>
   <td><code>spark.shuffle.compress</code></td>
   <td>true</td>
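Beyond the patch itself, a minimal sketch of what enabling this threshold looks like from application code; the object name, app name, and the 200m value are illustrative choices for the example, not part of the change:

```scala
import org.apache.spark.{SparkConf, SparkContext}

object MaxReqSizeShuffleToMemExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("max-req-size-shuffle-to-mem-example")
      // Hypothetical threshold: fetch shuffle blocks to disk once a single
      // fetch request exceeds 200 MB, instead of buffering it in memory.
      .set("spark.reducer.maxReqSizeShuffleToMem", "200m")
      // Per the refined doc, the threshold is only safe with a post-2.2
      // external shuffle service; disabling the service also makes it safe.
      .set("spark.shuffle.service.enabled", "false")

    val sc = new SparkContext(conf)
    // reduceByKey forces a shuffle, so its fetch requests are subject to
    // the threshold configured above.
    val counts = sc.parallelize(1 to 1000000)
      .map(i => (i % 100, 1))
      .reduceByKey(_ + _)
      .collect()
    println(counts.take(5).mkString(", "))
    sc.stop()
  }
}
```

The same value can also be passed at submit time, e.g. `--conf spark.reducer.maxReqSizeShuffleToMem=200m` on `spark-submit`.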