diff --git a/core/src/main/scala/org/apache/spark/util/RpcUtils.scala b/core/src/main/scala/org/apache/spark/util/RpcUtils.scala index 5ae793e0e87a359eebc465dbdc6ca636d0fd6791..f16cc8e7e42c62a7d1f0929d98066829ac2e658f 100644 --- a/core/src/main/scala/org/apache/spark/util/RpcUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/RpcUtils.scala @@ -48,11 +48,13 @@ object RpcUtils { /** Returns the default Spark timeout to use for RPC ask operations. */ def askTimeout(conf: SparkConf): FiniteDuration = { - conf.getTimeAsSeconds("spark.rpc.askTimeout", "30s") seconds + conf.getTimeAsSeconds("spark.rpc.askTimeout", + conf.get("spark.network.timeout", "120s")) seconds } /** Returns the default Spark timeout to use for RPC remote endpoint lookup. */ def lookupTimeout(conf: SparkConf): FiniteDuration = { - conf.getTimeAsSeconds("spark.rpc.lookupTimeout", "30s") seconds + conf.getTimeAsSeconds("spark.rpc.lookupTimeout", + conf.get("spark.network.timeout", "120s")) seconds } } diff --git a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala index d7d8014a20498bdd367e061462cdd3cdaa021046..272e6af0514e49b0944e16fbb96d262fba973e83 100644 --- a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala @@ -227,7 +227,7 @@ class SparkConfSuite extends FunSuite with LocalSparkContext with ResetSystemPro test("akka deprecated configs") { val conf = new SparkConf() - assert(!conf.contains("spark.rpc.num.retries")) + assert(!conf.contains("spark.rpc.numRetries")) assert(!conf.contains("spark.rpc.retry.wait")) assert(!conf.contains("spark.rpc.askTimeout")) assert(!conf.contains("spark.rpc.lookupTimeout")) diff --git a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala index 5fbda37c7cb88a5a97b518d23702d961ac633c5d..44c88b00c442a0ebb3d7d50927c59f1ab4b59b5e 100644 --- 
a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala @@ -156,7 +156,7 @@ abstract class RpcEnvSuite extends FunSuite with BeforeAndAfterAll { val conf = new SparkConf() conf.set("spark.rpc.retry.wait", "0") - conf.set("spark.rpc.num.retries", "1") + conf.set("spark.rpc.numRetries", "1") val anotherEnv = createRpcEnv(conf, "remote", 13345) // Use anotherEnv to find out the RpcEndpointRef val rpcEndpointRef = anotherEnv.setupEndpointRef("local", env.address, "ask-timeout") diff --git a/docs/configuration.md b/docs/configuration.md index d9e9e67026cbbde1f6cefbe2c3b29f3b6053ff83..d587b91124cb80bada317ae5c0b24883e4b7e8a6 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -963,8 +963,9 @@ Apart from these, the following properties are also available, and may be useful <td> Default timeout for all network interactions. This config will be used in place of <code>spark.core.connection.ack.wait.timeout</code>, <code>spark.akka.timeout</code>, - <code>spark.storage.blockManagerSlaveTimeoutMs</code> or - <code>spark.shuffle.io.connectionTimeout</code>, if they are not configured. + <code>spark.storage.blockManagerSlaveTimeoutMs</code>, + <code>spark.shuffle.io.connectionTimeout</code>, <code>spark.rpc.askTimeout</code> or + <code>spark.rpc.lookupTimeout</code> if they are not configured. </td> </tr> <tr> @@ -982,6 +983,35 @@ Apart from these, the following properties are also available, and may be useful This is only relevant for the Spark shell. </td> </tr> +<tr> + <td><code>spark.rpc.numRetries</code></td> + <td>3</td> + <td> + Number of times to retry before an RPC task gives up. + An RPC task will run at most this number of times. + </td> +</tr> +<tr> + <td><code>spark.rpc.retry.wait</code></td> + <td>3s</td> + <td> + Duration for an RPC ask operation to wait before retrying. 
+ </td> +</tr> +<tr> + <td><code>spark.rpc.askTimeout</code></td> + <td>120s</td> + <td> + Duration for an RPC ask operation to wait before timing out. + </td> +</tr> +<tr> + <td><code>spark.rpc.lookupTimeout</code></td> + <td>120s</td> + <td> + Duration for an RPC remote endpoint lookup operation to wait before timing out. + </td> +</tr> </table> #### Scheduling