diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 7f8f0f513134f5f536e6f9efd72b8673da654f2c..6f5c31d7ab71c9b248db9a08a40440b808ee618c 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -322,7 +322,7 @@ private[spark] class MapOutputTrackerMaster(conf: SparkConf, if (minSizeForBroadcast > maxRpcMessageSize) { val msg = s"spark.shuffle.mapOutput.minSizeForBroadcast ($minSizeForBroadcast bytes) must " + s"be <= spark.rpc.message.maxSize ($maxRpcMessageSize bytes) to prevent sending an rpc " + - "message that is to large." + "message that is too large." logError(msg) throw new IllegalArgumentException(msg) } diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index b42820a8eefdc345cc5de2ab648218b420c01a3f..02c009cdb52106091502fa85e4d0a0dd792c3b22 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -2570,8 +2570,8 @@ object SparkContext extends Logging { val serviceLoaders = ServiceLoader.load(classOf[ExternalClusterManager], loader).asScala.filter(_.canCreate(url)) if (serviceLoaders.size > 1) { - throw new SparkException(s"Multiple Cluster Managers ($serviceLoaders) registered " + - s"for the url $url:") + throw new SparkException( + s"Multiple external cluster managers registered for the url $url: $serviceLoaders") } serviceLoaders.headOption } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala index 2eddb5ff54479b23c0075c08b24519ba75a4f058..080ba12c2f0d1d94824143c434525236f3004c49 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala +++ 
b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala @@ -24,7 +24,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.util.Utils /** - * Command-line parser for the master. + * Command-line parser for the [[HistoryServer]]. */ private[history] class HistoryServerArguments(conf: SparkConf, args: Array[String]) extends Logging { diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 78aed4fb584c44ec90fa57f52fb303ae5ed57f11..fb62682b6c6991a82e5b5a8578b5fcf39cf8b985 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -198,7 +198,7 @@ package object config { .createWithDefault(0) private[spark] val DRIVER_BLOCK_MANAGER_PORT = ConfigBuilder("spark.driver.blockManager.port") - .doc("Port to use for the block managed on the driver.") + .doc("Port to use for the block manager on the driver.") .fallbackConf(BLOCK_MANAGER_PORT) private[spark] val IGNORE_CORRUPT_FILES = ConfigBuilder("spark.files.ignoreCorruptFiles") diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index d285e917b8a672c3062d86b83f411eb0a7f745a0..374abccf6ad55d0a294f22cba461f1822baff664 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1746,7 +1746,7 @@ abstract class RDD[T: ClassTag]( /** * Clears the dependencies of this RDD. This method must ensure that all references - * to the original parent RDDs is removed to enable the parent RDDs to be garbage + * to the original parent RDDs are removed to enable the parent RDDs to be garbage * collected. Subclasses of RDD may override this method for implementing their own cleaning * logic. See [[org.apache.spark.rdd.UnionRDD]] for an example. 
*/ diff --git a/core/src/main/scala/org/apache/spark/rpc/RpcCallContext.scala b/core/src/main/scala/org/apache/spark/rpc/RpcCallContext.scala index f527ec86ab7b2d6ffc764f18bc490e7cb559ceaa..117f51c5b8f2a24f626caf66262f30479d79dda2 100644 --- a/core/src/main/scala/org/apache/spark/rpc/RpcCallContext.scala +++ b/core/src/main/scala/org/apache/spark/rpc/RpcCallContext.scala @@ -18,7 +18,7 @@ package org.apache.spark.rpc /** - * A callback that [[RpcEndpoint]] can use it to send back a message or failure. It's thread-safe + * A callback that [[RpcEndpoint]] can use to send back a message or failure. It's thread-safe * and can be called in any thread. */ private[spark] trait RpcCallContext { diff --git a/docs/monitoring.md b/docs/monitoring.md index 2eef4568d00e9050cf31c5168afd92b0f98b952e..7a1de52668f1ab36da9d8706ff37b4aa569c5d13 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -44,10 +44,8 @@ The spark jobs themselves must be configured to log events, and to log them to t writable directory. For example, if the server was configured with a log directory of `hdfs://namenode/shared/spark-logs`, then the client-side options would be: -``` -spark.eventLog.enabled true -spark.eventLog.dir hdfs://namenode/shared/spark-logs -``` + spark.eventLog.enabled true + spark.eventLog.dir hdfs://namenode/shared/spark-logs The history server can be configured as follows: diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/streaming/OutputMode.java b/sql/catalyst/src/main/java/org/apache/spark/sql/streaming/OutputMode.java index 49a18df2c72c0de0da3e371e994a194473da93f2..a515c1a109cf439572984ed90aba4b4b67203871 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/streaming/OutputMode.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/streaming/OutputMode.java @@ -46,7 +46,7 @@ public class OutputMode { /** * OutputMode in which all the rows in the streaming DataFrame/Dataset will be written - * to the sink every time these is some updates. 
This output mode can only be used in queries + * to the sink every time there are some updates. This output mode can only be used in queries * that contain aggregations. * * @since 2.0.0 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala index f498e071b50a3e19df629171b1c678bbf184940a..256f64e320be89f640543a5b50b51c68089d4f03 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types.{DataType, Decimal, StructType} /** - * An abstract class for row used internal in Spark SQL, which only contain the columns as + * An abstract class for row used internally in Spark SQL, which only contains the columns as * internal types. */ abstract class InternalRow extends SpecializedGetters with Serializable { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala index 4b8cac8f32b0644a364375409ee5bb7bd8770ab6..78897daec810787bf22b896dd4a9f680f6e2784a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.expressions.Expression /** - * Interface for the system catalog (of columns, partitions, tables, and databases). + * Interface for the system catalog (of functions, partitions, tables, and databases). * * This is only used for non-temporary items, and implementations must be thread-safe as they * can be accessed in multiple threads. 
This is an external catalog because it is expected to diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 221f830aa8583243a2ceedc80006d786b889d91c..b93a5d0b7a0e541d652095bd8b91d9278e8f0b6c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -70,9 +70,9 @@ abstract class Expression extends TreeNode[Expression] { * children. * * Note that this means that an expression should be considered as non-deterministic if: - * - if it relies on some mutable internal state, or - * - if it relies on some implicit input that is not part of the children expression list. - * - if it has non-deterministic child or children. + * - it relies on some mutable internal state, or + * - it relies on some implicit input that is not part of the children expression list, or + * - it has a non-deterministic child or children. * * An example would be `SparkPartitionID` that relies on the partition id returned by TaskContext. * By default leaf expressions are deterministic as Nil.forall(_.deterministic) returns true. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index a8aa1e725524a891b920d8701186e4b3dc4e1087..fc323693a24ad894741dfd0855af8d7230604bab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -930,7 +930,7 @@ case class InitializeJavaBean(beanInstance: Expression, setters: Map[String, Exp /** * Asserts that input values of a non-nullable child expression are not null.
* - * Note that there are cases where `child.nullable == true`, while we still needs to add this + * Note that there are cases where `child.nullable == true`, while we still need to add this * assertion. Consider a nullable column `s` whose data type is a struct containing a non-nullable * `Int` field named `i`. Expression `s.i` is nullable because `s` can be null. However, for all * non-null `s`, `s.i` can't be null.