diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index e734366131943fe358ec8ed8c37e2eabab48c208..12e4f4f1ae8bb03013d8e189d6a78c1c62410d6e 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -489,7 +489,7 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
 #   checkedFunc An environment of function objects examined during cleanClosure. It can be
 #               considered as a "name"-to-"list of functions" mapping.
 # return value
-#   a new version of func that has an correct environment (closure).
+#   a new version of func that has a correct environment (closure).
 cleanClosure <- function(func, checkedFuncs = new.env()) {
   if (is.function(func)) {
     newEnv <- new.env(parent = .GlobalEnv)
diff --git a/core/src/main/scala/org/apache/spark/Accumulable.scala b/core/src/main/scala/org/apache/spark/Accumulable.scala
index 812145aaeedb754a5fb703fb976ca89b331825dc..5532931e2a7946b02aa8f04ce71c8b40828b0b5f 100644
--- a/core/src/main/scala/org/apache/spark/Accumulable.scala
+++ b/core/src/main/scala/org/apache/spark/Accumulable.scala
@@ -28,7 +28,7 @@ import org.apache.spark.util.{AccumulatorContext, AccumulatorMetadata, LegacyAcc
 
 
 /**
- * A data type that can be accumulated, i.e. has an commutative and associative "add" operation,
+ * A data type that can be accumulated, i.e. has a commutative and associative "add" operation,
  * but where the result type, `R`, may be different from the element type being added, `T`.
  *
  * You must define how to add data, and how to merge two of these together.  For some data types,
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index bfb6a35f5bb93d8ee44527507b2b6aeed8b15c26..485a8b4222e5ae3f33c630e46d22a17240f488f1 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -789,7 +789,7 @@ class JavaSparkContext(val sc: SparkContext)
   def cancelAllJobs(): Unit = sc.cancelAllJobs()
 
   /**
-   * Returns an Java map of JavaRDDs that have marked themselves as persistent via cache() call.
+   * Returns a Java map of JavaRDDs that have marked themselves as persistent via the cache() call.
    * Note that this does not necessarily mean the caching or computation was successful.
    */
   def getPersistentRDDs: JMap[java.lang.Integer, JavaRDD[_]] = {
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index ab5b6c8380e8bfbf110399930e65367362677815..2822eb5d600244504ac656f565562c85891bd1d1 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -919,7 +919,7 @@ private class PythonAccumulatorParam(@transient private val serverHost: String,
 }
 
 /**
- * An Wrapper for Python Broadcast, which is written into disk by Python. It also will
+ * A wrapper for Python Broadcast, which is written into disk by Python. It also will
  * write the data into disk after deserialization, then Python can read it from disks.
  */
 // scalastyle:off no.finalize
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 78606e06fbd3267d5a2faf2cbefe042c64f231fd..9be4cadcb430aaa604ef9bb69d11bb676b081ff3 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -408,12 +408,12 @@ object SparkSubmit {
       printErrorAndExit("SparkR is not supported for Mesos cluster.")
     }
 
-    // If we're running a R app, set the main class to our specific R runner
+    // If we're running an R app, set the main class to our specific R runner
     if (args.isR && deployMode == CLIENT) {
       if (args.primaryResource == SPARKR_SHELL) {
         args.mainClass = "org.apache.spark.api.r.RBackend"
       } else {
-        // If a R file is provided, add it to the child arguments and list of files to deploy.
+        // If an R file is provided, add it to the child arguments and list of files to deploy.
         // Usage: RRunner <main R file> [app arguments]
         args.mainClass = "org.apache.spark.deploy.RRunner"
         args.childArgs = ArrayBuffer(args.primaryResource) ++ args.childArgs
@@ -422,7 +422,7 @@ object SparkSubmit {
     }
 
     if (isYarnCluster && args.isR) {
-      // In yarn-cluster mode for a R app, add primary resource to files
+      // In yarn-cluster mode for an R app, add primary resource to files
       // that can be distributed with the job
       args.files = mergeFileLists(args.files, args.primaryResource)
     }
diff --git a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala
index 5426bf80bafc57d2f3fc329fc3769d049c88acfd..2f42916439d29e7a2359ebe4987b99acd956e73e 100644
--- a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala
@@ -34,7 +34,7 @@ private[spark] class JdbcPartition(idx: Int, val lower: Long, val upper: Long) e
 
 // TODO: Expose a jdbcRDD function in SparkContext and mark this as semi-private
 /**
- * An RDD that executes an SQL query on a JDBC connection and reads results.
+ * An RDD that executes a SQL query on a JDBC connection and reads results.
  * For usage example, see test case JdbcRDDSuite.
  *
  * @param getConnection a function that returns an open Connection.
@@ -138,7 +138,7 @@ object JdbcRDD {
   }
 
   /**
-   * Create an RDD that executes an SQL query on a JDBC connection and reads results.
+   * Create an RDD that executes a SQL query on a JDBC connection and reads results.
    * For usage example, see test case JavaAPISuite.testJavaJdbcRDD.
    *
    * @param connectionFactory a factory that returns an open Connection.
@@ -178,7 +178,7 @@ object JdbcRDD {
   }
 
   /**
-   * Create an RDD that executes an SQL query on a JDBC connection and reads results. Each row is
+   * Create an RDD that executes a SQL query on a JDBC connection and reads results. Each row is
    * converted into a `Object` array. For usage example, see test case JavaAPISuite.testJavaJdbcRDD.
    *
    * @param connectionFactory a factory that returns an open Connection.
diff --git a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala
index 732c89c39f0af29f6f78eda71f31a9dfb6f6cbe0..2a69a6c5e879028bcf0e50c211ed08f2ea774cac 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala
@@ -26,7 +26,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.scheduler.SchedulingMode.SchedulingMode
 
 /**
- * An Schedulable entity that represents collection of Pools or TaskSetManagers
+ * A Schedulable entity that represents a collection of Pools or TaskSetManagers
  */
 private[spark] class Pool(
     val poolName: String,
diff --git a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala
index 6657104823e71476c42a776cdd6878f0df7b5c94..973676398ae54f02ad94b25eca73cc6b1b87a584 100644
--- a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala
+++ b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala
@@ -138,7 +138,7 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext {
   }
 
   /**
-   * Verify the persistence of state associated with an TorrentBroadcast in a local-cluster.
+   * Verify the persistence of state associated with a TorrentBroadcast in a local-cluster.
    *
    * This test creates a broadcast variable, uses it on all executors, and then unpersists it.
    * In between each step, this test verifies that the broadcast blocks are present only on the
diff --git a/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala
index a7bb9aa4686ebd5abb9797d15ee0d1f9ed2f7fad..dd50e33da30ace12af917481dafdf2279fdd0910 100644
--- a/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala
@@ -408,7 +408,7 @@ class StandaloneRestSubmitSuite extends SparkFunSuite with BeforeAndAfterEach {
 
   /**
    * Start a [[StandaloneRestServer]] that communicates with the given endpoint.
-   * If `faulty` is true, start an [[FaultyStandaloneRestServer]] instead.
+   * If `faulty` is true, start a [[FaultyStandaloneRestServer]] instead.
    * Return the master URL that corresponds to the address of this server.
    */
   private def startServer(
diff --git a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala
index 505cd476ff881df9a7a148e3c659d4b72dedd667..acdf21df9a16157b3f1be95945fb3a94a6ffdead 100644
--- a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala
@@ -489,7 +489,7 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll {
   /**
    * Setup an [[RpcEndpoint]] to collect all network events.
    *
-   * @return the [[RpcEndpointRef]] and an `ConcurrentLinkedQueue` that contains network events.
+   * @return the [[RpcEndpointRef]] and a `ConcurrentLinkedQueue` that contains network events.
    */
   private def setupNetworkEndpoint(
       _env: RpcEnv,
diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
index b6765f064524f7395090e06e9eed184ba2de30d1..f28f429e0c54ee1f8c83c6d14714c1d16c2aaea7 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
@@ -1712,7 +1712,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou
   }
 
   test("reduce tasks should be placed locally with map output") {
-    // Create an shuffleMapRdd with 1 partition
+    // Create a shuffleMapRdd with 1 partition
     val shuffleMapRdd = new MyRDD(sc, 1, Nil)
     val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(2))
     val shuffleId = shuffleDep.shuffleId
@@ -1733,7 +1733,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou
 
   test("reduce task locality preferences should only include machines with largest map outputs") {
     val numMapTasks = 4
-    // Create an shuffleMapRdd with more partitions
+    // Create a shuffleMapRdd with more partitions
     val shuffleMapRdd = new MyRDD(sc, numMapTasks, Nil)
     val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(1))
     val shuffleId = shuffleDep.shuffleId
diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
index 0a8bbba6c5e472f59563bd9f0b3b59f61ed66e21..85ca9d39d4a3f454496c17847e6dc051c9b6fbd0 100644
--- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala
@@ -258,7 +258,7 @@ class JsonProtocolSuite extends SparkFunSuite {
   }
 
   test("FetchFailed backwards compatibility") {
-    // FetchFailed in Spark 1.1.0 does not have an "Message" property.
+    // FetchFailed in Spark 1.1.0 does not have a "Message" property.
     val fetchFailed = FetchFailed(BlockManagerId("With or", "without you", 15), 17, 18, 19,
       "ignored")
     val oldEvent = JsonProtocol.taskEndReasonToJson(fetchFailed)
diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala
index 5f234b1f0cccaecd91d5e4c56105327f04601e0c..8af7c23431063796b93adc541046e6ebb3e5f605 100644
--- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala
+++ b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala
@@ -26,7 +26,7 @@ import org.apache.spark.streaming.flume.sink._
 /**
  * This class implements the core functionality of [[FlumePollingReceiver]]. When started it
  * pulls data from Flume, stores it to Spark and then sends an Ack or Nack. This class should be
- * run via an [[java.util.concurrent.Executor]] as this implements [[Runnable]]
+ * run via a [[java.util.concurrent.Executor]] as this implements [[Runnable]]
  *
  * @param receiver The receiver that owns this instance.
  */
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBaseOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBaseOps.scala
index 31373a53cf93363a2820a542fafe75179c3ded12..43594573cf0136737f5c0dc4e01544a377e30d84 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBaseOps.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBaseOps.scala
@@ -27,7 +27,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.util.collection.BitSet
 
 /**
- * An class containing additional operations for subclasses of VertexPartitionBase that provide
+ * A class containing additional operations for subclasses of VertexPartitionBase that provide
  * implicit evidence of membership in the `VertexPartitionBaseOpsConstructor` typeclass (for
  * example, [[VertexPartition.VertexPartitionOpsConstructor]]).
  */
diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala
index 909fec1c0665381de4246e705f226fe0d83068ac..c74e5d44a328df0e53469af49a7dba5c923af176 100644
--- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala
+++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala
@@ -548,7 +548,7 @@ object DenseVector {
 }
 
 /**
- * A sparse vector represented by an index array and an value array.
+ * A sparse vector represented by an index array and a value array.
  *
  * @param size size of the vector.
  * @param indices index array, assume to be strictly increasing.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
index b02aea92b72f940236da29327bb9cb8305308a03..25e56d70c2339e8b957e909e46935bad10c0713c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
@@ -82,7 +82,7 @@ abstract class PipelineStage extends Params with Logging {
  * be called on the input dataset to fit a model. Then the model, which is a transformer, will be
  * used to transform the dataset as the input to the next stage. If a stage is a [[Transformer]],
  * its [[Transformer#transform]] method will be called to produce the dataset for the next stage.
- * The fitted model from a [[Pipeline]] is an [[PipelineModel]], which consists of fitted models and
+ * The fitted model from a [[Pipeline]] is a [[PipelineModel]], which consists of fitted models and
  * transformers, corresponding to the pipeline stages. If there are no stages, the pipeline acts as
  * an identity transformer.
  */
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index cecf3c43dfad40e6705e4da8bc1e37b0f1876d8c..1ea4d90e166b2a01a525a2abac3b0637b0cf7cc5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -854,7 +854,7 @@ class BinaryLogisticRegressionSummary private[classification] (
 
   /**
    * Returns the receiver operating characteristic (ROC) curve,
-   * which is an Dataframe having two fields (FPR, TPR)
+   * which is a Dataframe having two fields (FPR, TPR)
    * with (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
    *
    * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
@@ -874,7 +874,7 @@ class BinaryLogisticRegressionSummary private[classification] (
   lazy val areaUnderROC: Double = binaryMetrics.areaUnderROC()
 
   /**
-   * Returns the precision-recall curve, which is an Dataframe containing
+   * Returns the precision-recall curve, which is a Dataframe containing
    * two fields recall, precision with (0.0, 1.0) prepended to it.
    *
    * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala
index be3792eb7732f065b94b21eb53625ccefc18d494..71c8c42ce5eba23b3ee59d46fd52b8b4daa259e9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala
@@ -491,7 +491,7 @@ private[spark] object RandomForest extends Logging {
     timer.start("chooseSplits")
 
     // In each partition, iterate all instances and compute aggregate stats for each node,
-    // yield an (nodeIndex, nodeAggregateStats) pair for each node.
+    // yield a (nodeIndex, nodeAggregateStats) pair for each node.
     // After a `reduceByKey` operation,
     // stats of a node will be shuffled to a particular partition and be combined together,
     // then best splits for nodes are found there.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index e4cc784cfe4213ba5ac045425d73dc2fcab87339..e4cbf5acbc11da780f521196548fddc41a114307 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -86,7 +86,7 @@ class LogisticRegressionModel @Since("1.3.0") (
   /**
    * Sets the threshold that separates positive predictions from negative predictions
    * in Binary Logistic Regression. An example with prediction score greater than or equal to
-   * this threshold is identified as an positive, and negative otherwise. The default value is 0.5.
+   * this threshold is identified as a positive, and negative otherwise. The default value is 0.5.
    * It is only used for binary classification.
    */
   @Since("1.0.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
index 783c1c835b3b7370cb1d1fe01fa4a1b7babeff23..7c3ccbb40b81239c4ac7c27f3af4618a188c06f9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -44,7 +44,7 @@ class SVMModel @Since("1.1.0") (
 
   /**
    * Sets the threshold that separates positive predictions from negative predictions. An example
-   * with prediction score greater than or equal to this threshold is identified as an positive,
+   * with prediction score greater than or equal to this threshold is identified as a positive,
    * and negative otherwise. The default value is 0.0.
    */
   @Since("1.0.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala
index ca7385128d79aa7e580d57c179c24c3ef13d1cb5..9db725097ae9059ec6484e6c496becc58b2fb2b3 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/VectorTransformer.scala
@@ -53,7 +53,7 @@ trait VectorTransformer extends Serializable {
   }
 
   /**
-   * Applies transformation on an JavaRDD[Vector].
+   * Applies transformation on a JavaRDD[Vector].
    *
    * @param data JavaRDD[Vector] to be transformed.
    * @return transformed JavaRDD[Vector].
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index 02fd60da7d59022bf7f34b7efef5f1297e59cafc..91f065831c8040abc52764c30f379ad45dad8cbc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -731,7 +731,7 @@ object DenseVector {
 }
 
 /**
- * A sparse vector represented by an index array and an value array.
+ * A sparse vector represented by an index array and a value array.
  *
  * @param size size of the vector.
  * @param indices index array, assume to be strictly increasing.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
index 97b03b340f20e61e350a3f0c36d9d5be05b792de..008b03d1cc33407e597b26f36f35ad6390b4cd8f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
@@ -24,7 +24,7 @@ import org.apache.spark.mllib.linalg.{Matrix, SparseMatrix, Vectors}
 import org.apache.spark.rdd.RDD
 
 /**
- * Represents an entry in an distributed matrix.
+ * Represents an entry in a distributed matrix.
  * @param i row index
  * @param j column index
  * @param value value of the entry
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
index af4bc9f290c3e850b8e13d295631db0c9ab1961e..e28e1af5b0a263e567d0b5fdc1c1b960d709708f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
@@ -47,7 +47,7 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se
       combOp = (queue1, queue2) => {
         queue1 ++= queue2
       }
-    ).mapValues(_.toArray.sorted(ord.reverse))  // This is an min-heap, so we reverse the order.
+    ).mapValues(_.toArray.sorted(ord.reverse))  // This is a min-heap, so we reverse the order.
   }
 }
 
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 7710fdf2e216e1b1840d8d5457cb816ab9f8fe05..77badebeb418ef8a8f199dece597927815d97bab 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -351,7 +351,7 @@ class BinaryLogisticRegressionSummary(LogisticRegressionSummary):
     def roc(self):
         """
         Returns the receiver operating characteristic (ROC) curve,
-        which is an Dataframe having two fields (FPR, TPR) with
+        which is a Dataframe having two fields (FPR, TPR) with
         (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
 
         .. seealso:: `Wikipedia reference \
@@ -380,7 +380,7 @@ class BinaryLogisticRegressionSummary(LogisticRegressionSummary):
     @since("2.0.0")
     def pr(self):
         """
-        Returns the precision-recall curve, which is an Dataframe
+        Returns the precision-recall curve, which is a Dataframe
         containing two fields recall, precision with (0.0, 1.0) prepended
         to it.
 
diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py
index eb6d3310d693b1ff0c5b1dfd9513e340ed89771a..0777527134ccb1fe795448394bd3b494c1079f1b 100644
--- a/python/pyspark/ml/pipeline.py
+++ b/python/pyspark/ml/pipeline.py
@@ -42,7 +42,7 @@ class Pipeline(Estimator, MLReadable, MLWritable):
     stage. If a stage is a :py:class:`Transformer`, its
     :py:meth:`Transformer.transform` method will be called to produce
     the dataset for the next stage. The fitted model from a
-    :py:class:`Pipeline` is an :py:class:`PipelineModel`, which
+    :py:class:`Pipeline` is a :py:class:`PipelineModel`, which
     consists of fitted models and transformers, corresponding to the
     pipeline stages. If there are no stages, the pipeline acts as an
     identity transformer.
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index f186217031193fdc1f8cdaa3abb7b07999b7eae3..3734f87405e5a0d147b7dd97b59b6f53fca85f01 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -52,7 +52,7 @@ class LinearClassificationModel(LinearModel):
 
         Sets the threshold that separates positive predictions from
         negative predictions. An example with prediction score greater
-        than or equal to this threshold is identified as an positive,
+        than or equal to this threshold is identified as a positive,
         and negative otherwise. It is used for binary classification
         only.
         """
diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py
index 6bc2b1e64651ed6f3c571219068e102ef20b9a06..31afdf576b677bd8ab47a85b36bda3254b6aa5cb 100644
--- a/python/pyspark/mllib/common.py
+++ b/python/pyspark/mllib/common.py
@@ -60,7 +60,7 @@ _picklable_classes = [
 
 # this will call the MLlib version of pythonToJava()
 def _to_java_object_rdd(rdd):
-    """ Return an JavaRDD of Object by unpickling
+    """ Return a JavaRDD of Object by unpickling
 
     It will convert each Python object into Java object by Pyrolite, whenever the
     RDD is serialized in batch or not.
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 411e377a56f8ba0c1730404d1b750f5867ebd154..6afe7696622215db5541586685caf88a1dade538 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1040,7 +1040,7 @@ class RDD(object):
         If the elements in RDD do not vary (max == min) always returns
         a single bucket.
 
-        It will return an tuple of buckets and histogram.
+        It will return a tuple of buckets and histogram.
 
         >>> rdd = sc.parallelize(range(51))
         >>> rdd.histogram(2)
@@ -2211,7 +2211,7 @@ class RDD(object):
         return values.collect()
 
     def _to_java_object_rdd(self):
-        """ Return an JavaRDD of Object by unpickling
+        """ Return a JavaRDD of Object by unpickling
 
         It will convert each Python object into Java object by Pyrolite, whenever the
         RDD is serialized in batch or not.
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 8f7dcb54a7a2a4b64699e1919ea34c4a44e8827f..7c9f532f9412f22ed63a5a85ca6a190b0932c7f9 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -360,7 +360,7 @@ class SparkSession(object):
 
     def _createFromLocal(self, data, schema):
         """
-        Create an RDD for DataFrame from an list or pandas.DataFrame, returns
+        Create an RDD for DataFrame from a list or pandas.DataFrame, returns
         the RDD and schema.
         """
         # make sure data could consumed multiple times
diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
index cd75622cedf5e511665c86653785f3fe85480f3e..580aba651f1b0780a33b8a375a1dc9dd06abb1ef 100644
--- a/python/pyspark/sql/streaming.py
+++ b/python/pyspark/sql/streaming.py
@@ -72,7 +72,7 @@ class ContinuousQuery(object):
 
     @since(2.0)
     def processAllAvailable(self):
-        """Blocks until all available data in the source has been processed an committed to the
+        """Blocks until all available data in the source has been processed and committed to the
         sink. This method is intended for testing. Note that in the case of continually arriving
         data, this method may block forever. Additionally, this method is only guaranteed to block
         until data that has been synchronously appended data to a stream source prior to invocation.
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 7d8d0230b4ab34734f1a78f3efee537be57bffa8..bb2b95404af7a5a8fecc006460bb08c32353870c 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -1046,7 +1046,7 @@ def _need_converter(dataType):
 
 
 def _create_converter(dataType):
-    """Create an converter to drop the names of fields in obj """
+    """Create a converter to drop the names of fields in obj """
     if not _need_converter(dataType):
         return lambda x: x
 
diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py
index fb6c66f2df9973b68a5f14de721222986bcf8d49..59977dcb435a88cacc9b23d7d2a0094938356822 100644
--- a/python/pyspark/streaming/dstream.py
+++ b/python/pyspark/streaming/dstream.py
@@ -608,8 +608,8 @@ class DStream(object):
 
 class TransformedDStream(DStream):
     """
-    TransformedDStream is an DStream generated by an Python function
-    transforming each RDD of an DStream to another RDDs.
+    TransformedDStream is a DStream generated by a Python function
+    transforming each RDD of a DStream into another RDD.
 
     Multiple continuous transformations of DStream can be combined into
     one transformation.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
index 391001de26ece3669a63560a2b7ee66b87cb5f7b..d83eef7a4162910c25f5f56ed2fe7af3e2bdfd51 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -300,7 +300,7 @@ trait Row extends Serializable {
     getMap[K, V](i).asJava
 
   /**
-   * Returns the value at position i of struct type as an [[Row]] object.
+   * Returns the value at position i of struct type as a [[Row]] object.
    *
    * @throws ClassCastException when data type does not match.
    */
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 0e6865646732b0b03068047cbfe37d9504927a03..d1ca99f79e9cadb253eb819b6cdba0afb2b21eda 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.util.toPrettySQL
 import org.apache.spark.sql.types._
 
 /**
- * A trivial [[Analyzer]] with an dummy [[SessionCatalog]] and [[EmptyFunctionRegistry]].
+ * A trivial [[Analyzer]] with a dummy [[SessionCatalog]] and [[EmptyFunctionRegistry]].
  * Used for testing when all relations are already filled in and the analyzer needs only
  * to resolve attribute references.
  */
@@ -1496,7 +1496,7 @@ class Analyzer(
    * This rule handles three cases:
    *  - A [[Project]] having [[WindowExpression]]s in its projectList;
    *  - An [[Aggregate]] having [[WindowExpression]]s in its aggregateExpressions.
-   *  - An [[Filter]]->[[Aggregate]] pattern representing GROUP BY with a HAVING
+   *  - A [[Filter]]->[[Aggregate]] pattern representing GROUP BY with a HAVING
    *    clause and the [[Aggregate]] has [[WindowExpression]]s in its aggregateExpressions.
    * Note: If there is a GROUP BY clause in the query, aggregations and corresponding
    * filters (expressions in the HAVING clause) should be evaluated before any
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index eca837ccf0a78ecabdb28aa5245e0bcbbb7bc406..a7388c71de9e07d2c804cc3ae81899f07fb2ec88 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -410,7 +410,7 @@ object FunctionRegistry {
           case Failure(e) => throw new AnalysisException(e.getMessage)
         }
       } else {
-        // Otherwise, find an ctor method that matches the number of arguments, and use that.
+        // Otherwise, find a constructor method that matches the number of arguments, and use that.
         val params = Seq.fill(expressions.size)(classOf[Expression])
         val f = Try(tag.runtimeClass.getDeclaredConstructor(params : _*)) match {
           case Success(e) =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/MultiInstanceRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/MultiInstanceRelation.scala
index 394be47a588b71c0b71dc3016a657045bc9d21a9..95a3837ae1ab71c6d7dd63c596bce181b5c31837 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/MultiInstanceRelation.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/MultiInstanceRelation.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 
 /**
- * A trait that should be mixed into query operators where an single instance might appear multiple
+ * A trait that should be mixed into query operators where a single instance might appear multiple
  * times in a logical query plan.  It is invalid to have multiple copies of the same attribute
  * produced by distinct operators in a query tree as this breaks the guarantee that expression
  * ids, which are used to differentiate attributes, are unique.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 371c198aa349353325964d2e34728f5105a0f9ca..943d1071e295adee512f307f6c709845bbd76bb0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -241,7 +241,7 @@ class SessionCatalog(
   /**
    * Retrieve the metadata of an existing metastore table.
    * If no database is specified, assume the table is in the current database.
-   * If the specified table is not found in the database then an [[NoSuchTableException]] is thrown.
+   * If the specified table is not found in the database then a [[NoSuchTableException]] is thrown.
    */
   def getTableMetadata(name: TableIdentifier): CatalogTable = {
     val db = formatDatabaseName(name.database.getOrElse(getCurrentDatabase))
@@ -266,7 +266,7 @@ class SessionCatalog(
   /**
    * Load files stored in given path into an existing metastore table.
    * If no database is specified, assume the table is in the current database.
-   * If the specified table is not found in the database then an [[NoSuchTableException]] is thrown.
+   * If the specified table is not found in the database then a [[NoSuchTableException]] is thrown.
    */
   def loadTable(
       name: TableIdentifier,
@@ -283,7 +283,7 @@ class SessionCatalog(
   /**
    * Load files stored in given path into the partition of an existing metastore table.
    * If no database is specified, assume the table is in the current database.
-   * If the specified table is not found in the database then an [[NoSuchTableException]] is thrown.
+   * If the specified table is not found in the database then a [[NoSuchTableException]] is thrown.
    */
   def loadPartition(
       name: TableIdentifier,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala
index c66d08d2cf8fc23fb480379b2818cce9772001df..8e46b962ff4325a8f318d1dac2c08fef0c7a4fd7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.catalog
 
 import org.apache.spark.sql.AnalysisException
 
-/** An trait that represents the type of a resourced needed by a function. */
+/** A trait that represents the type of a resource needed by a function. */
 abstract class FunctionResourceType(val resourceType: String)
 
 object JarResource extends FunctionResourceType("jar")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala
index b3dfac806f7fe56f0a9f603a91939973fbaad977..c15a2df50855aab10e96ec46ddf9ebf24a25d70d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala
@@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.types.AbstractDataType
 
 /**
- * An trait that gets mixin to define the expected input types of an expression.
+ * A trait that is mixed in to define the expected input types of an expression.
  *
  * This trait is typically used by operator expressions (e.g. [[Add]], [[Subtract]]) to define
  * expected input types without any implicit casting.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala
index 27ad8e4cf22ce3bd2c42bc5065d237120a84f142..c8d18667f7c4a1f18af7f723ae553a7d02fd2ea8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala
@@ -158,7 +158,7 @@ object UnsafeProjection {
 object FromUnsafeProjection {
 
   /**
-   * Returns an Projection for given StructType.
+   * Returns a Projection for given StructType.
    */
   def apply(schema: StructType): Projection = {
     apply(schema.fields.map(_.dataType))
@@ -172,7 +172,7 @@ object FromUnsafeProjection {
   }
 
   /**
-   * Returns an Projection for given sequence of Expressions (bounded).
+   * Returns a Projection for given sequence of Expressions (bounded).
    */
   private def create(exprs: Seq[Expression]): Projection = {
     GenerateSafeProjection.generate(exprs)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
index f60d278c54873605f502a1f0092847dbb414c4a6..d603d3c73ecbc6023724dc3c1d77da47ac106b25 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
@@ -91,7 +91,7 @@ case class CreateMap(children: Seq[Expression]) extends Expression {
 
   override def checkInputDataTypes(): TypeCheckResult = {
     if (children.size % 2 != 0) {
-      TypeCheckResult.TypeCheckFailure(s"$prettyName expects an positive even number of arguments.")
+      TypeCheckResult.TypeCheckFailure(s"$prettyName expects a positive even number of arguments.")
     } else if (keys.map(_.dataType).distinct.length > 1) {
       TypeCheckResult.TypeCheckFailure("The given keys of function map should all be the same " +
         "type, but they are " + keys.map(_.dataType.simpleString).mkString("[", ", ", "]"))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala
index 03ea349221ec923a613a010bfd904ff242848fed..65eae869d40d1eb342e901445d3b5e26a45ed725 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala
@@ -148,7 +148,7 @@ abstract class NumericType extends AtomicType {
   // implicitly[Numeric[JvmType]] to be valid, we have to change JvmType from a type variable to a
   // type parameter and add a numeric annotation (i.e., [JvmType : Numeric]). This gets
   // desugared by the compiler into an argument to the objects constructor. This means there is no
-  // longer an no argument constructor and thus the JVM cannot serialize the object anymore.
+  // longer a no-argument constructor and thus the JVM cannot serialize the object anymore.
   private[sql] val numeric: Numeric[InternalType]
 }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 88fa5cd21d58fd485efea186665dde14768e0204..b248583d79e6b1dc0c0ca50cec6c07937599dede 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -314,7 +314,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
   def json(paths: String*): DataFrame = format("json").load(paths : _*)
 
   /**
-   * Loads an `JavaRDD[String]` storing JSON objects (one object per record) and
+   * Loads a `JavaRDD[String]` storing JSON objects (one object per record) and
    * returns the result as a [[DataFrame]].
    *
    * Unless the schema is specified using [[schema]] function, this function goes through the
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 2e14c5d486d4fe59b72de95d168a5a58e0d9ef14..0fb2400d1bafd6da62d7d1488d14db8c1fecc05b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -370,7 +370,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
 
   /**
    * :: DeveloperApi ::
-   * Creates a [[DataFrame]] from an [[JavaRDD]] containing [[Row]]s using the given schema.
+   * Creates a [[DataFrame]] from a [[JavaRDD]] containing [[Row]]s using the given schema.
    * It is important to make sure that the structure of every [[Row]] of the provided RDD matches
    * the provided schema. Otherwise, there will be runtime exception.
    *
@@ -384,7 +384,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
 
   /**
    * :: DeveloperApi ::
-   * Creates a [[DataFrame]] from an [[java.util.List]] containing [[Row]]s using the given schema.
+   * Creates a [[DataFrame]] from a [[java.util.List]] containing [[Row]]s using the given schema.
    * It is important to make sure that the structure of every [[Row]] of the provided List matches
    * the provided schema. Otherwise, there will be runtime exception.
    *
@@ -421,7 +421,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   }
 
   /**
-   * Applies a schema to an List of Java Beans.
+   * Applies a schema to a List of Java Beans.
    *
    * WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
    *          SELECT * queries will return the columns in an undefined order.
@@ -552,7 +552,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   /**
    * :: Experimental ::
    * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
-   * in an range from 0 to `end` (exclusive) with step value 1.
+   * in a range from 0 to `end` (exclusive) with step value 1.
    *
    * @since 2.0.0
    * @group dataset
@@ -563,7 +563,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   /**
    * :: Experimental ::
    * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
-   * in an range from `start` to `end` (exclusive) with step value 1.
+   * in a range from `start` to `end` (exclusive) with step value 1.
    *
    * @since 2.0.0
    * @group dataset
@@ -574,7 +574,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   /**
    * :: Experimental ::
    * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
-   * in an range from `start` to `end` (exclusive) with an step value.
+   * in a range from `start` to `end` (exclusive) with a step value.
    *
    * @since 2.0.0
    * @group dataset
@@ -587,7 +587,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
   /**
    * :: Experimental ::
    * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
-   * in an range from `start` to `end` (exclusive) with an step value, with partition number
+   * in a range from `start` to `end` (exclusive) with a step value, with partition number
    * specified.
    *
    * @since 2.0.0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala
index b7ea2a89175fe8c1489f75d8d8876a33a6fca5e3..440952572d8c490bfaf47a01f592ae398c8c8e98 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala
@@ -33,7 +33,7 @@ abstract class SQLImplicits {
   protected def _sqlContext: SQLContext
 
   /**
-   * Converts $"col name" into an [[Column]].
+   * Converts $"col name" into a [[Column]].
    *
    * @since 2.0.0
    */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 7d7fd0399dedab3218f3d77e17c7b8114bc98f6a..f5b16d07ad80ef462a7a66869fed007ee19d7613 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -305,7 +305,7 @@ class SparkSession private(
 
   /**
    * :: DeveloperApi ::
-   * Creates a [[DataFrame]] from an [[JavaRDD]] containing [[Row]]s using the given schema.
+   * Creates a [[DataFrame]] from a [[JavaRDD]] containing [[Row]]s using the given schema.
    * It is important to make sure that the structure of every [[Row]] of the provided RDD matches
    * the provided schema. Otherwise, there will be runtime exception.
    *
@@ -319,7 +319,7 @@ class SparkSession private(
 
   /**
    * :: DeveloperApi ::
-   * Creates a [[DataFrame]] from an [[java.util.List]] containing [[Row]]s using the given schema.
+   * Creates a [[DataFrame]] from a [[java.util.List]] containing [[Row]]s using the given schema.
    * It is important to make sure that the structure of every [[Row]] of the provided List matches
    * the provided schema. Otherwise, there will be runtime exception.
    *
@@ -365,7 +365,7 @@ class SparkSession private(
   }
 
   /**
-   * Applies a schema to an List of Java Beans.
+   * Applies a schema to a List of Java Beans.
    *
    * WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
    *          SELECT * queries will return the columns in an undefined order.
@@ -475,7 +475,7 @@ class SparkSession private(
   /**
    * :: Experimental ::
    * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
-   * in an range from 0 to `end` (exclusive) with step value 1.
+   * in a range from 0 to `end` (exclusive) with step value 1.
    *
    * @since 2.0.0
    * @group dataset
@@ -486,7 +486,7 @@ class SparkSession private(
   /**
    * :: Experimental ::
    * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
-   * in an range from `start` to `end` (exclusive) with step value 1.
+   * in a range from `start` to `end` (exclusive) with step value 1.
    *
    * @since 2.0.0
    * @group dataset
@@ -499,7 +499,7 @@ class SparkSession private(
   /**
    * :: Experimental ::
    * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
-   * in an range from `start` to `end` (exclusive) with an step value.
+   * in a range from `start` to `end` (exclusive) with a step value.
    *
    * @since 2.0.0
    * @group dataset
@@ -512,7 +512,7 @@ class SparkSession private(
   /**
    * :: Experimental ::
    * Creates a [[Dataset]] with a single [[LongType]] column named `id`, containing elements
-   * in an range from `start` to `end` (exclusive) with an step value, with partition number
+   * in a range from `start` to `end` (exclusive) with a step value, with partition number
    * specified.
    *
    * @since 2.0.0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/SQLBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/SQLBuilder.scala
index 9dc367920ef84b65942890af10d25dc9e241d9bd..a8cc72f2e7b3a60e060b439f826f379f85f78fb6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/SQLBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/SQLBuilder.scala
@@ -274,7 +274,7 @@ class SQLBuilder(logicalPlan: LogicalPlan) extends Logging {
     //   5. the table alias for output columns of generator.
     //   6. the AS keyword
     //   7. the column alias, can be more than one, e.g. AS key, value
-    // An concrete example: "tbl LATERAL VIEW EXPLODE(map_col) sub_q AS key, value", and the builder
+    // A concrete example: "tbl LATERAL VIEW EXPLODE(map_col) sub_q AS key, value", and the builder
     // will put it in FROM clause later.
     build(
       childSQL,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala
index f392b135ce7871a60ccfb6006f881e0edbd0adab..3f7f84988594a7fcfbd73ea0db64ffe98da1f122 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala
@@ -86,7 +86,7 @@ class SortBasedAggregationIterator(
   // The aggregation buffer used by the sort-based aggregation.
   private[this] val sortBasedAggregationBuffer: MutableRow = newBuffer
 
-  // An SafeProjection to turn UnsafeRow into GenericInternalRow, because UnsafeRow can't be
+  // A SafeProjection to turn UnsafeRow into GenericInternalRow, because UnsafeRow can't be
   // compared to MutableRow (aggregation buffer) directly.
   private[this] val safeProj: Projection = FromUnsafeProjection(valueAttributes.map(_.dataType))
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala
index 4ceb710f4b2b453996fd1188dfe2ec642c0200f3..b047bc0641dd2d80234769352da809fdde73de21 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala
@@ -202,7 +202,7 @@ sealed trait BufferSetterGetterUtils {
 }
 
 /**
- * A Mutable [[Row]] representing an mutable aggregation buffer.
+ * A Mutable [[Row]] representing a mutable aggregation buffer.
  */
 private[sql] class MutableAggregationBufferImpl (
     schema: StructType,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala
index 1041bab9d5d6cb83cc366d66ade1a48360b482c2..7a14879b8b9dfb17abe8eed12456cf009062d801 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala
@@ -58,7 +58,7 @@ class MutableUnsafeRow(val writer: UnsafeRowWriter) extends GenericMutableRow(nu
 }
 
 /**
- * Generates bytecode for an [[ColumnarIterator]] for columnar cache.
+ * Generates bytecode for a [[ColumnarIterator]] for columnar cache.
  */
 object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarIterator] with Logging {
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
index 350508c1d9f4c0d4cef3e728a087dd072be07122..7503285ee2c1fb3dd80411b46142800eeffccfb6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
@@ -41,7 +41,7 @@ import org.apache.spark.sql.execution.SparkPlan
  *    is only done on top level columns, but formats should support pruning of nested columns as
  *    well.
  *  - Construct a reader function by passing filters and the schema into the FileFormat.
- *  - Using an partition pruning predicates, enumerate the list of files that should be read.
+ *  - Using the partition pruning predicates, enumerate the list of files that should be read.
  *  - Split the files into tasks and construct a FileScanRDD.
  *  - Add any projection or filters that must be evaluated after the scan.
  *
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
index aeee2600a19ee3ff5970c9c0ed7fa60244af98dd..733fcbfea101ea11189686825cd63b37e6a83349 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
@@ -50,7 +50,7 @@ object JacksonParser extends Logging {
 
   /**
    * Parse the current token (and related children) according to a desired schema
-   * This is an wrapper for the method `convertField()` to handle a row wrapped
+   * This is a wrapper for the method `convertField()` to handle a row wrapped
    * with an array.
    */
   def convertRootField(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystRowConverter.scala
index 6bf82bee67881571297233b39965fb1504155851..85b0bc17edd328b329439ad788135231e10610ac 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystRowConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystRowConverter.scala
@@ -68,7 +68,7 @@ private[parquet] trait HasParentContainerUpdater {
 }
 
 /**
- * A convenient converter class for Parquet group types with an [[HasParentContainerUpdater]].
+ * A convenient converter class for Parquet group types with a [[HasParentContainerUpdater]].
  */
 private[parquet] abstract class CatalystGroupConverter(val updater: ParentContainerUpdater)
   extends GroupConverter with HasParentContainerUpdater
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ExchangeCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ExchangeCoordinator.scala
index fb60d68f986d689aee280909b1d216392b3208f2..2ea6ee38a932a442c3c1ee065727ffd88bf2d6c1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ExchangeCoordinator.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ExchangeCoordinator.scala
@@ -47,10 +47,10 @@ import org.apache.spark.sql.execution.{ShuffledRowRDD, SparkPlan}
  *    partitions.
  *
  * The workflow of this coordinator is described as follows:
- *  - Before the execution of a [[SparkPlan]], for an [[ShuffleExchange]] operator,
+ *  - Before the execution of a [[SparkPlan]], for a [[ShuffleExchange]] operator,
  *    if an [[ExchangeCoordinator]] is assigned to it, it registers itself to this coordinator.
  *    This happens in the `doPrepare` method.
- *  - Once we start to execute a physical plan, an [[ShuffleExchange]] registered to this
+ *  - Once we start to execute a physical plan, a [[ShuffleExchange]] registered to this
  *    coordinator will call `postShuffleRDD` to get its corresponding post-shuffle
  *    [[ShuffledRowRDD]].
  *    If this coordinator has made the decision on how to shuffle data, this [[ShuffleExchange]]
@@ -61,7 +61,7 @@ import org.apache.spark.sql.execution.{ShuffledRowRDD, SparkPlan}
  *    post-shuffle partitions and pack multiple pre-shuffle partitions with continuous indices
  *    to a single post-shuffle partition whenever necessary.
  *  - Finally, this coordinator will create post-shuffle [[ShuffledRowRDD]]s for all registered
- *    [[ShuffleExchange]]s. So, when an [[ShuffleExchange]] calls `postShuffleRDD`, this coordinator
+ *    [[ShuffleExchange]]s. So, when a [[ShuffleExchange]] calls `postShuffleRDD`, this coordinator
  *    can lookup the corresponding [[RDD]].
  *
  * The strategy used to determine the number of post-shuffle partitions is described as follows.
@@ -98,8 +98,8 @@ private[sql] class ExchangeCoordinator(
   @volatile private[this] var estimated: Boolean = false
 
   /**
-   * Registers an [[ShuffleExchange]] operator to this coordinator. This method is only allowed to
-   * be called in the `doPrepare` method of an [[ShuffleExchange]] operator.
+   * Registers a [[ShuffleExchange]] operator to this coordinator. This method is only allowed to
+   * be called in the `doPrepare` method of a [[ShuffleExchange]] operator.
    */
   @GuardedBy("this")
   def registerExchange(exchange: ShuffleExchange): Unit = synchronized {
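For intuition, the partition-packing step described in the comment above (packing pre-shuffle partitions with continuous indices into a single post-shuffle partition) can be sketched in isolation. This is illustrative only and uses a made-up helper name and a simple "close the group once the target size is reached" rule; the real coordinator works from MapOutputStatistics and uses a slightly different threshold check.

// Illustrative sketch, not ExchangeCoordinator's implementation: coalesce contiguous
// pre-shuffle partitions until each group reaches a target size in bytes.
object CoalesceSketch {
  def packPartitions(bytesByPartition: Array[Long], targetPostShuffleSize: Long): Seq[Range] = {
    val groups = scala.collection.mutable.ArrayBuffer.empty[Range]
    var start = 0
    var currentSize = 0L
    var i = 0
    while (i < bytesByPartition.length) {
      currentSize += bytesByPartition(i)
      if (currentSize >= targetPostShuffleSize) {
        groups += (start to i)   // indices [start, i] become one post-shuffle partition
        start = i + 1
        currentSize = 0L
      }
      i += 1
    }
    if (start < bytesByPartition.length) groups += (start until bytesByPartition.length)
    groups
  }
}

// packPartitions(Array(10L, 20L, 65L, 5L, 60L), 64L) == Seq(0 to 2, 3 to 4)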
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
index f0efa52c3d86230557d6d6c94703bf02ebde6c3e..32f0bc5bf920d684274d7745f2e2ff9bfc8c617b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
 import org.apache.spark.util.collection.BitSet
 
 /**
- * Performs an sort merge join of two child relations.
+ * Performs a sort merge join of two child relations.
  */
 case class SortMergeJoinExec(
     leftKeys: Seq[Expression],
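Since the change here only touches one word, it may help to recall what a sort merge join actually does. The sketch below assumes two inputs already sorted by key and uses plain Scala collections instead of InternalRow iterators; it is not the operator's generated code.

// Illustrative inner sort-merge join over two key-sorted sequences.
def sortMergeJoin[K: Ordering, L, R](left: Seq[(K, L)], right: Seq[(K, R)]): Seq[(K, (L, R))] = {
  val ord = implicitly[Ordering[K]]
  val out = scala.collection.mutable.ArrayBuffer.empty[(K, (L, R))]
  var i = 0
  var j = 0
  while (i < left.length && j < right.length) {
    val c = ord.compare(left(i)._1, right(j)._1)
    if (c < 0) i += 1
    else if (c > 0) j += 1
    else {
      // Pair the current left row with the whole run of equal keys on the right,
      // then advance the left side; the right run is re-scanned for the next equal left key.
      val key = left(i)._1
      var jj = j
      while (jj < right.length && ord.equiv(right(jj)._1, key)) {
        out += ((key, (left(i)._2, right(jj)._2)))
        jj += 1
      }
      i += 1
    }
  }
  out
}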
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/MapPartitionsRWrapper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/MapPartitionsRWrapper.scala
index dc6f2ef37158418c27502fee83711d62fc63107b..6c76328c74830fdd8affb714568b6f5eb91ad175 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/MapPartitionsRWrapper.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/MapPartitionsRWrapper.scala
@@ -40,7 +40,7 @@ private[sql] case class MapPartitionsRWrapper(
 
     val (newIter, deserializer, colNames) =
       if (!isSerializedRData) {
-        // Serialize each row into an byte array that can be deserialized in the R worker
+        // Serialize each row into a byte array that can be deserialized in the R worker
         (iter.asInstanceOf[Iterator[Row]].map {row => rowToRBytes(row)},
          SerializationFormats.ROW, inputSchema.fieldNames)
       } else {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
index 48925910ac8cf03956a696c2417547f3d643bf9a..eac658c6176cbd11af08425b5f36b98f21c9447d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
@@ -133,7 +133,7 @@ abstract class UserDefinedAggregateFunction extends Serializable {
 
 /**
  * :: Experimental ::
- * A [[Row]] representing an mutable aggregation buffer.
+ * A [[Row]] representing a mutable aggregation buffer.
  *
  * This is not meant to be extended outside of Spark.
  */
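Because MutableAggregationBuffer is the Row subtype that a UserDefinedAggregateFunction mutates, a concrete example makes the doc above easier to follow. The class name LongSum below is made up for illustration; the overridden members are the public API declared in this file.

import org.apache.spark.sql.Row
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._

// Minimal UDAF sketch: sums a single LongType column, ignoring nulls.
class LongSum extends UserDefinedAggregateFunction {
  def inputSchema: StructType = StructType(StructField("value", LongType) :: Nil)
  def bufferSchema: StructType = StructType(StructField("sum", LongType) :: Nil)
  def dataType: DataType = LongType
  def deterministic: Boolean = true

  def initialize(buffer: MutableAggregationBuffer): Unit = {
    buffer(0) = 0L
  }

  def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
    if (!input.isNullAt(0)) buffer(0) = buffer.getLong(0) + input.getLong(0)
  }

  def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
    buffer1(0) = buffer1.getLong(0) + buffer2.getLong(0)
  }

  def evaluate(buffer: Row): Any = buffer.getLong(0)
}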
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index 9f6137d6e3c7c3d9a7380622e7828ca8853b8884..0d6f98416bd422b8d8d453d2932ce47b6caec5ab 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -52,7 +52,7 @@ private[sql] class SharedState(val sparkContext: SparkContext) {
     org.apache.spark.util.Utils.getContextOrSparkClassLoader)
 
   /**
-   * Create a SQLListener then add it into SparkContext, and create an SQLTab if there is SparkUI.
+   * Create a SQLListener then add it into SparkContext, and create a SQLTab if there is SparkUI.
    */
   private def createListenerAndUI(sc: SparkContext): SQLListener = {
     if (SparkSession.sqlListener.get() == null) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ContinuousQuery.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ContinuousQuery.scala
index 451cfd85e3bc8ff2574e0ee7a361e0efa470830e..3bbb0b8a888feef92b93fd759488d7f69c6fdfa1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ContinuousQuery.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ContinuousQuery.scala
@@ -93,7 +93,7 @@ trait ContinuousQuery {
   def awaitTermination(timeoutMs: Long): Boolean
 
   /**
-   * Blocks until all available data in the source has been processed an committed to the sink.
+   * Blocks until all available data in the source has been processed and committed to the sink.
    * This method is intended for testing. Note that in the case of continually arriving data, this
    * method may block forever. Additionally, this method is only guaranteed to block until data that
    * has been synchronously appended data to a [[org.apache.spark.sql.execution.streaming.Source]]
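The doc in this hunk belongs to the test-oriented blocking call on ContinuousQuery; the method name is not visible in the hunk itself, so the sketch below assumes it is processAllAvailable() and should be read as an illustration of the intended test pattern rather than a definitive API reference.

import org.apache.spark.sql.streaming.ContinuousQuery

// Sketch only: push data, block until it is processed and committed, then assert on the sink.
// processAllAvailable() is assumed to be the method the doc above describes.
def assertSinkAfterProcessing(query: ContinuousQuery)(appendTestData: => Unit)(checkSink: => Unit): Unit = {
  appendTestData              // synchronously append data to the source
  query.processAllAvailable() // blocks until that data has been committed to the sink
  checkSink                   // only now is the sink guaranteed to contain the data
}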
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala
index befdda1621074308d612ab51d36a69608abada07..5f896969188d75fb2edad4ec2b60dab0c2977856 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala
@@ -218,7 +218,7 @@ private[hive] trait HiveClient {
   /** Create a function in an existing database. */
   def createFunction(db: String, func: CatalogFunction): Unit
 
-  /** Drop an existing function an the database. */
+  /** Drop an existing function in the database. */
   def dropFunction(db: String, name: String): Unit
 
   /** Rename an existing function in the database. */
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
index 0589c8ece3910c6842677ae28658ea403b55a995..f5db73b715820679abdbe4182a9b52abca375dfd 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.types.StructType
 private[orc] object OrcFileOperator extends Logging {
   /**
    * Retrieves an ORC file reader from a given path.  The path can point to either a directory or a
-   * single ORC file.  If it points to an directory, it picks any non-empty ORC file within that
+   * single ORC file.  If it points to a directory, it picks any non-empty ORC file within that
    * directory.
    *
    * The reader returned by this method is mainly used for two purposes:
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index 65d53debd67cf94bbf0c549fc07c4a0ddfd7ce01..a7652143a4252fe5586dbb814d350368e447fa14 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -37,7 +37,7 @@ import org.apache.spark.sql.hive.test.{TestHive, TestHiveQueryExecution}
  * Allows the creations of tests that execute the same query against both hive
  * and catalyst, comparing the results.
  *
- * The "golden" results from Hive are cached in an retrieved both from the classpath and
+ * The "golden" results from Hive are cached in and retrieved both from the classpath and
  * [[answerCache]] to speed up testing.
  *
  * See the documentation of public vals in this class for information on how test execution can be
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/State.scala b/streaming/src/main/scala/org/apache/spark/streaming/State.scala
index 42424d67d88381136cd421bbfbb6718b69a83c9b..3f560f889f055cb089a268303381a26fe37cdac4 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/State.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/State.scala
@@ -120,7 +120,7 @@ sealed abstract class State[S] {
   def isTimingOut(): Boolean
 
   /**
-   * Get the state as an [[scala.Option]]. It will be `Some(state)` if it exists, otherwise `None`.
+   * Get the state as a [[scala.Option]]. It will be `Some(state)` if it exists, otherwise `None`.
    */
   @inline final def getOption(): Option[S] = if (exists) Some(get()) else None
 
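getOption() above is just a null-safe view over exists()/get(); in a mapWithState update function it typically replaces the explicit exists check. A minimal sketch, with the function name and types chosen purely for illustration:

import org.apache.spark.streaming.State

// Illustrative mapWithState-style update function: counts occurrences per key.
def countUpdateFunc(key: String, value: Option[Int], state: State[Long]): (String, Long) = {
  val newCount = state.getOption().getOrElse(0L) + value.getOrElse(0).toLong
  if (!state.isTimingOut()) {
    state.update(newCount)  // updating a timing-out state is not allowed, hence the guard
  }
  (key, newCount)
}

In a real job this function would be handed to mapWithState via StateSpec.function(countUpdateFunc _).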
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
index 7e78fa1d7e159ce7921ca9119c60ebe62cf75881..4c4376a089f59ebf76f5c02153fc00a0f92fe0b5 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
@@ -349,7 +349,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable {
   }
 
   /**
-   * Create an input stream from an queue of RDDs. In each batch,
+   * Create an input stream from a queue of RDDs. In each batch,
    * it will process either one or all of the RDDs returned by the queue.
    *
    * NOTE:
@@ -369,7 +369,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable {
   }
 
   /**
-   * Create an input stream from an queue of RDDs. In each batch,
+   * Create an input stream from a queue of RDDs. In each batch,
    * it will process either one or all of the RDDs returned by the queue.
    *
    * NOTE:
@@ -393,7 +393,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable {
   }
 
   /**
-   * Create an input stream from an queue of RDDs. In each batch,
+   * Create an input stream from a queue of RDDs. In each batch,
    * it will process either one or all of the RDDs returned by the queue.
    *
    * NOTE:
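The three overloads touched above mirror StreamingContext.queueStream on the Scala side; a short Scala sketch of the intended usage, with sc and ssc assumed to be an existing SparkContext and StreamingContext:

import scala.collection.mutable
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.StreamingContext

// Sketch: feed a DStream from a queue of RDDs, consuming one RDD per batch.
def queueStreamExample(sc: SparkContext, ssc: StreamingContext): Unit = {
  val rddQueue = new mutable.Queue[RDD[Int]]()
  val inputStream = ssc.queueStream(rddQueue, oneAtATime = true)
  inputStream.map(_ * 2).print()

  // RDDs enqueued before or after start() are picked up by the batches that follow.
  rddQueue += sc.makeRDD(1 to 100, numSlices = 4)
  ssc.start()
}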
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
index 147e8c129034b7205669ace7f0ffd178b99e5258..fa15a0bf65ab948b9690fd60205ad1086bdea081 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala
@@ -157,7 +157,7 @@ abstract class DStream[T: ClassTag] (
   def persist(level: StorageLevel): DStream[T] = {
     if (this.isInitialized) {
       throw new UnsupportedOperationException(
-        "Cannot change storage level of an DStream after streaming context has started")
+        "Cannot change storage level of a DStream after streaming context has started")
     }
     this.storageLevel = level
     this
@@ -176,7 +176,7 @@ abstract class DStream[T: ClassTag] (
   def checkpoint(interval: Duration): DStream[T] = {
     if (isInitialized) {
       throw new UnsupportedOperationException(
-        "Cannot change checkpoint interval of an DStream after streaming context has started")
+        "Cannot change checkpoint interval of a DStream after streaming context has started")
     }
     persist()
     checkpointDuration = interval
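Both guards above fire only after the streaming context has started, so persistence and checkpointing must be configured while the DStream graph is still being built. A rough sketch of the allowed ordering (the path and stream source are placeholders):

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Configure storage level and checkpoint interval before start(); afterwards both calls throw.
def configureBeforeStart(ssc: StreamingContext): Unit = {
  ssc.checkpoint("/tmp/checkpoints")  // illustrative checkpoint directory
  val words = ssc.socketTextStream("localhost", 9999).flatMap(_.split(" "))
  val counts = words.map(word => (word, 1)).reduceByKey(_ + _)
  counts.persist(StorageLevel.MEMORY_ONLY_SER)  // allowed: context not started yet
  counts.checkpoint(Seconds(10))                // allowed: context not started yet
  ssc.start()                                   // after this, persist()/checkpoint() throw
}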
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlock.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlock.scala
index 47968afef2dbf9448f3c915436a985949990e866..8c3a7977beae312b1ce65994d2b972a78c1193b7 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlock.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlock.scala
@@ -31,5 +31,5 @@ private[streaming] case class ArrayBufferBlock(arrayBuffer: ArrayBuffer[_]) exte
 /** class representing a block received as an Iterator */
 private[streaming] case class IteratorBlock(iterator: Iterator[_]) extends ReceivedBlock
 
-/** class representing a block received as an ByteBuffer */
+/** class representing a block received as a ByteBuffer */
 private[streaming] case class ByteBufferBlock(byteBuffer: ByteBuffer) extends ReceivedBlock
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverSchedulingPolicy.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverSchedulingPolicy.scala
index 391a461f081251fef2d82b884e9c03f014a4c66f..4105171a3db24df00954720c80cf6447d08f5ab8 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverSchedulingPolicy.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverSchedulingPolicy.scala
@@ -31,7 +31,7 @@ import org.apache.spark.streaming.receiver.Receiver
  *    all receivers at the same time. ReceiverTracker will call `scheduleReceivers` at this phase.
  *    It will try to schedule receivers such that they are evenly distributed. ReceiverTracker
  *    should update its `receiverTrackingInfoMap` according to the results of `scheduleReceivers`.
- *    `ReceiverTrackingInfo.scheduledLocations` for each receiver should be set to an location list
+ *    `ReceiverTrackingInfo.scheduledLocations` for each receiver should be set to a location list
  *    that contains the scheduled locations. Then when a receiver is starting, it will send a
  *    register request and `ReceiverTracker.registerReceiver` will be called. In
  *    `ReceiverTracker.registerReceiver`, if a receiver's scheduled locations is set, it should
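The first phase described above boils down to spreading receivers evenly over the candidate executors. A stripped-down sketch of that idea follows (round-robin only; the real scheduleReceivers also honors preferred locations and balances by weighted load, and its result type differs):

// Illustrative only: distribute receiver ids across executors round-robin.
def evenlyDistribute(receiverIds: Seq[Int], executors: Seq[String]): Map[Int, String] = {
  require(executors.nonEmpty, "no executors available to schedule receivers on")
  receiverIds.zipWithIndex.map { case (receiverId, idx) =>
    receiverId -> executors(idx % executors.length)
  }.toMap
}

// evenlyDistribute(Seq(0, 1, 2), Seq("host1", "host2")) == Map(0 -> "host1", 1 -> "host2", 2 -> "host1")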
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
index bdbac64b9bc794b65fb3d059cbe4e57e4f6d3b7e..bd8f9950bf1c79df6cc8426dc6df27aec15624ba 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
@@ -71,7 +71,7 @@ trait DStreamCheckpointTester { self: SparkFunSuite =>
   /**
    * Tests a streaming operation under checkpointing, by restarting the operation
    * from checkpoint file and verifying whether the final output is correct.
-   * The output is assumed to have come from a reliable queue which an replay
+   * The output is assumed to have come from a reliable queue which can replay
    * data as required.
    *
    * NOTE: This takes into consideration that the last batch processed before
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/rate/PIDRateEstimatorSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/rate/PIDRateEstimatorSuite.scala
index a1af95be81c8e809e309dd34391d37a30527324c..1a0460cd669afd841ce47cf05b6b75c084b03584 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/rate/PIDRateEstimatorSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/rate/PIDRateEstimatorSuite.scala
@@ -119,7 +119,7 @@ class PIDRateEstimatorSuite extends SparkFunSuite with Matchers {
 
   test("with no accumulated but some positive error, |I| > 0, follow the processing speed") {
     val p = new PIDRateEstimator(20, 1D, 1D, 0D, 10)
-    // prepare a series of batch updates, one every 20ms with an decreasing number of processed
+    // prepare a series of batch updates, one every 20ms with a decreasing number of processed
     // elements in each batch, but constant processing time, and no accumulated error. Even though
     // the integral part is non-zero, the estimated rate should follow only the proportional term,
     // asking for less and less elements
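For reference, the update this test exercises is a standard PID step. The sketch below shows only the general shape (Spark's PIDRateEstimator additionally clamps to a minimum rate and derives its error terms from batch timing, so treat this as intuition rather than its exact code): with no accumulated error and proportional = 1, the new rate collapses to latestRate - (latestRate - processingRate) = processingRate, which is why the estimate is expected to follow the processing speed.

// Generic PID rate update, for intuition only.
def pidUpdate(
    latestRate: Double,       // rate used for the last batch (elements/sec)
    processingRate: Double,   // elements actually processed per second in the last batch
    historicalError: Double,  // accumulated (integral) error term
    errorDerivative: Double,  // change of the error per second (derivative term)
    proportional: Double,
    integral: Double,
    derivative: Double): Double = {
  val error = latestRate - processingRate
  latestRate - proportional * error - integral * historicalError - derivative * errorDerivative
}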