diff --git a/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala b/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala index f157a451ef3e73cb62af5c6c1880afeb68a98006..fa078ee25a129512d3491850a4028f8c840664c4 100644 --- a/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala +++ b/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala @@ -28,7 +28,7 @@ import org.apache.spark._ * of them will be combined together, showed in one line. */ private[spark] class ConsoleProgressBar(sc: SparkContext) extends Logging { - // Carrige return + // Carriage return val CR = '\r' // Update period of progress bar, in milliseconds val UPDATE_PERIOD = 200L diff --git a/core/src/test/scala/org/apache/sparktest/ImplicitSuite.scala b/core/src/test/scala/org/apache/sparktest/ImplicitSuite.scala index daa795a0434959cf369d1357c741f8ce543f80a2..2fb09ead4b2d880d832aa6f9fb86c596c9d6ed8c 100644 --- a/core/src/test/scala/org/apache/sparktest/ImplicitSuite.scala +++ b/core/src/test/scala/org/apache/sparktest/ImplicitSuite.scala @@ -26,11 +26,11 @@ package org.apache.sparktest */ class ImplicitSuite { - // We only want to test if `implict` works well with the compiler, so we don't need a real + // We only want to test if `implicit` works well with the compiler, so we don't need a real // SparkContext. def mockSparkContext[T]: org.apache.spark.SparkContext = null - // We only want to test if `implict` works well with the compiler, so we don't need a real RDD. + // We only want to test if `implicit` works well with the compiler, so we don't need a real RDD. def mockRDD[T]: org.apache.spark.rdd.RDD[T] = null def testRddToPairRDDFunctions(): Unit = { diff --git a/dev/run-tests.py b/dev/run-tests.py index b65d1a309cb4a7631ccd02df7ff1ea2642a4a1a4..aa6af564be19f6d1806bf29f36d1ba7c6d9e072f 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -563,7 +563,7 @@ def main(): # backwards compatibility checks if build_tool == "sbt": - # Note: compatiblity tests only supported in sbt for now + # Note: compatibility tests only supported in sbt for now detect_binary_inop_with_mima() # run the test suites diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java index b9dd3ad957714c03862aded023097c72dbe3e33c..da2012ad514b2533b4685f288a61c2bed1bf2bd8 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java @@ -228,7 +228,7 @@ class MyJavaLogisticRegressionModel * Create a copy of the model. * The copy is shallow, except for the embedded paramMap, which gets a deep copy. * <p> - * This is used for the defaul implementation of [[transform()]]. + * This is used for the default implementation of [[transform()]]. * * In Java, we have to make this method public since Java does not understand Scala's protected * modifier. diff --git a/examples/src/main/python/mllib/naive_bayes_example.py b/examples/src/main/python/mllib/naive_bayes_example.py index e7d5893d67413535ec93368edd801bb194d8523e..35724f7d6a92d489eceb09ef2ea6172ad6eaeb2d 100644 --- a/examples/src/main/python/mllib/naive_bayes_example.py +++ b/examples/src/main/python/mllib/naive_bayes_example.py @@ -47,7 +47,7 @@ if __name__ == "__main__": # $example on$ data = sc.textFile('data/mllib/sample_naive_bayes_data.txt').map(parseLine) - # Split data aproximately into training (60%) and test (40%) + # Split data approximately into training (60%) and test (40%) training, test = data.randomSplit([0.6, 0.4], seed=0) # Train a naive Bayes model. diff --git a/examples/src/main/python/mllib/ranking_metrics_example.py b/examples/src/main/python/mllib/ranking_metrics_example.py index 327791966c901aba6e82e3501919a7b790137b57..21333deded35dfb2424055b0bcce761574547ecb 100644 --- a/examples/src/main/python/mllib/ranking_metrics_example.py +++ b/examples/src/main/python/mllib/ranking_metrics_example.py @@ -47,7 +47,7 @@ if __name__ == "__main__": # Instantiate regression metrics to compare predicted and actual ratings metrics = RegressionMetrics(scoreAndLabels) - # Root mean sqaured error + # Root mean squared error print("RMSE = %s" % metrics.rootMeanSquaredError) # R-squared diff --git a/examples/src/main/python/mllib/word2vec.py b/examples/src/main/python/mllib/word2vec.py index 40d1b887927e0fd305dccf7c7d5862fbb5da90ca..4e7d4f7610c242fe8935b4163c3eb1d4fbdc7298 100644 --- a/examples/src/main/python/mllib/word2vec.py +++ b/examples/src/main/python/mllib/word2vec.py @@ -16,7 +16,7 @@ # # This example uses text8 file from http://mattmahoney.net/dc/text8.zip -# The file was downloadded, unziped and split into multiple lines using +# The file was downloaded, unzipped and split into multiple lines using # # wget http://mattmahoney.net/dc/text8.zip # unzip text8.zip diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala index a3901850f283e35b2412849442fbadb1ac10289f..f2e4c96fa56c53e5ac68a8fb37fedb02b0c8ac29 100644 --- a/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala @@ -30,7 +30,7 @@ import breeze.linalg.{DenseVector, Vector} * org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS based on your needs. */ object LocalFileLR { - val D = 10 // Numer of dimensions + val D = 10 // Number of dimensions val rand = new Random(42) case class DataPoint(x: Vector[Double], y: Double) diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala index e4486b949fb3ee867f9d8a493a7474b9fbb78350..f7eb9e99367a4f77281e9955b63f242b61683989 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala @@ -35,7 +35,7 @@ import org.apache.spark._ * org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS based on your needs. */ object SparkHdfsLR { - val D = 10 // Numer of dimensions + val D = 10 // Number of dimensions val rand = new Random(42) case class DataPoint(x: Vector[Double], y: Double) diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala index 132800e6e4ca0c1a19c3dfc01aade4b87e799519..036e3d24c985f3b585ed42844ead8b24d2fe1ee4 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala @@ -36,7 +36,7 @@ import org.apache.spark._ */ object SparkLR { val N = 10000 // Number of data points - val D = 10 // Numer of dimensions + val D = 10 // Number of dimensions val R = 0.7 // Scaling factor val ITERATIONS = 5 val rand = new Random(42) diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala index 620ff07631c36757e9ee2dc1e9178de3a3642a45..94b67cb29beb03bd4bceca73909510eff9e4e130 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala @@ -60,10 +60,10 @@ object RDDRelation { // Write out an RDD as a parquet file with overwrite mode. df.write.mode(SaveMode.Overwrite).parquet("pair.parquet") - // Read in parquet file. Parquet files are self-describing so the schmema is preserved. + // Read in parquet file. Parquet files are self-describing so the schema is preserved. val parquetFile = sqlContext.read.parquet("pair.parquet") - // Queries can be run using the DSL on parequet files just like the original RDD. + // Queries can be run using the DSL on parquet files just like the original RDD. parquetFile.where($"key" === 1).select($"value".as("a")).collect().foreach(println) // These files can also be registered as tables. diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/TwitterPopularTags.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/TwitterPopularTags.scala index c386e39d529c25d96aa07e7954b55c6a3202d5de..5b69963cc88806e0b277c8df6d297a3dfc259c14 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/TwitterPopularTags.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/TwitterPopularTags.scala @@ -44,7 +44,7 @@ object TwitterPopularTags { val filters = args.takeRight(args.length - 4) // Set the system properties so that Twitter4j library used by twitter stream - // can use them to generat OAuth credentials + // can use them to generate OAuth credentials System.setProperty("twitter4j.oauth.consumerKey", consumerKey) System.setProperty("twitter4j.oauth.consumerSecret", consumerSecret) System.setProperty("twitter4j.oauth.accessToken", accessToken) diff --git a/graphx/src/test/scala/org/apache/spark/graphx/EdgeSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/EdgeSuite.scala index 094a63472eaabd99e2d1c0ff24a8403c1faf3218..4d6b899c83a04a3c1a31a7e38a5c45abfcfe1a45 100644 --- a/graphx/src/test/scala/org/apache/spark/graphx/EdgeSuite.scala +++ b/graphx/src/test/scala/org/apache/spark/graphx/EdgeSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.SparkFunSuite class EdgeSuite extends SparkFunSuite { test ("compare") { - // decending order + // descending order val testEdges: Array[Edge[Int]] = Array( Edge(0x7FEDCBA987654321L, -0x7FEDCBA987654321L, 1), Edge(0x2345L, 0x1234L, 1), diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala index 94a24b527ba3ea1bfb7505d121e6c1fee726d4d5..fd2f8d387556a0be0056fc2b900a352810f78ad6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala @@ -541,7 +541,7 @@ object PrefixSpan extends Logging { } /** - * Represents a frequence sequence. + * Represents a frequent sequence. * @param sequence a sequence of itemsets stored as an Array of Arrays * @param freq frequency * @tparam Item item type diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 1ba6a075134c1da5fa2f1bf07de4ee1e2568bec1..a380c4cca2d27f3af5ff539a498606d01ec581a1 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -688,7 +688,7 @@ object Unidoc { "-noqualifier", "java.lang" ), - // Use GitHub repository for Scaladoc source linke + // Use GitHub repository for Scaladoc source links unidocSourceBase := s"https://github.com/apache/spark/tree/v${version.value}", scalacOptions in (ScalaUnidoc, unidoc) ++= Seq( diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py index 5c9706cb8cb29c0cd65c16ad91bdfc867296026f..f339e508911660b6a0c24154ee67fca92c1a514e 100644 --- a/python/pyspark/mllib/fpm.py +++ b/python/pyspark/mllib/fpm.py @@ -127,7 +127,7 @@ class PrefixSpanModel(JavaModelWrapper): @since("1.6.0") def freqSequences(self): - """Gets frequence sequences""" + """Gets frequent sequences""" return self.call("getFreqSequences").map(lambda x: PrefixSpan.FreqSequence(x[0], x[1])) diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkIMain.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkIMain.scala index 7fcb423575d3931e94f74196b6702a90f6f31d27..fc260c031014d9a726112983f282336df6be4a49 100644 --- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkIMain.scala +++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkIMain.scala @@ -72,7 +72,7 @@ import org.apache.spark.annotation.DeveloperApi * all variables defined by that code. To extract the result of an * interpreted line to show the user, a second "result object" is created * which imports the variables exported by the above object and then - * exports members called "$eval" and "$print". To accomodate user expressions + * exports members called "$eval" and "$print". To accommodate user expressions * that read from variables or methods defined in previous statements, "import" * statements are used. * @@ -1515,7 +1515,7 @@ import org.apache.spark.annotation.DeveloperApi exprTyper.symbolOfLine(code) /** - * Constucts type information based on the provided expression's final + * Constructs type information based on the provided expression's final * result or the definition provided. * * @param expr The expression or definition diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index 8e432e8f3d96b43e605819be6e9f0f29125e8c40..46b3877a7cab346cb713246ba0c082ec1d09f63a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -29,7 +29,7 @@ private case object OracleDialect extends JdbcDialect { override def getCatalystType( sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { // Handle NUMBER fields that have no precision/scale in special way - // because JDBC ResultSetMetaData converts this to 0 procision and -127 scale + // because JDBC ResultSetMetaData converts this to 0 precision and -127 scale // For more details, please see // https://github.com/apache/spark/pull/8780#issuecomment-145598968 // and diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/OffsetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/OffsetSuite.scala index 989465826d54e4eec220fa2bfbb8f30edb216d15..9590af4e7737d716cb531a42154cbdd54b56ad96 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/OffsetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/OffsetSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.execution.streaming.{CompositeOffset, LongOffset, Of trait OffsetSuite extends SparkFunSuite { /** Creates test to check all the comparisons of offsets given a `one` that is less than `two`. */ def compare(one: Offset, two: Offset): Unit = { - test(s"comparision $one <=> $two") { + test(s"comparison $one <=> $two") { assert(one < two) assert(one <= two) assert(one <= one)