diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java
index 5a36e18b0981162c02dd2f0f210c414abd8c8cde..b5a9d6671f7c4b19f1d5e01515e844c9420d3d43 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java
@@ -94,7 +94,7 @@ public class TransportClientFactory implements Closeable {
     this.context = Preconditions.checkNotNull(context);
     this.conf = context.getConf();
     this.clientBootstraps = Lists.newArrayList(Preconditions.checkNotNull(clientBootstraps));
-    this.connectionPool = new ConcurrentHashMap<SocketAddress, ClientPool>();
+    this.connectionPool = new ConcurrentHashMap<>();
     this.numConnectionsPerPeer = conf.numConnectionsPerPeer();
     this.rand = new Random();
 
diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java
index f0e2004d2de2ea16ca8b52f8950e2eaec4eeca70..8a69223c88ee445ed910c6fa3ef7127de761e63b 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java
@@ -64,9 +64,9 @@ public class TransportResponseHandler extends MessageHandler<ResponseMessage> {
 
   public TransportResponseHandler(Channel channel) {
     this.channel = channel;
-    this.outstandingFetches = new ConcurrentHashMap<StreamChunkId, ChunkReceivedCallback>();
-    this.outstandingRpcs = new ConcurrentHashMap<Long, RpcResponseCallback>();
-    this.streamCallbacks = new ConcurrentLinkedQueue<StreamCallback>();
+    this.outstandingFetches = new ConcurrentHashMap<>();
+    this.outstandingRpcs = new ConcurrentHashMap<>();
+    this.streamCallbacks = new ConcurrentLinkedQueue<>();
     this.timeOfLastRequestNs = new AtomicLong(0);
   }
 
diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java b/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java
index e2222ae08534ba01711d4487510f2219660919fd..ae7e520b2f709dbc89ce046a9a05706f9c042f1a 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java
@@ -63,7 +63,7 @@ public class OneForOneStreamManager extends StreamManager {
     // For debugging purposes, start with a random stream id to help identifying different streams.
     // This does not need to be globally unique, only unique to this class.
     nextStreamId = new AtomicLong((long) new Random().nextInt(Integer.MAX_VALUE) * 1000);
-    streams = new ConcurrentHashMap<Long, StreamState>();
+    streams = new ConcurrentHashMap<>();
   }
 
   @Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java b/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java
index 268cb40121754f94faf85d25dbc778123d80aaee..56a025c4d95d8efe47c476658131be3e1069a942 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java
@@ -37,7 +37,7 @@ public class ShuffleSecretManager implements SecretKeyHolder {
   private static final String SPARK_SASL_USER = "sparkSaslUser";
 
   public ShuffleSecretManager() {
-    shuffleSecretMap = new ConcurrentHashMap<String, String>();
+    shuffleSecretMap = new ConcurrentHashMap<>();
   }
 
   /**
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java
index 2b1c860e559523782d3b97dd8c213530f33009ea..01aed95878cf648223621ec5c6b47f67a7f55d8b 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java
@@ -45,7 +45,7 @@ final class UnsafeSorterSpillMerger {
         }
       }
     };
-    priorityQueue = new PriorityQueue<UnsafeSorterIterator>(numSpills, comparator);
+    priorityQueue = new PriorityQueue<>(numSpills, comparator);
   }
 
   /**
diff --git a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala
index ff279ec270c2180ea7e24a43ae7075ff7494e505..07d1fa2c4a9a54be9c0a47484ef0debbb0da3432 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala
@@ -182,7 +182,7 @@ private[spark] class RRunner[U](
           }
           stream.flush()
         } catch {
-          // TODO: We should propogate this error to the task thread
+          // TODO: We should propagate this error to the task thread
           case e: Exception =>
             logError("R Writer thread got an exception", e)
         } finally {
diff --git a/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala b/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
index d397cca4b444d9473db9bbc198f9d2b8e3ce7bf8..8c67364ef1a05ca9163d999c4ff36270ba7d1f61 100644
--- a/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
+++ b/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
@@ -326,7 +326,7 @@ class GapSamplingReplacement(
  /**
   * Skip elements with replication factor zero (i.e. elements that won't be sampled).
   * Samples 'k' from geometric distribution P(k) = (1-q)(q)^k, where q = e^(-f), that is
-  * q is the probabililty of Poisson(0; f)
+  * q is the probability of Poisson(0; f)
   */
  private def advance(): Unit = {
    val u = math.max(rng.nextDouble(), epsilon)
diff --git a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java
index 44733dcdafc4122fdfca42ed0428e85234187d26..30750b1bf1980dd6db122880cc93bdc38d584615 100644
--- a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java
+++ b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java
@@ -170,11 +170,11 @@ public class UnsafeShuffleWriterSuite {
   private UnsafeShuffleWriter<Object, Object> createWriter(
       boolean transferToEnabled) throws IOException {
     conf.set("spark.file.transferTo", String.valueOf(transferToEnabled));
-    return new UnsafeShuffleWriter<Object, Object>(
+    return new UnsafeShuffleWriter<>(
       blockManager,
       shuffleBlockResolver,
       taskMemoryManager,
-      new SerializedShuffleHandle<Object, Object>(0, 1, shuffleDep),
+      new SerializedShuffleHandle<>(0, 1, shuffleDep),
       0, // map id
       taskContext,
       conf
diff --git a/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
index 8abc03e73d965517ef38a599d210016617f2f670..ebb0687b14ae0c1a023d66d6043baba96002c91e 100644
--- a/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
+++ b/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
@@ -82,10 +82,10 @@ public final class JavaLogQuery {
       String user = m.group(3);
       String query = m.group(5);
       if (!user.equalsIgnoreCase("-")) {
-        return new Tuple3<String, String, String>(ip, user, query);
+        return new Tuple3<>(ip, user, query);
       }
     }
-    return new Tuple3<String, String, String>(null, null, null);
+    return new Tuple3<>(null, null, null);
   }
 
   public static Stats extractStats(String line) {
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaMultiLabelClassificationMetricsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaMultiLabelClassificationMetricsExample.java
index 5904260e2d81adf9b4a56dbdf06fda69e4b3ef0e..bc99dc023fa7bb2ab116ba2e612334c5ea5695c5 100644
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaMultiLabelClassificationMetricsExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaMultiLabelClassificationMetricsExample.java
@@ -34,13 +34,13 @@ public class JavaMultiLabelClassificationMetricsExample {
     JavaSparkContext sc = new JavaSparkContext(conf);
     // $example on$
     List<Tuple2<double[], double[]>> data = Arrays.asList(
-      new Tuple2<double[], double[]>(new double[]{0.0, 1.0}, new double[]{0.0, 2.0}),
-      new Tuple2<double[], double[]>(new double[]{0.0, 2.0}, new double[]{0.0, 1.0}),
-      new Tuple2<double[], double[]>(new double[]{}, new double[]{0.0}),
-      new Tuple2<double[], double[]>(new double[]{2.0}, new double[]{2.0}),
-      new Tuple2<double[], double[]>(new double[]{2.0, 0.0}, new double[]{2.0, 0.0}),
-      new Tuple2<double[], double[]>(new double[]{0.0, 1.0, 2.0}, new double[]{0.0, 1.0}),
-      new Tuple2<double[], double[]>(new double[]{1.0}, new double[]{1.0, 2.0})
+      new Tuple2<>(new double[]{0.0, 1.0}, new double[]{0.0, 2.0}),
+      new Tuple2<>(new double[]{0.0, 2.0}, new double[]{0.0, 1.0}),
+      new Tuple2<>(new double[]{}, new double[]{0.0}),
+      new Tuple2<>(new double[]{2.0}, new double[]{2.0}),
+      new Tuple2<>(new double[]{2.0, 0.0}, new double[]{2.0, 0.0}),
+      new Tuple2<>(new double[]{0.0, 1.0, 2.0}, new double[]{0.0, 1.0}),
+      new Tuple2<>(new double[]{1.0}, new double[]{1.0, 2.0})
     );
     JavaRDD<Tuple2<double[], double[]>> scoreAndLabels = sc.parallelize(data);
 
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java
index b62fa90c3420f14793692ff82029c6f62fd00e8a..91c3bd72da3a7bba077293734fa29c4cbe086255 100644
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java
@@ -40,11 +40,11 @@ public class JavaPowerIterationClusteringExample {
     @SuppressWarnings("unchecked")
     // $example on$
     JavaRDD<Tuple3<Long, Long, Double>> similarities = sc.parallelize(Lists.newArrayList(
-      new Tuple3<Long, Long, Double>(0L, 1L, 0.9),
-      new Tuple3<Long, Long, Double>(1L, 2L, 0.9),
-      new Tuple3<Long, Long, Double>(2L, 3L, 0.9),
-      new Tuple3<Long, Long, Double>(3L, 4L, 0.1),
-      new Tuple3<Long, Long, Double>(4L, 5L, 0.9)));
+      new Tuple3<>(0L, 1L, 0.9),
+      new Tuple3<>(1L, 2L, 0.9),
+      new Tuple3<>(2L, 3L, 0.9),
+      new Tuple3<>(3L, 4L, 0.1),
+      new Tuple3<>(4L, 5L, 0.9)));
 
     PowerIterationClustering pic = new PowerIterationClustering()
       .setK(2)
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
index c27fba278347b8ffd8dd649501e738e1ab798463..86c389e11cfdcb83a7b1867400cbdd04e10a4363 100644
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
@@ -36,7 +36,7 @@ public class JavaStratifiedSamplingExample {
     JavaSparkContext jsc = new JavaSparkContext(conf);
 
     // $example on$
-    List<Tuple2<Integer, Character>> list = new ArrayList<Tuple2<Integer, Character>>(
+    List<Tuple2<Integer, Character>> list = new ArrayList<>(
       Arrays.<Tuple2<Integer, Character>>asList(
         new Tuple2(1, 'a'),
         new Tuple2(1, 'b'),
diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaFlumeEventCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaFlumeEventCount.java
index da56637fe891a8e28aeee507b79bbf7468797b39..bae4b78ac2f472b8568b6586b339cd3cd7bdfd23 100644
--- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaFlumeEventCount.java
+++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaFlumeEventCount.java
@@ -19,7 +19,6 @@ package org.apache.spark.examples.streaming;
 
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.function.Function;
-import org.apache.spark.examples.streaming.StreamingExamples;
 import org.apache.spark.streaming.*;
 import org.apache.spark.streaming.api.java.*;
 import org.apache.spark.streaming.flume.FlumeUtils;
@@ -58,7 +57,8 @@ public final class JavaFlumeEventCount {
     Duration batchInterval = new Duration(2000);
     SparkConf sparkConf = new SparkConf().setAppName("JavaFlumeEventCount");
     JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, batchInterval);
-    JavaReceiverInputDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(ssc, host, port);
+    JavaReceiverInputDStream<SparkFlumeEvent> flumeStream =
+      FlumeUtils.createStream(ssc, host, port);
 
     flumeStream.count();
 
diff --git a/external/flume/src/test/java/org/apache/spark/streaming/flume/JavaFlumeStreamSuite.java b/external/flume/src/test/java/org/apache/spark/streaming/flume/JavaFlumeStreamSuite.java
index 3b5e0c7746b2c03c8dd618568cf84a4ff8ecb657..ada05f203b6a8fa251dfb02165757a8af5de946c 100644
--- a/external/flume/src/test/java/org/apache/spark/streaming/flume/JavaFlumeStreamSuite.java
+++ b/external/flume/src/test/java/org/apache/spark/streaming/flume/JavaFlumeStreamSuite.java
@@ -27,10 +27,11 @@ public class JavaFlumeStreamSuite extends LocalJavaStreamingContext {
   @Test
   public void testFlumeStream() {
     // tests the API, does not actually test data receiving
-    JavaReceiverInputDStream<SparkFlumeEvent> test1 = FlumeUtils.createStream(ssc, "localhost", 12345);
-    JavaReceiverInputDStream<SparkFlumeEvent> test2 = FlumeUtils.createStream(ssc, "localhost", 12345,
-      StorageLevel.MEMORY_AND_DISK_SER_2());
-    JavaReceiverInputDStream<SparkFlumeEvent> test3 = FlumeUtils.createStream(ssc, "localhost", 12345,
-      StorageLevel.MEMORY_AND_DISK_SER_2(), false);
+    JavaReceiverInputDStream<SparkFlumeEvent> test1 = FlumeUtils.createStream(ssc, "localhost",
+      12345);
+    JavaReceiverInputDStream<SparkFlumeEvent> test2 = FlumeUtils.createStream(ssc, "localhost",
+      12345, StorageLevel.MEMORY_AND_DISK_SER_2());
+    JavaReceiverInputDStream<SparkFlumeEvent> test3 = FlumeUtils.createStream(ssc, "localhost",
+      12345, StorageLevel.MEMORY_AND_DISK_SER_2(), false);
   }
 }
diff --git a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
index 1e55aad5c918b356b4163cfb39252a7925ea61e7..a08c8dcba402b3021254816588416742e718b0ab 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java
@@ -34,7 +34,7 @@ class CommandBuilderUtils {
   /** The set of known JVM vendors. */
   enum JavaVendor {
     Oracle, IBM, OpenJDK, Unknown
-  };
+  }
 
   /** Returns whether the given string is null or empty. */
   static boolean isEmpty(String s) {
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java b/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java
index a54215990137ef5ea08fcdcc1baa58d37ebcf461..a083f05a2a9f72dd79c98ae15ab9b3d372d702a9 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java
@@ -477,6 +477,6 @@ public class SparkLauncher {
       // No op.
     }
 
-  };
+  }
 
 }
diff --git a/launcher/src/test/java/org/apache/spark/launcher/LauncherServerSuite.java b/launcher/src/test/java/org/apache/spark/launcher/LauncherServerSuite.java
index 5bf2babdd103636f175a52efa1e53f0f4ab9af97..a9039b3ec906279dd950dab9b4562767f14e311e 100644
--- a/launcher/src/test/java/org/apache/spark/launcher/LauncherServerSuite.java
+++ b/launcher/src/test/java/org/apache/spark/launcher/LauncherServerSuite.java
@@ -175,7 +175,7 @@ public class LauncherServerSuite extends BaseSuite {
 
     TestClient(Socket s) throws IOException {
       super(s);
-      this.inbound = new LinkedBlockingQueue<Message>();
+      this.inbound = new LinkedBlockingQueue<>();
       this.clientThread = new Thread(this);
       clientThread.setName("TestClient");
       clientThread.setDaemon(true);
diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
index b7f4f2efc5d84e3869adcc843d80efc43ddbfe87..29cbbe825bce52cbb01d22cace95587caa079bfe 100644
--- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
+++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
@@ -160,7 +160,7 @@ public class SparkSubmitCommandBuilderSuite extends BaseSuite {
       "SparkPi",
       "42");
 
-    Map<String, String> env = new HashMap<String, String>();
+    Map<String, String> env = new HashMap<>();
     List<String> cmd = buildCommand(sparkSubmitArgs, env);
     assertEquals("foo", findArgValue(cmd, parser.MASTER));
     assertEquals("bar", findArgValue(cmd, parser.DEPLOY_MODE));
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index 23c4af17f90170cddfa73f81fde02ef584688b99..4525bf71f69e2d42be87a050ecc8fdcf69061fd4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -205,7 +205,7 @@ final class DecisionTreeClassificationModel private[ml] (
   @Since("2.0.0")
   lazy val featureImportances: Vector = TreeEnsembleModel.featureImportances(this, numFeatures)
 
-  /** Convert to spark.mllib DecisionTreeModel (losing some infomation) */
+  /** Convert to spark.mllib DecisionTreeModel (losing some information) */
   override private[spark] def toOld: OldDecisionTreeModel = {
     new OldDecisionTreeModel(rootNode.toOld(1), OldAlgo.Classification)
   }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
index 3ce129b12c5eac78b76950c39027ca155ecdac5f..1d03a5b4f40481ce160d30c944718b0747e8c7fb 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala
@@ -62,7 +62,7 @@ private[shared] object SharedParamsCodeGen {
         "every 10 iterations", isValid = "(interval: Int) => interval == -1 || interval >= 1"),
       ParamDesc[Boolean]("fitIntercept", "whether to fit an intercept term", Some("true")),
       ParamDesc[String]("handleInvalid", "how to handle invalid entries. Options are skip (which " +
-        "will filter out rows with bad values), or error (which will throw an errror). More " +
+        "will filter out rows with bad values), or error (which will throw an error). More " +
         "options may be added later",
         isValid = "ParamValidators.inArray(Array(\"skip\", \"error\"))"),
       ParamDesc[Boolean]("standardization", "whether to standardize the training features" +
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
index 96263c5bafd64f263d60d10a103d29e9a9a7138c..64d6af2766ca913b5141382cf51e8a803199d72c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
@@ -270,10 +270,10 @@ private[ml] trait HasFitIntercept extends Params {
 private[ml] trait HasHandleInvalid extends Params {
 
   /**
-   * Param for how to handle invalid entries. Options are skip (which will filter out rows with bad values), or error (which will throw an errror). More options may be added later.
+   * Param for how to handle invalid entries. Options are skip (which will filter out rows with bad values), or error (which will throw an error). More options may be added later.
    * @group param
    */
-  final val handleInvalid: Param[String] = new Param[String](this, "handleInvalid", "how to handle invalid entries. Options are skip (which will filter out rows with bad values), or error (which will throw an errror). More options may be added later", ParamValidators.inArray(Array("skip", "error")))
+  final val handleInvalid: Param[String] = new Param[String](this, "handleInvalid", "how to handle invalid entries. Options are skip (which will filter out rows with bad values), or error (which will throw an error). More options may be added later", ParamValidators.inArray(Array("skip", "error")))
 
   /** @group getParam */
   final def getHandleInvalid: String = $(handleInvalid)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index 0a3d00e47096f362d51022b05aaf26417b19f532..1289a317ee7f051df9480ddccb76c9a956f71ba2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -205,7 +205,7 @@ final class DecisionTreeRegressionModel private[ml] (
   @Since("2.0.0")
   lazy val featureImportances: Vector = TreeEnsembleModel.featureImportances(this, numFeatures)
 
-  /** Convert to spark.mllib DecisionTreeModel (losing some infomation) */
+  /** Convert to spark.mllib DecisionTreeModel (losing some information) */
   override private[spark] def toOld: OldDecisionTreeModel = {
     new OldDecisionTreeModel(rootNode.toOld(1), OldAlgo.Regression)
   }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
index 1fad9d6d8c7a1b6048152d9c9b312a9f9b95ad2c..8ea767b2b32621de8a5c9cedd2d1169c086e0099 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
@@ -71,7 +71,7 @@ private[spark] trait DecisionTreeModel {
    */
   private[ml] def maxSplitFeatureIndex(): Int = rootNode.maxSplitFeatureIndex()
 
-  /** Convert to spark.mllib DecisionTreeModel (losing some infomation) */
+  /** Convert to spark.mllib DecisionTreeModel (losing some information) */
   private[spark] def toOld: OldDecisionTreeModel
 }
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index c0404be019a97d05d2e5f1132feef6eea656cee3..f10570e662e07df87f05525615288d12790a6e63 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -418,7 +418,7 @@ class LogisticRegressionWithLBFGS
   private def run(input: RDD[LabeledPoint],
       initialWeights: Vector,
       userSuppliedWeights: Boolean): LogisticRegressionModel = {
-    // ml's Logisitic regression only supports binary classifcation currently.
+    // ml's Logistic regression only supports binary classification currently.
     if (numOfLinearPredictor == 1) {
       def runWithMlLogisitcRegression(elasticNetParam: Double) = {
         // Prepare the ml LogisticRegression based on our settings
diff --git a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java
index 65841182df9b4fcebad13bb53dfebbfc0fd8db58..06f7fbb86e88ec89c9295fc6c62b20f489a4aa1f 100644
--- a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java
+++ b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java
@@ -89,7 +89,7 @@ public class JavaTestParams extends JavaParams {
     myDoubleParam_ = new DoubleParam(this, "myDoubleParam", "this is a double param",
       ParamValidators.inRange(0.0, 1.0));
     List<String> validStrings = Arrays.asList("a", "b");
-    myStringParam_ = new Param<String>(this, "myStringParam", "this is a string param",
+    myStringParam_ = new Param<>(this, "myStringParam", "this is a string param",
       ParamValidators.inArray(validStrings));
     myDoubleArrayParam_ =
       new DoubleArrayParam(this, "myDoubleArrayParam", "this is a double param");
diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaStreamingLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaStreamingLogisticRegressionSuite.java
index c9e5ee22f32737d16847f1e7c92824d8fb9611c5..62c6d9b7e390af00967da5c2136fb0637cc1561a 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaStreamingLogisticRegressionSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaStreamingLogisticRegressionSuite.java
@@ -66,8 +66,8 @@ public class JavaStreamingLogisticRegressionSuite implements Serializable {
     JavaDStream<LabeledPoint> training =
       attachTestInputStream(ssc, Arrays.asList(trainingBatch, trainingBatch), 2);
     List<Tuple2<Integer, Vector>> testBatch = Arrays.asList(
-      new Tuple2<Integer, Vector>(10, Vectors.dense(1.0)),
-      new Tuple2<Integer, Vector>(11, Vectors.dense(0.0)));
+      new Tuple2<>(10, Vectors.dense(1.0)),
+      new Tuple2<>(11, Vectors.dense(0.0)));
     JavaPairDStream<Integer, Vector> test = JavaPairDStream.fromJavaDStream(
       attachTestInputStream(ssc, Arrays.asList(testBatch, testBatch), 2));
     StreamingLogisticRegressionWithSGD slr = new StreamingLogisticRegressionWithSGD()
diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaStreamingKMeansSuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaStreamingKMeansSuite.java
index d644766d1e54dc66188af4e44e077350749274b1..62edbd3a298c0fba508762e031dd12613e7190db 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaStreamingKMeansSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaStreamingKMeansSuite.java
@@ -66,8 +66,8 @@ public class JavaStreamingKMeansSuite implements Serializable {
     JavaDStream<Vector> training =
       attachTestInputStream(ssc, Arrays.asList(trainingBatch, trainingBatch), 2);
     List<Tuple2<Integer, Vector>> testBatch = Arrays.asList(
-      new Tuple2<Integer, Vector>(10, Vectors.dense(1.0)),
-      new Tuple2<Integer, Vector>(11, Vectors.dense(0.0)));
+      new Tuple2<>(10, Vectors.dense(1.0)),
+      new Tuple2<>(11, Vectors.dense(0.0)));
     JavaPairDStream<Integer, Vector> test = JavaPairDStream.fromJavaDStream(
       attachTestInputStream(ssc, Arrays.asList(testBatch, testBatch), 2));
     StreamingKMeans skmeans = new StreamingKMeans()
diff --git a/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaVectorsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaVectorsSuite.java
index 77c8c6274f374c27790136f58a23e92df3fa9cc9..4ba8e543a9a6bf5b78d2f435a305c8129aa55964 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaVectorsSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/linalg/JavaVectorsSuite.java
@@ -37,8 +37,8 @@ public class JavaVectorsSuite implements Serializable {
   public void sparseArrayConstruction() {
     @SuppressWarnings("unchecked")
     Vector v = Vectors.sparse(3, Arrays.asList(
-        new Tuple2<Integer, Double>(0, 2.0),
-        new Tuple2<Integer, Double>(2, 3.0)));
+        new Tuple2<>(0, 2.0),
+        new Tuple2<>(2, 3.0)));
     assertArrayEquals(new double[]{2.0, 0.0, 3.0}, v.toArray(), 0.0);
   }
 }
diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaStreamingLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaStreamingLinearRegressionSuite.java
index dbf6488d410853e33e6183f5e2448501de4fa965..ea0ccd74489863f0749768083763a2c0bfa4f7fc 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaStreamingLinearRegressionSuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaStreamingLinearRegressionSuite.java
@@ -65,8 +65,8 @@ public class JavaStreamingLinearRegressionSuite implements Serializable {
     JavaDStream<LabeledPoint> training =
       attachTestInputStream(ssc, Arrays.asList(trainingBatch, trainingBatch), 2);
     List<Tuple2<Integer, Vector>> testBatch = Arrays.asList(
-      new Tuple2<Integer, Vector>(10, Vectors.dense(1.0)),
-      new Tuple2<Integer, Vector>(11, Vectors.dense(0.0)));
+      new Tuple2<>(10, Vectors.dense(1.0)),
+      new Tuple2<>(11, Vectors.dense(0.0)));
     JavaPairDStream<Integer, Vector> test = JavaPairDStream.fromJavaDStream(
       attachTestInputStream(ssc, Arrays.asList(testBatch, testBatch), 2));
     StreamingLinearRegressionWithSGD slr = new StreamingLinearRegressionWithSGD()
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
index cccb7f8d1bcbcd77d70cec92281a7f20d622a54d..eb19d130939e493f1bac95766667d2d7521df61e 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
@@ -759,7 +759,7 @@ class LinearRegressionSuite
         .sliding(2)
         .forall(x => x(0) >= x(1)))
     } else {
-      // To clalify that the normal solver is used here.
+      // To clarify that the normal solver is used here.
      assert(model.summary.objectiveHistory.length == 1)
      assert(model.summary.objectiveHistory(0) == 0.0)
      val devianceResidualsR = Array(-0.47082, 0.34635)
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/execution/UnsafeExternalRowSorter.java b/sql/catalyst/src/main/java/org/apache/spark/sql/execution/UnsafeExternalRowSorter.java
index aa7fc2121e86c33d7f5478da04ab3b8ab3187b75..7784345a7a96636d3be48e94a8faba68ca18897f 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/execution/UnsafeExternalRowSorter.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/execution/UnsafeExternalRowSorter.java
@@ -151,7 +151,7 @@ public final class UnsafeExternalRowSorter {
             Platform.throwException(e);
           }
           throw new RuntimeException("Exception should have been re-thrown in next()");
-        };
+        }
       };
     } catch (IOException e) {
       cleanupResources();
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
index d5ac01500b15156a8fd7d0de69448b6b26cca790..2b98aacdd7264f64f541a43ca3e5c920b39ef6fd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
@@ -26,7 +26,7 @@ private[spark] trait CatalystConf {
   def groupByOrdinal: Boolean
 
   /**
-   * Returns the [[Resolver]] for the current configuration, which can be used to determin if two
+   * Returns the [[Resolver]] for the current configuration, which can be used to determine if two
    * identifiers are equal.
    */
   def resolver: Resolver = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index 5f8899d5998a1d16035bb43b70aebd6a92b79c2b..a24a5db8d49cdfd472e8648fa7223cd400faf854 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -153,8 +153,8 @@ abstract class Expression extends TreeNode[Expression] {
    * evaluate to the same result.
    */
   lazy val canonicalized: Expression = {
-    val canonicalizedChildred = children.map(_.canonicalized)
-    Canonicalize.execute(withNewChildren(canonicalizedChildred))
+    val canonicalizedChildren = children.map(_.canonicalized)
+    Canonicalize.execute(withNewChildren(canonicalizedChildren))
   }
 
   /**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index b64d3eea49a417fca84b307fb66b71d11081a6ef..1bebd4e904963f1a877b04070005778a77eeaded 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -509,7 +509,7 @@ class CodegenContext {
 
   /**
    * Checks and sets up the state and codegen for subexpression elimination. This finds the
-   * common subexpresses, generates the functions that evaluate those expressions and populates
+   * common subexpressions, generates the functions that evaluate those expressions and populates
    * the mapping of common subexpressions to the generated functions.
    */
   private def subexpressionElimination(expressions: Seq[Expression]) = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
index 103ab365e319074c9d171cdac189487b80550b8e..35a7b4602074a1394b54c0022a888e9fe2a6d2d2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
@@ -222,7 +222,7 @@ object CaseWhen {
   }
 
   /**
-   * A factory method to faciliate the creation of this expression when used in parsers.
+   * A factory method to facilitate the creation of this expression when used in parsers.
    * @param branches Expressions at even position are the branch conditions, and expressions at odd
    *                 position are branch values.
    */
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 8541b1f7c62d0da66c2b31eb65a3a9389b3e03c3..61ea3e401057f36ad448446eac5dbb759c84372b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -965,7 +965,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
 
   /**
    * Create a binary arithmetic expression. The following arithmetic operators are supported:
-   * - Mulitplication: '*'
+   * - Multiplication: '*'
    * - Division: '/'
    * - Hive Long Division: 'DIV'
    * - Modulo: '%'
@@ -1270,7 +1270,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
   }
 
   /**
-   * Create a double literal for a number denoted in scientifc notation.
+   * Create a double literal for a number denoted in scientific notation.
   */
  override def visitScientificDecimalLiteral(
      ctx: ScientificDecimalLiteralContext): Literal = withOrigin(ctx) {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
index d9577dea1be36f1dd709dfb3a054a8f636057d84..c9c9599e7f4633c575b0213c507d9cb3f2fd7cc9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala
@@ -121,7 +121,7 @@ class RowTest extends FunSpec with Matchers {
       externalRow should be theSameInstanceAs externalRow.copy()
     }
 
-    it("copy should return same ref for interal rows") {
+    it("copy should return same ref for internal rows") {
       internalRow should be theSameInstanceAs internalRow.copy()
     }
 
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java
index d3bfb00b3fa208cf20547d519f1caaa2ca7ff028..8132bba04caeb93eece622118ddfa21ba005d7a9 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java
@@ -272,5 +272,5 @@ public final class UnsafeKVExternalSorter {
     public void close() {
       cleanupResources();
     }
-  };
+  }
 }
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java
index 792e17911f1209092e40f5f34d0021311fb17da3..d1cc4e6d03cb2cbea1a54e8afb31f3624d9258f4 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java
@@ -79,7 +79,7 @@ public final class ColumnarBatch {
 
   /**
    * Called to close all the columns in this batch. It is not valid to access the data after
-   * calling this. This must be called at the end to clean up memory allcoations.
+   * calling this. This must be called at the end to clean up memory allocations.
    */
   public void close() {
     for (ColumnVector c: columns) {
@@ -315,7 +315,7 @@ public final class ColumnarBatch {
   public int numRows() { return numRows; }
 
   /**
-   * Returns the number of valid rowss.
+   * Returns the number of valid rows.
   */
  public int numValidRows() {
    assert(numRowsFiltered <= numRows);
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
index b1429fe7cb5abcd5f69239a312768554e6523c06..708a00953abde6e0de129730647e6af61bf256e2 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
@@ -212,7 +212,7 @@ public final class OnHeapColumnVector extends ColumnVector {
   public void putIntsLittleEndian(int rowId, int count, byte[] src, int srcIndex) {
     int srcOffset = srcIndex + Platform.BYTE_ARRAY_OFFSET;
     for (int i = 0; i < count; ++i) {
-      intData[i + rowId] = Platform.getInt(src, srcOffset);;
+      intData[i + rowId] = Platform.getInt(src, srcOffset);
       srcIndex += 4;
       srcOffset += 4;
     }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ContinuousQuery.scala b/sql/core/src/main/scala/org/apache/spark/sql/ContinuousQuery.scala
index 1dc9a6893ebb68b3de24f9293b847a58800aabb0..d9973b092dc1152257c71916848b0d044196d3b9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ContinuousQuery.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ContinuousQuery.scala
@@ -94,7 +94,7 @@ trait ContinuousQuery {
   /**
    * Blocks until all available data in the source has been processed an committed to the sink.
    * This method is intended for testing. Note that in the case of continually arriving data, this
-   * method may block forever. Additionally, this method is only guranteed to block until data that
+   * method may block forever. Additionally, this method is only guaranteed to block until data that
    * has been synchronously appended data to a [[org.apache.spark.sql.execution.streaming.Source]]
    * prior to invocation. (i.e. `getOffset` must immediately reflect the addition).
    */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 41cb799b9714162e0b3c1bfc15c048f7dd124ac0..a39a2113e5fa7cc125c5cea088126cb0075c7ac9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2077,7 +2077,7 @@ class Dataset[T] private[sql](
 
   /**
    * Returns a new [[Dataset]] partitioned by the given partitioning expressions into
-   * `numPartitions`. The resulting Datasetis hash partitioned.
+   * `numPartitions`. The resulting Dataset is hash partitioned.
   *
   * This is the same operation as "DISTRIBUTE BY" in SQL (Hive QL).
   *
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index 5bcc172ca7655274898641dd02a7316af8354455..e1fabf519aa28662d299461ff23600a904c3953c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -108,7 +108,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
     /**
      * Matches a plan whose single partition should be small enough to build a hash table.
      *
-     * Note: this assume that the number of partition is fixed, requires addtional work if it's
+     * Note: this assume that the number of partition is fixed, requires additional work if it's
     * dynamic.
     */
    def canBuildHashMap(plan: LogicalPlan): Boolean = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala
index 806089196c4a85677fd90cd5ced782b7b7675f77..8e9214fa258b2ba208dcee98a6a5da75e2979897 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala
@@ -811,7 +811,7 @@ private[execution] final class UnboundedPrecedingWindowFunctionFrame(
 *
 * This is a very expensive operator to use, O(n * (n - 1) /2), because we need to maintain a
 * buffer and must do full recalculation after each row. Reverse iteration would be possible, if
- * the communitativity of the used window functions can be guaranteed.
+ * the commutativity of the used window functions can be guaranteed.
 *
 * @param target to write results to.
 * @param processor to calculate the row values with.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala
index fb1c6182cf956259731ee87bd46c55caa5ce3355..aba500ad8de2506bdf128cc357e2a4a8648956c8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala
@@ -146,7 +146,7 @@ case class Filter(condition: Expression, child: SparkPlan)
     // This has the property of not doing redundant IsNotNull checks and taking better advantage of
     // short-circuiting, not loading attributes until they are needed.
     // This is very perf sensitive.
-    // TODO: revisit this. We can consider reodering predicates as well.
+    // TODO: revisit this. We can consider reordering predicates as well.
     val generatedIsNotNullChecks = new Array[Boolean](notNullPreds.length)
     val generated = otherPreds.map { c =>
       val nullChecks = c.references.map { r =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnBuilder.scala
index 7e26f19bb7449e4f880ebda5dd37e3c63ef85ddd..9a173367f4062b4f0c154c2057c6a41c75d1c493 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnBuilder.scala
@@ -185,7 +185,7 @@ private[columnar] object ColumnBuilder {
       case udt: UserDefinedType[_] =>
         return apply(udt.sqlType, initialSize, columnName, useCompression)
       case other =>
-        throw new Exception(s"not suppported type: $other")
+        throw new Exception(s"not supported type: $other")
     }
 
     builder.initialize(initialSize, columnName, useCompression)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
index e0b6709c51d177f113218c87ca7a48ec3ca9556b..d603f63a0850198974e4abe3e2fcb1351fa97d2f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
@@ -296,7 +296,7 @@ private[sql] object StatFunctions extends Logging {
     val defaultRelativeError: Double = 0.01
 
     /**
-     * Statisttics from the Greenwald-Khanna paper.
+     * Statistics from the Greenwald-Khanna paper.
     * @param value the sampled value
     * @param g the minimum rank jump from the previous value's minimum rank
     * @param delta the maximum span of the rank.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala
index e819e95d61f9a77bd2571ecf41cdd5e82e2ffdd7..6921ae584dd846baa683fc1e9b2e8a5713f34925 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala
@@ -32,7 +32,7 @@ object FileStreamSink {
 
 /**
  * A sink that writes out results to parquet files. Each batch is written out to a unique
- * directory. After all of the files in a batch have been succesfully written, the list of
+ * directory. After all of the files in a batch have been successfully written, the list of
 * file paths is appended to the log atomically. In the case of partial failures, some duplicate
 * data may be present in the target directory, but only one copy of each file will be present
 * in the log.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
index 8ece3c971a52485ff62f1f0b6a9fbe566ae86b87..1e0a4a5d4ff0c09b9036eb3f9d47d14b120c6ff8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
@@ -178,7 +178,7 @@ private[state] class HDFSBackedStateStoreProvider(
      * This can be called only after committing all the updates made in the current thread.
      */
     override def iterator(): Iterator[(UnsafeRow, UnsafeRow)] = {
-      verify(state == COMMITTED, "Cannot get iterator of store data before comitting")
+      verify(state == COMMITTED, "Cannot get iterator of store data before committing")
       HDFSBackedStateStoreProvider.this.iterator(newVersion)
     }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala
index d60e6185ac8d6831143677d82663208b70565ae7..07f63f928b8ff06067a60bfee037f874b456a92e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala
@@ -220,7 +220,7 @@ private[state] object StateStore extends Logging {
         val executorId = SparkEnv.get.blockManager.blockManagerId.executorId
         val verified =
           coordinatorRef.map(_.verifyIfInstanceActive(storeId, executorId)).getOrElse(false)
-        logDebug(s"Verifyied whether the loaded instance $storeId is active: $verified" )
+        logDebug(s"Verified whether the loaded instance $storeId is active: $verified" )
        verified
      } catch {
        case NonFatal(e) =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
index ca2d909e2cccc538b6798cdeb1db58253d618eee..cfe4911cb7075d7afd3ddf68328e0dfd00be2cc1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -126,7 +126,7 @@ object JdbcDialects {
 
   /**
    * Register a dialect for use on all new matching jdbc [[org.apache.spark.sql.DataFrame]].
-   * Readding an existing dialect will cause a move-to-front.
+   * Reading an existing dialect will cause a move-to-front.
   *
   * @param dialect The new dialect.
   */
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
index a5ab446e08d654e6ac1ad8817c82ccafe017c517..873f681bdf9b9fdaab1cd6c0c3a98c9a9371595c 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
@@ -318,14 +318,14 @@ public class JavaDatasetSuite implements Serializable {
     Encoder<Tuple3<Integer, Long, String>> encoder3 =
       Encoders.tuple(Encoders.INT(), Encoders.LONG(), Encoders.STRING());
     List<Tuple3<Integer, Long, String>> data3 =
-      Arrays.asList(new Tuple3<Integer, Long, String>(1, 2L, "a"));
+      Arrays.asList(new Tuple3<>(1, 2L, "a"));
     Dataset<Tuple3<Integer, Long, String>> ds3 = context.createDataset(data3, encoder3);
     Assert.assertEquals(data3, ds3.collectAsList());
 
     Encoder<Tuple4<Integer, String, Long, String>> encoder4 =
       Encoders.tuple(Encoders.INT(), Encoders.STRING(), Encoders.LONG(), Encoders.STRING());
     List<Tuple4<Integer, String, Long, String>> data4 =
-      Arrays.asList(new Tuple4<Integer, String, Long, String>(1, "b", 2L, "a"));
+      Arrays.asList(new Tuple4<>(1, "b", 2L, "a"));
     Dataset<Tuple4<Integer, String, Long, String>> ds4 = context.createDataset(data4, encoder4);
     Assert.assertEquals(data4, ds4.collectAsList());
 
@@ -333,7 +333,7 @@ public class JavaDatasetSuite implements Serializable {
       Encoders.tuple(Encoders.INT(), Encoders.STRING(), Encoders.LONG(), Encoders.STRING(),
         Encoders.BOOLEAN());
     List<Tuple5<Integer, String, Long, String, Boolean>> data5 =
-      Arrays.asList(new Tuple5<Integer, String, Long, String, Boolean>(1, "b", 2L, "a", true));
+      Arrays.asList(new Tuple5<>(1, "b", 2L, "a", true));
     Dataset<Tuple5<Integer, String, Long, String, Boolean>> ds5 =
       context.createDataset(data5, encoder5);
     Assert.assertEquals(data5, ds5.collectAsList());
@@ -354,7 +354,7 @@ public class JavaDatasetSuite implements Serializable {
       Encoders.tuple(Encoders.INT(),
        Encoders.tuple(Encoders.STRING(), Encoders.STRING(), Encoders.LONG()));
     List<Tuple2<Integer, Tuple3<String, String, Long>>> data2 =
-      Arrays.asList(tuple2(1, new Tuple3<String, String, Long>("a", "b", 3L)));
+      Arrays.asList(tuple2(1, new Tuple3<>("a", "b", 3L)));
     Dataset<Tuple2<Integer, Tuple3<String, String, Long>>> ds2 =
       context.createDataset(data2, encoder2);
     Assert.assertEquals(data2, ds2.collectAsList());
@@ -376,7 +376,7 @@ public class JavaDatasetSuite implements Serializable {
       Encoders.tuple(Encoders.DOUBLE(), Encoders.DECIMAL(), Encoders.DATE(), Encoders.TIMESTAMP(),
        Encoders.FLOAT());
     List<Tuple5<Double, BigDecimal, Date, Timestamp, Float>> data =
-      Arrays.asList(new Tuple5<Double, BigDecimal, Date, Timestamp, Float>(
+      Arrays.asList(new Tuple5<>(
        1.7976931348623157E308, new BigDecimal("0.922337203685477589"),
        Date.valueOf("1970-01-01"), new Timestamp(System.currentTimeMillis()), Float.MAX_VALUE));
     Dataset<Tuple5<Double, BigDecimal, Date, Timestamp, Float>> ds =
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
index d160f8ab8c5fd46ad1350a0eeeb35b1925749321..f7f3bd78e9686cbcb33e8426f7d971a211200e5e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
@@ -105,10 +105,10 @@ abstract class QueryTest extends PlanTest {
       val expected = expectedAnswer.toSet.toSeq.map((a: Any) => a.toString).sorted
       val actual = decoded.toSet.toSeq.map((a: Any) => a.toString).sorted
 
-      val comparision = sideBySide("expected" +: expected, "spark" +: actual).mkString("\n")
+      val comparison = sideBySide("expected" +: expected, "spark" +: actual).mkString("\n")
       fail(
         s"""Decoded objects do not match expected objects:
-            |$comparision
+            |$comparison
            |${ds.resolvedTEncoder.deserializer.treeString}
          """.stripMargin)
     }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
index 3a7cb25b4fa9b7f2451b5fa4d6e1c4ffa0dc2bfb..23d422635b0a93bc579f23939c590d14006380a8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.csv
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.types._
 
-class InferSchemaSuite extends SparkFunSuite {
+class CSVInferSchemaSuite extends SparkFunSuite {
 
   test("String fields types are inferred correctly from null types") {
     assert(CSVInferSchema.inferField(NullType, "") == NullType)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index 9746187d22907592cc09f6430339c30bb15f6081..a3017258d606ab2c5551eb9267d721021eb9c436 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -469,7 +469,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
     }
   }
 
-  testQuietly("SPARK-9849 DirectParquetOutputCommitter qualified name backwards compatiblity") {
+  testQuietly("SPARK-9849 DirectParquetOutputCommitter qualified name backwards compatibility") {
     val clonedConf = new Configuration(hadoopConfiguration)
 
     // Write to a parquet file and let it fail.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStressSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStressSuite.scala
index 3916430cdf65dc50b45e9a12c0f04cc41636775b..5b49a0a86a04f26cc0fb864f1da309206f9f5ab3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStressSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStressSuite.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.util.Utils
 
 /**
- * A stress test for streamign queries that read and write files. This test constists of
+ * A stress test for streaming queries that read and write files. This test consists of
 * two threads:
 * - one that writes out `numRecords` distinct integers to files of random sizes (the total
 *   number of records is fixed but each files size / creation time is random).
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 4afc8d18a6f8a3241e6e594e9ab4f8de8b94d649..93933023557e63fc36b5d0f1cdb46be44b769dbb 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -380,8 +380,8 @@ class TestHiveContext private[hive](
       """.stripMargin.cmd,
       s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/episodes.avro")}' INTO TABLE episodes".cmd
     ),
-    // THIS TABLE IS NOT THE SAME AS THE HIVE TEST TABLE episodes_partitioned AS DYNAMIC PARITIONING
-    // IS NOT YET SUPPORTED
+    // THIS TABLE IS NOT THE SAME AS THE HIVE TEST TABLE episodes_partitioned AS DYNAMIC
+    // PARTITIONING IS NOT YET SUPPORTED
     TestTable("episodes_part",
       s"""CREATE TABLE episodes_part (title STRING, air_date STRING, doctor INT)
         |PARTITIONED BY (doctor_pt INT)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index 4c1b425b163c0c7e6a5ab1bca6af76c9b1ecfc38..e67fcbedc3364c8a0b4aa698265aefa2f3b54615 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -482,7 +482,7 @@ abstract class HiveComparisonTest
             val tablesGenerated = queryList.zip(executions).flatMap {
               // We should take executedPlan instead of sparkPlan, because in following codes we
               // will run the collected plans. As we will do extra processing for sparkPlan such
-              // as adding exchage, collapsing codegen stages, etc., collecing sparkPlan here
+              // as adding exchange, collapsing codegen stages, etc., collecting sparkPlan here
              // will cause some errors when running these plans later.
              case (q, e) => e.executedPlan.collect {
                case i: InsertIntoHiveTable if tablesRead contains i.table.tableName =>
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
index b6fc61d453c4c0e493b9aa403f51e4a68fe554d1..eac65d5720575860dd953e6ede1cd0e80e421225 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
@@ -311,7 +311,7 @@ class ParquetMetastoreSuite extends ParquetPartitioningTest {
           case ExecutedCommand(_: InsertIntoHadoopFsRelation) => // OK
           case o => fail("test_insert_parquet should be converted to a " +
             s"${classOf[HadoopFsRelation ].getCanonicalName} and " +
-            s"${classOf[InsertIntoDataSource].getCanonicalName} is expcted as the SparkPlan. " +
+            s"${classOf[InsertIntoDataSource].getCanonicalName} is expected as the SparkPlan. " +
            s"However, found a ${o.toString} ")
        }
 
@@ -341,7 +341,7 @@ class ParquetMetastoreSuite extends ParquetPartitioningTest {
          case ExecutedCommand(_: InsertIntoHadoopFsRelation) => // OK
          case o => fail("test_insert_parquet should be converted to a " +
            s"${classOf[HadoopFsRelation ].getCanonicalName} and " +
-            s"${classOf[InsertIntoDataSource].getCanonicalName} is expcted as the SparkPlan." +
+            s"${classOf[InsertIntoDataSource].getCanonicalName} is expected as the SparkPlan." +
            s"However, found a ${o.toString} ")
        }