diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java index fb1226c09e246fb12f5e5165c4500c9f613c670b..22fd592a321d21ca0d75c1ee5697f54da24d583f 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java @@ -87,8 +87,11 @@ public class ExternalShuffleBlockHandler extends RpcHandler { blocks.add(blockManager.getBlockData(msg.appId, msg.execId, blockId)); } long streamId = streamManager.registerStream(client.getClientId(), blocks.iterator()); - logger.trace("Registered streamId {} with {} buffers for client {} from host {}", streamId, - msg.blockIds.length, client.getClientId(), NettyUtils.getRemoteAddress(client.getChannel())); + logger.trace("Registered streamId {} with {} buffers for client {} from host {}", + streamId, + msg.blockIds.length, + client.getClientId(), + NettyUtils.getRemoteAddress(client.getChannel())); callback.onSuccess(new StreamHandle(streamId, msg.blockIds.length).toByteBuffer()); } else if (msgObj instanceof RegisterExecutor) { diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java index ecb7084e03122f307e141fc402c421b78cd26025..2c2aa6df47c7752dfad161d4152f4912caa4d38f 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java @@ -33,7 +33,10 @@ import org.apache.spark.sql.types.*; public class JavaAFTSurvivalRegressionExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaAFTSurvivalRegressionExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaAFTSurvivalRegressionExample") + .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java index 9a9a10489b5061be54e5a5c8dd90283e787febd4..4b13ba6f9cea3b41cb599fcef2ea3dd17a042010 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java @@ -81,7 +81,10 @@ public class JavaALSExample { // $example off$ public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaALSExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaALSExample") + .getOrCreate(); // $example on$ JavaRDD<Rating> ratingsRDD = spark diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java index 88e4298a6191eac83d54eeee5e61e755db2a6610..5f964aca92096a129dfea46c86970b481b5b4d4c 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java @@ -17,8 +17,6 @@ package org.apache.spark.examples.ml; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.SparkSession; @@ -26,7 +24,6 @@ import org.apache.spark.sql.SparkSession; import java.util.Arrays; import java.util.List; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.feature.Binarizer; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; @@ -38,7 +35,10 @@ import org.apache.spark.sql.types.StructType; public class JavaBinarizerExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaBinarizerExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaBinarizerExample") + .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java index 51aa35084e845434a034741408a09e213c2822c3..810ad905c56af90e4c53cc08cd13e1f3bb6bb254 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java @@ -42,7 +42,10 @@ import org.apache.spark.sql.types.StructType; public class JavaBisectingKMeansExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaBisectingKMeansExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaBisectingKMeansExample") + .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java index 0c24f52cf58ab58da36e4a2084f069662e0dfca6..691df3887a9bb671c9ebc32a2a430fa4005be541 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java @@ -35,7 +35,10 @@ import org.apache.spark.sql.types.StructType; public class JavaBucketizerExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaBucketizerExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaBucketizerExample") + .getOrCreate(); // $example on$ double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY}; diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java index 684cf9a7145d275aa26ee99bb94909682f838556..f8f2fb14be1f1ed5482b2287d922c9755b37cd37 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java @@ -17,9 +17,6 @@ package org.apache.spark.examples.ml; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.SparkSession; @@ -40,7 +37,10 @@ import org.apache.spark.sql.types.StructType; public class JavaChiSqSelectorExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaChiSqSelectorExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaChiSqSelectorExample") + .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaCountVectorizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaCountVectorizerExample.java index 0631f9d6d5d74d50f26b6161aa9493e08a410c83..0a6b13601425b86cda7c6aa9737d22c00054a42e 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaCountVectorizerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaCountVectorizerExample.java @@ -32,7 +32,10 @@ import org.apache.spark.sql.types.*; public class JavaCountVectorizerExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaCountVectorizerExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaCountVectorizerExample") + .getOrCreate(); // $example on$ // Input data: Each row is a bag of words from a sentence or document. diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java index ec57a2445138a7268c96fa5c394b1f2d6dd93a2f..eee92c77a8c582909dc7d59427e4fbc09a8d67dc 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java @@ -17,8 +17,6 @@ package org.apache.spark.examples.ml; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.SparkSession; @@ -26,7 +24,6 @@ import org.apache.spark.sql.SparkSession; import java.util.Arrays; import java.util.List; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.feature.DCT; import org.apache.spark.mllib.linalg.VectorUDT; import org.apache.spark.mllib.linalg.Vectors; @@ -39,7 +36,10 @@ import org.apache.spark.sql.types.StructType; public class JavaDCTExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaDCTExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaDCTExample") + .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java index 90023ac06b79b94cc425833117b59506aa5dc8a8..49bad0afc0b7155a7786abd4876fcc1783330d70 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java @@ -49,7 +49,10 @@ import org.apache.spark.sql.SparkSession; public class JavaDeveloperApiExample { public static void main(String[] args) throws Exception { - SparkSession spark = SparkSession.builder().appName("JavaDeveloperApiExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaDeveloperApiExample") + .getOrCreate(); // Prepare training data. List<LabeledPoint> localTraining = Lists.newArrayList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java index a062a6fcd0c19e1c9d467b75fc69ce40207b05d2..9126242f9eb788a0db91fa784f400a2614a8f503 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java @@ -17,8 +17,6 @@ package org.apache.spark.examples.ml; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.SparkSession; @@ -27,7 +25,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.feature.ElementwiseProduct; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.VectorUDT; @@ -42,7 +39,9 @@ import org.apache.spark.sql.types.StructType; public class JavaElementwiseProductExample { public static void main(String[] args) { SparkSession spark = SparkSession - .builder().appName("JavaElementwiseProductExample").getOrCreate(); + .builder() + .appName("JavaElementwiseProductExample") + .getOrCreate(); // $example on$ // Create some vector data; also works for sparse vectors diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java index a7c89b9d19974f22ca8c723dc63b5d570877a4b2..baacd796a058f5816fb8a7651782a8bd7fe9fb43 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java @@ -17,8 +17,6 @@ package org.apache.spark.examples.ml; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; // $example on$ import org.apache.spark.ml.Pipeline; import org.apache.spark.ml.PipelineModel; @@ -35,11 +33,15 @@ import org.apache.spark.sql.SparkSession; public class JavaGradientBoostedTreeClassifierExample { public static void main(String[] args) { SparkSession spark = SparkSession - .builder().appName("JavaGradientBoostedTreeClassifierExample").getOrCreate(); + .builder() + .appName("JavaGradientBoostedTreeClassifierExample") + .getOrCreate(); // $example on$ // Load and parse the data file, converting it to a DataFrame. - Dataset<Row> data = spark.read().format("libsvm") + Dataset<Row> data = spark + .read() + .format("libsvm") .load("data/mllib/sample_libsvm_data.txt"); // Index labels, adding metadata to the label column. diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java index ccd74f2920ce34a2f9adfae91630257232c5d34a..0064beb8c8f33be12738582818b1d5c001b82880 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java @@ -37,7 +37,10 @@ import org.apache.spark.sql.types.StructType; public class JavaIndexToStringExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaIndexToStringExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaIndexToStringExample") + .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaKMeansExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaKMeansExample.java index e6d82a0513a305821d5146398ee3d78f649deadf..65e29ade299d1932ff0ba0b6642d47645884b003 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaKMeansExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaKMeansExample.java @@ -70,7 +70,10 @@ public class JavaKMeansExample { int k = Integer.parseInt(args[1]); // Parses the arguments - SparkSession spark = SparkSession.builder().appName("JavaKMeansExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaKMeansExample") + .getOrCreate(); // $example on$ // Loads data diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaLDAExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaLDAExample.java index b8baca5920e8da93ad0f66d2b39f63facf9b0ba4..1c52f37867a85b3ed94005313dedeeb81a6ca6f1 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaLDAExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLDAExample.java @@ -65,7 +65,10 @@ public class JavaLDAExample { String inputFile = "data/mllib/sample_lda_data.txt"; // Parses the arguments - SparkSession spark = SparkSession.builder().appName("JavaLDAExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaLDAExample") + .getOrCreate(); // Loads data JavaRDD<Row> points = spark.read().text(inputFile).javaRDD().map(new ParseVector()); diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java index 80cdd364b93765143b3a1437f26eb6b7f272b45b..9a27b0e9e23b7f23e1fdbe4b1ca9b7d3fa0caf18 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java @@ -28,13 +28,19 @@ import org.apache.spark.sql.SparkSession; public class JavaMaxAbsScalerExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaMaxAbsScalerExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaMaxAbsScalerExample") + .getOrCreate(); // $example on$ - Dataset<Row> dataFrame = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt"); + Dataset<Row> dataFrame = spark + .read() + .format("libsvm") + .load("data/mllib/sample_libsvm_data.txt"); MaxAbsScaler scaler = new MaxAbsScaler() - .setInputCol("features") - .setOutputCol("scaledFeatures"); + .setInputCol("features") + .setOutputCol("scaledFeatures"); // Compute summary statistics and generate MaxAbsScalerModel MaxAbsScalerModel scalerModel = scaler.fit(dataFrame); diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java index 022940fd1e67ce125b549874bbeea9d5158d32f4..37fa1c5434ea6ec8e172965e56b094da2587ff69 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java @@ -28,10 +28,16 @@ import org.apache.spark.sql.Row; public class JavaMinMaxScalerExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaMinMaxScalerExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaMinMaxScalerExample") + .getOrCreate(); // $example on$ - Dataset<Row> dataFrame = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt"); + Dataset<Row> dataFrame = spark + .read() + .format("libsvm") + .load("data/mllib/sample_libsvm_data.txt"); MinMaxScaler scaler = new MinMaxScaler() .setInputCol("features") .setOutputCol("scaledFeatures"); diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java index 325b7b5874800d58f664e260f8728b298b492064..899815f57c84b84889a97d9915d484b76ac1095a 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java @@ -35,7 +35,10 @@ import org.apache.spark.sql.types.StructType; public class JavaNGramExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaNGramExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaNGramExample") + .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java index 1f24a23609ebe9872797a24a8ed4196da9b0c5c1..50a46a5774fcca92f492964cd5d64ab586270444 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java @@ -32,7 +32,10 @@ import org.apache.spark.sql.SparkSession; public class JavaNaiveBayesExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaNaiveBayesExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaNaiveBayesExample") + .getOrCreate(); // $example on$ // Load training data diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java index 4b3a718ea92c841df725281fede2a09bab5a8b12..abc38f85ea7745d3bc8567883b83c8d81b561c62 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java @@ -27,7 +27,10 @@ import org.apache.spark.sql.Row; public class JavaNormalizerExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaNormalizerExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaNormalizerExample") + .getOrCreate(); // $example on$ Dataset<Row> dataFrame = diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java index d6e4d21ead161ecbd524ab63a095ddbbbcbfc432..5d29e54549213a397ce7fec406b4f47c4ba47a5e 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderExample.java @@ -37,7 +37,10 @@ import org.apache.spark.sql.types.StructType; public class JavaOneHotEncoderExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaOneHotEncoderExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaOneHotEncoderExample") + .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java index 9cc983bd115df2e8200bbe3e1607713a15acb582..e0cb752224f7546ef54f7e742a86536d2b25e8b4 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java @@ -58,7 +58,10 @@ public class JavaOneVsRestExample { public static void main(String[] args) { // parse the arguments Params params = parse(args); - SparkSession spark = SparkSession.builder().appName("JavaOneVsRestExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaOneVsRestExample") + .getOrCreate(); // $example on$ // configure the base classifier diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java index 6b1dcb68ba3040ce9a17d80307dc2dbdde47d553..ffa979ee013adeb7d82d0069e80683ee42bfad74 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java @@ -37,7 +37,10 @@ import org.apache.spark.sql.types.StructType; public class JavaPCAExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaPCAExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaPCAExample") + .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java index 556a4573263f148bcc19a0a83dce8d21353a7fd7..9a43189c91463c79ef0de8739bf8f3e529ca3fd1 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java @@ -36,7 +36,10 @@ import org.apache.spark.sql.SparkSession; */ public class JavaPipelineExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaPipelineExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaPipelineExample") + .getOrCreate(); // $example on$ // Prepare training documents, which are labeled. diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java index e328454c70591380b7d239981cae403f4e167bdc..7afcd0e50cd950dafbcf0c608a27623a36f375a4 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java @@ -36,7 +36,10 @@ import org.apache.spark.sql.types.StructType; public class JavaPolynomialExpansionExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaPolynomialExpansionExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaPolynomialExpansionExample") + .getOrCreate(); // $example on$ PolynomialExpansion polyExpansion = new PolynomialExpansion() diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java index 8282ce01d36f32eed0586933fa7bd43e758668ad..428067e0f7efee99156e0f237530dbb18515c75c 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java @@ -35,7 +35,10 @@ import static org.apache.spark.sql.types.DataTypes.*; public class JavaRFormulaExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaRFormulaExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaRFormulaExample") + .getOrCreate(); // $example on$ StructType schema = createStructType(new StructField[]{ diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaSQLTransformerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaSQLTransformerExample.java index 492718bbdba8166809aad8b1fd2e0649ebeaaae3..2a3d62de41ab7f9eab61ecb529383c34c4fcc158 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaSQLTransformerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaSQLTransformerExample.java @@ -31,7 +31,10 @@ import org.apache.spark.sql.types.*; public class JavaSQLTransformerExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaSQLTransformerExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaSQLTransformerExample") + .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java index f906843640eda144e2abd2401d69c9ea5890e443..0787079ba4e5544a8eee26d64552ba02adc96b5a 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java @@ -40,7 +40,10 @@ import org.apache.spark.sql.SparkSession; public class JavaSimpleParamsExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaSimpleParamsExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaSimpleParamsExample") + .getOrCreate(); // Prepare training data. // We use LabeledPoint, which is a JavaBean. Spark SQL can convert RDDs of JavaBeans diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java index 10f82f223386e08742fb058287ab2e6376283747..08ea285a0d53d310c6712af7b10170a637f441f5 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java @@ -28,7 +28,10 @@ import org.apache.spark.sql.Row; public class JavaStandardScalerExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaStandardScalerExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaStandardScalerExample") + .getOrCreate(); // $example on$ Dataset<Row> dataFrame = diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java index 23ed071c9f6e5a3cd039d140cd2f350517cbe358..def59944291241731c0df9101ba85063f055c543 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java @@ -36,7 +36,10 @@ import org.apache.spark.sql.types.StructType; public class JavaStopWordsRemoverExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaStopWordsRemoverExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaStopWordsRemoverExample") + .getOrCreate(); // $example on$ StopWordsRemover remover = new StopWordsRemover() diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java index d4c2cf96a7cf3966da80cdd4c74980c8e54c1dfc..7533c1835e325ff7328ba13bdc082694d51b97bc 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java @@ -35,7 +35,10 @@ import static org.apache.spark.sql.types.DataTypes.*; public class JavaStringIndexerExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaStringIndexerExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaStringIndexerExample") + .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java index a8169917772c11f83fa68dfc57f9186c9f00e39a..6e0753959efd6982d39337e53b6d02dcefda2e9c 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java @@ -38,7 +38,10 @@ import org.apache.spark.sql.types.StructType; public class JavaTfIdfExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaTfIdfExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaTfIdfExample") + .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java index a65735a5e51432f5b0ac2d4225342787db53109e..1cc16bb60d172ad304639e2b28eb66dc17afa440 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java @@ -36,7 +36,10 @@ import org.apache.spark.sql.types.StructType; public class JavaTokenizerExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaTokenizerExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaTokenizerExample") + .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java index 9569bc2412defe2c1dbab0871b027eefa7dc70a7..41f1d8750ac40bee8406826339fb7b8eb646f6ee 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java @@ -35,7 +35,10 @@ import static org.apache.spark.sql.types.DataTypes.*; public class JavaVectorAssemblerExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaVectorAssemblerExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaVectorAssemblerExample") + .getOrCreate(); // $example on$ StructType schema = createStructType(new StructField[]{ diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java index 217d5a06d10a414d10414c37427ba28176d76f3d..dd9d757dd68315f0024e5c445cb6037a6aa6b21d 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java @@ -30,7 +30,10 @@ import org.apache.spark.sql.Row; public class JavaVectorIndexerExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaVectorIndexerExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaVectorIndexerExample") + .getOrCreate(); // $example on$ Dataset<Row> data = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt"); diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java index 4f1ea824a3a9f267eabe2a39252dbf851493740d..24959c0e10f2bb633b13a3e16bfaa8a73205e680 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java @@ -37,7 +37,10 @@ import org.apache.spark.sql.types.*; public class JavaVectorSlicerExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaVectorSlicerExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaVectorSlicerExample") + .getOrCreate(); // $example on$ Attribute[] attrs = new Attribute[]{ diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaWord2VecExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaWord2VecExample.java index d9b1a79b525483e7bcfb242f929abfba87aa3dc5..9be6e6353adcf40bf4e27b17c33243b792f4e3da 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaWord2VecExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaWord2VecExample.java @@ -32,7 +32,10 @@ import org.apache.spark.sql.types.*; public class JavaWord2VecExample { public static void main(String[] args) { - SparkSession spark = SparkSession.builder().appName("JavaWord2VecExample").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaWord2VecExample") + .getOrCreate(); // $example on$ // Input data: Each row is a bag of words from a sentence or document. diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java index ec2142e7569558317dec89b6fc161071812d2add..755b4f538104a343e6fcdc6148b97f60a82649bd 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java @@ -51,7 +51,10 @@ public class JavaSparkSQL { } public static void main(String[] args) throws Exception { - SparkSession spark = SparkSession.builder().appName("JavaSparkSQL").getOrCreate(); + SparkSession spark = SparkSession + .builder() + .appName("JavaSparkSQL") + .getOrCreate(); System.out.println("=== Data source: RDD ==="); // Load a text file and convert each line to a Java Bean. @@ -147,7 +150,8 @@ public class JavaSparkSQL { // a RDD[String] storing one JSON object per string. List<String> jsonData = Arrays.asList( "{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}"); - JavaRDD<String> anotherPeopleRDD = spark.createDataFrame(jsonData, String.class).toJSON().javaRDD(); + JavaRDD<String> anotherPeopleRDD = spark + .createDataFrame(jsonData, String.class).toJSON().javaRDD(); Dataset<Row> peopleFromJsonRDD = spark.read().json(anotherPeopleRDD); // Take a look at the schema of this new DataFrame. diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java index 44f1e800fe4487d27715b61587a5d4028050bdad..57953ef74f795e16624b6c65e117dd41838b64a7 100644 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java @@ -115,7 +115,10 @@ class JavaSparkSessionSingleton { private static transient SparkSession instance = null; public static SparkSession getInstance(SparkConf sparkConf) { if (instance == null) { - instance = SparkSession.builder().config(sparkConf).getOrCreate(); + instance = SparkSession + .builder() + .config(sparkConf) + .getOrCreate(); } return instance; } diff --git a/examples/src/main/python/ml/als_example.py b/examples/src/main/python/ml/als_example.py index e36444f18506eb4cc7cb55cfcc3b8f5f98d5b7c2..ff0829b0dd45a6b50ff4dfe6152340917beb017f 100644 --- a/examples/src/main/python/ml/als_example.py +++ b/examples/src/main/python/ml/als_example.py @@ -30,7 +30,10 @@ from pyspark.sql import Row # $example off$ if __name__ == "__main__": - spark = SparkSession.builder.appName("ALSExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("ALSExample")\ + .getOrCreate() # $example on$ lines = spark.read.text("data/mllib/als/sample_movielens_ratings.txt").rdd diff --git a/examples/src/main/python/ml/binarizer_example.py b/examples/src/main/python/ml/binarizer_example.py index 072187e64564c9bca9d4c90420dd8499f86febe4..4224a27dbef0c0bbb2a50ed06a641080c35a4165 100644 --- a/examples/src/main/python/ml/binarizer_example.py +++ b/examples/src/main/python/ml/binarizer_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import Binarizer # $example off$ if __name__ == "__main__": - spark = SparkSession.builder.appName("BinarizerExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("BinarizerExample")\ + .getOrCreate() # $example on$ continuousDataFrame = spark.createDataFrame([ diff --git a/examples/src/main/python/ml/bisecting_k_means_example.py b/examples/src/main/python/ml/bisecting_k_means_example.py index 836a89cde0d60d372f09ee255dea7b9e2740212b..540a4bc3e4bf98641823fe4a44b22f6cd317872a 100644 --- a/examples/src/main/python/ml/bisecting_k_means_example.py +++ b/examples/src/main/python/ml/bisecting_k_means_example.py @@ -30,7 +30,10 @@ A simple example demonstrating a bisecting k-means clustering. """ if __name__ == "__main__": - spark = SparkSession.builder.appName("PythonBisectingKMeansExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("PythonBisectingKMeansExample")\ + .getOrCreate() # $example on$ data = spark.read.text("data/mllib/kmeans_data.txt").rdd diff --git a/examples/src/main/python/ml/bucketizer_example.py b/examples/src/main/python/ml/bucketizer_example.py index 288ec62bdfbb5cc28683951ff4482fdda0d5a7c9..8177e560ddef1f9943b576d4c4264624d0e893b4 100644 --- a/examples/src/main/python/ml/bucketizer_example.py +++ b/examples/src/main/python/ml/bucketizer_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import Bucketizer # $example off$ if __name__ == "__main__": - spark = SparkSession.builder.appName("BucketizerExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("BucketizerExample")\ + .getOrCreate() # $example on$ splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")] diff --git a/examples/src/main/python/ml/chisq_selector_example.py b/examples/src/main/python/ml/chisq_selector_example.py index 8f58fc28deb9b49b7a190a25f60ac2917abae78b..8bafb942e0d2736ebaec16b912047c85e5930ada 100644 --- a/examples/src/main/python/ml/chisq_selector_example.py +++ b/examples/src/main/python/ml/chisq_selector_example.py @@ -24,7 +24,10 @@ from pyspark.mllib.linalg import Vectors # $example off$ if __name__ == "__main__": - spark = SparkSession.builder.appName("ChiSqSelectorExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("ChiSqSelectorExample")\ + .getOrCreate() # $example on$ df = spark.createDataFrame([ diff --git a/examples/src/main/python/ml/count_vectorizer_example.py b/examples/src/main/python/ml/count_vectorizer_example.py index 9dbf9959d17ef9b0598439ba16cc91858231e610..38cfac82fbe206c4c5f8f5cde41c93d1cb81ea00 100644 --- a/examples/src/main/python/ml/count_vectorizer_example.py +++ b/examples/src/main/python/ml/count_vectorizer_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import CountVectorizer # $example off$ if __name__ == "__main__": - spark = SparkSession.builder.appName("CountVectorizerExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("CountVectorizerExample")\ + .getOrCreate() # $example on$ # Input data: Each row is a bag of words with a ID. diff --git a/examples/src/main/python/ml/cross_validator.py b/examples/src/main/python/ml/cross_validator.py index a61d0f63d277c712d60f84d08f052658fb0045a7..a41df6cf946fb47a44d5c04503dffced63b09f60 100644 --- a/examples/src/main/python/ml/cross_validator.py +++ b/examples/src/main/python/ml/cross_validator.py @@ -35,7 +35,10 @@ Run with: """ if __name__ == "__main__": - spark = SparkSession.builder.appName("CrossValidatorExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("CrossValidatorExample")\ + .getOrCreate() # $example on$ # Prepare training documents, which are labeled. training = spark.createDataFrame([ diff --git a/examples/src/main/python/ml/dataframe_example.py b/examples/src/main/python/ml/dataframe_example.py index b3e671038eb7ff6bf511dd45bf797c1f8e0b1644..a7d8b9056dffe77eafd67dab1340c3cea2e93576 100644 --- a/examples/src/main/python/ml/dataframe_example.py +++ b/examples/src/main/python/ml/dataframe_example.py @@ -33,7 +33,10 @@ if __name__ == "__main__": if len(sys.argv) > 2: print("Usage: dataframe_example.py <libsvm file>", file=sys.stderr) exit(-1) - spark = SparkSession.builder.appName("DataFrameExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("DataFrameExample")\ + .getOrCreate() if len(sys.argv) == 2: input = sys.argv[1] else: diff --git a/examples/src/main/python/ml/dct_example.py b/examples/src/main/python/ml/dct_example.py index 1bf8fc6d140c9a873041cbc61447dc0d40196851..e36fcdeaeed286307044ed8975acfbdda5c87e2f 100644 --- a/examples/src/main/python/ml/dct_example.py +++ b/examples/src/main/python/ml/dct_example.py @@ -24,7 +24,10 @@ from pyspark.mllib.linalg import Vectors from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("DCTExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("DCTExample")\ + .getOrCreate() # $example on$ df = spark.createDataFrame([ diff --git a/examples/src/main/python/ml/decision_tree_classification_example.py b/examples/src/main/python/ml/decision_tree_classification_example.py index d2318e24369d85c1b0005670ecb45af0b1e4b6f2..9b40b701ecaaf4f87170942988c4366b96815daf 100644 --- a/examples/src/main/python/ml/decision_tree_classification_example.py +++ b/examples/src/main/python/ml/decision_tree_classification_example.py @@ -29,7 +29,10 @@ from pyspark.ml.evaluation import MulticlassClassificationEvaluator from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("decision_tree_classification_example").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("decision_tree_classification_example")\ + .getOrCreate() # $example on$ # Load the data stored in LIBSVM format as a DataFrame. diff --git a/examples/src/main/python/ml/decision_tree_regression_example.py b/examples/src/main/python/ml/decision_tree_regression_example.py index 9e8cb382a9bfecd594e7147273078208b5afdb26..b734d4974a4f62c6a31001b9ea40b57f150fab06 100644 --- a/examples/src/main/python/ml/decision_tree_regression_example.py +++ b/examples/src/main/python/ml/decision_tree_regression_example.py @@ -29,7 +29,10 @@ from pyspark.ml.evaluation import RegressionEvaluator from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("decision_tree_classification_example").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("decision_tree_classification_example")\ + .getOrCreate() # $example on$ # Load the data stored in LIBSVM format as a DataFrame. diff --git a/examples/src/main/python/ml/elementwise_product_example.py b/examples/src/main/python/ml/elementwise_product_example.py index 6fa641b772d9527262b1f9f6539d26a00578f019..41727edcdb09e1cf2dc0696fc96c8280e5068c87 100644 --- a/examples/src/main/python/ml/elementwise_product_example.py +++ b/examples/src/main/python/ml/elementwise_product_example.py @@ -24,7 +24,10 @@ from pyspark.mllib.linalg import Vectors from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("ElementwiseProductExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("ElementwiseProductExample")\ + .getOrCreate() # $example on$ data = [(Vectors.dense([1.0, 2.0, 3.0]),), (Vectors.dense([4.0, 5.0, 6.0]),)] diff --git a/examples/src/main/python/ml/estimator_transformer_param_example.py b/examples/src/main/python/ml/estimator_transformer_param_example.py index 4993b5a9846c2a56b24bd9243571683ae5f61844..0fcae0e3fc225fd2521a086aabc9bddb6c5c998b 100644 --- a/examples/src/main/python/ml/estimator_transformer_param_example.py +++ b/examples/src/main/python/ml/estimator_transformer_param_example.py @@ -26,7 +26,10 @@ from pyspark.ml.classification import LogisticRegression from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("EstimatorTransformerParamExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("EstimatorTransformerParamExample")\ + .getOrCreate() # $example on$ # Prepare training data from a list of (label, features) tuples. diff --git a/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py b/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py index b09ad41da313ca79bd366e48793c1327d5a9bfed..50026d7b7e3e8a01cefeed2265572585207c0702 100644 --- a/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py +++ b/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py @@ -29,7 +29,10 @@ from pyspark.ml.evaluation import MulticlassClassificationEvaluator from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("gradient_boosted_tree_classifier_example").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("gradient_boosted_tree_classifier_example")\ + .getOrCreate() # $example on$ # Load and parse the data file, converting it to a DataFrame. diff --git a/examples/src/main/python/ml/gradient_boosted_tree_regressor_example.py b/examples/src/main/python/ml/gradient_boosted_tree_regressor_example.py index caa7cfc4e11f5380e6fec09fb16f60c485f5d1f9..5dd2272748d7093b9c0d6f06d0f620de1f636971 100644 --- a/examples/src/main/python/ml/gradient_boosted_tree_regressor_example.py +++ b/examples/src/main/python/ml/gradient_boosted_tree_regressor_example.py @@ -29,7 +29,10 @@ from pyspark.ml.evaluation import RegressionEvaluator from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("gradient_boosted_tree_regressor_example").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("gradient_boosted_tree_regressor_example")\ + .getOrCreate() # $example on$ # Load and parse the data file, converting it to a DataFrame. diff --git a/examples/src/main/python/ml/index_to_string_example.py b/examples/src/main/python/ml/index_to_string_example.py index dd04b2c4b0bfd9d8bdaf863310d6bf83280be0c8..523caac00c18af1628e7d0f4aea85f74946403c0 100644 --- a/examples/src/main/python/ml/index_to_string_example.py +++ b/examples/src/main/python/ml/index_to_string_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import IndexToString, StringIndexer from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("IndexToStringExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("IndexToStringExample")\ + .getOrCreate() # $example on$ df = spark.createDataFrame( diff --git a/examples/src/main/python/ml/kmeans_example.py b/examples/src/main/python/ml/kmeans_example.py index 7d9d80e6452bd014e7cabd2c02a49331269cbebc..73823969554fad952b6bc05e385fb821ffbf724d 100644 --- a/examples/src/main/python/ml/kmeans_example.py +++ b/examples/src/main/python/ml/kmeans_example.py @@ -49,7 +49,10 @@ if __name__ == "__main__": path = sys.argv[1] k = sys.argv[2] - spark = SparkSession.builder.appName("PythonKMeansExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("PythonKMeansExample")\ + .getOrCreate() lines = spark.read.text(path).rdd data = lines.map(parseVector) diff --git a/examples/src/main/python/ml/linear_regression_with_elastic_net.py b/examples/src/main/python/ml/linear_regression_with_elastic_net.py index 99b7f7fe99de63b97bdff785410d4234c97a2f4e..620ab5b87e594256e55e40240fbe674404e532f2 100644 --- a/examples/src/main/python/ml/linear_regression_with_elastic_net.py +++ b/examples/src/main/python/ml/linear_regression_with_elastic_net.py @@ -23,7 +23,10 @@ from pyspark.ml.regression import LinearRegression from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("LinearRegressionWithElasticNet").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("LinearRegressionWithElasticNet")\ + .getOrCreate() # $example on$ # Load training data diff --git a/examples/src/main/python/ml/logistic_regression_with_elastic_net.py b/examples/src/main/python/ml/logistic_regression_with_elastic_net.py index 0d7112e72354fb3e8490c6fb99672069c9b72e60..33d0689f75cd57dd516f61ab605688e0ef370567 100644 --- a/examples/src/main/python/ml/logistic_regression_with_elastic_net.py +++ b/examples/src/main/python/ml/logistic_regression_with_elastic_net.py @@ -23,7 +23,10 @@ from pyspark.ml.classification import LogisticRegression from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("LogisticRegressionWithElasticNet").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("LogisticRegressionWithElasticNet")\ + .getOrCreate() # $example on$ # Load training data diff --git a/examples/src/main/python/ml/max_abs_scaler_example.py b/examples/src/main/python/ml/max_abs_scaler_example.py index 1cb95a98f08dc3ab13528cd1b2a24c87473f35d9..ab91198b083d12a2d169f72d0e05df037e3ad025 100644 --- a/examples/src/main/python/ml/max_abs_scaler_example.py +++ b/examples/src/main/python/ml/max_abs_scaler_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import MaxAbsScaler from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("MaxAbsScalerExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("MaxAbsScalerExample")\ + .getOrCreate() # $example on$ dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") diff --git a/examples/src/main/python/ml/min_max_scaler_example.py b/examples/src/main/python/ml/min_max_scaler_example.py index 8d91a59e2b0bd81f9fce7b11b8294dc1320dcaf1..e3e7bc205b1ecf4151108b182dfba13a30f1b308 100644 --- a/examples/src/main/python/ml/min_max_scaler_example.py +++ b/examples/src/main/python/ml/min_max_scaler_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import MinMaxScaler from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("MinMaxScalerExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("MinMaxScalerExample")\ + .getOrCreate() # $example on$ dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") diff --git a/examples/src/main/python/ml/n_gram_example.py b/examples/src/main/python/ml/n_gram_example.py index b7fecf0d685c0e2dfe0e8c0a574ac171f5f4185d..9ac07f2c8ee2076c16e822b13ca3f89bb200453c 100644 --- a/examples/src/main/python/ml/n_gram_example.py +++ b/examples/src/main/python/ml/n_gram_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import NGram from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("NGramExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("NGramExample")\ + .getOrCreate() # $example on$ wordDataFrame = spark.createDataFrame([ diff --git a/examples/src/main/python/ml/naive_bayes_example.py b/examples/src/main/python/ml/naive_bayes_example.py index e37035542c7e8adb9b5a082fb8f4109ba3f66ba5..89255a2bae64cfd8df1646fd5799488400c5e296 100644 --- a/examples/src/main/python/ml/naive_bayes_example.py +++ b/examples/src/main/python/ml/naive_bayes_example.py @@ -24,7 +24,10 @@ from pyspark.ml.evaluation import MulticlassClassificationEvaluator from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("naive_bayes_example").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("naive_bayes_example")\ + .getOrCreate() # $example on$ # Load training data diff --git a/examples/src/main/python/ml/normalizer_example.py b/examples/src/main/python/ml/normalizer_example.py index ae2553761928cf023bc65fb1c67745d6d740f2f8..19012f51f4023302fa9fcb7dd9f4830ea3a1bec5 100644 --- a/examples/src/main/python/ml/normalizer_example.py +++ b/examples/src/main/python/ml/normalizer_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import Normalizer from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("NormalizerExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("NormalizerExample")\ + .getOrCreate() # $example on$ dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") diff --git a/examples/src/main/python/ml/onehot_encoder_example.py b/examples/src/main/python/ml/onehot_encoder_example.py index 9acc363dc9e25c843412912e6d8da6198ae6703c..b9fceef68e7034adc4e6aaac461df018e9179bec 100644 --- a/examples/src/main/python/ml/onehot_encoder_example.py +++ b/examples/src/main/python/ml/onehot_encoder_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import OneHotEncoder, StringIndexer from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("OneHotEncoderExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("OneHotEncoderExample")\ + .getOrCreate() # $example on$ df = spark.createDataFrame([ diff --git a/examples/src/main/python/ml/pca_example.py b/examples/src/main/python/ml/pca_example.py index adab151734aa863718b8b348143e4444def5330d..f1b3cdec7bd7781263c95da49fd8ad5399784614 100644 --- a/examples/src/main/python/ml/pca_example.py +++ b/examples/src/main/python/ml/pca_example.py @@ -24,7 +24,10 @@ from pyspark.mllib.linalg import Vectors from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("PCAExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("PCAExample")\ + .getOrCreate() # $example on$ data = [(Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),), diff --git a/examples/src/main/python/ml/pipeline_example.py b/examples/src/main/python/ml/pipeline_example.py index ed9765d96165e923515dcd9be67298c8d565da99..bd10cfd7a252b75d93b27a382cd6ce91170fa733 100644 --- a/examples/src/main/python/ml/pipeline_example.py +++ b/examples/src/main/python/ml/pipeline_example.py @@ -27,7 +27,10 @@ from pyspark.ml.feature import HashingTF, Tokenizer from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("PipelineExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("PipelineExample")\ + .getOrCreate() # $example on$ # Prepare training documents from a list of (id, text, label) tuples. diff --git a/examples/src/main/python/ml/polynomial_expansion_example.py b/examples/src/main/python/ml/polynomial_expansion_example.py index 328b559320f8575573ed8e69e35a296881f5eadf..08882bcb256f4673ec3d3c520b096ddeddce33cf 100644 --- a/examples/src/main/python/ml/polynomial_expansion_example.py +++ b/examples/src/main/python/ml/polynomial_expansion_example.py @@ -24,7 +24,10 @@ from pyspark.mllib.linalg import Vectors from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("PolynomialExpansionExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("PolynomialExpansionExample")\ + .getOrCreate() # $example on$ df = spark\ diff --git a/examples/src/main/python/ml/random_forest_classifier_example.py b/examples/src/main/python/ml/random_forest_classifier_example.py index b0a93e050c54b4603e79774c0217d261ad9c7314..c618eaf60c2e9b16c825f6247b6521a574c6b66d 100644 --- a/examples/src/main/python/ml/random_forest_classifier_example.py +++ b/examples/src/main/python/ml/random_forest_classifier_example.py @@ -29,7 +29,10 @@ from pyspark.ml.evaluation import MulticlassClassificationEvaluator from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("random_forest_classifier_example").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("random_forest_classifier_example")\ + .getOrCreate() # $example on$ # Load and parse the data file, converting it to a DataFrame. diff --git a/examples/src/main/python/ml/random_forest_regressor_example.py b/examples/src/main/python/ml/random_forest_regressor_example.py index 4bb84f0de8ff3c966dcc8a4efe5e2c976aa2e38c..3a793737dba893eda0e8e9610fb2ca80b2125439 100644 --- a/examples/src/main/python/ml/random_forest_regressor_example.py +++ b/examples/src/main/python/ml/random_forest_regressor_example.py @@ -29,7 +29,10 @@ from pyspark.ml.evaluation import RegressionEvaluator from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("random_forest_regressor_example").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("random_forest_regressor_example")\ + .getOrCreate() # $example on$ # Load and parse the data file, converting it to a DataFrame. diff --git a/examples/src/main/python/ml/rformula_example.py b/examples/src/main/python/ml/rformula_example.py index 45cc116ac2f61e5485b60f3199d45eb9a7e7e13a..d5df3ce4f59159ac97855df17c2b1e20795c884c 100644 --- a/examples/src/main/python/ml/rformula_example.py +++ b/examples/src/main/python/ml/rformula_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import RFormula from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("RFormulaExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("RFormulaExample")\ + .getOrCreate() # $example on$ dataset = spark.createDataFrame( diff --git a/examples/src/main/python/ml/simple_text_classification_pipeline.py b/examples/src/main/python/ml/simple_text_classification_pipeline.py index 3600c12211d0583ff8cda69b301df48dff59c5ff..886f43c0b08e8e8089f62fefa149146d42deebae 100644 --- a/examples/src/main/python/ml/simple_text_classification_pipeline.py +++ b/examples/src/main/python/ml/simple_text_classification_pipeline.py @@ -33,7 +33,10 @@ pipeline in Python. Run with: if __name__ == "__main__": - spark = SparkSession.builder.appName("SimpleTextClassificationPipeline").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("SimpleTextClassificationPipeline")\ + .getOrCreate() # Prepare training documents, which are labeled. training = spark.createDataFrame([ diff --git a/examples/src/main/python/ml/sql_transformer.py b/examples/src/main/python/ml/sql_transformer.py index 26045db4be6bcddb9a4aa4e5b6a02d1a3c467bbb..0bf8f35720c95cdd3558425313c691c1a5e14880 100644 --- a/examples/src/main/python/ml/sql_transformer.py +++ b/examples/src/main/python/ml/sql_transformer.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import SQLTransformer from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("SQLTransformerExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("SQLTransformerExample")\ + .getOrCreate() # $example on$ df = spark.createDataFrame([ diff --git a/examples/src/main/python/ml/standard_scaler_example.py b/examples/src/main/python/ml/standard_scaler_example.py index c50804f6bf4e05f586fb70866a6617e1b9e75f8e..c0027480e69b3028525543505e329229e2a1302b 100644 --- a/examples/src/main/python/ml/standard_scaler_example.py +++ b/examples/src/main/python/ml/standard_scaler_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import StandardScaler from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("StandardScalerExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("StandardScalerExample")\ + .getOrCreate() # $example on$ dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") diff --git a/examples/src/main/python/ml/stopwords_remover_example.py b/examples/src/main/python/ml/stopwords_remover_example.py index 57362673df78613a6d84d4dd23ed94efdc9af072..395fdeffc537987be93d247da42a62ec85755b64 100644 --- a/examples/src/main/python/ml/stopwords_remover_example.py +++ b/examples/src/main/python/ml/stopwords_remover_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import StopWordsRemover from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("StopWordsRemoverExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("StopWordsRemoverExample")\ + .getOrCreate() # $example on$ sentenceData = spark.createDataFrame([ diff --git a/examples/src/main/python/ml/string_indexer_example.py b/examples/src/main/python/ml/string_indexer_example.py index aacd4f999bb1b9f3aadc53466364ff1bf199908a..a328e040f56365e7d923155a3de2753833051598 100644 --- a/examples/src/main/python/ml/string_indexer_example.py +++ b/examples/src/main/python/ml/string_indexer_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import StringIndexer from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("StringIndexerExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("StringIndexerExample")\ + .getOrCreate() # $example on$ df = spark.createDataFrame( diff --git a/examples/src/main/python/ml/tf_idf_example.py b/examples/src/main/python/ml/tf_idf_example.py index 25df8166efc2e21a062b7596143871f57d600a7e..fb4ad992fb809cfa1cc936a7418227d3c5ab365d 100644 --- a/examples/src/main/python/ml/tf_idf_example.py +++ b/examples/src/main/python/ml/tf_idf_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import HashingTF, IDF, Tokenizer from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("TfIdfExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("TfIdfExample")\ + .getOrCreate() # $example on$ sentenceData = spark.createDataFrame([ diff --git a/examples/src/main/python/ml/tokenizer_example.py b/examples/src/main/python/ml/tokenizer_example.py index 5be4b4cfe3a52ff974adf1b7cd5134cd72083a8a..e61ec920d22810302daa0ca2fd2e4152aae9c7d4 100644 --- a/examples/src/main/python/ml/tokenizer_example.py +++ b/examples/src/main/python/ml/tokenizer_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import Tokenizer, RegexTokenizer from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("TokenizerExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("TokenizerExample")\ + .getOrCreate() # $example on$ sentenceDataFrame = spark.createDataFrame([ diff --git a/examples/src/main/python/ml/train_validation_split.py b/examples/src/main/python/ml/train_validation_split.py index 2e43a0f8aec541e959f779265045fc675e5dcfac..5f5c52aca8c42f64b61d82e04313ea14582491c3 100644 --- a/examples/src/main/python/ml/train_validation_split.py +++ b/examples/src/main/python/ml/train_validation_split.py @@ -31,7 +31,10 @@ Run with: """ if __name__ == "__main__": - spark = SparkSession.builder.appName("TrainValidationSplit").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("TrainValidationSplit")\ + .getOrCreate() # $example on$ # Prepare training and test data. data = spark.read.format("libsvm")\ diff --git a/examples/src/main/python/ml/vector_assembler_example.py b/examples/src/main/python/ml/vector_assembler_example.py index 019a9ea6f7503246a44b92bcb12105073d2969ab..b955ff00a81951e4e676ef962ff3bf6fec968dce 100644 --- a/examples/src/main/python/ml/vector_assembler_example.py +++ b/examples/src/main/python/ml/vector_assembler_example.py @@ -24,7 +24,10 @@ from pyspark.ml.feature import VectorAssembler from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("VectorAssemblerExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("VectorAssemblerExample")\ + .getOrCreate() # $example on$ dataset = spark.createDataFrame( diff --git a/examples/src/main/python/ml/vector_indexer_example.py b/examples/src/main/python/ml/vector_indexer_example.py index 3cf5b8ebf17ee043379106ab1782227e9160bea3..9b00e0f84136c2e771d92c975cf54653e1cda3db 100644 --- a/examples/src/main/python/ml/vector_indexer_example.py +++ b/examples/src/main/python/ml/vector_indexer_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import VectorIndexer from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("VectorIndexerExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("VectorIndexerExample")\ + .getOrCreate() # $example on$ data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") diff --git a/examples/src/main/python/ml/vector_slicer_example.py b/examples/src/main/python/ml/vector_slicer_example.py index 0531bcdb06675462506e22f1076fe8a4a0e8cacc..b833a894eb841011356fcaa0f829aeb188340e09 100644 --- a/examples/src/main/python/ml/vector_slicer_example.py +++ b/examples/src/main/python/ml/vector_slicer_example.py @@ -25,7 +25,10 @@ from pyspark.sql.types import Row from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("VectorSlicerExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("VectorSlicerExample")\ + .getOrCreate() # $example on$ df = spark.createDataFrame([ diff --git a/examples/src/main/python/ml/word2vec_example.py b/examples/src/main/python/ml/word2vec_example.py index 6766a7b6aa45423fb9bfbdbd2360e0a3ef583dd2..66500bee152f7adaca8e6c0c32fddd044db2d89a 100644 --- a/examples/src/main/python/ml/word2vec_example.py +++ b/examples/src/main/python/ml/word2vec_example.py @@ -23,7 +23,10 @@ from pyspark.ml.feature import Word2Vec from pyspark.sql import SparkSession if __name__ == "__main__": - spark = SparkSession.builder.appName("Word2VecExample").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("Word2VecExample")\ + .getOrCreate() # $example on$ # Input data: Each row is a bag of words from a sentence or document. diff --git a/examples/src/main/python/mllib/binary_classification_metrics_example.py b/examples/src/main/python/mllib/binary_classification_metrics_example.py index 8f0fc9d45da2bebe0ea720e59db542026a4503eb..daf000e38dcd094cb23b0688d7d7dcedf2657213 100644 --- a/examples/src/main/python/mllib/binary_classification_metrics_example.py +++ b/examples/src/main/python/mllib/binary_classification_metrics_example.py @@ -18,20 +18,25 @@ Binary Classification Metrics Example. """ from __future__ import print_function -from pyspark import SparkContext +from pyspark.sql import SparkSession # $example on$ from pyspark.mllib.classification import LogisticRegressionWithLBFGS from pyspark.mllib.evaluation import BinaryClassificationMetrics -from pyspark.mllib.util import MLUtils +from pyspark.mllib.regression import LabeledPoint # $example off$ if __name__ == "__main__": - sc = SparkContext(appName="BinaryClassificationMetricsExample") + spark = SparkSession\ + .builder\ + .appName("BinaryClassificationMetricsExample")\ + .getOrCreate() # $example on$ # Several of the methods available in scala are currently missing from pyspark # Load training data in LIBSVM format - data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_binary_classification_data.txt") + data = spark\ + .read.format("libsvm").load("data/mllib/sample_binary_classification_data.txt")\ + .rdd.map(lambda row: LabeledPoint(row[0], row[1])) # Split data into training (60%) and test (40%) training, test = data.randomSplit([0.6, 0.4], seed=11L) @@ -53,4 +58,4 @@ if __name__ == "__main__": print("Area under ROC = %s" % metrics.areaUnderROC) # $example off$ - sc.stop() + spark.stop() diff --git a/examples/src/main/python/sql.py b/examples/src/main/python/sql.py index 59a46cb283c49ef4004251e5bf8c9be0bcf1bbe4..5594223465f8703c1b802df2129089cfa8a57ddb 100644 --- a/examples/src/main/python/sql.py +++ b/examples/src/main/python/sql.py @@ -25,7 +25,10 @@ from pyspark.sql.types import Row, StructField, StructType, StringType, IntegerT if __name__ == "__main__": - spark = SparkSession.builder.appName("PythonSQL").getOrCreate() + spark = SparkSession\ + .builder\ + .appName("PythonSQL")\ + .getOrCreate() # A list of Rows. Infer schema from the first row, create a DataFrame and print the schema rows = [Row(name="John", age=19), Row(name="Smith", age=23), Row(name="Sarah", age=18)] diff --git a/examples/src/main/python/streaming/sql_network_wordcount.py b/examples/src/main/python/streaming/sql_network_wordcount.py index 588cbfee14baf5b595d1fb7982a9e4e34ae7c925..f8801d4ea63e247beca378f9044379f87908f4d5 100644 --- a/examples/src/main/python/streaming/sql_network_wordcount.py +++ b/examples/src/main/python/streaming/sql_network_wordcount.py @@ -38,8 +38,10 @@ from pyspark.sql import Row, SparkSession def getSparkSessionInstance(sparkConf): if ('sparkSessionSingletonInstance' not in globals()): - globals()['sparkSessionSingletonInstance'] =\ - SparkSession.builder.config(conf=sparkConf).getOrCreate() + globals()['sparkSessionSingletonInstance'] = SparkSession\ + .builder\ + .config(conf=sparkConf)\ + .getOrCreate() return globals()['sparkSessionSingletonInstance'] diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/AFTSurvivalRegressionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/AFTSurvivalRegressionExample.scala index 3795af83094a6c476fab28f1efbfafd172a96f65..2b224d50a0a3c27ca6c0b93e3a5ef52979a57244 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/AFTSurvivalRegressionExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/AFTSurvivalRegressionExample.scala @@ -30,7 +30,10 @@ import org.apache.spark.sql.SparkSession object AFTSurvivalRegressionExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("AFTSurvivalRegressionExample").getOrCreate() + val spark = SparkSession + .builder + .appName("AFTSurvivalRegressionExample") + .getOrCreate() // $example on$ val training = spark.createDataFrame(Seq( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala index 41750ca77914655c68452a21290cb0e5031bd9c9..7c1cfe293717aa0d6becacbe8ee201eee1c3a838 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala @@ -42,7 +42,10 @@ object ALSExample { // $example off$ def main(args: Array[String]) { - val spark = SparkSession.builder.appName("ALSExample").getOrCreate() + val spark = SparkSession + .builder + .appName("ALSExample") + .getOrCreate() import spark.implicits._ // $example on$ diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala index 93c153f923232424aeb74803e419204ea5b0d316..82bc14789b461c7c361b1dbae7677fc4fcf957bf 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.{DataFrame, SparkSession} object BinarizerExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("BinarizerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("BinarizerExample") + .getOrCreate() // $example on$ val data = Array((0, 0.1), (1, 0.8), (2, 0.2)) val dataFrame: DataFrame = spark.createDataFrame(data).toDF("label", "feature") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala index 779ad33dbda0a4b12a8b21820269588be33b80a0..38cce34bb5091aafc8a96f425ed7d61763f31e66 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object BucketizerExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("BucketizerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("BucketizerExample") + .getOrCreate() // $example on$ val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity) diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ChiSqSelectorExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ChiSqSelectorExample.scala index 84ca1f0b564d24dbe6914c05694c2f98dff5807d..80f50cd3556af40ca1d76f1169aed889e1d36441 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/ChiSqSelectorExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/ChiSqSelectorExample.scala @@ -26,7 +26,10 @@ import org.apache.spark.sql.SparkSession object ChiSqSelectorExample { def main(args: Array[String]) { - val spark = SparkSession.builder.appName("ChiSqSelectorExample").getOrCreate() + val spark = SparkSession + .builder + .appName("ChiSqSelectorExample") + .getOrCreate() import spark.implicits._ // $example on$ diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/CountVectorizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/CountVectorizerExample.scala index 9ab43a48bff8e98e368b293749f073e9855dae9d..51aa5179fa4a86318e50940581da882af4a2d0dd 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/CountVectorizerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/CountVectorizerExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object CountVectorizerExample { def main(args: Array[String]) { - val spark = SparkSession.builder.appName("CounterVectorizerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("CounterVectorizerExample") + .getOrCreate() // $example on$ val df = spark.createDataFrame(Seq( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala index b415333c710c76fd2b470e6ea8d853c378c3b684..5a888b15eb1c9f91fbab6c173186bd058cab9120 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala @@ -26,7 +26,10 @@ import org.apache.spark.sql.SparkSession object DCTExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("DCTExample").getOrCreate() + val spark = SparkSession + .builder + .appName("DCTExample") + .getOrCreate() // $example on$ val data = Seq( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala index 2f892f8d72c1a632d92d9249fe1d2c03095d9922..6cb81cde6f79549453b7d7d88493fcfd62f7e27f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala @@ -61,7 +61,10 @@ object DataFrameExample { } def run(params: Params) { - val spark = SparkSession.builder.appName(s"DataFrameExample with $params").getOrCreate() + val spark = SparkSession + .builder + .appName(s"DataFrameExample with $params") + .getOrCreate() // Load input data println(s"Loading LIBSVM file with UDT from ${params.input}.") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala index a0a2e1fb33dce48b7059af9306592a0d44275a26..7f6c8de9679f943cea4ce00b4929b2877d82c556 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala @@ -29,7 +29,10 @@ import org.apache.spark.sql.SparkSession object DecisionTreeClassificationExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("DecisionTreeClassificationExample").getOrCreate() + val spark = SparkSession + .builder + .appName("DecisionTreeClassificationExample") + .getOrCreate() // $example on$ // Load the data stored in LIBSVM format as a DataFrame. val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala index cea1d801aa73af8657a55501ba93fed9d681ada6..eadb02ab0d7f721be8fd1e6146d481b4a94dd101 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala @@ -167,7 +167,9 @@ object DecisionTreeExample { testInput: String, algo: String, fracTest: Double): (DataFrame, DataFrame) = { - val spark = SparkSession.builder.getOrCreate() + val spark = SparkSession + .builder + .getOrCreate() // Load training data val origExamples: DataFrame = loadData(spark, input, dataFormat) diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeRegressionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeRegressionExample.scala index 26b52d0489e1c8020c2d5525d3029d206d61d30b..799070ef47da607c87e2a79acdb453d0820bac5f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeRegressionExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeRegressionExample.scala @@ -29,7 +29,10 @@ import org.apache.spark.sql.SparkSession object DecisionTreeRegressionExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("DecisionTreeRegressionExample").getOrCreate() + val spark = SparkSession + .builder + .appName("DecisionTreeRegressionExample") + .getOrCreate() // $example on$ // Load the data stored in LIBSVM format as a DataFrame. diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala index 2aa1ab1ec855f0ff1f2c2efa4b9f5b23254e8b5f..a522d2127edcab403f7be2659155958aaa0e79b4 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala @@ -37,7 +37,10 @@ import org.apache.spark.sql.{Dataset, Row, SparkSession} object DeveloperApiExample { def main(args: Array[String]) { - val spark = SparkSession.builder.appName("DeveloperApiExample").getOrCreate() + val spark = SparkSession + .builder + .appName("DeveloperApiExample") + .getOrCreate() import spark.implicits._ // Prepare training data. diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ElementwiseProductExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ElementwiseProductExample.scala index f289c28df9b535509a14b630976f9c4b24dabc63..b99b76e58cef73c223739c977bc2ef172e110169 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/ElementwiseProductExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/ElementwiseProductExample.scala @@ -26,7 +26,10 @@ import org.apache.spark.sql.SparkSession object ElementwiseProductExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("ElementwiseProductExample").getOrCreate() + val spark = SparkSession + .builder + .appName("ElementwiseProductExample") + .getOrCreate() // $example on$ // Create some vector data; also works for sparse vectors diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/EstimatorTransformerParamExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/EstimatorTransformerParamExample.scala index 91076ccbc14d443aca87571939deedea58d82683..972241e76922e865aa9a87fb24f790edf35a787e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/EstimatorTransformerParamExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/EstimatorTransformerParamExample.scala @@ -29,7 +29,10 @@ import org.apache.spark.sql.SparkSession object EstimatorTransformerParamExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("EstimatorTransformerParamExample").getOrCreate() + val spark = SparkSession + .builder + .appName("EstimatorTransformerParamExample") + .getOrCreate() // $example on$ // Prepare training data from a list of (label, features) tuples. diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala index 412c54db7d5c105750309b30c38e9dd7f874cef2..b6a8baba2d95f5d570226555a80d178da82e5789 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala @@ -28,7 +28,10 @@ import org.apache.spark.sql.SparkSession object GradientBoostedTreeClassifierExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("GradientBoostedTreeClassifierExample").getOrCreate() + val spark = SparkSession + .builder + .appName("GradientBoostedTreeClassifierExample") + .getOrCreate() // $example on$ // Load and parse the data file, converting it to a DataFrame. diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeRegressorExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeRegressorExample.scala index fd43553cc69dfa862e01e1c5017af96fe994412c..62285b83cbb9dfe01c2a023b4abab3b32b5c1d37 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeRegressorExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeRegressorExample.scala @@ -28,7 +28,10 @@ import org.apache.spark.sql.SparkSession object GradientBoostedTreeRegressorExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("GradientBoostedTreeRegressorExample").getOrCreate() + val spark = SparkSession + .builder + .appName("GradientBoostedTreeRegressorExample") + .getOrCreate() // $example on$ // Load and parse the data file, converting it to a DataFrame. diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala index d873618726c6ed44ab716f0b8985ca3ca655837d..950733831c3d53020619365be9f28c89f3ee30e5 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object IndexToStringExample { def main(args: Array[String]) { - val spark = SparkSession.builder.appName("IndexToStringExample").getOrCreate() + val spark = SparkSession + .builder + .appName("IndexToStringExample") + .getOrCreate() // $example on$ val df = spark.createDataFrame(Seq( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala index d2573fad3596f221036f3b19f251519b909f82d4..2abd588c6f0e41dab3115357e420a74b9fe6ce9d 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala @@ -36,7 +36,10 @@ object KMeansExample { def main(args: Array[String]): Unit = { // Creates a Spark context and a SQL context - val spark = SparkSession.builder.appName(s"${this.getClass.getSimpleName}").getOrCreate() + val spark = SparkSession + .builder + .appName(s"${this.getClass.getSimpleName}") + .getOrCreate() // $example on$ // Crates a DataFrame diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/LDAExample.scala index c23adee1a3ead959b002f9436679b1f129164b87..c2920f6a5d4a1515f43ef1dc50e272c11561416b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/LDAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/LDAExample.scala @@ -40,7 +40,10 @@ object LDAExample { val input = "data/mllib/sample_lda_data.txt" // Creates a Spark context and a SQL context - val spark = SparkSession.builder.appName(s"${this.getClass.getSimpleName}").getOrCreate() + val spark = SparkSession + .builder + .appName(s"${this.getClass.getSimpleName}") + .getOrCreate() // $example on$ // Loads data diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala index cb6e2492f513f41ac05b138f3002dd332482e303..94cf2866238b9606d17ccfd9bac8e9e761987258 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala @@ -26,7 +26,10 @@ import org.apache.spark.sql.SparkSession object LinearRegressionWithElasticNetExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("LinearRegressionWithElasticNetExample").getOrCreate() + val spark = SparkSession + .builder + .appName("LinearRegressionWithElasticNetExample") + .getOrCreate() // $example on$ // Load training data diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionSummaryExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionSummaryExample.scala index 50670d7b384168edae0f812f65fed6f550a57ce1..cd8775c94216244d447b4d679805f7fb9dd0c654 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionSummaryExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionSummaryExample.scala @@ -27,7 +27,10 @@ import org.apache.spark.sql.functions.max object LogisticRegressionSummaryExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("LogisticRegressionSummaryExample").getOrCreate() + val spark = SparkSession + .builder + .appName("LogisticRegressionSummaryExample") + .getOrCreate() import spark.implicits._ // Load training data diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/MaxAbsScalerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/MaxAbsScalerExample.scala index 896d8fadbe67c1e32a028f20a3a475da64c36d8d..572adce6570817a6e5ed7195021494e3fdba8b78 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/MaxAbsScalerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/MaxAbsScalerExample.scala @@ -24,7 +24,10 @@ import org.apache.spark.sql.SparkSession object MaxAbsScalerExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("MaxAbsScalerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("MaxAbsScalerExample") + .getOrCreate() // $example on$ val dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala index bcdca0fa0440d455e97aa26a7bb8a68da7f88bde..d728019a621d4d4bef839e524ad0d0e392607723 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object MinMaxScalerExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("MinMaxScalerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("MinMaxScalerExample") + .getOrCreate() // $example on$ val dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala index a11fe1b4b2627bbc3127ba0f22dc3d46dc31758b..0e780fb7d342ed0e0096758002b092bc20643723 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala @@ -30,7 +30,10 @@ import org.apache.spark.sql.SparkSession object MultilayerPerceptronClassifierExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("MultilayerPerceptronClassifierExample").getOrCreate() + val spark = SparkSession + .builder + .appName("MultilayerPerceptronClassifierExample") + .getOrCreate() // $example on$ // Load the data stored in LIBSVM format as a DataFrame. diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala index 1b71a398905e0fa000bc12a2608fb2cd36b65a84..e0b52e7a367fccc6abcfd1ea8a36f9face1f7bbd 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object NGramExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("NGramExample").getOrCreate() + val spark = SparkSession + .builder + .appName("NGramExample") + .getOrCreate() // $example on$ val wordDataFrame = spark.createDataFrame(Seq( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala index 8d54555cd3c65f2ed872a5d8a8c9c24af13b705b..90cdebfcb08de7ec8a8d4da2f299e27a5139b7f6 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala @@ -26,7 +26,10 @@ import org.apache.spark.sql.SparkSession object NaiveBayesExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("NaiveBayesExample").getOrCreate() + val spark = SparkSession + .builder + .appName("NaiveBayesExample") + .getOrCreate() // $example on$ // Load the data stored in LIBSVM format as a DataFrame. val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala index 4622d69ef98cc59db77ab9da9db3abe1e50875a3..75ba33a7e7fc1529902e790db825e0cd729ea111 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object NormalizerExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("NormalizerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("NormalizerExample") + .getOrCreate() // $example on$ val dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala index 338436100c5ce6f6c63729a39094ce7637933229..4aa649b1332c6c6a6838da619f8101a2a1353e41 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object OneHotEncoderExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("OneHotEncoderExample").getOrCreate() + val spark = SparkSession + .builder + .appName("OneHotEncoderExample") + .getOrCreate() // $example on$ val df = spark.createDataFrame(Seq( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala index e2351c682d753568a19bb675f99da1249b350cc1..fc73ae07ff6c6fd829491ca2a4ddb13d47fcb635 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala @@ -109,7 +109,10 @@ object OneVsRestExample { } private def run(params: Params) { - val spark = SparkSession.builder.appName(s"OneVsRestExample with $params").getOrCreate() + val spark = SparkSession + .builder + .appName(s"OneVsRestExample with $params") + .getOrCreate() // $example on$ val inputData = spark.read.format("libsvm").load(params.input) diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala index 14394d562468b7e9f790861fe23d25684f8c2fae..7927323b4285233c2c34d2ae3fbbd3b1b6630cb6 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala @@ -26,7 +26,10 @@ import org.apache.spark.sql.SparkSession object PCAExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("PCAExample").getOrCreate() + val spark = SparkSession + .builder + .appName("PCAExample") + .getOrCreate() // $example on$ val data = Array( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PipelineExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PipelineExample.scala index 61b34aebd9c32ab64997297880ed4e0e6670c7e6..e5e916ac166fb0ba7fd38c0d8c0ae4d4154a7c07 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/PipelineExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/PipelineExample.scala @@ -30,7 +30,10 @@ import org.apache.spark.sql.SparkSession object PipelineExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("PipelineExample").getOrCreate() + val spark = SparkSession + .builder + .appName("PipelineExample") + .getOrCreate() // $example on$ // Prepare training documents from a list of (id, text, label) tuples. diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala index 4d8c672a55c34ea1eeb88f0061833eb1671c3d5c..94b17a3cd7064fae92e22b5e66c669a6d35d93e0 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala @@ -26,7 +26,10 @@ import org.apache.spark.sql.SparkSession object PolynomialExpansionExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("PolynomialExpansionExample").getOrCreate() + val spark = SparkSession + .builder + .appName("PolynomialExpansionExample") + .getOrCreate() // $example on$ val data = Array( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala index 0839c609f1d95fbf2918738905d6f204eba2851d..1a165155941612d0e58fe2ae69a5121d3af49951 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala @@ -24,7 +24,10 @@ import org.apache.spark.sql.SparkSession object QuantileDiscretizerExample { def main(args: Array[String]) { - val spark = SparkSession.builder.appName("QuantileDiscretizerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("QuantileDiscretizerExample") + .getOrCreate() import spark.implicits._ // $example on$ diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala index 699b621db90af9b123c0bed77b20f8c7612d14fb..9ea4920146448140945f2c0bdee622839fd135a9 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object RFormulaExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("RFormulaExample").getOrCreate() + val spark = SparkSession + .builder + .appName("RFormulaExample") + .getOrCreate() // $example on$ val dataset = spark.createDataFrame(Seq( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala index 4192a9c737045d8965495f2503ba88de11e10e89..ae0bd945d8fed2d61e16e9602c9cbb6bc777ed2c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala @@ -28,7 +28,10 @@ import org.apache.spark.sql.SparkSession object RandomForestClassifierExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("RandomForestClassifierExample").getOrCreate() + val spark = SparkSession + .builder + .appName("RandomForestClassifierExample") + .getOrCreate() // $example on$ // Load and parse the data file, converting it to a DataFrame. diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestRegressorExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestRegressorExample.scala index 5632f0419a80c49ba602ee60636d478b81d71325..96dc2f05be974c8caf8b4f19be5c89fd1c5b84f5 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestRegressorExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestRegressorExample.scala @@ -28,7 +28,10 @@ import org.apache.spark.sql.SparkSession object RandomForestRegressorExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("RandomForestRegressorExample").getOrCreate() + val spark = SparkSession + .builder + .appName("RandomForestRegressorExample") + .getOrCreate() // $example on$ // Load and parse the data file, converting it to a DataFrame. diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SQLTransformerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SQLTransformerExample.scala index f03b29ba327c62f2760144b94a5f44a0900d17cb..bb4587b82cb37cffdd30bb56ad63d6ff14ac0737 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/SQLTransformerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/SQLTransformerExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object SQLTransformerExample { def main(args: Array[String]) { - val spark = SparkSession.builder.appName("SQLTransformerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("SQLTransformerExample") + .getOrCreate() // $example on$ val df = spark.createDataFrame( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala index dff771950715aa1a2edb71a9ba21c8103c50833d..3547dd95bdcedd727a6c2e59d94b284c93ae710f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala @@ -34,7 +34,10 @@ import org.apache.spark.sql.{Row, SparkSession} object SimpleParamsExample { def main(args: Array[String]) { - val spark = SparkSession.builder.appName("SimpleParamsExample").getOrCreate() + val spark = SparkSession + .builder + .appName("SimpleParamsExample") + .getOrCreate() import spark.implicits._ // Prepare training data. diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala index 05199007f015a7340be844b5cade95d8a558ccb4..c78ff2378bc1f4dd21b76309171762186272eae4 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala @@ -42,7 +42,10 @@ case class Document(id: Long, text: String) object SimpleTextClassificationPipeline { def main(args: Array[String]) { - val spark = SparkSession.builder.appName("SimpleTextClassificationPipeline").getOrCreate() + val spark = SparkSession + .builder + .appName("SimpleTextClassificationPipeline") + .getOrCreate() import spark.implicits._ // Prepare training documents, which are labeled. diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala index 55f777c6e228ae87b40582e65ca0e55638f0c377..4d668e8ab9670f6c737eb16b44cabda2890d40a9 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object StandardScalerExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("StandardScalerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("StandardScalerExample") + .getOrCreate() // $example on$ val dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala index 85e79c8cb38aef8e7bd7e1db10cb4a7a8c586a2a..fb1a43e962cd513821d4807c3aa5186c1436493f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object StopWordsRemoverExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("StopWordsRemoverExample").getOrCreate() + val spark = SparkSession + .builder + .appName("StopWordsRemoverExample") + .getOrCreate() // $example on$ val remover = new StopWordsRemover() diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala index e01a768da988d5e33a394cb51c14d0cdf772f64d..63f273e87a209412c26564a6361fe8344394b38f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object StringIndexerExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("StringIndexerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("StringIndexerExample") + .getOrCreate() // $example on$ val df = spark.createDataFrame( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala index 910ef62a2670a5218ac7a560b276d92175b7532f..33b5daec597838341f4952492d15af4f320ec235 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala @@ -26,7 +26,10 @@ import org.apache.spark.sql.SparkSession object TfIdfExample { def main(args: Array[String]) { - val spark = SparkSession.builder.appName("TfIdfExample").getOrCreate() + val spark = SparkSession + .builder + .appName("TfIdfExample") + .getOrCreate() // $example on$ val sentenceData = spark.createDataFrame(Seq( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala index 4f0c47b3c84c7c7c8db52c4b7d3d3148923d3298..1c70dc700b91c5bd4475e259902ecc431b45644e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object TokenizerExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("TokenizerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("TokenizerExample") + .getOrCreate() // $example on$ val sentenceDataFrame = spark.createDataFrame(Seq( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala index 56b7263b192e082cf2775b4a5475354da5fb2bb8..8e382ccc4844ae82ef1eaf4c29b042c3d2654d4e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala @@ -26,7 +26,10 @@ import org.apache.spark.sql.SparkSession object VectorAssemblerExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("VectorAssemblerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("VectorAssemblerExample") + .getOrCreate() // $example on$ val dataset = spark.createDataFrame( diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala index 214ad91634ee33d6e88fa8a0f2d0e1795413173b..afa761aee0b98caf138a4cf0e678f867a6300a9c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object VectorIndexerExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("VectorIndexerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("VectorIndexerExample") + .getOrCreate() // $example on$ val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala index 716bf023a8087bc7858337c9329ac22f18f8d4d6..b1a3997f48c91aec0d70280e2d23c8f17b0f967a 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala @@ -31,7 +31,10 @@ import org.apache.spark.sql.SparkSession object VectorSlicerExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("VectorSlicerExample").getOrCreate() + val spark = SparkSession + .builder + .appName("VectorSlicerExample") + .getOrCreate() // $example on$ val data = Arrays.asList(Row(Vectors.dense(-2.0, 2.3, 0.0))) diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala index 292b6d9f7776333a9fdbb78b61971017e9769fa3..9ac5623607296ca9223a0415bb9c5dfeeb3ef733 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala @@ -25,7 +25,10 @@ import org.apache.spark.sql.SparkSession object Word2VecExample { def main(args: Array[String]) { - val spark = SparkSession.builder.appName("Word2Vec example").getOrCreate() + val spark = SparkSession + .builder + .appName("Word2Vec example") + .getOrCreate() // $example on$ // Input data: Each row is a bag of words from a sentence or document. diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala index c2bf1548b56850012478f906d2f3a3ddc8c0c46a..7651aade493a028bb473dbadd3c7f4b82fba0adb 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala @@ -189,7 +189,9 @@ object LDAExample { vocabSize: Int, stopwordFile: String): (RDD[(Long, Vector)], Array[String], Long) = { - val spark = SparkSession.builder.getOrCreate() + val spark = SparkSession + .builder + .getOrCreate() import spark.implicits._ // Get dataset of document texts diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala index cd4f0bb0de601d2ad0f0bbb1529c5ee79fa8ec55..781a934df66372ab2640e352807bbd243cf1977c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala @@ -26,7 +26,10 @@ import org.apache.spark.sql.SparkSession object RankingMetricsExample { def main(args: Array[String]) { - val spark = SparkSession.builder.appName("RankingMetricsExample").getOrCreate() + val spark = SparkSession + .builder + .appName("RankingMetricsExample") + .getOrCreate() import spark.implicits._ // $example on$ // Read in the ratings data diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala index 22c47a694d3467b48f81459a7706d2aada07616f..abeaaa00b5a4fe8dd154b2f1e7f071cf0e50ea8c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala @@ -27,7 +27,10 @@ import org.apache.spark.sql.SparkSession object RegressionMetricsExample { def main(args: Array[String]): Unit = { - val spark = SparkSession.builder.appName("RegressionMetricsExample").getOrCreate() + val spark = SparkSession + .builder + .appName("RegressionMetricsExample") + .getOrCreate() // $example on$ // Load the data val data = spark diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala index b4118b16e29187ad985ae36385dca9b5bbd8baff..94c378ae4b9116271d12277da930b3ab43a6b7d1 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala @@ -26,7 +26,10 @@ case class Record(key: Int, value: String) object RDDRelation { def main(args: Array[String]) { - val spark = SparkSession.builder.appName("RDDRelation").getOrCreate() + val spark = SparkSession + .builder + .appName("RDDRelation") + .getOrCreate() // Importing the SparkSession gives access to all the SQL functions and implicit conversions. import spark.implicits._ diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala index 2f0fe704f709b7eb7fe4e43328f0073560b539b1..9aba4a05a89adcd345a944089a81568786d9ad56 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala @@ -93,7 +93,10 @@ object SparkSessionSingleton { def getInstance(sparkConf: SparkConf): SparkSession = { if (instance == null) { - instance = SparkSession.builder.config(sparkConf).getOrCreate() + instance = SparkSession + .builder + .config(sparkConf) + .getOrCreate() } instance } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java index 9475c853a03ffeee2e51355cd0e18292f43a595e..9def4559d214edd889fde9fa8e2f9f3e15fafb30 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java @@ -34,8 +34,9 @@ public class VectorizedPlainValuesReader extends ValuesReader implements Vectori private int offset; private int bitOffset; // Only used for booleans. private ByteBuffer byteBuffer; // used to wrap the byte array buffer - - private final static boolean bigEndianPlatform = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); + + private static final boolean bigEndianPlatform = + ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); public VectorizedPlainValuesReader() { } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java index b8dd16227ec179439bb8e655e514ce4748bd5fcd..70b4a683311ca08bb3092448c1524eae3413fe3b 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java @@ -19,8 +19,6 @@ package org.apache.spark.sql.execution.vectorized; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import org.apache.commons.lang.NotImplementedException; - import org.apache.spark.memory.MemoryMode; import org.apache.spark.sql.types.*; import org.apache.spark.unsafe.Platform; @@ -29,9 +27,10 @@ import org.apache.spark.unsafe.Platform; * Column data backed using offheap memory. */ public final class OffHeapColumnVector extends ColumnVector { - - private final static boolean bigEndianPlatform = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); - + + private static final boolean bigEndianPlatform = + ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); + // The data stored in these two allocations need to maintain binary compatible. We can // directly pass this buffer to external components. private long nulls; @@ -230,7 +229,8 @@ public final class OffHeapColumnVector extends ColumnVector { int srcOffset = srcIndex + Platform.BYTE_ARRAY_OFFSET; long offset = data + 4 * rowId; for (int i = 0; i < count; ++i, offset += 4, srcOffset += 4) { - Platform.putInt(null, offset, java.lang.Integer.reverseBytes(Platform.getInt(src, srcOffset))); + Platform.putInt(null, offset, + java.lang.Integer.reverseBytes(Platform.getInt(src, srcOffset))); } } } @@ -276,7 +276,8 @@ public final class OffHeapColumnVector extends ColumnVector { int srcOffset = srcIndex + Platform.BYTE_ARRAY_OFFSET; long offset = data + 8 * rowId; for (int i = 0; i < count; ++i, offset += 8, srcOffset += 8) { - Platform.putLong(null, offset, java.lang.Long.reverseBytes(Platform.getLong(src, srcOffset))); + Platform.putLong(null, offset, + java.lang.Long.reverseBytes(Platform.getLong(src, srcOffset))); } } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java index b1ffe4c21049babc1edd5558d7000b79fca023a4..7fb7617050f2d1413df1df0a530d0f13496af2b6 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java @@ -29,9 +29,10 @@ import org.apache.spark.unsafe.Platform; * and a java array for the values. */ public final class OnHeapColumnVector extends ColumnVector { - - private final static boolean bigEndianPlatform = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); - + + private static final boolean bigEndianPlatform = + ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); + // The data stored in these arrays need to maintain binary compatible. We can // directly pass this buffer to external components. diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/SessionManager.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/SessionManager.java index 0457b3781ebf074daaf4cd6af0d9b5dc2ae9bfb3..de066dd406c7a0cc4199af22229fbe5b36f2f8b1 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/SessionManager.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/SessionManager.java @@ -22,7 +22,6 @@ import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Date; -import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Future; @@ -35,7 +34,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.ql.hooks.HookUtils; import org.apache.hive.service.CompositeService; import org.apache.hive.service.cli.HiveSQLException; import org.apache.hive.service.cli.SessionHandle;