diff --git a/examples/src/main/python/ml/cross_validator.py b/examples/src/main/python/ml/cross_validator.py
index 907eec67a0eb5843d7897d5314f9a918419a0f87..db7054307c2e351248f9d8bf6a1f11400774d8cb 100644
--- a/examples/src/main/python/ml/cross_validator.py
+++ b/examples/src/main/python/ml/cross_validator.py
@@ -84,10 +84,10 @@ if __name__ == "__main__":
 
     # Prepare test documents, which are unlabeled.
     test = spark.createDataFrame([
-        (4L, "spark i j k"),
-        (5L, "l m n"),
-        (6L, "mapreduce spark"),
-        (7L, "apache hadoop")
+        (4, "spark i j k"),
+        (5, "l m n"),
+        (6, "mapreduce spark"),
+        (7, "apache hadoop")
     ], ["id", "text"])
 
     # Make predictions on test documents. cvModel uses the best model found (lrModel).
diff --git a/examples/src/main/python/ml/gaussian_mixture_example.py b/examples/src/main/python/ml/gaussian_mixture_example.py
index 8ad450b669fc91e285c5ca861d2eccd3d85acf25..e4a0d314e9d91a2debaa35b9a0402b12ab79acdd 100644
--- a/examples/src/main/python/ml/gaussian_mixture_example.py
+++ b/examples/src/main/python/ml/gaussian_mixture_example.py
@@ -38,7 +38,7 @@ if __name__ == "__main__":
     # loads data
     dataset = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")
 
-    gmm = GaussianMixture().setK(2).setSeed(538009335L)
+    gmm = GaussianMixture().setK(2).setSeed(538009335)
     model = gmm.fit(dataset)
 
     print("Gaussians shown as a DataFrame: ")
diff --git a/examples/src/main/python/ml/pipeline_example.py b/examples/src/main/python/ml/pipeline_example.py
index f63e4db43422243ff4678d76af74cd3b80e4ad33..e1fab7cbe6d801dc02901557b9b3d44b731f2faa 100644
--- a/examples/src/main/python/ml/pipeline_example.py
+++ b/examples/src/main/python/ml/pipeline_example.py
@@ -35,10 +35,10 @@ if __name__ == "__main__":
     # $example on$
     # Prepare training documents from a list of (id, text, label) tuples.
     training = spark.createDataFrame([
-        (0L, "a b c d e spark", 1.0),
-        (1L, "b d", 0.0),
-        (2L, "spark f g h", 1.0),
-        (3L, "hadoop mapreduce", 0.0)
+        (0, "a b c d e spark", 1.0),
+        (1, "b d", 0.0),
+        (2, "spark f g h", 1.0),
+        (3, "hadoop mapreduce", 0.0)
     ], ["id", "text", "label"])
 
     # Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
@@ -52,10 +52,10 @@ if __name__ == "__main__":
 
     # Prepare test documents, which are unlabeled (id, text) tuples.
     test = spark.createDataFrame([
-        (4L, "spark i j k"),
-        (5L, "l m n"),
-        (6L, "spark hadoop spark"),
-        (7L, "apache hadoop")
+        (4, "spark i j k"),
+        (5, "l m n"),
+        (6, "spark hadoop spark"),
+        (7, "apache hadoop")
     ], ["id", "text"])
 
     # Make predictions on test documents and print columns of interest.
diff --git a/examples/src/main/python/mllib/binary_classification_metrics_example.py b/examples/src/main/python/mllib/binary_classification_metrics_example.py
index daf000e38dcd094cb23b0688d7d7dcedf2657213..91f8378f29c0c5d5d4f839759cd8421419af54b3 100644
--- a/examples/src/main/python/mllib/binary_classification_metrics_example.py
+++ b/examples/src/main/python/mllib/binary_classification_metrics_example.py
@@ -39,7 +39,7 @@ if __name__ == "__main__":
         .rdd.map(lambda row: LabeledPoint(row[0], row[1]))
 
     # Split data into training (60%) and test (40%)
-    training, test = data.randomSplit([0.6, 0.4], seed=11L)
+    training, test = data.randomSplit([0.6, 0.4], seed=11)
     training.cache()
 
     # Run training algorithm to build the model
diff --git a/examples/src/main/python/mllib/multi_class_metrics_example.py b/examples/src/main/python/mllib/multi_class_metrics_example.py
index cd56b3c97c77849b734a72680e5f00af91d4d834..7dc5fb4f9127f89d5dc2dfeded92d1b13f26028b 100644
--- a/examples/src/main/python/mllib/multi_class_metrics_example.py
+++ b/examples/src/main/python/mllib/multi_class_metrics_example.py
@@ -32,7 +32,7 @@ if __name__ == "__main__":
     data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_multiclass_classification_data.txt")
 
     # Split data into training (60%) and test (40%)
-    training, test = data.randomSplit([0.6, 0.4], seed=11L)
+    training, test = data.randomSplit([0.6, 0.4], seed=11)
     training.cache()
 
     # Run training algorithm to build the model
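Context for the changes above (illustration, not part of the patch): Python 3 unified `int` and `long` into a single arbitrary-precision `int`, so Python 2 long literals such as `4L` or `538009335L` fail to parse under Python 3. Dropping the `L` suffix keeps these examples valid on both interpreters, since Python 2 promotes plain int literals to `long` automatically when they exceed `sys.maxint`. A minimal sketch of the behavior this patch relies on, assuming a Python 3 interpreter:

# Illustration only: why the Python 2 `L` suffix can simply be dropped.
import sys

print(sys.version_info.major)   # 3 on a Python 3 interpreter
print(type(538009335))          # <class 'int'>
print(2 ** 100)                 # arbitrary precision: no overflow, no separate `long` type
# A literal written as `538009335L` would not even parse here:
# SyntaxError: invalid syntax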