From b0cafdb6ccff9add89dc31c45adf87c8fa906aac Mon Sep 17 00:00:00 2001
From: Nick Pentreath <nickp@za.ibm.com>
Date: Sat, 7 May 2016 10:57:40 +0200
Subject: [PATCH] [MINOR][ML][PYSPARK] ALS example cleanup

Cleans up ALS examples by removing unnecessary casts to double for `rating` and `prediction` columns, since `RegressionEvaluator` now supports `Double` & `Float` input types.

## How was this patch tested?

Manual compile and run with `run-example ml.ALSExample` and `spark-submit examples/src/main/python/ml/als_example.py`.

Author: Nick Pentreath <nickp@za.ibm.com>

Closes #12892 from MLnick/als-examples-cleanup.
---
 .../org/apache/spark/examples/ml/JavaALSExample.java     | 6 +-----
 examples/src/main/python/ml/als_example.py               | 9 +++------
 .../scala/org/apache/spark/examples/ml/ALSExample.scala  | 6 ------
 3 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java
index 4b13ba6f9c..7f568f4e0d 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java
@@ -29,7 +29,6 @@ import org.apache.spark.api.java.function.Function;
 import org.apache.spark.ml.evaluation.RegressionEvaluator;
 import org.apache.spark.ml.recommendation.ALS;
 import org.apache.spark.ml.recommendation.ALSModel;
-import org.apache.spark.sql.types.DataTypes;
 // $example off$
 
 public class JavaALSExample {
@@ -109,10 +108,7 @@ public class JavaALSExample {
     ALSModel model = als.fit(training);
 
     // Evaluate the model by computing the RMSE on the test data
-    Dataset<Row> rawPredictions = model.transform(test);
-    Dataset<Row> predictions = rawPredictions
-      .withColumn("rating", rawPredictions.col("rating").cast(DataTypes.DoubleType))
-      .withColumn("prediction", rawPredictions.col("prediction").cast(DataTypes.DoubleType));
+    Dataset<Row> predictions = model.transform(test);
 
     RegressionEvaluator evaluator = new RegressionEvaluator()
       .setMetricName("rmse")
diff --git a/examples/src/main/python/ml/als_example.py b/examples/src/main/python/ml/als_example.py
index ff0829b0dd..1a979ff5b5 100644
--- a/examples/src/main/python/ml/als_example.py
+++ b/examples/src/main/python/ml/als_example.py
@@ -48,12 +48,9 @@ if __name__ == "__main__":
     model = als.fit(training)
 
     # Evaluate the model by computing the RMSE on the test data
-    rawPredictions = model.transform(test)
-    predictions = rawPredictions\
-        .withColumn("rating", rawPredictions.rating.cast("double"))\
-        .withColumn("prediction", rawPredictions.prediction.cast("double"))
-    evaluator =\
-        RegressionEvaluator(metricName="rmse", labelCol="rating", predictionCol="prediction")
+    predictions = model.transform(test)
+    evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating",
+                                    predictionCol="prediction")
     rmse = evaluator.evaluate(predictions)
     print("Root-mean-square error = " + str(rmse))
     # $example off$
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala
index 7c1cfe2937..6b151a622e 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala
@@ -23,10 +23,6 @@ import org.apache.spark.ml.evaluation.RegressionEvaluator
 import org.apache.spark.ml.recommendation.ALS
 // $example off$
 import org.apache.spark.sql.SparkSession
-// $example on$
-import org.apache.spark.sql.functions._
-import org.apache.spark.sql.types.DoubleType
-// $example off$
 
 object ALSExample {
 
@@ -65,8 +61,6 @@ object ALSExample {
 
     // Evaluate the model by computing the RMSE on the test data
     val predictions = model.transform(test)
-      .withColumn("rating", col("rating").cast(DoubleType))
-      .withColumn("prediction", col("prediction").cast(DoubleType))
 
     val evaluator = new RegressionEvaluator()
       .setMetricName("rmse")
-- 
GitLab