From f75f633b21faaf911f04aeff847f25749b1ecd89 Mon Sep 17 00:00:00 2001
From: Xiangrui Meng <meng@databricks.com>
Date: Sat, 28 Mar 2015 15:08:05 -0700
Subject: [PATCH] [SPARK-6571][MLLIB] use wrapper in
 MatrixFactorizationModel.load

This fixes `predictAll` after load. jkbradley

Author: Xiangrui Meng <meng@databricks.com>

Closes #5243 from mengxr/SPARK-6571 and squashes the following commits:

82dcaa7 [Xiangrui Meng] use wrapper in MatrixFactorizationModel.load
---
 .../MatrixFactorizationModelWrapper.scala     | 40 +++++++++++++++++++
 .../mllib/api/python/PythonMLLibAPI.scala     | 18 ---------
 python/pyspark/mllib/recommendation.py        |  8 ++++
 3 files changed, 48 insertions(+), 18 deletions(-)
 create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala
new file mode 100644
index 0000000000..ecd3b16598
--- /dev/null
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/MatrixFactorizationModelWrapper.scala
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mllib.api.python
+
+import org.apache.spark.api.java.JavaRDD
+import org.apache.spark.mllib.recommendation.{MatrixFactorizationModel, Rating}
+import org.apache.spark.rdd.RDD
+
+/**
+ * A Wrapper of MatrixFactorizationModel to provide helper method for Python.
+ */
+private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorizationModel)
+  extends MatrixFactorizationModel(model.rank, model.userFeatures, model.productFeatures) {
+
+  def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] =
+    predict(SerDe.asTupleRDD(userAndProducts.rdd))
+
+  def getUserFeatures: RDD[Array[Any]] = {
+    SerDe.fromTuple2RDD(userFeatures.asInstanceOf[RDD[(Any, Any)]])
+  }
+
+  def getProductFeatures: RDD[Array[Any]] = {
+    SerDe.fromTuple2RDD(productFeatures.asInstanceOf[RDD[(Any, Any)]])
+  }
+}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index e391567347..22fa684fd2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -58,7 +58,6 @@ import org.apache.spark.util.Utils
  */
 private[python] class PythonMLLibAPI extends Serializable {
 
-
   /**
    * Loads and serializes labeled points saved with `RDD#saveAsTextFile`.
    * @param jsc Java SparkContext
@@ -346,24 +345,7 @@ private[python] class PythonMLLibAPI extends Serializable {
       model.predictSoft(data)
   }
 
-  /**
-   * A Wrapper of MatrixFactorizationModel to provide helpfer method for Python
-   */
-  private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorizationModel)
-    extends MatrixFactorizationModel(model.rank, model.userFeatures, model.productFeatures) {
 
-    def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] =
-      predict(SerDe.asTupleRDD(userAndProducts.rdd))
-
-    def getUserFeatures: RDD[Array[Any]] = {
-      SerDe.fromTuple2RDD(userFeatures.asInstanceOf[RDD[(Any, Any)]])
-    }
-
-    def getProductFeatures: RDD[Array[Any]] = {
-      SerDe.fromTuple2RDD(productFeatures.asInstanceOf[RDD[(Any, Any)]])
-    }
-
-  }
 
   /**
    * Java stub for Python mllib ALS.train().  This stub returns a handle
diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py
index 1a4527b12c..b094e50856 100644
--- a/python/pyspark/mllib/recommendation.py
+++ b/python/pyspark/mllib/recommendation.py
@@ -90,6 +90,8 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
     >>> sameModel = MatrixFactorizationModel.load(sc, path)
     >>> sameModel.predict(2,2)
     0.43...
+    >>> sameModel.predictAll(testset).collect()
+    [Rating(...
     >>> try:
     ...     os.removedirs(path)
     ... except OSError:
@@ -111,6 +113,12 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
     def productFeatures(self):
         return self.call("getProductFeatures")
 
+    @classmethod
+    def load(cls, sc, path):
+        model = cls._load_java(sc, path)
+        wrapper = sc._jvm.MatrixFactorizationModelWrapper(model)
+        return MatrixFactorizationModel(wrapper)
+
 
 class ALS(object):
 
-- 
GitLab