Skip to content
Snippets Groups Projects
Commit f75f633b authored by Xiangrui Meng's avatar Xiangrui Meng Committed by Joseph K. Bradley
Browse files

[SPARK-6571][MLLIB] use wrapper in MatrixFactorizationModel.load

This fixes `predictAll` after load. jkbradley

Author: Xiangrui Meng <meng@databricks.com>

Closes #5243 from mengxr/SPARK-6571 and squashes the following commits:

82dcaa7 [Xiangrui Meng] use wrapper in MatrixFactorizationModel.load
parent 99631438
No related branches found
No related tags found
No related merge requests found
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.mllib.api.python
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.recommendation.{MatrixFactorizationModel, Rating}
import org.apache.spark.rdd.RDD
/**
* A Wrapper of MatrixFactorizationModel to provide helper method for Python.
*/
private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorizationModel)
extends MatrixFactorizationModel(model.rank, model.userFeatures, model.productFeatures) {
def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] =
predict(SerDe.asTupleRDD(userAndProducts.rdd))
def getUserFeatures: RDD[Array[Any]] = {
SerDe.fromTuple2RDD(userFeatures.asInstanceOf[RDD[(Any, Any)]])
}
def getProductFeatures: RDD[Array[Any]] = {
SerDe.fromTuple2RDD(productFeatures.asInstanceOf[RDD[(Any, Any)]])
}
}
......@@ -58,7 +58,6 @@ import org.apache.spark.util.Utils
*/
private[python] class PythonMLLibAPI extends Serializable {
/**
* Loads and serializes labeled points saved with `RDD#saveAsTextFile`.
* @param jsc Java SparkContext
......@@ -346,24 +345,7 @@ private[python] class PythonMLLibAPI extends Serializable {
model.predictSoft(data)
}
/**
* A Wrapper of MatrixFactorizationModel to provide helpfer method for Python
*/
private[python] class MatrixFactorizationModelWrapper(model: MatrixFactorizationModel)
extends MatrixFactorizationModel(model.rank, model.userFeatures, model.productFeatures) {
def predict(userAndProducts: JavaRDD[Array[Any]]): RDD[Rating] =
predict(SerDe.asTupleRDD(userAndProducts.rdd))
def getUserFeatures: RDD[Array[Any]] = {
SerDe.fromTuple2RDD(userFeatures.asInstanceOf[RDD[(Any, Any)]])
}
def getProductFeatures: RDD[Array[Any]] = {
SerDe.fromTuple2RDD(productFeatures.asInstanceOf[RDD[(Any, Any)]])
}
}
/**
* Java stub for Python mllib ALS.train(). This stub returns a handle
......
......@@ -90,6 +90,8 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
>>> sameModel = MatrixFactorizationModel.load(sc, path)
>>> sameModel.predict(2,2)
0.43...
>>> sameModel.predictAll(testset).collect()
[Rating(...
>>> try:
... os.removedirs(path)
... except OSError:
......@@ -111,6 +113,12 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
def productFeatures(self):
return self.call("getProductFeatures")
@classmethod
def load(cls, sc, path):
model = cls._load_java(sc, path)
wrapper = sc._jvm.MatrixFactorizationModelWrapper(model)
return MatrixFactorizationModel(wrapper)
class ALS(object):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment