Skip to content
Snippets Groups Projects
Commit d9b7f3e4, authored by Yu ISHIKAWA and committed by Xiangrui Meng
Browse files

[SPARK-10276] [MLLIB] [PYSPARK] Add @since annotation to pyspark.mllib.recommendation

Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com>

Closes #8677 from yu-iskw/SPARK-10276.
parent 1894653e
No related branches found
No related tags found
No related merge requests found
......@@ -18,7 +18,7 @@
import array
from collections import namedtuple
from pyspark import SparkContext
from pyspark import SparkContext, since
from pyspark.rdd import RDD
from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc, inherit_doc
from pyspark.mllib.util import JavaLoader, JavaSaveable
......@@ -36,6 +36,8 @@ class Rating(namedtuple("Rating", ["user", "product", "rating"])):
(1, 2, 5.0)
>>> (r[0], r[1], r[2])
(1, 2, 5.0)
.. versionadded:: 1.2.0
"""
def __reduce__(self):
......@@ -111,13 +113,17 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
... rmtree(path)
... except OSError:
... pass
.. versionadded:: 0.9.0
"""
@since("0.9.0")
def predict(self, user, product):
    """
    Predicts rating for the given user and product.

    Both identifiers are coerced with ``int()`` before being handed to
    the underlying Java model's ``predict``.
    """
    user_id = int(user)
    product_id = int(product)
    return self._java_model.predict(user_id, product_id)
@since("0.9.0")
def predictAll(self, user_product):
"""
Returns a list of predicted ratings for input user and product pairs.
......@@ -128,6 +134,7 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
user_product = user_product.map(lambda u_p: (int(u_p[0]), int(u_p[1])))
return self.call("predict", user_product)
@since("1.2.0")
def userFeatures(self):
"""
Returns a paired RDD, where the first element is the user and the
......@@ -135,6 +142,7 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
return self.call("getUserFeatures").mapValues(lambda v: array.array('d', v))
@since("1.2.0")
def productFeatures(self):
"""
Returns a paired RDD, where the first element is the product and the
......@@ -142,6 +150,7 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
return self.call("getProductFeatures").mapValues(lambda v: array.array('d', v))
@since("1.4.0")
def recommendUsers(self, product, num):
"""
Recommends the top "num" number of users for a given product and returns a list
......@@ -149,6 +158,7 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
"""
return list(self.call("recommendUsers", product, num))
@since("1.4.0")
def recommendProducts(self, user, num):
"""
Recommends the top "num" number of products for a given user and returns a list
......@@ -157,17 +167,25 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
return list(self.call("recommendProducts", user, num))
@property
@since("1.4.0")
def rank(self):
    """Rank for the features in this model, fetched via ``self.call("rank")``."""
    model_rank = self.call("rank")
    return model_rank
@classmethod
@since("1.3.1")
def load(cls, sc, path):
    """
    Load a model from the given path.

    The model is read with ``cls._load_java(sc, path)``, wrapped in the
    JVM-side ``MatrixFactorizationModelWrapper`` reached through
    ``sc._jvm``, and returned as a ``MatrixFactorizationModel``.
    """
    java_model = cls._load_java(sc, path)
    # The result is always built as MatrixFactorizationModel, not as `cls`.
    return MatrixFactorizationModel(
        sc._jvm.MatrixFactorizationModelWrapper(java_model))
class ALS(object):
"""Alternating Least Squares matrix factorization
.. versionadded:: 0.9.0
"""
@classmethod
def _prepare(cls, ratings):
......@@ -188,15 +206,31 @@ class ALS(object):
return ratings
@classmethod
@since("0.9.0")
def train(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, nonnegative=False,
          seed=None):
    """
    Train a matrix factorization model from an RDD of ratings given by
    users to some products, in the form of (userID, productID, rating)
    triples.

    The ratings matrix is approximated as the product of two lower-rank
    matrices of the given rank (number of features). The features are
    solved for by running the given number of iterations of ALS, using a
    level of parallelism given by `blocks`.

    `ratings` is first passed through ``cls._prepare``. The remaining
    arguments (`lambda_`, `nonnegative`, `seed`) are forwarded unchanged
    to the JVM-side "trainALSModel" routine; presumably they are the
    regularization weight, a non-negativity constraint flag and the
    random seed -- confirm against the Scala ALS documentation.

    Returns a MatrixFactorizationModel wrapping the trained Java model.
    """
    prepared_ratings = cls._prepare(ratings)
    java_model = callMLlibFunc("trainALSModel", prepared_ratings, rank, iterations,
                               lambda_, blocks, nonnegative, seed)
    return MatrixFactorizationModel(java_model)
@classmethod
@since("0.9.0")
def trainImplicit(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, alpha=0.01,
                  nonnegative=False, seed=None):
    """
    Train a matrix factorization model from an RDD of 'implicit
    preferences' given by users to some products, in the form of
    (userID, productID, preference) triples.

    The ratings matrix is approximated as the product of two lower-rank
    matrices of the given rank (number of features). The features are
    solved for by running the given number of iterations of ALS, using a
    level of parallelism given by `blocks`.

    `ratings` is first passed through ``cls._prepare``. The remaining
    arguments (`lambda_`, `alpha`, `nonnegative`, `seed`) are forwarded
    unchanged to the JVM-side "trainImplicitALSModel" routine; `alpha`
    is presumably the confidence weight for implicit feedback -- confirm
    against the Scala ALS documentation.

    Returns a MatrixFactorizationModel wrapping the trained Java model.
    """
    prepared_ratings = cls._prepare(ratings)
    java_model = callMLlibFunc("trainImplicitALSModel", prepared_ratings, rank,
                               iterations, lambda_, blocks, alpha, nonnegative, seed)
    return MatrixFactorizationModel(java_model)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment