Skip to content
Snippets Groups Projects
Commit c24aeb6a authored by MechCoder, committed by Xiangrui Meng
Browse files

[SPARK-6257] [PYSPARK] [MLLIB] MLlib API missing items in Recommendation

Adds rank, recommendUsers and recommendProducts to MatrixFactorizationModel in PySpark.

Author: MechCoder <manojkumarsivaraj334@gmail.com>

Closes #5807 from MechCoder/spark-6257 and squashes the following commits:

09629c6 [MechCoder] doc
953b326 [MechCoder] [SPARK-6257] MLlib API missing items in Recommendation
parent 14b32886
No related merge requests found
......@@ -216,7 +216,7 @@ model = ALS.train(ratings, rank, numIterations)
# Evaluate the model on the same ratings it was trained on: strip the rating so
# only the (user, product) pairs are passed to predictAll.
testdata = ratings.map(lambda p: (p[0], p[1]))
# Key both the predictions and the actual ratings by (user, product) so that
# they can be joined into ((user, product), (actual, predicted)) records.
predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
# NOTE(review): MSE is assigned twice in a row. This looks like a rendered diff
# in which the first line is the removed form (sum of squared errors / count)
# and the second is its replacement (.mean() over the same squared errors);
# confirm against the underlying file before relying on either line.
MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).reduce(lambda x, y: x + y) / ratesAndPreds.count()
MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
print("Mean Squared Error = " + str(MSE))
# Save and load model
......
......@@ -65,6 +65,13 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
>>> model.userFeatures().collect()
[(1, array('d', [...])), (2, array('d', [...]))]
>>> model.recommendUsers(1, 2)
[Rating(user=2, product=1, rating=1.9...), Rating(user=1, product=1, rating=1.0...)]
>>> model.recommendProducts(1, 2)
[Rating(user=1, product=2, rating=1.9...), Rating(user=1, product=1, rating=1.0...)]
>>> model.rank
4
>>> first_user = model.userFeatures().take(1)[0]
>>> latents = first_user[1]
>>> len(latents) == 4
......@@ -105,9 +112,15 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
... pass
"""
def predict(self, user, product):
    """
    Predict the rating of a single (user, product) pair.

    Both ids are coerced with ``int()`` and then handed to the ``predict``
    method of the wrapped Java model; its result is returned unchanged.
    """
    java_predict = self._java_model.predict
    user_id, product_id = int(user), int(product)
    return java_predict(user_id, product_id)
def predictAll(self, user_product):
"""
Returns a list of predicted ratings for input user and product pairs.
"""
assert isinstance(user_product, RDD), "user_product should be RDD of (user, product)"
first = user_product.first()
assert len(first) == 2, "user_product should be RDD of (user, product)"
......@@ -115,11 +128,37 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
return self.call("predict", user_product)
def userFeatures(self):
    """
    Return the user factors as a paired RDD.

    The first element of each pair is the user; the second is that user's
    features repacked as an ``array.array`` of doubles (typecode ``'d'``).
    """
    def as_double_array(values):
        # Repack the per-user feature values into a compact array of doubles.
        return array.array('d', values)

    features = self.call("getUserFeatures")
    return features.mapValues(as_double_array)
def productFeatures(self):
    """
    Return the product factors as a paired RDD.

    The first element of each pair is the product; the second is that
    product's features repacked as an ``array.array`` of doubles
    (typecode ``'d'``).
    """
    def as_double_array(values):
        # Repack the per-product feature values into a compact array of doubles.
        return array.array('d', values)

    features = self.call("getProductFeatures")
    return features.mapValues(as_double_array)
def recommendUsers(self, product, num):
    """
    Recommend the top ``num`` users for a given product.

    ``product`` and ``num`` are coerced with ``int()`` before the call, the
    same way ``predict`` coerces its ids, so the Java side always receives
    plain Python ints. Note that ``int()`` truncates non-integral floats.

    Returns a list of Rating objects sorted by the predicted rating in
    descending order.
    """
    return list(self.call("recommendUsers", int(product), int(num)))
def recommendProducts(self, user, num):
    """
    Recommend the top ``num`` products for a given user.

    ``user`` and ``num`` are coerced with ``int()`` before the call, the
    same way ``predict`` coerces its ids, so the Java side always receives
    plain Python ints. Note that ``int()`` truncates non-integral floats.

    Returns a list of Rating objects sorted by the predicted rating in
    descending order.
    """
    return list(self.call("recommendProducts", int(user), int(num)))
@property
def rank(self):
    """
    Rank of the model, as reported by the wrapped model's ``rank``.

    NOTE(review): presumably the number of latent features per user and
    product (the class doctest compares both to 4); confirm.
    """
    model_rank = self.call("rank")
    return model_rank
@classmethod
def load(cls, sc, path):
model = cls._load_java(sc, path)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment