Skip to content
Snippets Groups Projects
Commit fdaad4eb authored by Davies Liu, committed by Xiangrui Meng
Browse files

[MLlib] fix python example of ALS in guide

Fix the Python example of ALS in the guide: use Rating instead of np.array.

Author: Davies Liu <davies@databricks.com>

Closes #4226 from davies/fix_als_guide and squashes the following commits:

1433d76 [Davies Liu] fix python example of als in guide
parent ff356e2a
No related branches found
No related tags found
No related merge requests found
...@@ -192,12 +192,11 @@ We use the default ALS.train() method which assumes ratings are explicit. We eva ...@@ -192,12 +192,11 @@ We use the default ALS.train() method which assumes ratings are explicit. We eva
recommendation by measuring the Mean Squared Error of rating prediction. recommendation by measuring the Mean Squared Error of rating prediction.
{% highlight python %} {% highlight python %}
from pyspark.mllib.recommendation import ALS from pyspark.mllib.recommendation import ALS, Rating
from numpy import array
# Load and parse the data # Load and parse the data
data = sc.textFile("data/mllib/als/test.data") data = sc.textFile("data/mllib/als/test.data")
ratings = data.map(lambda line: array([float(x) for x in line.split(',')])) ratings = data.map(lambda l: l.split(',')).map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2])))
# Build the recommendation model using Alternating Least Squares # Build the recommendation model using Alternating Least Squares
rank = 10 rank = 10
...@@ -205,10 +204,10 @@ numIterations = 20 ...@@ -205,10 +204,10 @@ numIterations = 20
model = ALS.train(ratings, rank, numIterations) model = ALS.train(ratings, rank, numIterations)
# Evaluate the model on training data # Evaluate the model on training data
testdata = ratings.map(lambda p: (int(p[0]), int(p[1]))) testdata = ratings.map(lambda p: (p[0], p[1]))
predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2])) predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions) ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).reduce(lambda x, y: x + y)/ratesAndPreds.count() MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).reduce(lambda x, y: x + y) / ratesAndPreds.count()
print("Mean Squared Error = " + str(MSE)) print("Mean Squared Error = " + str(MSE))
{% endhighlight %} {% endhighlight %}
...@@ -217,7 +216,7 @@ signals), you can use the trainImplicit method to get better results. ...@@ -217,7 +216,7 @@ signals), you can use the trainImplicit method to get better results.
{% highlight python %} {% highlight python %}
# Build the recommendation model using Alternating Least Squares based on implicit ratings # Build the recommendation model using Alternating Least Squares based on implicit ratings
model = ALS.trainImplicit(ratings, rank, numIterations, alpha = 0.01) model = ALS.trainImplicit(ratings, rank, numIterations, alpha=0.01)
{% endhighlight %} {% endhighlight %}
</div> </div>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment