Skip to content
Snippets Groups Projects
Commit 108c4c16 authored by Sandeep's avatar Sandeep Committed by Reynold Xin
Browse files

SPARK-1668: Add implicit preference as an option to examples/MovieLensALS

Add --implicitPrefs as a command-line option to the example app MovieLensALS under examples/

Author: Sandeep <sandeep@techaddict.me>

Closes #597 from techaddict/SPARK-1668 and squashes the following commits:

8b371dc [Sandeep] Second Pass on reviews by mengxr
eca9d37 [Sandeep] based on mengxr's suggestions
937e54c [Sandeep] Changes
5149d40 [Sandeep] Changes based on review
1dd7657 [Sandeep] use mean()
42444d7 [Sandeep] Based on Suggestions by mengxr
e3082fa [Sandeep] SPARK-1668: Add implicit preference as an option to examples/MovieLensALS Add --implicitPrefs as a command-line option to the example app MovieLensALS under examples/
parent f269b016
No related branches found
No related tags found
No related merge requests found
...@@ -43,7 +43,8 @@ object MovieLensALS { ...@@ -43,7 +43,8 @@ object MovieLensALS {
kryo: Boolean = false, kryo: Boolean = false,
numIterations: Int = 20, numIterations: Int = 20,
lambda: Double = 1.0, lambda: Double = 1.0,
rank: Int = 10) rank: Int = 10,
implicitPrefs: Boolean = false)
def main(args: Array[String]) { def main(args: Array[String]) {
val defaultParams = Params() val defaultParams = Params()
...@@ -62,6 +63,9 @@ object MovieLensALS { ...@@ -62,6 +63,9 @@ object MovieLensALS {
opt[Unit]("kryo") opt[Unit]("kryo")
.text(s"use Kryo serialization") .text(s"use Kryo serialization")
.action((_, c) => c.copy(kryo = true)) .action((_, c) => c.copy(kryo = true))
opt[Unit]("implicitPrefs")
.text("use implicit preference")
.action((_, c) => c.copy(implicitPrefs = true))
arg[String]("<input>") arg[String]("<input>")
.required() .required()
.text("input paths to a MovieLens dataset of ratings") .text("input paths to a MovieLens dataset of ratings")
...@@ -88,7 +92,25 @@ object MovieLensALS { ...@@ -88,7 +92,25 @@ object MovieLensALS {
val ratings = sc.textFile(params.input).map { line => val ratings = sc.textFile(params.input).map { line =>
val fields = line.split("::") val fields = line.split("::")
Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble) if (params.implicitPrefs) {
/*
* MovieLens ratings are on a scale of 1-5:
* 5: Must see
* 4: Will enjoy
* 3: It's okay
* 2: Fairly bad
* 1: Awful
* So we should not recommend a movie if the predicted rating is less than 3.
* To map ratings to confidence scores, we use
* 5 -> 2.5, 4 -> 1.5, 3 -> 0.5, 2 -> -0.5, 1 -> -1.5. This mapping means unobserved
* entries are generally between It's okay and Fairly bad.
* The semantics of 0 in this expanded world of non-positive weights
* are "the same as never having interacted at all".
*/
Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble - 2.5)
} else {
Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble)
}
}.cache() }.cache()
val numRatings = ratings.count() val numRatings = ratings.count()
...@@ -99,7 +121,18 @@ object MovieLensALS { ...@@ -99,7 +121,18 @@ object MovieLensALS {
val splits = ratings.randomSplit(Array(0.8, 0.2)) val splits = ratings.randomSplit(Array(0.8, 0.2))
val training = splits(0).cache() val training = splits(0).cache()
val test = splits(1).cache() val test = if (params.implicitPrefs) {
/*
* 0 means "don't know" and positive values mean "confident that the prediction should be 1".
* Negative values mean "confident that the prediction should be 0".
* We have in this case used some kind of weighted RMSE. The weight is the absolute value of
* the confidence. The error is the difference between prediction and either 1 or 0,
* depending on whether r is positive or negative.
*/
splits(1).map(x => Rating(x.user, x.product, if (x.rating > 0) 1.0 else 0.0))
} else {
splits(1)
}.cache()
val numTraining = training.count() val numTraining = training.count()
val numTest = test.count() val numTest = test.count()
...@@ -111,9 +144,10 @@ object MovieLensALS { ...@@ -111,9 +144,10 @@ object MovieLensALS {
.setRank(params.rank) .setRank(params.rank)
.setIterations(params.numIterations) .setIterations(params.numIterations)
.setLambda(params.lambda) .setLambda(params.lambda)
.setImplicitPrefs(params.implicitPrefs)
.run(training) .run(training)
val rmse = computeRmse(model, test, numTest) val rmse = computeRmse(model, test, params.implicitPrefs)
println(s"Test RMSE = $rmse.") println(s"Test RMSE = $rmse.")
...@@ -121,11 +155,14 @@ object MovieLensALS { ...@@ -121,11 +155,14 @@ object MovieLensALS {
} }
/** Compute RMSE (Root Mean Squared Error). */ /** Compute RMSE (Root Mean Squared Error). */
def computeRmse(model: MatrixFactorizationModel, data: RDD[Rating], n: Long) = { def computeRmse(model: MatrixFactorizationModel, data: RDD[Rating], implicitPrefs: Boolean) = {
def mapPredictedRating(r: Double) = if (implicitPrefs) math.max(math.min(r, 1.0), 0.0) else r
val predictions: RDD[Rating] = model.predict(data.map(x => (x.user, x.product))) val predictions: RDD[Rating] = model.predict(data.map(x => (x.user, x.product)))
val predictionsAndRatings = predictions.map(x => ((x.user, x.product), x.rating)) val predictionsAndRatings = predictions.map{ x =>
.join(data.map(x => ((x.user, x.product), x.rating))) ((x.user, x.product), mapPredictedRating(x.rating))
.values }.join(data.map(x => ((x.user, x.product), x.rating))).values
math.sqrt(predictionsAndRatings.map(x => (x._1 - x._2) * (x._1 - x._2)).reduce(_ + _) / n) math.sqrt(predictionsAndRatings.map(x => (x._1 - x._2) * (x._1 - x._2)).mean())
} }
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment