Commit 3bf98971 authored by Shivaram Venkataraman

Rename loss -> stochasticLoss and add a note to explain why we have
multiple train methods.
parent 84fa20c2
@@ -40,7 +40,8 @@ object GradientDescent {
    * one iteration of SGD. Default value 1.0.
    *
    * @return weights - Column matrix containing weights for every feature.
-   * @return lossHistory - Array containing the loss computed for every iteration.
+   * @return stochasticLossHistory - Array containing the stochastic loss computed for
+   *         every iteration.
    */
   def runMiniBatchSGD(
     data: RDD[(Double, Array[Double])],
@@ -51,7 +52,7 @@ object GradientDescent {
       initialWeights: Array[Double],
       miniBatchFraction: Double=1.0) : (DoubleMatrix, Array[Double]) = {

-    val lossHistory = new ArrayBuffer[Double](numIters)
+    val stochasticLossHistory = new ArrayBuffer[Double](numIters)

     val nexamples: Long = data.count()
     val miniBatchSize = nexamples * miniBatchFraction
@@ -69,12 +70,12 @@ object GradientDescent {
         (grad, loss)
       }.reduce((a, b) => (a._1.addi(b._1), a._2 + b._2))

-      lossHistory.append(lossSum / miniBatchSize + reg_val)
+      stochasticLossHistory.append(lossSum / miniBatchSize + reg_val)

       val update = updater.compute(weights, gradientSum.div(miniBatchSize), stepSize, i)
       weights = update._1
       reg_val = update._2
     }
-    (weights, lossHistory.toArray)
+    (weights, stochasticLossHistory.toArray)
   }
 }
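For context, a minimal usage sketch of the renamed return value follows. The SparkContext `sc`, the toy dataset, and the parameter values are illustrative assumptions, not part of this commit; the gradient and updater arguments mirror the call site in LogisticRegression below.

// Usage sketch (hypothetical setup): `sc` is an existing SparkContext and
// the toy dataset stands in for real training data.
val data = sc.parallelize(Seq(
  (1.0, Array(1.0, 1.2, 0.4)),    // (label, features); leading 1.0 acts as the intercept term
  (0.0, Array(1.0, -0.7, 1.1))))

val (weights, stochasticLossHistory) = GradientDescent.runMiniBatchSGD(
  data,
  new LogisticGradient(),
  new SimpleUpdater(),
  1.0,                   // stepSize
  100,                   // numIters
  Array(0.0, 0.0, 0.0),  // initialWeights, one per feature
  0.1)                   // miniBatchFraction

// Each entry is the loss on that iteration's sampled mini-batch (plus the
// regularization term), not the loss over the full dataset -- hence
// "stochastic" in the new name.
println(stochasticLossHistory.takeRight(10).mkString(", "))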
@@ -30,7 +30,7 @@ import org.jblas.DoubleMatrix
 class LogisticRegressionModel(
   val weights: DoubleMatrix,
   val intercept: Double,
-  val losses: Array[Double]) extends RegressionModel {
+  val stochasticLosses: Array[Double]) extends RegressionModel {

   override def predict(testData: spark.RDD[Array[Double]]) = {
     testData.map { x =>
@@ -114,7 +114,7 @@ class LogisticRegression private (var stepSize: Double, var miniBatchFraction: D

     val initalWeightsWithIntercept = Array(1.0, initialWeights:_*)

-    val (weights, losses) = GradientDescent.runMiniBatchSGD(
+    val (weights, stochasticLosses) = GradientDescent.runMiniBatchSGD(
       data,
       new LogisticGradient(),
       new SimpleUpdater(),
@@ -126,17 +126,19 @@ class LogisticRegression private (var stepSize: Double, var miniBatchFraction: D

     val weightsScaled = weights.getRange(1, weights.length)
     val intercept = weights.get(0)

-    val model = new LogisticRegressionModel(weightsScaled, intercept, losses)
+    val model = new LogisticRegressionModel(weightsScaled, intercept, stochasticLosses)

     logInfo("Final model weights " + model.weights)
     logInfo("Final model intercept " + model.intercept)
-    logInfo("Last 10 losses " + model.losses.takeRight(10).mkString(", "))
+    logInfo("Last 10 stochastic losses " + model.stochasticLosses.takeRight(10).mkString(", "))
     model
   }
 }

 /**
  * Top-level methods for calling Logistic Regression.
+ * NOTE(shivaram): We use multiple train methods instead of default arguments to support
+ * Java programs.
  */
 object LogisticRegression {
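The NOTE above concerns Scala/Java interop: Scala compiles a default argument into a synthetic `name$default$n` method rather than a Java-visible overload, so Java callers could not simply omit the defaulted parameters. A self-contained sketch of the pattern (the names and body here are placeholders, not the actual object contents):

object TrainApi {
  // A default argument, e.g.
  //   def fit(data: Seq[Double], stepSize: Double = 1.0): Double
  // would emit a synthetic fit$default$2 method that is awkward to call
  // from Java. Explicit overloads give Java one real method per arity:
  def fit(data: Seq[Double]): Double = fit(data, 1.0)

  def fit(data: Seq[Double], stepSize: Double): Double =
    data.sum * stepSize  // stand-in for the real training logic
}

From Java, both `TrainApi.fit(data)` and `TrainApi.fit(data, 0.5)` then resolve as ordinary overloads.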
@@ -164,6 +164,8 @@ class RidgeRegression private (var lambdaLow: Double, var lambdaHigh: Double)
 }

 /**
  * Top-level methods for calling Ridge Regression.
+ * NOTE(shivaram): We use multiple train methods instead of default arguments to support
+ * Java programs.
  */
 object RidgeRegression {