diff --git a/mllib/src/main/scala/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/spark/mllib/optimization/Gradient.scala
index 2fb0c8136f8520f4a6fd06a0f29ba91795b263bd..d5338360c83f6ae5c0360088a9070d5a16b64f6f 100644
--- a/mllib/src/main/scala/spark/mllib/optimization/Gradient.scala
+++ b/mllib/src/main/scala/spark/mllib/optimization/Gradient.scala
@@ -30,21 +30,3 @@ abstract class Gradient extends Serializable {
   def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix): 
       (DoubleMatrix, Double)
 }
-
-class LogisticGradient extends Gradient {
-  override def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix): 
-      (DoubleMatrix, Double) = {
-    val margin: Double = -1.0 * data.dot(weights)
-    val gradientMultiplier = (1.0 / (1.0 + math.exp(margin))) - label
-
-    val gradient = data.mul(gradientMultiplier)
-    val loss =
-      if (margin > 0) {
-        math.log(1 + math.exp(0 - margin))
-      } else {
-        math.log(1 + math.exp(margin)) - margin
-      }
-
-    (gradient, loss)
-  }
-}
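
The relocated LogisticGradient computes the loss in two branches that are algebraically the same quantity, log(1 + exp(-margin)), split on the sign of the margin so that math.exp never overflows. A minimal standalone sketch (plain Scala, no jblas needed) that exercises both branches:

```scala
object StableLogLoss {
  // Mirrors the two branches in LogisticGradient.compute: both evaluate
  // log(1 + exp(-margin)); the split keeps math.exp from overflowing
  // when |margin| is large.
  def loss(margin: Double): Double =
    if (margin > 0) {
      math.log(1 + math.exp(-margin))
    } else {
      math.log(1 + math.exp(margin)) - margin
    }

  def main(args: Array[String]): Unit = {
    // At margin = -750 the naive log(1 + exp(-margin)) overflows to
    // Infinity; the rewritten branch returns ~750.0 instead.
    Seq(-750.0, -2.0, 0.0, 2.0, 750.0).foreach { m =>
      println(s"margin=$m loss=${loss(m)}")
    }
  }
}
```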
diff --git a/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
index e1b73bc25e9225b5293b8a9e9c21ca02eba2aab2..4c996c0903ed25966ef00967dd012799c949ce52 100644
--- a/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
+++ b/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
@@ -40,7 +40,8 @@ object GradientDescent {
    *                            one iteration of SGD. Default value 1.0.
    *
    * @return weights - Column matrix containing weights for every feature.
-   * @return lossHistory - Array containing the loss computed for every iteration.
+   * @return stochasticLossHistory - Array containing the stochastic loss computed for 
+   *                                 every iteration.
    */
   def runMiniBatchSGD(
     data: RDD[(Double, Array[Double])],
@@ -48,16 +49,16 @@ object GradientDescent {
     updater: Updater,
     stepSize: Double,
     numIters: Int,
-    miniBatchFraction: Double=1.0) : (DoubleMatrix, Array[Double]) = {
+    initialWeights: Array[Double],
+    miniBatchFraction: Double = 1.0): (Array[Double], Array[Double]) = {
 
-    val lossHistory = new ArrayBuffer[Double](numIters)
+    val stochasticLossHistory = new ArrayBuffer[Double](numIters)
 
-    val nfeatures: Int = data.take(1)(0)._2.length
     val nexamples: Long = data.count()
     val miniBatchSize = nexamples * miniBatchFraction
 
-    // Initialize weights as a column matrix
-    var weights = DoubleMatrix.ones(nfeatures)
+    // Initialize weights as a column vector
+    var weights = new DoubleMatrix(initialWeights.length, 1, initialWeights:_*)
     var reg_val = 0.0
 
     for (i <- 1 to numIters) {
@@ -68,12 +69,12 @@ object GradientDescent {
           (grad, loss)
       }.reduce((a, b) => (a._1.addi(b._1), a._2 + b._2))
 
-      lossHistory.append(lossSum / miniBatchSize + reg_val)
+      stochasticLossHistory.append(lossSum / miniBatchSize + reg_val)
       val update = updater.compute(weights, gradientSum.div(miniBatchSize), stepSize, i)
       weights = update._1
       reg_val = update._2
     }
 
-    (weights, lossHistory.toArray)
+    (weights.toArray, stochasticLossHistory.toArray)
   }
 }
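
runMiniBatchSGD now takes and returns plain Array[Double] weights, keeping the jblas column vector as an internal detail. A small sketch (assuming jblas on the classpath) of the round trip between the two representations:

```scala
import org.jblas.DoubleMatrix

object WeightsRoundTrip {
  def main(args: Array[String]): Unit = {
    val initialWeights = Array(1.0, 0.5, -0.25)
    // DoubleMatrix(rows, cols, data: _*) lays the data out column-major, so
    // (length, 1, ...) is the n x 1 column vector runMiniBatchSGD builds.
    val weights = new DoubleMatrix(initialWeights.length, 1, initialWeights: _*)
    assert(weights.rows == 3 && weights.columns == 1)
    // toArray round-trips back to the Array[Double] the new API returns.
    assert(weights.toArray.sameElements(initialWeights))
  }
}
```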
diff --git a/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala b/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala
index bb294c22570ee41b9b88e23e045b9362789cc547..711e205c39d1de480b3658fbe3cf3dd623d20039 100644
--- a/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala
+++ b/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala
@@ -28,24 +28,45 @@ import org.jblas.DoubleMatrix
  * Based on Matlab code written by John Duchi.
  */
 class LogisticRegressionModel(
-  val weights: DoubleMatrix,
+  val weights: Array[Double],
   val intercept: Double,
-  val losses: Array[Double]) extends RegressionModel {
+  val stochasticLosses: Array[Double]) extends RegressionModel {
+
+  // Create a column vector that can be used for predictions
+  private val weightsMatrix = new DoubleMatrix(weights.length, 1, weights:_*)
 
   override def predict(testData: spark.RDD[Array[Double]]) = {
     testData.map { x =>
-      val margin = new DoubleMatrix(1, x.length, x:_*).mmul(this.weights).get(0) + this.intercept
+      val margin = new DoubleMatrix(1, x.length, x:_*).mmul(weightsMatrix).get(0) + this.intercept
       1.0/ (1.0 + math.exp(margin * -1))
     }
   }
 
   override def predict(testData: Array[Double]): Double = {
     val dataMat = new DoubleMatrix(1, testData.length, testData:_*)
-    val margin = dataMat.mmul(this.weights).get(0) + this.intercept
+    val margin = dataMat.mmul(weightsMatrix).get(0) + this.intercept
     1.0/ (1.0 + math.exp(margin * -1))
   }
 }
 
+class LogisticGradient extends Gradient {
+  override def compute(data: DoubleMatrix, label: Double, weights: DoubleMatrix): 
+      (DoubleMatrix, Double) = {
+    val margin: Double = -1.0 * data.dot(weights)
+    val gradientMultiplier = (1.0 / (1.0 + math.exp(margin))) - label
+
+    val gradient = data.mul(gradientMultiplier)
+    val loss =
+      if (margin > 0) {
+        math.log(1 + math.exp(0 - margin))
+      } else {
+        math.log(1 + math.exp(margin)) - margin
+      }
+
+    (gradient, loss)
+  }
+}
+
 class LogisticRegression private (var stepSize: Double, var miniBatchFraction: Double,
     var numIters: Int)
   extends Logging {
@@ -80,31 +101,74 @@ class LogisticRegression private (var stepSize: Double, var miniBatchFraction: D
   }
 
   def train(input: RDD[(Double, Array[Double])]): LogisticRegressionModel = {
+    val nfeatures: Int = input.take(1)(0)._2.length
+    val initialWeights = Array.fill(nfeatures)(1.0)
+    train(input, initialWeights)
+  }
+
+  def train(
+    input: RDD[(Double, Array[Double])],
+    initialWeights: Array[Double]): LogisticRegressionModel = {
+
     // Add an extra variable consisting of all 1.0's for the intercept.
     val data = input.map { case (y, features) =>
       (y, Array(1.0, features:_*))
     }
 
-    val (weights, losses) = GradientDescent.runMiniBatchSGD(
-      data, new LogisticGradient(), new SimpleUpdater(), stepSize, numIters, miniBatchFraction)
+    val initialWeightsWithIntercept = Array(1.0, initialWeights:_*)
+
+    val (weights, stochasticLosses) = GradientDescent.runMiniBatchSGD(
+      data,
+      new LogisticGradient(),
+      new SimpleUpdater(),
+      stepSize,
+      numIters,
+      initialWeightsWithIntercept,
+      miniBatchFraction)
 
-    val weightsScaled = weights.getRange(1, weights.length)
-    val intercept = weights.get(0)
+    val intercept = weights(0)
+    val weightsScaled = weights.tail
 
-    val model = new LogisticRegressionModel(weightsScaled, intercept, losses)
+    val model = new LogisticRegressionModel(weightsScaled, intercept, stochasticLosses)
 
-    logInfo("Final model weights " + model.weights)
+    logInfo("Final model weights " + model.weights.mkString(","))
     logInfo("Final model intercept " + model.intercept)
-    logInfo("Last 10 losses " + model.losses.takeRight(10).mkString(", "))
+    logInfo("Last 10 stochastic losses " + model.stochasticLosses.takeRight(10).mkString(", "))
     model
   }
 }
 
 /**
  * Top-level methods for calling Logistic Regression.
+ * NOTE(shivaram): We use multiple train methods instead of default arguments to support 
+ *                 Java programs.
  */
 object LogisticRegression {
 
+  /**
+   * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number
+   * of iterations of gradient descent using the specified step size. Each iteration uses
+   * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
+   * gradient descent are initialized using the initial weights provided.
+   *
+   * @param input RDD of (label, array of features) pairs.
+   * @param numIterations Number of iterations of gradient descent to run.
+   * @param stepSize Step size to be used for each iteration of gradient descent.
+   * @param miniBatchFraction Fraction of data to be used per iteration.
+   * @param initialWeights Initial set of weights to be used. Array should be equal in size to 
+   *        the number of features in the data.
+   */
+  def train(
+      input: RDD[(Double, Array[Double])],
+      numIterations: Int,
+      stepSize: Double,
+      miniBatchFraction: Double,
+      initialWeights: Array[Double])
+    : LogisticRegressionModel =
+  {
+    new LogisticRegression(stepSize, miniBatchFraction, numIterations).train(input, initialWeights)
+  }
+
   /**
    * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number
    * of iterations of gradient descent using the specified step size. Each iteration uses
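
The intercept bookkeeping in train() above works by prepending a constant 1.0 feature to every example (and to the initial weights), so the first fitted weight comes back as the intercept and the rest as per-feature weights. A sketch with made-up fitted values, mirroring the predict() math:

```scala
object InterceptSplit {
  def main(args: Array[String]): Unit = {
    val features = Array(3.0, -2.0)
    val withIntercept = Array(1.0, features: _*) // same trick train() uses
    val fitted = Array(2.0, -1.5, 0.5)           // hypothetical SGD output
    assert(withIntercept.length == fitted.length)
    val intercept = fitted(0)                    // weights(0) in train()
    val weights = fitted.tail                    // weights.tail in train()
    // Prediction mirrors LogisticRegressionModel.predict: sigmoid of the margin.
    val margin = weights.zip(features).map { case (w, x) => w * x }.sum + intercept
    println(s"p = ${1.0 / (1.0 + math.exp(-margin))}")
  }
}
```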
diff --git a/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala
index 7c7f912b43c4f0ee635deeb28e30b6a1a12fffc2..f724edd732f5147e1daed1f8160e44346f46cb0d 100644
--- a/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala
@@ -164,6 +164,8 @@ class RidgeRegression private (var lambdaLow: Double, var lambdaHigh: Double)
 
 /**
  * Top-level methods for calling Ridge Regression.
+ * NOTE(shivaram): We use multiple train methods instead of default arguments to support 
+ *                 Java programs.
  */
 object RidgeRegression {
 
diff --git a/mllib/src/test/resources/log4j.properties b/mllib/src/test/resources/log4j.properties
index a112e0b506994279615c6342375ef2f928eb8366..4265ba6e5de3324c332d79fb2df30abb7f65b712 100644
--- a/mllib/src/test/resources/log4j.properties
+++ b/mllib/src/test/resources/log4j.properties
@@ -19,7 +19,7 @@
 log4j.rootCategory=INFO, file
 log4j.appender.file=org.apache.log4j.FileAppender
 log4j.appender.file.append=false
-log4j.appender.file.file=ml/target/unit-tests.log
+log4j.appender.file.file=mllib/target/unit-tests.log
 log4j.appender.file.layout=org.apache.log4j.PatternLayout
 log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n
 
diff --git a/mllib/src/test/scala/spark/mllib/regression/LogisticRegressionSuite.scala b/mllib/src/test/scala/spark/mllib/regression/LogisticRegressionSuite.scala
index bc9bfd054fe9b456dc2290ae37efb1bf8d9812ad..47191d9a5a0b391eccaa509d23b316976e4cf36c 100644
--- a/mllib/src/test/scala/spark/mllib/regression/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/spark/mllib/regression/LogisticRegressionSuite.scala
@@ -34,16 +34,14 @@ class LogisticRegressionSuite extends FunSuite with BeforeAndAfterAll {
     System.clearProperty("spark.driver.port")
   }
 
-  // Test if we can correctly learn A, B where Y = logistic(A + B*X)
-  test("logistic regression") {
-    val nPoints = 10000
+  // Generate input of the form Y = logistic(offset + scale*X)
+  def generateLogisticInput(
+    offset: Double,
+    scale: Double,
+    nPoints: Int): Seq[(Double, Array[Double])] = {
     val rnd = new Random(42)
-
     val x1 = Array.fill[Double](nPoints)(rnd.nextGaussian())
 
-    val A = 2.0
-    val B = -1.5
-
     // NOTE: if U is uniform[0, 1] then ln(u) - ln(1-u) is Logistic(0,1)
     val unifRand = new scala.util.Random(45)
     val rLogis = (0 until nPoints).map { i =>
@@ -51,14 +49,24 @@ class LogisticRegressionSuite extends FunSuite with BeforeAndAfterAll {
       math.log(u) - math.log(1.0-u)
     }
 
-    // y <- A + B*x + rlogis(100)
+    // y <- offset + scale*x + rLogis()
     // y <- as.numeric(y > 0)
     val y = (0 until nPoints).map { i =>
-      val yVal = A + B * x1(i) + rLogis(i)
+      val yVal = offset + scale * x1(i) + rLogis(i)
       if (yVal > 0) 1.0 else 0.0
     }
 
-    val testData = (0 until nPoints).map(i => (y(i).toDouble, Array(x1(i)))).toArray
+    val testData = (0 until nPoints).map(i => (y(i).toDouble, Array(x1(i))))
+    testData
+  }
+
+  // Test if we can correctly learn A, B where Y = logistic(A + B*X)
+  test("logistic regression") {
+    val nPoints = 10000
+    val A = 2.0
+    val B = -1.5
+
+    val testData = generateLogisticInput(A, B, nPoints)
 
     val testRDD = sc.parallelize(testData, 2)
     testRDD.cache()
@@ -67,7 +75,31 @@ class LogisticRegressionSuite extends FunSuite with BeforeAndAfterAll {
 
     val model = lr.train(testRDD)
 
-    val weight0 = model.weights.get(0)
+    val weight0 = model.weights(0)
+    assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]")
+    assert(model.intercept >= 1.9 && model.intercept <= 2.1, model.intercept + " not in [1.9, 2.1]")
+  }
+
+  test("logistic regression with initial weights") {
+    val nPoints = 10000
+    val A = 2.0
+    val B = -1.5
+
+    val testData = generateLogisticInput(A, B, nPoints)
+
+    val initialB = -1.0
+    val initialWeights = Array(initialB)
+
+    val testRDD = sc.parallelize(testData, 2)
+    testRDD.cache()
+
+    // Use half as many iterations as the previous test.
+    val lr = new LogisticRegression().setStepSize(10.0)
+                                     .setNumIterations(10)
+
+    val model = lr.train(testRDD, initialWeights)
+
+    val weight0 = model.weights(0)
     assert(weight0 >= -1.60 && weight0 <= -1.40, weight0 + " not in [-1.6, -1.4]")
     assert(model.intercept >= 1.9 && model.intercept <= 2.1, model.intercept + " not in [1.9, 2.1]")
   }
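
generateLogisticInput leans on inverse-CDF sampling: if U ~ Uniform(0, 1), then log(U) - log(1 - U) is a standard Logistic(0, 1) draw, so thresholding offset + scale*x + noise at zero yields labels with P(y = 1) = logistic(offset + scale*x). A quick empirical check of that claim:

```scala
import scala.util.Random

object LogisticNoiseCheck {
  def main(args: Array[String]): Unit = {
    val rnd = new Random(45)
    val n = 100000
    val samples = Array.fill(n) {
      val u = rnd.nextDouble()
      math.log(u) - math.log(1.0 - u) // inverse CDF of Logistic(0, 1)
    }
    val mean = samples.sum / n
    val variance = samples.map(s => (s - mean) * (s - mean)).sum / n
    // Logistic(0, 1) has mean 0 and variance pi^2 / 3 ~= 3.29.
    println(f"mean=$mean%.3f variance=$variance%.3f (expect ~0 and ~3.29)")
  }
}
```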