From cabd54d93162a3f2a0cc7ed76fb46d8224edab94 Mon Sep 17 00:00:00 2001
From: Zheng RuiFeng <ruifengz@foxmail.com>
Date: Thu, 28 Apr 2016 22:44:14 -0700
Subject: [PATCH] [SPARK-14829][MLLIB] Deprecate GLM APIs using SGD

## What changes were proposed in this pull request?
According to the [SPARK-14829](https://issues.apache.org/jira/browse/SPARK-14829), deprecate API of LogisticRegression and LinearRegression using SGD

## How was this patch tested?
manual tests

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #12596 from zhengruifeng/deprecate_sgd.
---
 .../classification/LogisticRegression.scala    |  2 ++
 .../apache/spark/mllib/regression/Lasso.scala  |  4 ++++
 .../mllib/regression/LinearRegression.scala    |  2 ++
 .../mllib/regression/RidgeRegression.scala     |  4 ++++
 python/pyspark/mllib/classification.py         |  7 +++++++
 python/pyspark/mllib/regression.py             | 18 ++++++++++++++++++
 6 files changed, 37 insertions(+)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index f10570e662..1d25a58e0f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -206,6 +206,7 @@ object LogisticRegressionModel extends Loader[LogisticRegressionModel] {
  * Using [[LogisticRegressionWithLBFGS]] is recommended over this.
  */
 @Since("0.8.0")
+@deprecated("Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS", "2.0.0")
 class LogisticRegressionWithSGD private[mllib] (
     private var stepSize: Double,
     private var numIterations: Int,
@@ -240,6 +241,7 @@ class LogisticRegressionWithSGD private[mllib] (
  * NOTE: Labels used in Logistic Regression should be {0, 1}
  */
 @Since("0.8.0")
+@deprecated("Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS", "2.0.0")
 object LogisticRegressionWithSGD {
   // NOTE(shivaram): We use multiple train methods instead of default arguments to support
   // Java programs.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
index d55e5dfdaa..ef8c80f0cb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -85,6 +85,8 @@ object LassoModel extends Loader[LassoModel] {
  * See also the documentation for the precise formulation.
  */
 @Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 1.0. Note the default " +
+  "regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
 class LassoWithSGD private (
     private var stepSize: Double,
     private var numIterations: Int,
@@ -118,6 +120,8 @@ class LassoWithSGD private (
  *
  */
 @Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 1.0. Note the default " +
+  "regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
 object LassoWithSGD {
 
   /**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index e754e74492..9e9d98bc5e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -86,6 +86,7 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] {
  * See also the documentation for the precise formulation.
  */
 @Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0")
 class LinearRegressionWithSGD private[mllib] (
     private var stepSize: Double,
     private var numIterations: Int,
@@ -119,6 +120,7 @@ class LinearRegressionWithSGD private[mllib] (
  *
  */
 @Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0")
 object LinearRegressionWithSGD {
 
   /**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index 0a44ff559d..512fb9a712 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -86,6 +86,8 @@ object RidgeRegressionModel extends Loader[RidgeRegressionModel] {
  * See also the documentation for the precise formulation.
  */
 @Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 0.0. Note the default " +
+  "regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
 class RidgeRegressionWithSGD private (
     private var stepSize: Double,
     private var numIterations: Int,
@@ -119,6 +121,8 @@ class RidgeRegressionWithSGD private (
  *
  */
 @Since("0.8.0")
+@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 0.0. Note the default " +
+  "regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for LinearRegression.", "2.0.0")
 object RidgeRegressionWithSGD {
 
   /**
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index 57106f8690..fe5b6844bf 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -16,6 +16,7 @@
 #
 
 from math import exp
+import warnings
 
 import numpy
 from numpy import array
@@ -266,6 +267,8 @@ class LogisticRegressionModel(LinearClassificationModel):
 class LogisticRegressionWithSGD(object):
     """
     .. versionadded:: 0.9.0
+    .. note:: Deprecated in 2.0.0. Use ml.classification.LogisticRegression or
+            LogisticRegressionWithLBFGS.
     """
     @classmethod
     @since('0.9.0')
@@ -312,6 +315,10 @@ class LogisticRegressionWithSGD(object):
           A condition which decides iteration termination.
           (default: 0.001)
         """
+        warnings.warn(
+            "Deprecated in 2.0.0. Use ml.classification.LogisticRegression or "
+            "LogisticRegressionWithLBFGS.")
+
         def train(rdd, i):
             return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, int(iterations),
                                  float(step), float(miniBatchFraction), i, float(regParam), regType,
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 3b77a62000..639c5eabaa 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -17,6 +17,7 @@
 
 import numpy as np
 from numpy import array
+import warnings
 
 from pyspark import RDD, since
 from pyspark.streaming.dstream import DStream
@@ -221,6 +222,7 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
 class LinearRegressionWithSGD(object):
     """
     .. versionadded:: 0.9.0
+    .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression.
     """
     @classmethod
     @since("0.9.0")
@@ -276,6 +278,8 @@ class LinearRegressionWithSGD(object):
           A condition which decides iteration termination.
           (default: 0.001)
         """
+        warnings.warn("Deprecated in 2.0.0. Use ml.regression.LinearRegression.")
+
         def train(rdd, i):
             return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, int(iterations),
                                  float(step), float(miniBatchFraction), i, float(regParam),
@@ -366,6 +370,8 @@ class LassoModel(LinearRegressionModelBase):
 class LassoWithSGD(object):
     """
     .. versionadded:: 0.9.0
+    .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0.
+            Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.
     """
     @classmethod
     @since("0.9.0")
@@ -413,6 +419,10 @@ class LassoWithSGD(object):
           A condition which decides iteration termination.
           (default: 0.001)
         """
+        warnings.warn(
+            "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. "
+            "Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.")
+
         def train(rdd, i):
             return callMLlibFunc("trainLassoModelWithSGD", rdd, int(iterations), float(step),
                                  float(regParam), float(miniBatchFraction), i, bool(intercept),
@@ -503,6 +513,9 @@ class RidgeRegressionModel(LinearRegressionModelBase):
 class RidgeRegressionWithSGD(object):
     """
     .. versionadded:: 0.9.0
+    .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0.
+            Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for
+            LinearRegression.
     """
     @classmethod
     @since("0.9.0")
@@ -550,6 +563,11 @@ class RidgeRegressionWithSGD(object):
           A condition which decides iteration termination.
           (default: 0.001)
         """
+        warnings.warn(
+            "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. "
+            "Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for "
+            "LinearRegression.")
+
         def train(rdd, i):
             return callMLlibFunc("trainRidgeModelWithSGD", rdd, int(iterations), float(step),
                                  float(regParam), float(miniBatchFraction), i, bool(intercept),
-- 
GitLab