From 65fec798ce52ca6b8b0fe14b78a16712778ad04c Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Thu, 13 Aug 2015 10:16:40 -0700 Subject: [PATCH] [MINOR] [DOC] fix mllib pydoc warnings Switch to correct Sphinx syntax. MechCoder Author: Xiangrui Meng <meng@databricks.com> Closes #8169 from mengxr/mllib-pydoc-fix. --- python/pyspark/mllib/regression.py | 14 ++++++++++---- python/pyspark/mllib/util.py | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 5b7afc15dd..41946e3674 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -207,8 +207,10 @@ class LinearRegressionWithSGD(object): Train a linear regression model using Stochastic Gradient Descent (SGD). This solves the least squares regression formulation - f(weights) = 1/n ||A weights-y||^2^ - (which is the mean squared error). + + f(weights) = 1/(2n) ||A weights - y||^2, + + which is half the mean squared error. Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with its corresponding right hand side label y. See also the documentation for the precise formulation. @@ -334,7 +336,9 @@ class LassoWithSGD(object): Stochastic Gradient Descent. This solves the l1-regularized least squares regression formulation - f(weights) = 1/2n ||A weights-y||^2^ + regParam ||weights||_1 + + f(weights) = 1/(2n) ||A weights - y||^2 + regParam ||weights||_1. + Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with its corresponding right hand side label y. See also the documentation for the precise formulation. @@ -451,7 +455,9 @@ class RidgeRegressionWithSGD(object): Stochastic Gradient Descent. This solves the l2-regularized least squares regression formulation - f(weights) = 1/2n ||A weights-y||^2^ + regParam/2 ||weights||^2^ + + f(weights) = 1/(2n) ||A weights - y||^2 + regParam/2 ||weights||^2. 
+ Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with its corresponding right hand side label y. See also the documentation for the precise formulation. diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py index 916de2d6fc..10a1e4b3eb 100644 --- a/python/pyspark/mllib/util.py +++ b/python/pyspark/mllib/util.py @@ -300,6 +300,7 @@ class LinearDataGenerator(object): :param: seed Random Seed :param: eps Used to scale the noise. If eps is set high, the amount of gaussian noise added is more. + Returns a list of LabeledPoints of length nPoints """ weights = [float(weight) for weight in weights] -- GitLab