Commit f80f7b69 authored by Ehsan M.Kermani, committed by Xiangrui Meng
[SPARK-10265][DOCUMENTATION, ML] Fixed @Since annotation to ml.regression

Here is my first commit.

Author: Ehsan M.Kermani <ehsanmo1367@gmail.com>

Closes #8728 from ehsanmok/SinceAnn.
parent 6b87acd6
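
For context: `@Since(version)` is Spark's annotation in `org.apache.spark.annotation` for recording the release in which a public API first appeared, so the Scaladoc build can surface it. A class-level annotation alone does not cover the primary constructor or its parameters in Scala, which is why the diff below annotates the class, the constructor, and the `uid` parameter separately. A self-contained sketch of that pattern follows; `MySince` and `FooRegressor` are stand-in names (Spark's own `Since`, as far as I can tell, is only visible inside the Spark source tree):

import scala.annotation.StaticAnnotation

// Stand-in for org.apache.spark.annotation.Since.
class MySince(version: String) extends StaticAnnotation

// The pattern this commit applies: annotate the class itself, the primary
// constructor, and the uid parameter, plus every public member.
@MySince("1.4.0")
class FooRegressor @MySince("1.4.0") (@MySince("1.4.0") val uid: String) {

  @MySince("1.4.0")
  def this() = this("foo_" + scala.util.Random.alphanumeric.take(8).mkString)

  @MySince("1.4.0")
  def setMaxDepth(value: Int): this.type = this // illustrative no-op setter
}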
mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala

@@ -17,7 +17,7 @@
 package org.apache.spark.ml.regression

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.{PredictionModel, Predictor}
 import org.apache.spark.ml.param.ParamMap
 import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeRegressorParams}
@@ -36,30 +36,39 @@ import org.apache.spark.sql.DataFrame
  * for regression.
  * It supports both continuous and categorical features.
  */
+@Since("1.4.0")
 @Experimental
-final class DecisionTreeRegressor(override val uid: String)
+final class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel]
   with DecisionTreeParams with TreeRegressorParams {

+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("dtr"))

   // Override parameter setters from parent trait for Java API compatibility.
+  @Since("1.4.0")
   override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)

+  @Since("1.4.0")
   override def setMaxBins(value: Int): this.type = super.setMaxBins(value)

+  @Since("1.4.0")
   override def setMinInstancesPerNode(value: Int): this.type =
     super.setMinInstancesPerNode(value)

+  @Since("1.4.0")
   override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)

+  @Since("1.4.0")
   override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)

+  @Since("1.4.0")
   override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)

+  @Since("1.4.0")
   override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)

+  @Since("1.4.0")
   override def setImpurity(value: String): this.type = super.setImpurity(value)

   override protected def train(dataset: DataFrame): DecisionTreeRegressionModel = {
@@ -78,9 +87,11 @@ final class DecisionTreeRegressor(override val uid: String)
       subsamplingRate = 1.0)
   }

+  @Since("1.4.0")
   override def copy(extra: ParamMap): DecisionTreeRegressor = defaultCopy(extra)
 }

+@Since("1.4.0")
 @Experimental
 object DecisionTreeRegressor {
   /** Accessor for supported impurities: variance */
@@ -93,6 +104,7 @@ object DecisionTreeRegressor {
  * It supports both continuous and categorical features.
  * @param rootNode Root of the decision tree
  */
+@Since("1.4.0")
 @Experimental
 final class DecisionTreeRegressionModel private[ml] (
     override val uid: String,
@@ -115,10 +127,12 @@ final class DecisionTreeRegressionModel private[ml] (
     rootNode.predictImpl(features).prediction
   }

+  @Since("1.4.0")
   override def copy(extra: ParamMap): DecisionTreeRegressionModel = {
     copyValues(new DecisionTreeRegressionModel(uid, rootNode, numFeatures), extra).setParent(parent)
   }

+  @Since("1.4.0")
   override def toString: String = {
     s"DecisionTreeRegressionModel (uid=$uid) of depth $depth with $numNodes nodes"
   }
...
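
A minimal usage sketch against the 1.6-era API shown above; the local SparkContext and toy DataFrame are illustrative assumptions, not part of the commit:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.ml.regression.DecisionTreeRegressor

val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("since-demo"))
val sqlContext = new SQLContext(sc)
import sqlContext.implicits._

// Toy regression data: label is roughly linear in the single feature.
val training = sc.parallelize(Seq(
  (1.0, Vectors.dense(1.0)),
  (2.1, Vectors.dense(2.0)),
  (2.9, Vectors.dense(3.0)),
  (4.2, Vectors.dense(4.0)))).toDF("label", "features")

val dt = new DecisionTreeRegressor() // constructor annotated @Since("1.4.0")
  .setMaxDepth(3)                    // setters annotated @Since("1.4.0")
  .setImpurity("variance")
val dtModel = dt.fit(training)
println(dtModel) // DecisionTreeRegressionModel (uid=dtr_...) of depth ... with ... nodes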
mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala

@@ -20,7 +20,7 @@
 import com.github.fommil.netlib.BLAS.{getInstance => blas}

 import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.{PredictionModel, Predictor}
 import org.apache.spark.ml.param.{Param, ParamMap}
 import org.apache.spark.ml.tree.{DecisionTreeModel, GBTParams, TreeEnsembleModel, TreeRegressorParams}
@@ -42,54 +42,65 @@ import org.apache.spark.sql.types.DoubleType
  * learning algorithm for regression.
  * It supports both continuous and categorical features.
  */
+@Since("1.4.0")
 @Experimental
-final class GBTRegressor(override val uid: String)
+final class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends Predictor[Vector, GBTRegressor, GBTRegressionModel]
   with GBTParams with TreeRegressorParams with Logging {

+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("gbtr"))

   // Override parameter setters from parent trait for Java API compatibility.

   // Parameters from TreeRegressorParams:
+  @Since("1.4.0")
   override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)

+  @Since("1.4.0")
   override def setMaxBins(value: Int): this.type = super.setMaxBins(value)

+  @Since("1.4.0")
   override def setMinInstancesPerNode(value: Int): this.type =
     super.setMinInstancesPerNode(value)

+  @Since("1.4.0")
   override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)

+  @Since("1.4.0")
   override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)

+  @Since("1.4.0")
   override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)

+  @Since("1.4.0")
   override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)

   /**
    * The impurity setting is ignored for GBT models.
    * Individual trees are built using impurity "Variance."
    */
+  @Since("1.4.0")
   override def setImpurity(value: String): this.type = {
     logWarning("GBTRegressor.setImpurity should NOT be used")
     this
   }

   // Parameters from TreeEnsembleParams:
+  @Since("1.4.0")
   override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)

+  @Since("1.4.0")
   override def setSeed(value: Long): this.type = {
     logWarning("The 'seed' parameter is currently ignored by Gradient Boosting.")
     super.setSeed(value)
   }

   // Parameters from GBTParams:
+  @Since("1.4.0")
   override def setMaxIter(value: Int): this.type = super.setMaxIter(value)

+  @Since("1.4.0")
   override def setStepSize(value: Double): this.type = super.setStepSize(value)

   // Parameters for GBTRegressor:
@@ -100,6 +111,7 @@ final class GBTRegressor(override val uid: String)
    * (default = squared)
    * @group param
    */
+  @Since("1.4.0")
   val lossType: Param[String] = new Param[String](this, "lossType", "Loss function which GBT" +
     " tries to minimize (case-insensitive). Supported options:" +
     s" ${GBTRegressor.supportedLossTypes.mkString(", ")}",
@@ -108,9 +120,11 @@ final class GBTRegressor(override val uid: String)
   setDefault(lossType -> "squared")

   /** @group setParam */
+  @Since("1.4.0")
   def setLossType(value: String): this.type = set(lossType, value)

   /** @group getParam */
+  @Since("1.4.0")
   def getLossType: String = $(lossType).toLowerCase

   /** (private[ml]) Convert new loss to old loss. */
@@ -135,13 +149,16 @@ final class GBTRegressor(override val uid: String)
     GBTRegressionModel.fromOld(oldModel, this, categoricalFeatures, numFeatures)
   }

+  @Since("1.4.0")
   override def copy(extra: ParamMap): GBTRegressor = defaultCopy(extra)
 }

+@Since("1.4.0")
 @Experimental
 object GBTRegressor {
   // The losses below should be lowercase.
   /** Accessor for supported loss settings: squared (L2), absolute (L1) */
+  @Since("1.4.0")
   final val supportedLossTypes: Array[String] = Array("squared", "absolute").map(_.toLowerCase)
 }
@@ -154,6 +171,7 @@ object GBTRegressor {
  * @param _trees Decision trees in the ensemble.
  * @param _treeWeights Weights for the decision trees in the ensemble.
  */
+@Since("1.4.0")
 @Experimental
 final class GBTRegressionModel private[ml](
     override val uid: String,
@@ -172,11 +190,14 @@ final class GBTRegressionModel private[ml](
    * @param _trees Decision trees in the ensemble.
    * @param _treeWeights Weights for the decision trees in the ensemble.
    */
+  @Since("1.4.0")
   def this(uid: String, _trees: Array[DecisionTreeRegressionModel], _treeWeights: Array[Double]) =
     this(uid, _trees, _treeWeights, -1)

+  @Since("1.4.0")
   override def trees: Array[DecisionTreeModel] = _trees.asInstanceOf[Array[DecisionTreeModel]]

+  @Since("1.4.0")
   override def treeWeights: Array[Double] = _treeWeights

   override protected def transformImpl(dataset: DataFrame): DataFrame = {
@@ -194,11 +215,13 @@ final class GBTRegressionModel private[ml](
     blas.ddot(numTrees, treePredictions, 1, _treeWeights, 1)
   }

+  @Since("1.4.0")
   override def copy(extra: ParamMap): GBTRegressionModel = {
     copyValues(new GBTRegressionModel(uid, _trees, _treeWeights, numFeatures),
       extra).setParent(parent)
   }

+  @Since("1.4.0")
   override def toString: String = {
     s"GBTRegressionModel (uid=$uid) with $numTrees trees"
   }
...
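
The lossType Param above is the only GBT-specific parameter touched here; a short sketch of how it behaves (no SparkContext is needed just to configure the estimator):

import org.apache.spark.ml.regression.GBTRegressor

val gbt = new GBTRegressor()
  .setMaxIter(10)          // @Since("1.4.0")
  .setStepSize(0.1)        // @Since("1.4.0")
  .setLossType("absolute") // must be one of GBTRegressor.supportedLossTypes

println(GBTRegressor.supportedLossTypes.mkString(", ")) // squared, absolute
println(gbt.getLossType)                                // absolute (lowercased)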
mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala

@@ -18,7 +18,7 @@
 package org.apache.spark.ml.regression

 import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared.{HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol}
@@ -124,32 +124,42 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
  *
  * Uses [[org.apache.spark.mllib.regression.IsotonicRegression]].
  */
+@Since("1.5.0")
 @Experimental
-class IsotonicRegression(override val uid: String) extends Estimator[IsotonicRegressionModel]
-  with IsotonicRegressionBase {
+class IsotonicRegression @Since("1.5.0") (@Since("1.5.0") override val uid: String)
+  extends Estimator[IsotonicRegressionModel] with IsotonicRegressionBase {

+  @Since("1.5.0")
   def this() = this(Identifiable.randomUID("isoReg"))

   /** @group setParam */
+  @Since("1.5.0")
   def setLabelCol(value: String): this.type = set(labelCol, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setFeaturesCol(value: String): this.type = set(featuresCol, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setPredictionCol(value: String): this.type = set(predictionCol, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setIsotonic(value: Boolean): this.type = set(isotonic, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setWeightCol(value: String): this.type = set(weightCol, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setFeatureIndex(value: Int): this.type = set(featureIndex, value)

+  @Since("1.5.0")
   override def copy(extra: ParamMap): IsotonicRegression = defaultCopy(extra)

+  @Since("1.5.0")
   override def fit(dataset: DataFrame): IsotonicRegressionModel = {
     validateAndTransformSchema(dataset.schema, fitting = true)
     // Extract columns from data. If dataset is persisted, do not persist oldDataset.
@@ -163,6 +173,7 @@ class IsotonicRegression(override val uid: String) extends Estimator[IsotonicReg
     copyValues(new IsotonicRegressionModel(uid, oldModel).setParent(this))
   }

+  @Since("1.5.0")
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema, fitting = true)
   }
@@ -178,6 +189,7 @@ class IsotonicRegression(override val uid: String) extends Estimator[IsotonicReg
  * @param oldModel A [[org.apache.spark.mllib.regression.IsotonicRegressionModel]]
  *                 model trained by [[org.apache.spark.mllib.regression.IsotonicRegression]].
  */
+@Since("1.5.0")
 @Experimental
 class IsotonicRegressionModel private[ml] (
     override val uid: String,
@@ -185,27 +197,34 @@ class IsotonicRegressionModel private[ml] (
   extends Model[IsotonicRegressionModel] with IsotonicRegressionBase {

   /** @group setParam */
+  @Since("1.5.0")
   def setFeaturesCol(value: String): this.type = set(featuresCol, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setPredictionCol(value: String): this.type = set(predictionCol, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setFeatureIndex(value: Int): this.type = set(featureIndex, value)

   /** Boundaries in increasing order for which predictions are known. */
+  @Since("1.5.0")
   def boundaries: Vector = Vectors.dense(oldModel.boundaries)

   /**
    * Predictions associated with the boundaries at the same index, monotone because of isotonic
    * regression.
    */
+  @Since("1.5.0")
   def predictions: Vector = Vectors.dense(oldModel.predictions)

+  @Since("1.5.0")
   override def copy(extra: ParamMap): IsotonicRegressionModel = {
     copyValues(new IsotonicRegressionModel(uid, oldModel), extra).setParent(parent)
   }

+  @Since("1.5.0")
   override def transform(dataset: DataFrame): DataFrame = {
     val predict = dataset.schema($(featuresCol)).dataType match {
       case DoubleType =>
@@ -217,6 +236,7 @@ class IsotonicRegressionModel private[ml] (
     dataset.withColumn($(predictionCol), predict(col($(featuresCol))))
   }

+  @Since("1.5.0")
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema, fitting = false)
   }
...
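
A sketch of the fit/transform round trip for the estimator above, reusing the `sc`/`sqlContext` pair from the earlier DecisionTreeRegressor sketch (toy data again assumed):

import org.apache.spark.ml.regression.IsotonicRegression

import sqlContext.implicits._
// Isotonic regression accepts a Double features column; labels need not be monotone.
val points = sc.parallelize(Seq(
  (1.0, 1.0), (2.0, 2.0), (2.5, 3.0), (1.8, 4.0))).toDF("label", "features")

val ir = new IsotonicRegression().setIsotonic(true) // fit a non-decreasing function
val irModel = ir.fit(points)
println(irModel.boundaries)  // feature values where the fitted function changes
println(irModel.predictions) // fitted value at each boundary, monotone
irModel.transform(points).show()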
mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala

@@ -24,9 +24,9 @@ import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS,
 import breeze.stats.distributions.StudentsT

 import org.apache.spark.{Logging, SparkException}
-import org.apache.spark.annotation.Experimental
 import org.apache.spark.ml.feature.Instance
 import org.apache.spark.ml.optim.WeightedLeastSquares
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.PredictorParams
 import org.apache.spark.ml.param.ParamMap
 import org.apache.spark.ml.param.shared._
@@ -61,11 +61,13 @@ private[regression] trait LinearRegressionParams extends PredictorParams
  *  - L1 (Lasso)
  *  - L2 + L1 (elastic net)
  */
+@Since("1.3.0")
 @Experimental
-class LinearRegression(override val uid: String)
+class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String)
   extends Regressor[Vector, LinearRegression, LinearRegressionModel]
   with LinearRegressionParams with Logging {

+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("linReg"))

   /**
@@ -73,6 +75,7 @@ class LinearRegression(override val uid: String)
    * Default is 0.0.
    * @group setParam
    */
+  @Since("1.3.0")
   def setRegParam(value: Double): this.type = set(regParam, value)
   setDefault(regParam -> 0.0)
@@ -81,6 +84,7 @@ class LinearRegression(override val uid: String)
    * Default is true.
    * @group setParam
    */
+  @Since("1.5.0")
   def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value)
   setDefault(fitIntercept -> true)
@@ -93,6 +97,7 @@ class LinearRegression(override val uid: String)
    * Default is true.
    * @group setParam
    */
+  @Since("1.5.0")
   def setStandardization(value: Boolean): this.type = set(standardization, value)
   setDefault(standardization -> true)
@@ -103,6 +108,7 @@ class LinearRegression(override val uid: String)
    * Default is 0.0 which is an L2 penalty.
    * @group setParam
    */
+  @Since("1.4.0")
   def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value)
   setDefault(elasticNetParam -> 0.0)
@@ -111,6 +117,7 @@ class LinearRegression(override val uid: String)
    * Default is 100.
    * @group setParam
    */
+  @Since("1.3.0")
   def setMaxIter(value: Int): this.type = set(maxIter, value)
   setDefault(maxIter -> 100)
@@ -120,6 +127,7 @@ class LinearRegression(override val uid: String)
    * Default is 1E-6.
    * @group setParam
    */
+  @Since("1.4.0")
   def setTol(value: Double): this.type = set(tol, value)
   setDefault(tol -> 1E-6)
@@ -129,6 +137,7 @@ class LinearRegression(override val uid: String)
    * Default is empty, so all instances have weight one.
    * @group setParam
    */
+  @Since("1.6.0")
   def setWeightCol(value: String): this.type = set(weightCol, value)
   setDefault(weightCol -> "")
@@ -139,6 +148,7 @@ class LinearRegression(override val uid: String)
    * selected automatically.
    * @group setParam
    */
+  @Since("1.6.0")
   def setSolver(value: String): this.type = set(solver, value)
   setDefault(solver -> "auto")
@@ -329,6 +339,7 @@ class LinearRegression(override val uid: String)
     model.setSummary(trainingSummary)
   }

+  @Since("1.4.0")
   override def copy(extra: ParamMap): LinearRegression = defaultCopy(extra)
 }
@@ -336,6 +347,7 @@ class LinearRegression(override val uid: String)
  * :: Experimental ::
  * Model produced by [[LinearRegression]].
  */
+@Since("1.3.0")
 @Experimental
 class LinearRegressionModel private[ml] (
     override val uid: String,
@@ -355,6 +367,7 @@ class LinearRegressionModel private[ml] (
    * Gets summary (e.g. residuals, mse, r-squared ) of model on training set. An exception is
    * thrown if `trainingSummary == None`.
    */
+  @Since("1.5.0")
   def summary: LinearRegressionTrainingSummary = trainingSummary match {
     case Some(summ) => summ
     case None =>
@@ -369,6 +382,7 @@ class LinearRegressionModel private[ml] (
   }

   /** Indicates whether a training summary exists for this model instance. */
+  @Since("1.5.0")
   def hasSummary: Boolean = trainingSummary.isDefined

   /**
@@ -402,6 +416,7 @@ class LinearRegressionModel private[ml] (
     dot(features, coefficients) + intercept
   }

+  @Since("1.4.0")
   override def copy(extra: ParamMap): LinearRegressionModel = {
     val newModel = copyValues(new LinearRegressionModel(uid, coefficients, intercept), extra)
     if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
@@ -416,6 +431,7 @@ class LinearRegressionModel private[ml] (
  * @param predictions predictions outputted by the model's `transform` method.
  * @param objectiveHistory objective function (scaled loss + regularization) at each iteration.
  */
+@Since("1.5.0")
 @Experimental
 class LinearRegressionTrainingSummary private[regression] (
     predictions: DataFrame,
@@ -428,6 +444,7 @@ class LinearRegressionTrainingSummary private[regression] (
   extends LinearRegressionSummary(predictions, predictionCol, labelCol, model, diagInvAtWA) {

   /** Number of training iterations until termination */
+  @Since("1.5.0")
   val totalIterations = objectiveHistory.length
 }
@@ -437,6 +454,7 @@ class LinearRegressionTrainingSummary private[regression] (
  * Linear regression results evaluated on a dataset.
  * @param predictions predictions outputted by the model's `transform` method.
  */
+@Since("1.5.0")
 @Experimental
 class LinearRegressionSummary private[regression] (
     @transient val predictions: DataFrame,
@@ -455,33 +473,39 @@ class LinearRegressionSummary private[regression] (
    * explainedVariance = 1 - variance(y - \hat{y}) / variance(y)
    * Reference: [[http://en.wikipedia.org/wiki/Explained_variation]]
    */
+  @Since("1.5.0")
   val explainedVariance: Double = metrics.explainedVariance

   /**
    * Returns the mean absolute error, which is a risk function corresponding to the
    * expected value of the absolute error loss or l1-norm loss.
    */
+  @Since("1.5.0")
   val meanAbsoluteError: Double = metrics.meanAbsoluteError

   /**
    * Returns the mean squared error, which is a risk function corresponding to the
    * expected value of the squared error loss or quadratic loss.
    */
+  @Since("1.5.0")
   val meanSquaredError: Double = metrics.meanSquaredError

   /**
    * Returns the root mean squared error, which is defined as the square root of
    * the mean squared error.
    */
+  @Since("1.5.0")
   val rootMeanSquaredError: Double = metrics.rootMeanSquaredError

   /**
    * Returns R^2^, the coefficient of determination.
    * Reference: [[http://en.wikipedia.org/wiki/Coefficient_of_determination]]
    */
+  @Since("1.5.0")
   val r2: Double = metrics.r2

   /** Residuals (label - predicted value) */
+  @Since("1.5.0")
   @transient lazy val residuals: DataFrame = {
     val t = udf { (pred: Double, label: Double) => label - pred }
     predictions.select(t(col(predictionCol), col(labelCol)).as("residuals"))
...
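
A sketch tying together the setters and the training summary annotated above, again reusing the toy `training` DataFrame from the DecisionTreeRegressor sketch:

import org.apache.spark.ml.regression.LinearRegression

val lr = new LinearRegression()
  .setMaxIter(50)          // @Since("1.3.0")
  .setRegParam(0.01)       // @Since("1.3.0")
  .setElasticNetParam(0.5) // @Since("1.4.0"): mix of L1 and L2 penalties
val lrModel = lr.fit(training)

val s = lrModel.summary // @Since("1.5.0"); throws if no summary was attached
println(s.rootMeanSquaredError)
println(s.r2)
s.residuals.show() // label - prediction, one row per training instance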
mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala

@@ -17,7 +17,7 @@
 package org.apache.spark.ml.regression

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.{PredictionModel, Predictor}
 import org.apache.spark.ml.param.ParamMap
 import org.apache.spark.ml.tree.{DecisionTreeModel, RandomForestParams, TreeEnsembleModel, TreeRegressorParams}
@@ -37,44 +37,55 @@ import org.apache.spark.sql.functions._
  * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for regression.
  * It supports both continuous and categorical features.
  */
+@Since("1.4.0")
 @Experimental
-final class RandomForestRegressor(override val uid: String)
+final class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends Predictor[Vector, RandomForestRegressor, RandomForestRegressionModel]
   with RandomForestParams with TreeRegressorParams {

+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("rfr"))

   // Override parameter setters from parent trait for Java API compatibility.

   // Parameters from TreeRegressorParams:
+  @Since("1.4.0")
   override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)

+  @Since("1.4.0")
   override def setMaxBins(value: Int): this.type = super.setMaxBins(value)

+  @Since("1.4.0")
   override def setMinInstancesPerNode(value: Int): this.type =
     super.setMinInstancesPerNode(value)

+  @Since("1.4.0")
   override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)

+  @Since("1.4.0")
   override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)

+  @Since("1.4.0")
   override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)

+  @Since("1.4.0")
   override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)

+  @Since("1.4.0")
   override def setImpurity(value: String): this.type = super.setImpurity(value)

   // Parameters from TreeEnsembleParams:
+  @Since("1.4.0")
   override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)

+  @Since("1.4.0")
   override def setSeed(value: Long): this.type = super.setSeed(value)

   // Parameters from RandomForestParams:
+  @Since("1.4.0")
   override def setNumTrees(value: Int): this.type = super.setNumTrees(value)

+  @Since("1.4.0")
   override def setFeatureSubsetStrategy(value: String): this.type =
     super.setFeatureSubsetStrategy(value)
@@ -91,15 +102,19 @@ final class RandomForestRegressor(override val uid: String)
     new RandomForestRegressionModel(trees, numFeatures)
   }

+  @Since("1.4.0")
   override def copy(extra: ParamMap): RandomForestRegressor = defaultCopy(extra)
 }

+@Since("1.4.0")
 @Experimental
 object RandomForestRegressor {
   /** Accessor for supported impurity settings: variance */
+  @Since("1.4.0")
   final val supportedImpurities: Array[String] = TreeRegressorParams.supportedImpurities

   /** Accessor for supported featureSubsetStrategy settings: auto, all, onethird, sqrt, log2 */
+  @Since("1.4.0")
   final val supportedFeatureSubsetStrategies: Array[String] =
     RandomForestParams.supportedFeatureSubsetStrategies
 }
@@ -111,6 +126,7 @@ object RandomForestRegressor {
  * @param _trees Decision trees in the ensemble.
  * @param numFeatures Number of features used by this model
  */
+@Since("1.4.0")
 @Experimental
 final class RandomForestRegressionModel private[ml] (
     override val uid: String,
@@ -128,11 +144,13 @@ final class RandomForestRegressionModel private[ml] (
   private[ml] def this(trees: Array[DecisionTreeRegressionModel], numFeatures: Int) =
     this(Identifiable.randomUID("rfr"), trees, numFeatures)

+  @Since("1.4.0")
   override def trees: Array[DecisionTreeModel] = _trees.asInstanceOf[Array[DecisionTreeModel]]

   // Note: We may add support for weights (based on tree performance) later on.
   private lazy val _treeWeights: Array[Double] = Array.fill[Double](numTrees)(1.0)

+  @Since("1.4.0")
   override def treeWeights: Array[Double] = _treeWeights

   override protected def transformImpl(dataset: DataFrame): DataFrame = {
@@ -150,10 +168,12 @@ final class RandomForestRegressionModel private[ml] (
     _trees.map(_.rootNode.predictImpl(features).prediction).sum / numTrees
   }

+  @Since("1.4.0")
   override def copy(extra: ParamMap): RandomForestRegressionModel = {
     copyValues(new RandomForestRegressionModel(uid, _trees, numFeatures), extra).setParent(parent)
   }

+  @Since("1.4.0")
   override def toString: String = {
     s"RandomForestRegressionModel (uid=$uid) with $numTrees trees"
   }
...
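
Finally, a sketch of the ensemble parameters annotated above, once more against the toy `training` DataFrame assumed in the first sketch:

import org.apache.spark.ml.regression.RandomForestRegressor

val rf = new RandomForestRegressor()
  .setNumTrees(20)                      // @Since("1.4.0")
  .setFeatureSubsetStrategy("onethird") // one of supportedFeatureSubsetStrategies
  .setSubsamplingRate(0.8)
val rfModel = rf.fit(training)
println(rfModel) // RandomForestRegressionModel (uid=rfr_...) with 20 trees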