Skip to content
Snippets Groups Projects
Commit ecfb3e73 authored by lihao's avatar lihao Committed by Xiangrui Meng
Browse files

[SPARK-10286][ML][PYSPARK][DOCS] Add @since annotation to pyspark.ml.param and pyspark.ml.*

Author: lihao <lihaowhu@gmail.com>

Closes #9275 from lidinghao/SPARK-10286.
parent 2804674a
No related branches found
No related tags found
No related merge requests found
......@@ -17,6 +17,7 @@
from abc import abstractmethod, ABCMeta
from pyspark import since
from pyspark.ml.wrapper import JavaWrapper
from pyspark.ml.param import Param, Params
from pyspark.ml.param.shared import HasLabelCol, HasPredictionCol, HasRawPredictionCol
......@@ -31,6 +32,8 @@ __all__ = ['Evaluator', 'BinaryClassificationEvaluator', 'RegressionEvaluator',
class Evaluator(Params):
"""
Base class for evaluators that compute metrics from predictions.
.. versionadded:: 1.4.0
"""
__metaclass__ = ABCMeta
......@@ -46,6 +49,7 @@ class Evaluator(Params):
"""
raise NotImplementedError()
@since("1.4.0")
def evaluate(self, dataset, params=None):
"""
Evaluates the output with optional parameters.
......@@ -66,6 +70,7 @@ class Evaluator(Params):
else:
raise ValueError("Params must be a param map but got %s." % type(params))
@since("1.5.0")
def isLargerBetter(self):
"""
Indicates whether the metric returned by :py:meth:`evaluate` should be maximized
......@@ -114,6 +119,8 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction
0.70...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "areaUnderPR"})
0.83...
.. versionadded:: 1.4.0
"""
# a placeholder to make it appear in the generated doc
......@@ -138,6 +145,7 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction
kwargs = self.__init__._input_kwargs
self._set(**kwargs)
@since("1.4.0")
def setMetricName(self, value):
"""
Sets the value of :py:attr:`metricName`.
......@@ -145,6 +153,7 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction
self._paramMap[self.metricName] = value
return self
@since("1.4.0")
def getMetricName(self):
"""
Gets the value of metricName or its default value.
......@@ -152,6 +161,7 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction
return self.getOrDefault(self.metricName)
@keyword_only
@since("1.4.0")
def setParams(self, rawPredictionCol="rawPrediction", labelCol="label",
metricName="areaUnderROC"):
"""
......@@ -180,6 +190,8 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol):
0.993...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "mae"})
2.649...
.. versionadded:: 1.4.0
"""
# Because we will maximize evaluation value (ref: `CrossValidator`),
# when we evaluate a metric that is needed to minimize (e.g., `"rmse"`, `"mse"`, `"mae"`),
......@@ -205,6 +217,7 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol):
kwargs = self.__init__._input_kwargs
self._set(**kwargs)
@since("1.4.0")
def setMetricName(self, value):
"""
Sets the value of :py:attr:`metricName`.
......@@ -212,6 +225,7 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol):
self._paramMap[self.metricName] = value
return self
@since("1.4.0")
def getMetricName(self):
"""
Gets the value of metricName or its default value.
......@@ -219,6 +233,7 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol):
return self.getOrDefault(self.metricName)
@keyword_only
@since("1.4.0")
def setParams(self, predictionCol="prediction", labelCol="label",
metricName="rmse"):
"""
......@@ -246,6 +261,8 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio
0.66...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "recall"})
0.66...
.. versionadded:: 1.5.0
"""
# a placeholder to make it appear in the generated doc
metricName = Param(Params._dummy(), "metricName",
......@@ -271,6 +288,7 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio
kwargs = self.__init__._input_kwargs
self._set(**kwargs)
@since("1.5.0")
def setMetricName(self, value):
"""
Sets the value of :py:attr:`metricName`.
......@@ -278,6 +296,7 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio
self._paramMap[self.metricName] = value
return self
@since("1.5.0")
def getMetricName(self):
"""
Gets the value of metricName or its default value.
......@@ -285,6 +304,7 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio
return self.getOrDefault(self.metricName)
@keyword_only
@since("1.5.0")
def setParams(self, predictionCol="prediction", labelCol="label",
metricName="f1"):
"""
......
This diff is collapsed.
......@@ -18,6 +18,7 @@
from abc import ABCMeta
import copy
from pyspark import since
from pyspark.ml.util import Identifiable
......@@ -27,6 +28,8 @@ __all__ = ['Param', 'Params']
class Param(object):
"""
A param with self-contained documentation.
.. versionadded:: 1.3.0
"""
def __init__(self, parent, name, doc):
......@@ -56,6 +59,8 @@ class Params(Identifiable):
"""
Components that take parameters. This also provides an internal
param map to store parameter values attached to the instance.
.. versionadded:: 1.3.0
"""
__metaclass__ = ABCMeta
......@@ -72,6 +77,7 @@ class Params(Identifiable):
self._params = None
@property
@since("1.3.0")
def params(self):
"""
Returns all params ordered by name. The default implementation
......@@ -83,6 +89,7 @@ class Params(Identifiable):
[getattr(self, x) for x in dir(self) if x != "params"]))
return self._params
@since("1.4.0")
def explainParam(self, param):
"""
Explains a single param and returns its name, doc, and optional
......@@ -100,6 +107,7 @@ class Params(Identifiable):
valueStr = "(" + ", ".join(values) + ")"
return "%s: %s %s" % (param.name, param.doc, valueStr)
@since("1.4.0")
def explainParams(self):
"""
Returns the documentation of all params with their optionally
......@@ -107,6 +115,7 @@ class Params(Identifiable):
"""
return "\n".join([self.explainParam(param) for param in self.params])
@since("1.4.0")
def getParam(self, paramName):
"""
Gets a param by its name.
......@@ -117,6 +126,7 @@ class Params(Identifiable):
else:
raise ValueError("Cannot find param with name %s." % paramName)
@since("1.4.0")
def isSet(self, param):
"""
Checks whether a param is explicitly set by user.
......@@ -124,6 +134,7 @@ class Params(Identifiable):
param = self._resolveParam(param)
return param in self._paramMap
@since("1.4.0")
def hasDefault(self, param):
"""
Checks whether a param has a default value.
......@@ -131,6 +142,7 @@ class Params(Identifiable):
param = self._resolveParam(param)
return param in self._defaultParamMap
@since("1.4.0")
def isDefined(self, param):
"""
Checks whether a param is explicitly set by user or has
......@@ -138,6 +150,7 @@ class Params(Identifiable):
"""
return self.isSet(param) or self.hasDefault(param)
@since("1.4.0")
def hasParam(self, paramName):
"""
Tests whether this instance contains a param with a given
......@@ -146,6 +159,7 @@ class Params(Identifiable):
param = self._resolveParam(paramName)
return param in self.params
@since("1.4.0")
def getOrDefault(self, param):
"""
Gets the value of a param in the user-supplied param map or its
......@@ -157,6 +171,7 @@ class Params(Identifiable):
else:
return self._defaultParamMap[param]
@since("1.4.0")
def extractParamMap(self, extra=None):
"""
Extracts the embedded default param values and user-supplied
......@@ -175,6 +190,7 @@ class Params(Identifiable):
paramMap.update(extra)
return paramMap
@since("1.4.0")
def copy(self, extra=None):
"""
Creates a copy of this instance with the same uid and some
......
......@@ -17,6 +17,7 @@
from abc import ABCMeta, abstractmethod
from pyspark import since
from pyspark.ml.param import Param, Params
from pyspark.ml.util import keyword_only
from pyspark.mllib.common import inherit_doc
......@@ -26,6 +27,8 @@ from pyspark.mllib.common import inherit_doc
class Estimator(Params):
"""
Abstract class for estimators that fit models to data.
.. versionadded:: 1.3.0
"""
__metaclass__ = ABCMeta
......@@ -42,6 +45,7 @@ class Estimator(Params):
"""
raise NotImplementedError()
@since("1.3.0")
def fit(self, dataset, params=None):
"""
Fits a model to the input dataset with optional parameters.
......@@ -73,6 +77,8 @@ class Transformer(Params):
"""
Abstract class for transformers that transform one dataset into
another.
.. versionadded:: 1.3.0
"""
__metaclass__ = ABCMeta
......@@ -88,6 +94,7 @@ class Transformer(Params):
"""
raise NotImplementedError()
@since("1.3.0")
def transform(self, dataset, params=None):
"""
Transforms the input dataset with optional parameters.
......@@ -113,6 +120,8 @@ class Transformer(Params):
class Model(Transformer):
"""
Abstract class for models that are fitted by estimators.
.. versionadded:: 1.4.0
"""
__metaclass__ = ABCMeta
......@@ -136,6 +145,8 @@ class Pipeline(Estimator):
consists of fitted models and transformers, corresponding to the
pipeline stages. If there are no stages, the pipeline acts as an
identity transformer.
.. versionadded:: 1.3.0
"""
@keyword_only
......@@ -151,6 +162,7 @@ class Pipeline(Estimator):
kwargs = self.__init__._input_kwargs
self.setParams(**kwargs)
@since("1.3.0")
def setStages(self, value):
"""
Set pipeline stages.
......@@ -161,6 +173,7 @@ class Pipeline(Estimator):
self._paramMap[self.stages] = value
return self
@since("1.3.0")
def getStages(self):
"""
Get pipeline stages.
......@@ -169,6 +182,7 @@ class Pipeline(Estimator):
return self._paramMap[self.stages]
@keyword_only
@since("1.3.0")
def setParams(self, stages=None):
"""
setParams(self, stages=None)
......@@ -204,7 +218,14 @@ class Pipeline(Estimator):
transformers.append(stage)
return PipelineModel(transformers)
@since("1.4.0")
def copy(self, extra=None):
"""
Creates a copy of this instance.
:param extra: extra parameters
:returns: new instance
"""
if extra is None:
extra = dict()
that = Params.copy(self, extra)
......@@ -216,6 +237,8 @@ class Pipeline(Estimator):
class PipelineModel(Model):
"""
Represents a compiled pipeline with transformers and fitted models.
.. versionadded:: 1.3.0
"""
def __init__(self, stages):
......@@ -227,7 +250,14 @@ class PipelineModel(Model):
dataset = t.transform(dataset)
return dataset
@since("1.4.0")
def copy(self, extra=None):
"""
Creates a copy of this instance.
:param extra: extra parameters
:returns: new instance
"""
if extra is None:
extra = dict()
stages = [stage.copy(extra) for stage in self.stages]
......
Loading, or reload the page.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.