Skip to content
Snippets Groups Projects
Commit 3aff0866 authored by Holden Karau's avatar Holden Karau Committed by Joseph K. Bradley
Browse files

[SPARK-9774] [ML] [PYSPARK] Add python api for ml regression isotonicregression

Add the Python API for IsotonicRegression in ml.regression.

Author: Holden Karau <holden@pigscanfly.ca>

Closes #8214 from holdenk/SPARK-9774-add-python-api-for-ml-regression-isotonicregression.
parent 1bc435ae
No related branches found
No related tags found
No related merge requests found
......@@ -133,7 +133,10 @@ if __name__ == "__main__":
("thresholds", "Thresholds in multi-class classification to adjust the probability of " +
"predicting each class. Array must have length equal to the number of classes, with " +
"values >= 0. The class with largest value p/t is predicted, where p is the original " +
"probability of that class and t is the class' threshold.", None)]
"probability of that class and t is the class' threshold.", None),
("weightCol", "weight column name. If this is not set or empty, we treat " +
"all instance weights as 1.0.", None)]
code = []
for name, doc, defaultValueStr in shared:
param_code = _gen_param_header(name, doc, defaultValueStr)
......
......@@ -570,6 +570,33 @@ class HasThresholds(Params):
return self.getOrDefault(self.thresholds)
class HasWeightCol(Params):
    """
    Mixin for param weightCol: weight column name. If this is not set or
    empty, we treat all instance weights as 1.0.
    """

    # a placeholder so the param shows up in the generated API docs
    weightCol = Param(
        Params._dummy(), "weightCol",
        "weight column name. If this is not set or empty, we treat " +
        "all instance weights as 1.0.")

    def __init__(self):
        super(HasWeightCol, self).__init__()
        #: param for weight column name. If this is not set or empty, we
        #: treat all instance weights as 1.0.
        self.weightCol = Param(
            self, "weightCol",
            "weight column name. If this is not set or empty, we treat " +
            "all instance weights as 1.0.")

    def setWeightCol(self, value):
        """
        Sets the value of :py:attr:`weightCol`.
        """
        self._paramMap[self.weightCol] = value
        return self

    def getWeightCol(self):
        """
        Gets the value of weightCol or its default value.
        """
        return self.getOrDefault(self.weightCol)
class DecisionTreeParams(Params):
"""
Mixin for Decision Tree parameters.
......
......@@ -25,6 +25,7 @@ from pyspark.mllib.common import inherit_doc
# Public names exported by this module: one (Estimator, Model) pair per
# regression algorithm. Keeps `from ... import *` limited to these classes.
__all__ = ['AFTSurvivalRegression', 'AFTSurvivalRegressionModel',
           'DecisionTreeRegressor', 'DecisionTreeRegressionModel',
           'GBTRegressor', 'GBTRegressionModel',
           'IsotonicRegression', 'IsotonicRegressionModel',
           'LinearRegression', 'LinearRegressionModel',
           'RandomForestRegressor', 'RandomForestRegressionModel']
......@@ -142,6 +143,123 @@ class LinearRegressionModel(JavaModel):
return self._call_java("intercept")
@inherit_doc
class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
                         HasWeightCol):
    """
    .. note:: Experimental

    Currently implemented using parallelized pool adjacent violators algorithm.
    Only univariate (single feature) algorithm supported.

    >>> from pyspark.mllib.linalg import Vectors
    >>> df = sqlContext.createDataFrame([
    ...     (1.0, Vectors.dense(1.0)),
    ...     (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
    >>> ir = IsotonicRegression()
    >>> model = ir.fit(df)
    >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
    >>> model.transform(test0).head().prediction
    0.0
    >>> model.boundaries
    DenseVector([0.0, 1.0])
    """

    # Class-level placeholders (parented to a dummy) so the params appear in
    # the generated documentation.
    # NOTE: the "or " must keep its trailing space — the two literals are
    # concatenated into a single description string.
    isotonic = \
        Param(Params._dummy(), "isotonic",
              "whether the output sequence should be isotonic/increasing (true) or " +
              "antitonic/decreasing (false).")
    featureIndex = \
        Param(Params._dummy(), "featureIndex",
              "The index of the feature if featuresCol is a vector column, no effect otherwise.")

    @keyword_only
    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                 weightCol=None, isotonic=True, featureIndex=0):
        """
        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
                 weightCol=None, isotonic=True, featureIndex=0)
        """
        super(IsotonicRegression, self).__init__()
        # Wrap the JVM-side estimator; all fitting is delegated to it.
        self._java_obj = self._new_java_obj(
            "org.apache.spark.ml.regression.IsotonicRegression", self.uid)
        # Instance-level params (parented to self, unlike the class-level
        # doc placeholders above).
        self.isotonic = \
            Param(self, "isotonic",
                  "whether the output sequence should be isotonic/increasing (true) or " +
                  "antitonic/decreasing (false).")
        self.featureIndex = \
            Param(self, "featureIndex",
                  "The index of the feature if featuresCol is a vector column, no effect " +
                  "otherwise.")
        self._setDefault(isotonic=True, featureIndex=0)
        kwargs = self.__init__._input_kwargs
        self.setParams(**kwargs)

    @keyword_only
    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                  weightCol=None, isotonic=True, featureIndex=0):
        """
        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
                  weightCol=None, isotonic=True, featureIndex=0)
        Set the params for IsotonicRegression.
        """
        kwargs = self.setParams._input_kwargs
        return self._set(**kwargs)

    def _create_model(self, java_model):
        # Wrap the fitted JVM model in its Python counterpart.
        return IsotonicRegressionModel(java_model)

    def setIsotonic(self, value):
        """
        Sets the value of :py:attr:`isotonic`.
        """
        self._paramMap[self.isotonic] = value
        return self

    def getIsotonic(self):
        """
        Gets the value of isotonic or its default value.
        """
        return self.getOrDefault(self.isotonic)

    def setFeatureIndex(self, value):
        """
        Sets the value of :py:attr:`featureIndex`.
        """
        self._paramMap[self.featureIndex] = value
        return self

    def getFeatureIndex(self):
        """
        Gets the value of featureIndex or its default value.
        """
        return self.getOrDefault(self.featureIndex)
class IsotonicRegressionModel(JavaModel):
    """
    .. note:: Experimental

    Model fitted by IsotonicRegression.
    """

    @property
    def predictions(self):
        """
        Predictions associated with the boundaries at the same index, monotone
        because of isotonic regression.
        """
        # Delegate to the wrapped JVM model.
        return self._call_java("predictions")

    @property
    def boundaries(self):
        """
        Model boundaries.
        """
        # Delegate to the wrapped JVM model.
        return self._call_java("boundaries")
class TreeRegressorParams(object):
"""
Private class to track supported impurity measures.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment