Skip to content
Snippets Groups Projects
Commit 82e9d9c8 authored by noelsmith's avatar noelsmith Committed by Xiangrui Meng
Browse files

[SPARK-10272][PYSPARK][MLLIB] Added @since tags to pyspark.mllib.evaluation

Duplicated the since decorator from pyspark.sql into pyspark (also tweaked to handle functions without docstrings).

Added the @since decorator to public methods and ".. versionadded::" directives to class docstrings (versions derived from the git file history in pyspark).

Note: I also added the tags to MultilabelMetrics even though it isn't declared as public in the __all__ statement. If that's incorrect, I'll remove them.

Author: noelsmith <mail@noelsmith.com>

Closes #8628 from noel-smith/SPARK-10272-since-mllib-evalutation.
parent 06e6b765
No related branches found
No related tags found
No related merge requests found
......@@ -15,6 +15,7 @@
# limitations under the License.
#
from pyspark import since
from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc
from pyspark.sql import SQLContext
from pyspark.sql.types import StructField, StructType, DoubleType, IntegerType, ArrayType
......@@ -37,6 +38,8 @@ class BinaryClassificationMetrics(JavaModelWrapper):
>>> metrics.areaUnderPR
0.83...
>>> metrics.unpersist()
.. versionadded:: 1.4.0
"""
def __init__(self, scoreAndLabels):
......@@ -50,6 +53,7 @@ class BinaryClassificationMetrics(JavaModelWrapper):
super(BinaryClassificationMetrics, self).__init__(java_model)
@property
@since('1.4.0')
def areaUnderROC(self):
"""
Computes the area under the receiver operating characteristic
......@@ -58,12 +62,14 @@ class BinaryClassificationMetrics(JavaModelWrapper):
return self.call("areaUnderROC")
@property
@since('1.4.0')
def areaUnderPR(self):
    """
    Area under the precision-recall curve.

    The value is obtained by forwarding the ``areaUnderPR`` request
    through ``self.call`` and returning its result unchanged.
    """
    pr_area = self.call("areaUnderPR")
    return pr_area
@since('1.4.0')
def unpersist(self):
"""
Unpersists intermediate RDDs used in the computation.
......@@ -91,6 +97,8 @@ class RegressionMetrics(JavaModelWrapper):
0.61...
>>> metrics.r2
0.94...
.. versionadded:: 1.4.0
"""
def __init__(self, predictionAndObservations):
......@@ -104,6 +112,7 @@ class RegressionMetrics(JavaModelWrapper):
super(RegressionMetrics, self).__init__(java_model)
@property
@since('1.4.0')
def explainedVariance(self):
"""
Returns the explained variance regression score.
......@@ -112,6 +121,7 @@ class RegressionMetrics(JavaModelWrapper):
return self.call("explainedVariance")
@property
@since('1.4.0')
def meanAbsoluteError(self):
"""
Returns the mean absolute error, which is a risk function corresponding to the
......@@ -120,6 +130,7 @@ class RegressionMetrics(JavaModelWrapper):
return self.call("meanAbsoluteError")
@property
@since('1.4.0')
def meanSquaredError(self):
"""
Returns the mean squared error, which is a risk function corresponding to the
......@@ -128,6 +139,7 @@ class RegressionMetrics(JavaModelWrapper):
return self.call("meanSquaredError")
@property
@since('1.4.0')
def rootMeanSquaredError(self):
"""
Returns the root mean squared error, which is defined as the square root of
......@@ -136,6 +148,7 @@ class RegressionMetrics(JavaModelWrapper):
return self.call("rootMeanSquaredError")
@property
@since('1.4.0')
def r2(self):
"""
Returns R^2^, the coefficient of determination.
......@@ -178,6 +191,8 @@ class MulticlassMetrics(JavaModelWrapper):
0.66...
>>> metrics.weightedFMeasure(2.0)
0.65...
.. versionadded:: 1.4.0
"""
def __init__(self, predictionAndLabels):
......@@ -190,6 +205,7 @@ class MulticlassMetrics(JavaModelWrapper):
java_model = java_class(df._jdf)
super(MulticlassMetrics, self).__init__(java_model)
@since('1.4.0')
def confusionMatrix(self):
"""
Returns confusion matrix: predicted classes are in columns,
......@@ -197,18 +213,21 @@ class MulticlassMetrics(JavaModelWrapper):
"""
return self.call("confusionMatrix")
@since('1.4.0')
def truePositiveRate(self, label):
    """
    True positive rate for the given label (category).

    ``label`` is passed through to ``self.call`` as-is (no conversion
    is applied here) and the result is returned unchanged.
    """
    tpr = self.call("truePositiveRate", label)
    return tpr
@since('1.4.0')
def falsePositiveRate(self, label):
    """
    False positive rate for the given label (category).

    ``label`` is passed through to ``self.call`` as-is (no conversion
    is applied here) and the result is returned unchanged.
    """
    fpr = self.call("falsePositiveRate", label)
    return fpr
@since('1.4.0')
def precision(self, label=None):
"""
Returns precision or precision for a given label (category) if specified.
......@@ -218,6 +237,7 @@ class MulticlassMetrics(JavaModelWrapper):
else:
return self.call("precision", float(label))
@since('1.4.0')
def recall(self, label=None):
"""
Returns recall or recall for a given label (category) if specified.
......@@ -227,6 +247,7 @@ class MulticlassMetrics(JavaModelWrapper):
else:
return self.call("recall", float(label))
@since('1.4.0')
def fMeasure(self, label=None, beta=None):
"""
Returns f-measure or f-measure for a given label (category) if specified.
......@@ -243,6 +264,7 @@ class MulticlassMetrics(JavaModelWrapper):
return self.call("fMeasure", label, beta)
@property
@since('1.4.0')
def weightedTruePositiveRate(self):
"""
Returns weighted true positive rate.
......@@ -251,6 +273,7 @@ class MulticlassMetrics(JavaModelWrapper):
return self.call("weightedTruePositiveRate")
@property
@since('1.4.0')
def weightedFalsePositiveRate(self):
"""
Returns weighted false positive rate.
......@@ -258,6 +281,7 @@ class MulticlassMetrics(JavaModelWrapper):
return self.call("weightedFalsePositiveRate")
@property
@since('1.4.0')
def weightedRecall(self):
"""
Returns weighted averaged recall.
......@@ -266,12 +290,14 @@ class MulticlassMetrics(JavaModelWrapper):
return self.call("weightedRecall")
@property
@since('1.4.0')
def weightedPrecision(self):
    """
    Weighted averaged precision.

    The value is obtained by forwarding the ``weightedPrecision``
    request through ``self.call`` and returning its result unchanged.
    """
    weighted = self.call("weightedPrecision")
    return weighted
@since('1.4.0')
def weightedFMeasure(self, beta=None):
"""
Returns weighted averaged f-measure.
......@@ -307,6 +333,7 @@ class RankingMetrics(JavaModelWrapper):
>>> metrics.ndcgAt(10)
0.48...
.. versionadded:: 1.4.0
"""
def __init__(self, predictionAndLabels):
......@@ -317,6 +344,7 @@ class RankingMetrics(JavaModelWrapper):
java_model = callMLlibFunc("newRankingMetrics", df._jdf)
super(RankingMetrics, self).__init__(java_model)
@since('1.4.0')
def precisionAt(self, k):
"""
Compute the average precision of all the queries, truncated at ranking position k.
......@@ -331,6 +359,7 @@ class RankingMetrics(JavaModelWrapper):
return self.call("precisionAt", int(k))
@property
@since('1.4.0')
def meanAveragePrecision(self):
"""
Returns the mean average precision (MAP) of all the queries.
......@@ -339,6 +368,7 @@ class RankingMetrics(JavaModelWrapper):
"""
return self.call("meanAveragePrecision")
@since('1.4.0')
def ndcgAt(self, k):
"""
Compute the average NDCG value of all the queries, truncated at ranking position k.
......@@ -388,6 +418,8 @@ class MultilabelMetrics(JavaModelWrapper):
0.28...
>>> metrics.accuracy
0.54...
.. versionadded:: 1.4.0
"""
def __init__(self, predictionAndLabels):
......@@ -399,6 +431,7 @@ class MultilabelMetrics(JavaModelWrapper):
java_model = java_class(df._jdf)
super(MultilabelMetrics, self).__init__(java_model)
@since('1.4.0')
def precision(self, label=None):
"""
Returns precision or precision for a given label (category) if specified.
......@@ -408,6 +441,7 @@ class MultilabelMetrics(JavaModelWrapper):
else:
return self.call("precision", float(label))
@since('1.4.0')
def recall(self, label=None):
"""
Returns recall or recall for a given label (category) if specified.
......@@ -417,6 +451,7 @@ class MultilabelMetrics(JavaModelWrapper):
else:
return self.call("recall", float(label))
@since('1.4.0')
def f1Measure(self, label=None):
"""
Returns f1Measure or f1Measure for a given label (category) if specified.
......@@ -427,6 +462,7 @@ class MultilabelMetrics(JavaModelWrapper):
return self.call("f1Measure", float(label))
@property
@since('1.4.0')
def microPrecision(self):
"""
Returns micro-averaged label-based precision.
......@@ -435,6 +471,7 @@ class MultilabelMetrics(JavaModelWrapper):
return self.call("microPrecision")
@property
@since('1.4.0')
def microRecall(self):
"""
Returns micro-averaged label-based recall.
......@@ -443,6 +480,7 @@ class MultilabelMetrics(JavaModelWrapper):
return self.call("microRecall")
@property
@since('1.4.0')
def microF1Measure(self):
"""
Returns micro-averaged label-based f1-measure.
......@@ -451,6 +489,7 @@ class MultilabelMetrics(JavaModelWrapper):
return self.call("microF1Measure")
@property
@since('1.4.0')
def hammingLoss(self):
"""
Returns Hamming-loss.
......@@ -458,6 +497,7 @@ class MultilabelMetrics(JavaModelWrapper):
return self.call("hammingLoss")
@property
@since('1.4.0')
def subsetAccuracy(self):
"""
Returns subset accuracy.
......@@ -466,6 +506,7 @@ class MultilabelMetrics(JavaModelWrapper):
return self.call("subsetAccuracy")
@property
@since('1.4.0')
def accuracy(self):
"""
Returns accuracy.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment