Skip to content
Snippets Groups Projects
Commit 8668ead2 authored by Xiangrui Meng
Browse files

[SPARK-10233] [MLLIB] update since version in mllib.evaluation

Same as #8421 but for `mllib.evaluation`.

cc avulanov

Author: Xiangrui Meng <meng@databricks.com>

Closes #8423 from mengxr/SPARK-10233.
parent 125205cd
No related branches found
No related tags found
No related merge requests found
......@@ -42,11 +42,11 @@ import org.apache.spark.sql.DataFrame
* be smaller as a result, meaning there may be an extra sample at
* partition boundaries.
*/
@Since("1.3.0")
@Since("1.0.0")
@Experimental
class BinaryClassificationMetrics(
val scoreAndLabels: RDD[(Double, Double)],
val numBins: Int) extends Logging {
class BinaryClassificationMetrics @Since("1.3.0") (
@Since("1.3.0") val scoreAndLabels: RDD[(Double, Double)],
@Since("1.3.0") val numBins: Int) extends Logging {
require(numBins >= 0, "numBins must be nonnegative")
......
......@@ -33,7 +33,7 @@ import org.apache.spark.sql.DataFrame
*/
@Since("1.1.0")
@Experimental
class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[(Double, Double)]) {
/**
* An auxiliary constructor taking a DataFrame.
......@@ -140,6 +140,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
 * Returns precision: the sum of the per-class counts held in `tpByClass`
 * divided by `labelCount`.
 */
@Since("1.1.0")
lazy val precision: Double = {
  val truePositiveTotal = tpByClass.values.sum
  truePositiveTotal.toDouble / labelCount
}
/**
......@@ -148,23 +149,27 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
* because sum of all false positives is equal to sum
* of all false negatives)
*/
@Since("1.1.0")
// Alias of `precision`: both members evaluate to the same Double.
lazy val recall: Double = precision
/**
 * Returns f-measure
 * (equal to precision and recall because precision equals recall)
 */
@Since("1.1.0")
// Alias of `precision`; no separate computation is performed here.
lazy val fMeasure: Double = precision
/**
 * Returns weighted true positive rate
 * (equal to precision, recall and f-measure)
 */
@Since("1.1.0")
// Delegates to `weightedRecall`, so both members always report the same value.
lazy val weightedTruePositiveRate: Double = weightedRecall
/**
 * Returns weighted false positive rate: for every entry of `labelCountByClass`,
 * the false positive rate of that label multiplied by the entry's count and
 * divided by `labelCount`, summed over all entries.
 */
@Since("1.1.0")
lazy val weightedFalsePositiveRate: Double = {
  val perLabelContribution = labelCountByClass.map { case (label, occurrences) =>
    val scaledRate = falsePositiveRate(label) * occurrences.toDouble
    scaledRate / labelCount
  }
  perLabelContribution.sum
}
......@@ -173,6 +178,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
* Returns weighted averaged recall
* (equal to precision, recall and f-measure)
*/
@Since("1.1.0")
lazy val weightedRecall: Double = {
  // Each entry contributes recall(label) * count / labelCount; contributions are then summed.
  val perLabelContribution = labelCountByClass.map { case (label, occurrences) =>
    val scaledRecall = recall(label) * occurrences.toDouble
    scaledRecall / labelCount
  }
  perLabelContribution.sum
}
......@@ -180,6 +186,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
 * Returns weighted averaged precision: for every entry of `labelCountByClass`,
 * the precision of that label multiplied by the entry's count and divided by
 * `labelCount`, summed over all entries.
 */
@Since("1.1.0")
lazy val weightedPrecision: Double = {
  val perLabelContribution = labelCountByClass.map { case (label, occurrences) =>
    val scaledPrecision = precision(label) * occurrences.toDouble
    scaledPrecision / labelCount
  }
  perLabelContribution.sum
}
......@@ -196,6 +203,7 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
 * Returns weighted averaged f1-measure: for every entry of `labelCountByClass`,
 * `fMeasure(label, 1.0)` multiplied by the entry's count and divided by
 * `labelCount`, summed over all entries.
 */
@Since("1.1.0")
lazy val weightedFMeasure: Double = {
  val perLabelContribution = labelCountByClass.map { case (label, occurrences) =>
    val scaledF1 = fMeasure(label, 1.0) * occurrences.toDouble
    scaledF1 / labelCount
  }
  perLabelContribution.sum
}
......@@ -203,5 +211,6 @@ class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) {
/**
 * Returns the sequence of labels in ascending order, taken from the keys
 * of `tpByClass`.
 */
@Since("1.1.0")
lazy val labels: Array[Double] = {
  val knownLabels = tpByClass.keys.toArray
  knownLabels.sorted
}
}
......@@ -28,7 +28,7 @@ import org.apache.spark.sql.DataFrame
* both are non-null Arrays, each with unique elements.
*/
@Since("1.2.0")
class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) {
class MultilabelMetrics @Since("1.2.0") (predictionAndLabels: RDD[(Array[Double], Array[Double])]) {
/**
* An auxiliary constructor taking a DataFrame.
......@@ -46,6 +46,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
* Returns subset accuracy
* (for equal sets of labels)
*/
@Since("1.2.0")
lazy val subsetAccuracy: Double = {
  // A document counts only when both arrays are element-wise equal (compared via `.deep`).
  val exactMatches = predictionAndLabels.filter { pair =>
    pair._1.deep == pair._2.deep
  }.count()
  exactMatches.toDouble / numDocs
}
......@@ -53,6 +54,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
 * Returns accuracy: for each document, the number of elements shared by the
 * predicted and the actual label arrays divided by
 * `labels.size + predictions.size - shared`, summed over all documents and
 * divided by `numDocs`.
 *
 * NOTE(review): a document whose prediction and label arrays are both empty
 * evaluates 0.0 / 0, i.e. NaN, which propagates into the result. This is
 * left as is; confirm whether callers can supply such documents.
 */
@Since("1.2.0")
lazy val accuracy: Double = predictionAndLabels.map { case (predictions, labels) =>
  // Intersect once per document and reuse the count in numerator and denominator.
  val sharedCount = labels.intersect(predictions).size
  sharedCount.toDouble / (labels.size + predictions.size - sharedCount)
}.sum / numDocs
......@@ -61,6 +63,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
 * Returns Hamming-loss: per document, `labels.size + predictions.size` minus
 * twice the size of their intersection; the per-document values are summed
 * and divided by `numDocs * numLabels`.
 */
@Since("1.2.0")
lazy val hammingLoss: Double = {
  val mismatchTotal = predictionAndLabels.map { case (predicted, actual) =>
    actual.size + predicted.size - 2 * actual.intersect(predicted).size
  }.sum
  mismatchTotal / (numDocs * numLabels)
}
......@@ -68,6 +71,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
* Returns document-based precision averaged by the number of documents
*/
@Since("1.2.0")
lazy val precision: Double = predictionAndLabels.map { case (predictions, labels) =>
if (predictions.size > 0) {
predictions.intersect(labels).size.toDouble / predictions.size
......@@ -79,6 +83,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
 * Returns document-based recall averaged by the number of documents.
 *
 * Per document, recall is the size of the intersection of labels and
 * predictions divided by the number of labels. A document with an empty
 * label array contributes 0.0 instead of evaluating 0.0 / 0 (NaN), so one
 * unlabeled document cannot turn the whole average into NaN.
 */
@Since("1.2.0")
lazy val recall: Double = predictionAndLabels.map { case (predictions, labels) =>
  if (labels.size > 0) {
    labels.intersect(predictions).size.toDouble / labels.size
  } else {
    // No ground-truth labels: nothing to recall, avoid the 0 / 0 division.
    0.0
  }
}.sum / numDocs
......@@ -86,6 +91,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
/**
 * Returns document-based f1-measure averaged by the number of documents.
 *
 * Per document the value is `2.0 * shared / (predictions.size + labels.size)`,
 * where `shared` is the size of the intersection of the two arrays.
 */
@Since("1.2.0")
lazy val f1Measure: Double = {
  // NOTE(review): two empty arrays give 0.0 / 0 = NaN here - confirm inputs are non-empty.
  val perDocumentF1 = predictionAndLabels.map { case (predicted, actual) =>
    val sharedCount = predicted.intersect(actual).size
    2.0 * sharedCount / (predicted.size + actual.size)
  }
  perDocumentF1.sum / numDocs
}
......@@ -143,6 +149,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
* Returns micro-averaged label-based precision
* (equal to micro-averaged document-based precision)
*/
@Since("1.2.0")
lazy val microPrecision: Double = {
val sumFp = fpPerClass.foldLeft(0L){ case(cum, (_, fp)) => cum + fp}
sumTp.toDouble / (sumTp + sumFp)
......@@ -152,6 +159,7 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
* Returns micro-averaged label-based recall
* (equal to micro-averaged document-based recall)
*/
@Since("1.2.0")
lazy val microRecall: Double = {
val sumFn = fnPerClass.foldLeft(0.0){ case(cum, (_, fn)) => cum + fn}
sumTp.toDouble / (sumTp + sumFn)
......@@ -161,10 +169,12 @@ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]
* Returns micro-averaged label-based f1-measure
* (equal to micro-averaged document-based f1-measure)
*/
@Since("1.2.0")
lazy val microF1Measure: Double = {
  // Numerator is a Double; the denominator keeps the integral arithmetic of its operands.
  val doubledTruePositives = 2.0 * sumTp
  val denominator = 2 * sumTp + sumFnClass + sumFpClass
  doubledTruePositives / denominator
}
/**
 * Returns the sequence of labels in ascending order, taken from the keys
 * of `tpPerClass`.
 */
@Since("1.2.0")
lazy val labels: Array[Double] = {
  val knownLabels = tpPerClass.keys.toArray
  knownLabels.sorted
}
}
......@@ -32,7 +32,8 @@ import org.apache.spark.sql.DataFrame
*/
@Since("1.2.0")
@Experimental
class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extends Logging {
class RegressionMetrics @Since("1.2.0") (
predictionAndObservations: RDD[(Double, Double)]) extends Logging {
/**
* An auxiliary constructor taking a DataFrame.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment