Commit 00ae4be9 authored by Xiangrui Meng, committed by DB Tsai

[SPARK-10239] [SPARK-10244] [MLLIB] update since versions in mllib.pmml and mllib.util

Same as #8421 but for `mllib.pmml` and `mllib.util`.

cc dbtsai

Author: Xiangrui Meng <meng@databricks.com>

Closes #8430 from mengxr/SPARK-10239 and squashes the following commits:

a189acf [Xiangrui Meng] update since versions in mllib.pmml and mllib.util
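
For reference, `@Since` is MLlib's annotation for recording the Spark version in which a public API first became available, and it is rendered in the generated API docs. A minimal illustrative sketch of the pattern this patch applies (the trait name below is hypothetical and not part of the diff; `Since` itself is only usable inside the Spark source tree):

    import org.apache.spark.annotation.{DeveloperApi, Since}

    /**
     * :: DeveloperApi ::
     * Hypothetical API used only to illustrate the annotation pattern:
     * the type carries the version it first appeared in, and each public
     * member carries its own version.
     */
    @DeveloperApi
    @Since("1.4.0")
    trait ExampleExportable {

      /** Added one release after the trait itself. */
      @Since("1.5.0")
      def describe(): String
    }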
parent 92059078
Showing 9 changed files with 41 additions and 11 deletions
PMMLExportable.scala
@@ -23,7 +23,7 @@ import javax.xml.transform.stream.StreamResult
 import org.jpmml.model.JAXBUtil
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
 import org.apache.spark.mllib.pmml.export.PMMLModelExportFactory
 
 /**
@@ -33,6 +33,7 @@ import org.apache.spark.mllib.pmml.export.PMMLModelExportFactory
  * developed by the Data Mining Group (www.dmg.org).
  */
 @DeveloperApi
+@Since("1.4.0")
 trait PMMLExportable {
 
   /**
@@ -48,6 +49,7 @@ trait PMMLExportable {
    * Export the model to a local file in PMML format
    */
   @Experimental
+  @Since("1.4.0")
   def toPMML(localPath: String): Unit = {
     toPMML(new StreamResult(new File(localPath)))
   }
@@ -57,6 +59,7 @@ trait PMMLExportable {
    * Export the model to a directory on a distributed file system in PMML format
    */
   @Experimental
+  @Since("1.4.0")
   def toPMML(sc: SparkContext, path: String): Unit = {
     val pmml = toPMML()
     sc.parallelize(Array(pmml), 1).saveAsTextFile(path)
@@ -67,6 +70,7 @@ trait PMMLExportable {
    * Export the model to the OutputStream in PMML format
    */
   @Experimental
+  @Since("1.4.0")
   def toPMML(outputStream: OutputStream): Unit = {
     toPMML(new StreamResult(outputStream))
   }
@@ -76,6 +80,7 @@ trait PMMLExportable {
    * Export the model to a String in PMML format
    */
   @Experimental
+  @Since("1.4.0")
   def toPMML(): String = {
     val writer = new StringWriter
     toPMML(new StreamResult(writer))
...
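
As a usage illustration (not part of this patch), any MLlib model that mixes in `PMMLExportable`, such as a trained `KMeansModel`, can be exported through the methods annotated above; a hedged sketch with placeholder data and paths:

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.mllib.clustering.KMeans
    import org.apache.spark.mllib.linalg.Vectors

    val sc = new SparkContext(new SparkConf().setAppName("pmml-export").setMaster("local[2]"))
    val points = sc.parallelize(Seq(
      Vectors.dense(0.0, 0.0), Vectors.dense(1.0, 1.0),
      Vectors.dense(8.0, 9.0), Vectors.dense(9.0, 8.0)))

    val model = KMeans.train(points, k = 2, maxIterations = 10)

    model.toPMML("/tmp/kmeans.xml")          // local file
    model.toPMML(sc, "/tmp/kmeans-pmml")     // directory on a distributed file system
    val pmmlString: String = model.toPMML()  // in-memory String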
DataValidators.scala
@@ -17,16 +17,17 @@
 package org.apache.spark.mllib.util
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.Logging
-import org.apache.spark.rdd.RDD
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD
 
 /**
  * :: DeveloperApi ::
  * A collection of methods used to validate data before applying ML algorithms.
  */
 @DeveloperApi
+@Since("0.8.0")
 object DataValidators extends Logging {
@@ -34,6 +35,7 @@ object DataValidators extends Logging {
    *
    * @return True if labels are all zero or one, false otherwise.
    */
+  @Since("1.0.0")
   val binaryLabelValidator: RDD[LabeledPoint] => Boolean = { data =>
     val numInvalid = data.filter(x => x.label != 1.0 && x.label != 0.0).count()
     if (numInvalid != 0) {
@@ -48,6 +50,7 @@ object DataValidators extends Logging {
    *
    * @return True if labels are all in the range of {0, 1, ..., k-1}, false otherwise.
    */
+  @Since("1.3.0")
   def multiLabelValidator(k: Int): RDD[LabeledPoint] => Boolean = { data =>
     val numInvalid = data.filter(x =>
       x.label - x.label.toInt != 0.0 || x.label < 0 || x.label > k - 1).count()
...
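
The two validators above are plain functions of type `RDD[LabeledPoint] => Boolean`, so they can be applied directly; a hedged sketch assuming an existing `SparkContext` named `sc`:

    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.regression.LabeledPoint
    import org.apache.spark.mllib.util.DataValidators

    val points = sc.parallelize(Seq(
      LabeledPoint(0.0, Vectors.dense(1.0, 2.0)),
      LabeledPoint(1.0, Vectors.dense(3.0, 4.0))))

    val binaryOk = DataValidators.binaryLabelValidator(points)    // true: labels are all 0 or 1
    val multiOk = DataValidators.multiLabelValidator(3)(points)   // true: labels are in {0, 1, 2}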
KMeansDataGenerator.scala
@@ -19,8 +19,8 @@ package org.apache.spark.mllib.util
 import scala.util.Random
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.SparkContext
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.rdd.RDD
 
 /**
@@ -30,6 +30,7 @@ import org.apache.spark.rdd.RDD
  * cluster with scale 1 around each center.
  */
 @DeveloperApi
+@Since("0.8.0")
 object KMeansDataGenerator {
@@ -42,6 +43,7 @@ object KMeansDataGenerator {
    * @param r Scaling factor for the distribution of the initial centers
    * @param numPartitions Number of partitions of the generated RDD; default 2
    */
+  @Since("0.8.0")
   def generateKMeansRDD(
       sc: SparkContext,
       numPoints: Int,
@@ -62,6 +64,7 @@ object KMeansDataGenerator {
     }
   }
 
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length < 6) {
       // scalastyle:off println
...
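
A hedged usage sketch for `generateKMeansRDD` (argument values are placeholders; the full parameter list `(sc, numPoints, k, d, r, numPartitions)` is assumed from the docs above):

    import org.apache.spark.mllib.util.KMeansDataGenerator

    // Assuming `sc` is an existing SparkContext.
    // 1000 points in 3 dimensions around 5 centers, center scaling factor 10.0, 2 partitions.
    val kmeansData = KMeansDataGenerator.generateKMeansRDD(sc, 1000, 5, 3, 10.0, 2)
    kmeansData.take(2).foreach(p => println(p.mkString(",")))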
LinearDataGenerator.scala
@@ -22,11 +22,11 @@ import scala.util.Random
 import com.github.fommil.netlib.BLAS.{getInstance => blas}
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD
 
 /**
  * :: DeveloperApi ::
@@ -35,6 +35,7 @@ import org.apache.spark.mllib.regression.LabeledPoint
  * response variable `Y`.
  */
 @DeveloperApi
+@Since("0.8.0")
 object LinearDataGenerator {
@@ -46,6 +47,7 @@ object LinearDataGenerator {
    * @param seed Random seed
    * @return Java List of input.
    */
+  @Since("0.8.0")
   def generateLinearInputAsList(
       intercept: Double,
       weights: Array[Double],
@@ -68,6 +70,7 @@ object LinearDataGenerator {
    * @param eps Epsilon scaling factor.
    * @return Seq of input.
    */
+  @Since("0.8.0")
   def generateLinearInput(
       intercept: Double,
       weights: Array[Double],
@@ -92,6 +95,7 @@ object LinearDataGenerator {
    * @param eps Epsilon scaling factor.
    * @return Seq of input.
    */
+  @Since("0.8.0")
   def generateLinearInput(
       intercept: Double,
       weights: Array[Double],
@@ -132,6 +136,7 @@ object LinearDataGenerator {
    *
    * @return RDD of LabeledPoint containing sample data.
    */
+  @Since("0.8.0")
   def generateLinearRDD(
       sc: SparkContext,
       nexamples: Int,
@@ -151,6 +156,7 @@ object LinearDataGenerator {
     data
   }
 
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length < 2) {
       // scalastyle:off println
...
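
A hedged usage sketch for `generateLinearRDD`, assuming the parameter order `(sc, nexamples, nfeatures, eps, nparts, intercept)`; values are placeholders:

    import org.apache.spark.mllib.util.LinearDataGenerator

    // Assuming `sc` is an existing SparkContext.
    // 200 examples with 10 features, noise scale 0.1, 2 partitions, intercept 0.5.
    val linearData = LinearDataGenerator.generateLinearRDD(sc, 200, 10, 0.1, 2, 0.5)
    println(linearData.first())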
LogisticRegressionDataGenerator.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.util
 import scala.util.Random
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{Since, DeveloperApi}
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.regression.LabeledPoint
@@ -31,6 +31,7 @@ import org.apache.spark.mllib.linalg.Vectors
  * with probability `probOne` and scales features for positive examples by `eps`.
  */
 @DeveloperApi
+@Since("0.8.0")
 object LogisticRegressionDataGenerator {
@@ -43,6 +44,7 @@ object LogisticRegressionDataGenerator {
    * @param nparts Number of partitions of the generated RDD. Default value is 2.
    * @param probOne Probability that a label is 1 (and not 0). Default value is 0.5.
    */
+  @Since("0.8.0")
   def generateLogisticRDD(
       sc: SparkContext,
       nexamples: Int,
@@ -62,6 +64,7 @@ object LogisticRegressionDataGenerator {
     data
   }
 
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length != 5) {
       // scalastyle:off println
...
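
A hedged usage sketch for `generateLogisticRDD`, assuming the parameter order `(sc, nexamples, nfeatures, eps, nparts, probOne)` per the parameter docs above; values are placeholders:

    import org.apache.spark.mllib.util.LogisticRegressionDataGenerator

    // Assuming `sc` is an existing SparkContext.
    // 200 examples with 10 features, eps 1.0, 2 partitions, P(label = 1) = 0.5.
    val logisticData = LogisticRegressionDataGenerator.generateLogisticRDD(sc, 200, 10, 1.0, 2, 0.5)
    println(logisticData.count())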
MFDataGenerator.scala
@@ -23,7 +23,7 @@ import scala.language.postfixOps
 import scala.util.Random
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{Since, DeveloperApi}
 import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix}
 import org.apache.spark.rdd.RDD
@@ -52,7 +52,9 @@ import org.apache.spark.rdd.RDD
  * testSampFact (Double) Percentage of training data to use as test data.
  */
 @DeveloperApi
+@Since("0.8.0")
 object MFDataGenerator {
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length < 2) {
       // scalastyle:off println
...
MLUtils.scala
@@ -36,6 +36,7 @@ import org.apache.spark.streaming.dstream.DStream
 /**
  * Helper methods to load, save and pre-process data used in ML Lib.
  */
+@Since("0.8.0")
 object MLUtils {
 
   private[mllib] lazy val EPSILON = {
@@ -168,6 +169,7 @@ object MLUtils {
    *
    * @see [[org.apache.spark.mllib.util.MLUtils#loadLibSVMFile]]
    */
+  @Since("1.0.0")
   def saveAsLibSVMFile(data: RDD[LabeledPoint], dir: String) {
     // TODO: allow to specify label precision and feature precision.
     val dataStr = data.map { case LabeledPoint(label, features) =>
...
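
The annotated `saveAsLibSVMFile` pairs with the `loadLibSVMFile` referenced in its `@see` tag; a hedged round-trip sketch assuming an existing `SparkContext` named `sc` and a placeholder output path:

    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.regression.LabeledPoint
    import org.apache.spark.mllib.util.MLUtils

    val examples = sc.parallelize(Seq(
      LabeledPoint(1.0, Vectors.dense(0.5, 0.0, 2.0)),
      LabeledPoint(0.0, Vectors.sparse(3, Array(1), Array(3.0)))))

    MLUtils.saveAsLibSVMFile(examples, "/tmp/libsvm-data")
    val reloaded = MLUtils.loadLibSVMFile(sc, "/tmp/libsvm-data")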
SVMDataGenerator.scala
@@ -21,11 +21,11 @@ import scala.util.Random
 import com.github.fommil.netlib.BLAS.{getInstance => blas}
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD
 
 /**
  * :: DeveloperApi ::
@@ -33,8 +33,10 @@ import org.apache.spark.mllib.regression.LabeledPoint
  * for the features and adds Gaussian noise with weight 0.1 to generate labels.
  */
 @DeveloperApi
+@Since("0.8.0")
 object SVMDataGenerator {
 
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length < 2) {
       // scalastyle:off println
...
modelSaveLoad.scala
@@ -24,7 +24,7 @@ import org.json4s._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.sql.catalyst.ScalaReflection
 import org.apache.spark.sql.types.{DataType, StructField, StructType}
@@ -35,6 +35,7 @@ import org.apache.spark.sql.types.{DataType, StructField, StructType}
  * This should be inherited by the class which implements model instances.
  */
 @DeveloperApi
+@Since("1.3.0")
 trait Saveable {
@@ -50,6 +51,7 @@ trait Saveable {
    * @param path Path specifying the directory in which to save this model.
    *             If the directory already exists, this method throws an exception.
    */
+  @Since("1.3.0")
   def save(sc: SparkContext, path: String): Unit
 
   /** Current version of model save/load format. */
@@ -64,6 +66,7 @@ trait Saveable {
  * This should be inherited by an object paired with the model class.
  */
 @DeveloperApi
+@Since("1.3.0")
 trait Loader[M <: Saveable] {
@@ -75,6 +78,7 @@ trait Loader[M <: Saveable] {
    * @param path Path specifying the directory to which the model was saved.
    * @return Model instance
    */
+  @Since("1.3.0")
   def load(sc: SparkContext, path: String): M
 }
...
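
`Saveable.save` and `Loader.load` are the persistence pair implemented by most MLlib models; a hedged sketch assuming an existing `SparkContext` named `sc`, a previously trained `LogisticRegressionModel` named `lrModel`, and a placeholder path that does not yet exist:

    import org.apache.spark.mllib.classification.LogisticRegressionModel

    // save() throws if the directory already exists.
    lrModel.save(sc, "/tmp/lr-model")

    // The companion object mixes in Loader, so the model can be read back later.
    val restored = LogisticRegressionModel.load(sc, "/tmp/lr-model")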