From 9b670bcaec9c220603ec10a6d186865dabf26a5b Mon Sep 17 00:00:00 2001 From: Yuhao <yuhao.yang@intel.com> Date: Tue, 29 Nov 2016 18:46:59 -0800 Subject: [PATCH] [SPARK-18319][ML][QA2.1] 2.1 QA: API: Experimental, DeveloperApi, final, sealed audit ## What changes were proposed in this pull request? make a pass through the items marked as Experimental or DeveloperApi and see if any are stable enough to be unmarked. Also check for items marked final or sealed to see if they are stable enough to be opened up as APIs. Some discussions in the jira: https://issues.apache.org/jira/browse/SPARK-18319 ## How was this patch tested? existing ut Author: Yuhao <yuhao.yang@intel.com> Author: Yuhao Yang <hhbyyh@gmail.com> Closes #15972 from hhbyyh/experimental21. --- .../MultilayerPerceptronClassifier.scala | 6 +----- .../spark/ml/clustering/BisectingKMeans.scala | 5 ----- .../spark/ml/clustering/GaussianMixture.scala | 5 ----- .../org/apache/spark/ml/clustering/KMeans.scala | 4 ---- .../org/apache/spark/ml/clustering/LDA.scala | 12 ++---------- .../apache/spark/ml/feature/LabeledPoint.scala | 4 +--- .../apache/spark/ml/feature/MaxAbsScaler.scala | 6 +----- .../org/apache/spark/ml/util/ReadWrite.scala | 14 +------------- .../spark/mllib/clustering/LDAOptimizer.scala | 2 +- python/pyspark/ml/classification.py | 4 ---- python/pyspark/ml/clustering.py | 16 ---------------- python/pyspark/ml/feature.py | 4 ---- python/pyspark/ml/util.py | 8 -------- 13 files changed, 7 insertions(+), 83 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala index 1b45eafbac..aaaf7df345 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala @@ -21,7 +21,7 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{PredictionModel, Predictor, PredictorParams} import org.apache.spark.ml.ann.{FeedForwardTopology, FeedForwardTrainer} import org.apache.spark.ml.feature.LabeledPoint @@ -135,7 +135,6 @@ private object LabelConverter { } /** - * :: Experimental :: * Classifier trainer based on the Multilayer Perceptron. * Each layer has sigmoid activation function, output layer has softmax. * Number of inputs has to be equal to the size of feature vectors. @@ -143,7 +142,6 @@ private object LabelConverter { * */ @Since("1.5.0") -@Experimental class MultilayerPerceptronClassifier @Since("1.5.0") ( @Since("1.5.0") override val uid: String) extends Predictor[Vector, MultilayerPerceptronClassifier, MultilayerPerceptronClassificationModel] @@ -282,7 +280,6 @@ object MultilayerPerceptronClassifier } /** - * :: Experimental :: * Classification model based on the Multilayer Perceptron. * Each layer has sigmoid activation function, output layer has softmax. * @@ -291,7 +288,6 @@ object MultilayerPerceptronClassifier * @param weights the weights of layers */ @Since("1.5.0") -@Experimental class MultilayerPerceptronClassificationModel private[ml] ( @Since("1.5.0") override val uid: String, @Since("1.5.0") val layers: Array[Int], diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index e58df6ba91..4c20e6563b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -80,13 +80,11 @@ private[clustering] trait BisectingKMeansParams extends Params } /** - * :: Experimental :: * Model fitted by BisectingKMeans. * * @param parentModel a model trained by [[org.apache.spark.mllib.clustering.BisectingKMeans]]. */ @Since("2.0.0") -@Experimental class BisectingKMeansModel private[ml] ( @Since("2.0.0") override val uid: String, private val parentModel: MLlibBisectingKMeansModel @@ -197,8 +195,6 @@ object BisectingKMeansModel extends MLReadable[BisectingKMeansModel] { } /** - * :: Experimental :: - * * A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques" * by Steinbach, Karypis, and Kumar, with modification to fit Spark. * The algorithm starts from a single cluster that contains all points. @@ -213,7 +209,6 @@ object BisectingKMeansModel extends MLReadable[BisectingKMeansModel] { * KDD Workshop on Text Mining, 2000.</a> */ @Since("2.0.0") -@Experimental class BisectingKMeans @Since("2.0.0") ( @Since("2.0.0") override val uid: String) extends Estimator[BisectingKMeansModel] with BisectingKMeansParams with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index c764c3aa32..ac56845581 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -68,8 +68,6 @@ private[clustering] trait GaussianMixtureParams extends Params with HasMaxIter w } /** - * :: Experimental :: - * * Multivariate Gaussian Mixture Model (GMM) consisting of k Gaussians, where points * are drawn from each Gaussian i with probability weights(i). * @@ -80,7 +78,6 @@ private[clustering] trait GaussianMixtureParams extends Params with HasMaxIter w * the Multivariate Gaussian (Normal) Distribution for Gaussian i */ @Since("2.0.0") -@Experimental class GaussianMixtureModel private[ml] ( @Since("2.0.0") override val uid: String, @Since("2.0.0") val weights: Array[Double], @@ -265,7 +262,6 @@ object GaussianMixtureModel extends MLReadable[GaussianMixtureModel] { } /** - * :: Experimental :: * Gaussian Mixture clustering. * * This class performs expectation maximization for multivariate Gaussian @@ -284,7 +280,6 @@ object GaussianMixtureModel extends MLReadable[GaussianMixtureModel] { * on statistical/theoretical arguments) and (b) numerical issues with Gaussian distributions. */ @Since("2.0.0") -@Experimental class GaussianMixture @Since("2.0.0") ( @Since("2.0.0") override val uid: String) extends Estimator[GaussianMixtureModel] with GaussianMixtureParams with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index ad4f79a79c..e168a418cb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -96,13 +96,11 @@ private[clustering] trait KMeansParams extends Params with HasMaxIter with HasFe } /** - * :: Experimental :: * Model fitted by KMeans. * * @param parentModel a model trained by spark.mllib.clustering.KMeans. */ @Since("1.5.0") -@Experimental class KMeansModel private[ml] ( @Since("1.5.0") override val uid: String, private val parentModel: MLlibKMeansModel) @@ -248,13 +246,11 @@ object KMeansModel extends MLReadable[KMeansModel] { } /** - * :: Experimental :: * K-means clustering with support for k-means|| initialization proposed by Bahmani et al. * * @see <a href="http://dx.doi.org/10.14778/2180912.2180915">Bahmani et al., Scalable k-means++.</a> */ @Since("1.5.0") -@Experimental class KMeans @Since("1.5.0") ( @Since("1.5.0") override val uid: String) extends Estimator[KMeansModel] with KMeansParams with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala index cd403d842b..583e5e0928 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala @@ -22,7 +22,7 @@ import org.json4s.DefaultFormats import org.json4s.JsonAST.JObject import org.json4s.jackson.JsonMethods._ -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.internal.Logging import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.{Matrix, Vector, Vectors, VectorUDT} @@ -396,15 +396,13 @@ private object LDAParams { /** - * :: Experimental :: * Model fitted by [[LDA]]. * * @param vocabSize Vocabulary size (number of terms or words in the vocabulary) * @param sparkSession Used to construct local DataFrames for returning query results */ @Since("1.6.0") -@Experimental -sealed abstract class LDAModel private[ml] ( +abstract class LDAModel private[ml] ( @Since("1.6.0") override val uid: String, @Since("1.6.0") val vocabSize: Int, @Since("1.6.0") @transient private[ml] val sparkSession: SparkSession) @@ -556,14 +554,12 @@ sealed abstract class LDAModel private[ml] ( /** - * :: Experimental :: * * Local (non-distributed) model fitted by [[LDA]]. * * This model stores the inferred topics only; it does not store info about the training dataset. */ @Since("1.6.0") -@Experimental class LocalLDAModel private[ml] ( uid: String, vocabSize: Int, @@ -641,7 +637,6 @@ object LocalLDAModel extends MLReadable[LocalLDAModel] { /** - * :: Experimental :: * * Distributed model fitted by [[LDA]]. * This type of model is currently only produced by Expectation-Maximization (EM). @@ -653,7 +648,6 @@ object LocalLDAModel extends MLReadable[LocalLDAModel] { * `copy()` cheap. */ @Since("1.6.0") -@Experimental class DistributedLDAModel private[ml] ( uid: String, vocabSize: Int, @@ -789,7 +783,6 @@ object DistributedLDAModel extends MLReadable[DistributedLDAModel] { /** - * :: Experimental :: * * Latent Dirichlet Allocation (LDA), a topic model designed for text documents. * @@ -813,7 +806,6 @@ object DistributedLDAModel extends MLReadable[DistributedLDAModel] { * Latent Dirichlet allocation (Wikipedia)</a> */ @Since("1.6.0") -@Experimental class LDA @Since("1.6.0") ( @Since("1.6.0") override val uid: String) extends Estimator[LDAModel] with LDAParams with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala index 7d8e4adcc2..c5d0ec1a8d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/LabeledPoint.scala @@ -19,11 +19,10 @@ package org.apache.spark.ml.feature import scala.beans.BeanInfo -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.linalg.Vector /** - * :: Experimental :: * * Class that represents the features and label of a data point. * @@ -31,7 +30,6 @@ import org.apache.spark.ml.linalg.Vector * @param features List of features for this data point. */ @Since("2.0.0") -@Experimental @BeanInfo case class LabeledPoint(@Since("2.0.0") label: Double, @Since("2.0.0") features: Vector) { override def toString: String = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala index acabf0b892..85f9732f79 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.hadoop.fs.Path -import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.annotation.Since import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.{Vector, Vectors, VectorUDT} import org.apache.spark.ml.param.{ParamMap, Params} @@ -48,12 +48,10 @@ private[feature] trait MaxAbsScalerParams extends Params with HasInputCol with H } /** - * :: Experimental :: * Rescale each feature individually to range [-1, 1] by dividing through the largest maximum * absolute value in each feature. It does not shift/center the data, and thus does not destroy * any sparsity. */ -@Experimental @Since("2.0.0") class MaxAbsScaler @Since("2.0.0") (@Since("2.0.0") override val uid: String) extends Estimator[MaxAbsScalerModel] with MaxAbsScalerParams with DefaultParamsWritable { @@ -101,11 +99,9 @@ object MaxAbsScaler extends DefaultParamsReadable[MaxAbsScaler] { } /** - * :: Experimental :: * Model fitted by [[MaxAbsScaler]]. * */ -@Experimental @Since("2.0.0") class MaxAbsScalerModel private[ml] ( @Since("2.0.0") override val uid: String, diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala index 95f480455e..c0e3801499 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala @@ -26,7 +26,7 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext -import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.internal.Logging import org.apache.spark.ml._ import org.apache.spark.ml.classification.{OneVsRest, OneVsRestModel} @@ -81,11 +81,8 @@ private[util] sealed trait BaseReadWrite { } /** - * :: Experimental :: - * * Abstract class for utility classes that can save ML instances. */ -@Experimental @Since("1.6.0") abstract class MLWriter extends BaseReadWrite with Logging { @@ -138,11 +135,8 @@ abstract class MLWriter extends BaseReadWrite with Logging { } /** - * :: Experimental :: - * * Trait for classes that provide [[MLWriter]]. */ -@Experimental @Since("1.6.0") trait MLWritable { @@ -178,13 +172,10 @@ trait DefaultParamsWritable extends MLWritable { self: Params => } /** - * :: Experimental :: - * * Abstract class for utility classes that can load ML instances. * * @tparam T ML instance type */ -@Experimental @Since("1.6.0") abstract class MLReader[T] extends BaseReadWrite { @@ -202,13 +193,10 @@ abstract class MLReader[T] extends BaseReadWrite { } /** - * :: Experimental :: - * * Trait for objects that provide [[MLReader]]. * * @tparam T ML instance type */ -@Experimental @Since("1.6.0") trait MLReadable[T] { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala index 96b49bcc0a..48bae4276c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala @@ -38,7 +38,7 @@ import org.apache.spark.storage.StorageLevel */ @Since("1.4.0") @DeveloperApi -sealed trait LDAOptimizer { +trait LDAOptimizer { /* DEVELOPERS NOTE: diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 8054a34db3..5fe4bab186 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -1138,8 +1138,6 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasMaxIter, HasTol, HasSeed, HasStepSize, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Classifier trainer based on the Multilayer Perceptron. Each layer has sigmoid activation function, output layer has softmax. Number of inputs has to be equal to the size of feature vectors. @@ -1311,8 +1309,6 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, class MultilayerPerceptronClassificationModel(JavaModel, JavaPredictionModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by MultilayerPerceptronClassifier. .. versionadded:: 1.6.0 diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index b29b5ac70e..7f8d845564 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -87,8 +87,6 @@ class ClusteringSummary(JavaWrapper): class GaussianMixtureModel(JavaModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by GaussianMixture. .. versionadded:: 2.0.0 @@ -141,8 +139,6 @@ class GaussianMixtureModel(JavaModel, JavaMLWritable, JavaMLReadable): class GaussianMixture(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol, HasSeed, HasProbabilityCol, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - GaussianMixture clustering. This class performs expectation maximization for multivariate Gaussian Mixture Models (GMMs). A GMM represents a composite distribution of @@ -441,8 +437,6 @@ class KMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol class BisectingKMeansModel(JavaModel, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - Model fitted by BisectingKMeans. .. versionadded:: 2.0.0 @@ -487,8 +481,6 @@ class BisectingKMeansModel(JavaModel, JavaMLWritable, JavaMLReadable): class BisectingKMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasSeed, JavaMLWritable, JavaMLReadable): """ - .. note:: Experimental - A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques" by Steinbach, Karypis, and Kumar, with modification to fit Spark. The algorithm starts from a single cluster that contains all points. @@ -619,8 +611,6 @@ class BisectingKMeansSummary(ClusteringSummary): @inherit_doc class LDAModel(JavaModel): """ - .. note:: Experimental - Latent Dirichlet Allocation (LDA) model. This abstraction permits for different underlying representations, including local and distributed data structures. @@ -697,8 +687,6 @@ class LDAModel(JavaModel): @inherit_doc class DistributedLDAModel(LDAModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Distributed model fitted by :py:class:`LDA`. This type of model is currently only produced by Expectation-Maximization (EM). @@ -761,8 +749,6 @@ class DistributedLDAModel(LDAModel, JavaMLReadable, JavaMLWritable): @inherit_doc class LocalLDAModel(LDAModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Local (non-distributed) model fitted by :py:class:`LDA`. This model stores the inferred topics only; it does not store info about the training dataset. @@ -775,8 +761,6 @@ class LocalLDAModel(LDAModel, JavaMLReadable, JavaMLWritable): class LDA(JavaEstimator, HasFeaturesCol, HasMaxIter, HasSeed, HasCheckpointInterval, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Latent Dirichlet Allocation (LDA), a topic model designed for text documents. Terminology: diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 40b63d4d31..aada38d1ad 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -654,8 +654,6 @@ class IDFModel(JavaModel, JavaMLReadable, JavaMLWritable): @inherit_doc class MaxAbsScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Rescale each feature individually to range [-1, 1] by dividing through the largest maximum absolute value in each feature. It does not shift/center the data, and thus does not destroy any sparsity. @@ -715,8 +713,6 @@ class MaxAbsScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav class MaxAbsScalerModel(JavaModel, JavaMLReadable, JavaMLWritable): """ - .. note:: Experimental - Model fitted by :py:class:`MaxAbsScaler`. .. versionadded:: 2.0.0 diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py index bec4b28952..c65b3d14be 100644 --- a/python/pyspark/ml/util.py +++ b/python/pyspark/ml/util.py @@ -62,8 +62,6 @@ class Identifiable(object): @inherit_doc class MLWriter(object): """ - .. note:: Experimental - Utility class that can save ML instances. .. versionadded:: 2.0.0 @@ -129,8 +127,6 @@ class JavaMLWriter(MLWriter): @inherit_doc class MLWritable(object): """ - .. note:: Experimental - Mixin for ML instances that provide :py:class:`MLWriter`. .. versionadded:: 2.0.0 @@ -159,8 +155,6 @@ class JavaMLWritable(MLWritable): @inherit_doc class MLReader(object): """ - .. note:: Experimental - Utility class that can load ML instances. .. versionadded:: 2.0.0 @@ -242,8 +236,6 @@ class JavaMLReader(MLReader): @inherit_doc class MLReadable(object): """ - .. note:: Experimental - Mixin for instances that provide :py:class:`MLReader`. .. versionadded:: 2.0.0 -- GitLab