Skip to content
Snippets Groups Projects
Commit 0e2f2163 authored by noelsmith, committed by Xiangrui Meng
Browse files

[SPARK-10094] Pyspark ML Feature transformers marked as experimental

Modified class-level docstrings to mark all feature transformers in pyspark.ml as experimental.

Author: noelsmith <mail@noelsmith.com>

Closes #8623 from noel-smith/SPARK-10094-mark-pyspark-ml-trans-exp.
parent 3a11e50e
No related branches found
No related tags found
No related merge requests found
...@@ -36,6 +36,8 @@ __all__ = ['Binarizer', 'Bucketizer', 'DCT', 'ElementwiseProduct', 'HashingTF', ...@@ -36,6 +36,8 @@ __all__ = ['Binarizer', 'Bucketizer', 'DCT', 'ElementwiseProduct', 'HashingTF',
@inherit_doc @inherit_doc
class Binarizer(JavaTransformer, HasInputCol, HasOutputCol): class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
Binarize a column of continuous features given a threshold. Binarize a column of continuous features given a threshold.
>>> df = sqlContext.createDataFrame([(0.5,)], ["values"]) >>> df = sqlContext.createDataFrame([(0.5,)], ["values"])
...@@ -92,6 +94,8 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol): ...@@ -92,6 +94,8 @@ class Binarizer(JavaTransformer, HasInputCol, HasOutputCol):
@inherit_doc @inherit_doc
class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol): class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
Maps a column of continuous features to a column of feature buckets. Maps a column of continuous features to a column of feature buckets.
>>> df = sqlContext.createDataFrame([(0.1,), (0.4,), (1.2,), (1.5,)], ["values"]) >>> df = sqlContext.createDataFrame([(0.1,), (0.4,), (1.2,), (1.5,)], ["values"])
...@@ -169,6 +173,8 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol): ...@@ -169,6 +173,8 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol):
@inherit_doc @inherit_doc
class DCT(JavaTransformer, HasInputCol, HasOutputCol): class DCT(JavaTransformer, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
A feature transformer that takes the 1D discrete cosine transform A feature transformer that takes the 1D discrete cosine transform
of a real vector. No zero padding is performed on the input vector. of a real vector. No zero padding is performed on the input vector.
It returns a real vector of the same length representing the DCT. It returns a real vector of the same length representing the DCT.
...@@ -232,6 +238,8 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol): ...@@ -232,6 +238,8 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol):
@inherit_doc @inherit_doc
class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol): class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
Outputs the Hadamard product (i.e., the element-wise product) of each input vector Outputs the Hadamard product (i.e., the element-wise product) of each input vector
with a provided "weight" vector. In other words, it scales each column of the dataset with a provided "weight" vector. In other words, it scales each column of the dataset
by a scalar multiplier. by a scalar multiplier.
...@@ -289,6 +297,8 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol): ...@@ -289,6 +297,8 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol):
@inherit_doc @inherit_doc
class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures): class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures):
""" """
.. note:: Experimental
Maps a sequence of terms to their term frequencies using the Maps a sequence of terms to their term frequencies using the
hashing trick. hashing trick.
...@@ -327,6 +337,8 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures): ...@@ -327,6 +337,8 @@ class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures):
@inherit_doc @inherit_doc
class IDF(JavaEstimator, HasInputCol, HasOutputCol): class IDF(JavaEstimator, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
Compute the Inverse Document Frequency (IDF) given a collection of documents. Compute the Inverse Document Frequency (IDF) given a collection of documents.
>>> from pyspark.mllib.linalg import DenseVector >>> from pyspark.mllib.linalg import DenseVector
...@@ -387,6 +399,8 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol): ...@@ -387,6 +399,8 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol):
class IDFModel(JavaModel): class IDFModel(JavaModel):
""" """
.. note:: Experimental
Model fitted by IDF. Model fitted by IDF.
""" """
...@@ -395,6 +409,8 @@ class IDFModel(JavaModel): ...@@ -395,6 +409,8 @@ class IDFModel(JavaModel):
@ignore_unicode_prefix @ignore_unicode_prefix
class NGram(JavaTransformer, HasInputCol, HasOutputCol): class NGram(JavaTransformer, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
A feature transformer that converts the input array of strings into an array of n-grams. Null A feature transformer that converts the input array of strings into an array of n-grams. Null
values in the input array are ignored. values in the input array are ignored.
It returns an array of n-grams where each n-gram is represented by a space-separated string of It returns an array of n-grams where each n-gram is represented by a space-separated string of
...@@ -463,6 +479,8 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol): ...@@ -463,6 +479,8 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol):
@inherit_doc @inherit_doc
class Normalizer(JavaTransformer, HasInputCol, HasOutputCol): class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
Normalize a vector to have unit norm using the given p-norm. Normalize a vector to have unit norm using the given p-norm.
>>> from pyspark.mllib.linalg import Vectors >>> from pyspark.mllib.linalg import Vectors
...@@ -519,6 +537,8 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol): ...@@ -519,6 +537,8 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol):
@inherit_doc @inherit_doc
class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol): class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
A one-hot encoder that maps a column of category indices to a A one-hot encoder that maps a column of category indices to a
column of binary vectors, with at most a single one-value per row column of binary vectors, with at most a single one-value per row
that indicates the input category index. that indicates the input category index.
...@@ -591,6 +611,8 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol): ...@@ -591,6 +611,8 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol):
@inherit_doc @inherit_doc
class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol): class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
Perform feature expansion in a polynomial space. As said in wikipedia of Polynomial Expansion, Perform feature expansion in a polynomial space. As said in wikipedia of Polynomial Expansion,
which is available at `http://en.wikipedia.org/wiki/Polynomial_expansion`, "In mathematics, an which is available at `http://en.wikipedia.org/wiki/Polynomial_expansion`, "In mathematics, an
expansion of a product of sums expresses it as a sum of products by using the fact that expansion of a product of sums expresses it as a sum of products by using the fact that
...@@ -649,6 +671,8 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol): ...@@ -649,6 +671,8 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol):
@ignore_unicode_prefix @ignore_unicode_prefix
class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
A regex based tokenizer that extracts tokens either by using the A regex based tokenizer that extracts tokens either by using the
provided regex pattern (in Java dialect) to split the text provided regex pattern (in Java dialect) to split the text
(default) or repeatedly matching the regex (if gaps is false). (default) or repeatedly matching the regex (if gaps is false).
...@@ -746,6 +770,8 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): ...@@ -746,6 +770,8 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
@inherit_doc @inherit_doc
class SQLTransformer(JavaTransformer): class SQLTransformer(JavaTransformer):
""" """
.. note:: Experimental
Implements the transforms which are defined by SQL statement. Implements the transforms which are defined by SQL statement.
Currently we only support SQL syntax like 'SELECT ... FROM __THIS__' Currently we only support SQL syntax like 'SELECT ... FROM __THIS__'
where '__THIS__' represents the underlying table of the input dataset. where '__THIS__' represents the underlying table of the input dataset.
...@@ -797,6 +823,8 @@ class SQLTransformer(JavaTransformer): ...@@ -797,6 +823,8 @@ class SQLTransformer(JavaTransformer):
@inherit_doc @inherit_doc
class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol): class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
Standardizes features by removing the mean and scaling to unit variance using column summary Standardizes features by removing the mean and scaling to unit variance using column summary
statistics on the samples in the training set. statistics on the samples in the training set.
...@@ -870,6 +898,8 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol): ...@@ -870,6 +898,8 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol):
class StandardScalerModel(JavaModel): class StandardScalerModel(JavaModel):
""" """
.. note:: Experimental
Model fitted by StandardScaler. Model fitted by StandardScaler.
""" """
...@@ -891,6 +921,8 @@ class StandardScalerModel(JavaModel): ...@@ -891,6 +921,8 @@ class StandardScalerModel(JavaModel):
@inherit_doc @inherit_doc
class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol): class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
A label indexer that maps a string column of labels to an ML column of label indices. A label indexer that maps a string column of labels to an ML column of label indices.
If the input column is numeric, we cast it to string and index the string values. If the input column is numeric, we cast it to string and index the string values.
The indices are in [0, numLabels), ordered by label frequencies. The indices are in [0, numLabels), ordered by label frequencies.
...@@ -929,6 +961,8 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol): ...@@ -929,6 +961,8 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol):
class StringIndexerModel(JavaModel): class StringIndexerModel(JavaModel):
""" """
.. note:: Experimental
Model fitted by StringIndexer. Model fitted by StringIndexer.
""" """
...@@ -1006,6 +1040,8 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol): ...@@ -1006,6 +1040,8 @@ class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol):
@ignore_unicode_prefix @ignore_unicode_prefix
class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol): class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
A tokenizer that converts the input string to lowercase and then A tokenizer that converts the input string to lowercase and then
splits it by white spaces. splits it by white spaces.
...@@ -1051,6 +1087,8 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol): ...@@ -1051,6 +1087,8 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol):
@inherit_doc @inherit_doc
class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol): class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol):
""" """
.. note:: Experimental
A feature transformer that merges multiple columns into a vector column. A feature transformer that merges multiple columns into a vector column.
>>> df = sqlContext.createDataFrame([(1, 0, 3)], ["a", "b", "c"]) >>> df = sqlContext.createDataFrame([(1, 0, 3)], ["a", "b", "c"])
...@@ -1087,6 +1125,8 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol): ...@@ -1087,6 +1125,8 @@ class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol):
@inherit_doc @inherit_doc
class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol): class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
Class for indexing categorical feature columns in a dataset of [[Vector]]. Class for indexing categorical feature columns in a dataset of [[Vector]].
This has 2 usage modes: This has 2 usage modes:
...@@ -1186,6 +1226,8 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol): ...@@ -1186,6 +1226,8 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol):
class VectorIndexerModel(JavaModel): class VectorIndexerModel(JavaModel):
""" """
.. note:: Experimental
Model fitted by VectorIndexer. Model fitted by VectorIndexer.
""" """
...@@ -1194,6 +1236,8 @@ class VectorIndexerModel(JavaModel): ...@@ -1194,6 +1236,8 @@ class VectorIndexerModel(JavaModel):
@ignore_unicode_prefix @ignore_unicode_prefix
class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol): class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further
natural language processing or machine learning process. natural language processing or machine learning process.
...@@ -1307,6 +1351,8 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has ...@@ -1307,6 +1351,8 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
class Word2VecModel(JavaModel): class Word2VecModel(JavaModel):
""" """
.. note:: Experimental
Model fitted by Word2Vec. Model fitted by Word2Vec.
""" """
...@@ -1332,6 +1378,8 @@ class Word2VecModel(JavaModel): ...@@ -1332,6 +1378,8 @@ class Word2VecModel(JavaModel):
@inherit_doc @inherit_doc
class PCA(JavaEstimator, HasInputCol, HasOutputCol): class PCA(JavaEstimator, HasInputCol, HasOutputCol):
""" """
.. note:: Experimental
PCA trains a model to project vectors to a low-dimensional space using PCA. PCA trains a model to project vectors to a low-dimensional space using PCA.
>>> from pyspark.mllib.linalg import Vectors >>> from pyspark.mllib.linalg import Vectors
...@@ -1387,6 +1435,8 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol): ...@@ -1387,6 +1435,8 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol):
class PCAModel(JavaModel): class PCAModel(JavaModel):
""" """
.. note:: Experimental
Model fitted by PCA. Model fitted by PCA.
""" """
...@@ -1470,6 +1520,8 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol): ...@@ -1470,6 +1520,8 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol):
class RFormulaModel(JavaModel): class RFormulaModel(JavaModel):
""" """
.. note:: Experimental
Model fitted by :py:class:`RFormula`. Model fitted by :py:class:`RFormula`.
""" """
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment