diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index a4e60f916b5c82a598785726a783ccca4dd4a4f5..55bde6d0ea4fbbc620624ffc6dfc59ade8cb815c 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -178,6 +178,7 @@ class CountVectorizer(JavaEstimator, HasInputCol, HasOutputCol): .. note:: Experimental Extracts a vocabulary from document collections and generates a :py:attr:`CountVectorizerModel`. + >>> df = sqlContext.createDataFrame( ... [(0, ["a", "b", "c"]), (1, ["a", "b", "b", "c", "a"])], ... ["label", "raw"])