From f9b6bf2f83d9dad273aa36d65d0560d35b941cc2 Mon Sep 17 00:00:00 2001
From: Yanbo Liang <ybliang8@gmail.com>
Date: Mon, 29 Jun 2015 18:50:23 -0700
Subject: [PATCH] [SPARK-7667] [MLLIB] MLlib Python API consistency check

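MLlib Python API consistency check for python/pyspark/mllib/feature.py:

* Add a docstring to JavaVectorTransformer.transform, noting that it
  accepts a Vector or an RDD[Vector] and cannot be called from within
  an RDD transformation or action.
* Rename the parameter documented in the StandardScaler docstring
  from `data` to `dataset`.
* Remove the duplicated RDD check and vector conversion in IDFModel;
  it now delegates directly to JavaVectorTransformer.transform.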

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #6856 from yanboliang/spark-7667 and squashes the following commits:

21bae35 [Yanbo Liang] remove duplicate code
eb12f95 [Yanbo Liang] fix doc inherit problem
9e7ec3c [Yanbo Liang] address comments
e763d32 [Yanbo Liang] MLlib Python API consistency check
---
 python/pyspark/mllib/feature.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index f00bb93b7b..b5138773fd 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -111,6 +111,15 @@ class JavaVectorTransformer(JavaModelWrapper, VectorTransformer):
     """
 
     def transform(self, vector):
+        """
+        Applies transformation on a vector or an RDD[Vector].
+
+        Note: In Python, transform cannot currently be used within
+              an RDD transformation or action.
+              Call transform directly on the RDD instead.
+
+        :param vector: Vector or RDD of Vector to be transformed.
+        """
         if isinstance(vector, RDD):
             vector = vector.map(_convert_to_vector)
         else:
@@ -191,7 +200,7 @@ class StandardScaler(object):
         Computes the mean and variance and stores as a model to be used
         for later scaling.
 
-        :param data: The data used to compute the mean and variance
+        :param dataset: The data used to compute the mean and variance
                      to build the transformation model.
         :return: a StandardScalarModel
         """
@@ -346,10 +355,6 @@ class IDFModel(JavaVectorTransformer):
                   vector
         :return: an RDD of TF-IDF vectors or a TF-IDF vector
         """
-        if isinstance(x, RDD):
-            return JavaVectorTransformer.transform(self, x)
-
-        x = _convert_to_vector(x)
         return JavaVectorTransformer.transform(self, x)
 
     def idf(self):
-- 
GitLab