From 8ead999fd627b12837fb2f082a0e76e9d121d269 Mon Sep 17 00:00:00 2001
From: Davies Liu <davies@databricks.com>
Date: Tue, 13 Jan 2015 12:50:31 -0800
Subject: [PATCH] [SPARK-5223] [MLlib] [PySpark] fix MapConverter and
 ListConverter in MLlib

It will introduce problems if the object in dict/list/tuple can not support by py4j, such as Vector.
Also, pickle may have better performance for larger object (less RPC).

In some cases that the object in dict/list can not be pickled (such as JavaObject), we should still use MapConvert/ListConvert.

This PR should be ported into branch-1.2

Author: Davies Liu <davies@databricks.com>

Closes #4023 from davies/listconvert and squashes the following commits:

55d4ab2 [Davies Liu] fix MapConverter and ListConverter in MLlib
---
 python/pyspark/mllib/common.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py
index 33c49e2399..3c5ee66cd8 100644
--- a/python/pyspark/mllib/common.py
+++ b/python/pyspark/mllib/common.py
@@ -18,7 +18,7 @@
 import py4j.protocol
 from py4j.protocol import Py4JJavaError
 from py4j.java_gateway import JavaObject
-from py4j.java_collections import MapConverter, ListConverter, JavaArray, JavaList
+from py4j.java_collections import ListConverter, JavaArray, JavaList
 
 from pyspark import RDD, SparkContext
 from pyspark.serializers import PickleSerializer, AutoBatchedSerializer
@@ -70,9 +70,7 @@ def _py2java(sc, obj):
         obj = _to_java_object_rdd(obj)
     elif isinstance(obj, SparkContext):
         obj = obj._jsc
-    elif isinstance(obj, dict):
-        obj = MapConverter().convert(obj, sc._gateway._gateway_client)
-    elif isinstance(obj, (list, tuple)):
+    elif isinstance(obj, list) and (obj or isinstance(obj[0], JavaObject)):
         obj = ListConverter().convert(obj, sc._gateway._gateway_client)
     elif isinstance(obj, JavaObject):
         pass
-- 
GitLab