From 2db6a853a53b4c25e35983bc489510abb8a73e1d Mon Sep 17 00:00:00 2001
From: Xiangrui Meng <meng@databricks.com>
Date: Mon, 2 Mar 2015 17:14:34 -0800
Subject: [PATCH] [SPARK-6121][SQL][MLLIB] simpleString for UDT

`df.dtypes` shows `null` for UDTs. This PR makes `simpleString` return `udt` by default, and `VectorUDT` overrides it to return `vector`.

jkbradley davies

Author: Xiangrui Meng <meng@databricks.com>

Closes #4858 from mengxr/SPARK-6121 and squashes the following commits:

34f0a77 [Xiangrui Meng] simpleString for UDT
---
 python/pyspark/mllib/linalg.py | 3 +++
 python/pyspark/sql/types.py    | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py
index 597012b1c9..f5aad28afd 100644
--- a/python/pyspark/mllib/linalg.py
+++ b/python/pyspark/mllib/linalg.py
@@ -152,6 +152,9 @@ class VectorUDT(UserDefinedType):
         else:
             raise ValueError("do not recognize type %r" % tpe)
 
+    def simpleString(self):
+        return "vector"
+
 
 class Vector(object):
 
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 31a861e1fe..0169028ccc 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -468,7 +468,7 @@ class UserDefinedType(DataType):
         raise NotImplementedError("UDT must implement deserialize().")
 
     def simpleString(self):
-        return 'null'
+        return 'udt'
 
     def json(self):
         return json.dumps(self.jsonValue(), separators=(',', ':'), sort_keys=True)
-- 
GitLab