From 2db6a853a53b4c25e35983bc489510abb8a73e1d Mon Sep 17 00:00:00 2001 From: Xiangrui Meng <meng@databricks.com> Date: Mon, 2 Mar 2015 17:14:34 -0800 Subject: [PATCH] [SPARK-6121][SQL][MLLIB] simpleString for UDT `df.dtypes` shows `null` for UDTs. This PR uses `udt` by default and `VectorUDT` overrides it with `vector`. jkbradley davies Author: Xiangrui Meng <meng@databricks.com> Closes #4858 from mengxr/SPARK-6121 and squashes the following commits: 34f0a77 [Xiangrui Meng] simpleString for UDT --- python/pyspark/mllib/linalg.py | 3 +++ python/pyspark/sql/types.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py index 597012b1c9..f5aad28afd 100644 --- a/python/pyspark/mllib/linalg.py +++ b/python/pyspark/mllib/linalg.py @@ -152,6 +152,9 @@ class VectorUDT(UserDefinedType): else: raise ValueError("do not recognize type %r" % tpe) + def simpleString(self): + return "vector" + class Vector(object): diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 31a861e1fe..0169028ccc 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -468,7 +468,7 @@ class UserDefinedType(DataType): raise NotImplementedError("UDT must implement deserialize().") def simpleString(self): - return 'null' + return 'udt' def json(self): return json.dumps(self.jsonValue(), separators=(',', ':'), sort_keys=True) -- GitLab