From 007da1a9dc3bb912da841cc0f5832a4fa28e6d9d Mon Sep 17 00:00:00 2001 From: Joshi <rekhajoshm@gmail.com> Date: Wed, 6 Jan 2016 10:48:14 -0800 Subject: [PATCH] [SPARK-11531][ML] SparseVector error Msg PySpark SparseVector should have "Found duplicate indices" error message Author: Joshi <rekhajoshm@gmail.com> Author: Rekha Joshi <rekhajoshm@gmail.com> Closes #9525 from rekhajoshm/SPARK-11531. --- python/pyspark/mllib/linalg/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index ae9ce58450..131b855bf9 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -528,7 +528,9 @@ class SparseVector(Vector): assert len(self.indices) == len(self.values), "index and value arrays not same length" for i in xrange(len(self.indices) - 1): if self.indices[i] >= self.indices[i + 1]: - raise TypeError("indices array must be sorted") + raise TypeError( + "Indices %s and %s are not strictly increasing" + % (self.indices[i], self.indices[i + 1])) def numNonzeros(self): """ -- GitLab