From 007da1a9dc3bb912da841cc0f5832a4fa28e6d9d Mon Sep 17 00:00:00 2001
From: Joshi <rekhajoshm@gmail.com>
Date: Wed, 6 Jan 2016 10:48:14 -0800
Subject: [PATCH] [SPARK-11531][ML] SparseVector error Msg

PySpark SparseVector should have "Found duplicate indices" error message

Author: Joshi <rekhajoshm@gmail.com>
Author: Rekha Joshi <rekhajoshm@gmail.com>

Closes #9525 from rekhajoshm/SPARK-11531.
---
 python/pyspark/mllib/linalg/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py
index ae9ce58450..131b855bf9 100644
--- a/python/pyspark/mllib/linalg/__init__.py
+++ b/python/pyspark/mllib/linalg/__init__.py
@@ -528,7 +528,9 @@ class SparseVector(Vector):
             assert len(self.indices) == len(self.values), "index and value arrays not same length"
             for i in xrange(len(self.indices) - 1):
                 if self.indices[i] >= self.indices[i + 1]:
-                    raise TypeError("indices array must be sorted")
+                    raise TypeError(
+                        "Indices %s and %s are not strictly increasing"
+                        % (self.indices[i], self.indices[i + 1]))
 
     def numNonzeros(self):
         """
-- 
GitLab