From 1ec0a0dc2819d3db3555799cb78c2946f652bff4 Mon Sep 17 00:00:00 2001 From: Bhargav Mangipudi <bhargav.mangipudi@gmail.com> Date: Fri, 16 Oct 2015 14:36:05 -0700 Subject: [PATCH] =?UTF-8?q?[SPARK-11050]=20[MLLIB]=20PySpark=20SparseVecto?= =?UTF-8?q?r=20can=20return=20wrong=20index=20in=20e=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …rror message For negative indices, SparseVector adjusts the index by adding the vector size before checking it against the bounds. If the index is out of bounds, the error message therefore reports the *updated* index instead of the original one passed by the caller. This change performs the bounds check on the original index, before the adjustment, so the error message reports the original value. Author: Bhargav Mangipudi <bhargav.mangipudi@gmail.com> Closes #9069 from bhargav/spark-10759. --- python/pyspark/mllib/linalg/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index d903b9030d..5276eb41cf 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -764,10 +764,11 @@ class SparseVector(Vector): if not isinstance(index, int): raise TypeError( "Indices must be of type integer, got type %s" % type(index)) + + if index >= self.size or index < -self.size: + raise ValueError("Index %d out of bounds." % index) if index < 0: index += self.size - if index >= self.size or index < 0: - raise ValueError("Index %d out of bounds." % index) insert_index = np.searchsorted(inds, index) if insert_index >= inds.size: -- GitLab