From c48f2a3a5fd714ad2ff19b29337e55583988431e Mon Sep 17 00:00:00 2001
From: Sean Owen <sowen@cloudera.com>
Date: Tue, 12 Jan 2016 11:50:33 +0000
Subject: [PATCH] [SPARK-7615][MLLIB] MLLIB Word2Vec wordVectors divided by
 Euclidean Norm equals to zero

Cosine similarity with 0 vector should be 0

Related to https://github.com/apache/spark/pull/10152

Author: Sean Owen <sowen@cloudera.com>

Closes #10696 from srowen/SPARK-7615.
---
 .../scala/org/apache/spark/mllib/feature/Word2Vec.scala    | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index dc5d070890..dee898827f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -543,7 +543,12 @@ class Word2VecModel private[spark] (
     val cosVec = cosineVec.map(_.toDouble)
     var ind = 0
     while (ind < numWords) {
-      cosVec(ind) /= wordVecNorms(ind)
+      val norm = wordVecNorms(ind)
+      if (norm == 0.0) {
+        cosVec(ind) = 0.0
+      } else {
+        cosVec(ind) /= norm
+      }
       ind += 1
     }
     wordList.zip(cosVec)
-- 
GitLab