From 7a8a81d79f4bee3395fb399ccc4d47744f8a0951 Mon Sep 17 00:00:00 2001
From: WeichenXu <WeichenXu123@outlook.com>
Date: Sat, 3 Sep 2016 09:52:53 +0100
Subject: [PATCH] [SPARK-17363][ML][MLLIB] fix
 MultivariantOnlineSummerizer.numNonZeros

## What changes were proposed in this pull request?

fix `MultivariantOnlineSummerizer.numNonZeros` method,
return `nnz` array, instead of  `weightSum` array

## How was this patch tested?

Existing test.

Author: WeichenXu <WeichenXu123@outlook.com>

Closes #14923 from WeichenXu123/fix_MultivariantOnlineSummerizer_numNonZeros.
---
 .../spark/mllib/stat/MultivariateOnlineSummarizer.scala       | 4 ++--
 .../spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
index 964f419d12..7a2a7a35a9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
@@ -231,9 +231,9 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
    */
   @Since("1.1.0")
   override def numNonzeros: Vector = {
-    require(totalWeightSum > 0, s"Nothing has been added to this summarizer.")
+    require(totalCnt > 0, s"Nothing has been added to this summarizer.")
 
-    Vectors.dense(weightSum)
+    Vectors.dense(nnz.map(_.toDouble))
   }
 
   /**
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala
index 165a3f314a..797e84fcc7 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala
@@ -237,7 +237,7 @@ class MultivariateOnlineSummarizerSuite extends SparkFunSuite {
       absTol 1E-10, "mean mismatch")
     assert(summarizer.variance ~== Vectors.dense(Array(0.17657142857, 1.645115714, 2.42057142857))
       absTol 1E-8, "variance mismatch")
-    assert(summarizer.numNonzeros ~== Vectors.dense(Array(0.3, 0.5, 0.4))
+    assert(summarizer.numNonzeros ~== Vectors.dense(Array(3.0, 4.0, 3.0))
       absTol 1E-10, "numNonzeros mismatch")
     assert(summarizer.max ~== Vectors.dense(Array(0.0, 1.7, 1.3)) absTol 1E-10, "max mismatch")
     assert(summarizer.min ~== Vectors.dense(Array(-0.8, -1.2, -1.7)) absTol 1E-10, "min mismatch")
-- 
GitLab