From fc957dc78138e72036dbbadc9a54f155d318c038 Mon Sep 17 00:00:00 2001 From: lewuathe <lewuathe@me.com> Date: Tue, 7 Apr 2015 14:36:57 -0700 Subject: [PATCH] [SPARK-6720][MLLIB] PySpark MultivariateStatisticalSummary unit test for normL1... ... and normL2. Add test cases to insufficient unit test for `normL1` and `normL2`. Ref: https://github.com/apache/spark/pull/5359 Author: lewuathe <lewuathe@me.com> Closes #5374 from Lewuathe/SPARK-6720 and squashes the following commits: 5541b24 [lewuathe] More accurate tests dc5718c [lewuathe] [SPARK-6720] PySpark MultivariateStatisticalSummary unit test for normL1 and normL2 --- python/pyspark/mllib/tests.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index 47dad7d12e..61ef398487 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -363,6 +363,13 @@ class StatTests(PySparkTestCase): self.assertEqual(10, len(summary.normL1())) self.assertEqual(10, len(summary.normL2())) + data2 = self.sc.parallelize(xrange(10)).map(lambda x: Vectors.dense(x)) + summary2 = Statistics.colStats(data2) + self.assertEqual(array([45.0]), summary2.normL1()) + import math + expectedNormL2 = math.sqrt(sum(map(lambda x: x*x, xrange(10)))) + self.assertTrue(math.fabs(summary2.normL2()[0] - expectedNormL2) < 1e-14) + class VectorUDTTests(PySparkTestCase): -- GitLab