From 2ef016b130a48869cf81fe6cf147ef2b1e79d674 Mon Sep 17 00:00:00 2001 From: Manish Amde <manish9ue@gmail.com> Date: Tue, 11 Nov 2014 22:47:53 -0800 Subject: [PATCH] [MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time. Before: [info] GradientBoostingSuite: [info] - Regression with continuous features: SquaredError (22 seconds, 115 milliseconds) [info] - Regression with continuous features: Absolute Error (19 seconds, 330 milliseconds) [info] - Binary classification with continuous features: Log Loss (19 seconds, 17 milliseconds) After: [info] - Regression with continuous features: SquaredError (7 seconds, 69 milliseconds) [info] - Regression with continuous features: Absolute Error (4 seconds, 617 milliseconds) [info] - Binary classification with continuous features: Log Loss (4 seconds, 658 milliseconds) cc: mengxr, jkbradley Author: Manish Amde <manish9ue@gmail.com> Closes #3214 from manishamde/gbt_test_speedup and squashes the following commits: 8994552 [Manish Amde] reducing gbt test run times --- .../spark/mllib/tree/GradientBoostingSuite.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala index 99a02eda60..ae0028a688 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostingSuite.scala @@ -35,7 +35,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { test("Regression with continuous features: SquaredError") { GradientBoostingSuite.testCombinations.foreach { case (numIterations, learningRate, subsamplingRate) => - val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000) + val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100) val rdd = sc.parallelize(arr) val categoricalFeaturesInfo = Map.empty[Int, Int] @@ -53,7 +53,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { assert(gbt.weakHypotheses.size === numIterations) val gbtTree = gbt.weakHypotheses(0) - EnsembleTestHelper.validateRegressor(gbt, arr, 0.02) + EnsembleTestHelper.validateRegressor(gbt, arr, 0.03) // Make sure trees are the same. assert(gbtTree.toString == dt.toString) @@ -63,7 +63,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { test("Regression with continuous features: Absolute Error") { GradientBoostingSuite.testCombinations.foreach { case (numIterations, learningRate, subsamplingRate) => - val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000) + val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100) val rdd = sc.parallelize(arr) val categoricalFeaturesInfo = Map.empty[Int, Int] @@ -81,7 +81,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { assert(gbt.weakHypotheses.size === numIterations) val gbtTree = gbt.weakHypotheses(0) - EnsembleTestHelper.validateRegressor(gbt, arr, 0.02) + EnsembleTestHelper.validateRegressor(gbt, arr, 0.03) // Make sure trees are the same. assert(gbtTree.toString == dt.toString) @@ -91,7 +91,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { test("Binary classification with continuous features: Log Loss") { GradientBoostingSuite.testCombinations.foreach { case (numIterations, learningRate, subsamplingRate) => - val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000) + val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100) val rdd = sc.parallelize(arr) val categoricalFeaturesInfo = Map.empty[Int, Int] -- GitLab