Skip to content
Snippets Groups Projects
Commit 2ef016b1 authored by Manish Amde, committed by Xiangrui Meng
Browse files

[MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time.

Before:
[info] GradientBoostingSuite:
[info] - Regression with continuous features: SquaredError (22 seconds, 115 milliseconds)
[info] - Regression with continuous features: Absolute Error (19 seconds, 330 milliseconds)
[info] - Binary classification with continuous features: Log Loss (19 seconds, 17 milliseconds)

After:
[info] - Regression with continuous features: SquaredError (7 seconds, 69 milliseconds)
[info] - Regression with continuous features: Absolute Error (4 seconds, 617 milliseconds)
[info] - Binary classification with continuous features: Log Loss (4 seconds, 658 milliseconds)

cc: mengxr, jkbradley

Author: Manish Amde <manish9ue@gmail.com>

Closes #3214 from manishamde/gbt_test_speedup and squashes the following commits:

8994552 [Manish Amde] reducing gbt test run times
parent daaca14c
No related branches found
No related tags found
No related merge requests found
...@@ -35,7 +35,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { ...@@ -35,7 +35,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
test("Regression with continuous features: SquaredError") { test("Regression with continuous features: SquaredError") {
GradientBoostingSuite.testCombinations.foreach { GradientBoostingSuite.testCombinations.foreach {
case (numIterations, learningRate, subsamplingRate) => case (numIterations, learningRate, subsamplingRate) =>
val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000) val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
val rdd = sc.parallelize(arr) val rdd = sc.parallelize(arr)
val categoricalFeaturesInfo = Map.empty[Int, Int] val categoricalFeaturesInfo = Map.empty[Int, Int]
...@@ -53,7 +53,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { ...@@ -53,7 +53,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
assert(gbt.weakHypotheses.size === numIterations) assert(gbt.weakHypotheses.size === numIterations)
val gbtTree = gbt.weakHypotheses(0) val gbtTree = gbt.weakHypotheses(0)
EnsembleTestHelper.validateRegressor(gbt, arr, 0.02) EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)
// Make sure trees are the same. // Make sure trees are the same.
assert(gbtTree.toString == dt.toString) assert(gbtTree.toString == dt.toString)
...@@ -63,7 +63,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { ...@@ -63,7 +63,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
test("Regression with continuous features: Absolute Error") { test("Regression with continuous features: Absolute Error") {
GradientBoostingSuite.testCombinations.foreach { GradientBoostingSuite.testCombinations.foreach {
case (numIterations, learningRate, subsamplingRate) => case (numIterations, learningRate, subsamplingRate) =>
val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000) val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
val rdd = sc.parallelize(arr) val rdd = sc.parallelize(arr)
val categoricalFeaturesInfo = Map.empty[Int, Int] val categoricalFeaturesInfo = Map.empty[Int, Int]
...@@ -81,7 +81,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { ...@@ -81,7 +81,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
assert(gbt.weakHypotheses.size === numIterations) assert(gbt.weakHypotheses.size === numIterations)
val gbtTree = gbt.weakHypotheses(0) val gbtTree = gbt.weakHypotheses(0)
EnsembleTestHelper.validateRegressor(gbt, arr, 0.02) EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)
// Make sure trees are the same. // Make sure trees are the same.
assert(gbtTree.toString == dt.toString) assert(gbtTree.toString == dt.toString)
...@@ -91,7 +91,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext { ...@@ -91,7 +91,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
test("Binary classification with continuous features: Log Loss") { test("Binary classification with continuous features: Log Loss") {
GradientBoostingSuite.testCombinations.foreach { GradientBoostingSuite.testCombinations.foreach {
case (numIterations, learningRate, subsamplingRate) => case (numIterations, learningRate, subsamplingRate) =>
val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000) val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
val rdd = sc.parallelize(arr) val rdd = sc.parallelize(arr)
val categoricalFeaturesInfo = Map.empty[Int, Int] val categoricalFeaturesInfo = Map.empty[Int, Int]
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment