Skip to content
Snippets Groups Projects
Commit 2ef016b1 authored by Manish Amde, committed by Xiangrui Meng
Browse files

[MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time.

Before:
[info] GradientBoostingSuite:
[info] - Regression with continuous features: SquaredError (22 seconds, 115 milliseconds)
[info] - Regression with continuous features: Absolute Error (19 seconds, 330 milliseconds)
[info] - Binary classification with continuous features: Log Loss (19 seconds, 17 milliseconds)

After:
[info] - Regression with continuous features: SquaredError (7 seconds, 69 milliseconds)
[info] - Regression with continuous features: Absolute Error (4 seconds, 617 milliseconds)
[info] - Binary classification with continuous features: Log Loss (4 seconds, 658 milliseconds)

cc: mengxr, jkbradley

Author: Manish Amde <manish9ue@gmail.com>

Closes #3214 from manishamde/gbt_test_speedup and squashes the following commits:

8994552 [Manish Amde] reducing gbt test run times
parent daaca14c
No related branches found
No related tags found
No related merge requests found
......@@ -35,7 +35,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
test("Regression with continuous features: SquaredError") {
GradientBoostingSuite.testCombinations.foreach {
case (numIterations, learningRate, subsamplingRate) =>
- val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
+ val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
val rdd = sc.parallelize(arr)
val categoricalFeaturesInfo = Map.empty[Int, Int]
......@@ -53,7 +53,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
assert(gbt.weakHypotheses.size === numIterations)
val gbtTree = gbt.weakHypotheses(0)
- EnsembleTestHelper.validateRegressor(gbt, arr, 0.02)
+ EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)
// Make sure trees are the same.
assert(gbtTree.toString == dt.toString)
......@@ -63,7 +63,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
test("Regression with continuous features: Absolute Error") {
GradientBoostingSuite.testCombinations.foreach {
case (numIterations, learningRate, subsamplingRate) =>
- val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
+ val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
val rdd = sc.parallelize(arr)
val categoricalFeaturesInfo = Map.empty[Int, Int]
......@@ -81,7 +81,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
assert(gbt.weakHypotheses.size === numIterations)
val gbtTree = gbt.weakHypotheses(0)
- EnsembleTestHelper.validateRegressor(gbt, arr, 0.02)
+ EnsembleTestHelper.validateRegressor(gbt, arr, 0.03)
// Make sure trees are the same.
assert(gbtTree.toString == dt.toString)
......@@ -91,7 +91,7 @@ class GradientBoostingSuite extends FunSuite with LocalSparkContext {
test("Binary classification with continuous features: Log Loss") {
GradientBoostingSuite.testCombinations.foreach {
case (numIterations, learningRate, subsamplingRate) =>
- val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)
+ val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100)
val rdd = sc.parallelize(arr)
val categoricalFeaturesInfo = Map.empty[Int, Int]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment