Commit c5912ecc authored by Xiangrui Meng

[FIX][MLLIB] fix seed in BaggedPointSuite

Saw Jenkins test failures caused by tests relying on unfixed random seeds.

jkbradley manishamde

Author: Xiangrui Meng <meng@databricks.com>

Closes #3084 from mengxr/fix-baggedpoint-suite and squashes the following commits:

f735a43 [Xiangrui Meng] fix seed in BaggedPointSuite
parent 4f035dd2
@@ -30,7 +30,7 @@ class BaggedPointSuite extends FunSuite with LocalSparkContext {
   test("BaggedPoint RDD: without subsampling") {
     val arr = EnsembleTestHelper.generateOrderedLabeledPoints(1, 1000)
     val rdd = sc.parallelize(arr)
-    val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, 1.0, 1, false)
+    val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, 1.0, 1, false, 42)
     baggedRDD.collect().foreach { baggedPoint =>
       assert(baggedPoint.subsampleWeights.size == 1 && baggedPoint.subsampleWeights(0) == 1)
     }
@@ -44,7 +44,7 @@ class BaggedPointSuite extends FunSuite with LocalSparkContext {
     val arr = EnsembleTestHelper.generateOrderedLabeledPoints(1, 1000)
     val rdd = sc.parallelize(arr)
     seeds.foreach { seed =>
-      val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, 1.0, numSubsamples, true)
+      val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, 1.0, numSubsamples, true, seed)
       val subsampleCounts: Array[Array[Double]] = baggedRDD.map(_.subsampleWeights).collect()
       EnsembleTestHelper.testRandomArrays(subsampleCounts, numSubsamples, expectedMean,
         expectedStddev, epsilon = 0.01)
@@ -60,7 +60,7 @@ class BaggedPointSuite extends FunSuite with LocalSparkContext {
     val arr = EnsembleTestHelper.generateOrderedLabeledPoints(1, 1000)
     val rdd = sc.parallelize(arr)
     seeds.foreach { seed =>
-      val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, subsample, numSubsamples, true)
+      val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, subsample, numSubsamples, true, seed)
       val subsampleCounts: Array[Array[Double]] = baggedRDD.map(_.subsampleWeights).collect()
       EnsembleTestHelper.testRandomArrays(subsampleCounts, numSubsamples, expectedMean,
         expectedStddev, epsilon = 0.01)
@@ -75,7 +75,7 @@ class BaggedPointSuite extends FunSuite with LocalSparkContext {
     val arr = EnsembleTestHelper.generateOrderedLabeledPoints(1, 1000)
     val rdd = sc.parallelize(arr)
     seeds.foreach { seed =>
-      val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, 1.0, numSubsamples, false)
+      val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, 1.0, numSubsamples, false, seed)
       val subsampleCounts: Array[Array[Double]] = baggedRDD.map(_.subsampleWeights).collect()
       EnsembleTestHelper.testRandomArrays(subsampleCounts, numSubsamples, expectedMean,
         expectedStddev, epsilon = 0.01)
@@ -91,7 +91,7 @@ class BaggedPointSuite extends FunSuite with LocalSparkContext {
     val arr = EnsembleTestHelper.generateOrderedLabeledPoints(1, 1000)
     val rdd = sc.parallelize(arr)
     seeds.foreach { seed =>
-      val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, subsample, numSubsamples, false)
+      val baggedRDD = BaggedPoint.convertToBaggedRDD(rdd, subsample, numSubsamples, false, seed)
       val subsampleCounts: Array[Array[Double]] = baggedRDD.map(_.subsampleWeights).collect()
       EnsembleTestHelper.testRandomArrays(subsampleCounts, numSubsamples, expectedMean,
         expectedStddev, epsilon = 0.01)
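
The change visible above passes an explicit seed to BaggedPoint.convertToBaggedRDD so the test's subsampling is deterministic across Jenkins runs. Below is a minimal, self-contained sketch of the same idea (not the MLlib code; every name in it is illustrative), showing why a pinned seed lets a statistical assertion use a fixed tolerance without flaking:

import scala.util.Random

// Sketch only: pinning the RNG seed makes a sampling-based test reproducible,
// so a CI run cannot fail on an unlucky draw. All names here are illustrative.
object FixedSeedSamplingSketch {

  // Draw `n` Bernoulli(p) subsample weights from an explicitly seeded RNG.
  def subsampleWeights(n: Int, p: Double, seed: Long): Array[Double] = {
    val rng = new Random(seed)
    Array.fill(n)(if (rng.nextDouble() < p) 1.0 else 0.0)
  }

  def main(args: Array[String]): Unit = {
    val a = subsampleWeights(1000, 0.5, seed = 42L)
    val b = subsampleWeights(1000, 0.5, seed = 42L)
    // Same seed, same draws: the statistical check below can use a fixed
    // tolerance without ever flaking across runs.
    assert(a.sameElements(b))
    val mean = a.sum / a.length
    assert(math.abs(mean - 0.5) < 0.05)
    println(s"empirical mean with seed 42: $mean")
  }
}

Re-running this object always produces identical draws, whereas an unseeded Random would occasionally land outside the tolerance and fail the build, which is the failure mode the commit removes.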