diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 088ec6a0c046540e778fccfb498807f151f349ad..93cf16e6f0c2af3f9b0999265470231ce2a4a4af 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -357,7 +357,7 @@ private[python] class PythonMLLibAPI extends Serializable {
     val kMeansAlg = new KMeans()
       .setK(k)
       .setMaxIterations(maxIterations)
-      .setRuns(runs)
+      .internalSetRuns(runs)
       .setInitializationMode(initializationMode)
       .setInitializationSteps(initializationSteps)
       .setEpsilon(epsilon)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index 901164a391170788f4fa65150086efc99af1b36e..67de62bc2e848e27d6fdc0e26ac3a4a2be9835fb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -119,9 +119,18 @@
   @Since("0.8.0")
   @deprecated("Support for runs is deprecated. This param will have no effect in 2.0.0.", "1.6.0")
   def setRuns(runs: Int): this.type = {
+    internalSetRuns(runs)
+  }
+
+  // Internal version of setRuns for the Python API; this should be removed at the same time as setRuns.
+  // This is done to avoid deprecation warnings in our build.
+  private[mllib] def internalSetRuns(runs: Int): this.type = {
     if (runs <= 0) {
       throw new IllegalArgumentException("Number of runs must be positive")
     }
+    if (runs != 1) {
+      logWarning("Setting number of runs is deprecated and will have no effect in 2.0.0")
+    }
     this.runs = runs
     this
   }
@@ -502,7 +511,7 @@
       seed: Long): KMeansModel = {
     new KMeans().setK(k)
       .setMaxIterations(maxIterations)
-      .setRuns(runs)
+      .internalSetRuns(runs)
       .setInitializationMode(initializationMode)
       .setSeed(seed)
       .run(data)
@@ -528,7 +537,7 @@
       initializationMode: String): KMeansModel = {
     new KMeans().setK(k)
       .setMaxIterations(maxIterations)
-      .setRuns(runs)
+      .internalSetRuns(runs)
       .setInitializationMode(initializationMode)
       .run(data)
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
index 8af6750da4ff37c0ffb73bc20b68b3948e2da1ad..898a09e51636c3b9eed47f7246b5c1747eccdd0f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
@@ -105,8 +105,7 @@
 
     // optionally generate testing data
     if (test) {
-      val testSampSize = math.min(
-        math.round(sampSize * testSampFact), math.round(mn - sampSize)).toInt
+      val testSampSize = math.min(math.round(sampSize * testSampFact).toInt, mn - sampSize)
       val testOmega = shuffled.slice(sampSize, sampSize + testSampSize)
       val testOrdered = testOmega.sortWith(_ < _).toArray
       val testData: RDD[(Int, Int, Double)] = sc.parallelize(testOrdered)