diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index bf0d9d9231ac7fa197594505388b893f683176a5..eb3ee41f7cf4f62164ff1a1856e1ae3ed89dd914 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -326,6 +326,8 @@ class NaiveBayes private ( /** Set the smoothing parameter. Default: 1.0. */ @Since("0.9.0") def setLambda(lambda: Double): NaiveBayes = { + require(lambda >= 0, + s"Smoothing parameter must be nonnegative but got ${lambda}") this.lambda = lambda this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala index 88dbfe3fcc9f5cca7d5cca9d50bbece0bb522c36..03eb903bb8feec43bdc6a20f9673b65141afb8b8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala @@ -78,11 +78,9 @@ class GaussianMixture private ( */ @Since("1.3.0") def setInitialModel(model: GaussianMixtureModel): this.type = { - if (model.k == k) { - initialModel = Some(model) - } else { - throw new IllegalArgumentException("mismatched cluster count (model.k != k)") - } + require(model.k == k, + s"Mismatched cluster count (model.k ${model.k} != k ${k})") + initialModel = Some(model) this } @@ -97,6 +95,8 @@ class GaussianMixture private ( */ @Since("1.3.0") def setK(k: Int): this.type = { + require(k > 0, + s"Number of Gaussians must be positive but got ${k}") this.k = k this } @@ -112,6 +112,8 @@ class GaussianMixture private ( */ @Since("1.3.0") def setMaxIterations(maxIterations: Int): this.type = { + require(maxIterations >= 0, + s"Maximum of iterations must be nonnegative but got ${maxIterations}") this.maxIterations = maxIterations this } 
@@ -128,6 +130,8 @@ class GaussianMixture private ( */ @Since("1.3.0") def setConvergenceTol(convergenceTol: Double): this.type = { + require(convergenceTol >= 0.0, + s"Convergence tolerance must be nonnegative but got ${convergenceTol}") this.convergenceTol = convergenceTol this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index 26f5600e6c07895ec3d827ad1f760979754e8850..a7beb81980299b9ba0b659a481c283be0257c0be 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -65,6 +65,8 @@ class KMeans private ( */ @Since("0.8.0") def setK(k: Int): this.type = { + require(k > 0, + s"Number of clusters must be positive but got ${k}") this.k = k this } @@ -80,6 +82,8 @@ class KMeans private ( */ @Since("0.8.0") def setMaxIterations(maxIterations: Int): this.type = { + require(maxIterations >= 0, + s"Maximum of iterations must be nonnegative but got ${maxIterations}") this.maxIterations = maxIterations this } @@ -147,9 +151,8 @@ class KMeans private ( */ @Since("0.8.0") def setInitializationSteps(initializationSteps: Int): this.type = { - if (initializationSteps <= 0) { - throw new IllegalArgumentException("Number of initialization steps must be positive") - } + require(initializationSteps > 0, + s"Number of initialization steps must be positive but got ${initializationSteps}") this.initializationSteps = initializationSteps this } @@ -166,6 +169,8 @@ class KMeans private ( */ @Since("0.8.0") def setEpsilon(epsilon: Double): this.type = { + require(epsilon >= 0, + s"Distance threshold must be nonnegative but got ${epsilon}") this.epsilon = epsilon this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala index 
fad808857a7887150898984f44ad9b85d32cf8f0..12813fd412b11a11b06650dca3f84eed6b273d1a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala @@ -232,6 +232,8 @@ class LDA private ( */ @Since("1.3.0") def setMaxIterations(maxIterations: Int): this.type = { + require(maxIterations >= 0, + s"Maximum of iterations must be nonnegative but got ${maxIterations}") this.maxIterations = maxIterations this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala index a422303dc933a0294dc43cd8ea1a01bc5646a84e..2e257ff9b7def5c6c6f95276009f77ddf606f589 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala @@ -137,6 +137,8 @@ class PowerIterationClustering private[clustering] ( */ @Since("1.3.0") def setK(k: Int): this.type = { + require(k > 0, + s"Number of clusters must be positive but got ${k}") this.k = k this } @@ -146,6 +148,8 @@ class PowerIterationClustering private[clustering] ( */ @Since("1.3.0") def setMaxIterations(maxIterations: Int): this.type = { + require(maxIterations >= 0, + s"Maximum of iterations must be nonnegative but got ${maxIterations}") this.maxIterations = maxIterations this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala index a8d7b8fdedb1ff3d1fbf42355cd6205ef879f1b9..4eb8fc049e611c80f639357dd6f4c4359ee69f38 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala @@ -178,6 +178,8 @@ class StreamingKMeans @Since("1.2.0") ( */ @Since("1.2.0") def setK(k: Int): 
this.type = { + require(k > 0, + s"Number of clusters must be positive but got ${k}") this.k = k this } @@ -187,6 +189,8 @@ class StreamingKMeans @Since("1.2.0") ( */ @Since("1.2.0") def setDecayFactor(a: Double): this.type = { + require(a >= 0, + s"Decay factor must be nonnegative but got ${a}") this.decayFactor = a this } @@ -198,6 +202,8 @@ class StreamingKMeans @Since("1.2.0") ( */ @Since("1.2.0") def setHalfLife(halfLife: Double, timeUnit: String): this.type = { + require(halfLife > 0, + s"Half life must be positive but got ${halfLife}") if (timeUnit != StreamingKMeans.BATCHES && timeUnit != StreamingKMeans.POINTS) { throw new IllegalArgumentException("Invalid time unit for decay: " + timeUnit) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala index 24e0a98c39bff0c505162e3fc350f8e2a3667640..30c403e547beedc30536a4c986b56efae9c5c272 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -30,7 +30,8 @@ import org.apache.spark.rdd.RDD */ @Since("1.4.0") class PCA @Since("1.4.0") (@Since("1.4.0") val k: Int) { - require(k >= 1, s"PCA requires a number of principal components k >= 1 but was given $k") + require(k > 0, + s"Number of principal components must be positive but got ${k}") /** * Computes a [[PCAModel]] that contains the principal components of the input vectors. 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index d3356b783fc24b59e01318d62d78189b952f4b13..5b079fce3a83d06d39f5997402666f82fab45925 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -84,6 +84,8 @@ class Word2Vec extends Serializable with Logging { */ @Since("2.0.0") def setMaxSentenceLength(maxSentenceLength: Int): this.type = { + require(maxSentenceLength > 0, + s"Maximum length of sentences must be positive but got ${maxSentenceLength}") this.maxSentenceLength = maxSentenceLength this } @@ -93,6 +95,8 @@ class Word2Vec extends Serializable with Logging { */ @Since("1.1.0") def setVectorSize(vectorSize: Int): this.type = { + require(vectorSize > 0, + s"vector size must be positive but got ${vectorSize}") this.vectorSize = vectorSize this } @@ -102,6 +106,8 @@ class Word2Vec extends Serializable with Logging { */ @Since("1.1.0") def setLearningRate(learningRate: Double): this.type = { + require(learningRate > 0, + s"Initial learning rate must be positive but got ${learningRate}") this.learningRate = learningRate this } @@ -111,7 +117,8 @@ class Word2Vec extends Serializable with Logging { */ @Since("1.1.0") def setNumPartitions(numPartitions: Int): this.type = { - require(numPartitions > 0, s"numPartitions must be greater than 0 but got $numPartitions") + require(numPartitions > 0, + s"Number of partitions must be positive but got ${numPartitions}") this.numPartitions = numPartitions this } @@ -122,6 +129,8 @@ class Word2Vec extends Serializable with Logging { */ @Since("1.1.0") def setNumIterations(numIterations: Int): this.type = { + require(numIterations >= 0, + s"Number of iterations must be nonnegative but got ${numIterations}") this.numIterations = numIterations this } @@ -140,6 +149,8 @@ class Word2Vec extends Serializable with Logging { */ 
@Since("1.6.0") def setWindowSize(window: Int): this.type = { + require(window > 0, + s"Window of words must be positive but got ${window}") this.window = window this } @@ -150,6 +161,8 @@ class Word2Vec extends Serializable with Logging { */ @Since("1.3.0") def setMinCount(minCount: Int): this.type = { + require(minCount >= 0, + s"Minimum number of times must be nonnegative but got ${minCount}") this.minCount = minCount this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala index 5592416964226bcefa9ace13e104b53acbd58cf4..9a63cc29dacb562f9960a0b2ae9fb0681b257052 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala @@ -50,7 +50,8 @@ class AssociationRules private[fpm] ( */ @Since("1.5.0") def setMinConfidence(minConfidence: Double): this.type = { - require(minConfidence >= 0.0 && minConfidence <= 1.0) + require(minConfidence >= 0.0 && minConfidence <= 1.0, + s"Minimal confidence must be in range [0, 1] but got ${minConfidence}") this.minConfidence = minConfidence this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala index 3f40af8f3ada7a9ffbb5f6abaff865252f8696e6..4f4996f3be617e20640109bfec723f764facbb32 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala @@ -180,6 +180,8 @@ class FPGrowth private ( */ @Since("1.3.0") def setMinSupport(minSupport: Double): this.type = { + require(minSupport >= 0.0 && minSupport <= 1.0, + s"Minimal support level must be in range [0, 1] but got ${minSupport}") this.minSupport = minSupport this } @@ -190,6 +192,8 @@ class FPGrowth private ( */ @Since("1.3.0") def setNumPartitions(numPartitions: Int): this.type = { + require(numPartitions > 
0, + s"Number of partitions must be positive but got ${numPartitions}") this.numPartitions = numPartitions this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala index fbf657b0fac480fffe4ccf6b4e3a6f27a075815d..a67ea836e5681b78d4d60e94fe034edd8e2b6f5f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala @@ -46,6 +46,8 @@ class GradientDescent private[spark] (private var gradient: Gradient, private va * In subsequent steps, the step size will decrease with stepSize/sqrt(t) */ def setStepSize(step: Double): this.type = { + require(step > 0, + s"Initial step size must be positive but got ${step}") this.stepSize = step this } @@ -57,6 +59,8 @@ class GradientDescent private[spark] (private var gradient: Gradient, private va */ @Experimental def setMiniBatchFraction(fraction: Double): this.type = { + require(fraction > 0 && fraction <= 1.0, + s"Fraction for mini-batch SGD must be in range (0, 1] but got ${fraction}") this.miniBatchFraction = fraction this } @@ -65,6 +69,8 @@ class GradientDescent private[spark] (private var gradient: Gradient, private va * Set the number of iterations for SGD. Default 100. */ def setNumIterations(iters: Int): this.type = { + require(iters >= 0, + s"Number of iterations must be nonnegative but got ${iters}") this.numIterations = iters this } @@ -73,6 +79,8 @@ class GradientDescent private[spark] (private var gradient: Gradient, private va * Set the regularization parameter. Default 0.0. 
*/ def setRegParam(regParam: Double): this.type = { + require(regParam >= 0, + s"Regularization parameter must be nonnegative but got ${regParam}") this.regParam = regParam this } @@ -91,7 +99,8 @@ class GradientDescent private[spark] (private var gradient: Gradient, private va * Must be between 0.0 and 1.0 inclusively. */ def setConvergenceTol(tolerance: Double): this.type = { - require(0.0 <= tolerance && tolerance <= 1.0) + require(tolerance >= 0.0 && tolerance <= 1.0, + s"Convergence tolerance must be in range [0, 1] but got ${tolerance}") this.convergenceTol = tolerance this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala index 82c2ce4790055e66b7cc9dc2e041a9ecb5c27686..16a33526414bd2a8ac96975b2a5c3523856adaba 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala @@ -52,7 +52,8 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater) * Restriction: numCorrections > 0 */ def setNumCorrections(corrections: Int): this.type = { - assert(corrections > 0) + require(corrections > 0, + s"Number of corrections must be positive but got ${corrections}") this.numCorrections = corrections this } @@ -64,6 +65,8 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater) * and therefore generally cause more iterations to be run. */ def setConvergenceTol(tolerance: Double): this.type = { + require(tolerance >= 0, + s"Convergence tolerance must be nonnegative but got ${tolerance}") this.convergenceTol = tolerance this } @@ -88,6 +91,8 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater) * Set the maximal number of iterations for L-BFGS. Default 100. 
*/ def setNumIterations(iters: Int): this.type = { + require(iters >= 0, + s"Maximum of iterations must be nonnegative but got ${iters}") this.maxNumIterations = iters this } @@ -103,6 +108,8 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater) * Set the regularization parameter. Default 0.0. */ def setRegParam(regParam: Double): this.type = { + require(regParam >= 0, + s"Regularization parameter must be nonnegative but got ${regParam}") this.regParam = regParam this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index c5b02d6b2e9cec71bfeeb17827bc5cd053cdbfbb..467cb83cd166213cdacf1b5c279e3cfd53063aad 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -97,6 +97,8 @@ class ALS private ( */ @Since("0.8.0") def setBlocks(numBlocks: Int): this.type = { + require(numBlocks == -1 || numBlocks > 0, + s"Number of blocks must be -1 or positive but got ${numBlocks}") this.numUserBlocks = numBlocks this.numProductBlocks = numBlocks this @@ -107,6 +109,8 @@ class ALS private ( */ @Since("1.1.0") def setUserBlocks(numUserBlocks: Int): this.type = { + require(numUserBlocks == -1 || numUserBlocks > 0, + s"Number of user blocks must be -1 or positive but got ${numUserBlocks}") this.numUserBlocks = numUserBlocks this } @@ -116,6 +120,8 @@ class ALS private ( */ @Since("1.1.0") def setProductBlocks(numProductBlocks: Int): this.type = { + require(numProductBlocks == -1 || numProductBlocks > 0, + s"Number of product blocks must be -1 or positive but got ${numProductBlocks}") this.numProductBlocks = numProductBlocks this } @@ -123,6 +129,8 @@ class ALS private ( /** Set the rank of the feature matrices computed (number of features). Default: 10.
*/ @Since("0.8.0") def setRank(rank: Int): this.type = { + require(rank > 0, + s"Rank of the feature matrices must be positive but got ${rank}") this.rank = rank this } @@ -130,6 +138,8 @@ class ALS private ( /** Set the number of iterations to run. Default: 10. */ @Since("0.8.0") def setIterations(iterations: Int): this.type = { + require(iterations >= 0, + s"Number of iterations must be nonnegative but got ${iterations}") this.iterations = iterations this } @@ -137,6 +147,8 @@ class ALS private ( /** Set the regularization parameter, lambda. Default: 0.01. */ @Since("0.8.0") def setLambda(lambda: Double): this.type = { + require(lambda >= 0.0, + s"Regularization parameter must be nonnegative but got ${lambda}") this.lambda = lambda this }