diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
index 0b47cbbac8fe5eac03a429d6f6b6cf766ebb09aa..45d293bc69618936616a16fb4852423bb9e549c2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
@@ -185,7 +185,7 @@ class BisectingKMeans @Since("2.0.0") (
       .setSeed($(seed))
     val parentModel = bkm.run(rdd)
     val model = new BisectingKMeansModel(uid, parentModel)
-    copyValues(model)
+    copyValues(model.setParent(this))
   }
 
   @Since("2.0.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
index dc6d5d928097046b059d4b807a6bf99f5a2ce097..b2292e20e212462168bc684bb75e6964c0d81732 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -250,7 +250,7 @@ class KMeans @Since("1.5.0") (
       .setEpsilon($(tol))
     val parentModel = algo.run(rdd)
     val model = new KMeansModel(uid, parentModel)
-    copyValues(model)
+    copyValues(model.setParent(this))
   }
 
   @Since("1.5.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
index 2a294d388182910978650fd4b1b51d3e8c616a40..1f4cca123310a555c304d5d7a9cf8f8bf53d6384 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala
@@ -95,7 +95,7 @@ final class QuantileDiscretizer(override val uid: String)
     val candidates = QuantileDiscretizer.findSplitCandidates(samples, $(numBuckets) - 1)
     val splits = QuantileDiscretizer.getSplits(candidates)
     val bucketizer = new Bucketizer(uid).setSplits(splits)
-    copyValues(bucketizer)
+    copyValues(bucketizer.setParent(this))
   }
 
   override def copy(extra: ParamMap): QuantileDiscretizer = defaultCopy(extra)
diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala
index b26571eb9f35a9e3864b8f5e18d011a67279e3a0..fc4a4add5d7556675ed542c20b6c2badbd6a0249 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala
@@ -81,5 +81,6 @@ class BisectingKMeansSuite extends SparkFunSuite with MLlibTestSparkContext {
     assert(clusters.size === k)
     assert(clusters === Set(0, 1, 2, 3, 4))
     assert(model.computeCost(dataset) < 0.1)
+    assert(model.hasParent)
   }
 }
diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala
index 2724e51f31aa48380c8ce150708472bd92565389..e5357ba8e220cfc95c7b3f4a114545c304ff777c 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala
@@ -97,6 +97,7 @@ class KMeansSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultR
     assert(clusters.size === k)
     assert(clusters === Set(0, 1, 2, 3, 4))
     assert(model.computeCost(dataset) < 0.1)
+    assert(model.hasParent)
   }
 
   test("read/write") {
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/QuantileDiscretizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/QuantileDiscretizerSuite.scala
index 4fde42972f01b9f563ddb112e67d2149ac479e8e..6a2c601bbed182b4f82e1dc7d119b22a58e6f2e6 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/QuantileDiscretizerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/QuantileDiscretizerSuite.scala
@@ -94,7 +94,9 @@ private object QuantileDiscretizerSuite extends SparkFunSuite {
     val df =
       sc.parallelize(data.map(Tuple1.apply)).toDF("input")
     val discretizer = new QuantileDiscretizer().setInputCol("input").setOutputCol("result")
       .setNumBuckets(numBucket).setSeed(1)
-    val result = discretizer.fit(df).transform(df)
+    val model = discretizer.fit(df)
+    assert(model.hasParent)
+    val result = model.transform(df)
     val transformedFeatures = result.select("result").collect()
       .map { case Row(transformedFeature: Double) => transformedFeature }