Skip to content
Snippets Groups Projects
Unverified Commit 7db09abb authored by Zakaria_Hili's avatar Zakaria_Hili Committed by Sean Owen
Browse files

[SPARK-18356][ML] KMeans should cache RDD before training

## What changes were proposed in this pull request?

At the request of Joseph Bradley, I updated my PR https://github.com/apache/spark/pull/15965 to eliminate the extra fit() method.

jkbradley
## How was this patch tested?
Pass existing tests

Author: Zakaria_Hili <zakahili@gmail.com>
Author: HILI Zakaria <zakahili@gmail.com>

Closes #16295 from ZakariaHili/zakbranch.
parent 1e5c51f3
No related branches found
No related tags found
No related merge requests found
......@@ -302,22 +302,19 @@ class KMeans @Since("1.5.0") (
@Since("2.0.0")
override def fit(dataset: Dataset[_]): KMeansModel = {
val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE
fit(dataset, handlePersistence)
}
@Since("2.2.0")
protected def fit(dataset: Dataset[_], handlePersistence: Boolean): KMeansModel = {
transformSchema(dataset.schema, logging = true)
val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE
val instances: RDD[OldVector] = dataset.select(col($(featuresCol))).rdd.map {
case Row(point: Vector) => OldVectors.fromML(point)
}
if (handlePersistence) {
instances.persist(StorageLevel.MEMORY_AND_DISK)
}
val instr = Instrumentation.create(this, instances)
instr.logParams(featuresCol, predictionCol, k, initMode, initSteps, maxIter, seed, tol)
val algo = new MLlibKMeans()
.setK($(k))
.setInitializationMode($(initMode))
......@@ -329,6 +326,7 @@ class KMeans @Since("1.5.0") (
val model = copyValues(new KMeansModel(uid, parentModel).setParent(this))
val summary = new KMeansSummary(
model.transform(dataset), $(predictionCol), $(featuresCol), $(k))
model.setSummary(Some(summary))
instr.logSuccess(model)
if (handlePersistence) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment