Skip to content
Snippets Groups Projects
Commit 98b5ccd3 authored by Zheng RuiFeng's avatar Zheng RuiFeng Committed by Sean Owen
Browse files

[SPARK-20930][ML] Destroy broadcasted centers after computing cost in KMeans

## What changes were proposed in this pull request?
 Destroy broadcasted centers after computing cost
## How was this patch tested?
existing tests

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #18152 from zhengruifeng/destroy_kmeans_model.
parent 2d39711b
No related branches found
No related tags found
No related merge requests found
@@ -85,7 +85,10 @@ class KMeansModel @Since("1.1.0") (@Since("1.0.0") val clusterCenters: Array[Vec
   @Since("0.8.0")
   def computeCost(data: RDD[Vector]): Double = {
     val bcCentersWithNorm = data.context.broadcast(clusterCentersWithNorm)
-    data.map(p => KMeans.pointCost(bcCentersWithNorm.value, new VectorWithNorm(p))).sum()
+    val cost = data
+      .map(p => KMeans.pointCost(bcCentersWithNorm.value, new VectorWithNorm(p))).sum()
+    bcCentersWithNorm.destroy(blocking = false)
+    cost
   }
...
@@ -320,6 +320,7 @@ class LocalLDAModel private[spark] (
       docBound
     }.sum()
+    ElogbetaBc.destroy(blocking = false)

     // Bound component for prob(topic-term distributions):
     // E[log p(beta | eta) - log q(beta | lambda)]
@@ -372,7 +373,6 @@ class LocalLDAModel private[spark] (
    */
   private[spark] def getTopicDistributionMethod(sc: SparkContext): Vector => Vector = {
     val expElogbeta = exp(LDAUtils.dirichletExpectation(topicsMatrix.asBreeze.toDenseMatrix.t).t)
-    val expElogbetaBc = sc.broadcast(expElogbeta)
     val docConcentrationBrz = this.docConcentration.asBreeze
     val gammaShape = this.gammaShape
     val k = this.k
@@ -383,7 +383,7 @@ class LocalLDAModel private[spark] (
       } else {
         val (gamma, _, _) = OnlineLDAOptimizer.variationalTopicInference(
           termCounts,
-          expElogbetaBc.value,
+          expElogbeta,
           docConcentrationBrz,
           gammaShape,
           k)
...
@@ -246,6 +246,7 @@ object GradientDescent extends Logging {
         // c: (grad, loss, count)
         (c1._1 += c2._1, c1._2 + c2._2, c1._3 + c2._3)
       })
+      bcWeights.destroy(blocking = false)

       if (miniBatchSize > 0) {
         /**
...
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment