Skip to content
Snippets Groups Projects
Commit 98697734 authored by Travis Galoppo, committed by Xiangrui Meng
Browse files

SPARK-5400 [MLlib] Changed name of GaussianMixtureEM to GaussianMixture

Decoupling the model and the algorithm

Author: Travis Galoppo <tjg2107@columbia.edu>

Closes #4290 from tgaloppo/spark-5400 and squashes the following commits:

9c1534c [Travis Galoppo] Fixed invocation instructions in comments
d848076 [Travis Galoppo] SPARK-5400 Changed name of GaussianMixtureEM to GaussianMixture to separate model from algorithm
parent f377431a
No related branches found
No related tags found
No related merge requests found
......@@ -18,17 +18,17 @@
package org.apache.spark.examples.mllib
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.clustering.GaussianMixtureEM
import org.apache.spark.mllib.clustering.GaussianMixture
import org.apache.spark.mllib.linalg.Vectors
/**
* An example Gaussian Mixture Model EM app. Run with
* {{{
* ./bin/run-example org.apache.spark.examples.mllib.DenseGmmEM <input> <k> <covergenceTol>
* ./bin/run-example mllib.DenseGaussianMixture <input> <k> <convergenceTol>
* }}}
* If you use it as a template to create your own app, please use `spark-submit` to submit your app.
*/
object DenseGmmEM {
object DenseGaussianMixture {
def main(args: Array[String]): Unit = {
if (args.length < 3) {
println("usage: DenseGmmEM <input file> <k> <convergenceTol> [maxIterations]")
......@@ -46,7 +46,7 @@ object DenseGmmEM {
Vectors.dense(line.trim.split(' ').map(_.toDouble))
}.cache()
val clusters = new GaussianMixtureEM()
val clusters = new GaussianMixture()
.setK(k)
.setConvergenceTol(convergenceTol)
.setMaxIterations(maxIterations)
......
......@@ -44,7 +44,7 @@ import org.apache.spark.util.Utils
* is considered to have occurred.
* @param maxIterations The maximum number of iterations to perform
*/
class GaussianMixtureEM private (
class GaussianMixture private (
private var k: Int,
private var convergenceTol: Double,
private var maxIterations: Int,
......
......@@ -24,7 +24,7 @@ import org.apache.spark.mllib.stat.distribution.MultivariateGaussian
import org.apache.spark.mllib.util.MLlibTestSparkContext
import org.apache.spark.mllib.util.TestingUtils._
class GMMExpectationMaximizationSuite extends FunSuite with MLlibTestSparkContext {
class GaussianMixtureSuite extends FunSuite with MLlibTestSparkContext {
test("single cluster") {
val data = sc.parallelize(Array(
Vectors.dense(6.0, 9.0),
......@@ -39,7 +39,7 @@ class GMMExpectationMaximizationSuite extends FunSuite with MLlibTestSparkContex
val seeds = Array(314589, 29032897, 50181, 494821, 4660)
seeds.foreach { seed =>
val gmm = new GaussianMixtureEM().setK(1).setSeed(seed).run(data)
val gmm = new GaussianMixture().setK(1).setSeed(seed).run(data)
assert(gmm.weights(0) ~== Ew absTol 1E-5)
assert(gmm.gaussians(0).mu ~== Emu absTol 1E-5)
assert(gmm.gaussians(0).sigma ~== Esigma absTol 1E-5)
......@@ -68,7 +68,7 @@ class GMMExpectationMaximizationSuite extends FunSuite with MLlibTestSparkContex
val Emu = Array(Vectors.dense(-4.3673), Vectors.dense(5.1604))
val Esigma = Array(Matrices.dense(1, 1, Array(1.1098)), Matrices.dense(1, 1, Array(0.86644)))
val gmm = new GaussianMixtureEM()
val gmm = new GaussianMixture()
.setK(2)
.setInitialModel(initialGmm)
.run(data)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment