diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index f31949f13a4cf310a1db2fa8207d8947de514ac9..82f05e4a18cee231fc97dc2afcc04e3f6f9ad0a8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -674,10 +674,9 @@ class DistributedLDAModel private[clustering] (
   }
 
   /** Java-friendly version of [[topTopicsPerDocument]] */
-  def javaTopTopicsPerDocument(
-      k: Int): JavaRDD[(java.lang.Long, Array[Int], Array[java.lang.Double])] = {
+  def javaTopTopicsPerDocument(k: Int): JavaRDD[(java.lang.Long, Array[Int], Array[Double])] = {
     val topics = topTopicsPerDocument(k)
-    topics.asInstanceOf[RDD[(java.lang.Long, Array[Int], Array[java.lang.Double])]].toJavaRDD()
+    topics.asInstanceOf[RDD[(java.lang.Long, Array[Int], Array[Double])]].toJavaRDD()
   }
 
   // TODO:
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
index 24fe48cb8f71fc3d69fbd6807c0647e76dab9614..ef8d78607048f50ac450ecf3ec4f2a8e05c428d2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
@@ -221,9 +221,7 @@ object Statistics {
   def kolmogorovSmirnovTest(
       data: JavaDoubleRDD,
       distName: String,
-      params: java.lang.Double*): KolmogorovSmirnovTestResult = {
-    val javaParams = params.asInstanceOf[Seq[Double]]
-    KolmogorovSmirnovTest.testOneSample(data.rdd.asInstanceOf[RDD[Double]],
-      distName, javaParams: _*)
+      params: Double*): KolmogorovSmirnovTestResult = {
+    kolmogorovSmirnovTest(data.rdd.asInstanceOf[RDD[Double]], distName, params: _*)
   }
 }
diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java
index 427be9430d82069e5e80c1591fa5a5e1dbc63f39..6e91cde2eabb53383155bbfb9843bdf611ca0c81 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java
@@ -22,12 +22,14 @@ import java.util.ArrayList;
 import java.util.Arrays;
 
 import scala.Tuple2;
+import scala.Tuple3;
 
 import org.junit.After;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertArrayEquals;
 import org.junit.Before;
 import org.junit.Test;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 import org.apache.spark.api.java.function.Function;
 import org.apache.spark.api.java.JavaPairRDD;
@@ -44,9 +46,9 @@ public class JavaLDASuite implements Serializable {
   public void setUp() {
     sc = new JavaSparkContext("local", "JavaLDA");
     ArrayList<Tuple2<Long, Vector>> tinyCorpus = new ArrayList<Tuple2<Long, Vector>>();
-    for (int i = 0; i < LDASuite$.MODULE$.tinyCorpus().length; i++) {
-      tinyCorpus.add(new Tuple2<Long, Vector>((Long)LDASuite$.MODULE$.tinyCorpus()[i]._1(),
-          LDASuite$.MODULE$.tinyCorpus()[i]._2()));
+    for (int i = 0; i < LDASuite.tinyCorpus().length; i++) {
+      tinyCorpus.add(new Tuple2<Long, Vector>((Long)LDASuite.tinyCorpus()[i]._1(),
+          LDASuite.tinyCorpus()[i]._2()));
     }
     JavaRDD<Tuple2<Long, Vector>> tmpCorpus = sc.parallelize(tinyCorpus, 2);
     corpus = JavaPairRDD.fromJavaRDD(tmpCorpus);
@@ -60,7 +62,7 @@ public class JavaLDASuite implements Serializable {
 
   @Test
   public void localLDAModel() {
-    Matrix topics = LDASuite$.MODULE$.tinyTopics();
+    Matrix topics = LDASuite.tinyTopics();
     double[] topicConcentration = new double[topics.numRows()];
     Arrays.fill(topicConcentration, 1.0D / topics.numRows());
     LocalLDAModel model = new LocalLDAModel(topics, Vectors.dense(topicConcentration), 1D, 100D);
@@ -110,8 +112,8 @@ public class JavaLDASuite implements Serializable {
     assertEquals(roundedLocalTopicSummary.length, k);
 
     // Check: log probabilities
-    assert(model.logLikelihood() < 0.0);
-    assert(model.logPrior() < 0.0);
+    assertTrue(model.logLikelihood() < 0.0);
+    assertTrue(model.logPrior() < 0.0);
 
     // Check: topic distributions
     JavaPairRDD<Long, Vector> topicDistributions = model.javaTopicDistributions();
@@ -126,8 +128,12 @@ public class JavaLDASuite implements Serializable {
     assertEquals(topicDistributions.count(), nonEmptyCorpus.count());
 
     // Check: javaTopTopicsPerDocuments
-    JavaRDD<scala.Tuple3<java.lang.Long, int[], java.lang.Double[]>> topTopics =
-      model.javaTopTopicsPerDocument(3);
+    Tuple3<Long, int[], double[]> topTopics = model.javaTopTopicsPerDocument(3).first();
+    Long docId = topTopics._1(); // confirm doc ID type
+    int[] topicIndices = topTopics._2();
+    double[] topicWeights = topTopics._3();
+    assertEquals(3, topicIndices.length);
+    assertEquals(3, topicWeights.length);
   }
 
   @Test
@@ -177,18 +183,18 @@ public class JavaLDASuite implements Serializable {
 
     // check: logLikelihood.
     ArrayList<Tuple2<Long, Vector>> docsSingleWord = new ArrayList<Tuple2<Long, Vector>>();
-    docsSingleWord.add(new Tuple2<Long, Vector>(Long.valueOf(0), Vectors.dense(1.0, 0.0, 0.0)));
+    docsSingleWord.add(new Tuple2<Long, Vector>(0L, Vectors.dense(1.0, 0.0, 0.0)));
     JavaPairRDD<Long, Vector> single = JavaPairRDD.fromJavaRDD(sc.parallelize(docsSingleWord));
     double logLikelihood = toyModel.logLikelihood(single);
   }
 
-  private static int tinyK = LDASuite$.MODULE$.tinyK();
-  private static int tinyVocabSize = LDASuite$.MODULE$.tinyVocabSize();
-  private static Matrix tinyTopics = LDASuite$.MODULE$.tinyTopics();
+  private static int tinyK = LDASuite.tinyK();
+  private static int tinyVocabSize = LDASuite.tinyVocabSize();
+  private static Matrix tinyTopics = LDASuite.tinyTopics();
   private static Tuple2<int[], double[]>[] tinyTopicDescription =
-    LDASuite$.MODULE$.tinyTopicDescription();
+    LDASuite.tinyTopicDescription();
   private JavaPairRDD<Long, Vector> corpus;
-  private LocalLDAModel toyModel = LDASuite$.MODULE$.toyModel();
-  private ArrayList<Tuple2<Long, Vector>> toyData = LDASuite$.MODULE$.javaToyData();
+  private LocalLDAModel toyModel = LDASuite.toyModel();
+  private ArrayList<Tuple2<Long, Vector>> toyData = LDASuite.javaToyData();
 
 }
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
index 926185e90bcf92533cf1c52c301d6a7f17a9fa16..99e28499fd316c4cf01c6626380c27698b7e4db2 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
@@ -581,7 +581,7 @@ private[clustering] object LDASuite {
   def javaToyData: JArrayList[(java.lang.Long, Vector)] = {
     val javaData = new JArrayList[(java.lang.Long, Vector)]
     var i = 0
-    while (i < toyData.size) {
+    while (i < toyData.length) {
       javaData.add((toyData(i)._1, toyData(i)._2))
       i += 1
     }
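
For context, a minimal sketch of how a Java caller might consume the changed javaTopTopicsPerDocument signature after this patch. It is illustrative only: the TopTopicsSketch class, the model and k parameters, and the printing logic are assumptions, not part of the patch; the Tuple3 element types mirror the assertions added to JavaLDASuite above.

import java.util.List;

import scala.Tuple3;

import org.apache.spark.mllib.clustering.DistributedLDAModel;

public class TopTopicsSketch {
  // Illustrative only: print the top-k topics for each document. With this
  // patch the third tuple element is declared as a primitive double[],
  // matching the unboxed array actually produced at runtime, instead of the
  // old java.lang.Double[] declaration that did not match it.
  public static void printTopTopics(DistributedLDAModel model, int k) {
    List<Tuple3<Long, int[], double[]>> docs = model.javaTopTopicsPerDocument(k).collect();
    for (Tuple3<Long, int[], double[]> doc : docs) {
      Long docId = doc._1();            // document ID
      int[] topicIndices = doc._2();    // indices of the k most relevant topics
      double[] topicWeights = doc._3(); // corresponding topic weights
      for (int i = 0; i < topicIndices.length; i++) {
        System.out.printf("doc %d: topic %d -> %.4f%n", docId, topicIndices[i], topicWeights[i]);
      }
    }
  }
}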