Commit 1650f6f5 authored by Feynman Liang's avatar Feynman Liang Committed by Sean Owen

[SPARK-10254] [ML] Removes Guava dependencies in spark.ml.feature JavaTests

* Replaces `com.google.common` dependencies with `java.util.Arrays`
* Small clean up in `JavaNormalizerSuite`

Author: Feynman Liang <fliang@databricks.com>

Closes #8445 from feynmanliang/SPARK-10254.
parent 9625d13d
Showing with 35 additions and 30 deletions
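
For context, the substitution is mechanical: Guava's `Lists.newArrayList(...)` becomes the JDK's `Arrays.asList(...)`. One behavioral difference is worth noting: `Arrays.asList` returns a fixed-size list backed by its argument array, which is fine for the read-only fixtures these suites build but would break code that grows the list. A minimal sketch of the two forms (hypothetical snippet, not part of this diff):

    import java.util.Arrays;
    import java.util.List;

    public class ListSwapSketch {
      public static void main(String[] args) {
        // Before (Guava): an independent, growable ArrayList.
        // List<Double> splits = Lists.newArrayList(-0.5, 0.0, 0.5);

        // After (JDK): a fixed-size List view over the varargs array.
        List<Double> splits = Arrays.asList(-0.5, 0.0, 0.5);

        System.out.println(splits); // [-0.5, 0.0, 0.5]
        splits.set(0, -0.4);        // set() is supported on the view
        // splits.add(1.0);         // would throw UnsupportedOperationException
      }
    }

Since the lists here are only handed to `jsc.parallelize` or `RowFactory.create` and never mutated, the fixed-size semantics are harmless.
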
@@ -17,7 +17,8 @@
 package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -54,7 +55,7 @@ public class JavaBucketizerSuite {
   public void bucketizerTest() {
     double[] splits = {-0.5, 0.0, 0.5};
-    JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+    JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
       RowFactory.create(-0.5),
       RowFactory.create(-0.3),
       RowFactory.create(0.0),
...
@@ -17,7 +17,8 @@
 package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D;
 import org.junit.After;
 import org.junit.Assert;
@@ -56,7 +57,7 @@ public class JavaDCTSuite {
   @Test
   public void javaCompatibilityTest() {
     double[] input = new double[] {1D, 2D, 3D, 4D};
-    JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+    JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
       RowFactory.create(Vectors.dense(input))
     ));
     DataFrame dataset = jsql.createDataFrame(data, new StructType(new StructField[]{
...
@@ -17,7 +17,8 @@
 package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -54,7 +55,7 @@ public class JavaHashingTFSuite {
   @Test
   public void hashingTF() {
-    JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
       RowFactory.create(0.0, "Hi I heard about Spark"),
       RowFactory.create(0.0, "I wish Java could use case classes"),
       RowFactory.create(1.0, "Logistic regression models are neat")
...
@@ -17,15 +17,15 @@
 package org.apache.spark.ml.feature;
-import java.util.List;
+import java.util.Arrays;
-import com.google.common.collect.Lists;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.sql.DataFrame;
 import org.apache.spark.sql.SQLContext;
@@ -48,13 +48,12 @@ public class JavaNormalizerSuite {
   @Test
   public void normalizer() {
     // The tests are to check Java compatibility.
-    List<VectorIndexerSuite.FeatureData> points = Lists.newArrayList(
+    JavaRDD<VectorIndexerSuite.FeatureData> points = jsc.parallelize(Arrays.asList(
       new VectorIndexerSuite.FeatureData(Vectors.dense(0.0, -2.0)),
       new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 3.0)),
       new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 4.0))
-    );
-    DataFrame dataFrame = jsql.createDataFrame(jsc.parallelize(points, 2),
-      VectorIndexerSuite.FeatureData.class);
+    ));
+    DataFrame dataFrame = jsql.createDataFrame(points, VectorIndexerSuite.FeatureData.class);
     Normalizer normalizer = new Normalizer()
       .setInputCol("features")
       .setOutputCol("normFeatures");
...
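
Beyond the import swap, the `JavaNormalizerSuite` clean-up above folds the two-step build (a `List` parallelized with an explicit 2 partitions) into a single `JavaRDD` passed straight to `createDataFrame(rdd, beanClass)`, which reflects on the bean to infer the schema. A rough, self-contained sketch of that pattern against Spark 1.5-era APIs (the `FeatureData` stand-in below is hypothetical; the real suite reuses the test-scoped `VectorIndexerSuite.FeatureData`):

    import java.io.Serializable;
    import java.util.Arrays;

    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.mllib.linalg.Vector;
    import org.apache.spark.mllib.linalg.Vectors;
    import org.apache.spark.sql.DataFrame;
    import org.apache.spark.sql.SQLContext;

    public class NormalizerSketch {
      // Hypothetical stand-in for the suite's FeatureData bean.
      public static class FeatureData implements Serializable {
        private Vector features;
        public FeatureData(Vector features) { this.features = features; }
        public Vector getFeatures() { return features; }
        public void setFeatures(Vector features) { this.features = features; }
      }

      public static void main(String[] args) {
        JavaSparkContext jsc =
          new JavaSparkContext(new SparkConf().setMaster("local").setAppName("sketch"));
        SQLContext jsql = new SQLContext(jsc);

        // One step instead of two: build the JavaRDD inline (default parallelism
        // instead of the explicit 2 partitions the old test requested)...
        JavaRDD<FeatureData> points = jsc.parallelize(Arrays.asList(
          new FeatureData(Vectors.dense(0.0, -2.0)),
          new FeatureData(Vectors.dense(1.0, 3.0))
        ));
        // ...and let createDataFrame derive the schema from the bean class.
        DataFrame dataFrame = jsql.createDataFrame(points, FeatureData.class);
        dataFrame.show();

        jsc.stop();
      }
    }
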
@@ -18,11 +18,11 @@
 package org.apache.spark.ml.feature;
 import java.io.Serializable;
+import java.util.Arrays;
 import java.util.List;
 import scala.Tuple2;
-import com.google.common.collect.Lists;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -78,7 +78,7 @@ public class JavaPCASuite implements Serializable {
   @Test
   public void testPCA() {
-    List<Vector> points = Lists.newArrayList(
+    List<Vector> points = Arrays.asList(
       Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0}),
       Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
       Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
...
@@ -17,7 +17,8 @@
 package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -59,7 +60,7 @@ public class JavaPolynomialExpansionSuite {
       .setOutputCol("polyFeatures")
       .setDegree(3);
-    JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+    JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
       RowFactory.create(
         Vectors.dense(-2.0, 2.3),
         Vectors.dense(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17)
...
@@ -17,9 +17,9 @@
 package org.apache.spark.ml.feature;
+import java.util.Arrays;
 import java.util.List;
-import com.google.common.collect.Lists;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -48,7 +48,7 @@ public class JavaStandardScalerSuite {
   @Test
   public void standardScaler() {
     // The tests are to check Java compatibility.
-    List<VectorIndexerSuite.FeatureData> points = Lists.newArrayList(
+    List<VectorIndexerSuite.FeatureData> points = Arrays.asList(
       new VectorIndexerSuite.FeatureData(Vectors.dense(0.0, -2.0)),
       new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 3.0)),
       new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 4.0))
...
@@ -17,7 +17,8 @@
 package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -54,7 +55,8 @@ public class JavaTokenizerSuite {
       .setGaps(true)
       .setMinTokenLength(3);
-    JavaRDD<TokenizerTestData> rdd = jsc.parallelize(Lists.newArrayList(
+    JavaRDD<TokenizerTestData> rdd = jsc.parallelize(Arrays.asList(
       new TokenizerTestData("Test of tok.", new String[] {"Test", "tok."}),
       new TokenizerTestData("Te,st. punct", new String[] {"Te,st.", "punct"})
     ));
...
@@ -18,6 +18,7 @@
 package org.apache.spark.ml.feature;
 import java.io.Serializable;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
@@ -26,8 +27,6 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
-import com.google.common.collect.Lists;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.ml.feature.VectorIndexerSuite.FeatureData;
 import org.apache.spark.mllib.linalg.Vectors;
@@ -52,7 +51,7 @@ public class JavaVectorIndexerSuite implements Serializable {
   @Test
   public void vectorIndexerAPI() {
     // The tests are to check Java compatibility.
-    List<FeatureData> points = Lists.newArrayList(
+    List<FeatureData> points = Arrays.asList(
       new FeatureData(Vectors.dense(0.0, -2.0)),
       new FeatureData(Vectors.dense(1.0, 3.0)),
       new FeatureData(Vectors.dense(1.0, 4.0))
...
@@ -17,7 +17,7 @@
 package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 import org.junit.After;
 import org.junit.Assert;
@@ -63,7 +63,7 @@ public class JavaVectorSlicerSuite {
     };
     AttributeGroup group = new AttributeGroup("userFeatures", attrs);
-    JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
       RowFactory.create(Vectors.sparse(3, new int[]{0, 1}, new double[]{-2.0, 2.3})),
       RowFactory.create(Vectors.dense(-2.0, 2.3, 0.0))
     ));
...
@@ -17,7 +17,8 @@
 package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -50,10 +51,10 @@ public class JavaWord2VecSuite {
   @Test
   public void testJavaWord2Vec() {
-    JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
-      RowFactory.create(Lists.newArrayList("Hi I heard about Spark".split(" "))),
-      RowFactory.create(Lists.newArrayList("I wish Java could use case classes".split(" "))),
-      RowFactory.create(Lists.newArrayList("Logistic regression models are neat".split(" ")))
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(Arrays.asList("Hi I heard about Spark".split(" "))),
+      RowFactory.create(Arrays.asList("I wish Java could use case classes".split(" "))),
+      RowFactory.create(Arrays.asList("Logistic regression models are neat".split(" ")))
     ));
     StructType schema = new StructType(new StructField[]{
       new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
...
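
In `JavaWord2VecSuite` the same replacement also covers wrapping tokenized text: `Arrays.asList(sentence.split(" "))` exposes the `String[]` from `split` as the `List<String>` that the `ArrayType(StringType)` column expects, without copying elements. A tiny standalone illustration (hypothetical snippet, not from the suite):

    import java.util.Arrays;
    import java.util.List;

    public class SplitWrapSketch {
      public static void main(String[] args) {
        // Arrays.asList wraps the String[] returned by split() as a List view;
        // nothing is copied, which is all a read-only Row fixture needs.
        List<String> tokens = Arrays.asList("Hi I heard about Spark".split(" "));
        System.out.println(tokens.size()); // 5
        System.out.println(tokens);        // [Hi, I, heard, about, Spark]
      }
    }

One caveat if reusing the idiom: it only behaves this way for object arrays. `Arrays.asList` over a primitive `double[]` yields a one-element `List<double[]>`, not a `List<Double>`.
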