From efaf7d18205f5ae3a1c767942ee7d7320f7410de Mon Sep 17 00:00:00 2001 From: DB Tsai <dbt@netflix.com> Date: Mon, 11 Apr 2016 09:35:47 -0700 Subject: [PATCH] [SPARK-14462][ML][MLLIB] Add the mllib-local build to maven pom ## What changes were proposed in this pull request? In order to separate the linear algebra and vector/matrix classes into a standalone jar, we need to set up the build first. This PR will create a new jar called mllib-local with minimal dependencies. The previous PR was failing the build because of the `spark-core:test` dependency, and that was reverted. In this PR, `FunSuite` with `// scalastyle:ignore funsuite` in mllib-local test was used, similar to sketch. Thanks. ## How was this patch tested? Unit tests mengxr tedyu holdenk Author: DB Tsai <dbt@netflix.com> Closes #12298 from dbtsai/dbtsai-mllib-local-build-fix. --- dev/sparktestsupport/modules.py | 14 ++- mllib-local/pom.xml | 87 +++++++++++++++++++ .../org/apache/spark/ml/DummyTesting.scala | 23 +++++ .../apache/spark/ml/DummyTestingSuite.scala | 28 ++++++ mllib/pom.xml | 12 +++ pom.xml | 1 + project/SparkBuild.scala | 6 +- 7 files changed, 167 insertions(+), 4 deletions(-) create mode 100644 mllib-local/pom.xml create mode 100644 mllib-local/src/main/scala/org/apache/spark/ml/DummyTesting.scala create mode 100644 mllib-local/src/test/scala/org/apache/spark/ml/DummyTestingSuite.scala diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index bb04ec6ee6..c844bcff7e 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -256,9 +256,21 @@ streaming_flume_assembly = Module( ) +mllib_local = Module( + name="mllib-local", + dependencies=[], + source_file_regexes=[ + "mllib-local", + ], + sbt_test_goals=[ + "mllib-local/test", + ] +) + + mllib = Module( name="mllib", - dependencies=[streaming, sql], + dependencies=[mllib_local, streaming, sql], source_file_regexes=[ "data/mllib/", "mllib/", diff --git a/mllib-local/pom.xml
b/mllib-local/pom.xml new file mode 100644 index 0000000000..c56561f215 --- /dev/null +++ b/mllib-local/pom.xml @@ -0,0 +1,87 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one or more + ~ contributor license agreements. See the NOTICE file distributed with + ~ this work for additional information regarding copyright ownership. + ~ The ASF licenses this file to You under the Apache License, Version 2.0 + ~ (the "License"); you may not use this file except in compliance with + ~ the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, software + ~ distributed under the License is distributed on an "AS IS" BASIS, + ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ~ See the License for the specific language governing permissions and + ~ limitations under the License. + --> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.spark</groupId> + <artifactId>spark-parent_2.11</artifactId> + <version>2.0.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <groupId>org.apache.spark</groupId> + <artifactId>spark-mllib-local_2.11</artifactId> + <properties> + <sbt.project.name>mllib-local</sbt.project.name> + </properties> + <packaging>jar</packaging> + <name>Spark Project ML Local Library</name> + <url>http://spark.apache.org/</url> + + <dependencies> + <dependency> + <groupId>org.scalanlp</groupId> + <artifactId>breeze_${scala.binary.version}</artifactId> + <version>0.11.2</version> + <exclusions> + <!-- This is included as a compile-scoped dependency by jtransforms, which is + a dependency of breeze. 
--> + <exclusion> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.commons</groupId> + <artifactId>commons-math3</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-math3</artifactId> + </dependency> + <dependency> + <groupId>org.scalacheck</groupId> + <artifactId>scalacheck_${scala.binary.version}</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-core</artifactId> + <scope>test</scope> + </dependency> + </dependencies> + <profiles> + <profile> + <id>netlib-lgpl</id> + <dependencies> + <dependency> + <groupId>com.github.fommil.netlib</groupId> + <artifactId>all</artifactId> + <version>${netlib.java.version}</version> + <type>pom</type> + </dependency> + </dependencies> + </profile> + </profiles> + <build> + <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory> + <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory> + </build> +</project> diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/DummyTesting.scala b/mllib-local/src/main/scala/org/apache/spark/ml/DummyTesting.scala new file mode 100644 index 0000000000..6b3268cdfa --- /dev/null +++ b/mllib-local/src/main/scala/org/apache/spark/ml/DummyTesting.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml + +// Private placeholder object for testing if the new mllib-local build works. To be removed soon. +private[ml] object DummyTesting { + private[ml] def add10(input: Double): Double = input + 10 +} diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/DummyTestingSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/DummyTestingSuite.scala new file mode 100644 index 0000000000..51b7c2409f --- /dev/null +++ b/mllib-local/src/test/scala/org/apache/spark/ml/DummyTestingSuite.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml + +import org.scalatest.FunSuite // scalastyle:ignore funsuite + +// Placeholder suite for testing if the new mllib-local build works. To be removed soon.
+class DummyTestingSuite extends FunSuite { // scalastyle:ignore funsuite + + test("This is testing if the new build works.") { + assert(DummyTesting.add10(15) === 25) // add10 adds 10 to its input + } +} diff --git a/mllib/pom.xml b/mllib/pom.xml index 428176dcbf..e56eafc300 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -62,6 +62,18 @@ <artifactId>spark-graphx_${scala.binary.version}</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-mllib-local_${scala.binary.version}</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-mllib-local_${scala.binary.version}</artifactId> + <version>${project.version}</version> + <type>test-jar</type> + <scope>test</scope> + </dependency> <dependency> <groupId>org.scalanlp</groupId> <artifactId>breeze_${scala.binary.version}</artifactId> diff --git a/pom.xml b/pom.xml index 4cbc6a2f11..38843b4f74 100644 --- a/pom.xml +++ b/pom.xml @@ -94,6 +94,7 @@ <module>core</module> <module>graphx</module> <module>mllib</module> + <module>mllib-local</module> <module>tools</module> <module>streaming</module> <module>sql/catalyst</module> diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 60124ef0a1..c5688ecec6 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -47,9 +47,9 @@ object BuildCommons { ).map(ProjectRef(buildLocation, _)) val allProjects@Seq( - core, graphx, mllib, repl, networkCommon, networkShuffle, launcher, unsafe, testTags, sketch, _* + core, graphx, mllib, mllibLocal, repl, networkCommon, networkShuffle, launcher, unsafe, testTags, sketch, _* ) = Seq( - "core", "graphx", "mllib", "repl", "network-common", "network-shuffle", "launcher", "unsafe", + "core", "graphx", "mllib", "mllib-local", "repl", "network-common", "network-shuffle", "launcher", "unsafe", "test-tags", "sketch" ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++
streamingProjects @@ -254,7 +254,7 @@ object SparkBuild extends PomBuild { val mimaProjects = allProjects.filterNot { x => Seq( spark, hive, hiveThriftServer, catalyst, repl, networkCommon, networkShuffle, networkYarn, - unsafe, testTags, sketch + unsafe, testTags, sketch, mllibLocal ).contains(x) } -- GitLab