From 34661d8a5acbeecae9b034a2a6a737f16d8738bb Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun <dongjoon@apache.org>
Date: Wed, 31 May 2017 22:39:25 -0700
Subject: [PATCH] [SPARK-20708][CORE] Make `addExclusionRules` up-to-date

## What changes were proposed in this pull request?

Since [SPARK-9263](https://issues.apache.org/jira/browse/SPARK-9263), `resolveMavenCoordinates` ignores Spark and Spark's dependencies by using `addExclusionRules`. This PR brings [addExclusionRules](https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala#L956-L974) up to date so that Spark's own modules are excluded correctly; the current list misses some components, as shown below.

**mllib (correct)**
```
$ bin/spark-shell --packages org.apache.spark:spark-mllib_2.11:2.1.1
...
---------------------------------------------------------------------
|                  |            modules            ||   artifacts   |
|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
---------------------------------------------------------------------
|      default     |   0   |   0   |   0   |   0   ||   0   |   0   |
---------------------------------------------------------------------
```

**mllib-local (wrong)**
```
$ bin/spark-shell --packages org.apache.spark:spark-mllib-local_2.11:2.1.1
...
---------------------------------------------------------------------
|                  |            modules            ||   artifacts   |
|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
---------------------------------------------------------------------
|      default     |   15  |   2   |   2   |   0   ||   15  |   2   |
---------------------------------------------------------------------
```
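To make the role of the trailing underscore in each component name concrete, here is a minimal, illustrative Scala sketch. It is not Spark code: `ExclusionDemo` and `isExcluded` are invented names, and the glob `org.apache.spark:spark-<comp>*` is approximated as a prefix check on the artifact name.

```scala
// Illustrative only: models the exclusion glob "spark-<comp>*" as a
// startsWith check. Object and method names are hypothetical.
object ExclusionDemo {
  // A subset of the component list introduced by this patch.
  val excludes = Seq("streaming_", "mllib_", "mllib-local_")

  def isExcluded(artifact: String): Boolean =
    excludes.exists(comp => artifact.startsWith(s"spark-$comp"))

  def main(args: Array[String]): Unit = {
    // Spark's own modules are excluded from resolution.
    println(isExcluded("spark-streaming_2.11"))           // true
    println(isExcluded("spark-mllib_2.11"))               // true
    // "mllib_" alone would miss mllib-local; the new entry catches it.
    println(isExcluded("spark-mllib-local_2.11"))         // true (via "mllib-local_")
    // Without the trailing underscore, this user-requested package
    // would be wrongly excluded too.
    println(isExcluded("spark-streaming-kafka-0-8_2.11")) // false
  }
}
```

This is why the patch lists `mllib-local_`, `sketch_`, `tags_`, and `unsafe_` as separate entries rather than relying on a shorter prefix, and why user-facing packages such as spark-streaming-kafka-0-8 remain resolvable.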
## How was this patch tested?

Pass the Jenkins with an updated test case.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #17947 from dongjoon-hyun/SPARK-20708.
---
 .../org/apache/spark/deploy/SparkSubmit.scala | 17 ++++++++++-------
 .../spark/deploy/SparkSubmitUtilsSuite.scala  |  9 +++------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index c60a2a1706..d13fb41939 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -879,6 +879,15 @@ private[spark] object SparkSubmitUtils {
   // Exposed for testing
   var printStream = SparkSubmit.printStream
 
+  // Exposed for testing.
+  // These components are used to make the default exclusion rules for Spark dependencies.
+  // We need to specify each component explicitly, otherwise we miss spark-streaming-kafka-0-8 and
+  // other spark-streaming utility components. Underscore is there to differentiate between
+  // spark-streaming_2.1x and spark-streaming-kafka-0-8-assembly_2.1x
+  val IVY_DEFAULT_EXCLUDES = Seq("catalyst_", "core_", "graphx_", "launcher_", "mllib_",
+    "mllib-local_", "network-common_", "network-shuffle_", "repl_", "sketch_", "sql_", "streaming_",
+    "tags_", "unsafe_")
+
   /**
    * Represents a Maven Coordinate
    * @param groupId the groupId of the coordinate
@@ -1007,13 +1016,7 @@ private[spark] object SparkSubmitUtils {
     // Add scala exclusion rule
     md.addExcludeRule(createExclusion("*:scala-library:*", ivySettings, ivyConfName))
 
-    // We need to specify each component explicitly, otherwise we miss spark-streaming-kafka-0-8 and
-    // other spark-streaming utility components. Underscore is there to differentiate between
-    // spark-streaming_2.1x and spark-streaming-kafka-0-8-assembly_2.1x
-    val components = Seq("catalyst_", "core_", "graphx_", "hive_", "mllib_", "repl_",
-      "sql_", "streaming_", "yarn_", "network-common_", "network-shuffle_", "network-yarn_")
-
-    components.foreach { comp =>
+    IVY_DEFAULT_EXCLUDES.foreach { comp =>
       md.addExcludeRule(createExclusion(s"org.apache.spark:spark-$comp*:*", ivySettings,
         ivyConfName))
     }
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
index 266c9d33b5..57024786b9 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
@@ -187,12 +187,9 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
   }
 
   test("neglects Spark and Spark's dependencies") {
-    val components = Seq("catalyst_", "core_", "graphx_", "hive_", "mllib_", "repl_",
-      "sql_", "streaming_", "yarn_", "network-common_", "network-shuffle_", "network-yarn_")
-
-    val coordinates =
-      components.map(comp => s"org.apache.spark:spark-${comp}2.10:1.2.0").mkString(",") +
-      ",org.apache.spark:spark-core_fake:1.2.0"
+    val coordinates = SparkSubmitUtils.IVY_DEFAULT_EXCLUDES
+      .map(comp => s"org.apache.spark:spark-${comp}2.11:2.1.1")
+      .mkString(",") + ",org.apache.spark:spark-core_fake:1.2.0"
 
     val path = SparkSubmitUtils.resolveMavenCoordinates(
       coordinates,
-- 
GitLab
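For readers unfamiliar with how these patterns become Ivy rules: below is a rough, REPL-style sketch of what one exclusion such as `org.apache.spark:spark-mllib-local_*:*` could look like when built directly against Ivy's API. It mirrors the shape of Spark's `createExclusion` helper but is an assumption-laden illustration, not the actual Spark implementation; the `"default"` configuration name is a stand-in.

```scala
import org.apache.ivy.core.module.descriptor.DefaultExcludeRule
import org.apache.ivy.core.module.id.{ArtifactId, ModuleId}
import org.apache.ivy.plugins.matcher.GlobPatternMatcher

// Sketch: build a glob-based exclude rule for one Spark component.
val moduleId = new ModuleId("org.apache.spark", "spark-mllib-local_*")
// Match any artifact name, type, and extension within that module.
val artifactId = new ArtifactId(moduleId, "*", "*", "*")
val rule = new DefaultExcludeRule(artifactId, GlobPatternMatcher.INSTANCE, null)
rule.addConfiguration("default") // stand-in for the ivy conf name

// Attaching this rule to a module descriptor (md.addExcludeRule(rule))
// keeps Ivy from resolving artifacts matching spark-mllib-local_*.
```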