diff --git a/core/pom.xml b/core/pom.xml
index fc42f48973fe90b8ba8df3c2ca77da689b415c17..262a3320db106e7ebc57e7c0fa5c5f22873a1397 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -381,35 +381,6 @@
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
     <plugins>
-      <!-- Unzip py4j so we can include its files in the jar -->
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-antrun-plugin</artifactId>
-        <executions>
-          <execution>
-            <phase>generate-resources</phase>
-            <goals>
-              <goal>run</goal>
-            </goals>
-          </execution>
-        </executions>
-        <configuration>
-          <target>
-            <unzip src="../python/lib/py4j-0.8.2.1-src.zip" dest="../python/build" />
-          </target>
-        </configuration>
-      </plugin>
-      <plugin>
-        <artifactId>maven-clean-plugin</artifactId>
-        <configuration>
-          <filesets>
-            <fileset>
-              <directory>${basedir}/../python/build</directory>
-            </fileset>
-          </filesets>
-          <verbose>true</verbose>
-        </configuration>
-      </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-dependency-plugin</artifactId>
@@ -438,24 +409,6 @@
         </executions>
       </plugin>
     </plugins>
-
-    <resources>
-      <resource>
-        <directory>src/main/resources</directory>
-      </resource>
-      <resource>
-        <directory>../python</directory>
-        <includes>
-          <include>pyspark/*.py</include>
-        </includes>
-      </resource>
-      <resource>
-        <directory>../python/build</directory>
-        <includes>
-          <include>py4j/*.py</include>
-        </includes>
-      </resource>
-    </resources>
   </build>
 
   <profiles>
diff --git a/mllib/pom.xml b/mllib/pom.xml
index a3c57ae26000baaa84b3de73ca99e1d709ccc8b9..0c07ca1a62fd31085ea884255ee9c9b7b6e9c54d 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -141,16 +141,5 @@
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-    <resources>
-      <resource>
-        <directory>../python</directory>
-        <includes>
-          <include>pyspark/mllib/*.py</include>
-          <include>pyspark/mllib/stat/*.py</include>
-          <include>pyspark/ml/*.py</include>
-          <include>pyspark/ml/param/*.py</include>
-        </includes>
-      </resource>
-    </resources>
   </build>
 </project>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 186345af0e60e73d23baa146890886e1b7d5db04..1b87e4e98bd83dd2ad447ccae7b71f813f19de12 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -168,7 +168,7 @@ object SparkBuild extends PomBuild {
   /* Enable Assembly for all assembly projects */
   assemblyProjects.foreach(enable(Assembly.settings))
 
-  /* Package pyspark artifacts in the main assembly. */
+  /* Package pyspark artifacts in a separate zip file for YARN. */
   enable(PySparkAssembly.settings)(assembly)
 
   /* Enable unidoc only for the root spark project */
@@ -373,22 +373,15 @@ object PySparkAssembly {
   import java.util.zip.{ZipOutputStream, ZipEntry}
 
   lazy val settings = Seq(
-    unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" },
     // Use a resource generator to copy all .py files from python/pyspark into a managed directory
     // to be included in the assembly. We can't just add "python/" to the assembly's resource dir
     // list since that will copy unneeded / unwanted files.
     resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File =>
       val src = new File(BuildCommons.sparkHome, "python/pyspark")
-
       val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip")
       zipFile.delete()
       zipRecursive(src, zipFile)
-
-      val dst = new File(outDir, "pyspark")
-      if (!dst.isDirectory()) {
-        require(dst.mkdirs())
-      }
-      copy(src, dst)
+      Seq[File]()
     }
   )
 
@@ -416,42 +409,11 @@ object PySparkAssembly {
           output.write(buf, 0, n)
         }
       }
+      output.closeEntry()
       in.close()
     }
   }
 
-  private def copy(src: File, dst: File): Seq[File] = {
-    src.listFiles().flatMap { f =>
-      val child = new File(dst, f.getName())
-      if (f.isDirectory()) {
-        child.mkdir()
-        copy(f, child)
-      } else if (f.getName().endsWith(".py")) {
-        var in: Option[FileInputStream] = None
-        var out: Option[FileOutputStream] = None
-        try {
-          in = Some(new FileInputStream(f))
-          out = Some(new FileOutputStream(child))
-
-          val bytes = new Array[Byte](1024)
-          var read = 0
-          while (read >= 0) {
-            read = in.get.read(bytes)
-            if (read > 0) {
-              out.get.write(bytes, 0, read)
-            }
-          }
-
-          Some(child)
-        } finally {
-          in.foreach(_.close())
-          out.foreach(_.close())
-        }
-      } else {
-        None
-      }
-    }
-  }
 }
 
 object Unidoc {
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 7d274a73e079ff2358419d2a86f9503486558a1d..ffe95bb49188f894dfa77cd9c943feb2f5c4bd0e 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -103,13 +103,5 @@
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-    <resources>
-      <resource>
-        <directory>../../python</directory>
-        <includes>
-          <include>pyspark/sql/*.py</include>
-        </includes>
-      </resource>
-    </resources>
   </build>
 </project>
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 5ca55a4f680bbaf35653ced2a9995ae298bd7485..5ab7f4472c38bfdda3fe02d4e5c0adc07c105ac0 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -105,13 +105,5 @@
         </configuration>
       </plugin>
     </plugins>
-    <resources>
-      <resource>
-        <directory>../python</directory>
-        <includes>
-          <include>pyspark/streaming/*.py</include>
-        </includes>
-      </resource>
-    </resources>
   </build>
 </project>
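Note on the mechanism (not part of the patch): instead of copying pyspark's .py files into each module's jar via Maven <resources> sections, the build now produces a single python/lib/pyspark.zip through the sbt resource generator above, and that archive is shipped separately for YARN. Below is a minimal, self-contained Scala sketch of the same recursive-zip technique; the method names mirror zipRecursive/addFilesToZipStream from SparkBuild.scala, but the ZipSketch wrapper and its details are illustrative assumptions, not the committed implementation.

    import java.io.{File, FileInputStream, FileOutputStream}
    import java.util.zip.{ZipEntry, ZipOutputStream}

    object ZipSketch {
      // Zip an entire directory tree into destZipFile (hypothetical standalone wrapper).
      def zipRecursive(source: File, destZipFile: File): Unit = {
        val output = new ZipOutputStream(new FileOutputStream(destZipFile))
        try {
          addFilesToZipStream("", source, output)
        } finally {
          output.close()
        }
      }

      // Walk the tree, streaming each regular file into the archive.
      // Zip entry names use "/" separators regardless of platform.
      private def addFilesToZipStream(parent: String, source: File, output: ZipOutputStream): Unit = {
        if (source.isDirectory()) {
          for (file <- source.listFiles()) {
            addFilesToZipStream(parent + source.getName() + "/", file, output)
          }
        } else {
          val in = new FileInputStream(source)
          try {
            output.putNextEntry(new ZipEntry(parent + source.getName()))
            val buf = new Array[Byte](8192)
            var n = in.read(buf)
            while (n != -1) {
              output.write(buf, 0, n)
              n = in.read(buf)
            }
            // Close the entry explicitly, mirroring the closeEntry() added in the hunk above.
            output.closeEntry()
          } finally {
            in.close()
          }
        }
      }
    }

Used as zipRecursive(new File("python/pyspark"), new File("python/lib/pyspark.zip")), this yields an archive whose entries are rooted at pyspark/, matching what the build writes.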