Commit a1d9e5cc authored by Marcelo Vanzin, committed by Sean Owen

[SPARK-8126] [BUILD] Use custom temp directory during build.

Even with all the efforts to clean up the temp directories created by
unit tests, Spark leaves a lot of garbage in /tmp after a test run.
This change overrides java.io.tmpdir to place those files under the
build directory instead.
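
For context, this works because the JVM's standard temp-file APIs resolve
against the java.io.tmpdir system property. A minimal sketch (mine, not part
of the patch; the printed path is illustrative):

    // Run with -Djava.io.tmpdir=<build dir>/target/tmp: anything that uses
    // the JVM's default temp location now lands under the build tree.
    val f = java.io.File.createTempFile("spark-test-", ".tmp")
    println(f.getAbsolutePath)  // e.g. .../target/tmp/spark-test-1234.tmp
    f.deleteOnExit()            // best-effort; the whole dir is disposable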

After a full sbt unit test run, I was left with more than 400 MB of temp
files. Since they now live under the build directory, they are much easier
to clean up.

Also make a slight change to a unit test so that it no longer pollutes the
source directory with test data.
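
A sketch of the before/after pattern in that test, assuming Spark's
Utils.createTempDir helper (which the diff below imports into the suite):

    // Before: a relative path resolves against the working directory, so
    // running the suite from a source checkout wrote test data into the repo.
    val ivyPath = "dummy" + java.io.File.separator + "ivy"

    // After: a managed temp directory under java.io.tmpdir, which the build
    // now points at target/tmp, keeping the source tree clean.
    val tempIvyPath = Utils.createTempDir(namePrefix = "ivy").getAbsolutePath()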

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #6674 from vanzin/SPARK-8126 and squashes the following commits:

0f8ad41 [Marcelo Vanzin] Make sure tmp dir exists when tests run.
643e916 [Marcelo Vanzin] [MINOR] [BUILD] Use custom temp directory during build.
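
The "make sure tmp dir exists" fix above matters because the JVM does not
create java.io.tmpdir on demand; if the directory is missing, temp-file
creation fails outright. A sketch of the failure mode (hypothetical path):

    // In a JVM started with -Djava.io.tmpdir=/nonexistent/tmp, the very
    // first temp-file request throws java.io.IOException.
    java.io.File.createTempFile("demo-", ".tmp")

Hence both builds create target/tmp up front: Maven via a maven-antrun-plugin
execution and sbt via mkdirs() in BuildCommons, as the diffs below show.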
parent 03ef6be9

core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala

@@ -28,9 +28,12 @@ import org.apache.ivy.plugins.resolver.IBiblioResolver
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate
+import org.apache.spark.util.Utils
 
 class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
 
+  private var tempIvyPath: String = _
+
   private val noOpOutputStream = new OutputStream {
     def write(b: Int) = {}
   }

@@ -47,6 +50,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
     super.beforeAll()
     // We don't want to write logs during testing
     SparkSubmitUtils.printStream = new BufferPrintStream
+    tempIvyPath = Utils.createTempDir(namePrefix = "ivy").getAbsolutePath()
   }
 
   test("incorrect maven coordinate throws error") {

@@ -90,21 +94,20 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
   }
 
   test("ivy path works correctly") {
-    val ivyPath = "dummy" + File.separator + "ivy"
     val md = SparkSubmitUtils.getModuleDescriptor
     val artifacts = for (i <- 0 until 3) yield new MDArtifact(md, s"jar-$i", "jar", "jar")
-    var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(ivyPath))
+    var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(tempIvyPath))
     for (i <- 0 until 3) {
-      val index = jPaths.indexOf(ivyPath)
+      val index = jPaths.indexOf(tempIvyPath)
       assert(index >= 0)
-      jPaths = jPaths.substring(index + ivyPath.length)
+      jPaths = jPaths.substring(index + tempIvyPath.length)
     }
     val main = MavenCoordinate("my.awesome.lib", "mylib", "0.1")
     IvyTestUtils.withRepository(main, None, None) { repo =>
       // end to end
       val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, Option(repo),
-        Option(ivyPath), true)
-      assert(jarPath.indexOf(ivyPath) >= 0, "should use non-default ivy path")
+        Option(tempIvyPath), true)
+      assert(jarPath.indexOf(tempIvyPath) >= 0, "should use non-default ivy path")
     }
   }

@@ -123,13 +126,12 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
       assert(jarPath.indexOf("mylib") >= 0, "should find artifact")
     }
 
     // Local ivy repository with modified home
-    val dummyIvyPath = "dummy" + File.separator + "ivy"
-    val dummyIvyLocal = new File(dummyIvyPath, "local" + File.separator)
+    val dummyIvyLocal = new File(tempIvyPath, "local" + File.separator)
     IvyTestUtils.withRepository(main, None, Some(dummyIvyLocal), true) { repo =>
       val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None,
-        Some(dummyIvyPath), true)
+        Some(tempIvyPath), true)
       assert(jarPath.indexOf("mylib") >= 0, "should find artifact")
-      assert(jarPath.indexOf(dummyIvyPath) >= 0, "should be in new ivy path")
+      assert(jarPath.indexOf(tempIvyPath) >= 0, "should be in new ivy path")
     }
   }

pom.xml

@@ -179,7 +179,7 @@
     <parquet.deps.scope>compile</parquet.deps.scope>
 
     <!--
-      Overridable test home. So that you can call individual pom files directory without
+      Overridable test home. So that you can call individual pom files directly without
       things breaking.
     -->
     <spark.test.home>${session.executionRootDirectory}</spark.test.home>

@@ -1256,6 +1256,7 @@
             <systemProperties>
              <derby.system.durability>test</derby.system.durability>
              <java.awt.headless>true</java.awt.headless>
+             <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
              <spark.test.home>${spark.test.home}</spark.test.home>
              <spark.testing>1</spark.testing>
              <spark.ui.enabled>false</spark.ui.enabled>

@@ -1289,6 +1290,7 @@
             <systemProperties>
              <derby.system.durability>test</derby.system.durability>
              <java.awt.headless>true</java.awt.headless>
+             <java.io.tmpdir>${project.build.directory}/tmp</java.io.tmpdir>
              <spark.test.home>${spark.test.home}</spark.test.home>
              <spark.testing>1</spark.testing>
              <spark.ui.enabled>false</spark.ui.enabled>

@@ -1548,6 +1550,26 @@
           </execution>
         </executions>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-antrun-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>create-tmp-dir</id>
+            <phase>generate-test-resources</phase>
+            <goals>
+              <goal>run</goal>
+            </goals>
+            <configuration>
+              <target>
+                <mkdir dir="${project.build.directory}/tmp" />
+              </target>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
       <!-- Enable surefire and scalatest in all children, in one place: -->
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>

project/SparkBuild.scala

@@ -51,6 +51,11 @@ object BuildCommons {
   // Root project.
   val spark = ProjectRef(buildLocation, "spark")
   val sparkHome = buildLocation
+
+  val testTempDir = s"$sparkHome/target/tmp"
+  if (!new File(testTempDir).isDirectory()) {
+    require(new File(testTempDir).mkdirs())
+  }
 }
 
 object SparkBuild extends PomBuild {

@@ -496,6 +501,7 @@ object TestSettings {
       "SPARK_DIST_CLASSPATH" ->
         (fullClasspath in Test).value.files.map(_.getAbsolutePath).mkString(":").stripSuffix(":"),
       "JAVA_HOME" -> sys.env.get("JAVA_HOME").getOrElse(sys.props("java.home"))),
+    javaOptions in Test += s"-Djava.io.tmpdir=$testTempDir",
     javaOptions in Test += "-Dspark.test.home=" + sparkHome,
     javaOptions in Test += "-Dspark.testing=1",
     javaOptions in Test += "-Dspark.port.maxRetries=100",
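
To sanity-check the plumbing end to end, a hypothetical test (not part of
this patch) could assert that the default temp location now sits under the
build directory when run through sbt or Maven:

    test("java.io.tmpdir points under the build directory") {
      val tmpdir = new java.io.File(sys.props("java.io.tmpdir")).getCanonicalFile
      assert(tmpdir.getName === "tmp")
      assert(tmpdir.getParentFile.getName === "target")
    }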