diff --git a/README.md b/README.md
index 8f686743f042ba8dbb6b9d2b672357b83ad9c864..873ec9882f9a8befd103363214338dfb2c6f2ad7 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ This README file only contains basic setup instructions.
 ## Building
 
 Spark requires Scala 2.10. The project is built using Simple Build Tool (SBT),
-which can be obtained from [here](http://www.scala-sbt.org). To build Spark and its example programs, run:
+which can be obtained [here](http://www.scala-sbt.org). To build Spark and its example programs, run:
 
     sbt assembly
 
@@ -38,24 +38,11 @@ locally with one thread, or "local[N]" to run locally with N threads.
 
 ## Running tests
 
-### With sbt (Much faster to run compared to maven)
-Once you have built spark with `sbt assembly` mentioned in [Building](#Building) section. Test suits can be run as follows using sbt.
+Testing first requires [Building](#Building) Spark. Once Spark is built, tests
+can be run using:
 
 `sbt test`
 
-### With maven.
-1. Export these necessary environment variables as follows.
-
- `export SCALA_HOME=<scala distribution>`
-
- `export MAVEN_OPTS="-Xmx1512m -XX:MaxPermSize=512m"`
-
-2. Build assembly by
-`mvn package -DskipTests`
-
-3. Run tests
-`mvn test`
-
 ## A Note About Hadoop Versions
 
 Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index c6f6883b011527d17917625c16967318835b8a62..e80e43af6d4e78478d882c5e935805ccacea5216 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -173,7 +173,8 @@ class SparkContext(
       value <- Option(System.getenv(key))) {
     executorEnvs(key) = value
   }
-  // A workaround for SPARK_TESTING and SPARK_HOME
+  // Convert java options to env vars as a work around
+  // since we can't set env vars directly in sbt.
   for { (envKey, propKey) <- Seq(("SPARK_HOME", "spark.home"), ("SPARK_TESTING", "spark.testing"))
     value <- Option(System.getenv(envKey)).orElse(Option(System.getProperty(propKey)))} {
     executorEnvs(envKey) = value
diff --git a/core/src/test/scala/org/apache/spark/DriverSuite.scala b/core/src/test/scala/org/apache/spark/DriverSuite.scala
index 89c5631ad8f07a49fc39e85820bb9b5d800cde42..7e1e55fa3b29962426d4289399e2195ea63f107e 100644
--- a/core/src/test/scala/org/apache/spark/DriverSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DriverSuite.scala
@@ -30,8 +30,7 @@ import org.apache.spark.util.Utils
 
 class DriverSuite extends FunSuite with Timeouts {
   test("driver should exit after finishing") {
-    val sparkHome = Option(System.getenv("SPARK_HOME"))
-      .orElse(Option(System.getProperty("spark.home"))).get
+    val sparkHome = sys.env.get("SPARK_HOME").orElse(sys.props.get("spark.home")).get
     // Regression test for SPARK-530: "Spark driver process doesn't exit after finishing"
     val masters = Table(("master"), ("local"), ("local-cluster[2,1,512]"))
     forAll(masters) { (master: String) =>
diff --git a/core/src/test/scala/org/apache/spark/FileServerSuite.scala b/core/src/test/scala/org/apache/spark/FileServerSuite.scala
index 506f7484fb7316f61223143de4ea207819cbe302..a2eb9a4e84696c9326c5523e673add86b9d620d8 100644
--- a/core/src/test/scala/org/apache/spark/FileServerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/FileServerSuite.scala
@@ -27,54 +27,39 @@ import org.scalatest.FunSuite
 class FileServerSuite extends FunSuite with LocalSparkContext {
 
   @transient var tmpFile: File = _
-  @transient var testJarFile: String = _
-
+  @transient var tmpJarUrl: String = _
 
   override def beforeAll() {
     super.beforeAll()
-    val buffer = new Array[Byte](10240)
-    val tmpdir = new File(Files.createTempDir(), "test")
-    tmpdir.mkdir()
-    val tmpJarEntry = new File(tmpdir, "FileServerSuite2.txt")
-    val pw = new PrintWriter(tmpJarEntry)
-    pw.println("test String in the file named FileServerSuite2.txt")
+    val tmpDir = new File(Files.createTempDir(), "test")
+    tmpDir.mkdir()
+
+    val textFile = new File(tmpDir, "FileServerSuite.txt")
+    val pw = new PrintWriter(textFile)
+    pw.println("100")
     pw.close()
-    // The ugliest code possible, was translated from java.
-    val tmpFile2 = new File(tmpdir, "test.jar")
-    val stream = new FileOutputStream(tmpFile2)
-    val jar = new JarOutputStream(stream, new java.util.jar.Manifest())
-    val jarAdd = new JarEntry(tmpJarEntry.getName)
-    jarAdd.setTime(tmpJarEntry.lastModified)
-    jar.putNextEntry(jarAdd)
-    val in = new FileInputStream(tmpJarEntry)
+
+    val jarFile = new File(tmpDir, "test.jar")
+    val jarStream = new FileOutputStream(jarFile)
+    val jar = new JarOutputStream(jarStream, new java.util.jar.Manifest())
+
+    val jarEntry = new JarEntry(textFile.getName)
+    jar.putNextEntry(jarEntry)
+
+    val in = new FileInputStream(textFile)
+    val buffer = new Array[Byte](10240)
     var nRead = 0
-    while (nRead <= 0) { 
+    while (nRead <= 0) {
       nRead = in.read(buffer, 0, buffer.length)
       jar.write(buffer, 0, nRead)
     }
+
     in.close()
     jar.close()
-    stream.close()
-    testJarFile = tmpFile2.toURI.toURL.toString
-  }
-
-  override def beforeEach() {
-    super.beforeEach()
-    // Create a sample text file
-    val tmpdir = new File(Files.createTempDir(), "test")
-    tmpdir.mkdir()
-    tmpFile = new File(tmpdir, "FileServerSuite.txt")
-    val pw = new PrintWriter(tmpFile)
-    pw.println("100")
-    pw.close()
-  }
+    jarStream.close()
 
-  override def afterEach() {
-    super.afterEach()
-    // Clean up downloaded file
-    if (tmpFile.exists) {
-      tmpFile.delete()
-    }
+    tmpFile = textFile
+    tmpJarUrl = jarFile.toURI.toURL.toString
   }
 
   test("Distributing files locally") {
@@ -108,10 +93,10 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
 
   test ("Dynamically adding JARS locally") {
     sc = new SparkContext("local[4]", "test")
-    sc.addJar(testJarFile)
+    sc.addJar(tmpJarUrl)
     val testData = Array((1, 1))
-    sc.parallelize(testData).foreach { (x) =>
-      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite2.txt") == null) {
+    sc.parallelize(testData).foreach { x =>
+      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) {
         throw new SparkException("jar not added")
       }
     }
@@ -133,10 +118,10 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
 
   test ("Dynamically adding JARS on a standalone cluster") {
     sc = new SparkContext("local-cluster[1,1,512]", "test")
-    sc.addJar(testJarFile)
+    sc.addJar(tmpJarUrl)
     val testData = Array((1,1))
-    sc.parallelize(testData).foreach { (x) =>
-      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite2.txt") == null) {
+    sc.parallelize(testData).foreach { x =>
+      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) {
         throw new SparkException("jar not added")
       }
     }
@@ -144,10 +129,10 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
 
   test ("Dynamically adding JARS on a standalone cluster using local: URL") {
     sc = new SparkContext("local-cluster[1,1,512]", "test")
-    sc.addJar(testJarFile.replace("file", "local"))
+    sc.addJar(tmpJarUrl.replace("file", "local"))
     val testData = Array((1,1))
-    sc.parallelize(testData).foreach { (x) =>
-      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite2.txt") == null) {
+    sc.parallelize(testData).foreach { x =>
+      if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) {
         throw new SparkException("jar not added")
       }
     }
diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
index 894a72284b3fe840893d0e55b29a59be8294b940..f58b1ee05a64b5f1ac21cbbb3912d5e23d818ac6 100644
--- a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala
@@ -19,18 +19,14 @@ package org.apache.spark.deploy.worker
 
 import java.io.File
 
-import scala.util.Try
-
 import org.scalatest.FunSuite
 
 import org.apache.spark.deploy.{ExecutorState, Command, ApplicationDescription}
 
-
 class ExecutorRunnerTest extends FunSuite {
   test("command includes appId") {
     def f(s:String) = new File(s)
-    val sparkHome = Try(sys.env("SPARK_HOME")).toOption
-      .orElse(Option(System.getProperty("spark.home"))).get
+    val sparkHome = sys.env.get("SPARK_HOME").orElse(sys.props.get("spark.home")).get
     val appDesc = new ApplicationDescription("app name", 8, 500, Command("foo", Seq(),Map()),
       sparkHome, "appUiUrl")
     val appId = "12345-worker321-9876"
diff --git a/make-distribution.sh b/make-distribution.sh
index a2c8e645971432f75251efa665c82ee7deb7c124..8ae8a4cf742a161451ca13c12c08282b17f69382 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -45,7 +45,8 @@ DISTDIR="$FWDIR/dist"
 export TERM=dumb   # Prevents color codes in SBT output
 
 if ! test `which sbt` ;then
-    echo -e "You need sbt installed and available on path, please follow the instructions here: http://www.scala-sbt.org/release/docs/Getting-Started/Setup.html"
+    echo -e "You need sbt installed and available on your path."
+    echo -e "Download sbt from http://www.scala-sbt.org/"
     exit -1;
 fi
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 8290e7cf438c743e28674ad9fe3b2a4a84f4d723..5f57c964bde615b5323d62da2cb703b5b2a7a7a8 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -72,6 +72,7 @@ object SparkBuild extends Build {
   val sparkHome = System.getProperty("user.dir")
   System.setProperty("spark.home", sparkHome)
   System.setProperty("spark.testing", "1")
+  // Allows build configuration to be set through environment variables
   lazy val hadoopVersion = Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION)
   lazy val isNewHadoop = Properties.envOrNone("SPARK_IS_NEW_HADOOP") match {
diff --git a/python/lib/py4j-0.8.1-src.zip b/python/lib/py4j-0.8.1-src.zip
new file mode 100644
index 0000000000000000000000000000000000000000..2069a328d1f2e6a94df057c6a3930048ae3f3832
Binary files /dev/null and b/python/lib/py4j-0.8.1-src.zip differ
diff --git a/python/lib/py4j-0.8.1.zip b/python/lib/py4j-0.8.1.zip
deleted file mode 100644
index 3231e31164e51daad4254e8f09a49336205563b8..0000000000000000000000000000000000000000
Binary files a/python/lib/py4j-0.8.1.zip and /dev/null differ
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index 2b2c3a061a71d49b0bc5f9815be1bfa2d90cb191..a51d5af79b0dafd6e685b21d76efd69405e613aa 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -40,7 +40,7 @@ Public classes:
 
 import sys
 import os
-sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j0.7.egg"))
+sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j-0.8.1-src.zip"))
 
 from pyspark.conf import SparkConf