diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index ca25f2c7e8263eb785413a3f3b0dd15da6d2380f..ac896cfbcfff766d06e57439f6b170a2dd194f24 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -22,6 +22,11 @@ context("MLlib functions")
 # Tests for MLlib functions in SparkR
 sparkSession <- sparkR.session(enableHiveSupport = FALSE)
 
+absoluteSparkPath <- function(x) {
+  sparkHome <- sparkR.conf("spark.home")
+  file.path(sparkHome, x)
+}
+
 test_that("formula of spark.glm", {
   training <- suppressWarnings(createDataFrame(iris))
   # directly calling the spark API
@@ -354,7 +359,8 @@ test_that("spark.kmeans", {
 })
 
 test_that("spark.mlp", {
-  df <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
+  df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"),
+                source = "libsvm")
   model <- spark.mlp(df, blockSize = 128, layers = c(4, 5, 4, 3), solver = "l-bfgs", maxIter = 100,
                      tol = 0.5, stepSize = 1, seed = 1)
 
@@ -616,7 +622,7 @@ test_that("spark.gaussianMixture", {
 })
 
 test_that("spark.lda with libsvm", {
-  text <- read.df("data/mllib/sample_lda_libsvm_data.txt", source = "libsvm")
+  text <- read.df(absoluteSparkPath("data/mllib/sample_lda_libsvm_data.txt"), source = "libsvm")
   model <- spark.lda(text, optimizer = "em")
 
   stats <- summary(model, 10)
@@ -652,7 +658,7 @@ test_that("spark.lda with libsvm", {
 })
 
 test_that("spark.lda with text input", {
-  text <- read.text("data/mllib/sample_lda_data.txt")
+  text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt"))
   model <- spark.lda(text, optimizer = "online", features = "value")
 
   stats <- summary(model)
@@ -688,7 +694,7 @@ test_that("spark.lda with text input", {
 })
 
 test_that("spark.posterior and spark.perplexity", {
-  text <- read.text("data/mllib/sample_lda_data.txt")
+  text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt"))
   model <- spark.lda(text, features = "value", k = 3)
 
   # Assert perplexities are equal
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 744d5d0f7aa8e7acb1be18accede84cea0445fe0..4aa795a58a28e54c6280439c336e4af8f3c0df40 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -992,7 +992,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
 
     // This is a hack to enforce loading hdfs-site.xml.
     // See SPARK-11227 for details.
-    FileSystem.get(new URI(path), hadoopConfiguration)
+    FileSystem.getLocal(hadoopConfiguration)
 
     // A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it.
     val confBroadcast = broadcast(new SerializableConfiguration(hadoopConfiguration))
@@ -1081,7 +1081,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
 
     // This is a hack to enforce loading hdfs-site.xml.
     // See SPARK-11227 for details.
-    FileSystem.get(new URI(path), hadoopConfiguration)
+    FileSystem.getLocal(hadoopConfiguration)
 
     // The call to NewHadoopJob automatically adds security credentials to conf,
     // so we don't need to explicitly add them ourselves
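
Note on the test_mllib.R hunks: the new absoluteSparkPath helper resolves the bundled sample-data files against spark.home, so the tests no longer assume the test runner's working directory is the Spark checkout root. Below is a minimal, hypothetical Scala counterpart of that helper, not part of this patch; the SparkPathUtil object and method names are invented, only to illustrate the same idea for JVM-side tests:

    import java.nio.file.Paths
    import org.apache.spark.SparkConf

    object SparkPathUtil {
      // Hypothetical counterpart of the R absoluteSparkPath helper:
      // resolve a repository-relative data file against spark.home so
      // the location does not depend on the working directory.
      def absoluteSparkPath(conf: SparkConf, relative: String): String = {
        // SparkConf.get throws NoSuchElementException if spark.home is unset.
        val sparkHome = conf.get("spark.home")
        Paths.get(sparkHome, relative).toString
      }
    }

For example, SparkPathUtil.absoluteSparkPath(sc.getConf, "data/mllib/sample_lda_data.txt") would point at the sample file under the configured Spark home, mirroring what the R tests now do via sparkR.conf("spark.home").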
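
Note on the SparkContext.scala hunks: the removed FileSystem.get(new URI(path), hadoopConfiguration) call existed only for its side effect of forcing hdfs-site.xml to load (the SPARK-11227 hack); its return value was discarded. Parsing path as a URI, however, can fail for inputs hadoopFile otherwise accepts, such as paths containing spaces. FileSystem.getLocal(hadoopConfiguration) keeps the side effect without touching path. The following is a standalone sketch of that side effect, assuming the HDFS client is on the classpath; the dfs.replication lookup is only an illustration:

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.FileSystem

    object HdfsSiteLoading {
      def main(args: Array[String]): Unit = {
        val conf = new Configuration()

        // Touching any FileSystem factory method initializes the
        // FileSystem class; with the HDFS client on the classpath, that
        // class loading registers hdfs-site.xml as a default
        // Configuration resource (the behavior SPARK-11227 relies on).
        // Unlike FileSystem.get(new URI(path), conf), getLocal never
        // parses a path string, so malformed or space-containing paths
        // cannot trigger a URISyntaxException here.
        FileSystem.getLocal(conf)

        // hdfs-site.xml values (if present on the classpath) are now
        // visible through the Configuration.
        println(conf.get("dfs.replication"))
      }
    }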