From 6b41195bca65de6236168d96758f93b85f1dd7ca Mon Sep 17 00:00:00 2001
From: hyukjinkwon <gurwls223@gmail.com>
Date: Wed, 7 Sep 2016 19:24:03 +0900
Subject: [PATCH] [SPARK-17339][SPARKR][CORE] Fix some R tests and use
 Path.toUri in SparkContext for Windows paths in SparkR

## What changes were proposed in this pull request?

This PR fixes the Windows path issues in several APIs. Please refer to
https://issues.apache.org/jira/browse/SPARK-17339 for more details.

## How was this patch tested?

Tests via AppVeyor CI - https://ci.appveyor.com/project/HyukjinKwon/spark/build/82-SPARK-17339-fix-r

Also, manually,

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #14960 from HyukjinKwon/SPARK-17339.
---
 R/pkg/inst/tests/testthat/test_mllib.R              | 14 ++++++++++----
 .../main/scala/org/apache/spark/SparkContext.scala  |  4 ++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R
index ca25f2c7e8..ac896cfbcf 100644
--- a/R/pkg/inst/tests/testthat/test_mllib.R
+++ b/R/pkg/inst/tests/testthat/test_mllib.R
@@ -22,6 +22,11 @@ context("MLlib functions")
 # Tests for MLlib functions in SparkR
 sparkSession <- sparkR.session(enableHiveSupport = FALSE)
 
+absoluteSparkPath <- function(x) {
+  sparkHome <- sparkR.conf("spark.home")
+  file.path(sparkHome, x)
+}
+
 test_that("formula of spark.glm", {
   training <- suppressWarnings(createDataFrame(iris))
   # directly calling the spark API
@@ -354,7 +359,8 @@ test_that("spark.kmeans", {
 })
 
 test_that("spark.mlp", {
-  df <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
+  df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"),
+                source = "libsvm")
   model <- spark.mlp(df, blockSize = 128, layers = c(4, 5, 4, 3), solver = "l-bfgs", maxIter = 100,
                      tol = 0.5, stepSize = 1, seed = 1)
 
@@ -616,7 +622,7 @@ test_that("spark.gaussianMixture", {
 })
 
 test_that("spark.lda with libsvm", {
-  text <- read.df("data/mllib/sample_lda_libsvm_data.txt", source = "libsvm")
+  text <- read.df(absoluteSparkPath("data/mllib/sample_lda_libsvm_data.txt"), source = "libsvm")
   model <- spark.lda(text, optimizer = "em")
 
   stats <- summary(model, 10)
@@ -652,7 +658,7 @@ test_that("spark.lda with libsvm", {
 })
 
 test_that("spark.lda with text input", {
-  text <- read.text("data/mllib/sample_lda_data.txt")
+  text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt"))
   model <- spark.lda(text, optimizer = "online", features = "value")
 
   stats <- summary(model)
@@ -688,7 +694,7 @@ test_that("spark.lda with text input", {
 })
 
 test_that("spark.posterior and spark.perplexity", {
-  text <- read.text("data/mllib/sample_lda_data.txt")
+  text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt"))
   model <- spark.lda(text, features = "value", k = 3)
 
   # Assert perplexities are equal
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 744d5d0f7a..4aa795a58a 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -992,7 +992,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
 
     // This is a hack to enforce loading hdfs-site.xml.
     // See SPARK-11227 for details.
-    FileSystem.get(new URI(path), hadoopConfiguration)
+    FileSystem.getLocal(hadoopConfiguration)
 
     // A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it.
     val confBroadcast = broadcast(new SerializableConfiguration(hadoopConfiguration))
@@ -1081,7 +1081,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
 
     // This is a hack to enforce loading hdfs-site.xml.
     // See SPARK-11227 for details.
-    FileSystem.get(new URI(path), hadoopConfiguration)
+    FileSystem.getLocal(hadoopConfiguration)
 
     // The call to NewHadoopJob automatically adds security credentials to conf,
     // so we don't need to explicitly add them ourselves
--
GitLab
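
Editor's note on the core change above, for context. On Windows, user-supplied paths look like `C:\Users\...\file.txt`, which `java.net.URI` refuses to parse, while the `FileSystem.get(new URI(path), hadoopConfiguration)` call in `SparkContext` only existed to force `hdfs-site.xml` to load (SPARK-11227). The sketch below is not code from the patch; it uses a made-up Windows path and assumes the Hadoop client jars are on the classpath, and only illustrates why `FileSystem.getLocal(hadoopConfiguration)` side-steps the parsing problem.

```scala
// Illustrative sketch only -- not part of the patch. Assumes the Hadoop client jars
// are on the classpath; the Windows path below is a hypothetical example.
import java.net.{URI, URISyntaxException}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem

object WindowsPathSketch {
  def main(args: Array[String]): Unit = {
    val winPath = "C:\\Users\\spark\\data\\mllib\\sample_lda_data.txt" // hypothetical path

    // Before the patch, SparkContext handed the raw user path to java.net.URI.
    // URI parsing rejects backslashes (and treats "C:" as a scheme), so the
    // hadoopFile/newAPIHadoopFile code paths failed on Windows before touching any data.
    try {
      new URI(winPath)
    } catch {
      case e: URISyntaxException =>
        println(s"new URI(path) rejects Windows-style paths: ${e.getMessage}")
    }

    // The URI was built only to trigger the hdfs-site.xml loading hack (SPARK-11227),
    // not to resolve the user path, so the patch asks for the local FileSystem instead,
    // which needs no path at all. (On Windows, new Path(path).toUri would also yield a
    // well-formed URI, since Hadoop's Path normalizes separators and drive letters.)
    FileSystem.getLocal(new Configuration())
    println("FileSystem.getLocal(conf) loads the configuration without parsing any path")
  }
}
```

The R-side change serves the same goal from the other direction: the new `absoluteSparkPath()` test helper hands `read.df`/`read.text` absolute paths built from `spark.home` rather than relative ones, so the data sources no longer depend on how the working directory resolves on Windows.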