diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R
index 72386e68de4b322602cb10fc9c01f4956b4e7de4..4ca7aa664e0238c7f26972f1ef02dc64b9631a47 100644
--- a/R/pkg/R/install.R
+++ b/R/pkg/R/install.R
@@ -21,9 +21,9 @@
 #' Download and Install Apache Spark to a Local Directory
 #'
 #' \code{install.spark} downloads and installs Spark to a local directory if
-#' it is not found. The Spark version we use is the same as the SparkR version.
-#' Users can specify a desired Hadoop version, the remote mirror site, and
-#' the directory where the package is installed locally.
+#' it is not found. If SPARK_HOME is set in the environment, and that directory is found, that is
+#' returned. The Spark version we use is the same as the SparkR version. Users can specify a desired
+#' Hadoop version, the remote mirror site, and the directory where the package is installed locally.
 #'
 #' The full url of remote file is inferred from \code{mirrorUrl} and \code{hadoopVersion}.
 #' \code{mirrorUrl} specifies the remote path to a Spark folder. It is followed by a subfolder
@@ -68,6 +68,16 @@
 #' \href{http://spark.apache.org/downloads.html}{Apache Spark}
 install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL,
                           localDir = NULL, overwrite = FALSE) {
+  sparkHome <- Sys.getenv("SPARK_HOME")
+  if (isSparkRShell()) {
+    stopifnot(nchar(sparkHome) > 0)
+    message("Spark is already running in sparkR shell.")
+    return(invisible(sparkHome))
+  } else if (!is.na(file.info(sparkHome)$isdir)) {
+    message("Spark package found in SPARK_HOME: ", sparkHome)
+    return(invisible(sparkHome))
+  }
+
   version <- paste0("spark-", packageVersion("SparkR"))
   hadoopVersion <- tolower(hadoopVersion)
   hadoopVersionName <- hadoopVersionName(hadoopVersion)
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index 870e76b7292fa83ecc72d58950d3082364182605..61773ed3ee8c0f9022a5a5dce897348e301d54e3 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -588,13 +588,11 @@ processSparkPackages <- function(packages) {
 sparkCheckInstall <- function(sparkHome, master, deployMode) {
   if (!isSparkRShell()) {
     if (!is.na(file.info(sparkHome)$isdir)) {
-      msg <- paste0("Spark package found in SPARK_HOME: ", sparkHome)
-      message(msg)
+      message("Spark package found in SPARK_HOME: ", sparkHome)
       NULL
     } else {
       if (interactive() || isMasterLocal(master)) {
-        msg <- paste0("Spark not found in SPARK_HOME: ", sparkHome)
-        message(msg)
+        message("Spark not found in SPARK_HOME: ", sparkHome)
         packageLocalDir <- install.spark()
         packageLocalDir
       } else if (isClientMode(master) || deployMode == "client") {
diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R
index 1d04656ac2594659851ec86c4b4a3cd7acafeee9..ab8d1ca0199411485bf37d9dbab3d6aa0648f9ee 100644
--- a/R/pkg/tests/run-all.R
+++ b/R/pkg/tests/run-all.R
@@ -21,4 +21,7 @@ library(SparkR)
 # Turn all warnings into errors
 options("warn" = 2)
 
+# Setup global test environment
+install.spark()
+
 test_package("SparkR")
diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd
index f13e0b3a18f78849a8b91a4fe83dd24fe512eefe..a742484c4cda0ba8ef312018bbc711c20bb58bdd 100644
--- a/R/pkg/vignettes/sparkr-vignettes.Rmd
+++ b/R/pkg/vignettes/sparkr-vignettes.Rmd
@@ -44,6 +44,9 @@ library(SparkR)
 
 We use default settings in which it runs in local mode. It auto downloads Spark package in the background if no previous installation is found. For more details about setup, see [Spark Session](#SetupSparkSession).
 
+```{r, include=FALSE}
+install.spark()
+```
 ```{r, message=FALSE, results="hide"}
 sparkR.session()
 ```
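
For anyone reviewing the behavior change, here is a minimal sketch of how the patched `install.spark()` resolves SPARK_HOME. The path `/opt/spark` is hypothetical, and the second call performs a real download.

```r
library(SparkR)

# Case 1: SPARK_HOME points at an existing directory. With this patch,
# install.spark() no longer downloads anything; it prints
# "Spark package found in SPARK_HOME: ..." and invisibly returns that path.
Sys.setenv(SPARK_HOME = "/opt/spark")  # hypothetical local install
home <- install.spark()
print(home)

# Case 2: SPARK_HOME is unset (or not an existing directory). The original
# download logic runs, honoring hadoopVersion, mirrorUrl, and localDir.
Sys.unsetenv("SPARK_HOME")
install.spark(hadoopVersion = "2.7")

# In the sparkR shell, SPARK_HOME is expected to be set already;
# install.spark() only messages that Spark is running and returns that path.
```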