From 77202a6c57e6ac2438cdb6bd232a187b6734fa2b Mon Sep 17 00:00:00 2001
From: Felix Cheung <felixcheung_m@hotmail.com>
Date: Wed, 18 Jan 2017 09:53:14 -0800
Subject: [PATCH] [SPARK-19231][SPARKR] add error handling for download and
 untar for Spark release

## What changes were proposed in this pull request?

When SparkR is running as a package and needs to download the Spark release distribution, we need to handle errors from both the download and the untar steps and clean up any partial files; otherwise the install gets stuck.

## How was this patch tested?

Manually.

Author: Felix Cheung <felixcheung_m@hotmail.com>

Closes #16589 from felixcheung/rtarreturncode.

(cherry picked from commit 278fa1eb305220a85c816c948932d6af8fa619aa)
Signed-off-by: Felix Cheung <felixcheung@apache.org>
---
 R/pkg/R/install.R | 55 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 15 deletions(-)

diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R
index cb6bbe5946..72386e68de 100644
--- a/R/pkg/R/install.R
+++ b/R/pkg/R/install.R
@@ -54,7 +54,7 @@
 #' }
 #' @param overwrite If \code{TRUE}, download and overwrite the existing tar file in localDir
 #'                  and force re-install Spark (in case the local directory or file is corrupted)
-#' @return \code{install.spark} returns the local directory where Spark is found or installed
+#' @return the (invisible) local directory where Spark is found or installed
 #' @rdname install.spark
 #' @name install.spark
 #' @aliases install.spark
@@ -115,17 +115,35 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL,
   } else {
     if (releaseUrl != "") {
       message("Downloading from alternate URL:\n- ", releaseUrl)
-      downloadUrl(releaseUrl, packageLocalPath, paste0("Fetch failed from ", releaseUrl))
+      success <- downloadUrl(releaseUrl, packageLocalPath)
+      if (!success) {
+        unlink(packageLocalPath)
+        stop(paste0("Fetch failed from ", releaseUrl))
+      }
     } else {
       robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath)
     }
   }
 
   message(sprintf("Installing to %s", localDir))
-  untar(tarfile = packageLocalPath, exdir = localDir)
-  if (!tarExists || overwrite) {
+  # There are two ways untar can fail - untar could stop() on errors like incomplete block on file
+  # or, tar command can return failure code
+  success <- tryCatch(untar(tarfile = packageLocalPath, exdir = localDir) == 0,
+                      error = function(e) {
+                        message(e)
+                        message()
+                        FALSE
+                      },
+                      warning = function(w) {
+                        # Treat warning as error, add an empty line with message()
+                        message(w)
+                        message()
+                        FALSE
+                      })
+  if (!tarExists || overwrite || !success) {
     unlink(packageLocalPath)
   }
+  if (!success) stop("Extract archive failed.")
   message("DONE.")
   Sys.setenv(SPARK_HOME = packageLocalDir)
   message(paste("SPARK_HOME set to", packageLocalDir))
@@ -135,8 +153,7 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL,
 robustDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) {
   # step 1: use user-provided url
   if (!is.null(mirrorUrl)) {
-    msg <- sprintf("Use user-provided mirror site: %s.", mirrorUrl)
-    message(msg)
+    message("Use user-provided mirror site: ", mirrorUrl)
     success <- directDownloadTar(mirrorUrl, version, hadoopVersion,
                                  packageName, packageLocalPath)
     if (success) {
@@ -156,7 +173,7 @@ robustDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, pa
                                  packageName, packageLocalPath)
     if (success) return()
   } else {
-    message("Unable to find preferred mirror site.")
+    message("Unable to download from preferred mirror site: ", mirrorUrl)
   }
 
   # step 3: use backup option
@@ -165,8 +182,11 @@ robustDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, pa
   success <- directDownloadTar(mirrorUrl, version, hadoopVersion,
                                packageName, packageLocalPath)
   if (success) {
-    return(packageLocalPath)
+    return()
   } else {
+    # remove any partially downloaded file
+    unlink(packageLocalPath)
+    message("Unable to download from default mirror site: ", mirrorUrl)
     msg <- sprintf(paste("Unable to download Spark %s for Hadoop %s.",
                          "Please check network connection, Hadoop version,",
                          "or provide other mirror sites."),
@@ -201,14 +221,20 @@ directDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, pa
   msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion),
                  packageRemotePath)
   message(msg)
-  downloadUrl(packageRemotePath, packageLocalPath, paste0("Fetch failed from ", mirrorUrl))
+  downloadUrl(packageRemotePath, packageLocalPath)
 }
 
-downloadUrl <- function(remotePath, localPath, errorMessage) {
+downloadUrl <- function(remotePath, localPath) {
   isFail <- tryCatch(download.file(remotePath, localPath),
                      error = function(e) {
-                       message(errorMessage)
-                       print(e)
+                       message(e)
+                       message()
+                       TRUE
+                     },
+                     warning = function(w) {
+                       # Treat warning as error, add an empty line with message()
+                       message(w)
+                       message()
                        TRUE
                      })
   !isFail
@@ -234,10 +260,9 @@ sparkCachePath <- function() {
   if (.Platform$OS.type == "windows") {
     winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
     if (is.na(winAppPath)) {
-      msg <- paste("%LOCALAPPDATA% not found.",
+      stop(paste("%LOCALAPPDATA% not found.",
                    "Please define the environment variable",
-                   "or restart and enter an installation path in localDir.")
-      stop(msg)
+                   "or restart and enter an installation path in localDir."))
     } else {
       path <- file.path(winAppPath, "Apache", "Spark", "Cache")
     }
-- 
GitLab
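
The pattern the diff applies in `install.spark` and `downloadUrl` generalizes: treat both errors and warnings from `download.file()`/`untar()` as failures, remove partial artifacts with `unlink()`, and only then `stop()`. Below is a minimal self-contained sketch of that pattern (placed after the `-- ` scissors line, so it is commentary rather than patch content); the helper name `fetchAndExtract` and all URLs/paths are illustrative assumptions, not code from the patch.

```r
# Sketch of the failure handling this patch adds around download and untar.
# Names and paths are illustrative, not part of R/pkg/R/install.R.
fetchAndExtract <- function(remotePath, localPath, destDir) {
  # download.file() returns 0 on success; depending on the download method it
  # reports some failures as errors and others as warnings, so catch both.
  success <- tryCatch(download.file(remotePath, localPath) == 0,
                      error = function(e) { message(e); message(); FALSE },
                      warning = function(w) { message(w); message(); FALSE })
  if (!success) {
    unlink(localPath)  # drop any partially downloaded file
    stop("Fetch failed from ", remotePath)
  }
  # untar() can stop() (e.g. on an incomplete final block) or return the
  # non-zero exit code of the external tar command, so check both modes.
  success <- tryCatch(untar(tarfile = localPath, exdir = destDir) == 0,
                      error = function(e) { message(e); message(); FALSE },
                      warning = function(w) { message(w); message(); FALSE })
  if (!success) {
    unlink(localPath)  # archive is suspect; force a fresh download next run
    stop("Extract archive failed.")
  }
  invisible(destDir)
}
```

A quick way to exercise the failure path is to point the sketch at an unreachable URL, e.g. `fetchAndExtract("http://example.invalid/spark.tgz", tempfile(fileext = ".tgz"), tempdir())`: it should emit the condition message, clean up, and stop with "Fetch failed from ..." instead of leaving a truncated tarball behind, which is exactly the stuck state the patch fixes.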