From 382fefd1879e4670f3e9e8841ec243e3eb11c578 Mon Sep 17 00:00:00 2001
From: Felix Cheung <felixcheung_m@hotmail.com>
Date: Tue, 30 May 2017 22:33:29 -0700
Subject: [PATCH] [SPARK-20877][SPARKR][WIP] add timestamps to test runs

## What changes were proposed in this pull request?

to investigate how long they run

## How was this patch tested?

Jenkins, AppVeyor

Author: Felix Cheung <felixcheung_m@hotmail.com>

Closes #18104 from felixcheung/rtimetest.
---
 R/pkg/inst/tests/testthat/test_Windows.R    |  3 +
 .../testthat/test_mllib_classification.R    |  4 +
 .../tests/testthat/test_mllib_clustering.R  |  2 +
 R/pkg/inst/tests/testthat/test_mllib_tree.R | 82 +++++++++++--------
 R/pkg/inst/tests/testthat/test_sparkSQL.R   | 15 ++++
 R/pkg/inst/tests/testthat/test_utils.R      |  3 +
 R/pkg/tests/run-all.R                       |  6 ++
 7 files changed, 81 insertions(+), 34 deletions(-)
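Reviewer note, not part of the diff below: the change is a simple timing pattern. run-all.R records a start timestamp and a proc.time() reference before any test file runs, and individual test files print an end timestamp plus the elapsed wall-clock seconds. A minimal standalone R sketch of that pattern follows; dummy_test below is a hypothetical stand-in for one testthat file, not something this patch adds.

    # as in run-all.R: set TZ to GMT on Windows before taking timestamps
    if (.Platform$OS.type == "windows") {
      Sys.setenv(TZ = "GMT")
    }
    message("--- Start test ", as.POSIXct(Sys.time(), tz = "GMT"))
    timer_ptm <- proc.time()                    # reference point for elapsed time

    dummy_test <- function() Sys.sleep(0.1)     # hypothetical stand-in for a test file
    dummy_test()

    message("--- End test (example) ", as.POSIXct(Sys.time(), tz = "GMT"))
    message("elapsed ", (proc.time() - timer_ptm)[3])   # element 3 is elapsed (wall-clock) seconds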
diff --git a/R/pkg/inst/tests/testthat/test_Windows.R b/R/pkg/inst/tests/testthat/test_Windows.R
index 919b063bf0..00d684e1a4 100644
--- a/R/pkg/inst/tests/testthat/test_Windows.R
+++ b/R/pkg/inst/tests/testthat/test_Windows.R
@@ -27,3 +27,6 @@ test_that("sparkJars tag in SparkContext", {
   abcPath <- testOutput[1]
   expect_equal(abcPath, "a\\b\\c")
 })
+
+message("--- End test (Windows) ", as.POSIXct(Sys.time(), tz = "GMT"))
+message("elapsed ", (proc.time() - timer_ptm)[3])
diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R b/R/pkg/inst/tests/testthat/test_mllib_classification.R
index c1c746828d..82e588dc46 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_classification.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R
@@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) {
 }
 
 test_that("spark.svmLinear", {
+  skip_on_cran()
+
   df <- suppressWarnings(createDataFrame(iris))
   training <- df[df$Species %in% c("versicolor", "virginica"), ]
   model <- spark.svmLinear(training, Species ~ ., regParam = 0.01, maxIter = 10)
@@ -226,6 +228,8 @@ test_that("spark.logit", {
 })
 
 test_that("spark.mlp", {
+  skip_on_cran()
+
   df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"),
                 source = "libsvm")
   model <- spark.mlp(df, label ~ features, blockSize = 128, layers = c(4, 5, 4, 3),
diff --git a/R/pkg/inst/tests/testthat/test_mllib_clustering.R b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
index 8f71de1cbc..e827e961ab 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_clustering.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_clustering.R
@@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) {
 }
 
 test_that("spark.bisectingKmeans", {
+  skip_on_cran()
+
   newIris <- iris
   newIris$Species <- NULL
   training <- suppressWarnings(createDataFrame(newIris))
diff --git a/R/pkg/inst/tests/testthat/test_mllib_tree.R b/R/pkg/inst/tests/testthat/test_mllib_tree.R
index 5fd6a38ecb..31427ee52a 100644
--- a/R/pkg/inst/tests/testthat/test_mllib_tree.R
+++ b/R/pkg/inst/tests/testthat/test_mllib_tree.R
@@ -28,6 +28,8 @@ absoluteSparkPath <- function(x) {
 }
 
 test_that("spark.gbt", {
+  skip_on_cran()
+
   # regression
   data <- suppressWarnings(createDataFrame(longley))
   model <- spark.gbt(data, Employed ~ ., "regression", maxDepth = 5, maxBins = 16, seed = 123)
@@ -103,10 +105,12 @@ test_that("spark.gbt", {
   expect_equal(stats$maxDepth, 5)
 
   # spark.gbt classification can work on libsvm data
-  data <- read.df(absoluteSparkPath("data/mllib/sample_binary_classification_data.txt"),
-                  source = "libsvm")
-  model <- spark.gbt(data, label ~ features, "classification")
-  expect_equal(summary(model)$numFeatures, 692)
+  if (not_cran_or_windows_with_hadoop()) {
+    data <- read.df(absoluteSparkPath("data/mllib/sample_binary_classification_data.txt"),
+                    source = "libsvm")
+    model <- spark.gbt(data, label ~ features, "classification")
+    expect_equal(summary(model)$numFeatures, 692)
+  }
 })
 
 test_that("spark.randomForest", {
@@ -211,13 +215,17 @@ test_that("spark.randomForest", {
   expect_equal(length(grep("2.0", predictions)), 50)
 
   # spark.randomForest classification can work on libsvm data
-  data <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"),
-                  source = "libsvm")
-  model <- spark.randomForest(data, label ~ features, "classification")
-  expect_equal(summary(model)$numFeatures, 4)
+  if (not_cran_or_windows_with_hadoop()) {
+    data <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"),
+                    source = "libsvm")
+    model <- spark.randomForest(data, label ~ features, "classification")
+    expect_equal(summary(model)$numFeatures, 4)
+  }
 })
 
 test_that("spark.decisionTree", {
+  skip_on_cran()
+
   # regression
   data <- suppressWarnings(createDataFrame(longley))
   model <- spark.decisionTree(data, Employed ~ ., "regression", maxDepth = 5, maxBins = 16)
@@ -234,19 +242,21 @@ test_that("spark.decisionTree", {
   expect_error(capture.output(stats), NA)
   expect_true(length(capture.output(stats)) > 6)
 
-  modelPath <- tempfile(pattern = "spark-decisionTreeRegression", fileext = ".tmp")
-  write.ml(model, modelPath)
-  expect_error(write.ml(model, modelPath))
-  write.ml(model, modelPath, overwrite = TRUE)
-  model2 <- read.ml(modelPath)
-  stats2 <- summary(model2)
-  expect_equal(stats$formula, stats2$formula)
-  expect_equal(stats$numFeatures, stats2$numFeatures)
-  expect_equal(stats$features, stats2$features)
-  expect_equal(stats$featureImportances, stats2$featureImportances)
-  expect_equal(stats$maxDepth, stats2$maxDepth)
+  if (not_cran_or_windows_with_hadoop()) {
+    modelPath <- tempfile(pattern = "spark-decisionTreeRegression", fileext = ".tmp")
+    write.ml(model, modelPath)
+    expect_error(write.ml(model, modelPath))
+    write.ml(model, modelPath, overwrite = TRUE)
+    model2 <- read.ml(modelPath)
+    stats2 <- summary(model2)
+    expect_equal(stats$formula, stats2$formula)
+    expect_equal(stats$numFeatures, stats2$numFeatures)
+    expect_equal(stats$features, stats2$features)
+    expect_equal(stats$featureImportances, stats2$featureImportances)
+    expect_equal(stats$maxDepth, stats2$maxDepth)
 
-  unlink(modelPath)
+    unlink(modelPath)
+  }
 
   # classification
   data <- suppressWarnings(createDataFrame(iris))
@@ -263,17 +273,19 @@ test_that("spark.decisionTree", {
   expect_equal(length(grep("setosa", predictions)), 50)
   expect_equal(length(grep("versicolor", predictions)), 50)
 
-  modelPath <- tempfile(pattern = "spark-decisionTreeClassification", fileext = ".tmp")
-  write.ml(model, modelPath)
-  expect_error(write.ml(model, modelPath))
-  write.ml(model, modelPath, overwrite = TRUE)
-  model2 <- read.ml(modelPath)
-  stats2 <- summary(model2)
-  expect_equal(stats$depth, stats2$depth)
-  expect_equal(stats$numNodes, stats2$numNodes)
-  expect_equal(stats$numClasses, stats2$numClasses)
+  if (not_cran_or_windows_with_hadoop()) {
+    modelPath <- tempfile(pattern = "spark-decisionTreeClassification", fileext = ".tmp")
+    write.ml(model, modelPath)
+    expect_error(write.ml(model, modelPath))
+    write.ml(model, modelPath, overwrite = TRUE)
+    model2 <- read.ml(modelPath)
+    stats2 <- summary(model2)
+    expect_equal(stats$depth, stats2$depth)
+    expect_equal(stats$numNodes, stats2$numNodes)
+    expect_equal(stats$numClasses, stats2$numClasses)
 
-  unlink(modelPath)
+    unlink(modelPath)
+  }
 
   # Test numeric response variable
   labelToIndex <- function(species) {
@@ -297,10 +309,12 @@ test_that("spark.decisionTree", {
   expect_equal(length(grep("2.0", predictions)), 50)
 
   # spark.decisionTree classification can work on libsvm data
-  data <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"),
-                  source = "libsvm")
-  model <- spark.decisionTree(data, label ~ features, "classification")
-  expect_equal(summary(model)$numFeatures, 4)
+  if (not_cran_or_windows_with_hadoop()) {
+    data <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"),
+                    source = "libsvm")
+    model <- spark.decisionTree(data, label ~ features, "classification")
+    expect_equal(summary(model)$numFeatures, 4)
+  }
 })
 
 sparkR.session.stop()
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 9fc6e5dabe..c790d02b10 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1395,6 +1395,8 @@ test_that("column operators", {
 })
 
 test_that("column functions", {
+  skip_on_cran()
+
   c <- column("a")
   c1 <- abs(c) + acos(c) + approxCountDistinct(c) + ascii(c) + asin(c) + atan(c)
   c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c)
@@ -1780,6 +1782,8 @@ test_that("when(), otherwise() and ifelse() with column on a DataFrame", {
 })
 
 test_that("group by, agg functions", {
+  skip_on_cran()
+
   df <- read.json(jsonPath)
   df1 <- agg(df, name = "max", age = "sum")
   expect_equal(1, count(df1))
@@ -2121,6 +2125,8 @@ test_that("filter() on a DataFrame", {
 })
 
 test_that("join(), crossJoin() and merge() on a DataFrame", {
+  skip_on_cran()
+
   df <- read.json(jsonPath)
 
   mockLines2 <- c("{\"name\":\"Michael\", \"test\": \"yes\"}",
@@ -2978,6 +2984,7 @@ test_that("dapply() and dapplyCollect() on a DataFrame", {
 })
 
 test_that("dapplyCollect() on DataFrame with a binary column", {
+  skip_on_cran()
 
   df <- data.frame(key = 1:3)
   df$bytes <- lapply(df$key, serialize, connection = NULL)
@@ -2999,6 +3006,8 @@ test_that("dapplyCollect() on DataFrame with a binary column", {
 })
 
 test_that("repartition by columns on DataFrame", {
+  skip_on_cran()
+
   df <- createDataFrame(
     list(list(1L, 1, "1", 0.1), list(1L, 2, "2", 0.2), list(3L, 3, "3", 0.3)),
     c("a", "b", "c", "d"))
@@ -3037,6 +3046,8 @@ test_that("repartition by columns on DataFrame", {
 })
 
 test_that("coalesce, repartition, numPartitions", {
+  skip_on_cran()
+
   df <- as.DataFrame(cars, numPartitions = 5)
   expect_equal(getNumPartitions(df), 5)
   expect_equal(getNumPartitions(coalesce(df, 3)), 3)
@@ -3056,6 +3067,8 @@ test_that("coalesce, repartition, numPartitions", {
 })
 
 test_that("gapply() and gapplyCollect() on a DataFrame", {
+  skip_on_cran()
+
   df <- createDataFrame (
     list(list(1L, 1, "1", 0.1), list(1L, 2, "1", 0.2), list(3L, 3, "3", 0.3)),
     c("a", "b", "c", "d"))
@@ -3208,6 +3221,8 @@ test_that("createDataFrame sqlContext parameter backward compatibility", {
 })
 
 test_that("randomSplit", {
+  skip_on_cran()
+
   num <- 4000
   df <- createDataFrame(data.frame(id = 1:num))
   weights <- c(2, 3, 5)
diff --git a/R/pkg/inst/tests/testthat/test_utils.R b/R/pkg/inst/tests/testthat/test_utils.R
index 02691f0f64..6197ae7569 100644
--- a/R/pkg/inst/tests/testthat/test_utils.R
+++ b/R/pkg/inst/tests/testthat/test_utils.R
@@ -243,3 +243,6 @@ test_that("basenameSansExtFromUrl", {
 })
 
 sparkR.session.stop()
+
+message("--- End test (utils) ", as.POSIXct(Sys.time(), tz = "GMT"))
+message("elapsed ", (proc.time() - timer_ptm)[3])
diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R
index 9c6cba535d..f0bef4f6d2 100644
--- a/R/pkg/tests/run-all.R
+++ b/R/pkg/tests/run-all.R
@@ -21,6 +21,12 @@ library(SparkR)
 # Turn all warnings into errors
 options("warn" = 2)
 
+if (.Platform$OS.type == "windows") {
+  Sys.setenv(TZ = "GMT")
+}
+message("--- Start test ", as.POSIXct(Sys.time(), tz = "GMT"))
+timer_ptm <- proc.time()
+
 # Setup global test environment
 # Install Spark first to set SPARK_HOME
 install.spark()
-- 
GitLab
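A note on the guards used above, not part of the patch: testthat's skip_on_cran() aborts a test_that() block early unless the NOT_CRAN environment variable is set to "true" (the convention devtools and testthat use), while the libsvm-based checks are additionally wrapped in not_cran_or_windows_with_hadoop(), whose definition lives in the SparkR test utilities and is not shown in this diff. A hypothetical sketch of what such a guard could look like; the function name, environment variables, and logic here are assumptions, not the SparkR implementation:

    # Hypothetical guard: TRUE only when not running on CRAN and either not on
    # Windows or a Hadoop installation is reachable via HADOOP_HOME.
    not_cran_or_windows_with_hadoop_sketch <- function() {
      not_cran <- identical(Sys.getenv("NOT_CRAN"), "true")
      on_windows <- .Platform$OS.type == "windows"
      has_hadoop <- nzchar(Sys.getenv("HADOOP_HOME"))
      not_cran && (!on_windows || has_hadoop)
    }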