From d30238f1b9096c9fd85527d95be639de9388fcc7 Mon Sep 17 00:00:00 2001 From: actuaryzhang <actuaryzhang10@gmail.com> Date: Thu, 23 Feb 2017 11:12:02 -0800 Subject: [PATCH] [SPARK-19682][SPARKR] Issue warning (or error) when subset method "[[" takes vector index ## What changes were proposed in this pull request? The `[[` method is supposed to take a single index and return a column. This is different from base R which takes a vector index. We should check for this and issue warning or error when vector index is supplied (which is very likely given the behavior in base R). Currently I'm issuing a warning message and just taking the first element of the vector index. We could change this to an error if that's better. ## How was this patch tested? new tests Author: actuaryzhang <actuaryzhang10@gmail.com> Closes #17017 from actuaryzhang/sparkRSubsetter. (cherry picked from commit 7bf09433f5c5e08154ba106be21fe24f17cd282b) Signed-off-by: Felix Cheung <felixcheung@apache.org> --- R/pkg/R/DataFrame.R | 8 ++++++++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 986f1f11cc..d0f097925a 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1800,6 +1800,10 @@ setClassUnion("numericOrcharacter", c("numeric", "character")) #' @note [[ since 1.4.0 setMethod("[[", signature(x = "SparkDataFrame", i = "numericOrcharacter"), function(x, i) { + if (length(i) > 1) { + warning("Subset index has length > 1. Only the first index is used.") + i <- i[1] + } if (is.numeric(i)) { cols <- columns(x) i <- cols[[i]] @@ -1813,6 +1817,10 @@ setMethod("[[", signature(x = "SparkDataFrame", i = "numericOrcharacter"), #' @note [[<- since 2.1.1 setMethod("[[<-", signature(x = "SparkDataFrame", i = "numericOrcharacter"), function(x, i, value) { + if (length(i) > 1) { + warning("Subset index has length > 1. 
Only the first index is used.") + i <- i[1] + } if (is.numeric(i)) { cols <- columns(x) i <- cols[[i]] diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index d9dd0f3e14..9608fa1f77 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1007,6 +1007,18 @@ test_that("select operators", { expect_is(df[[2]], "Column") expect_is(df[["age"]], "Column") + expect_warning(df[[1:2]], + "Subset index has length > 1. Only the first index is used.") + expect_is(suppressWarnings(df[[1:2]]), "Column") + expect_warning(df[[c("name", "age")]], + "Subset index has length > 1. Only the first index is used.") + expect_is(suppressWarnings(df[[c("name", "age")]]), "Column") + + expect_warning(df[[1:2]] <- df[[1]], + "Subset index has length > 1. Only the first index is used.") + expect_warning(df[[c("name", "age")]] <- df[[1]], + "Subset index has length > 1. Only the first index is used.") + expect_is(df[, 1, drop = F], "SparkDataFrame") expect_equal(columns(df[, 1, drop = F]), c("name")) expect_equal(columns(df[, "age", drop = F]), c("age")) -- GitLab