From d30238f1b9096c9fd85527d95be639de9388fcc7 Mon Sep 17 00:00:00 2001
From: actuaryzhang <actuaryzhang10@gmail.com>
Date: Thu, 23 Feb 2017 11:12:02 -0800
Subject: [PATCH] [SPARK-19682][SPARKR] Issue warning (or error) when subset
 method "[[" takes vector index

## What changes were proposed in this pull request?
The `[[` method is supposed to take a single index and return a column. This is different from base R which takes a vector index.  We should check for this and issue warning or error when vector index is supplied (which is very likely given the behavior in base R).

Currently I'm issuing a warning message and just take the first element of the vector index. We could change this to an error it that's better.

## How was this patch tested?
new tests

Author: actuaryzhang <actuaryzhang10@gmail.com>

Closes #17017 from actuaryzhang/sparkRSubsetter.

(cherry picked from commit 7bf09433f5c5e08154ba106be21fe24f17cd282b)
Signed-off-by: Felix Cheung <felixcheung@apache.org>
---
 R/pkg/R/DataFrame.R                       |  8 ++++++++
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 12 ++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 986f1f11cc..d0f097925a 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1800,6 +1800,10 @@ setClassUnion("numericOrcharacter", c("numeric", "character"))
 #' @note [[ since 1.4.0
 setMethod("[[", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
           function(x, i) {
+            if (length(i) > 1) {
+              warning("Subset index has length > 1. Only the first index is used.")
+              i <- i[1]
+            }
             if (is.numeric(i)) {
               cols <- columns(x)
               i <- cols[[i]]
@@ -1813,6 +1817,10 @@ setMethod("[[", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
 #' @note [[<- since 2.1.1
 setMethod("[[<-", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
           function(x, i, value) {
+            if (length(i) > 1) {
+              warning("Subset index has length > 1. Only the first index is used.")
+              i <- i[1]
+            }
             if (is.numeric(i)) {
               cols <- columns(x)
               i <- cols[[i]]
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index d9dd0f3e14..9608fa1f77 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1007,6 +1007,18 @@ test_that("select operators", {
   expect_is(df[[2]], "Column")
   expect_is(df[["age"]], "Column")
 
+  expect_warning(df[[1:2]],
+                 "Subset index has length > 1. Only the first index is used.")
+  expect_is(suppressWarnings(df[[1:2]]), "Column")
+  expect_warning(df[[c("name", "age")]],
+                 "Subset index has length > 1. Only the first index is used.")
+  expect_is(suppressWarnings(df[[c("name", "age")]]), "Column")
+
+  expect_warning(df[[1:2]] <- df[[1]],
+                 "Subset index has length > 1. Only the first index is used.")
+  expect_warning(df[[c("name", "age")]] <- df[[1]],
+                 "Subset index has length > 1. Only the first index is used.")
+
   expect_is(df[, 1, drop = F], "SparkDataFrame")
   expect_equal(columns(df[, 1, drop = F]), c("name"))
   expect_equal(columns(df[, "age", drop = F]), c("age"))
-- 
GitLab