From 46c5749768fefd976097c7d5612ec184a4cfe1b9 Mon Sep 17 00:00:00 2001
From: zero323 <zero323@users.noreply.github.com>
Date: Wed, 19 Apr 2017 21:19:46 -0700
Subject: [PATCH] [SPARK-20375][R] R wrappers for array and map

## What changes were proposed in this pull request?

Adds wrappers for `o.a.s.sql.functions.array` and `o.a.s.sql.functions.map`

## How was this patch tested?

Unit tests, `check-cran.sh`

Author: zero323 <zero323@users.noreply.github.com>

Closes #17674 from zero323/SPARK-20375.
---
 R/pkg/NAMESPACE                           |  2 +
 R/pkg/R/functions.R                       | 53 +++++++++++++++++++++++
 R/pkg/R/generics.R                        |  8 ++++
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 17 ++++++++
 4 files changed, 80 insertions(+)

diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index ca45c6f9b0..b6b559adf0 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -213,6 +213,8 @@ exportMethods("%in%",
               "count",
               "countDistinct",
               "crc32",
+              "create_array",
+              "create_map",
               "hash",
               "cume_dist",
               "date_add",
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index c311921fb3..f854df11e5 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -3652,3 +3652,56 @@ setMethod("posexplode",
             jc <- callJStatic("org.apache.spark.sql.functions", "posexplode", x@jc)
             column(jc)
           })
+
+#' create_array
+#'
+#' Creates a new array column. The input columns must all have the same data type.
+#'
+#' @param x Column to compute on
+#' @param ... additional Column(s).
+#'
+#' @family normal_funcs
+#' @rdname create_array
+#' @name create_array
+#' @aliases create_array,Column-method
+#' @export
+#' @examples \dontrun{create_array(df$x, df$y, df$z)}
+#' @note create_array since 2.3.0
+setMethod("create_array",
+          signature(x = "Column"),
+          function(x, ...) {
+            jcols <- lapply(list(x, ...), function (x) {
+              stopifnot(class(x) == "Column")
+              x@jc
+            })
+            jc <- callJStatic("org.apache.spark.sql.functions", "array", jcols)
+            column(jc)
+          })
+
+#' create_map
+#'
+#' Creates a new map column. The input columns must be grouped as key-value pairs,
+#' e.g. (key1, value1, key2, value2, ...).
+#' The key columns must all have the same data type, and can't be null.
+#' The value columns must all have the same data type.
+#'
+#' @param x Column to compute on
+#' @param ... additional Column(s).
+#'
+#' @family normal_funcs
+#' @rdname create_map
+#' @name create_map
+#' @aliases create_map,Column-method
+#' @export
+#' @examples \dontrun{create_map(lit("x"), lit(1.0), lit("y"), lit(-1.0))}
+#' @note create_map since 2.3.0
+setMethod("create_map",
+          signature(x = "Column"),
+          function(x, ...) {
+            jcols <- lapply(list(x, ...), function (x) {
+              stopifnot(class(x) == "Column")
+              x@jc
+            })
+            jc <- callJStatic("org.apache.spark.sql.functions", "map", jcols)
+            column(jc)
+          })
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 945676c7f1..da46823f52 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -942,6 +942,14 @@ setGeneric("countDistinct", function(x, ...) { standardGeneric("countDistinct")
 #' @export
 setGeneric("crc32", function(x) { standardGeneric("crc32") })
 
+#' @rdname create_array
+#' @export
+setGeneric("create_array", function(x, ...) { standardGeneric("create_array") })
+
+#' @rdname create_map
+#' @export
+setGeneric("create_map", function(x, ...) { standardGeneric("create_map") })
+
 #' @rdname hash
 #' @export
 setGeneric("hash", function(x, ...) { standardGeneric("hash") })
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 6a6c9a809a..9e87a47106 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1461,6 +1461,23 @@ test_that("column functions", {
   expect_equal(length(arr$arrcol[[1]]), 2)
   expect_equal(arr$arrcol[[1]][[1]]$name, "Bob")
   expect_equal(arr$arrcol[[1]][[2]]$name, "Alice")
+
+  # Test create_array() and create_map()
+  df <- as.DataFrame(data.frame(
+    x = c(1.0, 2.0), y = c(-1.0, 3.0), z = c(-2.0, 5.0)
+  ))
+
+  arrs <- collect(select(df, create_array(df$x, df$y, df$z)))
+  expect_equal(arrs[, 1], list(list(1, -1, -2), list(2, 3, 5)))
+
+  maps <- collect(select(
+    df, create_map(lit("x"), df$x, lit("y"), df$y, lit("z"), df$z)))
+
+  expect_equal(
+    maps[, 1],
+    lapply(
+      list(list(x = 1, y = -1, z = -2), list(x = 2, y = 3,  z = 5)),
+      as.environment))
 })
 
 test_that("column binary mathfunctions", {
-- 
GitLab