[SPARK-11773][SPARKR] Implement collection functions in SparkR.

Author: Sun Rui <rui.sun@intel.com> Closes #9764 from sun-rui/SPARK-11773.

[SPARK-11773][SPARKR] Implement collection functions in SparkR.
224723e6 · Sun Rui · Shivaram Venkataraman · a97d6f3a · 224723e6 · 224723e6
Commit 224723e6 authored 9 years ago by Sun Rui Committed by Shivaram Venkataraman 9 years ago
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -98,6 +98,7 @@ exportMethods("%in%",
              "add_months",
              "alias",
              "approxCountDistinct",
+              "array_contains",
              "asc",
              "ascii",
              "asin",
@@ -215,6 +216,7 @@ exportMethods("%in%",
              "sinh",
              "size",
              "skewness",
+              "sort_array",
              "soundex",
              "stddev",
              "stddev_pop",

--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2198,4 +2198,4 @@ setMethod("coltypes",
            rTypes[naIndices] <- types[naIndices]

            rTypes
-          })
\ No newline at end of file
+          })
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -373,22 +373,6 @@ setMethod("exp",
            column(jc)
          })

-#' explode
-#'
-#' Creates a new row for each element in the given array or map column.
-#'
-#' @rdname explode
-#' @name explode
-#' @family collection_funcs
-#' @export
-#' @examples \dontrun{explode(df$c)}
-setMethod("explode",
-          signature(x = "Column"),
-          function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "explode", x@jc)
-            column(jc)
-          })
-
 #' expm1
 #'
 #' Computes the exponential of the given value minus one.
@@ -980,22 +964,6 @@ setMethod("sinh",
            column(jc)
          })

-#' size
-#'
-#' Returns length of array or map.
-#'
-#' @rdname size
-#' @name size
-#' @family collection_funcs
-#' @export
-#' @examples \dontrun{size(df$c)}
-setMethod("size",
-          signature(x = "Column"),
-          function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "size", x@jc)
-            column(jc)
-          })
-
 #' skewness
 #'
 #' Aggregate function: returns the skewness of the values in a group.
@@ -2365,3 +2333,80 @@ setMethod("rowNumber",
            jc <- callJStatic("org.apache.spark.sql.functions", "rowNumber")
            column(jc)
          })
+
+###################### Collection functions######################
+
+#' array_contains
+#'
+#' Returns true if the array contain the value.
+#'
+#' @param x A Column
+#' @param value A value to be checked if contained in the column
+#' @rdname array_contains
+#' @name array_contains
+#' @family collection_funcs
+#' @export
+#' @examples \dontrun{array_contains(df$c, 1)}
+setMethod("array_contains",
+          signature(x = "Column", value = "ANY"),
+          function(x, value) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "array_contains", x@jc, value)
+            column(jc)
+          })
+
+#' explode
+#'
+#' Creates a new row for each element in the given array or map column.
+#'
+#' @rdname explode
+#' @name explode
+#' @family collection_funcs
+#' @export
+#' @examples \dontrun{explode(df$c)}
+setMethod("explode",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "explode", x@jc)
+            column(jc)
+          })
+
+#' size
+#'
+#' Returns length of array or map.
+#'
+#' @rdname size
+#' @name size
+#' @family collection_funcs
+#' @export
+#' @examples \dontrun{size(df$c)}
+setMethod("size",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "size", x@jc)
+            column(jc)
+          })
+
+#' sort_array
+#'
+#' Sorts the input array for the given column in ascending order,
+#' according to the natural ordering of the array elements.
+#'
+#' @param x A Column to sort
+#' @param asc A logical flag indicating the sorting order.
+#'            TRUE, sorting is in ascending order.
+#'            FALSE, sorting is in descending order.
+#' @rdname sort_array
+#' @name sort_array
+#' @family collection_funcs
+#' @export
+#' @examples
+#' \dontrun{
+#' sort_array(df$c)
+#' sort_array(df$c, FALSE)
+#' }
+setMethod("sort_array",
+          signature(x = "Column"),
+          function(x, asc = TRUE) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "sort_array", x@jc, asc)
+            column(jc)
+          })
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -644,6 +644,10 @@ setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
 #' @export
 setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })

+#' @rdname array_contains
+#' @export
+setGeneric("array_contains", function(x, value) { standardGeneric("array_contains") })
+
 #' @rdname ascii
 #' @export
 setGeneric("ascii", function(x) { standardGeneric("ascii") })
@@ -961,6 +965,10 @@ setGeneric("size", function(x) { standardGeneric("size") })
 #' @export
 setGeneric("skewness", function(x) { standardGeneric("skewness") })

+#' @rdname sort_array
+#' @export
+setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") })
+
 #' @rdname soundex
 #' @export
 setGeneric("soundex", function(x) { standardGeneric("soundex") })
@@ -1076,4 +1084,4 @@ setGeneric("with")

 #' @rdname coltypes
 #' @export
-setGeneric("coltypes", function(x) { standardGeneric("coltypes") })
\ No newline at end of file
+setGeneric("coltypes", function(x) { standardGeneric("coltypes") })
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -635,4 +635,4 @@ assignNewEnv <- function(data) {
    assign(x = cols[i], value = data[, cols[i]], envir = env)
  }
  env
-}
\ No newline at end of file
+}
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -878,6 +878,16 @@ test_that("column functions", {

  df4 <- createDataFrame(sqlContext, list(list(a = "010101")))
  expect_equal(collect(select(df4, conv(df4$a, 2, 16)))[1, 1], "15")
+
+  # Test array_contains() and sort_array()
+  df <- createDataFrame(sqlContext, list(list(list(1L, 2L, 3L)), list(list(6L, 5L, 4L))))
+  result <- collect(select(df, array_contains(df[[1]], 1L)))[[1]]
+  expect_equal(result, c(TRUE, FALSE))
+
+  result <- collect(select(df, sort_array(df[[1]], FALSE)))[[1]]
+  expect_equal(result, list(list(3L, 2L, 1L), list(6L, 5L, 4L)))
+  result <- collect(select(df, sort_array(df[[1]])))[[1]]
+  expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
 })
 #
 test_that("column binary mathfunctions", {