Skip to content
Snippets Groups Projects
Commit 224723e6 authored by Sun Rui's avatar Sun Rui Committed by Shivaram Venkataraman
Browse files

[SPARK-11773][SPARKR] Implement collection functions in SparkR.

Author: Sun Rui <rui.sun@intel.com>

Closes #9764 from sun-rui/SPARK-11773.
parent a97d6f3a
No related branches found
No related tags found
No related merge requests found
......@@ -98,6 +98,7 @@ exportMethods("%in%",
"add_months",
"alias",
"approxCountDistinct",
"array_contains",
"asc",
"ascii",
"asin",
......@@ -215,6 +216,7 @@ exportMethods("%in%",
"sinh",
"size",
"skewness",
"sort_array",
"soundex",
"stddev",
"stddev_pop",
......
......@@ -2198,4 +2198,4 @@ setMethod("coltypes",
rTypes[naIndices] <- types[naIndices]
rTypes
})
\ No newline at end of file
})
......@@ -373,22 +373,6 @@ setMethod("exp",
column(jc)
})
#' explode
#'
#' Produces one output row per element of the given array or map column.
#'
#' @param x A Column holding array or map values.
#' @rdname explode
#' @name explode
#' @family collection_funcs
#' @export
#' @examples \dontrun{explode(df$c)}
setMethod("explode",
          signature(x = "Column"),
          function(x) {
            # Call the JVM-side static function on the column's Java reference,
            # then hand the result to column().
            sql_functions <- "org.apache.spark.sql.functions"
            exploded <- callJStatic(sql_functions, "explode", x@jc)
            column(exploded)
          })
#' expm1
#'
#' Computes the exponential of the given value minus one.
......@@ -980,22 +964,6 @@ setMethod("sinh",
column(jc)
})
#' size
#'
#' Returns the length of an array or map.
#'
#' @param x A Column holding array or map values.
#' @rdname size
#' @name size
#' @family collection_funcs
#' @export
#' @examples \dontrun{size(df$c)}
setMethod("size",
          signature(x = "Column"),
          function(x) {
            # Call the JVM-side static function on the column's Java reference,
            # then hand the result to column().
            sql_functions <- "org.apache.spark.sql.functions"
            length_jc <- callJStatic(sql_functions, "size", x@jc)
            column(length_jc)
          })
#' skewness
#'
#' Aggregate function: returns the skewness of the values in a group.
......@@ -2365,3 +2333,80 @@ setMethod("rowNumber",
jc <- callJStatic("org.apache.spark.sql.functions", "rowNumber")
column(jc)
})
###################### Collection functions ######################
#' array_contains
#'
#' Returns true if the array contains the value.
#'
#' @param x A Column
#' @param value A value to look for in the array held by the column
#' @rdname array_contains
#' @name array_contains
#' @family collection_funcs
#' @export
#' @examples \dontrun{array_contains(df$c, 1)}
setMethod("array_contains",
          signature(x = "Column", value = "ANY"),
          function(x, value) {
            # `value` is forwarded to the JVM call unchanged; the result is
            # handed to column().
            sql_functions <- "org.apache.spark.sql.functions"
            contains_jc <- callJStatic(sql_functions, "array_contains", x@jc, value)
            column(contains_jc)
          })
#' explode
#'
#' Generates a new row for every element of the given array or map column.
#'
#' @param x A Column holding array or map values.
#' @rdname explode
#' @name explode
#' @family collection_funcs
#' @export
#' @examples \dontrun{explode(df$c)}
setMethod(
  "explode",
  signature(x = "Column"),
  function(x) {
    # Invoke the static JVM function with the column's Java reference and
    # pass the outcome on to column().
    result_jc <- callJStatic(
      "org.apache.spark.sql.functions",
      "explode",
      x@jc
    )
    column(result_jc)
  }
)
#' size
#'
#' Gives the length of an array or map.
#'
#' @param x A Column holding array or map values.
#' @rdname size
#' @name size
#' @family collection_funcs
#' @export
#' @examples \dontrun{size(df$c)}
setMethod(
  "size",
  signature(x = "Column"),
  function(x) {
    # Invoke the static JVM function with the column's Java reference and
    # pass the outcome on to column().
    result_jc <- callJStatic(
      "org.apache.spark.sql.functions",
      "size",
      x@jc
    )
    column(result_jc)
  }
)
#' sort_array
#'
#' Sorts the input array for the given column in ascending or descending order,
#' according to the natural ordering of the array elements.
#'
#' @param x A Column to sort
#' @param asc A logical flag indicating the sorting order.
#'            TRUE (the default), sorting is in ascending order.
#'            FALSE, sorting is in descending order.
#' @rdname sort_array
#' @name sort_array
#' @family collection_funcs
#' @export
#' @examples
#' \dontrun{
#' sort_array(df$c)
#' sort_array(df$c, FALSE)
#' }
setMethod("sort_array",
          signature(x = "Column"),
          function(x, asc = TRUE) {
            # `asc` is forwarded to the JVM call unchanged; the result is
            # handed to column().
            sql_functions <- "org.apache.spark.sql.functions"
            sorted_jc <- callJStatic(sql_functions, "sort_array", x@jc, asc)
            column(sorted_jc)
          })
......@@ -644,6 +644,10 @@ setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
#' @export
setGeneric("approxCountDistinct",
           function(x, ...) {
             # S4 generic: dispatches to the registered method.
             standardGeneric("approxCountDistinct")
           })
#' @rdname array_contains
#' @export
setGeneric("array_contains",
           function(x, value) {
             # S4 generic: dispatches to the registered method.
             standardGeneric("array_contains")
           })
#' @rdname ascii
#' @export
setGeneric("ascii",
           function(x) {
             # S4 generic: dispatches to the registered method.
             standardGeneric("ascii")
           })
......@@ -961,6 +965,10 @@ setGeneric("size", function(x) { standardGeneric("size") })
#' @export
setGeneric("skewness",
           function(x) {
             # S4 generic: dispatches to the registered method.
             standardGeneric("skewness")
           })
#' @rdname sort_array
#' @export
setGeneric("sort_array",
           function(x, asc = TRUE) {
             # S4 generic: dispatches to the registered method.
             standardGeneric("sort_array")
           })
#' @rdname soundex
#' @export
setGeneric("soundex",
           function(x) {
             # S4 generic: dispatches to the registered method.
             standardGeneric("soundex")
           })
......@@ -1076,4 +1084,4 @@ setGeneric("with")
#' @rdname coltypes
#' @export
setGeneric("coltypes",
           function(x) {
             # S4 generic: dispatches to the registered method.
             standardGeneric("coltypes")
           })
\ No newline at end of file
setGeneric("coltypes",
           function(x) {
             # S4 generic: dispatches to the registered method.
             standardGeneric("coltypes")
           })
......@@ -635,4 +635,4 @@ assignNewEnv <- function(data) {
assign(x = cols[i], value = data[, cols[i]], envir = env)
}
env
}
\ No newline at end of file
}
......@@ -878,6 +878,16 @@ test_that("column functions", {
df4 <- createDataFrame(sqlContext, list(list(a = "010101")))
expect_equal(collect(select(df4, conv(df4$a, 2, 16)))[1, 1], "15")
# Test array_contains() and sort_array()
df <- createDataFrame(sqlContext, list(list(list(1L, 2L, 3L)), list(list(6L, 5L, 4L))))
result <- collect(select(df, array_contains(df[[1]], 1L)))[[1]]
expect_equal(result, c(TRUE, FALSE))
result <- collect(select(df, sort_array(df[[1]], FALSE)))[[1]]
expect_equal(result, list(list(3L, 2L, 1L), list(6L, 5L, 4L)))
result <- collect(select(df, sort_array(df[[1]])))[[1]]
expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
})
#
test_that("column binary mathfunctions", {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment