diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 81e19364ae7ea1f19f85322a795b3e2956e87389..871f8e41a0f23d6d392f4c19c5eabea88bb11a5d 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -229,6 +229,7 @@ exportMethods("%in%", "floor", "format_number", "format_string", + "from_json", "from_unixtime", "from_utc_timestamp", "getField", @@ -327,6 +328,7 @@ exportMethods("%in%", "toDegrees", "toRadians", "to_date", + "to_json", "to_timestamp", "to_utc_timestamp", "translate", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 9e5084481fcde7ddf453056c97a023295c41f004..edf2bcf8fdb3c4f80c58d26cce81a786e2d532df 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -1793,6 +1793,33 @@ setMethod("to_date", column(jc) }) +#' to_json +#' +#' Converts a column containing a \code{structType} into a Column of JSON string. +#' Resolving the Column can fail if an unsupported type is encountered. +#' +#' @param x Column containing the struct +#' @param ... additional named properties to control how it is converted, accepts the same options +#' as the JSON data source. +#' +#' @family normal_funcs +#' @rdname to_json +#' @name to_json +#' @aliases to_json,Column-method +#' @export +#' @examples +#' \dontrun{ +#' to_json(df$t, dateFormat = 'dd/MM/yyyy') +#' select(df, to_json(df$t)) +#'} +#' @note to_json since 2.2.0 +setMethod("to_json", signature(x = "Column"), + function(x, ...) { + options <- varargsToStrEnv(...) + jc <- callJStatic("org.apache.spark.sql.functions", "to_json", x@jc, options) + column(jc) + }) + #' to_timestamp #' #' Converts the column into a TimestampType. You may optionally specify a format @@ -2403,6 +2430,36 @@ setMethod("date_format", signature(y = "Column", x = "character"), column(jc) }) +#' from_json +#' +#' Parses a column containing a JSON string into a Column of \code{structType} with the specified +#' \code{schema}. If the string is unparseable, the Column will contains the value NA. +#' +#' @param x Column containing the JSON string. +#' @param schema a structType object to use as the schema to use when parsing the JSON string. +#' @param ... additional named properties to control how the json is parsed, accepts the same +#' options as the JSON data source. +#' +#' @family normal_funcs +#' @rdname from_json +#' @name from_json +#' @aliases from_json,Column,structType-method +#' @export +#' @examples +#' \dontrun{ +#' schema <- structType(structField("name", "string"), +#' select(df, from_json(df$value, schema, dateFormat = "dd/MM/yyyy")) +#'} +#' @note from_json since 2.2.0 +setMethod("from_json", signature(x = "Column", schema = "structType"), + function(x, schema, ...) { + options <- varargsToStrEnv(...) + jc <- callJStatic("org.apache.spark.sql.functions", + "from_json", + x@jc, schema$jobj, options) + column(jc) + }) + #' from_utc_timestamp #' #' Given a timestamp, which corresponds to a certain time of day in UTC, returns another timestamp diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 647cbbdd825e357f33567d447bcc8be6e4eb013e..45bc12746511c76fcfd68b0fcca35ff17c81bfeb 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -991,6 +991,10 @@ setGeneric("format_number", function(y, x) { standardGeneric("format_number") }) #' @export setGeneric("format_string", function(format, x, ...) { standardGeneric("format_string") }) +#' @rdname from_json +#' @export +setGeneric("from_json", function(x, schema, ...) { standardGeneric("from_json") }) + #' @rdname from_unixtime #' @export setGeneric("from_unixtime", function(x, ...) { standardGeneric("from_unixtime") }) @@ -1265,6 +1269,10 @@ setGeneric("toRadians", function(x) { standardGeneric("toRadians") }) #' @export setGeneric("to_date", function(x, format) { standardGeneric("to_date") }) +#' @rdname to_json +#' @export +setGeneric("to_json", function(x, ...) { standardGeneric("to_json") }) + #' @rdname to_timestamp #' @export setGeneric("to_timestamp", function(x, format) { standardGeneric("to_timestamp") }) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 1dd8c5ce6cb3230a2b8e7c9bff85711f5c5bfd05..7c096597fea661458e1d7402e03a0ebc276c76f8 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -88,6 +88,13 @@ mockLinesComplexType <- complexTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(mockLinesComplexType, complexTypeJsonPath) +# For test map type and struct type in DataFrame +mockLinesMapType <- c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}}", + "{\"name\":\"Alice\",\"info\":{\"age\":20,\"height\":164.3}}", + "{\"name\":\"David\",\"info\":{\"age\":60,\"height\":180}}") +mapTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") +writeLines(mockLinesMapType, mapTypeJsonPath) + test_that("calling sparkRSQL.init returns existing SQL context", { sqlContext <- suppressWarnings(sparkRSQL.init(sc)) expect_equal(suppressWarnings(sparkRSQL.init(sc)), sqlContext) @@ -466,13 +473,6 @@ test_that("create DataFrame from a data.frame with complex types", { expect_equal(ldf$an_envir, collected$an_envir) }) -# For test map type and struct type in DataFrame -mockLinesMapType <- c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}}", - "{\"name\":\"Alice\",\"info\":{\"age\":20,\"height\":164.3}}", - "{\"name\":\"David\",\"info\":{\"age\":60,\"height\":180}}") -mapTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") -writeLines(mockLinesMapType, mapTypeJsonPath) - test_that("Collect DataFrame with complex types", { # ArrayType df <- read.json(complexTypeJsonPath) @@ -1337,6 +1337,33 @@ test_that("column functions", { df <- createDataFrame(data.frame(x = c(2.5, 3.5))) expect_equal(collect(select(df, bround(df$x, 0)))[[1]][1], 2) expect_equal(collect(select(df, bround(df$x, 0)))[[1]][2], 4) + + # Test to_json(), from_json() + df <- read.json(mapTypeJsonPath) + j <- collect(select(df, alias(to_json(df$info), "json"))) + expect_equal(j[order(j$json), ][1], "{\"age\":16,\"height\":176.5}") + df <- as.DataFrame(j) + schema <- structType(structField("age", "integer"), + structField("height", "double")) + s <- collect(select(df, alias(from_json(df$json, schema), "structcol"))) + expect_equal(ncol(s), 1) + expect_equal(nrow(s), 3) + expect_is(s[[1]][[1]], "struct") + expect_true(any(apply(s, 1, function(x) { x[[1]]$age == 16 } ))) + + # passing option + df <- as.DataFrame(list(list("col" = "{\"date\":\"21/10/2014\"}"))) + schema2 <- structType(structField("date", "date")) + expect_error(tryCatch(collect(select(df, from_json(df$col, schema2))), + error = function(e) { stop(e) }), + paste0(".*(java.lang.NumberFormatException: For input string:).*")) + s <- collect(select(df, from_json(df$col, schema2, dateFormat = "dd/MM/yyyy"))) + expect_is(s[[1]][[1]]$date, "Date") + expect_equal(as.character(s[[1]][[1]]$date), "2014-10-21") + + # check for unparseable + df <- as.DataFrame(list(list("a" = ""))) + expect_equal(collect(select(df, from_json(df$a, schema)))[[1]][[1]], NA) }) test_that("column binary mathfunctions", { @@ -2867,5 +2894,7 @@ unlink(parquetPath) unlink(orcPath) unlink(jsonPath) unlink(jsonPathNa) +unlink(complexTypeJsonPath) +unlink(mapTypeJsonPath) sparkR.session.stop()