diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 95d949ee3e5a4617a4f1a373ffb2ed6225cf2594..41986a5e7ab7da819b07422719f1b601e16883f4 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -228,7 +228,8 @@ exportMethods("agg") export("sparkRSQL.init", "sparkRHive.init") -export("cacheTable", +export("as.DataFrame", + "cacheTable", "clearCache", "createDataFrame", "createExternalTable", diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 66c7e307212c3bb4ac77027287083392d12c050c..399f53657a68cf5baffce063dca1b4a940a734e1 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -64,21 +64,23 @@ infer_type <- function(x) { } } -#' Create a DataFrame from an RDD +#' Create a DataFrame #' -#' Converts an RDD to a DataFrame by infer the types. +#' Converts an R data.frame or list into a DataFrame. #' #' @param sqlContext A SQLContext #' @param data An RDD or list or data.frame #' @param schema a list of column names or named list (StructType), optional #' @return an DataFrame +#' @rdname createDataFrame #' @export #' @examples #'\dontrun{ #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) -#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x))) -#' df <- createDataFrame(sqlContext, rdd) +#' df1 <- as.DataFrame(sqlContext, iris) +#' df2 <- as.DataFrame(sqlContext, list(3,4,5,6)) +#' df3 <- createDataFrame(sqlContext, iris) #' } # TODO(davies): support sampling and infer type from NA @@ -151,6 +153,13 @@ createDataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0 dataFrame(sdf) } +#' @rdname createDataFrame +#' @aliases createDataFrame +#' @export +as.DataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0) { + createDataFrame(sqlContext, data, schema, samplingRatio) +} + # toDF # # Converts an RDD to a DataFrame by infer the types. 
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index af6efa40fb2f621f1a355960bfe7da044f935497..b599994854670c2afacd2536adb71327fe1d27d1 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -89,17 +89,28 @@ test_that("structType and structField", { test_that("create DataFrame from RDD", { rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) }) df <- createDataFrame(sqlContext, rdd, list("a", "b")) + dfAsDF <- as.DataFrame(sqlContext, rdd, list("a", "b")) expect_is(df, "DataFrame") + expect_is(dfAsDF, "DataFrame") expect_equal(count(df), 10) + expect_equal(count(dfAsDF), 10) expect_equal(nrow(df), 10) + expect_equal(nrow(dfAsDF), 10) expect_equal(ncol(df), 2) + expect_equal(ncol(dfAsDF), 2) expect_equal(dim(df), c(10, 2)) + expect_equal(dim(dfAsDF), c(10, 2)) expect_equal(columns(df), c("a", "b")) + expect_equal(columns(dfAsDF), c("a", "b")) expect_equal(dtypes(df), list(c("a", "int"), c("b", "string"))) + expect_equal(dtypes(dfAsDF), list(c("a", "int"), c("b", "string"))) df <- createDataFrame(sqlContext, rdd) + dfAsDF <- as.DataFrame(sqlContext, rdd) expect_is(df, "DataFrame") + expect_is(dfAsDF, "DataFrame") expect_equal(columns(df), c("_1", "_2")) + expect_equal(columns(dfAsDF), c("_1", "_2")) schema <- structType(structField(x = "a", type = "integer", nullable = TRUE), structField(x = "b", type = "string", nullable = TRUE)) @@ -130,9 +141,13 @@ test_that("create DataFrame from RDD", { schema <- structType(structField("name", "string"), structField("age", "integer"), structField("height", "float")) df2 <- createDataFrame(sqlContext, df.toRDD, schema) + df2AsDF <- as.DataFrame(sqlContext, df.toRDD, schema) expect_equal(columns(df2), c("name", "age", "height")) + expect_equal(columns(df2AsDF), c("name", "age", "height")) expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float"))) + expect_equal(dtypes(df2AsDF), list(c("name", "string"), 
c("age", "int"), c("height", "float"))) expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5)) + expect_equal(collect(where(df2AsDF, df2AsDF$name == "Bob")), c("Bob", 16, 176.5)) localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18),