From 1e0aba90b9e73834af70d196f7f869b062d98d94 Mon Sep 17 00:00:00 2001
From: Narine Kokhlikyan <narine.kokhlikyan@gmail.com>
Date: Tue, 13 Oct 2015 10:09:05 -0700
Subject: [PATCH] [SPARK-10888] [SPARKR] Added as.DataFrame as a synonym to
 createDataFrame

as.DataFrame is a more R-style signature.
Also, I'd like to know if we could make the context (e.g. sqlContext) global, so that we do not have to specify it as an argument each time we create a DataFrame.
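
For illustration, a minimal sketch of the new synonym's usage, mirroring the roxygen examples in this patch (assumes a running SparkR session):

    sc <- sparkR.init()
    sqlContext <- sparkRSQL.init(sc)
    # as.DataFrame simply delegates to createDataFrame, so both calls
    # below build the same DataFrame from a local data.frame
    df1 <- as.DataFrame(sqlContext, iris)
    df2 <- createDataFrame(sqlContext, iris)
    # a plain list works as well, just as with createDataFrame
    df3 <- as.DataFrame(sqlContext, list(3, 4, 5, 6))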

Author: Narine Kokhlikyan <narine.kokhlikyan@gmail.com>

Closes #8952 from NarineK/sparkrasDataFrame.
---
 R/pkg/NAMESPACE                  |  3 ++-
 R/pkg/R/SQLContext.R             | 17 +++++++++++++----
 R/pkg/inst/tests/test_sparkSQL.R | 15 +++++++++++++++
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 95d949ee3e..41986a5e7a 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -228,7 +228,8 @@ exportMethods("agg")
 export("sparkRSQL.init",
        "sparkRHive.init")
 
-export("cacheTable",
+export("as.DataFrame",
+       "cacheTable",
        "clearCache",
        "createDataFrame",
        "createExternalTable",
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 66c7e30721..399f53657a 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -64,21 +64,23 @@ infer_type <- function(x) {
   }
 }
 
-#' Create a DataFrame from an RDD
+#' Create a DataFrame
 #'
-#' Converts an RDD to a DataFrame by infer the types.
+#' Converts an R data.frame or list into a DataFrame.
 #'
 #' @param sqlContext A SQLContext
 #' @param data An RDD or list or data.frame
 #' @param schema a list of column names or named list (StructType), optional
 #' @return an DataFrame
+#' @rdname createDataFrame
 #' @export
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
 #' sqlContext <- sparkRSQL.init(sc)
-#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
-#' df <- createDataFrame(sqlContext, rdd)
+#' df1 <- as.DataFrame(sqlContext, iris)
+#' df2 <- as.DataFrame(sqlContext, list(3,4,5,6))
+#' df3 <- createDataFrame(sqlContext, iris)
 #' }
 
 # TODO(davies): support sampling and infer type from NA
@@ -151,6 +153,13 @@ createDataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0
   dataFrame(sdf)
 }
 
+#' @rdname createDataFrame
+#' @aliases createDataFrame
+#' @export
+as.DataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0) {
+  createDataFrame(sqlContext, data, schema, samplingRatio)
+}
+
 # toDF
 #
 # Converts an RDD to a DataFrame by infer the types.
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index af6efa40fb..b599994854 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -89,17 +89,28 @@ test_that("structType and structField", {
 test_that("create DataFrame from RDD", {
   rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) })
   df <- createDataFrame(sqlContext, rdd, list("a", "b"))
+  dfAsDF <- as.DataFrame(sqlContext, rdd, list("a", "b"))
   expect_is(df, "DataFrame")
+  expect_is(dfAsDF, "DataFrame")
   expect_equal(count(df), 10)
+  expect_equal(count(dfAsDF), 10)
   expect_equal(nrow(df), 10)
+  expect_equal(nrow(dfAsDF), 10)
   expect_equal(ncol(df), 2)
+  expect_equal(ncol(dfAsDF), 2)
   expect_equal(dim(df), c(10, 2))
+  expect_equal(dim(dfAsDF), c(10, 2))
   expect_equal(columns(df), c("a", "b"))
+  expect_equal(columns(dfAsDF), c("a", "b"))
   expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
+  expect_equal(dtypes(dfAsDF), list(c("a", "int"), c("b", "string")))
 
   df <- createDataFrame(sqlContext, rdd)
+  dfAsDF <- as.DataFrame(sqlContext, rdd)
   expect_is(df, "DataFrame")
+  expect_is(dfAsDF, "DataFrame")
   expect_equal(columns(df), c("_1", "_2"))
+  expect_equal(columns(dfAsDF), c("_1", "_2"))
 
   schema <- structType(structField(x = "a", type = "integer", nullable = TRUE),
                         structField(x = "b", type = "string", nullable = TRUE))
@@ -130,9 +141,13 @@ test_that("create DataFrame from RDD", {
   schema <- structType(structField("name", "string"), structField("age", "integer"),
                        structField("height", "float"))
   df2 <- createDataFrame(sqlContext, df.toRDD, schema)
+  df2AsDF <- as.DataFrame(sqlContext, df.toRDD, schema)
   expect_equal(columns(df2), c("name", "age", "height"))
+  expect_equal(columns(df2AsDF), c("name", "age", "height"))
   expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float")))
+  expect_equal(dtypes(df2AsDF), list(c("name", "string"), c("age", "int"), c("height", "float")))
   expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5))
+  expect_equal(collect(where(df2AsDF, df2$name == "Bob")), c("Bob", 16, 176.5))
 
   localDF <- data.frame(name=c("John", "Smith", "Sarah"),
                         age=c(19, 23, 18),
-- 
GitLab