Skip to content
Snippets Groups Projects
Commit 1e0aba90 authored by Narine Kokhlikyan's avatar Narine Kokhlikyan Committed by Shivaram Venkataraman
Browse files

[SPARK-10888] [SPARKR] Added as.DataFrame as a synonym to createDataFrame

as.DataFrame is a more R-style signature.
Also, I'd like to know if we could make the context (e.g. sqlContext) global, so that we do not have to specify it as an argument each time we create a DataFrame.

Author: Narine Kokhlikyan <narine.kokhlikyan@gmail.com>

Closes #8952 from NarineK/sparkrasDataFrame.
parent 5e3868ba
No related branches found
No related tags found
No related merge requests found
......@@ -228,7 +228,8 @@ exportMethods("agg")
export("sparkRSQL.init",
"sparkRHive.init")
export("cacheTable",
export("as.DataFrame",
"cacheTable",
"clearCache",
"createDataFrame",
"createExternalTable",
......
......@@ -64,21 +64,23 @@ infer_type <- function(x) {
}
}
#' Create a DataFrame from an RDD
#' Create a DataFrame
#'
#' Converts an RDD to a DataFrame by infer the types.
#' Converts an R data.frame or list into a DataFrame.
#'
#' @param sqlContext A SQLContext
#' @param data An RDD or list or data.frame
#' @param schema a list of column names or named list (StructType), optional
#' @return a DataFrame
#' @rdname createDataFrame
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlContext <- sparkRSQL.init(sc)
#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x)))
#' df <- createDataFrame(sqlContext, rdd)
#' df1 <- as.DataFrame(sqlContext, iris)
#' df2 <- as.DataFrame(sqlContext, list(3,4,5,6))
#' df3 <- createDataFrame(sqlContext, iris)
#' }
# TODO(davies): support sampling and infer type from NA
......@@ -151,6 +153,13 @@ createDataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0
dataFrame(sdf)
}
#' @rdname createDataFrame
#' @aliases createDataFrame
#' @export
as.DataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0) {
  # R-style synonym for createDataFrame: every argument is handed through
  # unchanged and the delegate's result is returned as-is.
  createDataFrame(sqlContext = sqlContext,
                  data = data,
                  schema = schema,
                  samplingRatio = samplingRatio)
}
# toDF
#
# Converts an RDD to a DataFrame by inferring the types.
......
......@@ -89,17 +89,28 @@ test_that("structType and structField", {
test_that("create DataFrame from RDD", {
rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) })
df <- createDataFrame(sqlContext, rdd, list("a", "b"))
dfAsDF <- as.DataFrame(sqlContext, rdd, list("a", "b"))
expect_is(df, "DataFrame")
expect_is(dfAsDF, "DataFrame")
expect_equal(count(df), 10)
expect_equal(count(dfAsDF), 10)
expect_equal(nrow(df), 10)
expect_equal(nrow(dfAsDF), 10)
expect_equal(ncol(df), 2)
expect_equal(ncol(dfAsDF), 2)
expect_equal(dim(df), c(10, 2))
expect_equal(dim(dfAsDF), c(10, 2))
expect_equal(columns(df), c("a", "b"))
expect_equal(columns(dfAsDF), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
expect_equal(dtypes(dfAsDF), list(c("a", "int"), c("b", "string")))
df <- createDataFrame(sqlContext, rdd)
dfAsDF <- as.DataFrame(sqlContext, rdd)
expect_is(df, "DataFrame")
expect_is(dfAsDF, "DataFrame")
expect_equal(columns(df), c("_1", "_2"))
expect_equal(columns(dfAsDF), c("_1", "_2"))
schema <- structType(structField(x = "a", type = "integer", nullable = TRUE),
structField(x = "b", type = "string", nullable = TRUE))
......@@ -130,9 +141,13 @@ test_that("create DataFrame from RDD", {
schema <- structType(structField("name", "string"), structField("age", "integer"),
structField("height", "float"))
df2 <- createDataFrame(sqlContext, df.toRDD, schema)
df2AsDF <- as.DataFrame(sqlContext, df.toRDD, schema)
expect_equal(columns(df2), c("name", "age", "height"))
expect_equal(columns(df2AsDF), c("name", "age", "height"))
expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float")))
expect_equal(dtypes(df2AsDF), list(c("name", "string"), c("age", "int"), c("height", "float")))
expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5))
# Filter the as.DataFrame result with its own column, so this assertion
# checks df2AsDF without depending on a column reference taken from df2.
expect_equal(collect(where(df2AsDF, df2AsDF$name == "Bob")), c("Bob", 16, 176.5))
localDF <- data.frame(name=c("John", "Smith", "Sarah"),
age=c(19, 23, 18),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment