From c82883239eadc4615a3aba907cd4633cb7aed26e Mon Sep 17 00:00:00 2001 From: felixcheung <felixcheung_m@hotmail.com> Date: Thu, 26 May 2016 21:42:36 -0700 Subject: [PATCH] [SPARK-10903] followup - update API doc for SqlContext ## What changes were proposed in this pull request? Follow up on the earlier PR - in here we are fixing up roxygen2 doc examples. Also add to the programming guide migration section. ## How was this patch tested? SparkR tests Author: felixcheung <felixcheung_m@hotmail.com> Closes #13340 from felixcheung/sqlcontextdoc. --- R/pkg/R/DataFrame.R | 141 ++++++++++++++++++++++---------------------- R/pkg/R/jobj.R | 3 +- R/pkg/R/mllib.R | 10 ++-- R/pkg/R/stats.R | 12 ++-- docs/sparkr.md | 1 + 5 files changed, 82 insertions(+), 85 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index d54ee54cd8..30a567523f 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -39,7 +39,7 @@ setOldClass("structType") #'\dontrun{ #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) -#' df <- createDataFrame(sqlContext, faithful) +#' df <- createDataFrame(faithful) #'} setClass("SparkDataFrame", slots = list(env = "environment", @@ -78,7 +78,7 @@ dataFrame <- function(sdf, isCached = FALSE) { #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' printSchema(df) #'} setMethod("printSchema", @@ -103,7 +103,7 @@ setMethod("printSchema", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' dfSchema <- schema(df) #'} setMethod("schema", @@ -127,7 +127,7 @@ setMethod("schema", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' explain(df, TRUE) #'} setMethod("explain", @@ -158,7 +158,7 @@ setMethod("explain", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' isLocal(df) #'} setMethod("isLocal", @@ -183,7 +183,7 @@ setMethod("isLocal", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' showDF(df) #'} setMethod("showDF", @@ -208,7 +208,7 @@ setMethod("showDF", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' df #'} setMethod("show", "SparkDataFrame", @@ -235,7 +235,7 @@ setMethod("show", "SparkDataFrame", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' dtypes(df) #'} setMethod("dtypes", @@ -262,7 +262,7 @@ setMethod("dtypes", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' columns(df) #' colnames(df) #'} @@ -342,7 +342,7 @@ setMethod("colnames<-", #' @export #' @examples #'\dontrun{ -#' irisDF <- createDataFrame(sqlContext, iris) +#' irisDF <- createDataFrame(iris) #' coltypes(irisDF) #'} setMethod("coltypes", @@ -397,7 +397,7 @@ setMethod("coltypes", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' coltypes(df) <- 
c("character", "integer") #' coltypes(df) <- c(NA, "numeric") #'} @@ -444,9 +444,9 @@ setMethod("coltypes<-", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' registerTempTable(df, "json_df") -#' new_df <- sql(sqlContext, "SELECT * FROM json_df") +#' new_df <- sql("SELECT * FROM json_df") #'} setMethod("registerTempTable", signature(x = "SparkDataFrame", tableName = "character"), @@ -471,8 +471,8 @@ setMethod("registerTempTable", #'\dontrun{ #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) -#' df <- read.df(sqlContext, path, "parquet") -#' df2 <- read.df(sqlContext, path2, "parquet") +#' df <- read.df(path, "parquet") +#' df2 <- read.df(path2, "parquet") #' registerTempTable(df, "table1") #' insertInto(df2, "table1", overwrite = TRUE) #'} @@ -500,7 +500,7 @@ setMethod("insertInto", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' cache(df) #'} setMethod("cache", @@ -528,7 +528,7 @@ setMethod("cache", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' persist(df, "MEMORY_AND_DISK") #'} setMethod("persist", @@ -556,7 +556,7 @@ setMethod("persist", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' persist(df, "MEMORY_AND_DISK") #' unpersist(df) #'} @@ -591,7 +591,7 @@ setMethod("unpersist", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' newDF <- repartition(df, 2L) #' newDF <- repartition(df, numPartitions = 2L) #' newDF <- repartition(df, col = df$"col1", df$"col2") @@ -636,7 +636,7 @@ setMethod("repartition", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' newRDD <- toJSON(df) #'} setMethod("toJSON", @@ -664,7 +664,7 @@ setMethod("toJSON", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' write.json(df, "/tmp/sparkr-tmp/") #'} setMethod("write.json", @@ -691,7 +691,7 @@ setMethod("write.json", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' write.parquet(df, "/tmp/sparkr-tmp1/") #' saveAsParquetFile(df, "/tmp/sparkr-tmp2/") #'} @@ -730,7 +730,7 @@ setMethod("saveAsParquetFile", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.txt" -#' df <- read.text(sqlContext, path) +#' df <- read.text(path) #' write.text(df, "/tmp/sparkr-tmp/") #'} setMethod("write.text", @@ -755,7 +755,7 @@ setMethod("write.text", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' distinctDF <- distinct(df) #'} setMethod("distinct", @@ -791,7 +791,7 @@ setMethod("unique", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' collect(sample(df, FALSE, 0.5)) #' collect(sample(df, TRUE, 0.5)) #'} @@ -834,7 +834,7 @@ 
setMethod("sample_frac", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' count(df) #' } setMethod("count", @@ -864,7 +864,7 @@ setMethod("nrow", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' ncol(df) #' } setMethod("ncol", @@ -885,7 +885,7 @@ setMethod("ncol", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' dim(df) #' } setMethod("dim", @@ -909,7 +909,7 @@ setMethod("dim", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' collected <- collect(df) #' firstName <- collected[[1]]$name #' } @@ -981,7 +981,7 @@ setMethod("collect", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' limitedDF <- limit(df, 10) #' } setMethod("limit", @@ -1002,7 +1002,7 @@ setMethod("limit", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' take(df, 2) #' } setMethod("take", @@ -1031,7 +1031,7 @@ setMethod("take", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' head(df) #' } setMethod("head", @@ -1054,7 +1054,7 @@ setMethod("head", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' first(df) #' } setMethod("first", @@ -1075,7 +1075,7 @@ setMethod("first", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' rdd <- toRDD(df) #'} setMethod("toRDD", @@ -1188,13 +1188,12 @@ dapplyInternal <- function(x, func, schema) { #' @export #' @examples #' \dontrun{ -#' df <- createDataFrame (sqlContext, iris) +#' df <- createDataFrame(iris) #' df1 <- dapply(df, function(x) { x }, schema(df)) #' collect(df1) #' #' # filter and add a column -#' df <- createDataFrame ( -#' sqlContext, +#' df <- createDataFrame( #' list(list(1L, 1, "1"), list(2L, 2, "2"), list(3L, 3, "3")), #' c("a", "b", "c")) #' schema <- structType(structField("a", "integer"), structField("b", "double"), @@ -1234,12 +1233,11 @@ setMethod("dapply", #' @export #' @examples #' \dontrun{ -#' df <- createDataFrame (sqlContext, iris) +#' df <- createDataFrame(iris) #' ldf <- dapplyCollect(df, function(x) { x }) #' #' # filter and add a column -#' df <- createDataFrame ( -#' sqlContext, +#' df <- createDataFrame( #' list(list(1L, 1, "1"), list(2L, 2, "2"), list(3L, 3, "3")), #' c("a", "b", "c")) #' ldf <- dapplyCollect( @@ -1533,7 +1531,7 @@ setMethod("select", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' selectExpr(df, "col1", "(col2 * 5) as newCol") #' } setMethod("selectExpr", @@ -1563,7 +1561,7 @@ setMethod("selectExpr", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' newDF <- withColumn(df, 
"newCol", df$col1 * 5) #' # Replace an existing column #' newDF2 <- withColumn(newDF, "newCol", newDF$col1) @@ -1592,13 +1590,12 @@ setMethod("withColumn", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' newDF <- mutate(df, newCol = df$col1 * 5, newCol2 = df$col1 * 2) #' names(newDF) # Will contain newCol, newCol2 #' newDF2 <- transform(df, newCol = df$col1 / 5, newCol2 = df$col1 * 2) #' -#' df <- createDataFrame(sqlContext, -#' list(list("Andy", 30L), list("Justin", 19L)), c("name", "age")) +#' df <- createDataFrame(list(list("Andy", 30L), list("Justin", 19L)), c("name", "age")) #' # Replace the "age" column #' df1 <- mutate(df, age = df$age + 1L) #' } @@ -1684,7 +1681,7 @@ setMethod("transform", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' newDF <- withColumnRenamed(df, "col1", "newCol1") #' } setMethod("withColumnRenamed", @@ -1709,7 +1706,7 @@ setMethod("withColumnRenamed", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' newDF <- rename(df, col1 = df$newCol1) #' } setMethod("rename", @@ -1753,7 +1750,7 @@ setClassUnion("characterOrColumn", c("character", "Column")) #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' arrange(df, df$col1) #' arrange(df, asc(df$col1), desc(abs(df$col2))) #' arrange(df, "col1", decreasing = TRUE) @@ -1829,7 +1826,7 @@ setMethod("orderBy", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' filter(df, "col1 > 0") #' filter(df, df$col2 != "abcdefg") #' } @@ -1869,7 +1866,7 @@ setMethod("where", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' dropDuplicates(df) #' dropDuplicates(df, c("col1", "col2")) #' } @@ -1903,8 +1900,8 @@ setMethod("dropDuplicates", #'\dontrun{ #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) -#' df1 <- read.json(sqlContext, path) -#' df2 <- read.json(sqlContext, path2) +#' df1 <- read.json(path) +#' df2 <- read.json(path2) #' join(df1, df2) # Performs a Cartesian #' join(df1, df2, df1$col1 == df2$col2) # Performs an inner join based on expression #' join(df1, df2, df1$col1 == df2$col2, "right_outer") @@ -1960,8 +1957,8 @@ setMethod("join", #'\dontrun{ #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) -#' df1 <- read.json(sqlContext, path) -#' df2 <- read.json(sqlContext, path2) +#' df1 <- read.json(path) +#' df2 <- read.json(path2) #' merge(df1, df2) # Performs a Cartesian #' merge(df1, df2, by = "col1") # Performs an inner join based on expression #' merge(df1, df2, by.x = "col1", by.y = "col2", all.y = TRUE) @@ -2093,8 +2090,8 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) { #'\dontrun{ #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) -#' df1 <- read.json(sqlContext, path) -#' df2 <- read.json(sqlContext, path2) +#' df1 <- read.json(path) +#' df2 <- read.json(path2) #' unioned <- unionAll(df, df2) #' } setMethod("unionAll", @@ -2136,8 +2133,8 @@ setMethod("rbind", #'\dontrun{ #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) 
-#' df1 <- read.json(sqlContext, path) -#' df2 <- read.json(sqlContext, path2) +#' df1 <- read.json(path) +#' df2 <- read.json(path2) #' intersectDF <- intersect(df, df2) #' } setMethod("intersect", @@ -2163,8 +2160,8 @@ setMethod("intersect", #'\dontrun{ #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) -#' df1 <- read.json(sqlContext, path) -#' df2 <- read.json(sqlContext, path2) +#' df1 <- read.json(path) +#' df2 <- read.json(path2) #' exceptDF <- except(df, df2) #' } #' @rdname except @@ -2205,7 +2202,7 @@ setMethod("except", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' write.df(df, "myfile", "parquet", "overwrite") #' saveDF(df, parquetPath2, "parquet", mode = saveMode, mergeSchema = mergeSchema) #' } @@ -2268,7 +2265,7 @@ setMethod("saveDF", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' saveAsTable(df, "myfile") #' } setMethod("saveAsTable", @@ -2307,7 +2304,7 @@ setMethod("saveAsTable", #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- read.json(sqlContext, path) +#' df <- read.json(path) #' describe(df) #' describe(df, "col1") #' describe(df, "col1", "col2") @@ -2476,7 +2473,7 @@ setMethod("fillna", #' @rdname as.data.frame #' @examples \dontrun{ #' -#' irisDF <- createDataFrame(sqlContext, iris) +#' irisDF <- createDataFrame(iris) #' df <- as.data.frame(irisDF[irisDF$Species == "setosa", ]) #' } setMethod("as.data.frame", @@ -2543,7 +2540,7 @@ setMethod("with", #' @param object a SparkDataFrame #' @examples \dontrun{ #' # Create a SparkDataFrame from the Iris dataset -#' irisDF <- createDataFrame(sqlContext, iris) +#' irisDF <- createDataFrame(iris) #' #' # Show the structure of the SparkDataFrame #' str(irisDF) @@ -2651,7 +2648,7 @@ setMethod("drop", }) #' This function computes a histogram for a given SparkR Column. -#' +#' #' @name histogram #' @title Histogram #' @param nbins the number of bins (optional). Default value is 10. @@ -2661,12 +2658,12 @@ setMethod("drop", #' @rdname histogram #' @family SparkDataFrame functions #' @export -#' @examples +#' @examples #' \dontrun{ -#' +#' #' # Create a SparkDataFrame from the Iris dataset -#' irisDF <- createDataFrame(sqlContext, iris) -#' +#' irisDF <- createDataFrame(iris) +#' #' # Compute histogram statistics #' histStats <- histogram(irisDF, irisDF$Sepal_Length, nbins = 12) #' @@ -2676,8 +2673,8 @@ setMethod("drop", #' require(ggplot2) #' plot <- ggplot(histStats, aes(x = centroids, y = counts)) + #' geom_bar(stat = "identity") + -#' xlab("Sepal_Length") + ylab("Frequency") -#' } +#' xlab("Sepal_Length") + ylab("Frequency") +#' } setMethod("histogram", signature(df = "SparkDataFrame", col = "characterOrColumn"), function(df, col, nbins = 10) { diff --git a/R/pkg/R/jobj.R b/R/pkg/R/jobj.R index 898e80648f..5b9142bbd3 100644 --- a/R/pkg/R/jobj.R +++ b/R/pkg/R/jobj.R @@ -72,8 +72,7 @@ jobj <- function(objId) { #' @param x The JVM object reference #' @param ... further arguments passed to or from other methods print.jobj <- function(x, ...) 
{ - cls <- callJMethod(x, "getClass") - name <- callJMethod(cls, "getName") + name <- getClassName.jobj(x) cat("Java ref type", name, "id", x$id, "\n", sep = " ") } diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index f46681149d..d4152b43b6 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -66,7 +66,7 @@ setClass("KMeansModel", representation(jobj = "jobj")) #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' data(iris) -#' df <- createDataFrame(sqlContext, iris) +#' df <- createDataFrame(iris) #' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family="gaussian") #' summary(model) #' } @@ -114,7 +114,7 @@ setMethod( #' sc <- sparkR.init() #' sqlContext <- sparkRSQL.init(sc) #' data(iris) -#' df <- createDataFrame(sqlContext, iris) +#' df <- createDataFrame(iris) #' model <- glm(Sepal_Length ~ Sepal_Width, df, family="gaussian") #' summary(model) #' } @@ -391,7 +391,7 @@ setMethod("predict", signature(object = "KMeansModel"), #' @export #' @examples #' \dontrun{ -#' df <- createDataFrame(sqlContext, infert) +#' df <- createDataFrame(infert) #' model <- spark.naiveBayes(df, education ~ ., laplace = 0) #'} setMethod("spark.naiveBayes", signature(data = "SparkDataFrame", formula = "formula"), @@ -414,7 +414,7 @@ setMethod("spark.naiveBayes", signature(data = "SparkDataFrame", formula = "form #' @export #' @examples #' \dontrun{ -#' df <- createDataFrame(sqlContext, infert) +#' df <- createDataFrame(infert) #' model <- spark.naiveBayes(df, education ~ ., laplace = 0) #' path <- "path/to/model" #' write.ml(model, path) @@ -545,7 +545,7 @@ read.ml <- function(path) { #' @export #' @examples #' \dontrun{ -#' df <- createDataFrame(sqlContext, ovarian) +#' df <- createDataFrame(ovarian) #' model <- spark.survreg(df, Surv(futime, fustat) ~ ecog_ps + rx) #' } setMethod("spark.survreg", signature(data = "SparkDataFrame", formula = "formula"), diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R index 879b664421..6b53517873 100644 --- a/R/pkg/R/stats.R +++ b/R/pkg/R/stats.R @@ -37,7 +37,7 @@ setOldClass("jobj") #' @export #' @examples #' \dontrun{ -#' df <- jsonFile(sqlContext, "/path/to/file.json") +#' df <- read.json("/path/to/file.json") #' ct <- crosstab(df, "title", "gender") #' } setMethod("crosstab", @@ -62,7 +62,7 @@ setMethod("crosstab", #' @export #' @examples #'\dontrun{ -#' df <- jsonFile(sqlContext, "/path/to/file.json") +#' df <- read.json("/path/to/file.json") #' cov <- cov(df, "title", "gender") #' } setMethod("cov", @@ -91,7 +91,7 @@ setMethod("cov", #' @export #' @examples #'\dontrun{ -#' df <- jsonFile(sqlContext, "/path/to/file.json") +#' df <- read.json("/path/to/file.json") #' corr <- corr(df, "title", "gender") #' corr <- corr(df, "title", "gender", method = "pearson") #' } @@ -120,7 +120,7 @@ setMethod("corr", #' @export #' @examples #' \dontrun{ -#' df <- jsonFile(sqlContext, "/path/to/file.json") +#' df <- read.json("/path/to/file.json") #' fi = freqItems(df, c("title", "gender")) #' } setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"), @@ -157,7 +157,7 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"), #' @export #' @examples #' \dontrun{ -#' df <- jsonFile(sqlContext, "/path/to/file.json") +#' df <- read.json("/path/to/file.json") #' quantiles <- approxQuantile(df, "key", c(0.5, 0.8), 0.0) #' } setMethod("approxQuantile", @@ -185,7 +185,7 @@ setMethod("approxQuantile", #' @export #' @examples #'\dontrun{ -#' df <- jsonFile(sqlContext, "/path/to/file.json") +#' df <- read.json("/path/to/file.json") #' sample <- 
sampleBy(df, "key", fractions, 36) #' } setMethod("sampleBy", diff --git a/docs/sparkr.md b/docs/sparkr.md index 6b2ca6d6a5..59e486d192 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -392,3 +392,4 @@ You can inspect the search path in R with [`search()`](https://stat.ethz.ch/R-ma - The method `table` has been removed and replaced by `tableToDF`. - The class `DataFrame` has been renamed to `SparkDataFrame` to avoid name conflicts. + - The `sqlContext` parameter is no longer required for these functions: `createDataFrame`, `as.DataFrame`, `read.json`, `jsonFile`, `read.parquet`, `parquetFile`, `read.text`, `sql`, `tables`, `tableNames`, `cacheTable`, `uncacheTable`, `clearCache`, `dropTempTable`, `read.df`, `loadDF`, `createExternalTable` -- GitLab