diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index b3f2dd82ff54993239885291f9553c68f1f65f0b..a8ade1ac9ab7292127855f81abe847ba4dd14e8e 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -463,6 +463,7 @@ setMethod("createOrReplaceTempView",
})
#' (Deprecated) Register Temporary Table
+#'
#' Registers a SparkDataFrame as a Temporary Table in the SQLContext
#' @param x A SparkDataFrame
#' @param tableName A character vector containing the name of the table
@@ -606,10 +607,10 @@ setMethod("unpersist",
#'
#' The following options for repartition are possible:
#' \itemize{
-#' \item{"Option 1"} {Return a new SparkDataFrame partitioned by
+#' \item{1.} {Return a new SparkDataFrame partitioned by
#' the given columns into `numPartitions`.}
-#' \item{"Option 2"} {Return a new SparkDataFrame that has exactly `numPartitions`.}
-#' \item{"Option 3"} {Return a new SparkDataFrame partitioned by the given column(s),
+#' \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.}
+#' \item{3.} {Return a new SparkDataFrame partitioned by the given column(s),
#' using `spark.sql.shuffle.partitions` as number of partitions.}
#'}
#' @param x A SparkDataFrame
@@ -1053,7 +1054,7 @@ setMethod("limit",
dataFrame(res)
})
-#' Take the first NUM rows of a SparkDataFrame and return a the results as a data.frame
+#' Take the first NUM rows of a SparkDataFrame and return the results as an R data.frame
#'
#' @family SparkDataFrame functions
#' @rdname take
@@ -1076,7 +1077,7 @@ setMethod("take",
#' Head
#'
-#' Return the first NUM rows of a SparkDataFrame as a data.frame. If NUM is NULL,
+#' Return the first NUM rows of a SparkDataFrame as an R data.frame. If NUM is NULL,
#' then head() returns the first 6 rows in keeping with the current data.frame
#' convention in R.
#'
@@ -1157,7 +1158,6 @@ setMethod("toRDD",
#'
#' @param x a SparkDataFrame
#' @return a GroupedData
-#' @seealso GroupedData
#' @family SparkDataFrame functions
#' @rdname groupBy
#' @name groupBy
@@ -1242,9 +1242,9 @@ dapplyInternal <- function(x, func, schema) {
#'
#' @param x A SparkDataFrame
#' @param func A function to be applied to each partition of the SparkDataFrame.
-#' func should have only one parameter, to which a data.frame corresponds
+#' func should have only one parameter, to which an R data.frame corresponding
#' to each partition will be passed.
-#' The output of func should be a data.frame.
+#' The output of func should be an R data.frame.
#' @param schema The schema of the resulting SparkDataFrame after the function is applied.
#' It must match the output of func.
#' @family SparkDataFrame functions
@@ -1291,9 +1291,9 @@ setMethod("dapply",
#'
#' @param x A SparkDataFrame
#' @param func A function to be applied to each partition of the SparkDataFrame.
-#' func should have only one parameter, to which a data.frame corresponds
+#' func should have only one parameter, to which an R data.frame corresponding
#' to each partition will be passed.
-#' The output of func should be a data.frame.
+#' The output of func should be an R data.frame.
#' @family SparkDataFrame functions
#' @rdname dapplyCollect
#' @name dapplyCollect
@@ -1641,7 +1641,6 @@ setMethod("select", signature(x = "SparkDataFrame", col = "character"),
}
})
-#' @family SparkDataFrame functions
#' @rdname select
#' @export
#' @note select(SparkDataFrame, Column) since 1.4.0
@@ -1654,7 +1653,6 @@ setMethod("select", signature(x = "SparkDataFrame", col = "Column"),
dataFrame(sdf)
})
-#' @family SparkDataFrame functions
#' @rdname select
#' @export
#' @note select(SparkDataFrame, list) since 1.4.0
@@ -2001,7 +1999,6 @@ setMethod("filter",
dataFrame(sdf)
})
-#' @family SparkDataFrame functions
#' @rdname filter
#' @name where
#' @note where since 1.4.0
@@ -2222,11 +2219,13 @@ setMethod("merge",
joinRes
})
+#' Creates a list of columns by replacing the intersected ones with aliases
+#'
#' Creates a list of columns by replacing the intersected ones with aliases.
#' The name of the alias column is formed by concatenating the original column name and a suffix.
#'
-#' @param x a SparkDataFrame on which the
-#' @param intersectedColNames a list of intersected column names
+#' @param x a SparkDataFrame
+#' @param intersectedColNames a list of intersected column names of the SparkDataFrame
#' @param suffix a suffix for the column name
#' @return list of columns
#'
@@ -2513,9 +2512,9 @@ setMethod("summary",
})
-#' dropna
+#' A set of SparkDataFrame functions working with NA values
#'
-#' Returns a new SparkDataFrame omitting rows with null values.
+#' dropna, na.omit - Returns a new SparkDataFrame omitting rows with null values.
#'
#' @param x A SparkDataFrame.
#' @param how "any" or "all".
@@ -2567,9 +2566,7 @@ setMethod("na.omit",
dropna(object, how, minNonNulls, cols)
})
-#' fillna
-#'
-#' Replace null values.
+#' fillna - Replace null values.
#'
#' @param x A SparkDataFrame.
#' @param value Value to replace null values with.
@@ -2640,7 +2637,7 @@ setMethod("fillna",
dataFrame(sdf)
})
-#' Download data from a SparkDataFrame into a data.frame
+#' Download data from a SparkDataFrame into an R data.frame
#'
#' This function downloads the contents of a SparkDataFrame into an R's data.frame.
#' Since data.frames are held in memory, ensure that you have enough memory
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 8d2c4ac7cee91e2b8f5c928f760237a1d8c90b24..ee3a41cacbee61a760c4a9309416266f54712893 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -67,7 +67,7 @@ dispatchFunc <- function(newFuncSig, x, ...) {
}
#' return the SparkSession
-#' @note getSparkSession since 2.0.0
+#' @noRd
getSparkSession <- function() {
if (exists(".sparkRsession", envir = .sparkREnv)) {
get(".sparkRsession", envir = .sparkREnv)
@@ -77,7 +77,7 @@ getSparkSession <- function() {
}
#' infer the SQL type
-#' @note infer_type since 1.4.0
+#' @noRd
infer_type <- function(x) {
if (is.null(x)) {
stop("can not infer type from NULL")
@@ -451,7 +451,7 @@ sql <- function(x, ...) {
#' Create a SparkDataFrame from a SparkSQL Table
#'
#' Returns the specified Table as a SparkDataFrame. The Table must have already been registered
-#' in the SQLContext.
+#' in the SparkSession.
#'
#' @param tableName The SparkSQL Table to convert to a SparkDataFrame.
#' @return SparkDataFrame
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 1af65d5d6e15580401021c21f2c7497269ad856c..1a65912d3aed1e328961d0f55871d7558eef859b 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -34,6 +34,11 @@ setOldClass("jobj")
setClass("Column", slots = list(jc = "jobj"))
+#' A set of operations working with SparkDataFrame columns
+#' @rdname columnfunctions
+#' @name columnfunctions
+NULL
+
setMethod("initialize", "Column", function(.Object, jc) {
.Object@jc <- jc
.Object
@@ -47,6 +52,7 @@ setMethod("column",
#' @rdname show
#' @name show
+#' @export
#' @note show(Column) since 1.4.0
setMethod("show", "Column", function(object) {
diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R
index 42f89c806bfd9c78021cacb749944a107c6ab846..96ef9438ad5dc3b7c9dec9956c71fc64338e461b 100644
--- a/R/pkg/R/context.R
+++ b/R/pkg/R/context.R
@@ -225,9 +225,10 @@ setCheckpointDir <- function(sc, dirName) {
invisible(callJMethod(sc, "setCheckpointDir", suppressWarnings(normalizePath(dirName))))
}
-#' Run a function over a list of elements, distributing the computations with Spark.
+#' Run a function over a list of elements, distributing the computations with Spark
#'
-#' Applies a function in a manner that is similar to doParallel or lapply to elements of a list.
+#' Run a function over a list of elements, distributing the computations with Spark. Applies a
+#' function in a manner that is similar to doParallel or lapply to elements of a list.
#' The computations are distributed using Spark. It is conceptually the same as the following code:
#' lapply(list, func)
#'
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index ce2386998c1bc3066cf0a81c85d5f328a751b1fb..6e0009f7c90aff4f498029c01f41a485d2cf1083 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -77,13 +77,14 @@ setMethod("acos",
column(jc)
})
-#' approxCountDistinct
+#' Returns the approximate number of distinct items in a group
#'
-#' Aggregate function: returns the approximate number of distinct items in a group.
+#' Returns the approximate number of distinct items in a group. This is a column
+#' aggregate function.
#'
#' @rdname approxCountDistinct
#' @name approxCountDistinct
-#' @family agg_funcs
+#' @return the approximate number of distinct items in a group.
#' @export
#' @examples \dontrun{approxCountDistinct(df$c)}
#' @note approxCountDistinct(Column) since 1.4.0
@@ -234,7 +235,7 @@ setMethod("cbrt",
column(jc)
})
-#' ceil
+#' Computes the ceiling of the given value
#'
#' Computes the ceiling of the given value.
#'
@@ -254,15 +255,16 @@ setMethod("ceil",
#' Though scala functions has "col" function, we don't expose it in SparkR
#' because we don't want to conflict with the "col" function in the R base
#' package and we also have "column" function exported which is an alias of "col".
+#' @noRd
col <- function(x) {
column(callJStatic("org.apache.spark.sql.functions", "col", x))
}
-#' column
+#' Returns a Column based on the given column name
#'
#' Returns a Column based on the given column name.
#'
-#' @rdname col
+#' @rdname column
#' @name column
#' @family normal_funcs
#' @export
@@ -385,9 +387,9 @@ setMethod("cosh",
column(jc)
})
-#' count
+#' Returns the number of items in a group
#'
-#' Aggregate function: returns the number of items in a group.
+#' Returns the number of items in a group. This is a column aggregate function.
#'
#' @rdname count
#' @name count
@@ -1193,7 +1195,7 @@ setMethod("sha1",
#'
#' Computes the signum of the given value.
#'
-#' @rdname signum
+#' @rdname sign
#' @name signum
#' @family math_funcs
#' @export
@@ -1717,7 +1719,7 @@ setMethod("datediff", signature(y = "Column"),
#' hypot
#'
-#' Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
+#' Computes "sqrt(a^2 + b^2)" without intermediate overflow or underflow.
#'
#' @rdname hypot
#' @name hypot
@@ -1813,12 +1815,8 @@ setMethod("pmod", signature(y = "Column"),
})
-#' Approx Count Distinct
-#'
-#' @family agg_funcs
#' @rdname approxCountDistinct
#' @name approxCountDistinct
-#' @return the approximate number of distinct items in a group.
#' @export
#' @examples \dontrun{approxCountDistinct(df$c, 0.02)}
#' @note approxCountDistinct(Column, numeric) since 1.4.0
@@ -1918,10 +1916,6 @@ setMethod("least",
column(jc)
})
-#' ceiling
-#'
-#' Computes the ceiling of the given value.
-#'
#' @rdname ceil
#' @name ceiling
#' @export
@@ -1933,11 +1927,7 @@ setMethod("ceiling",
ceil(x)
})
-#' sign
-#'
-#' Computes the signum of the given value.
-#'
-#' @rdname signum
+#' @rdname sign
#' @name sign
#' @export
#' @examples \dontrun{sign(df$c)}
@@ -1961,10 +1951,6 @@ setMethod("n_distinct", signature(x = "Column"),
countDistinct(x, ...)
})
-#' n
-#'
-#' Aggregate function: returns the number of items in a group.
-#'
#' @rdname count
#' @name n
#' @export
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index c307de7c07633deaee08a325a3c699df15ccf659..ead403be98002fd9b2e357f23f9d09c6cae84e1c 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -430,7 +430,7 @@ setGeneric("coltypes", function(x) { standardGeneric("coltypes") })
#' @export
setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") })
-#' @rdname schema
+#' @rdname columns
#' @export
setGeneric("columns", function(x) {standardGeneric("columns") })
@@ -495,7 +495,7 @@ setGeneric("na.omit",
standardGeneric("na.omit")
})
-#' @rdname schema
+#' @rdname dtypes
#' @export
setGeneric("dtypes", function(x) { standardGeneric("dtypes") })
@@ -551,7 +551,7 @@ setGeneric("mutate", function(.data, ...) {standardGeneric("mutate") })
#' @export
setGeneric("orderBy", function(x, col, ...) { standardGeneric("orderBy") })
-#' @rdname schema
+#' @rdname printSchema
#' @export
setGeneric("printSchema", function(x) { standardGeneric("printSchema") })
@@ -638,7 +638,7 @@ setGeneric("schema", function(x) { standardGeneric("schema") })
#' @export
setGeneric("select", function(x, col, ...) { standardGeneric("select") } )
-#' @rdname select
+#' @rdname selectExpr
#' @export
setGeneric("selectExpr", function(x, expr, ...) { standardGeneric("selectExpr") })
@@ -693,67 +693,67 @@ setGeneric("randomSplit", function(x, weights, seed) { standardGeneric("randomSp
###################### Column Methods ##########################
-#' @rdname column
+#' @rdname columnfunctions
#' @export
setGeneric("asc", function(x) { standardGeneric("asc") })
-#' @rdname column
+#' @rdname between
#' @export
setGeneric("between", function(x, bounds) { standardGeneric("between") })
-#' @rdname column
+#' @rdname cast
#' @export
setGeneric("cast", function(x, dataType) { standardGeneric("cast") })
-#' @rdname column
+#' @rdname columnfunctions
#' @export
setGeneric("contains", function(x, ...) { standardGeneric("contains") })
-#' @rdname column
+#' @rdname columnfunctions
#' @export
setGeneric("desc", function(x) { standardGeneric("desc") })
-#' @rdname column
+#' @rdname endsWith
#' @export
setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") })
-#' @rdname column
+#' @rdname columnfunctions
#' @export
setGeneric("getField", function(x, ...) { standardGeneric("getField") })
-#' @rdname column
+#' @rdname columnfunctions
#' @export
setGeneric("getItem", function(x, ...) { standardGeneric("getItem") })
-#' @rdname column
+#' @rdname columnfunctions
#' @export
setGeneric("isNaN", function(x) { standardGeneric("isNaN") })
-#' @rdname column
+#' @rdname columnfunctions
#' @export
setGeneric("isNull", function(x) { standardGeneric("isNull") })
-#' @rdname column
+#' @rdname columnfunctions
#' @export
setGeneric("isNotNull", function(x) { standardGeneric("isNotNull") })
-#' @rdname column
+#' @rdname columnfunctions
#' @export
setGeneric("like", function(x, ...) { standardGeneric("like") })
-#' @rdname column
+#' @rdname columnfunctions
#' @export
setGeneric("rlike", function(x, ...) { standardGeneric("rlike") })
-#' @rdname column
+#' @rdname startsWith
#' @export
setGeneric("startsWith", function(x, prefix) { standardGeneric("startsWith") })
-#' @rdname column
+#' @rdname when
#' @export
setGeneric("when", function(condition, value) { standardGeneric("when") })
-#' @rdname column
+#' @rdname otherwise
#' @export
setGeneric("otherwise", function(x, value) { standardGeneric("otherwise") })
@@ -825,7 +825,7 @@ setGeneric("cbrt", function(x) { standardGeneric("cbrt") })
#' @export
setGeneric("ceil", function(x) { standardGeneric("ceil") })
-#' @rdname col
+#' @rdname column
#' @export
setGeneric("column", function(x) { standardGeneric("column") })
@@ -1119,7 +1119,7 @@ setGeneric("shiftRight", function(y, x) { standardGeneric("shiftRight") })
#' @export
setGeneric("shiftRightUnsigned", function(y, x) { standardGeneric("shiftRightUnsigned") })
-#' @rdname signum
+#' @rdname sign
#' @export
setGeneric("signum", function(x) { standardGeneric("signum") })
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index d6ff2aa22df2fe66a956991fe12181b7e955675f..74dba8fe966fbe4de706f422c26cc2171c9d7026 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -235,8 +235,6 @@ setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
#' similarly to R package e1071's predict.
#'
#' @param object A fitted naive Bayes model
-#' @param newData SparkDataFrame for testing
-#' @return SparkDataFrame containing predicted labels in a column named "prediction"
#' @rdname predict
#' @export
#' @examples
@@ -378,8 +376,6 @@ setMethod("summary", signature(object = "KMeansModel"),
#' Makes predictions from a k-means model or a model produced by spark.kmeans().
#'
#' @param object A fitted k-means model
-#' @param newData SparkDataFrame for testing
-#' @return SparkDataFrame containing predicted labels in a column named "prediction"
#' @rdname predict
#' @export
#' @examples
@@ -621,8 +617,6 @@ setMethod("summary", signature(object = "AFTSurvivalRegressionModel"),
#' similarly to R package survival's predict.
#'
#' @param object A fitted AFT survival regression model
-#' @param newData SparkDataFrame for testing
-#' @return SparkDataFrame containing predicted labels in a column named "prediction"
#' @rdname predict
#' @export
#' @examples
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index 94d0e63c8a46a4be85f969436ea439394c2e9932..2b6e124151397725338e7c29f6cc76b55c40ba50 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -36,6 +36,8 @@ sparkR.stop <- function() {
sparkR.session.stop()
}
+#' Stop the Spark Session and Spark Context
+#'
#' Stop the Spark Session and Spark Context.
#'
#' Also terminates the backend this R session is connected to.
@@ -88,7 +90,7 @@ sparkR.session.stop <- function() {
clearJobjs()
}
-#' (Deprecated) Initialize a new Spark Context.
+#' (Deprecated) Initialize a new Spark Context
#'
#' This function initializes a new SparkContext.
#'
@@ -249,7 +251,7 @@ sparkR.sparkContext <- function(
sc
}
-#' (Deprecated) Initialize a new SQLContext.
+#' (Deprecated) Initialize a new SQLContext
#'
#' This function creates a SparkContext from an existing JavaSparkContext and
#' then uses it to initialize a new SQLContext
@@ -278,7 +280,7 @@ sparkRSQL.init <- function(jsc = NULL) {
sparkR.session(enableHiveSupport = FALSE)
}
-#' (Deprecated) Initialize a new HiveContext.
+#' (Deprecated) Initialize a new HiveContext
#'
#' This function creates a HiveContext from an existing JavaSparkContext
#'
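
The dapply contract documented in the DataFrame.R hunks above (func receives one R data.frame per partition and must return an R data.frame matching the declared schema) is easiest to see in code. A minimal sketch, assuming an active SparkR (>= 2.0.0) session; the data, column names, and derived column are illustrative, not part of this change:

library(SparkR)
sparkR.session()

# Illustrative input; column "b" contains an NA for the dropna/fillna calls below.
df <- createDataFrame(data.frame(a = 1:6, b = c(2, 4, NA, 8, 10, 12)))

# The declared schema must match what func returns, by position and type.
schema <- structType(structField("a", "integer"),
                     structField("b", "double"),
                     structField("a2", "integer"))

# func takes exactly one parameter: the R data.frame for one partition.
out <- dapply(df, function(pdf) {
  pdf$a2 <- pdf$a + 1L
  pdf
}, schema)
head(collect(out))

# The NA helpers documented in the same file:
collect(dropna(df, how = "any"))  # omit rows containing any null value
collect(fillna(df, value = 0))    # replace null values with 0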
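
The three repartition behaviors enumerated in the DataFrame.R hunk above correspond to three call shapes. A short sketch, again assuming an active SparkR session; the data and column names are illustrative:

df <- createDataFrame(data.frame(a = 1:6, b = letters[1:6]))

repartition(df, numPartitions = 2L)  # option 2: exactly `numPartitions` partitions
repartition(df, col = df$a)          # option 3: by column(s), using spark.sql.shuffle.partitions
repartition(df, 2L, df$a)            # option 1: by the given column(s) into `numPartitions`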