From c33e4b0d96d424568963c7e716c20f02949c72d1 Mon Sep 17 00:00:00 2001 From: Shivaram Venkataraman <shivaram@cs.berkeley.edu> Date: Sat, 16 Jul 2016 17:06:44 -0700 Subject: [PATCH] [SPARK-16507][SPARKR] Add a CRAN checker, fix Rd aliases ## What changes were proposed in this pull request? Add a check-cran.sh script that runs `R CMD check` as CRAN. Also fixes a number of issues pointed out by the check. These include - Updating `DESCRIPTION` to be appropriate - Adding a .Rbuildignore to ignore lintr, src-native, html that are non-standard files / dirs - Adding aliases to all S4 methods in DataFrame, Column, GroupedData etc. This is required as stated in https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Documenting-S4-classes-and-methods - Other minor fixes ## How was this patch tested? SparkR unit tests, running the above mentioned script Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu> Closes #14173 from shivaram/sparkr-cran-changes. --- R/check-cran.sh | 52 ++++++ R/pkg/.Rbuildignore | 5 + R/pkg/DESCRIPTION | 8 +- R/pkg/NAMESPACE | 9 + R/pkg/R/DataFrame.R | 105 ++++++++++- R/pkg/R/SQLContext.R | 3 + R/pkg/R/WindowSpec.R | 6 + R/pkg/R/column.R | 27 ++- R/pkg/R/functions.R | 436 ++++++++++++++++++++++++++++++++++++++++++- R/pkg/R/generics.R | 8 +- R/pkg/R/group.R | 9 +- R/pkg/R/mllib.R | 4 +- R/pkg/R/schema.R | 16 +- R/pkg/R/stats.R | 6 + R/pkg/R/utils.R | 20 +- R/pkg/R/window.R | 4 + dev/.rat-excludes | 1 + 17 files changed, 676 insertions(+), 43 deletions(-) create mode 100755 R/check-cran.sh create mode 100644 R/pkg/.Rbuildignore diff --git a/R/check-cran.sh b/R/check-cran.sh new file mode 100755 index 0000000000..b3a6860961 --- /dev/null +++ b/R/check-cran.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -o pipefail +set -e + +FWDIR="$(cd `dirname $0`; pwd)" +pushd $FWDIR > /dev/null + +if [ ! -z "$R_HOME" ] + then + R_SCRIPT_PATH="$R_HOME/bin" + else + # if system wide R_HOME is not found, then exit + if [ ! `command -v R` ]; then + echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed." + exit 1 + fi + R_SCRIPT_PATH="$(dirname $(which R))" +fi +echo "USING R_HOME = $R_HOME" + +# Build the latest docs +$FWDIR/create-docs.sh + +# Build a zip file containing the source package +"$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg + +# Run check as-cran. 
+# TODO(shivaram): Remove the skip tests once we figure out the install mechanism + +VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'` + +"$R_SCRIPT_PATH/"R CMD check --as-cran --no-tests SparkR_"$VERSION".tar.gz + +popd > /dev/null diff --git a/R/pkg/.Rbuildignore b/R/pkg/.Rbuildignore new file mode 100644 index 0000000000..544d203a6d --- /dev/null +++ b/R/pkg/.Rbuildignore @@ -0,0 +1,5 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^\.lintr$ +^src-native$ +^html$ diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 963a1bb580..ac73d6c798 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,12 +1,10 @@ Package: SparkR Type: Package -Title: R frontend for Spark +Title: R Frontend for Apache Spark Version: 2.0.0 -Date: 2013-09-09 +Date: 2016-07-07 Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman <shivaram@cs.berkeley.edu> -Imports: - methods Depends: R (>= 3.0), methods, @@ -14,7 +12,7 @@ Suggests: testthat, e1071, survival -Description: R frontend for Spark +Description: The SparkR package provides an R frontend for Apache Spark. 
License: Apache License (== 2.0) Collate: 'schema.R' diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index fe52905e0a..1d74c6d955 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -343,3 +343,12 @@ export("partitionBy", export("windowPartitionBy", "windowOrderBy") + +S3method(print, jobj) +S3method(print, structField) +S3method(print, structType) +S3method(print, summary.GeneralizedLinearRegressionModel) +S3method(structField, character) +S3method(structField, jobj) +S3method(structType, jobj) +S3method(structType, structField) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 47f9203ace..2e99aa026d 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -74,6 +74,7 @@ dataFrame <- function(sdf, isCached = FALSE) { #' @family SparkDataFrame functions #' @rdname printSchema #' @name printSchema +#' @aliases printSchema,SparkDataFrame-method #' @export #' @examples #'\dontrun{ @@ -99,6 +100,7 @@ setMethod("printSchema", #' @family SparkDataFrame functions #' @rdname schema #' @name schema +#' @aliases schema,SparkDataFrame-method #' @export #' @examples #'\dontrun{ @@ -121,6 +123,7 @@ setMethod("schema", #' @param x A SparkDataFrame #' @param extended Logical. If extended is FALSE, explain() only prints the physical plan. #' @family SparkDataFrame functions +#' @aliases explain,SparkDataFrame-method #' @rdname explain #' @name explain #' @export @@ -154,6 +157,7 @@ setMethod("explain", #' @family SparkDataFrame functions #' @rdname isLocal #' @name isLocal +#' @aliases isLocal,SparkDataFrame-method #' @export #' @examples #'\dontrun{ @@ -179,6 +183,7 @@ setMethod("isLocal", #' truncated. However, if set greater than zero, truncates strings longer than `truncate` #' characters and all cells will be aligned right. 
#' @family SparkDataFrame functions +#' @aliases showDF,SparkDataFrame-method #' @rdname showDF #' @name showDF #' @export @@ -210,6 +215,7 @@ setMethod("showDF", #' #' @family SparkDataFrame functions #' @rdname show +#' @aliases show,SparkDataFrame-method #' @name show #' @export #' @examples @@ -238,6 +244,7 @@ setMethod("show", "SparkDataFrame", #' @family SparkDataFrame functions #' @rdname dtypes #' @name dtypes +#' @aliases dtypes,SparkDataFrame-method #' @export #' @examples #'\dontrun{ @@ -264,7 +271,7 @@ setMethod("dtypes", #' @family SparkDataFrame functions #' @rdname columns #' @name columns - +#' @aliases columns,SparkDataFrame-method #' @export #' @examples #'\dontrun{ @@ -285,6 +292,7 @@ setMethod("columns", #' @rdname columns #' @name names +#' @aliases names,SparkDataFrame-method #' @note names since 1.5.0 setMethod("names", signature(x = "SparkDataFrame"), @@ -293,6 +301,7 @@ setMethod("names", }) #' @rdname columns +#' @aliases names<-,SparkDataFrame-method #' @name names<- #' @note names<- since 1.5.0 setMethod("names<-", @@ -305,6 +314,7 @@ setMethod("names<-", }) #' @rdname columns +#' @aliases colnames,SparkDataFrame-method #' @name colnames #' @note colnames since 1.6.0 setMethod("colnames", @@ -314,6 +324,7 @@ setMethod("colnames", }) #' @rdname columns +#' @aliases colnames<-,SparkDataFrame-method #' @name colnames<- #' @note colnames<- since 1.6.0 setMethod("colnames<-", @@ -350,6 +361,7 @@ setMethod("colnames<-", #' @param x A SparkDataFrame #' @return value A character vector with the column types of the given SparkDataFrame #' @rdname coltypes +#' @aliases coltypes,SparkDataFrame-method #' @name coltypes #' @family SparkDataFrame functions #' @export @@ -405,6 +417,7 @@ setMethod("coltypes", #' to keep that column as-is. 
#' @rdname coltypes #' @name coltypes<- +#' @aliases coltypes<-,SparkDataFrame,character-method #' @export #' @examples #'\dontrun{ @@ -453,6 +466,7 @@ setMethod("coltypes<-", #' @family SparkDataFrame functions #' @rdname createOrReplaceTempView #' @name createOrReplaceTempView +#' @aliases createOrReplaceTempView,SparkDataFrame,character-method #' @export #' @examples #'\dontrun{ @@ -479,6 +493,7 @@ setMethod("createOrReplaceTempView", #' @seealso \link{createOrReplaceTempView} #' @rdname registerTempTable-deprecated #' @name registerTempTable +#' @aliases registerTempTable,SparkDataFrame,character-method #' @export #' @examples #'\dontrun{ @@ -508,6 +523,7 @@ setMethod("registerTempTable", #' @family SparkDataFrame functions #' @rdname insertInto #' @name insertInto +#' @aliases insertInto,SparkDataFrame,character-method #' @export #' @examples #'\dontrun{ @@ -534,6 +550,7 @@ setMethod("insertInto", #' @param x A SparkDataFrame #' #' @family SparkDataFrame functions +#' @aliases cache,SparkDataFrame-method #' @rdname cache #' @name cache #' @export @@ -564,6 +581,7 @@ setMethod("cache", #' @family SparkDataFrame functions #' @rdname persist #' @name persist +#' @aliases persist,SparkDataFrame,character-method #' @export #' @examples #'\dontrun{ @@ -591,6 +609,7 @@ setMethod("persist", #' #' @family SparkDataFrame functions #' @rdname unpersist-methods +#' @aliases unpersist,SparkDataFrame-method #' @name unpersist #' @export #' @examples @@ -627,6 +646,7 @@ setMethod("unpersist", #' @family SparkDataFrame functions #' @rdname repartition #' @name repartition +#' @aliases repartition,SparkDataFrame-method #' @export #' @examples #'\dontrun{ @@ -670,6 +690,7 @@ setMethod("repartition", #' #' @param x A SparkDataFrame #' @return A StringRRDD of JSON objects +#' @aliases toJSON,SparkDataFrame-method #' @noRd #' @examples #'\dontrun{ @@ -697,6 +718,7 @@ setMethod("toJSON", #' @family SparkDataFrame functions #' @rdname write.json #' @name write.json +#' @aliases 
write.json,SparkDataFrame,character-method #' @export #' @examples #'\dontrun{ @@ -722,6 +744,7 @@ setMethod("write.json", #' @param path The directory where the file is saved #' #' @family SparkDataFrame functions +#' @aliases write.orc,SparkDataFrame,character-method #' @rdname write.orc #' @name write.orc #' @export @@ -751,6 +774,7 @@ setMethod("write.orc", #' @family SparkDataFrame functions #' @rdname write.parquet #' @name write.parquet +#' @aliases write.parquet,SparkDataFrame,character-method #' @export #' @examples #'\dontrun{ @@ -770,6 +794,7 @@ setMethod("write.parquet", #' @rdname write.parquet #' @name saveAsParquetFile +#' @aliases saveAsParquetFile,SparkDataFrame,character-method #' @export #' @note saveAsParquetFile since 1.4.0 setMethod("saveAsParquetFile", @@ -789,6 +814,7 @@ setMethod("saveAsParquetFile", #' @param path The directory where the file is saved #' #' @family SparkDataFrame functions +#' @aliases write.text,SparkDataFrame,character-method #' @rdname write.text #' @name write.text #' @export @@ -814,6 +840,7 @@ setMethod("write.text", #' @param x A SparkDataFrame #' #' @family SparkDataFrame functions +#' @aliases distinct,SparkDataFrame-method #' @rdname distinct #' @name distinct #' @export @@ -834,6 +861,7 @@ setMethod("distinct", #' @rdname distinct #' @name unique +#' @aliases unique,SparkDataFrame-method #' @note unique since 1.5.0 setMethod("unique", signature(x = "SparkDataFrame"), @@ -851,6 +879,7 @@ setMethod("unique", #' @param seed Randomness seed value #' #' @family SparkDataFrame functions +#' @aliases sample,SparkDataFrame,logical,numeric-method #' @rdname sample #' @name sample #' @export @@ -879,6 +908,7 @@ setMethod("sample", }) #' @rdname sample +#' @aliases sample_frac,SparkDataFrame,logical,numeric-method #' @name sample_frac #' @note sample_frac since 1.4.0 setMethod("sample_frac", @@ -895,6 +925,7 @@ setMethod("sample_frac", #' @family SparkDataFrame functions #' @rdname nrow #' @name count +#' @aliases 
count,SparkDataFrame-method #' @export #' @examples #'\dontrun{ @@ -912,6 +943,7 @@ setMethod("count", #' @name nrow #' @rdname nrow +#' @aliases nrow,SparkDataFrame-method #' @note nrow since 1.5.0 setMethod("nrow", signature(x = "SparkDataFrame"), @@ -926,6 +958,7 @@ setMethod("nrow", #' @family SparkDataFrame functions #' @rdname ncol #' @name ncol +#' @aliases ncol,SparkDataFrame-method #' @export #' @examples #'\dontrun{ @@ -948,6 +981,7 @@ setMethod("ncol", #' #' @family SparkDataFrame functions #' @rdname dim +#' @aliases dim,SparkDataFrame-method #' @name dim #' @export #' @examples @@ -972,6 +1006,7 @@ setMethod("dim", #' #' @family SparkDataFrame functions #' @rdname collect +#' @aliases collect,SparkDataFrame-method #' @name collect #' @export #' @examples @@ -1045,6 +1080,7 @@ setMethod("collect", #' @family SparkDataFrame functions #' @rdname limit #' @name limit +#' @aliases limit,SparkDataFrame,numeric-method #' @export #' @examples #' \dontrun{ @@ -1066,6 +1102,7 @@ setMethod("limit", #' @family SparkDataFrame functions #' @rdname take #' @name take +#' @aliases take,SparkDataFrame,numeric-method #' @export #' @examples #'\dontrun{ @@ -1093,6 +1130,7 @@ setMethod("take", #' @return A data.frame #' #' @family SparkDataFrame functions +#' @aliases head,SparkDataFrame-method #' @rdname head #' @name head #' @export @@ -1116,6 +1154,7 @@ setMethod("head", #' @param x A SparkDataFrame #' #' @family SparkDataFrame functions +#' @aliases first,SparkDataFrame-method #' @rdname first #' @name first #' @export @@ -1166,6 +1205,7 @@ setMethod("toRDD", #' @param x a SparkDataFrame #' @return a GroupedData #' @family SparkDataFrame functions +#' @aliases groupBy,SparkDataFrame-method #' @rdname groupBy #' @name groupBy #' @export @@ -1193,6 +1233,7 @@ setMethod("groupBy", #' @rdname groupBy #' @name group_by +#' @aliases group_by,SparkDataFrame-method #' @note group_by since 1.4.0 setMethod("group_by", signature(x = "SparkDataFrame"), @@ -1206,7 +1247,8 @@ 
setMethod("group_by", #' #' @param x a SparkDataFrame #' @family SparkDataFrame functions -#' @rdname agg +#' @aliases agg,SparkDataFrame-method +#' @rdname summarize #' @name agg #' @export #' @note agg since 1.4.0 @@ -1216,8 +1258,9 @@ setMethod("agg", agg(groupBy(x), ...) }) -#' @rdname agg +#' @rdname summarize #' @name summarize +#' @aliases summarize,SparkDataFrame-method #' @note summarize since 1.4.0 setMethod("summarize", signature(x = "SparkDataFrame"), @@ -1256,6 +1299,7 @@ dapplyInternal <- function(x, func, schema) { #' It must match the output of func. #' @family SparkDataFrame functions #' @rdname dapply +#' @aliases dapply,SparkDataFrame,function,structType-method #' @name dapply #' @seealso \link{dapplyCollect} #' @export @@ -1294,7 +1338,7 @@ setMethod("dapply", #' dapplyCollect #' #' Apply a function to each partition of a SparkDataFrame and collect the result back -#’ to R as a data.frame. +#' to R as a data.frame. #' #' @param x A SparkDataFrame #' @param func A function to be applied to each partition of the SparkDataFrame. @@ -1303,6 +1347,7 @@ setMethod("dapply", #' The output of func should be a R data.frame. #' @family SparkDataFrame functions #' @rdname dapplyCollect +#' @aliases dapplyCollect,SparkDataFrame,function-method #' @name dapplyCollect #' @seealso \link{dapply} #' @export @@ -1358,6 +1403,7 @@ setMethod("dapplyCollect", #' output column with preferred output column name and corresponding data type. #' @return a SparkDataFrame #' @family SparkDataFrame functions +#' @aliases gapply,SparkDataFrame-method #' @rdname gapply #' @name gapply #' @seealso \link{gapplyCollect} @@ -1446,6 +1492,7 @@ setMethod("gapply", #' The output of `func` is a local R data.frame. 
#' @return a data.frame #' @family SparkDataFrame functions +#' @aliases gapplyCollect,SparkDataFrame-method #' @rdname gapplyCollect #' @name gapplyCollect #' @seealso \link{gapply} @@ -1592,6 +1639,7 @@ getColumn <- function(x, c) { #' @rdname select #' @name $ +#' @aliases $,SparkDataFrame-method #' @note $ since 1.4.0 setMethod("$", signature(x = "SparkDataFrame"), function(x, name) { @@ -1600,6 +1648,7 @@ setMethod("$", signature(x = "SparkDataFrame"), #' @rdname select #' @name $<- +#' @aliases $<-,SparkDataFrame-method #' @note $<- since 1.4.0 setMethod("$<-", signature(x = "SparkDataFrame"), function(x, name, value) { @@ -1618,6 +1667,7 @@ setClassUnion("numericOrcharacter", c("numeric", "character")) #' @rdname subset #' @name [[ +#' @aliases [[,SparkDataFrame,numericOrcharacter-method #' @note [[ since 1.4.0 setMethod("[[", signature(x = "SparkDataFrame", i = "numericOrcharacter"), function(x, i) { @@ -1630,6 +1680,7 @@ setMethod("[[", signature(x = "SparkDataFrame", i = "numericOrcharacter"), #' @rdname subset #' @name [ +#' @aliases [,SparkDataFrame-method #' @note [ since 1.4.0 setMethod("[", signature(x = "SparkDataFrame"), function(x, i, j, ..., drop = F) { @@ -1677,6 +1728,7 @@ setMethod("[", signature(x = "SparkDataFrame"), #' @return A new SparkDataFrame containing only the rows that meet the condition with selected columns #' @export #' @family SparkDataFrame functions +#' @aliases subset,SparkDataFrame-method #' @rdname subset #' @name subset #' @family subsetting functions @@ -1714,6 +1766,7 @@ setMethod("subset", signature(x = "SparkDataFrame"), #' @export #' @family SparkDataFrame functions #' @rdname select +#' @aliases select,SparkDataFrame,character-method #' @name select #' @family subsetting functions #' @examples @@ -1743,6 +1796,7 @@ setMethod("select", signature(x = "SparkDataFrame", col = "character"), #' @rdname select #' @export +#' @aliases select,SparkDataFrame,Column-method #' @note select(SparkDataFrame, Column) since 1.4.0 
setMethod("select", signature(x = "SparkDataFrame", col = "Column"), function(x, col, ...) { @@ -1755,6 +1809,7 @@ setMethod("select", signature(x = "SparkDataFrame", col = "Column"), #' @rdname select #' @export +#' @aliases select,SparkDataFrame,list-method #' @note select(SparkDataFrame, list) since 1.4.0 setMethod("select", signature(x = "SparkDataFrame", col = "list"), @@ -1779,6 +1834,7 @@ setMethod("select", #' @param ... Additional expressions #' @return A SparkDataFrame #' @family SparkDataFrame functions +#' @aliases selectExpr,SparkDataFrame,character-method #' @rdname selectExpr #' @name selectExpr #' @export @@ -1808,6 +1864,7 @@ setMethod("selectExpr", #' @param col A Column expression. #' @return A SparkDataFrame with the new column added or the existing column replaced. #' @family SparkDataFrame functions +#' @aliases withColumn,SparkDataFrame,character,Column-method #' @rdname withColumn #' @name withColumn #' @seealso \link{rename} \link{mutate} @@ -1837,6 +1894,7 @@ setMethod("withColumn", #' @param col a named argument of the form name = col #' @return A new SparkDataFrame with the new columns added or replaced. #' @family SparkDataFrame functions +#' @aliases mutate,SparkDataFrame-method #' @rdname mutate #' @name mutate #' @seealso \link{rename} \link{withColumn} @@ -1912,6 +1970,7 @@ setMethod("mutate", #' @export #' @rdname mutate +#' @aliases transform,SparkDataFrame-method #' @name transform #' @note transform since 1.5.0 setMethod("transform", @@ -1931,6 +1990,7 @@ setMethod("transform", #' @family SparkDataFrame functions #' @rdname rename #' @name withColumnRenamed +#' @aliases withColumnRenamed,SparkDataFrame,character,character-method #' @seealso \link{mutate} #' @export #' @examples @@ -1957,6 +2017,7 @@ setMethod("withColumnRenamed", #' @param ... 
A named pair of the form new_column_name = existing_column #' @rdname rename #' @name rename +#' @aliases rename,SparkDataFrame-method #' @export #' @examples #'\dontrun{ @@ -1999,6 +2060,7 @@ setClassUnion("characterOrColumn", c("character", "Column")) #' a character vector is specified for col #' @return A SparkDataFrame where all elements are sorted. #' @family SparkDataFrame functions +#' @aliases arrange,SparkDataFrame,Column-method #' @rdname arrange #' @name arrange #' @export @@ -2026,6 +2088,7 @@ setMethod("arrange", #' @rdname arrange #' @name arrange +#' @aliases arrange,SparkDataFrame,character-method #' @export #' @note arrange(SparkDataFrame, character) since 1.4.0 setMethod("arrange", @@ -2059,6 +2122,7 @@ setMethod("arrange", #' @rdname arrange #' @name orderBy +#' @aliases orderBy,SparkDataFrame,characterOrColumn-method #' @export #' @note orderBy(SparkDataFrame, characterOrColumn) since 1.4.0 setMethod("orderBy", @@ -2076,6 +2140,7 @@ setMethod("orderBy", #' or a string containing a SQL statement #' @return A SparkDataFrame containing only the rows that meet the condition. #' @family SparkDataFrame functions +#' @aliases filter,SparkDataFrame,characterOrColumn-method #' @rdname filter #' @name filter #' @family subsetting functions @@ -2101,6 +2166,7 @@ setMethod("filter", #' @rdname filter #' @name where +#' @aliases where,SparkDataFrame,characterOrColumn-method #' @note where since 1.4.0 setMethod("where", signature(x = "SparkDataFrame", condition = "characterOrColumn"), @@ -2118,6 +2184,7 @@ setMethod("where", #' If the first argument contains a character vector, the followings are ignored. #' @return A SparkDataFrame with duplicate rows removed. #' @family SparkDataFrame functions +#' @aliases dropDuplicates,SparkDataFrame-method #' @rdname dropDuplicates #' @name dropDuplicates #' @export @@ -2164,6 +2231,7 @@ setMethod("dropDuplicates", #' 'right_outer', 'rightouter', 'right', and 'leftsemi'. The default joinType is "inner". 
#' @return A SparkDataFrame containing the result of the join operation. #' @family SparkDataFrame functions +#' @aliases join,SparkDataFrame,SparkDataFrame-method #' @rdname join #' @name join #' @seealso \link{merge} @@ -2223,6 +2291,7 @@ setMethod("join", #' outer join will be returned. If all.x and all.y are set to TRUE, a full #' outer join will be returned. #' @family SparkDataFrame functions +#' @aliases merge,SparkDataFrame,SparkDataFrame-method #' @rdname merge #' @seealso \link{join} #' @export @@ -2360,6 +2429,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) { #' @family SparkDataFrame functions #' @rdname union #' @name union +#' @aliases union,SparkDataFrame,SparkDataFrame-method #' @seealso \link{rbind} #' @export #' @examples @@ -2381,6 +2451,7 @@ setMethod("union", #' unionAll is deprecated - use union instead #' @rdname union #' @name unionAll +#' @aliases unionAll,SparkDataFrame,SparkDataFrame-method #' @export #' @note unionAll since 1.4.0 setMethod("unionAll", @@ -2399,6 +2470,7 @@ setMethod("unionAll", #' @param ... Additional SparkDataFrame #' @return A SparkDataFrame containing the result of the union. #' @family SparkDataFrame functions +#' @aliases rbind,SparkDataFrame-method #' @rdname rbind #' @name rbind #' @seealso \link{union} @@ -2428,6 +2500,7 @@ setMethod("rbind", #' @param y A SparkDataFrame #' @return A SparkDataFrame containing the result of the intersect. #' @family SparkDataFrame functions +#' @aliases intersect,SparkDataFrame,SparkDataFrame-method #' @rdname intersect #' @name intersect #' @export @@ -2455,6 +2528,7 @@ setMethod("intersect", #' @param y A SparkDataFrame #' @return A SparkDataFrame containing the result of the except operation. 
#' @family SparkDataFrame functions +#' @aliases except,SparkDataFrame,SparkDataFrame-method #' @rdname except #' @name except #' @export @@ -2498,6 +2572,7 @@ setMethod("except", #' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default) #' #' @family SparkDataFrame functions +#' @aliases write.df,SparkDataFrame,character-method #' @rdname write.df #' @name write.df #' @export @@ -2529,6 +2604,7 @@ setMethod("write.df", #' @rdname write.df #' @name saveDF +#' @aliases saveDF,SparkDataFrame,character-method #' @export #' @note saveDF since 1.4.0 setMethod("saveDF", @@ -2558,6 +2634,7 @@ setMethod("saveDF", #' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default) #' #' @family SparkDataFrame functions +#' @aliases saveAsTable,SparkDataFrame,character-method #' @rdname saveAsTable #' @name saveAsTable #' @export @@ -2595,6 +2672,7 @@ setMethod("saveAsTable", #' @param ... Additional expressions #' @return A SparkDataFrame #' @family SparkDataFrame functions +#' @aliases describe,SparkDataFrame,character-method describe,SparkDataFrame,ANY-method #' @rdname summary #' @name describe #' @export @@ -2618,6 +2696,7 @@ setMethod("describe", #' @rdname summary #' @name describe +#' @aliases describe,SparkDataFrame-method #' @note describe(SparkDataFrame) since 1.4.0 setMethod("describe", signature(x = "SparkDataFrame"), @@ -2628,6 +2707,7 @@ setMethod("describe", #' @rdname summary #' @name summary +#' @aliases summary,SparkDataFrame-method #' @note summary(SparkDataFrame) since 1.5.0 setMethod("summary", signature(object = "SparkDataFrame"), @@ -2653,6 +2733,7 @@ setMethod("summary", #' #' @family SparkDataFrame functions #' @rdname nafunctions +#' @aliases dropna,SparkDataFrame-method #' @name dropna #' @export #' @examples @@ -2682,6 +2763,7 @@ setMethod("dropna", #' @rdname nafunctions #' @name na.omit +#' @aliases na.omit,SparkDataFrame-method #' @export #' @note na.omit since 1.5.0 
setMethod("na.omit", @@ -2707,6 +2789,7 @@ setMethod("na.omit", #' #' @rdname nafunctions #' @name fillna +#' @aliases fillna,SparkDataFrame-method #' @export #' @examples #'\dontrun{ @@ -2770,6 +2853,7 @@ setMethod("fillna", #' @param x a SparkDataFrame #' @return a data.frame #' @family SparkDataFrame functions +#' @aliases as.data.frame,SparkDataFrame-method #' @rdname as.data.frame #' @examples \dontrun{ #' @@ -2791,6 +2875,7 @@ setMethod("as.data.frame", #' #' @family SparkDataFrame functions #' @rdname attach +#' @aliases attach,SparkDataFrame-method #' @param what (SparkDataFrame) The SparkDataFrame to attach #' @param pos (integer) Specify position in search() where to attach. #' @param name (character) Name to use for the attached SparkDataFrame. Names @@ -2821,6 +2906,7 @@ setMethod("attach", #' #' @rdname with #' @family SparkDataFrame functions +#' @aliases with,SparkDataFrame-method #' @param data (SparkDataFrame) SparkDataFrame to use for constructing an environment. #' @param expr (expression) Expression to evaluate. #' @param ... arguments to be passed to future methods. @@ -2844,6 +2930,7 @@ setMethod("with", #' #' @name str #' @rdname str +#' @aliases str,SparkDataFrame-method #' @family SparkDataFrame functions #' @param object a SparkDataFrame #' @examples \dontrun{ @@ -2925,6 +3012,7 @@ setMethod("str", #' @family SparkDataFrame functions #' @rdname drop #' @name drop +#' @aliases drop,SparkDataFrame-method #' @export #' @examples #'\dontrun{ @@ -2950,6 +3038,10 @@ setMethod("drop", }) # Expose base::drop +#' @name drop +#' @rdname drop +#' @aliases drop,ANY-method +#' @export setMethod("drop", signature(x = "ANY"), function(x) { @@ -2966,6 +3058,7 @@ setMethod("drop", #' @param colname the name of the column to build the histogram from. #' @return a data.frame with the histogram statistics, i.e., counts and centroids. 
#' @rdname histogram +#' @aliases histogram,SparkDataFrame,characterOrColumn-method #' @family SparkDataFrame functions #' @export #' @examples @@ -3025,7 +3118,7 @@ setMethod("histogram", # columns AND all of them have names 100 characters long (which is very unlikely), # AND they run 1 billion histograms, the probability of collision will roughly be # 1 in 4.4 x 10 ^ 96 - colname <- paste(base:::sample(c(letters, LETTERS), + colname <- paste(base::sample(c(letters, LETTERS), size = min(max(nchar(colnames(df))) + 1, 100), replace = TRUE), collapse = "") @@ -3100,6 +3193,7 @@ setMethod("histogram", #' @family SparkDataFrame functions #' @rdname write.jdbc #' @name write.jdbc +#' @aliases write.jdbc,SparkDataFrame,character,character-method #' @export #' @examples #'\dontrun{ @@ -3127,6 +3221,7 @@ setMethod("write.jdbc", #' @param seed A seed to use for random split #' #' @family SparkDataFrame functions +#' @aliases randomSplit,SparkDataFrame,numeric-method #' @rdname randomSplit #' @name randomSplit #' @export diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index d2ea1553c6..a14bcd91b3 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -269,6 +269,9 @@ as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { createDataFrame(data, schema, samplingRatio) } +#' @rdname createDataFrame +#' @aliases as.DataFrame +#' @export as.DataFrame <- function(x, ...) { dispatchFunc("as.DataFrame(data, schema = NULL, samplingRatio = 1.0)", x, ...) 
} diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R index e20d05dece..4746380096 100644 --- a/R/pkg/R/WindowSpec.R +++ b/R/pkg/R/WindowSpec.R @@ -58,6 +58,7 @@ setMethod("show", "WindowSpec", #' @return a WindowSpec #' @rdname partitionBy #' @name partitionBy +#' @aliases partitionBy,WindowSpec-method #' @family windowspec_method #' @export #' @examples @@ -89,6 +90,7 @@ setMethod("partitionBy", #' @return a WindowSpec #' @rdname arrange #' @name orderBy +#' @aliases orderBy,WindowSpec,character-method #' @family windowspec_method #' @export #' @examples @@ -105,6 +107,7 @@ setMethod("orderBy", #' @rdname arrange #' @name orderBy +#' @aliases orderBy,WindowSpec,Column-method #' @export #' @note orderBy(WindowSpec, Column) since 2.0.0 setMethod("orderBy", @@ -131,6 +134,7 @@ setMethod("orderBy", #' The frame is unbounded if this is the maximum long value. #' @return a WindowSpec #' @rdname rowsBetween +#' @aliases rowsBetween,WindowSpec,numeric,numeric-method #' @name rowsBetween #' @family windowspec_method #' @export @@ -162,6 +166,7 @@ setMethod("rowsBetween", #' The frame is unbounded if this is the maximum long value. 
#' @return a WindowSpec #' @rdname rangeBetween +#' @aliases rangeBetween,WindowSpec,numeric,numeric-method #' @name rangeBetween #' @family windowspec_method #' @export @@ -187,6 +192,7 @@ setMethod("rangeBetween", #' #' @rdname over #' @name over +#' @aliases over,Column,WindowSpec-method #' @family colum_func #' @export #' @note over since 2.0.0 diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 1a65912d3a..0edb9d2ae5 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -44,6 +44,9 @@ setMethod("initialize", "Column", function(.Object, jc) { .Object }) +#' @rdname column +#' @name column +#' @aliases column,jobj-method setMethod("column", signature(x = "jobj"), function(x) { @@ -52,6 +55,7 @@ setMethod("column", #' @rdname show #' @name show +#' @aliases show,Column-method #' @export #' @note show(Column) since 1.4.0 setMethod("show", "Column", @@ -131,8 +135,12 @@ createMethods() #' #' Set a new name for a column #' +#' @param object Column to rename +#' @param data new name to use +#' #' @rdname alias #' @name alias +#' @aliases alias,Column-method #' @family colum_func #' @export #' @note alias since 1.4.0 @@ -153,6 +161,7 @@ setMethod("alias", #' @rdname substr #' @name substr #' @family colum_func +#' @aliases substr,Column-method #' #' @param start starting position #' @param stop ending position @@ -171,8 +180,9 @@ setMethod("substr", signature(x = "Column"), #' @rdname startsWith #' @name startsWith #' @family colum_func +#' @aliases startsWith,Column-method #' -#' @param x vector of character string whose “starts” are considered +#' @param x vector of character string whose "starts" are considered #' @param prefix character vector (often of length one) #' @note startsWith since 1.4.0 setMethod("startsWith", signature(x = "Column"), @@ -189,8 +199,9 @@ setMethod("startsWith", signature(x = "Column"), #' @rdname endsWith #' @name endsWith #' @family colum_func +#' @aliases endsWith,Column-method #' -#' @param x vector of character string whose
“ends” are considered +#' @param x vector of character string whose "ends" are considered #' @param suffix character vector (often of length one) #' @note endsWith since 1.4.0 setMethod("endsWith", signature(x = "Column"), @@ -206,6 +217,7 @@ setMethod("endsWith", signature(x = "Column"), #' @rdname between #' @name between #' @family colum_func +#' @aliases between,Column-method #' #' @param bounds lower and upper bounds #' @note between since 1.5.0 @@ -224,10 +236,10 @@ setMethod("between", signature(x = "Column"), #' @rdname cast #' @name cast #' @family colum_func +#' @aliases cast,Column-method #' #' @examples \dontrun{ #' cast(df$age, "string") -#' cast(df$name, list(type="array", elementType="byte", containsNull = TRUE)) #' } #' @note cast since 1.4.0 setMethod("cast", @@ -235,12 +247,8 @@ setMethod("cast", function(x, dataType) { if (is.character(dataType)) { column(callJMethod(x@jc, "cast", dataType)) - } else if (is.list(dataType)) { - json <- tojson(dataType) - jdataType <- callJStatic("org.apache.spark.sql.types.DataType", "fromJson", json) - column(callJMethod(x@jc, "cast", jdataType)) } else { - stop("dataType should be character or list") + stop("dataType should be character") } }) @@ -248,7 +256,7 @@ setMethod("cast", #' #' @rdname match #' @name %in% -#' @aliases %in% +#' @aliases %in%,Column-method #' @return a matched values as a result of comparing with given values. #' @export #' @examples @@ -272,6 +280,7 @@ setMethod("%in%", #' @rdname otherwise #' @name otherwise #' @family colum_func +#' @aliases otherwise,Column-method #' @export #' @note otherwise since 1.5.0 setMethod("otherwise", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 52d46f9d76..573c915a5c 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -27,6 +27,7 @@ NULL #' @rdname lit #' @name lit #' @export +#' @aliases lit,ANY-method #' @examples #' \dontrun{ #' lit(df$name) @@ -46,11 +47,14 @@ setMethod("lit", signature("ANY"), #' #' Computes the absolute value.
#' +#' @param x Column to compute on. +#' #' @rdname abs #' @name abs #' @family normal_funcs #' @export #' @examples \dontrun{abs(df$c)} +#' @aliases abs,Column-method #' @note abs since 1.5.0 setMethod("abs", signature(x = "Column"), @@ -64,11 +68,14 @@ setMethod("abs", #' Computes the cosine inverse of the given value; the returned angle is in the range #' 0.0 through pi. #' +#' @param x Column to compute on. +#' #' @rdname acos #' @name acos #' @family math_funcs #' @export #' @examples \dontrun{acos(df$c)} +#' @aliases acos,Column-method #' @note acos since 1.5.0 setMethod("acos", signature(x = "Column"), @@ -82,10 +89,13 @@ setMethod("acos", #' Returns the approximate number of distinct items in a group. This is a column #' aggregate function. #' +#' @param x Column to compute on. +#' #' @rdname approxCountDistinct #' @name approxCountDistinct #' @return the approximate number of distinct items in a group. #' @export +#' @aliases approxCountDistinct,Column-method #' @examples \dontrun{approxCountDistinct(df$c)} #' @note approxCountDistinct(Column) since 1.4.0 setMethod("approxCountDistinct", @@ -100,10 +110,13 @@ setMethod("approxCountDistinct", #' Computes the numeric value of the first character of the string column, and returns the #' result as a int column. #' +#' @param x Column to compute on. +#' #' @rdname ascii #' @name ascii #' @family string_funcs #' @export +#' @aliases ascii,Column-method #' @examples \dontrun{\dontrun{ascii(df$c)}} #' @note ascii since 1.5.0 setMethod("ascii", @@ -118,10 +131,13 @@ setMethod("ascii", #' Computes the sine inverse of the given value; the returned angle is in the range #' -pi/2 through pi/2. #' +#' @param x Column to compute on. +#' #' @rdname asin #' @name asin #' @family math_funcs #' @export +#' @aliases asin,Column-method #' @examples \dontrun{asin(df$c)} #' @note asin since 1.5.0 setMethod("asin", @@ -135,10 +151,13 @@ setMethod("asin", #' #' Computes the tangent inverse of the given value. 
#' +#' @param x Column to compute on. +#' #' @rdname atan #' @name atan #' @family math_funcs #' @export +#' @aliases atan,Column-method #' @examples \dontrun{atan(df$c)} #' @note atan since 1.5.0 setMethod("atan", @@ -152,10 +171,13 @@ setMethod("atan", #' #' Aggregate function: returns the average of the values in a group. #' +#' @param x Column to compute on. +#' #' @rdname avg #' @name avg #' @family agg_funcs #' @export +#' @aliases avg,Column-method #' @examples \dontrun{avg(df$c)} #' @note avg since 1.4.0 setMethod("avg", @@ -170,10 +192,13 @@ setMethod("avg", #' Computes the BASE64 encoding of a binary column and returns it as a string column. #' This is the reverse of unbase64. #' +#' @param x Column to compute on. +#' #' @rdname base64 #' @name base64 #' @family string_funcs #' @export +#' @aliases base64,Column-method #' @examples \dontrun{base64(df$c)} #' @note base64 since 1.5.0 setMethod("base64", @@ -188,10 +213,13 @@ setMethod("base64", #' An expression that returns the string representation of the binary value of the given long #' column. For example, bin("12") returns "1100". #' +#' @param x Column to compute on. +#' #' @rdname bin #' @name bin #' @family math_funcs #' @export +#' @aliases bin,Column-method #' @examples \dontrun{bin(df$c)} #' @note bin since 1.5.0 setMethod("bin", @@ -205,10 +233,13 @@ setMethod("bin", #' #' Computes bitwise NOT. #' +#' @param x Column to compute on. +#' #' @rdname bitwiseNOT #' @name bitwiseNOT #' @family normal_funcs #' @export +#' @aliases bitwiseNOT,Column-method #' @examples \dontrun{bitwiseNOT(df$c)} #' @note bitwiseNOT since 1.5.0 setMethod("bitwiseNOT", @@ -222,10 +253,13 @@ setMethod("bitwiseNOT", #' #' Computes the cube-root of the given value. #' +#' @param x Column to compute on. 
+#' #' @rdname cbrt #' @name cbrt #' @family math_funcs #' @export +#' @aliases cbrt,Column-method #' @examples \dontrun{cbrt(df$c)} #' @note cbrt since 1.4.0 setMethod("cbrt", @@ -239,10 +273,13 @@ setMethod("cbrt", #' #' Computes the ceiling of the given value. #' +#' @param x Column to compute on. +#' #' @rdname ceil #' @name ceil #' @family math_funcs #' @export +#' @aliases ceil,Column-method #' @examples \dontrun{ceil(df$c)} #' @note ceil since 1.5.0 setMethod("ceil", @@ -263,11 +300,14 @@ col <- function(x) { #' Returns a Column based on the given column name #' #' Returns a Column based on the given column name. +#' +#' @param x Character column name. #' #' @rdname column #' @name column #' @family normal_funcs #' @export +#' @aliases column,character-method #' @examples \dontrun{column(df)} #' @note column since 1.6.0 setMethod("column", @@ -279,10 +319,13 @@ setMethod("column", #' #' Computes the Pearson Correlation Coefficient for two Columns. #' +#' @param x Column to compute on. +#' #' @rdname corr #' @name corr #' @family math_funcs #' @export +#' @aliases corr,Column-method #' @examples \dontrun{corr(df$c, df$d)} #' @note corr since 1.6.0 setMethod("corr", signature(x = "Column"), @@ -296,10 +339,13 @@ setMethod("corr", signature(x = "Column"), #' #' Compute the sample covariance between two expressions. #' +#' @param x Column to compute on. +#' #' @rdname cov #' @name cov #' @family math_funcs #' @export +#' @aliases cov,characterOrColumn-method #' @examples #' \dontrun{ #' cov(df$c, df$d) @@ -315,7 +361,11 @@ setMethod("cov", signature(x = "characterOrColumn"), }) #' @rdname cov +#' +#' @param col1 First column to compute covar_samp. +#' @param col2 Second column to compute covar_samp. 
#' @name covar_samp +#' @aliases covar_samp,characterOrColumn,characterOrColumn-method #' @note covar_samp since 2.0.0 setMethod("covar_samp", signature(col1 = "characterOrColumn", col2 = "characterOrColumn"), function(col1, col2) { @@ -332,10 +382,14 @@ setMethod("covar_samp", signature(col1 = "characterOrColumn", col2 = "characterO #' #' Compute the population covariance between two expressions. #' +#' @param col1 First column to compute covar_pop. +#' @param col2 Second column to compute covar_pop. +#' #' @rdname covar_pop #' @name covar_pop #' @family math_funcs #' @export +#' @aliases covar_pop,characterOrColumn,characterOrColumn-method #' @examples #' \dontrun{ #' covar_pop(df$c, df$d) @@ -357,9 +411,12 @@ setMethod("covar_pop", signature(col1 = "characterOrColumn", col2 = "characterOr #' #' Computes the cosine of the given value. #' +#' @param x Column to compute on. +#' #' @rdname cos #' @name cos #' @family math_funcs +#' @aliases cos,Column-method #' @export #' @examples \dontrun{cos(df$c)} #' @note cos since 1.5.0 @@ -374,9 +431,12 @@ setMethod("cos", #' #' Computes the hyperbolic cosine of the given value. #' +#' @param x Column to compute on. +#' #' @rdname cosh #' @name cosh #' @family math_funcs +#' @aliases cosh,Column-method #' @export #' @examples \dontrun{cosh(df$c)} #' @note cosh since 1.5.0 @@ -391,9 +451,12 @@ setMethod("cosh", #' #' Returns the number of items in a group. This is a column aggregate function. #' -#' @rdname count +#' @param x Column to compute on. +#' +#' @rdname nrow #' @name count #' @family agg_funcs +#' @aliases count,Column-method #' @export #' @examples \dontrun{count(df$c)} #' @note count since 1.4.0 @@ -409,9 +472,12 @@ setMethod("count", #' Calculates the cyclic redundancy check value (CRC32) of a binary column and #' returns the value as a bigint. #' +#' @param x Column to compute on. 
+#' #' @rdname crc32 #' @name crc32 #' @family misc_funcs +#' @aliases crc32,Column-method #' @export #' @examples \dontrun{crc32(df$c)} #' @note crc32 since 1.5.0 @@ -426,9 +492,12 @@ setMethod("crc32", #' #' Calculates the hash code of given columns, and returns the result as a int column. #' +#' @param x Column to compute on. +#' #' @rdname hash #' @name hash #' @family misc_funcs +#' @aliases hash,Column-method #' @export #' @examples \dontrun{hash(df$c)} #' @note hash since 2.0.0 @@ -447,9 +516,12 @@ setMethod("hash", #' #' Extracts the day of the month as an integer from a given date/timestamp/string. #' +#' @param x Column to compute on. +#' #' @rdname dayofmonth #' @name dayofmonth #' @family datetime_funcs +#' @aliases dayofmonth,Column-method #' @export #' @examples \dontrun{dayofmonth(df$c)} #' @note dayofmonth since 1.5.0 @@ -464,9 +536,12 @@ setMethod("dayofmonth", #' #' Extracts the day of the year as an integer from a given date/timestamp/string. #' +#' @param x Column to compute on. +#' #' @rdname dayofyear #' @name dayofyear #' @family datetime_funcs +#' @aliases dayofyear,Column-method #' @export #' @examples \dontrun{dayofyear(df$c)} #' @note dayofyear since 1.5.0 @@ -482,9 +557,13 @@ setMethod("dayofyear", #' Computes the first argument into a string from a binary using the provided character set #' (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'). #' +#' @param x Column to compute on. +#' @param charset Character set to use +#' #' @rdname decode #' @name decode #' @family string_funcs +#' @aliases decode,Column,character-method #' @export #' @examples \dontrun{decode(df$c, "UTF-8")} #' @note decode since 1.6.0 @@ -500,9 +579,13 @@ setMethod("decode", #' Computes the first argument into a binary from a string using the provided character set #' (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'). #' +#' @param x Column to compute on. 
+#' @param charset Character set to use +#' #' @rdname encode #' @name encode #' @family string_funcs +#' @aliases encode,Column,character-method #' @export #' @examples \dontrun{encode(df$c, "UTF-8")} #' @note encode since 1.6.0 @@ -517,9 +600,12 @@ setMethod("encode", #' #' Computes the exponential of the given value. #' +#' @param x Column to compute on. +#' #' @rdname exp #' @name exp #' @family math_funcs +#' @aliases exp,Column-method #' @export #' @examples \dontrun{exp(df$c)} #' @note exp since 1.5.0 @@ -534,8 +620,11 @@ setMethod("exp", #' #' Computes the exponential of the given value minus one. #' +#' @param x Column to compute on. +#' #' @rdname expm1 #' @name expm1 +#' @aliases expm1,Column-method #' @family math_funcs #' @export #' @examples \dontrun{expm1(df$c)} @@ -551,8 +640,11 @@ setMethod("expm1", #' #' Computes the factorial of the given value. #' +#' @param x Column to compute on. +#' #' @rdname factorial #' @name factorial +#' @aliases factorial,Column-method #' @family math_funcs #' @export #' @examples \dontrun{factorial(df$c)} @@ -571,8 +663,11 @@ setMethod("factorial", #' The function by default returns the first values it sees. It will return the first non-missing #' value it sees when na.rm is set to true. If all values are missing, then NA is returned. #' +#' @param x Column to compute on. +#' #' @rdname first #' @name first +#' @aliases first,characterOrColumn-method #' @family agg_funcs #' @export #' @examples @@ -597,8 +692,11 @@ setMethod("first", #' #' Computes the floor of the given value. #' +#' @param x Column to compute on. +#' #' @rdname floor #' @name floor +#' @aliases floor,Column-method #' @family math_funcs #' @export #' @examples \dontrun{floor(df$c)} @@ -614,9 +712,12 @@ setMethod("floor", #' #' Computes hex value of the given column. #' +#' @param x Column to compute on. 
+#' #' @rdname hex #' @name hex #' @family math_funcs +#' @aliases hex,Column-method #' @export #' @examples \dontrun{hex(df$c)} #' @note hex since 1.5.0 @@ -631,8 +732,11 @@ setMethod("hex", #' #' Extracts the hours as an integer from a given date/timestamp/string. #' +#' @param x Column to compute on. +#' #' @rdname hour #' @name hour +#' @aliases hour,Column-method #' @family datetime_funcs #' @export #' @examples \dontrun{hour(df$c)} @@ -651,9 +755,12 @@ setMethod("hour", #' #' For example, "hello world" will become "Hello World". #' +#' @param x Column to compute on. +#' #' @rdname initcap #' @name initcap #' @family string_funcs +#' @aliases initcap,Column-method #' @export #' @examples \dontrun{initcap(df$c)} #' @note initcap since 1.5.0 @@ -668,9 +775,12 @@ setMethod("initcap", #' #' Return true if the column is NaN, alias for \link{isnan} #' +#' @param x Column to compute on. +#' #' @rdname is.nan #' @name is.nan #' @family normal_funcs +#' @aliases is.nan,Column-method #' @export #' @examples #' \dontrun{ @@ -686,6 +796,7 @@ setMethod("is.nan", #' @rdname is.nan #' @name isnan +#' @aliases isnan,Column-method #' @note isnan since 2.0.0 setMethod("isnan", signature(x = "Column"), @@ -698,8 +809,11 @@ setMethod("isnan", #' #' Aggregate function: returns the kurtosis of the values in a group. #' +#' @param x Column to compute on. +#' #' @rdname kurtosis #' @name kurtosis +#' @aliases kurtosis,Column-method #' @family agg_funcs #' @export #' @examples \dontrun{kurtosis(df$c)} @@ -718,8 +832,11 @@ setMethod("kurtosis", #' The function by default returns the last values it sees. It will return the last non-missing #' value it sees when na.rm is set to true. If all values are missing, then NA is returned. #' +#' @param x Column to compute on. 
+#' #' @rdname last #' @name last +#' @aliases last,characterOrColumn-method #' @family agg_funcs #' @export #' @examples @@ -746,8 +863,11 @@ setMethod("last", #' For example, input "2015-07-27" returns "2015-07-31" since July 31 is the last day of the #' month in July 2015. #' +#' @param x Column to compute on. +#' #' @rdname last_day #' @name last_day +#' @aliases last_day,Column-method #' @family datetime_funcs #' @export #' @examples \dontrun{last_day(df$c)} @@ -763,8 +883,11 @@ setMethod("last_day", #' #' Computes the length of a given string or binary column. #' +#' @param x Column to compute on. +#' #' @rdname length #' @name length +#' @aliases length,Column-method #' @family string_funcs #' @export #' @examples \dontrun{length(df$c)} @@ -780,8 +903,11 @@ setMethod("length", #' #' Computes the natural logarithm of the given value. #' +#' @param x Column to compute on. +#' #' @rdname log #' @name log +#' @aliases log,Column-method #' @family math_funcs #' @export #' @examples \dontrun{log(df$c)} @@ -797,9 +923,12 @@ setMethod("log", #' #' Computes the logarithm of the given value in base 10. #' +#' @param x Column to compute on. +#' #' @rdname log10 #' @name log10 #' @family math_funcs +#' @aliases log10,Column-method #' @export #' @examples \dontrun{log10(df$c)} #' @note log10 since 1.5.0 @@ -814,9 +943,12 @@ setMethod("log10", #' #' Computes the natural logarithm of the given value plus one. #' +#' @param x Column to compute on. +#' #' @rdname log1p #' @name log1p #' @family math_funcs +#' @aliases log1p,Column-method #' @export #' @examples \dontrun{log1p(df$c)} #' @note log1p since 1.5.0 @@ -831,9 +963,12 @@ setMethod("log1p", #' #' Computes the logarithm of the given column in base 2. #' +#' @param x Column to compute on. 
+#' #' @rdname log2 #' @name log2 #' @family math_funcs +#' @aliases log2,Column-method #' @export #' @examples \dontrun{log2(df$c)} #' @note log2 since 1.5.0 @@ -848,9 +983,12 @@ setMethod("log2", #' #' Converts a string column to lower case. #' +#' @param x Column to compute on. +#' #' @rdname lower #' @name lower #' @family string_funcs +#' @aliases lower,Column-method #' @export #' @examples \dontrun{lower(df$c)} #' @note lower since 1.4.0 @@ -865,9 +1003,12 @@ setMethod("lower", #' #' Trim the spaces from left end for the specified string value. #' +#' @param x Column to compute on. +#' #' @rdname ltrim #' @name ltrim #' @family string_funcs +#' @aliases ltrim,Column-method #' @export #' @examples \dontrun{ltrim(df$c)} #' @note ltrim since 1.5.0 @@ -882,9 +1023,12 @@ setMethod("ltrim", #' #' Aggregate function: returns the maximum value of the expression in a group. #' +#' @param x Column to compute on. +#' #' @rdname max #' @name max #' @family agg_funcs +#' @aliases max,Column-method #' @export #' @examples \dontrun{max(df$c)} #' @note max since 1.5.0 @@ -900,9 +1044,12 @@ setMethod("max", #' Calculates the MD5 digest of a binary column and returns the value #' as a 32 character hex string. #' +#' @param x Column to compute on. +#' #' @rdname md5 #' @name md5 #' @family misc_funcs +#' @aliases md5,Column-method #' @export #' @examples \dontrun{md5(df$c)} #' @note md5 since 1.5.0 @@ -918,9 +1065,12 @@ setMethod("md5", #' Aggregate function: returns the average of the values in a group. #' Alias for avg. #' +#' @param x Column to compute on. +#' #' @rdname mean #' @name mean #' @family agg_funcs +#' @aliases mean,Column-method #' @export #' @examples \dontrun{mean(df$c)} #' @note mean since 1.5.0 @@ -935,8 +1085,11 @@ setMethod("mean", #' #' Aggregate function: returns the minimum value of the expression in a group. #' +#' @param x Column to compute on. 
+#' #' @rdname min #' @name min +#' @aliases min,Column-method #' @family agg_funcs #' @export #' @examples \dontrun{min(df$c)} @@ -952,8 +1105,11 @@ setMethod("min", #' #' Extracts the minutes as an integer from a given date/timestamp/string. #' +#' @param x Column to compute on. +#' #' @rdname minute #' @name minute +#' @aliases minute,Column-method #' @family datetime_funcs #' @export #' @examples \dontrun{minute(df$c)} @@ -981,6 +1137,7 @@ setMethod("minute", #' This is equivalent to the MONOTONICALLY_INCREASING_ID function in SQL. #' #' @rdname monotonically_increasing_id +#' @aliases monotonically_increasing_id,missing-method #' @name monotonically_increasing_id #' @family misc_funcs #' @export @@ -996,8 +1153,11 @@ setMethod("monotonically_increasing_id", #' #' Extracts the month as an integer from a given date/timestamp/string. #' +#' @param x Column to compute on. +#' #' @rdname month #' @name month +#' @aliases month,Column-method #' @family datetime_funcs #' @export #' @examples \dontrun{month(df$c)} @@ -1013,9 +1173,12 @@ setMethod("month", #' #' Unary minus, i.e. negate the expression. #' +#' @param x Column to compute on. +#' #' @rdname negate #' @name negate #' @family normal_funcs +#' @aliases negate,Column-method #' @export #' @examples \dontrun{negate(df$c)} #' @note negate since 1.5.0 @@ -1030,9 +1193,12 @@ setMethod("negate", #' #' Extracts the quarter as an integer from a given date/timestamp/string. #' +#' @param x Column to compute on. +#' #' @rdname quarter #' @name quarter #' @family datetime_funcs +#' @aliases quarter,Column-method #' @export #' @examples \dontrun{quarter(df$c)} #' @note quarter since 1.5.0 @@ -1047,9 +1213,12 @@ setMethod("quarter", #' #' Reverses the string column and returns it as a new string column. #' +#' @param x Column to compute on. 
+#' #' @rdname reverse #' @name reverse #' @family string_funcs +#' @aliases reverse,Column-method #' @export #' @examples \dontrun{reverse(df$c)} #' @note reverse since 1.5.0 @@ -1065,9 +1234,12 @@ setMethod("reverse", #' Returns the double value that is closest in value to the argument and #' is equal to a mathematical integer. #' +#' @param x Column to compute on. +#' #' @rdname rint #' @name rint #' @family math_funcs +#' @aliases rint,Column-method #' @export #' @examples \dontrun{rint(df$c)} #' @note rint since 1.5.0 @@ -1082,9 +1254,12 @@ setMethod("rint", #' #' Returns the value of the column `e` rounded to 0 decimal places using HALF_UP rounding mode. #' +#' @param x Column to compute on. +#' #' @rdname round #' @name round #' @family math_funcs +#' @aliases round,Column-method #' @export #' @examples \dontrun{round(df$c)} #' @note round since 1.5.0 @@ -1102,9 +1277,12 @@ setMethod("round", #' Also known as Gaussian rounding or bankers' rounding that rounds to the nearest even number. #' bround(2.5, 0) = 2, bround(3.5, 0) = 4. #' +#' @param x Column to compute on. +#' #' @rdname bround #' @name bround #' @family math_funcs +#' @aliases bround,Column-method #' @export #' @examples \dontrun{bround(df$c, 0)} #' @note bround since 2.0.0 @@ -1120,9 +1298,12 @@ setMethod("bround", #' #' Trim the spaces from right end for the specified string value. #' +#' @param x Column to compute on. +#' #' @rdname rtrim #' @name rtrim #' @family string_funcs +#' @aliases rtrim,Column-method #' @export #' @examples \dontrun{rtrim(df$c)} #' @note rtrim since 1.5.0 @@ -1137,9 +1318,12 @@ setMethod("rtrim", #' #' Aggregate function: alias for \link{stddev_samp} #' +#' @param x Column to compute on. +#' #' @rdname sd #' @name sd #' @family agg_funcs +#' @aliases sd,Column-method #' @seealso \link{stddev_pop}, \link{stddev_samp} #' @export #' @examples @@ -1160,9 +1344,12 @@ setMethod("sd", #' #' Extracts the seconds as an integer from a given date/timestamp/string. 
#' +#' @param x Column to compute on. +#' #' @rdname second #' @name second #' @family datetime_funcs +#' @aliases second,Column-method #' @export #' @examples \dontrun{second(df$c)} #' @note second since 1.5.0 @@ -1178,9 +1365,12 @@ setMethod("second", #' Calculates the SHA-1 digest of a binary column and returns the value #' as a 40 character hex string. #' +#' @param x Column to compute on. +#' #' @rdname sha1 #' @name sha1 #' @family misc_funcs +#' @aliases sha1,Column-method #' @export #' @examples \dontrun{sha1(df$c)} #' @note sha1 since 1.5.0 @@ -1195,8 +1385,11 @@ setMethod("sha1", #' #' Computes the signum of the given value. #' +#' @param x Column to compute on. +#' #' @rdname sign #' @name signum +#' @aliases signum,Column-method #' @family math_funcs #' @export #' @examples \dontrun{signum(df$c)} @@ -1212,9 +1405,12 @@ setMethod("signum", #' #' Computes the sine of the given value. #' +#' @param x Column to compute on. +#' #' @rdname sin #' @name sin #' @family math_funcs +#' @aliases sin,Column-method #' @export #' @examples \dontrun{sin(df$c)} #' @note sin since 1.5.0 @@ -1229,9 +1425,12 @@ setMethod("sin", #' #' Computes the hyperbolic sine of the given value. #' +#' @param x Column to compute on. +#' #' @rdname sinh #' @name sinh #' @family math_funcs +#' @aliases sinh,Column-method #' @export #' @examples \dontrun{sinh(df$c)} #' @note sinh since 1.5.0 @@ -1246,9 +1445,12 @@ setMethod("sinh", #' #' Aggregate function: returns the skewness of the values in a group. #' +#' @param x Column to compute on. +#' #' @rdname skewness #' @name skewness #' @family agg_funcs +#' @aliases skewness,Column-method #' @export #' @examples \dontrun{skewness(df$c)} #' @note skewness since 1.6.0 @@ -1263,9 +1465,12 @@ setMethod("skewness", #' #' Return the soundex code for the specified expression. #' +#' @param x Column to compute on. 
+#' #' @rdname soundex #' @name soundex #' @family string_funcs +#' @aliases soundex,Column-method #' @export #' @examples \dontrun{soundex(df$c)} #' @note soundex since 1.5.0 @@ -1286,6 +1491,7 @@ setMethod("soundex", #' #' @rdname spark_partition_id #' @name spark_partition_id +#' @aliases spark_partition_id,missing-method #' @export #' @examples #' \dontrun{select(df, spark_partition_id())} @@ -1298,6 +1504,7 @@ setMethod("spark_partition_id", }) #' @rdname sd +#' @aliases stddev,Column-method #' @name stddev #' @note stddev since 1.6.0 setMethod("stddev", @@ -1311,9 +1518,12 @@ setMethod("stddev", #' #' Aggregate function: returns the population standard deviation of the expression in a group. #' +#' @param x Column to compute on. +#' #' @rdname stddev_pop #' @name stddev_pop #' @family agg_funcs +#' @aliases stddev_pop,Column-method #' @seealso \link{sd}, \link{stddev_samp} #' @export #' @examples \dontrun{stddev_pop(df$c)} @@ -1329,9 +1539,12 @@ setMethod("stddev_pop", #' #' Aggregate function: returns the unbiased sample standard deviation of the expression in a group. #' +#' @param x Column to compute on. +#' #' @rdname stddev_samp #' @name stddev_samp #' @family agg_funcs +#' @aliases stddev_samp,Column-method #' @seealso \link{stddev_pop}, \link{sd} #' @export #' @examples \dontrun{stddev_samp(df$c)} @@ -1347,9 +1560,12 @@ setMethod("stddev_samp", #' #' Creates a new struct column that composes multiple input columns. #' +#' @param x Column to compute on. +#' #' @rdname struct #' @name struct #' @family normal_funcs +#' @aliases struct,characterOrColumn-method #' @export #' @examples #' \dontrun{ @@ -1373,9 +1589,12 @@ setMethod("struct", #' #' Computes the square root of the specified float value. #' +#' @param x Column to compute on. 
+#' #' @rdname sqrt #' @name sqrt #' @family math_funcs +#' @aliases sqrt,Column-method #' @export #' @examples \dontrun{sqrt(df$c)} #' @note sqrt since 1.5.0 @@ -1390,9 +1609,12 @@ setMethod("sqrt", #' #' Aggregate function: returns the sum of all values in the expression. #' +#' @param x Column to compute on. +#' #' @rdname sum #' @name sum #' @family agg_funcs +#' @aliases sum,Column-method #' @export #' @examples \dontrun{sum(df$c)} #' @note sum since 1.5.0 @@ -1407,9 +1629,12 @@ setMethod("sum", #' #' Aggregate function: returns the sum of distinct values in the expression. #' +#' @param x Column to compute on. +#' #' @rdname sumDistinct #' @name sumDistinct #' @family agg_funcs +#' @aliases sumDistinct,Column-method #' @export #' @examples \dontrun{sumDistinct(df$c)} #' @note sumDistinct since 1.4.0 @@ -1424,9 +1649,12 @@ setMethod("sumDistinct", #' #' Computes the tangent of the given value. #' +#' @param x Column to compute on. +#' #' @rdname tan #' @name tan #' @family math_funcs +#' @aliases tan,Column-method #' @export #' @examples \dontrun{tan(df$c)} #' @note tan since 1.5.0 @@ -1441,9 +1669,12 @@ setMethod("tan", #' #' Computes the hyperbolic tangent of the given value. #' +#' @param x Column to compute on. +#' #' @rdname tanh #' @name tanh #' @family math_funcs +#' @aliases tanh,Column-method #' @export #' @examples \dontrun{tanh(df$c)} #' @note tanh since 1.5.0 @@ -1458,9 +1689,12 @@ setMethod("tanh", #' #' Converts an angle measured in radians to an approximately equivalent angle measured in degrees. #' +#' @param x Column to compute on. +#' #' @rdname toDegrees #' @name toDegrees #' @family math_funcs +#' @aliases toDegrees,Column-method #' @export #' @examples \dontrun{toDegrees(df$c)} #' @note toDegrees since 1.4.0 @@ -1475,9 +1709,12 @@ setMethod("toDegrees", #' #' Converts an angle measured in degrees to an approximately equivalent angle measured in radians. #' +#' @param x Column to compute on. 
+#' #' @rdname toRadians #' @name toRadians #' @family math_funcs +#' @aliases toRadians,Column-method #' @export #' @examples \dontrun{toRadians(df$c)} #' @note toRadians since 1.4.0 @@ -1492,9 +1729,12 @@ setMethod("toRadians", #' #' Converts the column into DateType. #' +#' @param x Column to compute on. +#' #' @rdname to_date #' @name to_date #' @family datetime_funcs +#' @aliases to_date,Column-method #' @export #' @examples \dontrun{to_date(df$c)} #' @note to_date since 1.5.0 @@ -1509,9 +1749,12 @@ setMethod("to_date", #' #' Trim the spaces from both ends for the specified string column. #' +#' @param x Column to compute on. +#' #' @rdname trim #' @name trim #' @family string_funcs +#' @aliases trim,Column-method #' @export #' @examples \dontrun{trim(df$c)} #' @note trim since 1.5.0 @@ -1527,9 +1770,12 @@ setMethod("trim", #' Decodes a BASE64 encoded string column and returns it as a binary column. #' This is the reverse of base64. #' +#' @param x Column to compute on. +#' #' @rdname unbase64 #' @name unbase64 #' @family string_funcs +#' @aliases unbase64,Column-method #' @export #' @examples \dontrun{unbase64(df$c)} #' @note unbase64 since 1.5.0 @@ -1545,9 +1791,12 @@ setMethod("unbase64", #' Inverse of hex. Interprets each pair of characters as a hexadecimal number #' and converts to the byte representation of number. #' +#' @param x Column to compute on. +#' #' @rdname unhex #' @name unhex #' @family math_funcs +#' @aliases unhex,Column-method #' @export #' @examples \dontrun{unhex(df$c)} #' @note unhex since 1.5.0 @@ -1562,9 +1811,12 @@ setMethod("unhex", #' #' Converts a string column to upper case. #' +#' @param x Column to compute on. +#' #' @rdname upper #' @name upper #' @family string_funcs +#' @aliases upper,Column-method #' @export #' @examples \dontrun{upper(df$c)} #' @note upper since 1.4.0 @@ -1579,9 +1831,12 @@ setMethod("upper", #' #' Aggregate function: alias for \link{var_samp}. #' +#' @param x Column to compute on. 
+#' #' @rdname var #' @name var #' @family agg_funcs +#' @aliases var,Column-method #' @seealso \link{var_pop}, \link{var_samp} #' @export #' @examples @@ -1599,6 +1854,7 @@ setMethod("var", }) #' @rdname var +#' @aliases variance,Column-method #' @name variance #' @note variance since 1.6.0 setMethod("variance", @@ -1612,9 +1868,12 @@ setMethod("variance", #' #' Aggregate function: returns the population variance of the values in a group. #' +#' @param x Column to compute on. +#' #' @rdname var_pop #' @name var_pop #' @family agg_funcs +#' @aliases var_pop,Column-method #' @seealso \link{var}, \link{var_samp} #' @export #' @examples \dontrun{var_pop(df$c)} @@ -1630,8 +1889,11 @@ setMethod("var_pop", #' #' Aggregate function: returns the unbiased variance of the values in a group. #' +#' @param x Column to compute on. +#' #' @rdname var_samp #' @name var_samp +#' @aliases var_samp,Column-method #' @family agg_funcs #' @seealso \link{var_pop}, \link{var} #' @export @@ -1648,8 +1910,11 @@ setMethod("var_samp", #' #' Extracts the week number as an integer from a given date/timestamp/string. #' +#' @param x Column to compute on. +#' #' @rdname weekofyear #' @name weekofyear +#' @aliases weekofyear,Column-method #' @family datetime_funcs #' @export #' @examples \dontrun{weekofyear(df$c)} @@ -1665,9 +1930,12 @@ setMethod("weekofyear", #' #' Extracts the year as an integer from a given date/timestamp/string. #' +#' @param x Column to compute on. +#' #' @rdname year #' @name year #' @family datetime_funcs +#' @aliases year,Column-method #' @export #' @examples \dontrun{year(df$c)} #' @note year since 1.5.0 @@ -1682,10 +1950,14 @@ setMethod("year", #' #' Returns the angle theta from the conversion of rectangular coordinates (x, y) to #' polar coordinates (r, theta). +#' +#' @param x Column to compute on. +#' @param y Column to compute on. 
#' #' @rdname atan2 #' @name atan2 #' @family math_funcs +#' @aliases atan2,Column-method #' @export #' @examples \dontrun{atan2(df$c, x)} #' @note atan2 since 1.5.0 @@ -1702,8 +1974,12 @@ setMethod("atan2", signature(y = "Column"), #' #' Returns the number of days from `start` to `end`. #' +#' @param x start Column to use. +#' @param y end Column to use. +#' #' @rdname datediff #' @name datediff +#' @aliases datediff,Column-method #' @family datetime_funcs #' @export #' @examples \dontrun{datediff(df$c, x)} @@ -1720,10 +1996,14 @@ setMethod("datediff", signature(y = "Column"), #' hypot #' #' Computes "sqrt(a^2 + b^2)" without intermediate overflow or underflow. +#' +#' @param x Column to compute on. +#' @param y Column to compute on. #' #' @rdname hypot #' @name hypot #' @family math_funcs +#' @aliases hypot,Column-method #' @export #' @examples \dontrun{hypot(df$c, x)} #' @note hypot since 1.4.0 @@ -1740,9 +2020,13 @@ setMethod("hypot", signature(y = "Column"), #' #' Computes the Levenshtein distance of the two given string columns. #' +#' @param x Column to compute on. +#' @param y Column to compute on. +#' #' @rdname levenshtein #' @name levenshtein #' @family string_funcs +#' @aliases levenshtein,Column-method #' @export #' @examples \dontrun{levenshtein(df$c, x)} #' @note levenshtein since 1.5.0 @@ -1759,9 +2043,13 @@ setMethod("levenshtein", signature(y = "Column"), #' #' Returns number of months between dates `date1` and `date2`. #' +#' @param x start Column to use. +#' @param y end Column to use. +#' #' @rdname months_between #' @name months_between #' @family datetime_funcs +#' @aliases months_between,Column-method #' @export #' @examples \dontrun{months_between(df$c, x)} #' @note months_between since 1.5.0 @@ -1779,9 +2067,13 @@ setMethod("months_between", signature(y = "Column"), #' Returns col1 if it is not NaN, or col2 if col1 is NaN. #' Both inputs should be floating point columns (DoubleType or FloatType). #' +#' @param x first Column. 
+#' @param y second Column. +#' #' @rdname nanvl #' @name nanvl #' @family normal_funcs +#' @aliases nanvl,Column-method #' @export #' @examples \dontrun{nanvl(df$c, x)} #' @note nanvl since 1.5.0 @@ -1798,10 +2090,14 @@ setMethod("nanvl", signature(y = "Column"), #' #' Returns the positive value of dividend mod divisor. #' +#' @param x divisor Column. +#' @param y dividend Column. +#' #' @rdname pmod #' @name pmod #' @docType methods #' @family math_funcs +#' @aliases pmod,Column-method #' @export #' @examples \dontrun{pmod(df$c, x)} #' @note pmod since 1.5.0 @@ -1817,6 +2113,10 @@ setMethod("pmod", signature(y = "Column"), #' @rdname approxCountDistinct #' @name approxCountDistinct +#' +#' @param rsd maximum estimation error allowed (default = 0.05) +#' +#' @aliases approxCountDistinct,Column-method #' @export #' @examples \dontrun{approxCountDistinct(df$c, 0.02)} #' @note approxCountDistinct(Column, numeric) since 1.4.0 @@ -1829,9 +2129,13 @@ setMethod("approxCountDistinct", #' Count Distinct #' +#' @param x Column to compute on +#' @param ... other columns +#' #' @family agg_funcs #' @rdname countDistinct #' @name countDistinct +#' @aliases countDistinct,Column-method #' @return the number of distinct items in a group. #' @export #' @examples \dontrun{countDistinct(df$c)} @@ -1852,10 +2156,14 @@ setMethod("countDistinct", #' concat #' #' Concatenates multiple input string columns together into a single string column. +#' +#' @param x Column to compute on +#' @param ... other columns #' #' @family string_funcs #' @rdname concat #' @name concat +#' @aliases concat,Column-method #' @export #' @examples \dontrun{concat(df$strings, df$strings2)} #' @note concat since 1.5.0 @@ -1875,9 +2183,13 @@ setMethod("concat", #' Returns the greatest value of the list of column names, skipping null values. #' This function takes at least 2 parameters. It will return null if all parameters are null. #' +#' @param x Column to compute on +#' @param ... 
other columns +#' #' @family normal_funcs #' @rdname greatest #' @name greatest +#' @aliases greatest,Column-method #' @export #' @examples \dontrun{greatest(df$c, df$d)} #' @note greatest since 1.5.0 @@ -1898,8 +2210,12 @@ setMethod("greatest", #' Returns the least value of the list of column names, skipping null values. #' This function takes at least 2 parameters. It will return null if all parameters are null. #' +#' @param x Column to compute on +#' @param ... other columns +#' #' @family normal_funcs #' @rdname least +#' @aliases least,Column-method #' @name least #' @export #' @examples \dontrun{least(df$c, df$d)} @@ -1917,7 +2233,9 @@ setMethod("least", }) #' @rdname ceil +#' #' @name ceiling +#' @aliases ceiling,Column-method #' @export #' @examples \dontrun{ceiling(df$c)} #' @note ceiling since 1.5.0 @@ -1928,7 +2246,10 @@ setMethod("ceiling", }) #' @rdname sign +#' @param x Column to compute on +#' #' @name sign +#' @aliases sign,Column-method #' @export #' @examples \dontrun{sign(df$c)} #' @note sign since 1.5.0 @@ -1941,8 +2262,12 @@ setMethod("sign", signature(x = "Column"), #' #' Aggregate function: returns the number of distinct items in a group. #' +#' @param x Column to compute on +#' @param ... other columns +#' #' @rdname countDistinct #' @name n_distinct +#' @aliases n_distinct,Column-method #' @export #' @examples \dontrun{n_distinct(df$c)} #' @note n_distinct since 1.4.0 @@ -1951,8 +2276,11 @@ setMethod("n_distinct", signature(x = "Column"), countDistinct(x, ...) }) -#' @rdname count +#' @rdname nrow +#' @param x Column to compute on +#' #' @name n +#' @aliases n,Column-method #' @export #' @examples \dontrun{n(df$c)} #' @note n since 1.4.0 @@ -1972,9 +2300,13 @@ setMethod("n", signature(x = "Column"), #' NOTE: Use when ever possible specialized functions like \code{year}. These benefit from a #' specialized implementation. 
#' +#' @param y Column to compute on +#' @param x date format specification +#' #' @family datetime_funcs #' @rdname date_format #' @name date_format +#' @aliases date_format,Column,character-method #' @export #' @examples \dontrun{date_format(df$t, 'MM/dd/yyy')} #' @note date_format since 1.5.0 @@ -1988,9 +2320,13 @@ setMethod("date_format", signature(y = "Column", x = "character"), #' #' Assumes given timestamp is UTC and converts to given timezone. #' +#' @param y Column to compute on +#' @param x time zone to use +#' #' @family datetime_funcs #' @rdname from_utc_timestamp #' @name from_utc_timestamp +#' @aliases from_utc_timestamp,Column,character-method #' @export #' @examples \dontrun{from_utc_timestamp(df$t, 'PST')} #' @note from_utc_timestamp since 1.5.0 @@ -2011,6 +2347,7 @@ setMethod("from_utc_timestamp", signature(y = "Column", x = "character"), #' @param y column to check #' @param x substring to check #' @family string_funcs +#' @aliases instr,Column,character-method #' @rdname instr #' @name instr #' @export @@ -2033,9 +2370,13 @@ setMethod("instr", signature(y = "Column", x = "character"), #' Day of the week parameter is case insensitive, and accepts first three or two characters: #' "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun". #' +#' @param y Column to compute on +#' @param x Day of the week string +#' #' @family datetime_funcs #' @rdname next_day #' @name next_day +#' @aliases next_day,Column,character-method #' @export #' @examples #'\dontrun{ @@ -2053,9 +2394,13 @@ setMethod("next_day", signature(y = "Column", x = "character"), #' #' Assumes given timestamp is in given timezone and converts to UTC. 
#' +#' @param y Column to compute on +#' @param x timezone to use +#' #' @family datetime_funcs #' @rdname to_utc_timestamp #' @name to_utc_timestamp +#' @aliases to_utc_timestamp,Column,character-method #' @export #' @examples \dontrun{to_utc_timestamp(df$t, 'PST')} #' @note to_utc_timestamp since 1.5.0 @@ -2069,9 +2414,13 @@ setMethod("to_utc_timestamp", signature(y = "Column", x = "character"), #' #' Returns the date that is numMonths after startDate. #' +#' @param y Column to compute on +#' @param x Number of months to add +#' #' @name add_months #' @family datetime_funcs #' @rdname add_months +#' @aliases add_months,Column,numeric-method #' @export #' @examples \dontrun{add_months(df$d, 1)} #' @note add_months since 1.5.0 @@ -2085,9 +2434,13 @@ setMethod("add_months", signature(y = "Column", x = "numeric"), #' #' Returns the date that is `days` days after `start` #' +#' @param y Column to compute on +#' @param x Number of days to add +#' #' @family datetime_funcs #' @rdname date_add #' @name date_add +#' @aliases date_add,Column,numeric-method #' @export #' @examples \dontrun{date_add(df$d, 1)} #' @note date_add since 1.5.0 @@ -2101,9 +2454,13 @@ setMethod("date_add", signature(y = "Column", x = "numeric"), #' #' Returns the date that is `days` days before `start` #' +#' @param y Column to compute on +#' @param x Number of days to subtract +#' #' @family datetime_funcs #' @rdname date_sub #' @name date_sub +#' @aliases date_sub,Column,numeric-method #' @export #' @examples \dontrun{date_sub(df$d, 1)} #' @note date_sub since 1.5.0 @@ -2126,6 +2483,7 @@ setMethod("date_sub", signature(y = "Column", x = "numeric"), #' @family string_funcs #' @rdname format_number #' @name format_number +#' @aliases format_number,Column,numeric-method #' @export #' @examples \dontrun{format_number(df$n, 4)} #' @note format_number since 1.5.0 @@ -2147,6 +2505,7 @@ setMethod("format_number", signature(y = "Column", x = "numeric"), #' @family misc_funcs #' @rdname sha2 #' @name sha2 
+#' @aliases sha2,Column,numeric-method #' @export #' @examples \dontrun{sha2(df$c, 256)} #' @note sha2 since 1.5.0 @@ -2161,9 +2520,13 @@ setMethod("sha2", signature(y = "Column", x = "numeric"), #' Shift the given value numBits left. If the given value is a long value, this function #' will return a long value else it will return an integer value. #' +#' @param y column to compute on. +#' @param x number of bits to shift. +#' #' @family math_funcs #' @rdname shiftLeft #' @name shiftLeft +#' @aliases shiftLeft,Column,numeric-method #' @export #' @examples \dontrun{shiftLeft(df$c, 1)} #' @note shiftLeft since 1.5.0 @@ -2180,9 +2543,13 @@ setMethod("shiftLeft", signature(y = "Column", x = "numeric"), #' Shift the given value numBits right. If the given value is a long value, it will return #' a long value else it will return an integer value. #' +#' @param y column to compute on. +#' @param x number of bits to shift. +#' #' @family math_funcs #' @rdname shiftRight #' @name shiftRight +#' @aliases shiftRight,Column,numeric-method #' @export #' @examples \dontrun{shiftRight(df$c, 1)} #' @note shiftRight since 1.5.0 @@ -2199,9 +2566,13 @@ setMethod("shiftRight", signature(y = "Column", x = "numeric"), #' Unsigned shift the given value numBits right. If the given value is a long value, #' it will return a long value else it will return an integer value. #' +#' @param y column to compute on. +#' @param x number of bits to shift. +#' #' @family math_funcs #' @rdname shiftRightUnsigned #' @name shiftRightUnsigned +#' @aliases shiftRightUnsigned,Column,numeric-method #' @export #' @examples \dontrun{shiftRightUnsigned(df$c, 1)} #' @note shiftRightUnsigned since 1.5.0 @@ -2218,9 +2589,14 @@ setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"), #' Concatenates multiple input string columns together into a single string column, #' using the given separator. #' +#' @param x column to concatenate. +#' @param sep separator to use. +#' @param ... 
other columns to concatenate. +#' #' @family string_funcs #' @rdname concat_ws #' @name concat_ws +#' @aliases concat_ws,character,Column-method #' @export #' @examples \dontrun{concat_ws('-', df$s, df$d)} #' @note concat_ws since 1.5.0 @@ -2235,8 +2611,13 @@ setMethod("concat_ws", signature(sep = "character", x = "Column"), #' #' Convert a number in a string column from one base to another. #' +#' @param x column to convert. +#' @param fromBase base to convert from. +#' @param toBase base to convert to. +#' #' @family math_funcs #' @rdname conv +#' @aliases conv,Column,numeric,numeric-method #' @name conv #' @export #' @examples \dontrun{conv(df$n, 2, 16)} @@ -2258,6 +2639,7 @@ setMethod("conv", signature(x = "Column", fromBase = "numeric", toBase = "numeri #' #' @family normal_funcs #' @rdname expr +#' @aliases expr,character-method #' @name expr #' @export #' @examples \dontrun{expr('length(name)')} @@ -2275,6 +2657,7 @@ setMethod("expr", signature(x = "character"), #' @family string_funcs #' @rdname format_string #' @name format_string +#' @aliases format_string,character,Column-method #' @export #' @examples \dontrun{format_string('%d %s', df$a, df$b)} #' @note format_string since 1.5.0 @@ -2296,6 +2679,7 @@ setMethod("format_string", signature(format = "character", x = "Column"), #' @family datetime_funcs #' @rdname from_unixtime #' @name from_unixtime +#' @aliases from_unixtime,Column-method #' @export #' @examples #'\dontrun{ @@ -2334,6 +2718,7 @@ setMethod("from_unixtime", signature(x = "Column"), #' @family datetime_funcs #' @rdname window #' @name window +#' @aliases window,Column-method #' @export #' @examples #'\dontrun{ @@ -2383,6 +2768,7 @@ setMethod("window", signature(x = "Column"), #' #' @family string_funcs #' @rdname locate +#' @aliases locate,character,Column-method #' @name locate #' @export #' @examples \dontrun{locate('b', df$c, 1)} @@ -2401,6 +2787,7 @@ setMethod("locate", signature(substr = "character", str = "Column"), #' #' @family 
string_funcs #' @rdname lpad +#' @aliases lpad,Column,numeric,character-method #' @name lpad #' @export #' @examples \dontrun{lpad(df$c, 6, '#')} @@ -2420,6 +2807,7 @@ setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"), #' @family normal_funcs #' @rdname rand #' @name rand +#' @aliases rand,missing-method #' @export #' @examples \dontrun{rand()} #' @note rand since 1.5.0 @@ -2431,6 +2819,7 @@ setMethod("rand", signature(seed = "missing"), #' @rdname rand #' @name rand +#' @aliases rand,numeric-method #' @export #' @note rand(numeric) since 1.5.0 setMethod("rand", signature(seed = "numeric"), @@ -2446,6 +2835,7 @@ setMethod("rand", signature(seed = "numeric"), #' @family normal_funcs #' @rdname randn #' @name randn +#' @aliases randn,missing-method #' @export #' @examples \dontrun{randn()} #' @note randn since 1.5.0 @@ -2457,6 +2847,7 @@ setMethod("randn", signature(seed = "missing"), #' @rdname randn #' @name randn +#' @aliases randn,numeric-method #' @export #' @note randn(numeric) since 1.5.0 setMethod("randn", signature(seed = "numeric"), @@ -2472,6 +2863,7 @@ setMethod("randn", signature(seed = "numeric"), #' @family string_funcs #' @rdname regexp_extract #' @name regexp_extract +#' @aliases regexp_extract,Column,character,numeric-method #' @export #' @examples \dontrun{regexp_extract(df$c, '(\d+)-(\d+)', 1)} #' @note regexp_extract since 1.5.0 @@ -2491,6 +2883,7 @@ setMethod("regexp_extract", #' @family string_funcs #' @rdname regexp_replace #' @name regexp_replace +#' @aliases regexp_replace,Column,character,character-method #' @export #' @examples \dontrun{regexp_replace(df$c, '(\\d+)', '--')} #' @note regexp_replace since 1.5.0 @@ -2510,6 +2903,7 @@ setMethod("regexp_replace", #' @family string_funcs #' @rdname rpad #' @name rpad +#' @aliases rpad,Column,numeric,character-method #' @export #' @examples \dontrun{rpad(df$c, 6, '#')} #' @note rpad since 1.5.0 @@ -2530,6 +2924,7 @@ setMethod("rpad", signature(x = "Column", len = 
"numeric", pad = "character"), #' #' @family string_funcs #' @rdname substring_index +#' @aliases substring_index,Column,character,numeric-method #' @name substring_index #' @export #' @examples @@ -2557,6 +2952,7 @@ setMethod("substring_index", #' @family string_funcs #' @rdname translate #' @name translate +#' @aliases translate,Column,character,character-method #' @export #' @examples \dontrun{translate(df$c, 'rnlt', '123')} #' @note translate since 1.5.0 @@ -2575,6 +2971,7 @@ setMethod("translate", #' @family datetime_funcs #' @rdname unix_timestamp #' @name unix_timestamp +#' @aliases unix_timestamp,missing,missing-method #' @export #' @examples #'\dontrun{ @@ -2591,6 +2988,7 @@ setMethod("unix_timestamp", signature(x = "missing", format = "missing"), #' @rdname unix_timestamp #' @name unix_timestamp +#' @aliases unix_timestamp,Column,missing-method #' @export #' @note unix_timestamp(Column) since 1.5.0 setMethod("unix_timestamp", signature(x = "Column", format = "missing"), @@ -2601,6 +2999,7 @@ setMethod("unix_timestamp", signature(x = "Column", format = "missing"), #' @rdname unix_timestamp #' @name unix_timestamp +#' @aliases unix_timestamp,Column,character-method #' @export #' @note unix_timestamp(Column, character) since 1.5.0 setMethod("unix_timestamp", signature(x = "Column", format = "character"), @@ -2616,6 +3015,7 @@ setMethod("unix_timestamp", signature(x = "Column", format = "character"), #' @family normal_funcs #' @rdname when #' @name when +#' @aliases when,Column-method #' @seealso \link{ifelse} #' @export #' @examples \dontrun{when(df$age == 2, df$age + 1)} @@ -2636,6 +3036,7 @@ setMethod("when", signature(condition = "Column", value = "ANY"), #' @family normal_funcs #' @rdname ifelse #' @name ifelse +#' @aliases ifelse,Column-method #' @seealso \link{when} #' @export #' @examples \dontrun{ @@ -2671,6 +3072,7 @@ setMethod("ifelse", #' @rdname cume_dist #' @name cume_dist #' @family window_funcs +#' @aliases cume_dist,missing-method #' @export 
#' @examples \dontrun{cume_dist()} #' @note cume_dist since 1.6.0 @@ -2694,6 +3096,7 @@ setMethod("cume_dist", #' @rdname dense_rank #' @name dense_rank #' @family window_funcs +#' @aliases dense_rank,missing-method #' @export #' @examples \dontrun{dense_rank()} #' @note dense_rank since 1.6.0 @@ -2714,6 +3117,7 @@ setMethod("dense_rank", #' #' @rdname lag #' @name lag +#' @aliases lag,characterOrColumn-method #' @family window_funcs #' @export #' @examples \dontrun{lag(df$c)} @@ -2739,10 +3143,15 @@ setMethod("lag", #' an `offset` of one will return the next row at any given point in the window partition. #' #' This is equivalent to the LEAD function in SQL. +#' +#' @param x Column to compute on +#' @param offset Number of rows to offset +#' @param defaultValue (Optional) default value to use #' #' @rdname lead #' @name lead #' @family window_funcs +#' @aliases lead,characterOrColumn,numeric-method #' @export #' @examples \dontrun{lead(df$c)} #' @note lead since 1.6.0 @@ -2763,13 +3172,16 @@ setMethod("lead", #' ntile #' #' Window function: returns the ntile group id (from 1 to `n` inclusive) in an ordered window -#' partition. Fow example, if `n` is 4, the first quarter of the rows will get value 1, the second +#' partition. For example, if `n` is 4, the first quarter of the rows will get value 1, the second #' quarter will get 2, the third quarter will get 3, and the last quarter will get 4. #' #' This is equivalent to the NTILE function in SQL. 
#' +#' @param x Number of ntile groups +#' #' @rdname ntile #' @name ntile +#' @aliases ntile,numeric-method #' @family window_funcs #' @export #' @examples \dontrun{ntile(1)} @@ -2794,6 +3206,7 @@ setMethod("ntile", #' @rdname percent_rank #' @name percent_rank #' @family window_funcs +#' @aliases percent_rank,missing-method #' @export #' @examples \dontrun{percent_rank()} #' @note percent_rank since 1.6.0 @@ -2818,6 +3231,7 @@ setMethod("percent_rank", #' @rdname rank #' @name rank #' @family window_funcs +#' @aliases rank,missing-method #' @export #' @examples \dontrun{rank()} #' @note rank since 1.6.0 @@ -2829,6 +3243,10 @@ setMethod("rank", }) # Expose rank() in the R base package +#' @name rank +#' @rdname rank +#' @aliases rank,ANY-method +#' @export setMethod("rank", signature(x = "ANY"), function(x, ...) { @@ -2843,6 +3261,7 @@ setMethod("rank", #' #' @rdname row_number #' @name row_number +#' @aliases row_number,missing-method #' @family window_funcs #' @export #' @examples \dontrun{row_number()} @@ -2863,6 +3282,7 @@ setMethod("row_number", #' @param x A Column #' @param value A value to be checked if contained in the column #' @rdname array_contains +#' @aliases array_contains,Column-method #' @name array_contains #' @family collection_funcs #' @export @@ -2879,9 +3299,12 @@ setMethod("array_contains", #' #' Creates a new row for each element in the given array or map column. #' +#' @param x Column to compute on +#' #' @rdname explode #' @name explode #' @family collection_funcs +#' @aliases explode,Column-method #' @export #' @examples \dontrun{explode(df$c)} #' @note explode since 1.5.0 @@ -2895,9 +3318,12 @@ setMethod("explode", #' size #' #' Returns length of array or map. +#' +#' @param x Column to compute on #' #' @rdname size #' @name size +#' @aliases size,Column-method #' @family collection_funcs #' @export #' @examples \dontrun{size(df$c)} @@ -2920,6 +3346,7 @@ setMethod("size", #' FALSE, sorting is in descending order. 
#' @rdname sort_array #' @name sort_array +#' @aliases sort_array,Column-method #' @family collection_funcs #' @export #' @examples @@ -2939,9 +3366,12 @@ setMethod("sort_array", #' #' Creates a new row for each element with position in the given array or map column. #' +#' @param x Column to compute on +#' #' @rdname posexplode #' @name posexplode #' @family collection_funcs +#' @aliases posexplode,Column-method #' @export #' @examples \dontrun{posexplode(df$c)} #' @note posexplode since 2.1.0 diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 8416e5cdb8..e7444ac246 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -51,7 +51,7 @@ setGeneric("collectPartition", standardGeneric("collectPartition") }) -# @rdname count +# @rdname nrow # @export setGeneric("count", function(x) { standardGeneric("count") }) @@ -395,7 +395,7 @@ setGeneric("value", function(bcast) { standardGeneric("value") }) #################### SparkDataFrame Methods ######################## -#' @rdname agg +#' @rdname summarize #' @export setGeneric("agg", function (x, ...) { standardGeneric("agg") }) @@ -654,7 +654,7 @@ setGeneric("showDF", function(x, ...) { standardGeneric("showDF") }) # @export setGeneric("subset", function(x, ...) { standardGeneric("subset") }) -#' @rdname agg +#' @rdname summarize #' @export setGeneric("summarize", function(x, ...) 
{ standardGeneric("summarize") }) @@ -1022,7 +1022,7 @@ setGeneric("month", function(x) { standardGeneric("month") }) #' @export setGeneric("months_between", function(y, x) { standardGeneric("months_between") }) -#' @rdname count +#' @rdname nrow #' @export setGeneric("n", function(x) { standardGeneric("n") }) diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 5ed7e8abb4..85348ae76b 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -47,6 +47,7 @@ groupedData <- function(sgd) { #' @rdname show +#' @aliases show,GroupedData-method #' @note show(GroupedData) since 1.4.0 setMethod("show", "GroupedData", function(object) { @@ -61,6 +62,7 @@ setMethod("show", "GroupedData", #' @param x a GroupedData #' @return a SparkDataFrame #' @rdname count +#' @aliases count,GroupedData-method #' @export #' @examples #' \dontrun{ @@ -84,6 +86,7 @@ setMethod("count", #' @param x a GroupedData #' @return a SparkDataFrame #' @rdname summarize +#' @aliases agg,GroupedData-method #' @name agg #' @family agg_funcs #' @export @@ -121,6 +124,7 @@ setMethod("agg", #' @rdname summarize #' @name summarize +#' @aliases summarize,GroupedData-method #' @note summarize since 1.4.0 setMethod("summarize", signature(x = "GroupedData"), @@ -146,6 +150,7 @@ methods <- c("avg", "max", "mean", "min", "sum") #' @param values A value or a list/vector of distinct values for the output columns. 
#' @return GroupedData object #' @rdname pivot +#' @aliases pivot,GroupedData,character-method #' @name pivot #' @export #' @examples @@ -198,6 +203,7 @@ createMethods() #' #' @param x A GroupedData #' @rdname gapply +#' @aliases gapply,GroupedData-method #' @name gapply #' @export #' @note gapply(GroupedData) since 2.0.0 @@ -212,6 +218,7 @@ setMethod("gapply", #' #' @param x A GroupedData #' @rdname gapplyCollect +#' @aliases gapplyCollect,GroupedData-method #' @name gapplyCollect #' @export #' @note gapplyCollect(GroupedData) since 2.0.0 @@ -243,4 +250,4 @@ gapplyInternal <- function(x, func, schema) { broadcastArr, if (class(schema) == "structType") { schema$jobj } else { NULL }) dataFrame(sdf) -} \ No newline at end of file +} diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 94e1f654f5..50c601fcd9 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -91,6 +91,7 @@ NULL #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. #' @param tol Positive convergence tolerance of iterations. #' @param maxIter Integer giving the maximal number of IRLS iterations. 
+#' @aliases spark.glm,SparkDataFrame,formula-method #' @return \code{spark.glm} returns a fitted generalized linear model #' @rdname spark.glm #' @name spark.glm @@ -306,6 +307,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"), #' @param initMode The initialization algorithm choosen to fit the model #' @return \code{spark.kmeans} returns a fitted k-means model #' @rdname spark.kmeans +#' @aliases spark.kmeans,SparkDataFrame,formula-method #' @name spark.kmeans #' @export #' @examples @@ -418,6 +420,7 @@ setMethod("predict", signature(object = "KMeansModel"), #' @param smoothing Smoothing parameter #' @return \code{spark.naiveBayes} returns a fitted naive Bayes model #' @rdname spark.naiveBayes +#' @aliases spark.naiveBayes,SparkDataFrame,formula-method #' @name spark.naiveBayes #' @seealso e1071: \url{https://cran.r-project.org/web/packages/e1071/} #' @export @@ -512,7 +515,6 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat #' which means throw exception if the output path exists. #' #' @rdname spark.kmeans -#' @name write.ml #' @export #' @note write.ml(KMeansModel, character) since 2.0.0 setMethod("write.ml", signature(object = "KMeansModel", path = "character"), diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R index a91e9980df..b429f5de13 100644 --- a/R/pkg/R/schema.R +++ b/R/pkg/R/schema.R @@ -26,6 +26,7 @@ #' @param x a structField object (created with the field() function) #' @param ... additional structField objects #' @return a structType object +#' @rdname structType #' @export #' @examples #'\dontrun{ @@ -40,13 +41,19 @@ structType <- function(x, ...) { UseMethod("structType", x) } -structType.jobj <- function(x) { +#' @rdname structType +#' @method structType jobj +#' @export +structType.jobj <- function(x, ...) 
{ obj <- structure(list(), class = "structType") obj$jobj <- x obj$fields <- function() { lapply(callJMethod(obj$jobj, "fields"), structField) } obj } +#' @rdname structType +#' @method structType structField +#' @export structType.structField <- function(x, ...) { fields <- list(x, ...) if (!all(sapply(fields, inherits, "structField"))) { @@ -104,7 +111,10 @@ structField <- function(x, ...) { UseMethod("structField", x) } -structField.jobj <- function(x) { +#' @rdname structField +#' @method structField jobj +#' @export +structField.jobj <- function(x, ...) { obj <- structure(list(), class = "structField") obj$jobj <- x obj$name <- function() { callJMethod(x, "name") } @@ -179,7 +189,7 @@ checkType <- function(type) { #' @param nullable A logical vector indicating whether or not the field is nullable #' @rdname structField #' @export -structField.character <- function(x, type, nullable = TRUE) { +structField.character <- function(x, type, nullable = TRUE, ...) { if (class(x) != "character") { stop("Field name must be a string.") } diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R index c92352e1b0..2b4ce195cb 100644 --- a/R/pkg/R/stats.R +++ b/R/pkg/R/stats.R @@ -34,6 +34,7 @@ setOldClass("jobj") #' #' @rdname crosstab #' @name crosstab +#' @aliases crosstab,SparkDataFrame,character,character-method #' @family stat functions #' @export #' @examples @@ -59,6 +60,7 @@ setMethod("crosstab", #' #' @rdname cov #' @name cov +#' @aliases cov,SparkDataFrame-method #' @family stat functions #' @export #' @examples @@ -88,6 +90,7 @@ setMethod("cov", #' #' @rdname corr #' @name corr +#' @aliases corr,SparkDataFrame-method #' @family stat functions #' @export #' @examples @@ -120,6 +123,7 @@ setMethod("corr", #' #' @rdname freqItems #' @name freqItems +#' @aliases freqItems,SparkDataFrame,character-method #' @family stat functions #' @export #' @examples @@ -158,6 +162,7 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"), #' #' @rdname approxQuantile #' 
@name approxQuantile +#' @aliases approxQuantile,SparkDataFrame,character,numeric,numeric-method #' @family stat functions #' @export #' @examples @@ -188,6 +193,7 @@ setMethod("approxQuantile", #' @return A new SparkDataFrame that represents the stratified sample #' #' @rdname sampleBy +#' @aliases sampleBy,SparkDataFrame,character,list,numeric-method #' @name sampleBy #' @family stat functions #' @export diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index e75bfbf037..240b9f669b 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -126,20 +126,16 @@ hashCode <- function(key) { as.integer(bitwXor(intBits[2], intBits[1])) } else if (class(key) == "character") { # TODO: SPARK-7839 means we might not have the native library available - if (is.loaded("stringHashCode")) { - .Call("stringHashCode", key) + n <- nchar(key) + if (n == 0) { + 0L } else { - n <- nchar(key) - if (n == 0) { - 0L - } else { - asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) }) - hashC <- 0 - for (k in 1:length(asciiVals)) { - hashC <- mult31AndAdd(hashC, asciiVals[k]) - } - as.integer(hashC) + asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) }) + hashC <- 0 + for (k in 1:length(asciiVals)) { + hashC <- mult31AndAdd(hashC, asciiVals[k]) } + as.integer(hashC) } } else { warning(paste("Could not hash object, returning 0", sep = "")) diff --git a/R/pkg/R/window.R b/R/pkg/R/window.R index d9d069cde9..215d0e7b5c 100644 --- a/R/pkg/R/window.R +++ b/R/pkg/R/window.R @@ -28,6 +28,7 @@ #' #' @rdname windowPartitionBy #' @name windowPartitionBy +#' @aliases windowPartitionBy,character-method #' @export #' @examples #' \dontrun{ @@ -50,6 +51,7 @@ setMethod("windowPartitionBy", #' @rdname windowPartitionBy #' @name windowPartitionBy +#' @aliases windowPartitionBy,Column-method #' @export #' @note windowPartitionBy(Column) since 2.0.0 setMethod("windowPartitionBy", @@ -75,6 +77,7 @@ setMethod("windowPartitionBy", #' #' @rdname windowOrderBy #' @name windowOrderBy +#' @aliases 
windowOrderBy,character-method #' @export #' @examples #' \dontrun{ @@ -97,6 +100,7 @@ setMethod("windowOrderBy", #' @rdname windowOrderBy #' @name windowOrderBy +#' @aliases windowOrderBy,Column-method #' @export #' @note windowOrderBy(Column) since 2.0.0 setMethod("windowOrderBy", diff --git a/dev/.rat-excludes b/dev/.rat-excludes index fb582dec56..0c866717a3 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -99,3 +99,4 @@ spark-deps-.* .*tsv org.apache.spark.scheduler.ExternalClusterManager .*\.sql +.Rbuildignore -- GitLab