Skip to content
Snippets Groups Projects
Commit 18708f76 authored by Junyang Qian, committed by Felix Cheung
Browse files

[SPARKR][MINOR] Add more examples to window function docs

## What changes were proposed in this pull request?

This PR adds more examples to window function docs to make them more accessible to the users.

It also fixes default value issues for `lag` and `lead`.

## How was this patch tested?

Manual test, R unit test.

Author: Junyang Qian <junyangq@databricks.com>

Closes #14779 from junyangq/SPARKR-FixWindowFunctionDocs.
parent 945c04bc
No related branches found
No related tags found
No related merge requests found
...@@ -203,6 +203,18 @@ setMethod("rangeBetween", ...@@ -203,6 +203,18 @@ setMethod("rangeBetween",
#' @aliases over,Column,WindowSpec-method #' @aliases over,Column,WindowSpec-method
#' @family colum_func #' @family colum_func
#' @export #' @export
#' @examples \dontrun{
#' df <- createDataFrame(mtcars)
#'
#' # Partition by am (transmission) and order by hp (horsepower)
#' ws <- orderBy(windowPartitionBy("am"), "hp")
#'
#' # Rank on hp within each partition
#' out <- select(df, over(rank(), ws), df$hp, df$am)
#'
#' # Lead mpg values by 1 row on the partition-and-ordered table
#' out <- select(df, over(lead(df$mpg), ws), df$mpg, df$hp, df$am)
#' }
#' @note over since 2.0.0 #' @note over since 2.0.0
setMethod("over", setMethod("over",
signature(x = "Column", window = "WindowSpec"), signature(x = "Column", window = "WindowSpec"),
......
...@@ -3121,9 +3121,9 @@ setMethod("ifelse", ...@@ -3121,9 +3121,9 @@ setMethod("ifelse",
#' @aliases cume_dist,missing-method #' @aliases cume_dist,missing-method
#' @export #' @export
#' @examples \dontrun{ #' @examples \dontrun{
#' df <- createDataFrame(iris) #' df <- createDataFrame(mtcars)
#' ws <- orderBy(windowPartitionBy("Species"), "Sepal_Length") #' ws <- orderBy(windowPartitionBy("am"), "hp")
#' out <- select(df, over(cume_dist(), ws), df$Sepal_Length, df$Species) #' out <- select(df, over(cume_dist(), ws), df$hp, df$am)
#' } #' }
#' @note cume_dist since 1.6.0 #' @note cume_dist since 1.6.0
setMethod("cume_dist", setMethod("cume_dist",
...@@ -3148,7 +3148,11 @@ setMethod("cume_dist", ...@@ -3148,7 +3148,11 @@ setMethod("cume_dist",
#' @family window_funcs #' @family window_funcs
#' @aliases dense_rank,missing-method #' @aliases dense_rank,missing-method
#' @export #' @export
#' @examples \dontrun{dense_rank()} #' @examples \dontrun{
#' df <- createDataFrame(mtcars)
#' ws <- orderBy(windowPartitionBy("am"), "hp")
#' out <- select(df, over(dense_rank(), ws), df$hp, df$am)
#' }
#' @note dense_rank since 1.6.0 #' @note dense_rank since 1.6.0
setMethod("dense_rank", setMethod("dense_rank",
signature("missing"), signature("missing"),
...@@ -3168,18 +3172,26 @@ setMethod("dense_rank", ...@@ -3168,18 +3172,26 @@ setMethod("dense_rank",
#' @param x the column as a character string or a Column to compute on. #' @param x the column as a character string or a Column to compute on.
#' @param offset the number of rows back from the current row from which to obtain a value. #' @param offset the number of rows back from the current row from which to obtain a value.
#' If not specified, the default is 1. #' If not specified, the default is 1.
#' @param defaultValue default to use when the offset row does not exist. #' @param defaultValue (optional) default to use when the offset row does not exist.
#' @param ... further arguments to be passed to or from other methods. #' @param ... further arguments to be passed to or from other methods.
#' @rdname lag #' @rdname lag
#' @name lag #' @name lag
#' @aliases lag,characterOrColumn-method #' @aliases lag,characterOrColumn-method
#' @family window_funcs #' @family window_funcs
#' @export #' @export
#' @examples \dontrun{lag(df$c)} #' @examples \dontrun{
#' df <- createDataFrame(mtcars)
#'
#' # Partition by am (transmission) and order by hp (horsepower)
#' ws <- orderBy(windowPartitionBy("am"), "hp")
#'
#' # Lag mpg values by 1 row on the partition-and-ordered table
#' out <- select(df, over(lag(df$mpg), ws), df$mpg, df$hp, df$am)
#' }
#' @note lag since 1.6.0 #' @note lag since 1.6.0
setMethod("lag", setMethod("lag",
signature(x = "characterOrColumn"), signature(x = "characterOrColumn"),
function(x, offset, defaultValue = NULL) { function(x, offset = 1, defaultValue = NULL) {
col <- if (class(x) == "Column") { col <- if (class(x) == "Column") {
x@jc x@jc
} else { } else {
...@@ -3194,25 +3206,35 @@ setMethod("lag", ...@@ -3194,25 +3206,35 @@ setMethod("lag",
#' lead #' lead
#' #'
#' Window function: returns the value that is \code{offset} rows after the current row, and #' Window function: returns the value that is \code{offset} rows after the current row, and
#' NULL if there is less than \code{offset} rows after the current row. For example, #' \code{defaultValue} if there are fewer than \code{offset} rows after the current row.
#' an \code{offset} of one will return the next row at any given point in the window partition. #' For example, an \code{offset} of one will return the next row at any given point
#' in the window partition.
#' #'
#' This is equivalent to the \code{LEAD} function in SQL. #' This is equivalent to the \code{LEAD} function in SQL.
#' #'
#' @param x Column to compute on #' @param x the column as a character string or a Column to compute on.
#' @param offset Number of rows to offset #' @param offset the number of rows after the current row from which to obtain a value.
#' @param defaultValue (Optional) default value to use #' If not specified, the default is 1.
#' @param defaultValue (optional) default to use when the offset row does not exist.
#' #'
#' @rdname lead #' @rdname lead
#' @name lead #' @name lead
#' @family window_funcs #' @family window_funcs
#' @aliases lead,characterOrColumn,numeric-method #' @aliases lead,characterOrColumn,numeric-method
#' @export #' @export
#' @examples \dontrun{lead(df$c)} #' @examples \dontrun{
#' df <- createDataFrame(mtcars)
#'
#' # Partition by am (transmission) and order by hp (horsepower)
#' ws <- orderBy(windowPartitionBy("am"), "hp")
#'
#' # Lead mpg values by 1 row on the partition-and-ordered table
#' out <- select(df, over(lead(df$mpg), ws), df$mpg, df$hp, df$am)
#' }
#' @note lead since 1.6.0 #' @note lead since 1.6.0
setMethod("lead", setMethod("lead",
signature(x = "characterOrColumn", offset = "numeric", defaultValue = "ANY"), signature(x = "characterOrColumn", offset = "numeric", defaultValue = "ANY"),
function(x, offset, defaultValue = NULL) { function(x, offset = 1, defaultValue = NULL) {
col <- if (class(x) == "Column") { col <- if (class(x) == "Column") {
x@jc x@jc
} else { } else {
...@@ -3239,7 +3261,15 @@ setMethod("lead", ...@@ -3239,7 +3261,15 @@ setMethod("lead",
#' @aliases ntile,numeric-method #' @aliases ntile,numeric-method
#' @family window_funcs #' @family window_funcs
#' @export #' @export
#' @examples \dontrun{ntile(1)} #' @examples \dontrun{
#' df <- createDataFrame(mtcars)
#'
#' # Partition by am (transmission) and order by hp (horsepower)
#' ws <- orderBy(windowPartitionBy("am"), "hp")
#'
#' # Get ntile group id (1-4) for hp
#' out <- select(df, over(ntile(4), ws), df$hp, df$am)
#' }
#' @note ntile since 1.6.0 #' @note ntile since 1.6.0
setMethod("ntile", setMethod("ntile",
signature(x = "numeric"), signature(x = "numeric"),
...@@ -3263,7 +3293,11 @@ setMethod("ntile", ...@@ -3263,7 +3293,11 @@ setMethod("ntile",
#' @family window_funcs #' @family window_funcs
#' @aliases percent_rank,missing-method #' @aliases percent_rank,missing-method
#' @export #' @export
#' @examples \dontrun{percent_rank()} #' @examples \dontrun{
#' df <- createDataFrame(mtcars)
#' ws <- orderBy(windowPartitionBy("am"), "hp")
#' out <- select(df, over(percent_rank(), ws), df$hp, df$am)
#' }
#' @note percent_rank since 1.6.0 #' @note percent_rank since 1.6.0
setMethod("percent_rank", setMethod("percent_rank",
signature("missing"), signature("missing"),
...@@ -3288,7 +3322,11 @@ setMethod("percent_rank", ...@@ -3288,7 +3322,11 @@ setMethod("percent_rank",
#' @family window_funcs #' @family window_funcs
#' @aliases rank,missing-method #' @aliases rank,missing-method
#' @export #' @export
#' @examples \dontrun{rank()} #' @examples \dontrun{
#' df <- createDataFrame(mtcars)
#' ws <- orderBy(windowPartitionBy("am"), "hp")
#' out <- select(df, over(rank(), ws), df$hp, df$am)
#' }
#' @note rank since 1.6.0 #' @note rank since 1.6.0
setMethod("rank", setMethod("rank",
signature(x = "missing"), signature(x = "missing"),
...@@ -3321,7 +3359,11 @@ setMethod("rank", ...@@ -3321,7 +3359,11 @@ setMethod("rank",
#' @aliases row_number,missing-method #' @aliases row_number,missing-method
#' @family window_funcs #' @family window_funcs
#' @export #' @export
#' @examples \dontrun{row_number()} #' @examples \dontrun{
#' df <- createDataFrame(mtcars)
#' ws <- orderBy(windowPartitionBy("am"), "hp")
#' out <- select(df, over(row_number(), ws), df$hp, df$am)
#' }
#' @note row_number since 1.6.0 #' @note row_number since 1.6.0
setMethod("row_number", setMethod("row_number",
signature("missing"), signature("missing"),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment