From d5807def10c21e145163dc1e34d38258dda73ebf Mon Sep 17 00:00:00 2001
From: Sandeep Singh <sandeep@techaddict.me>
Date: Wed, 8 Jun 2016 23:41:29 -0700
Subject: [PATCH] [MINOR][DOC] In Dataset docs, remove self link to Dataset and
 add link to Column

## What changes were proposed in this pull request?
Documentation Fix

## How was this patch tested?

Author: Sandeep Singh <sandeep@techaddict.me>

Closes #13567 from techaddict/minor-4.
---
 .../scala/org/apache/spark/sql/Dataset.scala  | 200 +++++++++---------
 1 file changed, 100 insertions(+), 100 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 6cbc27d91c..162524a9ef 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -67,7 +67,7 @@ private[sql] object Dataset {
 }
 
 /**
- * A [[Dataset]] is a strongly typed collection of domain-specific objects that can be transformed
+ * A Dataset is a strongly typed collection of domain-specific objects that can be transformed
  * in parallel using functional or relational operations. Each Dataset also has an untyped view
  * called a [[DataFrame]], which is a Dataset of [[Row]].
  *
@@ -105,7 +105,7 @@ private[sql] object Dataset {
  * }}}
  *
  * Dataset operations can also be untyped, through various domain-specific-language (DSL)
- * functions defined in: [[Dataset]] (this class), [[Column]], and [[functions]]. These operations
+ * functions defined in: Dataset (this class), [[Column]], and [[functions]]. These operations
  * are very similar to the operations available in the data frame abstraction in R or Python.
  *
  * To select a column from the Dataset, use `apply` method in Scala and `col` in Java.
@@ -194,13 +194,13 @@ class Dataset[T] private[sql](
   /**
    * Currently [[ExpressionEncoder]] is the only implementation of [[Encoder]], here we turn the
    * passed in encoder to [[ExpressionEncoder]] explicitly, and mark it implicit so that we can use
-   * it when constructing new [[Dataset]] objects that have the same object type (that will be
+   * it when constructing new Dataset objects that have the same object type (that will be
    * possibly resolved to a different schema).
    */
   private[sql] implicit val exprEnc: ExpressionEncoder[T] = encoderFor(encoder)
 
   /**
-   * Encoder is used mostly as a container of serde expressions in [[Dataset]]. We build logical
+   * Encoder is used mostly as a container of serde expressions in Dataset. We build logical
    * plans by these serde expressions and execute it within the query framework. However, for
    * performance reasons we may want to use encoder as a function to deserialize internal rows to
    * custom objects, e.g. collect. Here we resolve and bind the encoder so that we can call its
@@ -340,7 +340,7 @@ class Dataset[T] private[sql](
 
   /**
    * :: Experimental ::
-   * Returns a new [[Dataset]] where each record has been mapped on to the specified type. The
+   * Returns a new Dataset where each record has been mapped on to the specified type. The
    * method used to map columns depend on the type of `U`:
    *  - When `U` is a class, fields for the class will be mapped to columns of the same name
    *    (case sensitivity is determined by `spark.sql.caseSensitive`).
@@ -349,7 +349,7 @@ class Dataset[T] private[sql](
    *  - When `U` is a primitive type (i.e. String, Int, etc), then the first column of the
    *    [[DataFrame]] will be used.
    *
-   * If the schema of the [[Dataset]] does not match the desired `U` type, you can use `select`
+   * If the schema of the Dataset does not match the desired `U` type, you can use `select`
    * along with `alias` or `as` to rearrange or rename as required.
    *
    * @group basic
@@ -385,7 +385,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns the schema of this [[Dataset]].
+   * Returns the schema of this Dataset.
    *
    * @group basic
    * @since 1.6.0
@@ -453,8 +453,8 @@ class Dataset[T] private[sql](
   def isLocal: Boolean = logicalPlan.isInstanceOf[LocalRelation]
 
   /**
-   * Returns true if this [[Dataset]] contains one or more sources that continuously
-   * return data as it arrives. A [[Dataset]] that reads data from a streaming source
+   * Returns true if this Dataset contains one or more sources that continuously
+   * return data as it arrives. A Dataset that reads data from a streaming source
    * must be executed as a [[ContinuousQuery]] using the `startStream()` method in
    * [[DataFrameWriter]]. Methods that return a single answer, e.g. `count()` or
    * `collect()`, will throw an [[AnalysisException]] when there is a streaming
@@ -467,7 +467,7 @@ class Dataset[T] private[sql](
   def isStreaming: Boolean = logicalPlan.isStreaming
 
   /**
-   * Displays the [[Dataset]] in a tabular form. Strings more than 20 characters will be truncated,
+   * Displays the Dataset in a tabular form. Strings more than 20 characters will be truncated,
    * and all cells will be aligned right. For example:
    * {{{
    *   year  month AVG('Adj Close) MAX('Adj Close)
@@ -486,7 +486,7 @@ class Dataset[T] private[sql](
   def show(numRows: Int): Unit = show(numRows, truncate = true)
 
   /**
-   * Displays the top 20 rows of [[Dataset]] in a tabular form. Strings more than 20 characters
+   * Displays the top 20 rows of Dataset in a tabular form. Strings more than 20 characters
    * will be truncated, and all cells will be aligned right.
    *
    * @group action
@@ -495,7 +495,7 @@ class Dataset[T] private[sql](
   def show(): Unit = show(20)
 
   /**
-   * Displays the top 20 rows of [[Dataset]] in a tabular form.
+   * Displays the top 20 rows of Dataset in a tabular form.
    *
    * @param truncate Whether truncate long strings. If true, strings more than 20 characters will
    *                 be truncated and all cells will be aligned right
@@ -506,7 +506,7 @@ class Dataset[T] private[sql](
   def show(truncate: Boolean): Unit = show(20, truncate)
 
   /**
-   * Displays the [[Dataset]] in a tabular form. For example:
+   * Displays the Dataset in a tabular form. For example:
    * {{{
    *   year  month AVG('Adj Close) MAX('Adj Close)
    *   1980  12    0.503218        0.595103
@@ -727,7 +727,7 @@ class Dataset[T] private[sql](
 
   /**
    * :: Experimental ::
-   * Joins this [[Dataset]] returning a [[Tuple2]] for each pair where `condition` evaluates to
+   * Joins this Dataset returning a [[Tuple2]] for each pair where `condition` evaluates to
    * true.
    *
    * This is similar to the relation `join` function with one important difference in the
@@ -807,7 +807,7 @@ class Dataset[T] private[sql](
 
   /**
    * :: Experimental ::
-   * Using inner equi-join to join this [[Dataset]] returning a [[Tuple2]] for each pair
+   * Using inner equi-join to join this Dataset returning a [[Tuple2]] for each pair
    * where `condition` evaluates to true.
    *
    * @param other Right side of the join.
@@ -822,7 +822,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] with each partition sorted by the given expressions.
+   * Returns a new Dataset with each partition sorted by the given expressions.
    *
    * This is the same operation as "SORT BY" in SQL (Hive QL).
    *
@@ -835,7 +835,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] with each partition sorted by the given expressions.
+   * Returns a new Dataset with each partition sorted by the given expressions.
    *
    * This is the same operation as "SORT BY" in SQL (Hive QL).
    *
@@ -848,7 +848,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] sorted by the specified column, all in ascending order.
+   * Returns a new Dataset sorted by the specified column, all in ascending order.
    * {{{
    *   // The following 3 are equivalent
    *   ds.sort("sortcol")
@@ -865,7 +865,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] sorted by the given expressions. For example:
+   * Returns a new Dataset sorted by the given expressions. For example:
    * {{{
    *   ds.sort($"col1", $"col2".desc)
    * }}}
@@ -879,7 +879,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] sorted by the given expressions.
+   * Returns a new Dataset sorted by the given expressions.
    * This is an alias of the `sort` function.
    *
    * @group typedrel
@@ -889,7 +889,7 @@ class Dataset[T] private[sql](
   def orderBy(sortCol: String, sortCols: String*): Dataset[T] = sort(sortCol, sortCols : _*)
 
   /**
-   * Returns a new [[Dataset]] sorted by the given expressions.
+   * Returns a new Dataset sorted by the given expressions.
    * This is an alias of the `sort` function.
    *
    * @group typedrel
@@ -923,7 +923,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] with an alias set.
+   * Returns a new Dataset with an alias set.
    *
    * @group typedrel
    * @since 1.6.0
@@ -933,7 +933,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * (Scala-specific) Returns a new [[Dataset]] with an alias set.
+   * (Scala-specific) Returns a new Dataset with an alias set.
    *
    * @group typedrel
    * @since 2.0.0
@@ -941,7 +941,7 @@ class Dataset[T] private[sql](
   def as(alias: Symbol): Dataset[T] = as(alias.name)
 
   /**
-   * Returns a new [[Dataset]] with an alias set. Same as `as`.
+   * Returns a new Dataset with an alias set. Same as `as`.
    *
    * @group typedrel
    * @since 2.0.0
@@ -949,7 +949,7 @@ class Dataset[T] private[sql](
   def alias(alias: String): Dataset[T] = as(alias)
 
   /**
-   * (Scala-specific) Returns a new [[Dataset]] with an alias set. Same as `as`.
+   * (Scala-specific) Returns a new Dataset with an alias set. Same as `as`.
    *
    * @group typedrel
    * @since 2.0.0
@@ -1008,7 +1008,7 @@ class Dataset[T] private[sql](
 
   /**
    * :: Experimental ::
-   * Returns a new [[Dataset]] by computing the given [[Column]] expression for each element.
+   * Returns a new Dataset by computing the given [[Column]] expression for each element.
    *
    * {{{
    *   val ds = Seq(1, 2, 3).toDS()
@@ -1045,7 +1045,7 @@ class Dataset[T] private[sql](
 
   /**
    * :: Experimental ::
-   * Returns a new [[Dataset]] by computing the given [[Column]] expressions for each element.
+   * Returns a new Dataset by computing the given [[Column]] expressions for each element.
    *
    * @group typedrel
    * @since 1.6.0
@@ -1056,7 +1056,7 @@ class Dataset[T] private[sql](
 
   /**
    * :: Experimental ::
-   * Returns a new [[Dataset]] by computing the given [[Column]] expressions for each element.
+   * Returns a new Dataset by computing the given [[Column]] expressions for each element.
    *
    * @group typedrel
    * @since 1.6.0
@@ -1070,7 +1070,7 @@ class Dataset[T] private[sql](
 
   /**
    * :: Experimental ::
-   * Returns a new [[Dataset]] by computing the given [[Column]] expressions for each element.
+   * Returns a new Dataset by computing the given [[Column]] expressions for each element.
    *
    * @group typedrel
    * @since 1.6.0
@@ -1085,7 +1085,7 @@ class Dataset[T] private[sql](
 
   /**
    * :: Experimental ::
-   * Returns a new [[Dataset]] by computing the given [[Column]] expressions for each element.
+   * Returns a new Dataset by computing the given [[Column]] expressions for each element.
    *
    * @group typedrel
    * @since 1.6.0
@@ -1154,7 +1154,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Groups the [[Dataset]] using the specified columns, so we can run aggregation on them. See
+   * Groups the Dataset using the specified columns, so we can run aggregation on them. See
    * [[RelationalGroupedDataset]] for all the available aggregate functions.
   *
    * {{{
@@ -1177,7 +1177,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Create a multi-dimensional rollup for the current [[Dataset]] using the specified columns,
+   * Create a multi-dimensional rollup for the current Dataset using the specified columns,
    * so we can run aggregation on them.
    * See [[RelationalGroupedDataset]] for all the available aggregate functions.
    *
@@ -1201,7 +1201,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Create a multi-dimensional cube for the current [[Dataset]] using the specified columns,
+   * Create a multi-dimensional cube for the current Dataset using the specified columns,
    * so we can run aggregation on them.
    * See [[RelationalGroupedDataset]] for all the available aggregate functions.
    *
@@ -1225,7 +1225,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Groups the [[Dataset]] using the specified columns, so that we can run aggregation on them.
+   * Groups the Dataset using the specified columns, so that we can run aggregation on them.
    * See [[RelationalGroupedDataset]] for all the available aggregate functions.
    *
    * This is a variant of groupBy that can only group by existing columns using column names
@@ -1254,7 +1254,7 @@ class Dataset[T] private[sql](
   /**
    * :: Experimental ::
    * (Scala-specific)
-   * Reduces the elements of this [[Dataset]] using the specified binary function. The given `func`
+   * Reduces the elements of this Dataset using the specified binary function. The given `func`
    * must be commutative and associative or the result may be non-deterministic.
    *
    * @group action
@@ -1310,7 +1310,7 @@ class Dataset[T] private[sql](
     groupByKey(func.call(_))(encoder)
 
   /**
-   * Create a multi-dimensional rollup for the current [[Dataset]] using the specified columns,
+   * Create a multi-dimensional rollup for the current Dataset using the specified columns,
    * so we can run aggregation on them.
    * See [[RelationalGroupedDataset]] for all the available aggregate functions.
    *
@@ -1339,7 +1339,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Create a multi-dimensional cube for the current [[Dataset]] using the specified columns,
+   * Create a multi-dimensional cube for the current Dataset using the specified columns,
    * so we can run aggregation on them.
    * See [[RelationalGroupedDataset]] for all the available aggregate functions.
    *
@@ -1367,7 +1367,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * (Scala-specific) Aggregates on the entire [[Dataset]] without groups.
+   * (Scala-specific) Aggregates on the entire Dataset without groups.
    * {{{
    *   // ds.agg(...) is a shorthand for ds.groupBy().agg(...)
    *   ds.agg("age" -> "max", "salary" -> "avg")
@@ -1382,7 +1382,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * (Scala-specific) Aggregates on the entire [[Dataset]] without groups.
+   * (Scala-specific) Aggregates on the entire Dataset without groups.
    * {{{
    *   // ds.agg(...) is a shorthand for ds.groupBy().agg(...)
    *   ds.agg(Map("age" -> "max", "salary" -> "avg"))
@@ -1395,7 +1395,7 @@ class Dataset[T] private[sql](
   def agg(exprs: Map[String, String]): DataFrame = groupBy().agg(exprs)
 
   /**
-   * (Java-specific) Aggregates on the entire [[Dataset]] without groups.
+   * (Java-specific) Aggregates on the entire Dataset without groups.
    * {{{
    *   // ds.agg(...) is a shorthand for ds.groupBy().agg(...)
    *   ds.agg(Map("age" -> "max", "salary" -> "avg"))
@@ -1408,7 +1408,7 @@ class Dataset[T] private[sql](
   def agg(exprs: java.util.Map[String, String]): DataFrame = groupBy().agg(exprs)
 
   /**
-   * Aggregates on the entire [[Dataset]] without groups.
+   * Aggregates on the entire Dataset without groups.
    * {{{
    *   // ds.agg(...) is a shorthand for ds.groupBy().agg(...)
    *   ds.agg(max($"age"), avg($"salary"))
@@ -1422,9 +1422,9 @@ class Dataset[T] private[sql](
   def agg(expr: Column, exprs: Column*): DataFrame = groupBy().agg(expr, exprs : _*)
 
   /**
-   * Returns a new [[Dataset]] by taking the first `n` rows. The difference between this function
+   * Returns a new Dataset by taking the first `n` rows. The difference between this function
    * and `head` is that `head` is an action and returns an array (by triggering query execution)
-   * while `limit` returns a new [[Dataset]].
+   * while `limit` returns a new Dataset.
    *
    * @group typedrel
    * @since 2.0.0
@@ -1434,7 +1434,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] containing union of rows in this Dataset and another Dataset.
+   * Returns a new Dataset containing union of rows in this Dataset and another Dataset.
    * This is equivalent to `UNION ALL` in SQL.
    *
    * To do a SQL-style set union (that does deduplication of elements), use this function followed
@@ -1447,7 +1447,7 @@ class Dataset[T] private[sql](
   def unionAll(other: Dataset[T]): Dataset[T] = union(other)
 
   /**
-   * Returns a new [[Dataset]] containing union of rows in this Dataset and another Dataset.
+   * Returns a new Dataset containing union of rows in this Dataset and another Dataset.
    * This is equivalent to `UNION ALL` in SQL.
    *
    * To do a SQL-style set union (that does deduplication of elements), use this function followed
@@ -1463,7 +1463,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] containing rows only in both this Dataset and another Dataset.
+   * Returns a new Dataset containing rows only in both this Dataset and another Dataset.
    * This is equivalent to `INTERSECT` in SQL.
    *
    * Note that, equality checking is performed directly on the encoded representation of the data
@@ -1477,7 +1477,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] containing rows in this Dataset but not in another Dataset.
+   * Returns a new Dataset containing rows in this Dataset but not in another Dataset.
    * This is equivalent to `EXCEPT` in SQL.
    *
    * Note that, equality checking is performed directly on the encoded representation of the data
@@ -1491,7 +1491,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] by sampling a fraction of rows.
+   * Returns a new Dataset by sampling a fraction of rows.
    *
    * @param withReplacement Sample with replacement or not.
    * @param fraction Fraction of rows to generate.
@@ -1505,7 +1505,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] by sampling a fraction of rows, using a random seed.
+   * Returns a new Dataset by sampling a fraction of rows, using a random seed.
    *
    * @param withReplacement Sample with replacement or not.
    * @param fraction Fraction of rows to generate.
@@ -1518,7 +1518,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Randomly splits this [[Dataset]] with the provided weights.
+   * Randomly splits this Dataset with the provided weights.
   *
    * @param weights weights for splits, will be normalized if they don't sum to 1.
    * @param seed Seed for sampling.
@@ -1545,7 +1545,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a Java list that contains randomly split [[Dataset]] with the provided weights.
+   * Returns a Java list that contains randomly split Dataset with the provided weights.
    *
    * @param weights weights for splits, will be normalized if they don't sum to 1.
    * @param seed Seed for sampling.
@@ -1559,7 +1559,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Randomly splits this [[Dataset]] with the provided weights.
+   * Randomly splits this Dataset with the provided weights.
    *
    * @param weights weights for splits, will be normalized if they don't sum to 1.
    * @group typedrel
@@ -1570,7 +1570,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Randomly splits this [[Dataset]] with the provided weights. Provided for the Python Api.
+   * Randomly splits this Dataset with the provided weights. Provided for the Python Api.
    *
    * @param weights weights for splits, will be normalized if they don't sum to 1.
    * @param seed Seed for sampling.
@@ -1580,7 +1580,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * (Scala-specific) Returns a new [[Dataset]] where each row has been expanded to zero or more
+   * (Scala-specific) Returns a new Dataset where each row has been expanded to zero or more
    * rows by the provided function. This is similar to a `LATERAL VIEW` in HiveQL. The columns of
    * the input row are implicitly joined with each row that is output by the function.
    *
@@ -1623,7 +1623,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * (Scala-specific) Returns a new [[Dataset]] where a single column has been expanded to zero
+   * (Scala-specific) Returns a new Dataset where a single column has been expanded to zero
    * or more rows by the provided function. This is similar to a `LATERAL VIEW` in HiveQL. All
    * columns of the input row are implicitly joined with each value that is output by the function.
    *
@@ -1664,7 +1664,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] by adding a column or replacing the existing column that has
+   * Returns a new Dataset by adding a column or replacing the existing column that has
    * the same name.
    *
    * @group untypedrel
@@ -1689,7 +1689,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] by adding a column with metadata.
+   * Returns a new Dataset by adding a column with metadata.
    */
   private[spark] def withColumn(colName: String, col: Column, metadata: Metadata): DataFrame = {
     val resolver = sparkSession.sessionState.analyzer.resolver
@@ -1710,7 +1710,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] with a column renamed.
+   * Returns a new Dataset with a column renamed.
    * This is a no-op if schema doesn't contain existingName.
    *
    * @group untypedrel
@@ -1735,7 +1735,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] with a column dropped. This is a no-op if schema doesn't contain
+   * Returns a new Dataset with a column dropped. This is a no-op if schema doesn't contain
    * column name.
    *
    * This method can only be used to drop top level columns. the colName string is treated
@@ -1749,7 +1749,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] with columns dropped.
+   * Returns a new Dataset with columns dropped.
    * This is a no-op if schema doesn't contain column name(s).
    *
    * This method can only be used to drop top level columns. the colName string is treated literally
@@ -1773,8 +1773,8 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] with a column dropped.
-   * This version of drop accepts a Column rather than a name.
+   * Returns a new Dataset with a column dropped.
+   * This version of drop accepts a [[Column]] rather than a name.
    * This is a no-op if the Dataset doesn't have a column
    * with an equivalent expression.
    *
@@ -1796,7 +1796,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] that contains only the unique rows from this [[Dataset]].
+   * Returns a new Dataset that contains only the unique rows from this Dataset.
    * This is an alias for `distinct`.
    *
    * @group typedrel
@@ -1805,7 +1805,7 @@ class Dataset[T] private[sql](
   def dropDuplicates(): Dataset[T] = dropDuplicates(this.columns)
 
   /**
-   * (Scala-specific) Returns a new [[Dataset]] with duplicate rows removed, considering only
+   * (Scala-specific) Returns a new Dataset with duplicate rows removed, considering only
    * the subset of columns.
    *
    * @group typedrel
@@ -1825,7 +1825,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] with duplicate rows removed, considering only
+   * Returns a new Dataset with duplicate rows removed, considering only
    * the subset of columns.
    *
    * @group typedrel
@@ -1838,7 +1838,7 @@ class Dataset[T] private[sql](
    * If no columns are given, this function computes statistics for all numerical columns.
    *
    * This function is meant for exploratory data analysis, as we make no guarantee about the
-   * backward compatibility of the schema of the resulting [[Dataset]]. If you want to
+   * backward compatibility of the schema of the resulting Dataset. If you want to
    * programmatically compute summary statistics, use the `agg` function instead.
    *
    * {{{
@@ -1937,7 +1937,7 @@ class Dataset[T] private[sql](
   /**
    * :: Experimental ::
    * (Scala-specific)
-   * Returns a new [[Dataset]] that only contains elements where `func` returns `true`.
+   * Returns a new Dataset that only contains elements where `func` returns `true`.
    *
    * @group typedrel
    * @since 1.6.0
@@ -1954,7 +1954,7 @@ class Dataset[T] private[sql](
   /**
    * :: Experimental ::
    * (Java-specific)
-   * Returns a new [[Dataset]] that only contains elements where `func` returns `true`.
+   * Returns a new Dataset that only contains elements where `func` returns `true`.
    *
    * @group typedrel
    * @since 1.6.0
@@ -1971,7 +1971,7 @@ class Dataset[T] private[sql](
   /**
    * :: Experimental ::
    * (Scala-specific)
-   * Returns a new [[Dataset]] that contains the result of applying `func` to each element.
+   * Returns a new Dataset that contains the result of applying `func` to each element.
    *
    * @group typedrel
    * @since 1.6.0
@@ -1984,7 +1984,7 @@ class Dataset[T] private[sql](
   /**
    * :: Experimental ::
    * (Java-specific)
-   * Returns a new [[Dataset]] that contains the result of applying `func` to each element.
+   * Returns a new Dataset that contains the result of applying `func` to each element.
    *
    * @group typedrel
    * @since 1.6.0
@@ -1998,7 +1998,7 @@ class Dataset[T] private[sql](
   /**
    * :: Experimental ::
    * (Scala-specific)
-   * Returns a new [[Dataset]] that contains the result of applying `func` to each partition.
+   * Returns a new Dataset that contains the result of applying `func` to each partition.
    *
    * @group typedrel
    * @since 1.6.0
@@ -2014,7 +2014,7 @@ class Dataset[T] private[sql](
   /**
    * :: Experimental ::
    * (Java-specific)
-   * Returns a new [[Dataset]] that contains the result of applying `f` to each partition.
+   * Returns a new Dataset that contains the result of applying `f` to each partition.
    *
    * @group typedrel
    * @since 1.6.0
@@ -2043,7 +2043,7 @@ class Dataset[T] private[sql](
   /**
    * :: Experimental ::
    * (Scala-specific)
-   * Returns a new [[Dataset]] by first applying a function to all elements of this [[Dataset]],
+   * Returns a new Dataset by first applying a function to all elements of this Dataset,
    * and then flattening the results.
    *
    * @group typedrel
@@ -2056,7 +2056,7 @@ class Dataset[T] private[sql](
   /**
    * :: Experimental ::
    * (Java-specific)
-   * Returns a new [[Dataset]] by first applying a function to all elements of this [[Dataset]],
+   * Returns a new Dataset by first applying a function to all elements of this Dataset,
    * and then flattening the results.
    *
    * @group typedrel
@@ -2080,7 +2080,7 @@ class Dataset[T] private[sql](
 
   /**
    * (Java-specific)
-   * Runs `func` on each element of this [[Dataset]].
+   * Runs `func` on each element of this Dataset.
    *
    * @group action
    * @since 1.6.0
@@ -2088,7 +2088,7 @@ class Dataset[T] private[sql](
   def foreach(func: ForeachFunction[T]): Unit = foreach(func.call(_))
 
   /**
-   * Applies a function `f` to each partition of this [[Dataset]].
+   * Applies a function `f` to each partition of this Dataset.
    *
    * @group action
    * @since 1.6.0
@@ -2099,7 +2099,7 @@ class Dataset[T] private[sql](
 
   /**
    * (Java-specific)
-   * Runs `func` on each partition of this [[Dataset]].
+   * Runs `func` on each partition of this Dataset.
    *
    * @group action
    * @since 1.6.0
@@ -2108,7 +2108,7 @@ class Dataset[T] private[sql](
     foreachPartition(it => func.call(it.asJava))
 
   /**
-   * Returns the first `n` rows in the [[Dataset]].
+   * Returns the first `n` rows in the Dataset.
   *
    * Running take requires moving data into the application's driver process, and doing so with
    * a very large `n` can crash the driver process with OutOfMemoryError.
@@ -2119,7 +2119,7 @@ class Dataset[T] private[sql](
   def take(n: Int): Array[T] = head(n)
 
   /**
-   * Returns the first `n` rows in the [[Dataset]] as a list.
+   * Returns the first `n` rows in the Dataset as a list.
    *
    * Running take requires moving data into the application's driver process, and doing so with
    * a very large `n` can crash the driver process with OutOfMemoryError.
@@ -2130,7 +2130,7 @@ class Dataset[T] private[sql](
   def takeAsList(n: Int): java.util.List[T] = java.util.Arrays.asList(take(n) : _*)
 
   /**
-   * Returns an array that contains all of [[Row]]s in this [[Dataset]].
+   * Returns an array that contains all of [[Row]]s in this Dataset.
    *
    * Running collect requires moving all the data into the application's driver process, and
    * doing so on a very large dataset can crash the driver process with OutOfMemoryError.
@@ -2143,7 +2143,7 @@ class Dataset[T] private[sql](
   def collect(): Array[T] = collect(needCallback = true)
 
   /**
-   * Returns a Java list that contains all of [[Row]]s in this [[Dataset]].
+   * Returns a Java list that contains all of [[Row]]s in this Dataset.
    *
    * Running collect requires moving all the data into the application's driver process, and
    * doing so on a very large dataset can crash the driver process with OutOfMemoryError.
@@ -2171,9 +2171,9 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Return an iterator that contains all of [[Row]]s in this [[Dataset]].
+   * Return an iterator that contains all of [[Row]]s in this Dataset.
    *
-   * The iterator will consume as much memory as the largest partition in this [[Dataset]].
+   * The iterator will consume as much memory as the largest partition in this Dataset.
    *
    * Note: this results in multiple Spark jobs, and if the input Dataset is the result
    * of a wide transformation (e.g. join with different partitioners), to avoid
@@ -2189,7 +2189,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns the number of rows in the [[Dataset]].
+   * Returns the number of rows in the Dataset.
    * @group action
    * @since 1.6.0
   */
@@ -2198,7 +2198,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] that has exactly `numPartitions` partitions.
+   * Returns a new Dataset that has exactly `numPartitions` partitions.
   *
    * @group typedrel
    * @since 1.6.0
@@ -2208,7 +2208,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] partitioned by the given partitioning expressions into
+   * Returns a new Dataset partitioned by the given partitioning expressions into
    * `numPartitions`. The resulting Dataset is hash partitioned.
    *
    * This is the same operation as "DISTRIBUTE BY" in SQL (Hive QL).
@@ -2222,7 +2222,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] partitioned by the given partitioning expressions, using
+   * Returns a new Dataset partitioned by the given partitioning expressions, using
    * `spark.sql.shuffle.partitions` as number of partitions.
    * The resulting Dataset is hash partitioned.
    *
@@ -2237,7 +2237,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] that has exactly `numPartitions` partitions.
+   * Returns a new Dataset that has exactly `numPartitions` partitions.
    * Similar to coalesce defined on an [[RDD]], this operation results in a narrow dependency, e.g.
    * if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of
    * the 100 new partitions will claim 10 of the current partitions.
@@ -2250,7 +2250,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns a new [[Dataset]] that contains only the unique rows from this [[Dataset]].
+   * Returns a new Dataset that contains only the unique rows from this Dataset.
    * This is an alias for `dropDuplicates`.
    *
    * Note that, equality checking is performed directly on the encoded representation of the data
@@ -2262,7 +2262,7 @@ class Dataset[T] private[sql](
   def distinct(): Dataset[T] = dropDuplicates()
 
   /**
-   * Persist this [[Dataset]] with the default storage level (`MEMORY_AND_DISK`).
+   * Persist this Dataset with the default storage level (`MEMORY_AND_DISK`).
    *
    * @group basic
    * @since 1.6.0
@@ -2273,7 +2273,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Persist this [[Dataset]] with the default storage level (`MEMORY_AND_DISK`).
+   * Persist this Dataset with the default storage level (`MEMORY_AND_DISK`).
    *
    * @group basic
    * @since 1.6.0
@@ -2281,7 +2281,7 @@ class Dataset[T] private[sql](
   def cache(): this.type = persist()
 
   /**
-   * Persist this [[Dataset]] with the given storage level.
+   * Persist this Dataset with the given storage level.
    * @param newLevel One of: `MEMORY_ONLY`, `MEMORY_AND_DISK`, `MEMORY_ONLY_SER`,
    *                 `MEMORY_AND_DISK_SER`, `DISK_ONLY`, `MEMORY_ONLY_2`,
    *                 `MEMORY_AND_DISK_2`, etc.
@@ -2295,7 +2295,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Mark the [[Dataset]] as non-persistent, and remove all blocks for it from memory and disk.
+   * Mark the Dataset as non-persistent, and remove all blocks for it from memory and disk.
   *
    * @param blocking Whether to block until all blocks are deleted.
    *
@@ -2308,7 +2308,7 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Mark the [[Dataset]] as non-persistent, and remove all blocks for it from memory and disk.
+   * Mark the Dataset as non-persistent, and remove all blocks for it from memory and disk.
    *
    * @group basic
    * @since 1.6.0
@@ -2316,7 +2316,7 @@ class Dataset[T] private[sql](
   def unpersist(): this.type = unpersist(blocking = false)
 
   /**
-   * Represents the content of the [[Dataset]] as an [[RDD]] of [[T]].
+   * Represents the content of the Dataset as an [[RDD]] of [[T]].
   *
    * @group basic
    * @since 1.6.0
@@ -2330,21 +2330,21 @@ class Dataset[T] private[sql](
   }
 
   /**
-   * Returns the content of the [[Dataset]] as a [[JavaRDD]] of [[Row]]s.
+   * Returns the content of the Dataset as a [[JavaRDD]] of [[Row]]s.
    * @group basic
    * @since 1.6.0
    */
   def toJavaRDD: JavaRDD[T] = rdd.toJavaRDD()
 
   /**
-   * Returns the content of the [[Dataset]] as a [[JavaRDD]] of [[Row]]s.
+   * Returns the content of the Dataset as a [[JavaRDD]] of [[Row]]s.
    * @group basic
    * @since 1.6.0
    */
   def javaRDD: JavaRDD[T] = toJavaRDD
 
   /**
-   * Registers this [[Dataset]] as a temporary table using the given name. The lifetime of this
+   * Registers this Dataset as a temporary table using the given name. The lifetime of this
    * temporary table is tied to the [[SparkSession]] that was used to create this Dataset.
    *
    * @group basic
@@ -2394,7 +2394,7 @@ class Dataset[T] private[sql](
 
   /**
    * :: Experimental ::
-   * Interface for saving the content of the [[Dataset]] out into external storage or streams.
+   * Interface for saving the content of the Dataset out into external storage or streams.
    *
    * @group basic
    * @since 1.6.0
@@ -2403,7 +2403,7 @@ class Dataset[T] private[sql](
   def write: DataFrameWriter = new DataFrameWriter(toDF())
 
   /**
-   * Returns the content of the [[Dataset]] as a Dataset of JSON strings.
+   * Returns the content of the Dataset as a Dataset of JSON strings.
    * @since 2.0.0
    */
   def toJSON: Dataset[String] = {
-- 
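[Editor's note, appended after the patch trailer so the diff itself stays intact: the Scaladoc touched above distinguishes typed, functional operations from untyped, Column-based ones. The following is a minimal, hypothetical Scala sketch of that distinction, assuming Spark 2.0, a local SparkSession, and a made-up Person case class; none of this code comes from the patch itself.]

import org.apache.spark.sql.SparkSession

object DatasetDocSketch {
  // Hypothetical domain class; any case class with an Encoder in scope works.
  case class Person(name: String, age: Long)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").appName("sketch").getOrCreate()
    import spark.implicits._

    val ds = Seq(Person("Andy", 32), Person("Ann", 25)).toDS()

    // Typed (functional) operations work directly on Person objects.
    val adultNames = ds.filter(_.age >= 30).map(_.name)

    // Untyped (relational) operations go through Column: `apply` selects a
    // Column in Scala (use `col` in Java), and `select` returns a DataFrame.
    val ageCol = ds("age")            // same as ds.col("age")
    val byName = ds.select($"name")   // DataFrame, i.e. Dataset[Row]

    adultNames.show()
    byName.show()
    spark.stop()
  }
}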