Commit 0197262a authored by Jacek Laskowski, committed by Sean Owen

[DOCS] Docs-only improvements

…adoc

## What changes were proposed in this pull request?

Use recommended values for row boundaries in Window's scaladoc, i.e. `Window.unboundedPreceding`, `Window.unboundedFollowing`, and `Window.currentRow` (that were introduced in 2.1.0).
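For illustration, a minimal sketch of a window spec built with these recommended boundary values, assuming a DataFrame `df` with hypothetical `dept` and `salary` columns:

```scala
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum

// Running total per department: from the first row of the partition
// up to the current row, using the recommended boundary values
// instead of raw Long literals.
val runningWindow = Window
  .partitionBy("dept")
  .orderBy("salary")
  .rowsBetween(Window.unboundedPreceding, Window.currentRow)

val withRunningTotal = df.withColumn("running_total", sum("salary").over(runningWindow))
```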

## How was this patch tested?

Local build

Author: Jacek Laskowski <jacek@japila.pl>

Closes #17417 from jaceklaskowski/window-expression-scaladoc.
parent b454d440
Showing changed files with 30 additions and 33 deletions
@@ -60,8 +60,6 @@ public abstract class MemoryConsumer {
   /**
    * Force spill during building.
-   *
-   * For testing.
    */
   public void spill() throws IOException {
     spill(Long.MAX_VALUE, this);
......
@@ -52,8 +52,7 @@ import org.apache.spark.util.Utils;
 * This class implements sort-based shuffle's hash-style shuffle fallback path. This write path
 * writes incoming records to separate files, one file per reduce partition, then concatenates these
 * per-partition files to form a single output file, regions of which are served to reducers.
-* Records are not buffered in memory. This is essentially identical to
-* {@link org.apache.spark.shuffle.hash.HashShuffleWriter}, except that it writes output in a format
+* Records are not buffered in memory. It writes output in a format
 * that can be served / consumed via {@link org.apache.spark.shuffle.IndexShuffleBlockResolver}.
 * <p>
 * This write path is inefficient for shuffles with large numbers of reduce partitions because it
@@ -61,7 +60,7 @@ import org.apache.spark.util.Utils;
 * {@link SortShuffleManager} only selects this write path when
 * <ul>
 *   <li>no Ordering is specified,</li>
-*   <li>no Aggregator is specific, and</li>
+*   <li>no Aggregator is specified, and</li>
 *   <li>the number of partitions is less than
 *   <code>spark.shuffle.sort.bypassMergeThreshold</code>.</li>
 * </ul>
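As context for the last criterion above: the threshold is an ordinary Spark configuration entry, so raising it makes the bypass path eligible for wider shuffles. A minimal sketch; the value 400 and the app name are arbitrary, and the default threshold is 200:

```scala
import org.apache.spark.sql.SparkSession

// The bypass path is only taken when the shuffle has no ordering, no
// aggregator, and fewer partitions than spark.shuffle.sort.bypassMergeThreshold.
val spark = SparkSession.builder()
  .appName("bypass-merge-threshold-sketch")
  .config("spark.shuffle.sort.bypassMergeThreshold", "400")
  .getOrCreate()
```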
......
@@ -71,13 +71,12 @@ private[spark] trait ExecutorAllocationClient {
   /**
    * Request that the cluster manager kill every executor on the specified host.
-   * Results in a call to killExecutors for each executor on the host, with the replace
-   * and force arguments set to true.
+   *
    * @return whether the request is acknowledged by the cluster manager.
    */
   def killExecutorsOnHost(host: String): Boolean

- /**
+  /**
    * Request that the cluster manager kill the specified executor.
    * @return whether the request is acknowledged by the cluster manager.
    */
......
@@ -149,7 +149,7 @@ private[spark] abstract class Task[T](
   def preferredLocations: Seq[TaskLocation] = Nil

-  // Map output tracker epoch. Will be set by TaskScheduler.
+  // Map output tracker epoch. Will be set by TaskSetManager.
   var epoch: Long = -1

   // Task context, to be initialized in run().
......
@@ -77,7 +77,7 @@ abstract class Serializer {
 * position = 0
 * serOut.write(obj1)
 * serOut.flush()
-* position = # of bytes writen to stream so far
+* position = # of bytes written to stream so far
 * obj1Bytes = output[0:position-1]
 * serOut.write(obj2)
 * serOut.flush()
......
@@ -95,8 +95,7 @@ private[spark] class BlockStoreShuffleReader[K, C](
     // Sort the output if there is a sort ordering defined.
     dep.keyOrdering match {
       case Some(keyOrd: Ordering[K]) =>
-        // Create an ExternalSorter to sort the data. Note that if spark.shuffle.spill is disabled,
-        // the ExternalSorter won't spill to disk.
+        // Create an ExternalSorter to sort the data.
         val sorter =
           new ExternalSorter[K, C, C](context, ordering = Some(keyOrd), serializer = dep.serializer)
         sorter.insertAll(aggregatedIter)
......
@@ -61,7 +61,7 @@ private[spark] class IndexShuffleBlockResolver(
   /**
    * Remove data file and index file that contain the output data from one map.
-   * */
+   */
   def removeDataByMap(shuffleId: Int, mapId: Int): Unit = {
     var file = getDataFile(shuffleId, mapId)
     if (file.exists()) {
@@ -132,7 +132,7 @@ private[spark] class IndexShuffleBlockResolver(
    * replace them with new ones.
    *
    * Note: the `lengths` will be updated to match the existing index file if use the existing ones.
-   * */
+   */
   def writeIndexFileAndCommit(
       shuffleId: Int,
       mapId: Int,
......
@@ -82,13 +82,13 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager
   override val shuffleBlockResolver = new IndexShuffleBlockResolver(conf)

   /**
-   * Register a shuffle with the manager and obtain a handle for it to pass to tasks.
+   * Obtains a [[ShuffleHandle]] to pass to tasks.
    */
   override def registerShuffle[K, V, C](
       shuffleId: Int,
       numMaps: Int,
       dependency: ShuffleDependency[K, V, C]): ShuffleHandle = {
-    if (SortShuffleWriter.shouldBypassMergeSort(SparkEnv.get.conf, dependency)) {
+    if (SortShuffleWriter.shouldBypassMergeSort(conf, dependency)) {
       // If there are fewer than spark.shuffle.sort.bypassMergeThreshold partitions and we don't
       // need map-side aggregation, then write numPartitions files directly and just concatenate
       // them at the end. This avoids doing serialization and deserialization twice to merge
......
@@ -279,7 +279,7 @@ private[spark] object AccumulatorContext {

 /**
- * An [[AccumulatorV2 accumulator]] for computing sum, count, and averages for 64-bit integers.
+ * An [[AccumulatorV2 accumulator]] for computing sum, count, and average of 64-bit integers.
  *
  * @since 2.0.0
  */
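The accumulator this scaladoc describes is `LongAccumulator`; a minimal usage sketch, assuming an active `SparkContext` named `sc`:

```scala
// LongAccumulator tracks the sum, count, and average of the added values.
val acc = sc.longAccumulator("My Accumulator")
sc.parallelize(1L to 100L).foreach(x => acc.add(x))
println(s"sum=${acc.sum}, count=${acc.count}, avg=${acc.avg}")
```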
......
@@ -30,7 +30,7 @@ import org.apache.spark.sql.{DataFrame, Row, SparkSession}
 import org.apache.spark.util.Utils

 /**
- * An example of how to use [[org.apache.spark.sql.DataFrame]] for ML. Run with
+ * An example of how to use [[DataFrame]] for ML. Run with
  * {{{
  *   ./bin/run-example ml.DataFrameExample [options]
  * }}}
......
@@ -29,7 +29,7 @@ import org.apache.spark.sql.types.{StructField, StructType}
 /**
  * API for correlation functions in MLlib, compatible with Dataframes and Datasets.
  *
- * The functions in this package generalize the functions in [[org.apache.spark.sql.Dataset.stat]]
+ * The functions in this package generalize the functions in [[org.apache.spark.sql.Dataset#stat]]
  * to spark.ml's Vector types.
  */
 @Since("2.2.0")
......
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.trees.CurrentOrigin
 /**
  * Collection of rules related to hints. The only hint currently available is broadcast join hint.
  *
- * Note that this is separatedly into two rules because in the future we might introduce new hint
+ * Note that this is separately into two rules because in the future we might introduce new hint
  * rules that have different ordering requirements from broadcast.
  */
 object ResolveHints {
......
@@ -229,9 +229,9 @@ case class ExpressionEncoder[T](
   // serializer expressions are used to encode an object to a row, while the object is usually an
   // intermediate value produced inside an operator, not from the output of the child operator. This
   // is quite different from normal expressions, and `AttributeReference` doesn't work here
-  // (intermediate value is not an attribute). We assume that all serializer expressions use a same
-  // `BoundReference` to refer to the object, and throw exception if they don't.
-  assert(serializer.forall(_.references.isEmpty), "serializer cannot reference to any attributes.")
+  // (intermediate value is not an attribute). We assume that all serializer expressions use the
+  // same `BoundReference` to refer to the object, and throw exception if they don't.
+  assert(serializer.forall(_.references.isEmpty), "serializer cannot reference any attributes.")
   assert(serializer.flatMap { ser =>
     val boundRefs = ser.collect { case b: BoundReference => b }
     assert(boundRefs.nonEmpty,
......
@@ -491,7 +491,7 @@ abstract class BinaryExpression extends Expression {
 * A [[BinaryExpression]] that is an operator, with two properties:
 *
 * 1. The string representation is "x symbol y", rather than "funcName(x, y)".
-* 2. Two inputs are expected to the be same type. If the two inputs have different types,
+* 2. Two inputs are expected to be of the same type. If the two inputs have different types,
 *    the analyzer will find the tightest common type and do the proper type casting.
 */
abstract class BinaryOperator extends BinaryExpression with ExpectsInputTypes {
......
@@ -695,7 +695,7 @@ case class DenseRank(children: Seq[Expression]) extends RankLike {
 *
 * This documentation has been based upon similar documentation for the Hive and Presto projects.
 *
-* @param children to base the rank on; a change in the value of one the children will trigger a
+* @param children to base the rank on; a change in the value of one of the children will trigger a
 *                 change in rank. This is an internal parameter and will be assigned by the
 *                 Analyser.
 */
......
@@ -65,7 +65,7 @@ object EliminateSerialization extends Rule[LogicalPlan] {

 /**
  * Combines two adjacent [[TypedFilter]]s, which operate on same type object in condition, into one,
- * mering the filter functions into one conjunctive function.
+ * merging the filter functions into one conjunctive function.
  */
 object CombineTypedFilters extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
......
@@ -492,7 +492,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
   }

   /**
-   * Add an [[Aggregate]] to a logical plan.
+   * Add an [[Aggregate]] or [[GroupingSets]] to a logical plan.
    */
   private def withAggregation(
       ctx: AggregationContext,
@@ -519,7 +519,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
   }

   /**
-   * Add a Hint to a logical plan.
+   * Add a [[Hint]] to a logical plan.
    */
   private def withHints(
       ctx: HintContext,
@@ -545,7 +545,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
   }

   /**
-   * Create a single relation referenced in a FROM claused. This method is used when a part of the
+   * Create a single relation referenced in a FROM clause. This method is used when a part of the
    * join condition is nested, for example:
    * {{{
    *   select * from t1 join (t2 cross join t3) on col1 = col2
......
@@ -230,14 +230,15 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
   def producedAttributes: AttributeSet = AttributeSet.empty

   /**
-   * Attributes that are referenced by expressions but not provided by this nodes children.
+   * Attributes that are referenced by expressions but not provided by this node's children.
    * Subclasses should override this method if they produce attributes internally as it is used by
    * assertions designed to prevent the construction of invalid plans.
    */
   def missingInput: AttributeSet = references -- inputSet -- producedAttributes

   /**
-   * Runs [[transform]] with `rule` on all expressions present in this query operator.
+   * Runs [[transformExpressionsDown]] with `rule` on all expressions present
+   * in this query operator.
    * Users should not expect a specific directionality. If a specific directionality is needed,
    * transformExpressionsDown or transformExpressionsUp should be used.
    *
......
@@ -32,7 +32,7 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging {
   private var _analyzed: Boolean = false

   /**
-   * Marks this plan as already analyzed. This should only be called by CheckAnalysis.
+   * Marks this plan as already analyzed. This should only be called by [[CheckAnalysis]].
    */
   private[catalyst] def setAnalyzed(): Unit = { _analyzed = true }
......
@@ -26,7 +26,8 @@ import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval

 /**
- * Test basic expression parsing. If a type of expression is supported it should be tested here.
+ * Test basic expression parsing.
+ * If the type of an expression is supported it should be tested here.
  *
  * Please note that some of the expressions test don't have to be sound expressions, only their
  * structure needs to be valid. Unsound expressions should be caught by the Analyzer or
......