Commit e45daf22 authored by Chris Cope, committed by Patrick Wendell

[SPARK-1766] sorted functions to meet pedantic requirements

Pedantry is underrated

Author: Chris Cope <ccope@resilientscience.com>

Closes #1859 from copester/master and squashes the following commits:

0fb4499 [Chris Cope] [SPARK-1766] sorted functions to meet pedantic requirements
parent b431e674
@@ -237,6 +237,25 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
     combineByKey[V]((v: V) => v, func, func, partitioner)
   }
 
+  /**
+   * Merge the values for each key using an associative reduce function. This will also perform
+   * the merging locally on each mapper before sending results to a reducer, similarly to a
+   * "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
+   */
+  def reduceByKey(func: (V, V) => V, numPartitions: Int): RDD[(K, V)] = {
+    reduceByKey(new HashPartitioner(numPartitions), func)
+  }
+
+  /**
+   * Merge the values for each key using an associative reduce function. This will also perform
+   * the merging locally on each mapper before sending results to a reducer, similarly to a
+   * "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
+   * parallelism level.
+   */
+  def reduceByKey(func: (V, V) => V): RDD[(K, V)] = {
+    reduceByKey(defaultPartitioner(self), func)
+  }
+
   /**
    * Merge the values for each key using an associative reduce function, but return the results
    * immediately to the master as a Map. This will also perform the merging locally on each mapper
@@ -374,15 +393,6 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
     countApproxDistinctByKey(relativeSD, defaultPartitioner(self))
   }
 
-  /**
-   * Merge the values for each key using an associative reduce function. This will also perform
-   * the merging locally on each mapper before sending results to a reducer, similarly to a
-   * "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
-   */
-  def reduceByKey(func: (V, V) => V, numPartitions: Int): RDD[(K, V)] = {
-    reduceByKey(new HashPartitioner(numPartitions), func)
-  }
-
   /**
    * Group the values for each key in the RDD into a single sequence. Allows controlling the
    * partitioning of the resulting key-value pair RDD by passing a Partitioner.
@@ -482,16 +492,6 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
     combineByKey(createCombiner, mergeValue, mergeCombiners, defaultPartitioner(self))
   }
 
-  /**
-   * Merge the values for each key using an associative reduce function. This will also perform
-   * the merging locally on each mapper before sending results to a reducer, similarly to a
-   * "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
-   * parallelism level.
-   */
-  def reduceByKey(func: (V, V) => V): RDD[(K, V)] = {
-    reduceByKey(defaultPartitioner(self), func)
-  }
-
   /**
    * Group the values for each key in the RDD into a single sequence. Hash-partitions the
    * resulting RDD with the existing partitioner/parallelism level.
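For context on the two overloads this commit relocates, here is a minimal usage sketch. The local SparkContext setup and sample data are illustrative assumptions, not part of this patch; the SparkContext._ import reflects the Spark 1.x convention for bringing PairRDDFunctions into scope.

    // Minimal sketch of both relocated reduceByKey overloads (illustrative only).
    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.SparkContext._  // implicit conversion to PairRDDFunctions (Spark 1.x)

    val sc = new SparkContext(new SparkConf().setAppName("reduceByKey-example").setMaster("local[2]"))
    val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))

    // Overload with an explicit partition count: output is hash-partitioned into 4 partitions.
    val byFour = pairs.reduceByKey(_ + _, 4)

    // Overload without a partition count: uses defaultPartitioner(self), i.e. the
    // existing partitioner/parallelism level.
    val byDefault = pairs.reduceByKey(_ + _)

    byDefault.collect()  // Array((a,4), (b,2)) -- ordering may vary
    sc.stop()

Both overloads delegate to reduceByKey(partitioner, func), which in turn calls combineByKey (visible in the first hunk's context lines), so the map-side "combiner" merging described in the docs applies to each of them.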