Skip to content
Snippets Groups Projects
Commit fa51b0fb authored by Reynold Xin's avatar Reynold Xin
Browse files

[SPARK-2598] RangePartitioner's binary search does not use the given Ordering

We should fix this in branch-1.0 as well.

Author: Reynold Xin <rxin@apache.org>

Closes #1500 from rxin/rangePartitioner and squashes the following commits:

c0a94f5 [Reynold Xin] [SPARK-2598] RangePartitioner's binary search does not use the given Ordering.
parent 98ab4112
No related branches found
No related tags found
No related merge requests found
......@@ -134,8 +134,8 @@ class RangePartitioner[K : Ordering : ClassTag, V](
def getPartition(key: Any): Int = {
val k = key.asInstanceOf[K]
var partition = 0
if (rangeBounds.length < 1000) {
// If we have less than 100 partitions naive search
if (rangeBounds.length <= 128) {
// If we have less than 128 partitions naive search
while (partition < rangeBounds.length && ordering.gt(k, rangeBounds(partition))) {
partition += 1
}
......
......@@ -19,11 +19,11 @@ package org.apache.spark.util
import java.util
import scala.Array
import scala.reflect._
import scala.reflect.{classTag, ClassTag}
private[spark] object CollectionsUtils {
def makeBinarySearch[K : Ordering : ClassTag] : (Array[K], K) => Int = {
// For primitive keys, we can use the natural ordering. Otherwise, use the Ordering comparator.
classTag[K] match {
case ClassTag.Float =>
(l, x) => util.Arrays.binarySearch(l.asInstanceOf[Array[Float]], x.asInstanceOf[Float])
......@@ -40,7 +40,8 @@ private[spark] object CollectionsUtils {
case ClassTag.Long =>
(l, x) => util.Arrays.binarySearch(l.asInstanceOf[Array[Long]], x.asInstanceOf[Long])
case _ =>
(l, x) => util.Arrays.binarySearch(l.asInstanceOf[Array[AnyRef]], x)
val comparator = implicitly[Ordering[K]].asInstanceOf[java.util.Comparator[Any]]
(l, x) => util.Arrays.binarySearch(l.asInstanceOf[Array[AnyRef]], x, comparator)
}
}
}
......@@ -91,6 +91,17 @@ class PartitioningSuite extends FunSuite with SharedSparkContext with PrivateMet
}
}
test("RangePartitioner for keys that are not Comparable (but with Ordering)") {
// Row does not extend Comparable, but has an implicit Ordering defined.
implicit object RowOrdering extends Ordering[Row] {
override def compare(x: Row, y: Row) = x.value - y.value
}
val rdd = sc.parallelize(1 to 4500).map(x => (Row(x), Row(x)))
val partitioner = new RangePartitioner(1500, rdd)
partitioner.getPartition(Row(100))
}
test("HashPartitioner not equal to RangePartitioner") {
val rdd = sc.parallelize(1 to 10).map(x => (x, x))
val rangeP2 = new RangePartitioner(2, rdd)
......@@ -177,3 +188,6 @@ class PartitioningSuite extends FunSuite with SharedSparkContext with PrivateMet
// Add other tests here for classes that should be able to handle empty partitions correctly
}
}
private sealed case class Row(value: Int)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment