Skip to content
Snippets Groups Projects
Commit 16b928c5 authored by Davies Liu's avatar Davies Liu Committed by Reynold Xin
Browse files

[SPARK-9529] [SQL] improve TungstenSort on DecimalType

Generate prefix for DecimalType, fix the random generator of decimal

cc JoshRosen

Author: Davies Liu <davies@databricks.com>

Closes #7857 from davies/sort_decimal and squashes the following commits:

2433959 [Davies Liu] Merge branch 'master' of github.com:apache/spark into sort_decimal
de24253 [Davies Liu] fix style
0a54c1a [Davies Liu] sort decimal
parent 28d944e8
No related branches found
No related tags found
No related merge requests found
......@@ -67,6 +67,19 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression {
(DoublePrefixComparator.computePrefix(Double.NegativeInfinity),
s"$DoublePrefixCmp.computePrefix((double)$input)")
case StringType => (0L, s"$input.getPrefix()")
case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS =>
val prefix = if (dt.precision <= Decimal.MAX_LONG_DIGITS) {
s"$input.toUnscaledLong()"
} else {
// reduce the scale to fit in a long
val p = Decimal.MAX_LONG_DIGITS
val s = p - (dt.precision - dt.scale)
s"$input.changePrecision($p, $s) ? $input.toUnscaledLong() : ${Long.MinValue}L"
}
(Long.MinValue, prefix)
case dt: DecimalType =>
(DoublePrefixComparator.computePrefix(Double.NegativeInfinity),
s"$DoublePrefixCmp.computePrefix($input.toDouble())")
case _ => (0L, "0L")
}
......
......@@ -107,7 +107,10 @@ object RandomDataGenerator {
case DateType => Some(() => new java.sql.Date(rand.nextInt()))
case TimestampType => Some(() => new java.sql.Timestamp(rand.nextLong()))
case DecimalType.Fixed(precision, scale) => Some(
() => BigDecimal.apply(rand.nextLong(), rand.nextInt(), new MathContext(precision)))
() => BigDecimal.apply(
rand.nextLong() % math.pow(10, precision).toLong,
scale,
new MathContext(precision)))
case DoubleType => randomNumeric[Double](
rand, r => longBitsToDouble(r.nextLong()), Seq(Double.MinValue, Double.MinPositiveValue,
Double.MaxValue, Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN, 0.0))
......
......@@ -34,8 +34,9 @@ object DataTypeTestUtils {
* decimal types.
*/
val fractionalTypes: Set[FractionalType] = Set(
// Decimal types, covering several different precision/scale combinations.
DecimalType.USER_DEFAULT,
DecimalType(20, 5), // precision 20, scale 5
DecimalType.SYSTEM_DEFAULT,
DecimalType(2, 1), // precision 2, scale 1
// Binary floating-point types.
DoubleType,
FloatType
)
......
......@@ -36,16 +36,16 @@ object SortPrefixUtils {
/**
 * Selects the [[PrefixComparator]] matching the data type and direction of `sortOrder`.
 *
 * - Strings use the STRING / STRING_DESC comparators.
 * - Boolean, integral, date and timestamp types use LONG / LONG_DESC.
 * - Decimals whose integral digit count (`precision - scale`) is at most
 *   `Decimal.MAX_LONG_DIGITS` also use LONG / LONG_DESC; every other decimal uses
 *   DOUBLE / DOUBLE_DESC, as do Float and Double.
 * - Any other type gets [[NoOpPrefixComparator]].
 *
 * NOTE(review): the decimal guard presumably has to stay in sync with the code that
 * generates the prefix for decimals (which chooses between a long and a double prefix
 * using the same condition) -- confirm when changing either side.
 *
 * @param sortOrder the sort order whose `dataType` and `isAscending` drive the choice
 * @return the comparator to apply to prefixes produced for `sortOrder`
 */
def getPrefixComparator(sortOrder: SortOrder): PrefixComparator = {
  sortOrder.dataType match {
    case StringType =>
      if (sortOrder.isAscending) PrefixComparators.STRING else PrefixComparators.STRING_DESC
    case BooleanType | ByteType | ShortType | IntegerType | LongType | DateType | TimestampType =>
      if (sortOrder.isAscending) PrefixComparators.LONG else PrefixComparators.LONG_DESC
    // Must precede the generic DecimalType case: these decimals are compared as longs.
    case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS =>
      if (sortOrder.isAscending) PrefixComparators.LONG else PrefixComparators.LONG_DESC
    case FloatType | DoubleType =>
      if (sortOrder.isAscending) PrefixComparators.DOUBLE else PrefixComparators.DOUBLE_DESC
    // Remaining decimals have too many integral digits for a long; compare as doubles.
    case _: DecimalType =>
      if (sortOrder.isAscending) PrefixComparators.DOUBLE else PrefixComparators.DOUBLE_DESC
    case _ => NoOpPrefixComparator
  }
}
......
......@@ -61,8 +61,7 @@ class TungstenSortSuite extends SparkPlanTest with BeforeAndAfterAll {
// Test sorting on different data types
for (
dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType)
if !dataType.isInstanceOf[DecimalType]; // We don't have an unsafe representation for decimals
dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType);
nullable <- Seq(true, false);
sortOrder <- Seq('a.asc :: Nil, 'a.desc :: Nil);
randomDataGenerator <- RandomDataGenerator.forType(dataType, nullable)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment