Skip to content
Snippets Groups Projects
Commit 10173279 authored by Herman van Hovell's avatar Herman van Hovell Committed by Reynold Xin
Browse files

[SPARK-12848][SQL] Change parsed decimal literal datatype from Double to Decimal

The current parser turns a decimal literal, for example ```12.1```, into a Double. The problem with this approach is that we convert an exact literal into a non-exact ```Double```. This PR changes that behavior: a decimal literal is now converted into an exact ```BigDecimal```.

The behavior for scientific decimals, for example ```12.1e01```, is unchanged. This will be converted into a Double.

This PR replaces the ```BigDecimal``` literal with a ```Double``` literal, because ```BigDecimal``` is now the default. You can still obtain a double literal by appending a 'D' to the value, for instance: ```3.141527D```

cc davies rxin

Author: Herman van Hovell <hvanhovell@questtec.nl>

Closes #10796 from hvanhovell/SPARK-12848.
parent f3934a8d
No related branches found
No related tags found
No related merge requests found
Showing
with 58 additions and 42 deletions
......@@ -1812,7 +1812,7 @@ test_that("Method coltypes() to get and set R's data types of a DataFrame", {
expect_equal(coltypes(x), "map<string,string>")
df <- selectExpr(read.json(sqlContext, jsonPath), "name", "(age * 1.21) as age")
expect_equal(dtypes(df), list(c("name", "string"), c("age", "double")))
expect_equal(dtypes(df), list(c("name", "string"), c("age", "decimal(24,2)")))
df1 <- select(df, cast(df$age, "integer"))
coltypes(df) <- c("character", "integer")
......
......@@ -122,7 +122,7 @@ constant
| BigintLiteral
| SmallintLiteral
| TinyintLiteral
| DecimalLiteral
| DoubleLiteral
| booleanValue
;
......
......@@ -418,9 +418,9 @@ TinyintLiteral
(Digit)+ 'Y'
;
DecimalLiteral
DoubleLiteral
:
Number 'B' 'D'
Number 'D'
;
ByteLengthLiteral
......
......@@ -623,6 +623,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
val CASE = "(?i)CASE".r
val INTEGRAL = "[+-]?\\d+".r
val DECIMAL = "[+-]?((\\d+(\\.\\d*)?)|(\\.\\d+))".r
protected def nodeToExpr(node: ASTNode): Expression = node match {
/* Attribute References */
......@@ -785,8 +786,8 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
case ast if ast.tokenType == SparkSqlParser.BigintLiteral =>
Literal.create(ast.text.substring(0, ast.text.length() - 1).toLong, LongType)
case ast if ast.tokenType == SparkSqlParser.DecimalLiteral =>
Literal(Decimal(ast.text.substring(0, ast.text.length() - 2)))
case ast if ast.tokenType == SparkSqlParser.DoubleLiteral =>
Literal(ast.text.toDouble)
case ast if ast.tokenType == SparkSqlParser.Number =>
val text = ast.text
......@@ -799,7 +800,10 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
Literal(v.longValue())
case v => Literal(v.underlying())
}
case DECIMAL(_*) =>
Literal(BigDecimal(text).underlying())
case _ =>
// Convert a scientifically notated decimal into a double.
Literal(text.toDouble)
}
case ast if ast.tokenType == SparkSqlParser.StringLiteral =>
......
......@@ -692,12 +692,11 @@ object HiveTypeCoercion {
case e if !e.childrenResolved => e
// Find tightest common type for If, if the true value and false value have different types.
case i @ If(pred, left, right) if left.dataType != right.dataType =>
findTightestCommonTypeToString(left.dataType, right.dataType).map { widestType =>
findWiderTypeForTwo(left.dataType, right.dataType).map { widestType =>
val newLeft = if (left.dataType == widestType) left else Cast(left, widestType)
val newRight = if (right.dataType == widestType) right else Cast(right, widestType)
If(pred, newLeft, newRight)
}.getOrElse(i) // If there is no applicable conversion, leave expression unchanged.
// Convert If(null literal, _, _) into boolean type.
// In the optimizer, we should short-circuit this directly into false value.
case If(pred, left, right) if pred.dataType == NullType =>
......
......@@ -212,7 +212,7 @@ class MathExpressionsSuite extends QueryTest with SharedSQLContext {
Seq(Row(5, 10, 0), Row(55, 60, 100), Row(555, 560, 600))
)
val pi = "3.1415BD"
val pi = "3.1415"
checkAnswer(
sql(s"SELECT round($pi, -3), round($pi, -2), round($pi, -1), " +
s"round($pi, 0), round($pi, 1), round($pi, 2), round($pi, 3)"),
......@@ -367,6 +367,16 @@ class MathExpressionsSuite extends QueryTest with SharedSQLContext {
checkAnswer(
input.toDF("key", "value").selectExpr("abs(key) a").sort("a"),
input.map(pair => Row(pair._2)))
checkAnswer(
sql("select abs(0), abs(-1), abs(123), abs(-9223372036854775807), abs(9223372036854775807)"),
Row(0, 1, 123, 9223372036854775807L, 9223372036854775807L)
)
checkAnswer(
sql("select abs(0.0), abs(-3.14159265), abs(3.14159265)"),
Row(BigDecimal("0.0"), BigDecimal("3.14159265"), BigDecimal("3.14159265"))
)
}
test("log2") {
......
......@@ -1174,19 +1174,19 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
test("Floating point number format") {
checkAnswer(
sql("SELECT 0.3"), Row(0.3)
sql("SELECT 0.3"), Row(BigDecimal(0.3))
)
checkAnswer(
sql("SELECT -0.8"), Row(-0.8)
sql("SELECT -0.8"), Row(BigDecimal(-0.8))
)
checkAnswer(
sql("SELECT .5"), Row(0.5)
sql("SELECT .5"), Row(BigDecimal(0.5))
)
checkAnswer(
sql("SELECT -.18"), Row(-0.18)
sql("SELECT -.18"), Row(BigDecimal(-0.18))
)
}
......@@ -1200,11 +1200,11 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
)
checkAnswer(
sql("SELECT 9223372036854775808BD"), Row(new java.math.BigDecimal("9223372036854775808"))
sql("SELECT 9223372036854775808"), Row(new java.math.BigDecimal("9223372036854775808"))
)
checkAnswer(
sql("SELECT -9223372036854775809BD"), Row(new java.math.BigDecimal("-9223372036854775809"))
sql("SELECT -9223372036854775809"), Row(new java.math.BigDecimal("-9223372036854775809"))
)
}
......@@ -1219,11 +1219,11 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
)
checkAnswer(
sql("SELECT -5.2BD"), Row(BigDecimal(-5.2))
sql("SELECT -5.2"), Row(BigDecimal(-5.2))
)
checkAnswer(
sql("SELECT +6.8"), Row(6.8d)
sql("SELECT +6.8e0"), Row(6.8d)
)
checkAnswer(
......@@ -1598,20 +1598,20 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
}
test("decimal precision with multiply/division") {
checkAnswer(sql("select 10.3BD * 3.0BD"), Row(BigDecimal("30.90")))
checkAnswer(sql("select 10.3000BD * 3.0BD"), Row(BigDecimal("30.90000")))
checkAnswer(sql("select 10.30000BD * 30.0BD"), Row(BigDecimal("309.000000")))
checkAnswer(sql("select 10.300000000000000000BD * 3.000000000000000000BD"),
checkAnswer(sql("select 10.3 * 3.0"), Row(BigDecimal("30.90")))
checkAnswer(sql("select 10.3000 * 3.0"), Row(BigDecimal("30.90000")))
checkAnswer(sql("select 10.30000 * 30.0"), Row(BigDecimal("309.000000")))
checkAnswer(sql("select 10.300000000000000000 * 3.000000000000000000"),
Row(BigDecimal("30.900000000000000000000000000000000000", new MathContext(38))))
checkAnswer(sql("select 10.300000000000000000BD * 3.0000000000000000000BD"),
checkAnswer(sql("select 10.300000000000000000 * 3.0000000000000000000"),
Row(null))
checkAnswer(sql("select 10.3BD / 3.0BD"), Row(BigDecimal("3.433333")))
checkAnswer(sql("select 10.3000BD / 3.0BD"), Row(BigDecimal("3.4333333")))
checkAnswer(sql("select 10.30000BD / 30.0BD"), Row(BigDecimal("0.343333333")))
checkAnswer(sql("select 10.300000000000000000BD / 3.00000000000000000BD"),
checkAnswer(sql("select 10.3 / 3.0"), Row(BigDecimal("3.433333")))
checkAnswer(sql("select 10.3000 / 3.0"), Row(BigDecimal("3.4333333")))
checkAnswer(sql("select 10.30000 / 30.0"), Row(BigDecimal("0.343333333")))
checkAnswer(sql("select 10.300000000000000000 / 3.00000000000000000"),
Row(BigDecimal("3.433333333333333333333333333", new MathContext(38))))
checkAnswer(sql("select 10.3000000000000000000BD / 3.00000000000000000BD"),
checkAnswer(sql("select 10.3000000000000000000 / 3.00000000000000000"),
Row(BigDecimal("3.4333333333333333333333333333", new MathContext(38))))
}
......@@ -1637,13 +1637,13 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
}
test("precision smaller than scale") {
checkAnswer(sql("select 10.00BD"), Row(BigDecimal("10.00")))
checkAnswer(sql("select 1.00BD"), Row(BigDecimal("1.00")))
checkAnswer(sql("select 0.10BD"), Row(BigDecimal("0.10")))
checkAnswer(sql("select 0.01BD"), Row(BigDecimal("0.01")))
checkAnswer(sql("select 0.001BD"), Row(BigDecimal("0.001")))
checkAnswer(sql("select -0.01BD"), Row(BigDecimal("-0.01")))
checkAnswer(sql("select -0.001BD"), Row(BigDecimal("-0.001")))
checkAnswer(sql("select 10.00"), Row(BigDecimal("10.00")))
checkAnswer(sql("select 1.00"), Row(BigDecimal("1.00")))
checkAnswer(sql("select 0.10"), Row(BigDecimal("0.10")))
checkAnswer(sql("select 0.01"), Row(BigDecimal("0.01")))
checkAnswer(sql("select 0.001"), Row(BigDecimal("0.001")))
checkAnswer(sql("select -0.01"), Row(BigDecimal("-0.01")))
checkAnswer(sql("select -0.001"), Row(BigDecimal("-0.001")))
}
test("external sorting updates peak execution memory") {
......
......@@ -442,13 +442,13 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
// Number and String conflict: resolve the type as number in this query.
checkAnswer(
sql("select num_str + 1.2BD from jsonTable where num_str > 14"),
sql("select num_str + 1.2 from jsonTable where num_str > 14"),
Row(BigDecimal("92233720368547758071.2"))
)
// Number and String conflict: resolve the type as number in this query.
checkAnswer(
sql("select num_str + 1.2BD from jsonTable where num_str >= 92233720368547758060BD"),
sql("select num_str + 1.2 from jsonTable where num_str >= 92233720368547758060"),
Row(new java.math.BigDecimal("92233720368547758071.2"))
)
......
......@@ -323,7 +323,14 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
// Feature removed in HIVE-11145
"alter_partition_protect_mode",
"drop_partitions_ignore_protection",
"protectmode"
"protectmode",
// Spark parser treats numerical literals differently: it creates decimals instead of doubles.
"udf_abs",
"udf_format_number",
"udf_round",
"udf_round_3",
"view_cast"
)
/**
......@@ -884,7 +891,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_10_trims",
"udf_E",
"udf_PI",
"udf_abs",
"udf_acos",
"udf_add",
"udf_array",
......@@ -928,7 +934,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_find_in_set",
"udf_float",
"udf_floor",
"udf_format_number",
"udf_from_unixtime",
"udf_greaterthan",
"udf_greaterthanorequal",
......@@ -976,8 +981,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_regexp_replace",
"udf_repeat",
"udf_rlike",
"udf_round",
"udf_round_3",
"udf_rpad",
"udf_rtrim",
"udf_sign",
......
......@@ -559,7 +559,7 @@ class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfte
"""
|select p_mfgr,p_name, p_size,
|histogram_numeric(p_retailprice, 5) over w1 as hist,
|percentile(p_partkey, 0.5) over w1 as per,
|percentile(p_partkey, cast(0.5 as double)) over w1 as per,
|row_number() over(distribute by p_mfgr sort by p_name) as rn
|from part
|window w1 as (distribute by p_mfgr sort by p_mfgr, p_name
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment