Skip to content
Snippets Groups Projects
Commit d188a677 authored by Daoyuan Wang's avatar Daoyuan Wang Committed by Cheng Lian
Browse files

[SPARK-10533][SQL] handle scientific notation in sqlParser

https://issues.apache.org/jira/browse/SPARK-10533

val df = sqlContext.createDataFrame(Seq(("a",1.0),("b",2.0),("c",3.0)))
df.filter("_2 < 2.0e1").show

Previously, numeric literals written in scientific notation (such as `2.0e1`) failed to parse in the SQL expression parser.

Author: Daoyuan Wang <daoyuan.wang@intel.com>

Closes #9085 from adrian-wang/scinotation.
parent 233e534a
No related branches found
No related tags found
No related merge requests found
......@@ -82,6 +82,10 @@ class SqlLexical extends StdLexical {
override def toString: String = chars
}
/** Lexer token for a numeric literal written in scientific notation
 *  (e.g. `9.0e1`); `chars` holds the raw literal text as scanned. */
case class DecimalLit(chars: String) extends Token {
override def toString: String = chars
}
/* This is a work around to support the lazy setting */
def initialize(keywords: Seq[String]): Unit = {
reserved.clear()
......@@ -102,8 +106,12 @@ class SqlLexical extends StdLexical {
}
override lazy val token: Parser[Token] =
( identChar ~ (identChar | digit).* ^^
{ case first ~ rest => processIdent((first :: rest).mkString) }
( rep1(digit) ~ ('.' ~> digit.*).? ~ (exp ~> sign.? ~ rep1(digit)) ^^ {
case i ~ None ~ (sig ~ rest) =>
DecimalLit(i.mkString + "e" + sig.mkString + rest.mkString)
case i ~ Some(d) ~ (sig ~ rest) =>
DecimalLit(i.mkString + "." + d.mkString + "e" + sig.mkString + rest.mkString)
}
| digit.* ~ identChar ~ (identChar | digit).* ^^
{ case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) }
| rep1(digit) ~ ('.' ~> digit.*).? ^^ {
......@@ -125,6 +133,9 @@ class SqlLexical extends StdLexical {
override def identChar: Parser[Elem] = letter | elem('_')
private lazy val sign: Parser[Elem] = elem("s", c => c == '+' || c == '-')
private lazy val exp: Parser[Elem] = elem("e", c => c == 'E' || c == 'e')
override def whitespace: Parser[Any] =
( whitespaceChar
| '/' ~ '*' ~ comment
......
......@@ -337,6 +337,9 @@ object SqlParser extends AbstractSparkSQLParser with DataTypeParser {
| sign.? ~ unsignedFloat ^^ {
case s ~ f => Literal(toDecimalOrDouble(s.getOrElse("") + f))
}
| sign.? ~ unsignedDecimal ^^ {
case s ~ d => Literal(toDecimalOrDouble(s.getOrElse("") + d))
}
)
protected lazy val unsignedFloat: Parser[String] =
......@@ -344,6 +347,14 @@ object SqlParser extends AbstractSparkSQLParser with DataTypeParser {
| elem("decimal", _.isInstanceOf[lexical.FloatLit]) ^^ (_.chars)
)
// A positive decimal literal in scientific notation: either a leading-dot
// form (".9e+2", normalized here to "0.9e+2") or a full DecimalLit token
// emitted by the lexer, returned as its raw character string.
protected lazy val unsignedDecimal: Parser[String] =
  ( "." ~> decimalLit ^^ ("0." + _)
  | elem("scientific_notation", _.isInstanceOf[lexical.DecimalLit]) ^^ (_.chars)
  )
// Accepts a scientific-notation token produced by the lexer and yields
// its raw character representation.
def decimalLit: Parser[String] =
  elem("scientific_notation", tok => tok.isInstanceOf[lexical.DecimalLit]) ^^ (tok => tok.chars)
protected lazy val sign: Parser[String] = ("+" | "-")
protected lazy val integral: Parser[String] =
......
......@@ -177,9 +177,14 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
}
test("filterExpr") {
// NOTE(review): the next three lines duplicate the checks that follow; this
// looks like merged diff residue (old and new versions of the same test
// rendered together) -- confirm against the actual repository file.
checkAnswer(
testData.filter("key > 90"),
testData.collect().filter(_.getInt(0) > 90).toSeq)
// Expected rows: locally computed keys greater than 90.
val res = testData.collect().filter(_.getInt(0) > 90).toSeq
checkAnswer(testData.filter("key > 90"), res)
// Each scientific-notation spelling of 90 below must filter identically.
checkAnswer(testData.filter("key > 9.0e1"), res)
checkAnswer(testData.filter("key > .9e+2"), res)
checkAnswer(testData.filter("key > 0.9e+2"), res)
checkAnswer(testData.filter("key > 900e-1"), res)
checkAnswer(testData.filter("key > 900.0E-1"), res)
checkAnswer(testData.filter("key > 9.e+1"), res)
}
test("filterExpr using where") {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment