Skip to content
Snippets Groups Projects
Commit ec8be274 authored by egraldlo's avatar egraldlo Committed by Michael Armbrust
Browse files

[SPARK-1995][SQL] system function upper and lower can be supported

I'm not sure whether now is the right time to implement string system functions in Spark SQL.

Author: egraldlo <egraldlo@gmail.com>

Closes #936 from egraldlo/stringoperator and squashes the following commits:

3c6c60a [egraldlo] Add UPPER, LOWER, MAX and MIN into hive parser
ea76d0a [egraldlo] modify the formatting issues
b49f25e [egraldlo] modify the formatting issues
1f0bbb5 [egraldlo] system function upper and lower supported
13d3267 [egraldlo] system function upper and lower supported
parent d000ca98
No related branches found
No related tags found
No related merge requests found
......@@ -124,6 +124,8 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
// Keywords declared for the SQL parser. UPPER and LOWER are matched by the
// function-call rules that build Upper(...) and Lower(...) expressions.
protected val OVERWRITE = Keyword("OVERWRITE")
protected val LIKE = Keyword("LIKE")
protected val RLIKE = Keyword("RLIKE")
protected val UPPER = Keyword("UPPER")
protected val LOWER = Keyword("LOWER")
protected val REGEXP = Keyword("REGEXP")
protected val ORDER = Keyword("ORDER")
protected val OUTER = Keyword("OUTER")
......@@ -329,6 +331,8 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
AVG ~> "(" ~> expression <~ ")" ^^ { case exp => Average(exp) } |
MIN ~> "(" ~> expression <~ ")" ^^ { case exp => Min(exp) } |
MAX ~> "(" ~> expression <~ ")" ^^ { case exp => Max(exp) } |
UPPER ~> "(" ~> expression <~ ")" ^^ { case exp => Upper(exp) } |
LOWER ~> "(" ~> expression <~ ")" ^^ { case exp => Lower(exp) } |
IF ~> "(" ~> expression ~ "," ~ expression ~ "," ~ expression <~ ")" ^^ {
case c ~ "," ~ t ~ "," ~ f => If(c,t,f)
} |
......
......@@ -70,6 +70,22 @@ trait StringRegexExpression {
}
}
/**
 * Shared behaviour for unary expressions that change the letter case of a string.
 *
 * Implementations supply [[convert]]; this trait evaluates the child, turns the
 * result into a string via `toString`, and applies the conversion. The result
 * type is always `StringType`, and the expression is nullable exactly when its
 * child is.
 */
trait CaseConversionExpression {
  self: UnaryExpression =>

  type EvaluatedType = Any

  /**
   * Converts a string to the target case.
   *
   * @param v the string form of the child's value; never null when called from `eval`
   * @return the case-converted string
   */
  def convert(v: String): String

  def nullable: Boolean = child.nullable
  def dataType: DataType = StringType

  /**
   * Evaluates the child against `input` and case-converts its string form.
   *
   * @param input the row the child expression is evaluated against
   * @return the converted string, or null when the child evaluates to null
   */
  override def eval(input: Row): Any = {
    val evaluated = child.eval(input)
    // The child may be nullable (see `nullable`), so a null value must be
    // propagated instead of dereferenced; calling toString on it would throw
    // a NullPointerException.
    if (evaluated == null) {
      null
    } else {
      convert(evaluated.toString)
    }
  }
}
/**
* Simple RegEx pattern matching function
*/
......@@ -115,3 +131,19 @@ case class RLike(left: Expression, right: Expression)
override def escape(v: String): String = v
override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).find(0)
}
/**
 * Expression that upper-cases the string form of its child's value.
 *
 * The conversion uses `String.toUpperCase()` without an explicit locale, so
 * the JVM default locale's case-mapping rules apply.
 */
case class Upper(child: Expression) extends UnaryExpression with CaseConversionExpression {
  override def convert(v: String): String = {
    v.toUpperCase()
  }
}
/**
 * Expression that lower-cases the string form of its child's value.
 *
 * The conversion uses `String.toLowerCase()` without an explicit locale, so
 * the JVM default locale's case-mapping rules apply.
 */
case class Lower(child: Expression) extends UnaryExpression with CaseConversionExpression {
  override def convert(v: String): String = {
    v.toLowerCase()
  }
}
......@@ -313,4 +313,27 @@ class SQLQuerySuite extends QueryTest {
(3, "C"),
(4, "D")))
}
test("system function upper()") {
  // UPPER over the lower-case fixture should yield the capital letter for each key.
  val query = sql("SELECT n,UPPER(l) FROM lowerCaseData")
  val expected = Seq((1, "A"), (2, "B"), (3, "C"), (4, "D"))
  checkAnswer(query, expected)
}
test("system function lower()") {
  // LOWER over the upper-case fixture should yield the small letter for each key.
  val query = sql("SELECT N,LOWER(L) FROM upperCaseData")
  val expected = Seq((1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e"), (6, "f"))
  checkAnswer(query, expected)
}
}
......@@ -781,6 +781,10 @@ private[hive] object HiveQl {
// Case-insensitive ("(?i)") regexes used as extractor patterns when matching
// function-name tokens, e.g. `Token(MAX(), Nil)` in the TOK_FUNCTION cases.
val COUNT = "(?i)COUNT".r
val AVG = "(?i)AVG".r
val SUM = "(?i)SUM".r
val MAX = "(?i)MAX".r
val MIN = "(?i)MIN".r
val UPPER = "(?i)UPPER".r
val LOWER = "(?i)LOWER".r
val RAND = "(?i)RAND".r
val AND = "(?i)AND".r
val OR = "(?i)OR".r
......@@ -817,7 +821,13 @@ private[hive] object HiveQl {
case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) => CountDistinct(args.map(nodeToExpr))
case Token("TOK_FUNCTION", Token(SUM(), Nil) :: arg :: Nil) => Sum(nodeToExpr(arg))
case Token("TOK_FUNCTIONDI", Token(SUM(), Nil) :: arg :: Nil) => SumDistinct(nodeToExpr(arg))
case Token("TOK_FUNCTION", Token(MAX(), Nil) :: arg :: Nil) => Max(nodeToExpr(arg))
case Token("TOK_FUNCTION", Token(MIN(), Nil) :: arg :: Nil) => Min(nodeToExpr(arg))
/* System functions for string operations */
case Token("TOK_FUNCTION", Token(UPPER(), Nil) :: arg :: Nil) => Upper(nodeToExpr(arg))
case Token("TOK_FUNCTION", Token(LOWER(), Nil) :: arg :: Nil) => Lower(nodeToExpr(arg))
/* Casts */
case Token("TOK_FUNCTION", Token("TOK_STRING", Nil) :: arg :: Nil) =>
Cast(nodeToExpr(arg), StringType)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment