From 5807cb40ae178f0395c71b967f02aee853ef8bc9 Mon Sep 17 00:00:00 2001 From: ravipesala <ravindra.pesala@huawei.com> Date: Tue, 28 Oct 2014 13:36:06 -0700 Subject: [PATCH] [SPARK-3814][SQL] Support for Bitwise AND(&), OR(|) ,XOR(^), NOT(~) in Spark HQL and SQL Currently there is no support of Bitwise & , | in Spark HiveQl and Spark SQL as well. So this PR support the same. I am closing https://github.com/apache/spark/pull/2926 as it has conflicts to merge. And also added support for Bitwise AND(&), OR(|) ,XOR(^), NOT(~) And I handled all review comments in that PR Author: ravipesala <ravindra.pesala@huawei.com> Closes #2961 from ravipesala/SPARK-3814-NEW4 and squashes the following commits: a391c7a [ravipesala] Rebase with master --- .../spark/sql/catalyst/SparkSQLParser.scala | 2 +- .../apache/spark/sql/catalyst/SqlParser.scala | 4 + .../spark/sql/catalyst/dsl/package.scala | 4 + .../sql/catalyst/expressions/arithmetic.scala | 89 +++++++++++++++++++ .../ExpressionEvaluationSuite.scala | 32 +++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 16 ++++ .../org/apache/spark/sql/hive/HiveQl.scala | 4 + .../sql/hive/execution/SQLQuerySuite.scala | 24 +++++ 8 files changed, 174 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala index 219322c015..12e8346a64 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SparkSQLParser.scala @@ -61,7 +61,7 @@ class SqlLexical(val keywords: Seq[String]) extends StdLexical { delimiters += ( "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")", - ",", ";", "%", "{", "}", ":", "[", "]", "." 
+ ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~" ) override lazy val token: Parser[Token] = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 4e967713ed..0acf7252ba 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -260,6 +260,9 @@ class SqlParser extends AbstractSparkSQLParser { ( "*" ^^^ { (e1: Expression, e2: Expression) => Multiply(e1, e2) } | "/" ^^^ { (e1: Expression, e2: Expression) => Divide(e1, e2) } | "%" ^^^ { (e1: Expression, e2: Expression) => Remainder(e1, e2) } + | "&" ^^^ { (e1: Expression, e2: Expression) => BitwiseAnd(e1, e2) } + | "|" ^^^ { (e1: Expression, e2: Expression) => BitwiseOr(e1, e2) } + | "^" ^^^ { (e1: Expression, e2: Expression) => BitwiseXor(e1, e2) } ) protected lazy val function: Parser[Expression] = @@ -370,6 +373,7 @@ class SqlParser extends AbstractSparkSQLParser { | dotExpressionHeader | ident ^^ UnresolvedAttribute | signedPrimary + | "~" ~> expression ^^ BitwiseNot ) protected lazy val dotExpressionHeader: Parser[Expression] = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 75b6e37c2a..23cfd483ec 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -62,12 +62,16 @@ package object dsl { def unary_- = UnaryMinus(expr) def unary_! 
= Not(expr) + def unary_~ = BitwiseNot(expr) def + (other: Expression) = Add(expr, other) def - (other: Expression) = Subtract(expr, other) def * (other: Expression) = Multiply(expr, other) def / (other: Expression) = Divide(expr, other) def % (other: Expression) = Remainder(expr, other) + def & (other: Expression) = BitwiseAnd(expr, other) + def | (other: Expression) = BitwiseOr(expr, other) + def ^ (other: Expression) = BitwiseXor(expr, other) def && (other: Expression) = And(expr, other) def || (other: Expression) = Or(expr, other) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index fe825fdcda..83e8466ec2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -64,6 +64,23 @@ abstract class BinaryArithmetic extends BinaryExpression { } left.dataType } + + override def eval(input: Row): Any = { + val evalE1 = left.eval(input) + if(evalE1 == null) { + null + } else { + val evalE2 = right.eval(input) + if (evalE2 == null) { + null + } else { + evalInternal(evalE1, evalE2) + } + } + } + + def evalInternal(evalE1: EvaluatedType, evalE2: EvaluatedType): Any = + sys.error(s"BinaryExpressions must either override eval or evalInternal") } case class Add(left: Expression, right: Expression) extends BinaryArithmetic { @@ -100,6 +117,78 @@ case class Remainder(left: Expression, right: Expression) extends BinaryArithmet override def eval(input: Row): Any = i2(input, left, right, _.rem(_, _)) } +/** + * A function that calculates bitwise and(&) of two numbers. 
+ */ +case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithmetic { + def symbol = "&" + + override def evalInternal(evalE1: EvaluatedType, evalE2: EvaluatedType): Any = dataType match { + case ByteType => (evalE1.asInstanceOf[Byte] & evalE2.asInstanceOf[Byte]).toByte + case ShortType => (evalE1.asInstanceOf[Short] & evalE2.asInstanceOf[Short]).toShort + case IntegerType => evalE1.asInstanceOf[Int] & evalE2.asInstanceOf[Int] + case LongType => evalE1.asInstanceOf[Long] & evalE2.asInstanceOf[Long] + case other => sys.error(s"Unsupported bitwise & operation on ${other}") + } +} + +/** + * A function that calculates bitwise or(|) of two numbers. + */ +case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmetic { + def symbol = "|" + + override def evalInternal(evalE1: EvaluatedType, evalE2: EvaluatedType): Any = dataType match { + case ByteType => (evalE1.asInstanceOf[Byte] | evalE2.asInstanceOf[Byte]).toByte + case ShortType => (evalE1.asInstanceOf[Short] | evalE2.asInstanceOf[Short]).toShort + case IntegerType => evalE1.asInstanceOf[Int] | evalE2.asInstanceOf[Int] + case LongType => evalE1.asInstanceOf[Long] | evalE2.asInstanceOf[Long] + case other => sys.error(s"Unsupported bitwise | operation on ${other}") + } +} + +/** + * A function that calculates bitwise xor(^) of two numbers. 
+ */ +case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithmetic { + def symbol = "^" + + override def evalInternal(evalE1: EvaluatedType, evalE2: EvaluatedType): Any = dataType match { + case ByteType => (evalE1.asInstanceOf[Byte] ^ evalE2.asInstanceOf[Byte]).toByte + case ShortType => (evalE1.asInstanceOf[Short] ^ evalE2.asInstanceOf[Short]).toShort + case IntegerType => evalE1.asInstanceOf[Int] ^ evalE2.asInstanceOf[Int] + case LongType => evalE1.asInstanceOf[Long] ^ evalE2.asInstanceOf[Long] + case other => sys.error(s"Unsupported bitwise ^ operation on ${other}") + } +} + +/** + * A function that calculates bitwise not(~) of a number. + */ +case class BitwiseNot(child: Expression) extends UnaryExpression { + type EvaluatedType = Any + + def dataType = child.dataType + override def foldable = child.foldable + def nullable = child.nullable + override def toString = s"~$child" + + override def eval(input: Row): Any = { + val evalE = child.eval(input) + if (evalE == null) { + null + } else { + dataType match { + case ByteType => (~(evalE.asInstanceOf[Byte])).toByte + case ShortType => (~(evalE.asInstanceOf[Short])).toShort + case IntegerType => ~(evalE.asInstanceOf[Int]) + case LongType => ~(evalE.asInstanceOf[Long]) + case other => sys.error(s"Unsupported bitwise ~ operation on ${other}") + } + } + } +} + case class MaxOf(left: Expression, right: Expression) extends Expression { type EvaluatedType = Any diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index 53c53481f9..5657bc555e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -680,4 +680,36 @@ class ExpressionEvaluationSuite extends FunSuite { 
checkEvaluation(Sqrt(Literal(null, DoubleType)), null, new GenericRow(Array[Any](null))) } + + test("Bitwise operations") { + val row = new GenericRow(Array[Any](1, 2, 3, null)) + val c1 = 'a.int.at(0) + val c2 = 'a.int.at(1) + val c3 = 'a.int.at(2) + val c4 = 'a.int.at(3) + + checkEvaluation(BitwiseAnd(c1, c4), null, row) + checkEvaluation(BitwiseAnd(c1, c2), 0, row) + checkEvaluation(BitwiseAnd(c1, Literal(null, IntegerType)), null, row) + checkEvaluation(BitwiseAnd(Literal(null, IntegerType), Literal(null, IntegerType)), null, row) + + checkEvaluation(BitwiseOr(c1, c4), null, row) + checkEvaluation(BitwiseOr(c1, c2), 3, row) + checkEvaluation(BitwiseOr(c1, Literal(null, IntegerType)), null, row) + checkEvaluation(BitwiseOr(Literal(null, IntegerType), Literal(null, IntegerType)), null, row) + + checkEvaluation(BitwiseXor(c1, c4), null, row) + checkEvaluation(BitwiseXor(c1, c2), 3, row) + checkEvaluation(BitwiseXor(c1, Literal(null, IntegerType)), null, row) + checkEvaluation(BitwiseXor(Literal(null, IntegerType), Literal(null, IntegerType)), null, row) + + checkEvaluation(BitwiseNot(c4), null, row) + checkEvaluation(BitwiseNot(c1), -2, row) + checkEvaluation(BitwiseNot(Literal(null, IntegerType)), null, row) + + checkEvaluation(c1 & c2, 0, row) + checkEvaluation(c1 | c2, 3, row) + checkEvaluation(c1 ^ c2, 3, row) + checkEvaluation(~c1, -2, row) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 12e1cfc1cb..1034c2d05f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -883,4 +883,20 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { jsonRDD(data).registerTempTable("records") sql("SELECT `key?number1` FROM records") } + + test("SPARK-3814 Support Bitwise & operator") { + checkAnswer(sql("SELECT key&1 FROM testData WHERE key = 1 "), 1) + } + + 
test("SPARK-3814 Support Bitwise | operator") { + checkAnswer(sql("SELECT key|0 FROM testData WHERE key = 1 "), 1) + } + + test("SPARK-3814 Support Bitwise ^ operator") { + checkAnswer(sql("SELECT key^0 FROM testData WHERE key = 1 "), 1) + } + + test("SPARK-3814 Support Bitwise ~ operator") { + checkAnswer(sql("SELECT ~key FROM testData WHERE key = 1 "), -2) + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index aa80b2f04d..ed07a28039 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -956,6 +956,7 @@ private[hive] object HiveQl { /* Arithmetic */ case Token("-", child :: Nil) => UnaryMinus(nodeToExpr(child)) + case Token("~", child :: Nil) => BitwiseNot(nodeToExpr(child)) case Token("+", left :: right:: Nil) => Add(nodeToExpr(left), nodeToExpr(right)) case Token("-", left :: right:: Nil) => Subtract(nodeToExpr(left), nodeToExpr(right)) case Token("*", left :: right:: Nil) => Multiply(nodeToExpr(left), nodeToExpr(right)) @@ -963,6 +964,9 @@ private[hive] object HiveQl { case Token(DIV(), left :: right:: Nil) => Cast(Divide(nodeToExpr(left), nodeToExpr(right)), LongType) case Token("%", left :: right:: Nil) => Remainder(nodeToExpr(left), nodeToExpr(right)) + case Token("&", left :: right:: Nil) => BitwiseAnd(nodeToExpr(left), nodeToExpr(right)) + case Token("|", left :: right:: Nil) => BitwiseOr(nodeToExpr(left), nodeToExpr(right)) + case Token("^", left :: right:: Nil) => BitwiseXor(nodeToExpr(left), nodeToExpr(right)) case Token("TOK_FUNCTION", Token(SQRT(), Nil) :: arg :: Nil) => Sqrt(nodeToExpr(arg)) /* Comparisons */ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index fbe6ac765c..a4aea31d3f 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -75,4 +75,28 @@ class SQLQuerySuite extends QueryTest { sql("SELECT a.key FROM (SELECT key FROM src) `a`"), sql("SELECT `key` FROM src").collect().toSeq) } + + test("SPARK-3814 Support Bitwise & operator") { + checkAnswer( + sql("SELECT case when 1&1=1 then 1 else 0 end FROM src"), + sql("SELECT 1 FROM src").collect().toSeq) + } + + test("SPARK-3814 Support Bitwise | operator") { + checkAnswer( + sql("SELECT case when 1|0=1 then 1 else 0 end FROM src"), + sql("SELECT 1 FROM src").collect().toSeq) + } + + test("SPARK-3814 Support Bitwise ^ operator") { + checkAnswer( + sql("SELECT case when 1^0=1 then 1 else 0 end FROM src"), + sql("SELECT 1 FROM src").collect().toSeq) + } + + test("SPARK-3814 Support Bitwise ~ operator") { + checkAnswer( + sql("SELECT case when ~1=-2 then 1 else 0 end FROM src"), + sql("SELECT 1 FROM src").collect().toSeq) + } } -- GitLab