diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 7c15e38458690582b033337ff85e4798a1337738..b0390cb9942e6d73487bdc0abfd3eea492d3e264 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1018,6 +1018,18 @@ def sha2(col, numBits):
     return Column(jc)
 
 
+@since(2.0)
+def hash(*cols):
+    """Calculates the hash code of given columns, and returns the result as an int column.
+
+    >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(hash('a').alias('hash')).collect()
+    [Row(hash=1358996357)]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.hash(_to_seq(sc, cols, _to_java_column))
+    return Column(jc)
+
+
 # ---------------------- String/Binary functions ------------------------------
 
 _string_functions = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index 8834924687c0cf0f31147d3ca1491d2320961a51..6697d463614d591b2f3aaaf8bb63b959c87e6ed8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -200,7 +200,7 @@ case class Murmur3Hash(children: Seq[Expression], seed: Int) extends Expression
 
   override def checkInputDataTypes(): TypeCheckResult = {
     if (children.isEmpty) {
-      TypeCheckResult.TypeCheckFailure("arguments of function hash cannot be empty")
+      TypeCheckResult.TypeCheckFailure("function hash requires at least one argument")
     } else {
       TypeCheckResult.TypeCheckSuccess
     }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala
index 915c585ec91fb582995ba95f537da451cdb389fa..f3df716a57824ae8bf72466682b8dda4345cafe8 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala
@@ -163,6 +163,7 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite {
     assertError(Coalesce(Seq('intField, 'booleanField)),
       "input to function coalesce should all be the same type")
     assertError(Coalesce(Nil), "input to function coalesce cannot be empty")
+    assertError(new Murmur3Hash(Nil), "function hash requires at least one argument")
     assertError(Explode('intField),
       "input to function explode should be array or map type")
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index e223e32fd702ea06490545c231b298aa166ad3c8..1c96f647b6345b4384cdbc6efbd2627e2250c4d8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1820,8 +1820,8 @@ object functions extends LegacyFunctions {
   * @since 2.0
   */
  @scala.annotation.varargs
-  def hash(col: Column, cols: Column*): Column = withExpr {
-    new Murmur3Hash((col +: cols).map(_.expr))
+  def hash(cols: Column*): Column = withExpr {
+    new Murmur3Hash(cols.map(_.expr))
  }
 
  //////////////////////////////////////////////////////////////////////////////////////////////
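
For context, the Scala-side change makes `hash` accept its columns directly as varargs instead of a mandatory first column plus varargs. Below is a minimal usage sketch of that signature; the `HashUsageSketch` object, the local SparkContext/SQLContext setup, and the sample data are illustrative assumptions, not part of this patch.

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.SQLContext
    import org.apache.spark.sql.functions.hash

    object HashUsageSketch {
      def main(args: Array[String]): Unit = {
        // Hypothetical local context and SQLContext, purely for demonstration.
        val sc = new SparkContext(new SparkConf().setMaster("local[*]").setAppName("hash-sketch"))
        val sqlContext = new SQLContext(sc)
        import sqlContext.implicits._

        val df = Seq(("ABC", 1)).toDF("a", "b")
        // With the updated signature, any number of columns is passed directly as varargs.
        df.select(hash($"a", $"b").alias("hash")).show()

        sc.stop()
      }
    }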