Skip to content
Snippets Groups Projects
Commit b5ce84a1 authored by Xiu Guo, committed by Reynold Xin
Browse files

[SPARK-12456][SQL] Add ExpressionDescription to misc functions

First try, not sure how much information we need to provide in the usage part.

Author: Xiu Guo <xguo27@gmail.com>

Closes #10423 from xguo27/SPARK-12456.
parent bc0f30d0
No related branches found
No related tags found
No related merge requests found
......@@ -26,6 +26,9 @@ import org.apache.spark.unsafe.types.UTF8String
/**
* Expression that returns the name of the current file being read in using [[SqlNewHadoopRDD]]
*/
@ExpressionDescription(
usage = "_FUNC_() - Returns the name of the current file being read if available",
extended = "> SELECT _FUNC_();\n ''")
case class InputFileName() extends LeafExpression with Nondeterministic {
override def nullable: Boolean = true
......
......@@ -32,6 +32,14 @@ import org.apache.spark.sql.types.{LongType, DataType}
*
* Since this expression is stateful, it cannot be a case object.
*/
@ExpressionDescription(
usage =
"""_FUNC_() - Returns monotonically increasing 64-bit integers.
The generated ID is guaranteed to be monotonically increasing and unique, but not consecutive.
The current implementation puts the partition ID in the upper 31 bits, and the lower 33 bits
represent the record number within each partition. The assumption is that the data frame has
less than 1 billion partitions, and each partition has less than 8 billion records.""",
extended = "> SELECT _FUNC_();\n 0")
private[sql] case class MonotonicallyIncreasingID() extends LeafExpression with Nondeterministic {
/**
......
......@@ -26,6 +26,9 @@ import org.apache.spark.sql.types.{IntegerType, DataType}
/**
* Expression that returns the current partition id of the Spark task.
*/
@ExpressionDescription(
usage = "_FUNC_() - Returns the current partition id of the Spark task",
extended = "> SELECT _FUNC_();\n 0")
private[sql] case class SparkPartitionID() extends LeafExpression with Nondeterministic {
override def nullable: Boolean = false
......
......@@ -30,6 +30,9 @@ import org.apache.spark.unsafe.types.UTF8String
* A function that calculates an MD5 128-bit checksum and returns it as a hex string.
* For input of type [[BinaryType]].
*/
@ExpressionDescription(
usage = "_FUNC_(input) - Returns an MD5 128-bit checksum as a hex string of the input",
extended = "> SELECT _FUNC_('Spark');\n '8cde774d6f7333752ed72cacddb05126'")
case class Md5(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def dataType: DataType = StringType
......@@ -53,6 +56,12 @@ case class Md5(child: Expression) extends UnaryExpression with ImplicitCastInput
* asking for an unsupported SHA function, the return value is NULL. If either argument is NULL or
* the hash length is not one of the permitted values, the return value is NULL.
*/
@ExpressionDescription(
usage = "_FUNC_(input, bitLength) - Returns a checksum of SHA-2 family as a hex string of the " +
"input. SHA-224, SHA-256, SHA-384, and SHA-512 are supported. Bit length of 0 is equivalent " +
"to 256",
extended = "> SELECT _FUNC_('Spark', 0);\n " +
"'529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b'")
case class Sha2(left: Expression, right: Expression)
extends BinaryExpression with Serializable with ImplicitCastInputTypes {
......@@ -118,6 +127,9 @@ case class Sha2(left: Expression, right: Expression)
* A function that calculates a SHA-1 hash value and returns it as a hex string.
* For input of type [[BinaryType]] or [[StringType]].
*/
@ExpressionDescription(
usage = "_FUNC_(input) - Returns a sha1 hash value as a hex string of the input",
extended = "> SELECT _FUNC_('Spark');\n '85f5955f4b27a9a4c2aab6ffe5d7189fc298b92c'")
case class Sha1(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def dataType: DataType = StringType
......@@ -138,6 +150,9 @@ case class Sha1(child: Expression) extends UnaryExpression with ImplicitCastInpu
* A function that computes a cyclic redundancy check (CRC32) value and returns it as a bigint.
* For input of type [[BinaryType]].
*/
@ExpressionDescription(
usage = "_FUNC_(input) - Returns a cyclic redundancy check value as a bigint of the input",
extended = "> SELECT _FUNC_('Spark');\n 1557323817")
case class Crc32(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def dataType: DataType = LongType
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment