Skip to content
Snippets Groups Projects
Commit b0768538 authored by Herman van Hovell's avatar Herman van Hovell Committed by Reynold Xin
Browse files

[SPARK-14321][SQL] Reduce date format cost and string-to-date cost in date functions

## What changes were proposed in this pull request?
The current implementations of `UnixTime` and `FromUnixTime` do not cache their parser/formatter as much as they could. This PR resolved this issue.

This PR is a take over from https://github.com/apache/spark/pull/13522 and further optimizes the re-use of the parser/formatter. It also fixes the improves handling (catching the actual exception instead of `Throwable`). All credits for this work should go to rajeshbalamohan.

This PR closes https://github.com/apache/spark/pull/13522

## How was this patch tested?
Current tests.

Author: Herman van Hovell <hvanhovell@databricks.com>
Author: Rajesh Balamohan <rbalamohan@apache.org>

Closes #13581 from hvanhovell/SPARK-14321.
parent 6cb71f47
No related branches found
No related tags found
No related merge requests found
......@@ -399,6 +399,8 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {
override def nullable: Boolean = true
private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
private lazy val formatter: SimpleDateFormat =
Try(new SimpleDateFormat(constFormat.toString)).getOrElse(null)
override def eval(input: InternalRow): Any = {
val t = left.eval(input)
......@@ -411,11 +413,11 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {
case TimestampType =>
t.asInstanceOf[Long] / 1000000L
case StringType if right.foldable =>
if (constFormat != null) {
Try(new SimpleDateFormat(constFormat.toString).parse(
t.asInstanceOf[UTF8String].toString).getTime / 1000L).getOrElse(null)
} else {
if (constFormat == null || formatter == null) {
null
} else {
Try(formatter.parse(
t.asInstanceOf[UTF8String].toString).getTime / 1000L).getOrElse(null)
}
case StringType =>
val f = right.eval(input)
......@@ -434,13 +436,10 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {
left.dataType match {
case StringType if right.foldable =>
val sdf = classOf[SimpleDateFormat].getName
val fString = if (constFormat == null) null else constFormat.toString
val formatter = ctx.freshName("formatter")
if (fString == null) {
ev.copy(code = s"""
boolean ${ev.isNull} = true;
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""")
if (formatter == null) {
ExprCode("", "true", ctx.defaultValue(dataType))
} else {
val formatterName = ctx.addReferenceObj("formatter", formatter, sdf)
val eval1 = left.genCode(ctx)
ev.copy(code = s"""
${eval1.code}
......@@ -448,10 +447,8 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
if (!${ev.isNull}) {
try {
$sdf $formatter = new $sdf("$fString");
${ev.value} =
$formatter.parse(${eval1.value}.toString()).getTime() / 1000L;
} catch (java.lang.Throwable e) {
${ev.value} = $formatterName.parse(${eval1.value}.toString()).getTime() / 1000L;
} catch (java.text.ParseException e) {
${ev.isNull} = true;
}
}""")
......@@ -463,7 +460,9 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes {
try {
${ev.value} =
(new $sdf($format.toString())).parse($string.toString()).getTime() / 1000L;
} catch (java.lang.Throwable e) {
} catch (java.lang.IllegalArgumentException e) {
${ev.isNull} = true;
} catch (java.text.ParseException e) {
${ev.isNull} = true;
}
"""
......@@ -520,6 +519,8 @@ case class FromUnixTime(sec: Expression, format: Expression)
override def inputTypes: Seq[AbstractDataType] = Seq(LongType, StringType)
private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
private lazy val formatter: SimpleDateFormat =
Try(new SimpleDateFormat(constFormat.toString)).getOrElse(null)
override def eval(input: InternalRow): Any = {
val time = left.eval(input)
......@@ -527,10 +528,10 @@ case class FromUnixTime(sec: Expression, format: Expression)
null
} else {
if (format.foldable) {
if (constFormat == null) {
if (constFormat == null || formatter == null) {
null
} else {
Try(UTF8String.fromString(new SimpleDateFormat(constFormat.toString).format(
Try(UTF8String.fromString(formatter.format(
new java.util.Date(time.asInstanceOf[Long] * 1000L)))).getOrElse(null)
}
} else {
......@@ -549,11 +550,10 @@ case class FromUnixTime(sec: Expression, format: Expression)
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val sdf = classOf[SimpleDateFormat].getName
if (format.foldable) {
if (constFormat == null) {
ev.copy(code = s"""
boolean ${ev.isNull} = true;
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""")
if (formatter == null) {
ExprCode("", "true", "(UTF8String) null")
} else {
val formatterName = ctx.addReferenceObj("formatter", formatter, sdf)
val t = left.genCode(ctx)
ev.copy(code = s"""
${t.code}
......@@ -561,9 +561,9 @@ case class FromUnixTime(sec: Expression, format: Expression)
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
if (!${ev.isNull}) {
try {
${ev.value} = UTF8String.fromString(new $sdf("${constFormat.toString}").format(
${ev.value} = UTF8String.fromString($formatterName.format(
new java.util.Date(${t.value} * 1000L)));
} catch (java.lang.Throwable e) {
} catch (java.lang.IllegalArgumentException e) {
${ev.isNull} = true;
}
}""")
......@@ -574,7 +574,7 @@ case class FromUnixTime(sec: Expression, format: Expression)
try {
${ev.value} = UTF8String.fromString((new $sdf($f.toString())).format(
new java.util.Date($seconds * 1000L)));
} catch (java.lang.Throwable e) {
} catch (java.lang.IllegalArgumentException e) {
${ev.isNull} = true;
}""".stripMargin
})
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment