From 4ea6480a3ba4ca7e09089c9b99d4a855894b9015 Mon Sep 17 00:00:00 2001 From: Tarek Auel <tarek.auel@googlemail.com> Date: Thu, 16 Jul 2015 08:26:39 -0700 Subject: [PATCH] [SPARK-8995] [SQL] cast date strings like '2015-01-01 12:15:31' to date Jira https://issues.apache.org/jira/browse/SPARK-8995 In PR #6981 we noticed that we cannot cast date strings that contain a time, like '2015-03-18 12:39:40', to date. Besides, it's not possible to cast a string like '18:03:20' to a timestamp. If a time is passed without a date, today is inferred as the date. Author: Tarek Auel <tarek.auel@googlemail.com> Author: Tarek Auel <tarek.auel@gmail.com> Closes #7353 from tarekauel/SPARK-8995 and squashes the following commits: 14f333b [Tarek Auel] [SPARK-8995] added tests for daylight saving time ca1ae69 [Tarek Auel] [SPARK-8995] style fix d20b8b4 [Tarek Auel] [SPARK-8995] bug fix: distinguish between 0 and null ef05753 [Tarek Auel] [SPARK-8995] added check for year >= 1000 01c9ff3 [Tarek Auel] [SPARK-8995] support for time strings 34ec573 [Tarek Auel] fixed style 71622c0 [Tarek Auel] improved timestamp and date parsing 0e30c0a [Tarek Auel] Hive compatibility cfbaed7 [Tarek Auel] fixed wrong checks 71f89c1 [Tarek Auel] [SPARK-8995] minor style fix f7452fa [Tarek Auel] [SPARK-8995] removed old timestamp parsing 30e5aec [Tarek Auel] [SPARK-8995] date and timestamp cast c1083fb [Tarek Auel] [SPARK-8995] cast date strings like '2015-01-01 12:15:31' to date or timestamp --- .../spark/sql/catalyst/expressions/Cast.scala | 17 +- .../sql/catalyst/util/DateTimeUtils.scala | 198 ++++++++++++++++ .../sql/catalyst/expressions/CastSuite.scala | 144 ++++++++++++ .../catalyst/util/DateTimeUtilsSuite.scala | 218 ++++++++++++++++++ 4 files changed, 562 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index ab02addfb4..83d5b3b76b 100644 ---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -167,17 +167,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w // TimestampConverter private[this] def castToTimestamp(from: DataType): Any => Any = from match { case StringType => - buildCast[UTF8String](_, utfs => { - // Throw away extra if more than 9 decimal places - val s = utfs.toString - val periodIdx = s.indexOf(".") - var n = s - if (periodIdx != -1 && n.length() - periodIdx > 9) { - n = n.substring(0, periodIdx + 10) - } - try DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf(n)) - catch { case _: java.lang.IllegalArgumentException => null } - }) + buildCast[UTF8String](_, utfs => DateTimeUtils.stringToTimestamp(utfs).orNull) case BooleanType => buildCast[Boolean](_, b => if (b) 1L else 0) case LongType => @@ -220,10 +210,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w // DateConverter private[this] def castToDate(from: DataType): Any => Any = from match { case StringType => - buildCast[UTF8String](_, s => - try DateTimeUtils.fromJavaDate(Date.valueOf(s.toString)) - catch { case _: java.lang.IllegalArgumentException => null } - ) + buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s).orNull) case TimestampType => // throw valid precision more than seconds, according to Hive. // Timestamp.nanos is in 0 to 999,999,999, no more than a second. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index c1ddee3ef0..53c32a0a98 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -21,6 +21,8 @@ import java.sql.{Date, Timestamp} import java.text.{DateFormat, SimpleDateFormat} import java.util.{Calendar, TimeZone} +import org.apache.spark.unsafe.types.UTF8String + /** * Helper functions for converting between internal and external date and time representations. * Dates are exposed externally as java.sql.Date and are represented internally as the number of @@ -180,4 +182,200 @@ object DateTimeUtils { val nanos = (us % MICROS_PER_SECOND) * 1000L (day.toInt, secondsInDay * NANOS_PER_SECOND + nanos) } + + /** + * Parses a given UTF8 date string to the corresponding [[Long]] value. + * The return type is [[Option]] in order to distinguish between 0L and null.
The following + * formats are allowed: + * + * `yyyy` + * `yyyy-[m]m` + * `yyyy-[m]m-[d]d` + * `yyyy-[m]m-[d]d ` + * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` + * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` + * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` + * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` + * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` + * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` + * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` + * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` + * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` + * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` + * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` + * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` + * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` + * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` + * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` + * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` + */ + def stringToTimestamp(s: UTF8String): Option[Long] = { + if (s == null) { + return None + } + var timeZone: Option[Byte] = None + val segments: Array[Int] = Array[Int](1, 1, 1, 0, 0, 0, 0, 0, 0) + var i = 0 + var currentSegmentValue = 0 + val bytes = s.getBytes + var j = 0 + var digitsMilli = 0 + var justTime = false + while (j < bytes.length) { + val b = bytes(j) + val parsedValue = b - '0'.toByte + if (parsedValue < 0 || parsedValue > 9) { + if (j == 0 && b == 'T') { + justTime = true + i += 3 + } else if (i < 2) { + if (b == '-') { + segments(i) = currentSegmentValue + currentSegmentValue = 0 + i += 1 + } else if (i == 0 && b == ':') { + justTime = true + segments(3) = currentSegmentValue + currentSegmentValue = 0 + i = 4 + } else { + return None + } + } else if (i == 2) { + if (b == ' ' || b == 'T') { + segments(i) = currentSegmentValue + currentSegmentValue = 0 + i += 1 + } else { + return None + } + } else if (i == 
3 || i == 4) { + if (b == ':') { + segments(i) = currentSegmentValue + currentSegmentValue = 0 + i += 1 + } else { + return None + } + } else if (i == 5 || i == 6) { + if (b == 'Z') { + segments(i) = currentSegmentValue + currentSegmentValue = 0 + i += 1 + timeZone = Some(43) + } else if (b == '-' || b == '+') { + segments(i) = currentSegmentValue + currentSegmentValue = 0 + i += 1 + timeZone = Some(b) + } else if (b == '.' && i == 5) { + segments(i) = currentSegmentValue + currentSegmentValue = 0 + i += 1 + } else { + return None + } + if (i == 6 && b != '.') { + i += 1 + } + } else { + if (b == ':' || b == ' ') { + segments(i) = currentSegmentValue + currentSegmentValue = 0 + i += 1 + } else { + return None + } + } + } else { + if (i == 6) { + digitsMilli += 1 + } + currentSegmentValue = currentSegmentValue * 10 + parsedValue + } + j += 1 + } + + segments(i) = currentSegmentValue + + while (digitsMilli < 6) { + segments(6) *= 10 + digitsMilli += 1 + } + + if (!justTime && (segments(0) < 1000 || segments(0) > 9999 || segments(1) < 1 || + segments(1) > 12 || segments(2) < 1 || segments(2) > 31)) { + return None + } + + if (segments(3) < 0 || segments(3) > 23 || segments(4) < 0 || segments(4) > 59 || + segments(5) < 0 || segments(5) > 59 || segments(6) < 0 || segments(6) > 999999 || + segments(7) < 0 || segments(7) > 23 || segments(8) < 0 || segments(8) > 59) { + return None + } + + val c = if (timeZone.isEmpty) { + Calendar.getInstance() + } else { + Calendar.getInstance( + TimeZone.getTimeZone(f"GMT${timeZone.get.toChar}${segments(7)}%02d:${segments(8)}%02d")) + } + + if (justTime) { + c.set(Calendar.HOUR, segments(3)) + c.set(Calendar.MINUTE, segments(4)) + c.set(Calendar.SECOND, segments(5)) + } else { + c.set(segments(0), segments(1) - 1, segments(2), segments(3), segments(4), segments(5)) + } + + Some(c.getTimeInMillis / 1000 * 1000000 + segments(6)) + } + + /** + * Parses a given UTF8 date string to the corresponding [[Int]] value.
+ * The return type is [[Option]] in order to distinguish between 0 and null. The following + * formats are allowed: + * + * `yyyy`, + * `yyyy-[m]m` + * `yyyy-[m]m-[d]d` + * `yyyy-[m]m-[d]d ` + * `yyyy-[m]m-[d]d *` + * `yyyy-[m]m-[d]dT*` + */ + def stringToDate(s: UTF8String): Option[Int] = { + if (s == null) { + return None + } + val segments: Array[Int] = Array[Int](1, 1, 1) + var i = 0 + var currentSegmentValue = 0 + val bytes = s.getBytes + var j = 0 + while (j < bytes.length && (i < 3 && !(bytes(j) == ' ' || bytes(j) == 'T'))) { + val b = bytes(j) + if (i < 2 && b == '-') { + segments(i) = currentSegmentValue + currentSegmentValue = 0 + i += 1 + } else { + val parsedValue = b - '0'.toByte + if (parsedValue < 0 || parsedValue > 9) { + return None + } else { + currentSegmentValue = currentSegmentValue * 10 + parsedValue + } + } + j += 1 + } + segments(i) = currentSegmentValue + if (segments(0) < 1000 || segments(0) > 9999 || segments(1) < 1 || segments(1) > 12 || + segments(2) < 1 || segments(2) > 31) { + return None + } + val c = Calendar.getInstance() + c.set(segments(0), segments(1) - 1, segments(2), 0, 0, 0) + Some((c.getTimeInMillis / 1000 / 3600 / 24).toInt) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 1de161c367..ef8bcd41f7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Timestamp, Date} +import java.util.{TimeZone, Calendar} import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow @@ -41,6 +42,137 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(cast(v, Literal(expected).dataType), expected) } + test("cast string 
to date") { + var c = Calendar.getInstance() + c.set(2015, 0, 1, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + checkEvaluation(Cast(Literal("2015"), DateType), new Date(c.getTimeInMillis)) + c = Calendar.getInstance() + c.set(2015, 2, 1, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + checkEvaluation(Cast(Literal("2015-03"), DateType), new Date(c.getTimeInMillis)) + c = Calendar.getInstance() + c.set(2015, 2, 18, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + checkEvaluation(Cast(Literal("2015-03-18"), DateType), new Date(c.getTimeInMillis)) + checkEvaluation(Cast(Literal("2015-03-18 "), DateType), new Date(c.getTimeInMillis)) + checkEvaluation(Cast(Literal("2015-03-18 123142"), DateType), new Date(c.getTimeInMillis)) + checkEvaluation(Cast(Literal("2015-03-18T123123"), DateType), new Date(c.getTimeInMillis)) + checkEvaluation(Cast(Literal("2015-03-18T"), DateType), new Date(c.getTimeInMillis)) + + checkEvaluation(Cast(Literal("2015-03-18X"), DateType), null) + checkEvaluation(Cast(Literal("2015/03/18"), DateType), null) + checkEvaluation(Cast(Literal("2015.03.18"), DateType), null) + checkEvaluation(Cast(Literal("20150318"), DateType), null) + checkEvaluation(Cast(Literal("2015-031-8"), DateType), null) + } + + test("cast string to timestamp") { + checkEvaluation(Cast(Literal("123"), TimestampType), + null) + + var c = Calendar.getInstance() + c.set(2015, 0, 1, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + checkEvaluation(Cast(Literal("2015"), TimestampType), + new Timestamp(c.getTimeInMillis)) + c = Calendar.getInstance() + c.set(2015, 2, 1, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + checkEvaluation(Cast(Literal("2015-03"), TimestampType), + new Timestamp(c.getTimeInMillis)) + c = Calendar.getInstance() + c.set(2015, 2, 18, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + checkEvaluation(Cast(Literal("2015-03-18"), TimestampType), + new Timestamp(c.getTimeInMillis)) + checkEvaluation(Cast(Literal("2015-03-18 "), TimestampType), + new Timestamp(c.getTimeInMillis)) + 
checkEvaluation(Cast(Literal("2015-03-18T"), TimestampType), + new Timestamp(c.getTimeInMillis)) + + c = Calendar.getInstance() + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 0) + checkEvaluation(Cast(Literal("2015-03-18 12:03:17"), TimestampType), + new Timestamp(c.getTimeInMillis)) + checkEvaluation(Cast(Literal("2015-03-18T12:03:17"), TimestampType), + new Timestamp(c.getTimeInMillis)) + + c = Calendar.getInstance(TimeZone.getTimeZone("UTC")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 0) + checkEvaluation(Cast(Literal("2015-03-18T12:03:17Z"), TimestampType), + new Timestamp(c.getTimeInMillis)) + checkEvaluation(Cast(Literal("2015-03-18 12:03:17Z"), TimestampType), + new Timestamp(c.getTimeInMillis)) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT-01:00")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 0) + checkEvaluation(Cast(Literal("2015-03-18T12:03:17-1:0"), TimestampType), + new Timestamp(c.getTimeInMillis)) + checkEvaluation(Cast(Literal("2015-03-18T12:03:17-01:00"), TimestampType), + new Timestamp(c.getTimeInMillis)) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:30")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 0) + checkEvaluation(Cast(Literal("2015-03-18T12:03:17+07:30"), TimestampType), + new Timestamp(c.getTimeInMillis)) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:03")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 0) + checkEvaluation(Cast(Literal("2015-03-18T12:03:17+7:3"), TimestampType), + new Timestamp(c.getTimeInMillis)) + + c = Calendar.getInstance() + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 123) + checkEvaluation(Cast(Literal("2015-03-18 12:03:17.123"), TimestampType), + new Timestamp(c.getTimeInMillis)) + checkEvaluation(Cast(Literal("2015-03-18T12:03:17.123"), TimestampType), + new Timestamp(c.getTimeInMillis)) + + c = Calendar.getInstance(TimeZone.getTimeZone("UTC")) + c.set(2015, 2, 18, 12, 3, 17) + 
c.set(Calendar.MILLISECOND, 456) + checkEvaluation(Cast(Literal("2015-03-18T12:03:17.456Z"), TimestampType), + new Timestamp(c.getTimeInMillis)) + checkEvaluation(Cast(Literal("2015-03-18 12:03:17.456Z"), TimestampType), + new Timestamp(c.getTimeInMillis)) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT-01:00")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 123) + checkEvaluation(Cast(Literal("2015-03-18T12:03:17.123-1:0"), TimestampType), + new Timestamp(c.getTimeInMillis)) + checkEvaluation(Cast(Literal("2015-03-18T12:03:17.123-01:00"), TimestampType), + new Timestamp(c.getTimeInMillis)) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:30")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 123) + checkEvaluation(Cast(Literal("2015-03-18T12:03:17.123+07:30"), TimestampType), + new Timestamp(c.getTimeInMillis)) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:03")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 123) + checkEvaluation(Cast(Literal("2015-03-18T12:03:17.123+7:3"), TimestampType), + new Timestamp(c.getTimeInMillis)) + + checkEvaluation(Cast(Literal("2015-03-18 123142"), TimestampType), null) + checkEvaluation(Cast(Literal("2015-03-18T123123"), TimestampType), null) + checkEvaluation(Cast(Literal("2015-03-18X"), TimestampType), null) + checkEvaluation(Cast(Literal("2015/03/18"), TimestampType), null) + checkEvaluation(Cast(Literal("2015.03.18"), TimestampType), null) + checkEvaluation(Cast(Literal("20150318"), TimestampType), null) + checkEvaluation(Cast(Literal("2015-031-8"), TimestampType), null) + checkEvaluation(Cast(Literal("2015-03-18T12:03:17-0:70"), TimestampType), null) + } + test("cast from int") { checkCast(0, false) checkCast(1, true) @@ -149,6 +281,18 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { val nts = sts + ".1" val ts = Timestamp.valueOf(nts) + val defaultTimeZone = TimeZone.getDefault + 
TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) + var c = Calendar.getInstance() + c.set(2015, 2, 8, 2, 30, 0) + checkEvaluation(cast(cast(new Timestamp(c.getTimeInMillis), StringType), TimestampType), + c.getTimeInMillis * 1000) + c = Calendar.getInstance() + c.set(2015, 10, 1, 2, 30, 0) + checkEvaluation(cast(cast(new Timestamp(c.getTimeInMillis), StringType), TimestampType), + c.getTimeInMillis * 1000) + TimeZone.setDefault(defaultTimeZone) + checkEvaluation(cast("abdef", StringType), "abdef") checkEvaluation(cast("abdef", DecimalType.Unlimited), null) checkEvaluation(cast("abdef", TimestampType), null) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index f63ac191e7..c65fcbc4d1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -19,8 +19,10 @@ package org.apache.spark.sql.catalyst.util import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat +import java.util.{TimeZone, Calendar} import org.apache.spark.SparkFunSuite +import org.apache.spark.unsafe.types.UTF8String class DateTimeUtilsSuite extends SparkFunSuite { @@ -86,4 +88,220 @@ class DateTimeUtilsSuite extends SparkFunSuite { checkFromToJavaDate(new Date(df1.parse("1776-07-04 10:30:00").getTime)) checkFromToJavaDate(new Date(df2.parse("1776-07-04 18:30:00 UTC").getTime)) } + + test("string to date") { + val millisPerDay = 1000L * 3600L * 24L + var c = Calendar.getInstance() + c.set(2015, 0, 28, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToDate(UTF8String.fromString("2015-01-28")).get === + c.getTimeInMillis / millisPerDay) + c.set(2015, 0, 1, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToDate(UTF8String.fromString("2015")).get === + 
c.getTimeInMillis / millisPerDay) + c = Calendar.getInstance() + c.set(2015, 2, 1, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToDate(UTF8String.fromString("2015-03")).get === + c.getTimeInMillis / millisPerDay) + c = Calendar.getInstance() + c.set(2015, 2, 18, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToDate(UTF8String.fromString("2015-03-18")).get === + c.getTimeInMillis / millisPerDay) + assert(DateTimeUtils.stringToDate(UTF8String.fromString("2015-03-18 ")).get === + c.getTimeInMillis / millisPerDay) + assert(DateTimeUtils.stringToDate(UTF8String.fromString("2015-03-18 123142")).get === + c.getTimeInMillis / millisPerDay) + assert(DateTimeUtils.stringToDate(UTF8String.fromString("2015-03-18T123123")).get === + c.getTimeInMillis / millisPerDay) + assert(DateTimeUtils.stringToDate(UTF8String.fromString("2015-03-18T")).get === + c.getTimeInMillis / millisPerDay) + + assert(DateTimeUtils.stringToDate(UTF8String.fromString("2015-03-18X")).isEmpty) + assert(DateTimeUtils.stringToDate(UTF8String.fromString("2015/03/18")).isEmpty) + assert(DateTimeUtils.stringToDate(UTF8String.fromString("2015.03.18")).isEmpty) + assert(DateTimeUtils.stringToDate(UTF8String.fromString("20150318")).isEmpty) + assert(DateTimeUtils.stringToDate(UTF8String.fromString("2015-031-8")).isEmpty) + } + + test("string to timestamp") { + var c = Calendar.getInstance() + c.set(1969, 11, 31, 16, 0, 0) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("1969-12-31 16:00:00")).get === + c.getTimeInMillis * 1000) + c.set(2015, 0, 1, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015")).get === + c.getTimeInMillis * 1000) + c = Calendar.getInstance() + c.set(2015, 2, 1, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03")).get === + c.getTimeInMillis * 1000) + c = 
Calendar.getInstance() + c.set(2015, 2, 18, 0, 0, 0) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-18")).get === + c.getTimeInMillis * 1000) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-18 ")).get === + c.getTimeInMillis * 1000) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-18T")).get === + c.getTimeInMillis * 1000) + + c = Calendar.getInstance() + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-18 12:03:17")).get === + c.getTimeInMillis * 1000) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-18T12:03:17")).get === + c.getTimeInMillis * 1000) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT-13:53")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03:17-13:53")).get === c.getTimeInMillis * 1000) + + c = Calendar.getInstance(TimeZone.getTimeZone("UTC")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-18T12:03:17Z")).get === + c.getTimeInMillis * 1000) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-18 12:03:17Z")).get === + c.getTimeInMillis * 1000) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT-01:00")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-18T12:03:17-1:0")).get === + c.getTimeInMillis * 1000) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03:17-01:00")).get === c.getTimeInMillis * 1000) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:30")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToTimestamp( + 
UTF8String.fromString("2015-03-18T12:03:17+07:30")).get === c.getTimeInMillis * 1000) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:03")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03:17+07:03")).get === c.getTimeInMillis * 1000) + + c = Calendar.getInstance() + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 123) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18 12:03:17.123")).get === c.getTimeInMillis * 1000) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03:17.123")).get === c.getTimeInMillis * 1000) + + c = Calendar.getInstance(TimeZone.getTimeZone("UTC")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 456) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03:17.456Z")).get === c.getTimeInMillis * 1000) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18 12:03:17.456Z")).get === c.getTimeInMillis * 1000) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT-01:00")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 123) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03:17.123-1:0")).get === c.getTimeInMillis * 1000) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03:17.123-01:00")).get === c.getTimeInMillis * 1000) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:30")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 123) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03:17.123+07:30")).get === c.getTimeInMillis * 1000) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:30")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 123) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03:17.123+07:30")).get === 
c.getTimeInMillis * 1000) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:30")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 123) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03:17.123121+7:30")).get === + c.getTimeInMillis * 1000 + 121) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:30")) + c.set(2015, 2, 18, 12, 3, 17) + c.set(Calendar.MILLISECOND, 123) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03:17.12312+7:30")).get === + c.getTimeInMillis * 1000 + 120) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:30")) + c.set(Calendar.HOUR, 18) + c.set(Calendar.MINUTE, 12) + c.set(Calendar.SECOND, 15) + c.set(Calendar.MILLISECOND, 123) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("T18:12:15.12312+7:30")).get === + c.getTimeInMillis * 1000 + 120) + + c = Calendar.getInstance(TimeZone.getTimeZone("GMT+07:30")) + c.set(Calendar.HOUR, 18) + c.set(Calendar.MINUTE, 12) + c.set(Calendar.SECOND, 15) + c.set(Calendar.MILLISECOND, 123) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("18:12:15.12312+7:30")).get === + c.getTimeInMillis * 1000 + 120) + + c = Calendar.getInstance() + c.set(2011, 4, 6, 7, 8, 9) + c.set(Calendar.MILLISECOND, 100) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2011-05-06 07:08:09.1000")).get === c.getTimeInMillis * 1000) + + val defaultTimeZone = TimeZone.getDefault + TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) + + c = Calendar.getInstance() + c.set(2015, 2, 8, 2, 0, 0) + c.set(Calendar.MILLISECOND, 0) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-3-8 2:0:0")).get === c.getTimeInMillis * 1000) + c.add(Calendar.MINUTE, 30) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-3-8 3:30:0")).get === c.getTimeInMillis * 1000) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-3-8 2:30:0")).get === 
c.getTimeInMillis * 1000) + + c = Calendar.getInstance() + c.set(2015, 10, 1, 1, 59, 0) + c.set(Calendar.MILLISECOND, 0) + c.add(Calendar.MINUTE, 31) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-11-1 2:30:0")).get === c.getTimeInMillis * 1000) + TimeZone.setDefault(defaultTimeZone) + + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("238")).isEmpty) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-18 123142")).isEmpty) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-18T123123")).isEmpty) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-18X")).isEmpty) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015/03/18")).isEmpty) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015.03.18")).isEmpty) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("20150318")).isEmpty) + assert(DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-031-8")).isEmpty) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03.17-20:0")).isEmpty) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03.17-0:70")).isEmpty) + assert(DateTimeUtils.stringToTimestamp( + UTF8String.fromString("2015-03-18T12:03.17-1:0:0")).isEmpty) + } } -- GitLab