From 2f191c66b668fc97f82f44fd8336b6a4488c2f5d Mon Sep 17 00:00:00 2001
From: Davies Liu <davies@databricks.com>
Date: Tue, 17 Nov 2015 23:14:05 -0800
Subject: [PATCH] [SPARK-11643] [SQL] parse year with leading zero

Support the years between 0 <= year < 1000

Author: Davies Liu <davies@databricks.com>

Closes #9701 from davies/leading_zero.
---
 .../sql/catalyst/util/DateTimeUtils.scala     | 20 +++++++++++++++++--
 .../catalyst/util/DateTimeUtilsSuite.scala    | 17 +++++++++++++---
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 8fb3f41f1b..17a5527f3f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -241,6 +241,10 @@ object DateTimeUtils {
           i += 3
         } else if (i < 2) {
           if (b == '-') {
+            if (i == 0 && j != 4) {
+              // year should have exact four digits
+              return None
+            }
             segments(i) = currentSegmentValue
             currentSegmentValue = 0
             i += 1
@@ -308,13 +312,17 @@ object DateTimeUtils {
     }
 
     segments(i) = currentSegmentValue
+    if (!justTime && i == 0 && j != 4) {
+      // year should have exact four digits
+      return None
+    }
 
     while (digitsMilli < 6) {
       segments(6) *= 10
       digitsMilli += 1
     }
 
-    if (!justTime && (segments(0) < 1000 || segments(0) > 9999 || segments(1) < 1 ||
+    if (!justTime && (segments(0) < 0 || segments(0) > 9999 || segments(1) < 1 ||
         segments(1) > 12 || segments(2) < 1 || segments(2) > 31)) {
       return None
     }
@@ -368,6 +376,10 @@ object DateTimeUtils {
     while (j < bytes.length && (i < 3 && !(bytes(j) == ' ' || bytes(j) == 'T'))) {
       val b = bytes(j)
       if (i < 2 && b == '-') {
+        if (i == 0 && j != 4) {
+          // year should have exact four digits
+          return None
+        }
         segments(i) = currentSegmentValue
         currentSegmentValue = 0
         i += 1
@@ -381,8 +393,12 @@ object DateTimeUtils {
       }
       j += 1
     }
+    if (i == 0 && j != 4) {
+      // year should have exact four digits
+      return None
+    }
     segments(i) = currentSegmentValue
-    if (segments(0) < 1000 || segments(0) > 9999 || segments(1) < 1 || segments(1) > 12 ||
+    if (segments(0) < 0 || segments(0) > 9999 || segments(1) < 1 || segments(1) > 12 ||
         segments(2) < 1 || segments(2) > 31) {
       return None
     }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 60d45422bc..faca128bad 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -110,6 +110,10 @@ class DateTimeUtilsSuite extends SparkFunSuite {
     c.set(Calendar.MILLISECOND, 0)
     assert(stringToDate(UTF8String.fromString("2015")).get ===
       millisToDays(c.getTimeInMillis))
+    c.set(1, 0, 1, 0, 0, 0)
+    c.set(Calendar.MILLISECOND, 0)
+    assert(stringToDate(UTF8String.fromString("0001")).get ===
+      millisToDays(c.getTimeInMillis))
     c = Calendar.getInstance()
     c.set(2015, 2, 1, 0, 0, 0)
     c.set(Calendar.MILLISECOND, 0)
@@ -134,11 +138,15 @@ class DateTimeUtilsSuite extends SparkFunSuite {
     assert(stringToDate(UTF8String.fromString("2015.03.18")).isEmpty)
     assert(stringToDate(UTF8String.fromString("20150318")).isEmpty)
     assert(stringToDate(UTF8String.fromString("2015-031-8")).isEmpty)
+    assert(stringToDate(UTF8String.fromString("02015-03-18")).isEmpty)
+    assert(stringToDate(UTF8String.fromString("015-03-18")).isEmpty)
+    assert(stringToDate(UTF8String.fromString("015")).isEmpty)
+    assert(stringToDate(UTF8String.fromString("02015")).isEmpty)
   }
 
   test("string to time") {
     // Tests with UTC.
-    var c = Calendar.getInstance(TimeZone.getTimeZone("UTC"))
+    val c = Calendar.getInstance(TimeZone.getTimeZone("UTC"))
     c.set(Calendar.MILLISECOND, 0)
 
     c.set(1900, 0, 1, 0, 0, 0)
@@ -174,9 +182,9 @@ class DateTimeUtilsSuite extends SparkFunSuite {
     c.set(Calendar.MILLISECOND, 0)
     assert(stringToTimestamp(UTF8String.fromString("1969-12-31 16:00:00")).get ===
       c.getTimeInMillis * 1000)
-    c.set(2015, 0, 1, 0, 0, 0)
+    c.set(1, 0, 1, 0, 0, 0)
     c.set(Calendar.MILLISECOND, 0)
-    assert(stringToTimestamp(UTF8String.fromString("2015")).get ===
+    assert(stringToTimestamp(UTF8String.fromString("0001")).get ===
       c.getTimeInMillis * 1000)
     c = Calendar.getInstance()
     c.set(2015, 2, 1, 0, 0, 0)
@@ -319,6 +327,7 @@ class DateTimeUtilsSuite extends SparkFunSuite {
       UTF8String.fromString("2011-05-06 07:08:09.1000")).get === c.getTimeInMillis * 1000)
 
     assert(stringToTimestamp(UTF8String.fromString("238")).isEmpty)
+    assert(stringToTimestamp(UTF8String.fromString("00238")).isEmpty)
     assert(stringToTimestamp(UTF8String.fromString("2015-03-18 123142")).isEmpty)
     assert(stringToTimestamp(UTF8String.fromString("2015-03-18T123123")).isEmpty)
     assert(stringToTimestamp(UTF8String.fromString("2015-03-18X")).isEmpty)
@@ -326,6 +335,8 @@ class DateTimeUtilsSuite extends SparkFunSuite {
     assert(stringToTimestamp(UTF8String.fromString("2015.03.18")).isEmpty)
     assert(stringToTimestamp(UTF8String.fromString("20150318")).isEmpty)
     assert(stringToTimestamp(UTF8String.fromString("2015-031-8")).isEmpty)
+    assert(stringToTimestamp(UTF8String.fromString("02015-01-18")).isEmpty)
+    assert(stringToTimestamp(UTF8String.fromString("015-01-18")).isEmpty)
     assert(stringToTimestamp(
       UTF8String.fromString("2015-03-18T12:03.17-20:0")).isEmpty)
     assert(stringToTimestamp(
-- 
GitLab