Skip to content
Snippets Groups Projects
Commit 2f493f7e authored by Davies Liu's avatar Davies Liu Committed by Cheng Lian
Browse files

[SPARK-10177] [SQL] fix reading Timestamp in parquet from Hive

We misunderstood the Julian days and nanoseconds of the day in parquet (as TimestampType) from Hive/Impala, they are overlapped, so can't be added together directly.

In order to avoid the confusing rounding when do the converting, we use `2440588` as the Julian Day of epoch of unix timestamp (which should be 2440587.5).

Author: Davies Liu <davies@databricks.com>
Author: Cheng Lian <lian@databricks.com>

Closes #8400 from davies/timestamp_parquet.
parent 1fc37581
No related branches found
No related tags found
No related merge requests found
...@@ -37,7 +37,8 @@ object DateTimeUtils { ...@@ -37,7 +37,8 @@ object DateTimeUtils {
type SQLTimestamp = Long type SQLTimestamp = Long
// see http://stackoverflow.com/questions/466321/convert-unix-timestamp-to-julian // see http://stackoverflow.com/questions/466321/convert-unix-timestamp-to-julian
final val JULIAN_DAY_OF_EPOCH = 2440587 // and .5 // it's 2440587.5, rounding up to compatible with Hive
final val JULIAN_DAY_OF_EPOCH = 2440588
final val SECONDS_PER_DAY = 60 * 60 * 24L final val SECONDS_PER_DAY = 60 * 60 * 24L
final val MICROS_PER_SECOND = 1000L * 1000L final val MICROS_PER_SECOND = 1000L * 1000L
final val NANOS_PER_SECOND = MICROS_PER_SECOND * 1000L final val NANOS_PER_SECOND = MICROS_PER_SECOND * 1000L
...@@ -183,7 +184,7 @@ object DateTimeUtils { ...@@ -183,7 +184,7 @@ object DateTimeUtils {
*/ */
def fromJulianDay(day: Int, nanoseconds: Long): SQLTimestamp = { def fromJulianDay(day: Int, nanoseconds: Long): SQLTimestamp = {
// use Long to avoid rounding errors // use Long to avoid rounding errors
val seconds = (day - JULIAN_DAY_OF_EPOCH).toLong * SECONDS_PER_DAY - SECONDS_PER_DAY / 2 val seconds = (day - JULIAN_DAY_OF_EPOCH).toLong * SECONDS_PER_DAY
seconds * MICROS_PER_SECOND + nanoseconds / 1000L seconds * MICROS_PER_SECOND + nanoseconds / 1000L
} }
...@@ -191,7 +192,7 @@ object DateTimeUtils { ...@@ -191,7 +192,7 @@ object DateTimeUtils {
* Returns Julian day and nanoseconds in a day from the number of microseconds * Returns Julian day and nanoseconds in a day from the number of microseconds
*/ */
def toJulianDay(us: SQLTimestamp): (Int, Long) = { def toJulianDay(us: SQLTimestamp): (Int, Long) = {
val seconds = us / MICROS_PER_SECOND + SECONDS_PER_DAY / 2 val seconds = us / MICROS_PER_SECOND
val day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH val day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH
val secondsInDay = seconds % SECONDS_PER_DAY val secondsInDay = seconds % SECONDS_PER_DAY
val nanos = (us % MICROS_PER_SECOND) * 1000L val nanos = (us % MICROS_PER_SECOND) * 1000L
......
...@@ -49,13 +49,18 @@ class DateTimeUtilsSuite extends SparkFunSuite { ...@@ -49,13 +49,18 @@ class DateTimeUtilsSuite extends SparkFunSuite {
test("us and julian day") { test("us and julian day") {
val (d, ns) = toJulianDay(0) val (d, ns) = toJulianDay(0)
assert(d === JULIAN_DAY_OF_EPOCH) assert(d === JULIAN_DAY_OF_EPOCH)
assert(ns === SECONDS_PER_DAY / 2 * NANOS_PER_SECOND) assert(ns === 0)
assert(fromJulianDay(d, ns) == 0L) assert(fromJulianDay(d, ns) == 0L)
val t = new Timestamp(61394778610000L) // (2015, 6, 11, 10, 10, 10, 100) val t = Timestamp.valueOf("2015-06-11 10:10:10.100")
val (d1, ns1) = toJulianDay(fromJavaTimestamp(t)) val (d1, ns1) = toJulianDay(fromJavaTimestamp(t))
val t2 = toJavaTimestamp(fromJulianDay(d1, ns1)) val t1 = toJavaTimestamp(fromJulianDay(d1, ns1))
assert(t.equals(t2)) assert(t.equals(t1))
val t2 = Timestamp.valueOf("2015-06-11 20:10:10.100")
val (d2, ns2) = toJulianDay(fromJavaTimestamp(t2))
val t22 = toJavaTimestamp(fromJulianDay(d2, ns2))
assert(t2.equals(t22))
} }
test("SPARK-6785: java date conversion before and after epoch") { test("SPARK-6785: java date conversion before and after epoch") {
......
...@@ -113,7 +113,7 @@ class ParquetHiveCompatibilitySuite extends ParquetCompatibilityTest with Before ...@@ -113,7 +113,7 @@ class ParquetHiveCompatibilitySuite extends ParquetCompatibilityTest with Before
"BOOLEAN", "TINYINT", "SMALLINT", "INT", "BIGINT", "FLOAT", "DOUBLE", "STRING") "BOOLEAN", "TINYINT", "SMALLINT", "INT", "BIGINT", "FLOAT", "DOUBLE", "STRING")
} }
ignore("SPARK-10177 timestamp") { test("SPARK-10177 timestamp") {
testParquetHiveCompatibility(Row(Timestamp.valueOf("2015-08-24 00:31:00")), "TIMESTAMP") testParquetHiveCompatibility(Row(Timestamp.valueOf("2015-08-24 00:31:00")), "TIMESTAMP")
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment