From 711addd46e98e42deca97c5b9c0e55fddebaa458 Mon Sep 17 00:00:00 2001
From: Jason White <jason.white@shopify.com>
Date: Tue, 7 Mar 2017 13:14:37 -0800
Subject: [PATCH] [SPARK-19561] [PYTHON] cast TimestampType.toInternal output
 to long
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## What changes were proposed in this pull request?

Cast the output of `TimestampType.toInternal` to `long` so that Timestamps near the epoch are created correctly in DataFrames.

## How was this patch tested?

Added a new test that fails without the change.

dongjoon-hyun, davies: mind taking a look?

The contribution is my original work and I license the work to the project under the project’s open source license.

Author: Jason White <jason.white@shopify.com>

Closes #16896 from JasonMWhite/SPARK-19561.

(cherry picked from commit 6f4684622a951806bebe7652a14f7d1ce03e24c7)
Signed-off-by: Davies Liu <davies.liu@gmail.com>
---
 python/pyspark/sql/tests.py | 6 ++++++
 python/pyspark/sql/types.py | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 877ab88d17..4140c2d11c 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -1360,6 +1360,12 @@ class SQLTests(ReusedPySparkTestCase):
         self.assertEqual(now, now1)
         self.assertEqual(now, utcnow1)
 
+    # regression test for SPARK-19561
+    def test_datetime_at_epoch(self):
+        epoch = datetime.datetime.fromtimestamp(0)
+        df = self.spark.createDataFrame([Row(date=epoch)])
+        self.assertEqual(df.first()['date'], epoch)
+
     def test_decimal(self):
         from decimal import Decimal
         schema = StructType([StructField("decimal", DecimalType(10, 5))])
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 4a023123b6..d4b9fa8545 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -189,7 +189,7 @@ class TimestampType(AtomicType):
         if dt is not None:
             seconds = (calendar.timegm(dt.utctimetuple()) if dt.tzinfo
                        else time.mktime(dt.timetuple()))
-            return int(seconds) * 1000000 + dt.microsecond
+            return long(seconds) * 1000000 + dt.microsecond
 
     def fromInternal(self, ts):
         if ts is not None:
-- 
GitLab