From 711addd46e98e42deca97c5b9c0e55fddebaa458 Mon Sep 17 00:00:00 2001 From: Jason White <jason.white@shopify.com> Date: Tue, 7 Mar 2017 13:14:37 -0800 Subject: [PATCH] [SPARK-19561] [PYTHON] cast TimestampType.toInternal output to long MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? Cast the output of `TimestampType.toInternal` to long to allow for proper Timestamp creation in DataFrames near the epoch. ## How was this patch tested? Added a new test that fails without the change. dongjoon-hyun davies Mind taking a look? The contribution is my original work and I license the work to the project under the project’s open source license. Author: Jason White <jason.white@shopify.com> Closes #16896 from JasonMWhite/SPARK-19561. (cherry picked from commit 6f4684622a951806bebe7652a14f7d1ce03e24c7) Signed-off-by: Davies Liu <davies.liu@gmail.com> --- python/pyspark/sql/tests.py | 6 ++++++ python/pyspark/sql/types.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 877ab88d17..4140c2d11c 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -1360,6 +1360,12 @@ class SQLTests(ReusedPySparkTestCase): self.assertEqual(now, now1) self.assertEqual(now, utcnow1) + # regression test for SPARK-19561 + def test_datetime_at_epoch(self): + epoch = datetime.datetime.fromtimestamp(0) + df = self.spark.createDataFrame([Row(date=epoch)]) + self.assertEqual(df.first()['date'], epoch) + def test_decimal(self): from decimal import Decimal schema = StructType([StructField("decimal", DecimalType(10, 5))]) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 4a023123b6..d4b9fa8545 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -189,7 +189,7 @@ class TimestampType(AtomicType): if dt is not None: seconds = (calendar.timegm(dt.utctimetuple()) if dt.tzinfo else time.mktime(dt.timetuple())) - return int(seconds) * 1000000 + dt.microsecond + return long(seconds) * 1000000 + dt.microsecond def fromInternal(self, ts): if ts is not None: -- GitLab