diff --git a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala index 6e4eab4b805c12750efe7a8c0db52f906d26934c..42f67e8dbe8654721eb4f17e3245ba1183b1a63e 100644 --- a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala +++ b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala @@ -73,6 +73,16 @@ private[spark] object SerDeUtil extends Logging { // This must be ISO 8859-1 / Latin 1, not UTF-8, to interoperate correctly val data = args(1).asInstanceOf[String].getBytes(StandardCharsets.ISO_8859_1) construct(typecode, machineCodes(typecode), data) + } else if (args.length == 2 && args(0) == "l") { + // On Python 2, an array of typecode 'l' should be handled as long rather than int. + val values = args(1).asInstanceOf[JArrayList[_]] + val result = new Array[Long](values.size) + var i = 0 + while (i < values.size) { + result(i) = values.get(i).asInstanceOf[Number].longValue() + i += 1 + } + result } else { super.construct(args) } diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index c0e3b8d1323965d474639329f45d498bbc5c5419..9db2f40474f70e5b802a173b050baaca9f508195 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -2342,6 +2342,12 @@ class SQLTests(ReusedPySparkTestCase): self.assertEquals(types[2], np.bool) self.assertEquals(types[3], np.float32) + def test_create_dataframe_from_array_of_long(self): + import array + data = [Row(longarray=array.array('l', [-9223372036854775808, 0, 9223372036854775807]))] + df = self.spark.createDataFrame(data) + self.assertEqual(df.first(), Row(longarray=[-9223372036854775808, 0, 9223372036854775807])) + class HiveSparkSubmitTests(SparkSubmitTests):