diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index e753ed402cdd72e35f0c4eb1bdb1fe84af1421d1..b172f38ea22d045ba580330b2f3294c373bb9db3 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -406,8 +406,14 @@ class Column(object):
         [Row(col=u'Ali'), Row(col=u'Bob')]
         """
         if type(startPos) != type(length):
-            raise TypeError("Can not mix the type")
-        if isinstance(startPos, (int, long)):
+            raise TypeError(
+                "startPos and length must be the same type. "
+                "Got {startPos_t} and {length_t}, respectively."
+                .format(
+                    startPos_t=type(startPos),
+                    length_t=type(length),
+                ))
+        if isinstance(startPos, int):
             jc = self._jc.substr(startPos, length)
         elif isinstance(startPos, Column):
             jc = self._jc.substr(startPos._jc, length._jc)
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index cf2c473a1645c1066028e2c9479d90af23899b38..45a3f9e7165f19f6aea110f8bfc9a8887759785a 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -1220,6 +1220,18 @@ class SQLTests(ReusedPySparkTestCase):
         rndn2 = df.select('key', functions.randn(0)).collect()
         self.assertEqual(sorted(rndn1), sorted(rndn2))
 
+    def test_string_functions(self):
+        from pyspark.sql.functions import col, lit
+        df = self.spark.createDataFrame([['nick']], schema=['name'])
+        self.assertRaisesRegexp(
+            TypeError,
+            "must be the same type",
+            lambda: df.select(col('name').substr(0, lit(1))))
+        if sys.version_info.major == 2:
+            self.assertRaises(
+                TypeError,
+                lambda: df.select(col('name').substr(long(0), long(1))))
+
     def test_array_contains_function(self):
         from pyspark.sql.functions import array_contains
 