Skip to content
Snippets Groups Projects
Commit 96608310 authored by Nicholas Chammas's avatar Nicholas Chammas Committed by hyukjinkwon
Browse files

[SPARK-21712][PYSPARK] Clarify type error for Column.substr()

Proposed changes:
* Clarify the type error that `Column.substr()` gives.

Test plan:
* Tested this manually.
* Test code:
    ```python
    from pyspark.sql.functions import col, lit
    spark.createDataFrame([['nick']], schema=['name']).select(col('name').substr(0, lit(1)))
    ```
* Before:
    ```
    TypeError: Can not mix the type
    ```
* After:
    ```
    TypeError: startPos and length must be the same type. Got <class 'int'> and
    <class 'pyspark.sql.column.Column'>, respectively.
    ```

Author: Nicholas Chammas <nicholas.chammas@gmail.com>

Closes #18926 from nchammas/SPARK-21712-substr-type-error.
parent 42b9eda8
No related branches found
No related tags found
No related merge requests found
......@@ -406,8 +406,14 @@ class Column(object):
[Row(col=u'Ali'), Row(col=u'Bob')]
"""
if type(startPos) != type(length):
raise TypeError("Can not mix the type")
if isinstance(startPos, (int, long)):
raise TypeError(
"startPos and length must be the same type. "
"Got {startPos_t} and {length_t}, respectively."
.format(
startPos_t=type(startPos),
length_t=type(length),
))
if isinstance(startPos, int):
jc = self._jc.substr(startPos, length)
elif isinstance(startPos, Column):
jc = self._jc.substr(startPos._jc, length._jc)
......
......@@ -1220,6 +1220,18 @@ class SQLTests(ReusedPySparkTestCase):
rndn2 = df.select('key', functions.randn(0)).collect()
self.assertEqual(sorted(rndn1), sorted(rndn2))
def test_string_functions(self):
from pyspark.sql.functions import col, lit
df = self.spark.createDataFrame([['nick']], schema=['name'])
self.assertRaisesRegexp(
TypeError,
"must be the same type",
lambda: df.select(col('name').substr(0, lit(1))))
if sys.version_info.major == 2:
self.assertRaises(
TypeError,
lambda: df.select(col('name').substr(long(0), long(1))))
def test_array_contains_function(self):
from pyspark.sql.functions import array_contains
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment