diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index e636f992ec99b36b460b9392702d1eb75ae8ddc8..3f2d7ac82585fb172616aad09aab0a1269a89d51 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -2136,9 +2136,9 @@ class DataFrame(object):
 
     def __getattr__(self, name):
         """ Return the column by given name """
-        if isinstance(name, basestring):
-            return Column(self._jdf.apply(name))
-        raise AttributeError
+        if name.startswith("__"):
+            raise AttributeError(name)
+        return Column(self._jdf.apply(name))
 
     def alias(self, name):
         """ Alias the current DataFrame """
diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py
index 081a77fbb0be2093b9f4d00af201ca78405ac7bc..bec1961f263933836a2e92bde24c41c3d0a1aa21 100644
--- a/python/pyspark/tests.py
+++ b/python/pyspark/tests.py
@@ -23,6 +23,7 @@ from array import array
 from fileinput import input
 from glob import glob
 import os
+import pydoc
 import re
 import shutil
 import subprocess
@@ -1032,6 +1033,15 @@ class SQLTests(ReusedPySparkTestCase):
         from pyspark.sql import Aggregator as Agg
         # self.assertEqual((0, '100'), tuple(g.agg(Agg.first(df.key), Agg.last(df.value)).first()))
 
+    def test_help_command(self):
+        # Regression test for SPARK-5464
+        rdd = self.sc.parallelize(['{"foo":"bar"}', '{"foo":"baz"}'])
+        df = self.sqlCtx.jsonRDD(rdd)
+        # render_doc() reproduces the help() exception without printing output
+        pydoc.render_doc(df)
+        pydoc.render_doc(df.foo)
+        pydoc.render_doc(df.take(1))
+
 class InputFormatTests(ReusedPySparkTestCase):