From a17384fa343628cec44437da5b80b9403ecd5838 Mon Sep 17 00:00:00 2001 From: Reynold Xin <rxin@databricks.com> Date: Wed, 12 Aug 2015 15:27:52 -0700 Subject: [PATCH] [SPARK-9907] [SQL] Python crc32 is mistakenly calling md5 Author: Reynold Xin <rxin@databricks.com> Closes #8138 from rxin/SPARK-9907. --- python/pyspark/sql/functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 95f46044d3..e98979533f 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -885,10 +885,10 @@ def crc32(col): returns the value as a bigint. >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(crc32('a').alias('crc32')).collect() - [Row(crc32=u'902fbdd2b1df0c4f70b4a5d23525e932')] + [Row(crc32=2743272264)] """ sc = SparkContext._active_spark_context - return Column(sc._jvm.functions.md5(_to_java_column(col))) + return Column(sc._jvm.functions.crc32(_to_java_column(col))) @ignore_unicode_prefix -- GitLab