diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 8024a8de07c98efb40f64d453670daf4390cffdd..bb9926ce8ca85df9269fa035b488292ed5aebe39 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -63,6 +63,8 @@ __all__ += [
     'year', 'quarter', 'month', 'hour', 'minute', 'second',
     'dayofmonth', 'dayofyear', 'weekofyear']
 
+__all__ += ['soundex']
+
 
 def _create_function(name, doc=""):
     """ Create a function for aggregator by name"""
@@ -922,6 +924,7 @@ def trunc(date, format):
 def size(col):
     """
     Collection function: returns the length of the array or map stored in the column.
+
     :param col: name of column or expression
 
     >>> df = sqlContext.createDataFrame([([1, 2, 3],),([1],),([],)], ['data'])
@@ -932,6 +935,20 @@ def size(col):
     return Column(sc._jvm.functions.size(_to_java_column(col)))
 
 
+@since
+@ignore_unicode_prefix
+def soundex(col):
+    """
+    Returns the SoundEx encoding for a string
+
+    >>> df = sqlContext.createDataFrame([("Peters",),("Uhrbach",)], ['name'])
+    >>> df.select(soundex(df.name).alias("soundex")).collect()
+    [Row(soundex=u'P362'), Row(soundex=u'U612')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.size(_to_java_column(col)))
+
+
 class UserDefinedFunction(object):
     """
     User defined function in Python
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 1bf7204a2515c50f646a2875bc4e10a85135bf94..3f61a9af1f075afc4699e71d7be1072751f94d59 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -194,6 +194,7 @@ object FunctionRegistry {
     expression[StringRepeat]("repeat"),
     expression[StringReverse]("reverse"),
     expression[StringTrimRight]("rtrim"),
+    expression[SoundEx]("soundex"),
     expression[StringSpace]("space"),
     expression[StringSplit]("split"),
     expression[Substring]("substr"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index 684eac12bd6f0740e024b93dc09072b5b1db4e43..160e72f384a9b099e65bfbe4c96e005353f9d57b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -718,6 +718,22 @@ case class Levenshtein(left: Expression, right: Expression) extends BinaryExpres
   }
 }
 
+/**
+ * A function that return soundex code of the given string expression.
+ */
+case class SoundEx(child: Expression) extends UnaryExpression with ExpectsInputTypes {
+
+  override def dataType: DataType = StringType
+
+  override def inputTypes: Seq[DataType] = Seq(StringType)
+
+  override def nullSafeEval(input: Any): Any = input.asInstanceOf[UTF8String].soundex()
+
+  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
+    defineCodeGen(ctx, ev, c => s"$c.soundex()")
+  }
+}
+
 /**
  * Returns the numeric value of the first character of str.
  */
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index 3ecd0d374c46b2d6c82aa2384b16f7b8f331705f..fb72fe17141530e5fbf2fc1465f0792f0606f561 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -347,6 +347,34 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     // scalastyle:on
   }
 
+  test("soundex unit test") {
+    checkEvaluation(SoundEx(Literal("ZIN")), "Z500")
+    checkEvaluation(SoundEx(Literal("SU")), "S000")
+    checkEvaluation(SoundEx(Literal("")), "")
+    checkEvaluation(SoundEx(Literal.create(null, StringType)), null)
+
+    // scalastyle:off
+    // non ascii characters are not allowed in the code, so we disable the scalastyle here.
+    checkEvaluation(SoundEx(Literal("测试")), "测试")
+    checkEvaluation(SoundEx(Literal("Tschüss")), "T220")
+    // scalastyle:on
+    checkEvaluation(SoundEx(Literal("zZ")), "Z000", create_row("s8"))
+    checkEvaluation(SoundEx(Literal("RAGSSEEESSSVEEWE")), "R221")
+    checkEvaluation(SoundEx(Literal("Ashcraft")), "A261")
+    checkEvaluation(SoundEx(Literal("Aswcraft")), "A261")
+    checkEvaluation(SoundEx(Literal("Tymczak")), "T522")
+    checkEvaluation(SoundEx(Literal("Pfister")), "P236")
+    checkEvaluation(SoundEx(Literal("Miller")), "M460")
+    checkEvaluation(SoundEx(Literal("Peterson")), "P362")
+    checkEvaluation(SoundEx(Literal("Peters")), "P362")
+    checkEvaluation(SoundEx(Literal("Auerbach")), "A612")
+    checkEvaluation(SoundEx(Literal("Uhrbach")), "U612")
+    checkEvaluation(SoundEx(Literal("Moskowitz")), "M232")
+    checkEvaluation(SoundEx(Literal("Moskovitz")), "M213")
+    checkEvaluation(SoundEx(Literal("relyheewsgeessg")), "R422")
+    checkEvaluation(SoundEx(Literal("!!")), "!!")
+  }
+
   test("TRIM/LTRIM/RTRIM") {
     val s = 'a.string.at(0)
     checkEvaluation(StringTrim(Literal(" aa  ")), "aa", create_row(" abdef "))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 5d82a5eadd94d10e9bed3cf4004e96dbacf2fc81..89ffa9c50d6c7eecacc01493cd5bdc4633724fa8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1902,6 +1902,14 @@ object functions {
     StringRepeat(str.expr, lit(n).expr)
   }
 
+  /**
+   * * Return the soundex code for the specified expression.
+   *
+   * @group string_funcs
+   * @since 1.5.0
+   */
+  def soundex(e: Column): Column = SoundEx(e.expr)
+
   /**
    * Splits str around pattern (pattern is a regular expression).
    * NOTE: pattern is a string represent the regular expression.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index 8e0ea76d15881e5f16c37e02019d1cdf9079fcf1..b7f073cccb6acad7652a2105a920faeb1e4bb726 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -142,6 +142,15 @@ class StringFunctionsSuite extends QueryTest {
       Row("aa123cc"))
   }
 
+  test("soundex function") {
+    val df = Seq(("MARY", "SU")).toDF("l", "r")
+    checkAnswer(
+      df.select(soundex($"l"), soundex($"r")), Row("M600", "S000"))
+
+    checkAnswer(
+      df.selectExpr("SoundEx(l)", "SoundEx(r)"), Row("M600", "S000"))
+  }
+
   test("string instr function") {
     val df = Seq(("aaads", "aa", "zz")).toDF("a", "b", "c")
 
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index c38953f65d7d7a3efbfdddd7e450dc7706701e4d..9d4998fd48a386cc3bd20d64da30b3f96e8c6f22 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -680,4 +680,57 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
     }
     return result;
   }
+
+  /**
+   * Soundex mapping table
+   */
+  private static final byte[] US_ENGLISH_MAPPING = {'0', '1', '2', '3', '0', '1', '2', '7',
+    '0', '2', '2', '4', '5', '5', '0', '1', '2', '6', '2', '3', '0', '1', '7', '2', '0', '2'};
+
+  /**
+   * Encodes a string into a Soundex value. Soundex is an encoding used to relate similar names,
+   * but can also be used as a general purpose scheme to find word with similar phonemes.
+   * https://en.wikipedia.org/wiki/Soundex
+   */
+  public UTF8String soundex() {
+    if (numBytes == 0) {
+      return EMPTY_UTF8;
+    }
+
+    byte b = getByte(0);
+    if ('a' <= b && b <= 'z') {
+      b -= 32;
+    } else if (b < 'A' || 'Z' < b) {
+      // first character must be a letter
+      return this;
+    }
+    byte sx[] = {'0', '0', '0', '0'};
+    sx[0] = b;
+    int sxi = 1;
+    int idx = b - 'A';
+    byte lastCode = US_ENGLISH_MAPPING[idx];
+
+    for (int i = 1; i < numBytes; i++) {
+      b = getByte(i);
+      if ('a' <= b && b <= 'z') {
+        b -= 32;
+      } else if (b < 'A' || 'Z' < b) {
+        // not a letter, skip it
+        lastCode = '0';
+        continue;
+      }
+      idx = b - 'A';
+      byte code = US_ENGLISH_MAPPING[idx];
+      if (code == '7') {
+        // ignore it
+      } else {
+        if (code != '0' && code != lastCode) {
+          sx[sxi++] = code;
+          if (sxi > 3) break;
+        }
+        lastCode = code;
+      }
+    }
+    return UTF8String.fromBytes(sx);
+  }
 }
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index f2cc19ca6b1727b2e21c9e794e7be2d28eccef2e..c565210872322f111d4e3610cf4b8b005a16876e 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -341,4 +341,52 @@ public class UTF8StringSuite {
     assertEquals(fromString("   "), blankString(3));
     assertEquals(fromString(""), blankString(0));
   }
+
+  @Test
+  public void soundex() {
+    assertEquals(fromString("Robert").soundex(), fromString("R163"));
+    assertEquals(fromString("Rupert").soundex(), fromString("R163"));
+    assertEquals(fromString("Rubin").soundex(), fromString("R150"));
+    assertEquals(fromString("Ashcraft").soundex(), fromString("A261"));
+    assertEquals(fromString("Ashcroft").soundex(), fromString("A261"));
+    assertEquals(fromString("Burroughs").soundex(), fromString("B620"));
+    assertEquals(fromString("Burrows").soundex(), fromString("B620"));
+    assertEquals(fromString("Ekzampul").soundex(), fromString("E251"));
+    assertEquals(fromString("Example").soundex(), fromString("E251"));
+    assertEquals(fromString("Ellery").soundex(), fromString("E460"));
+    assertEquals(fromString("Euler").soundex(), fromString("E460"));
+    assertEquals(fromString("Ghosh").soundex(), fromString("G200"));
+    assertEquals(fromString("Gauss").soundex(), fromString("G200"));
+    assertEquals(fromString("Gutierrez").soundex(), fromString("G362"));
+    assertEquals(fromString("Heilbronn").soundex(), fromString("H416"));
+    assertEquals(fromString("Hilbert").soundex(), fromString("H416"));
+    assertEquals(fromString("Jackson").soundex(), fromString("J250"));
+    assertEquals(fromString("Kant").soundex(), fromString("K530"));
+    assertEquals(fromString("Knuth").soundex(), fromString("K530"));
+    assertEquals(fromString("Lee").soundex(), fromString("L000"));
+    assertEquals(fromString("Lukasiewicz").soundex(), fromString("L222"));
+    assertEquals(fromString("Lissajous").soundex(), fromString("L222"));
+    assertEquals(fromString("Ladd").soundex(), fromString("L300"));
+    assertEquals(fromString("Lloyd").soundex(), fromString("L300"));
+    assertEquals(fromString("Moses").soundex(), fromString("M220"));
+    assertEquals(fromString("O'Hara").soundex(), fromString("O600"));
+    assertEquals(fromString("Pfister").soundex(), fromString("P236"));
+    assertEquals(fromString("Rubin").soundex(), fromString("R150"));
+    assertEquals(fromString("Robert").soundex(), fromString("R163"));
+    assertEquals(fromString("Rupert").soundex(), fromString("R163"));
+    assertEquals(fromString("Soundex").soundex(), fromString("S532"));
+    assertEquals(fromString("Sownteks").soundex(), fromString("S532"));
+    assertEquals(fromString("Tymczak").soundex(), fromString("T522"));
+    assertEquals(fromString("VanDeusen").soundex(), fromString("V532"));
+    assertEquals(fromString("Washington").soundex(), fromString("W252"));
+    assertEquals(fromString("Wheaton").soundex(), fromString("W350"));
+
+    assertEquals(fromString("a").soundex(), fromString("A000"));
+    assertEquals(fromString("ab").soundex(), fromString("A100"));
+    assertEquals(fromString("abc").soundex(), fromString("A120"));
+    assertEquals(fromString("abcd").soundex(), fromString("A123"));
+    assertEquals(fromString("").soundex(), fromString(""));
+    assertEquals(fromString("123").soundex(), fromString("123"));
+    assertEquals(fromString("世界千世").soundex(), fromString("世界千世"));
+  }
 }