From 49efd03bacad6060d99ed5e2fe53ba3df1d1317e Mon Sep 17 00:00:00 2001
From: gatorsmile <gatorsmile@gmail.com>
Date: Sun, 6 Dec 2015 11:15:02 -0800
Subject: [PATCH] [SPARK-12138][SQL] Escape \u in the generated comments of
 codegen

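When \u appears in a generated comment block (i.e. inside /* ... */), the Java compiler treats it as the start of a Unicode escape even though it is inside a comment, so code gen will break. So, in Expression and CodegenFallback, we escape \u to \\u.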

yhuai Please review it. I reproduced the failure and confirmed that it no longer occurs after the fix. Thanks!

Author: gatorsmile <gatorsmile@gmail.com>

Closes #10155 from gatorsmile/escapeU.
---
 .../spark/sql/catalyst/expressions/Expression.scala      | 4 +++-
 .../sql/catalyst/expressions/CodeGenerationSuite.scala   | 9 +++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index 614f0c075f..6d807c9ecf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -220,7 +220,9 @@ abstract class Expression extends TreeNode[Expression] {
    * Returns the string representation of this expression that is safe to be put in
    * code comments of generated code.
    */
-  protected def toCommentSafeString: String = this.toString.replace("*/", "\\*\\/")
+  protected def toCommentSafeString: String = this.toString
+    .replace("*/", "\\*\\/") // so the text cannot terminate the enclosing block comment
+    .replaceAll("(?<!\\\\)(\\\\(?:\\\\\\\\)*u)", "\\\\$1") // odd '\' run before 'u': add one '\'
 }
 
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
index fe754240dc..cd2ef7dcd0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
@@ -107,4 +107,13 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
       true,
       InternalRow(UTF8String.fromString("*/")))
   }
+
+  test("\\u in the data") {
+    // A backslash directly followed by 'u' inside a generated block comment breaks code gen,
+    // so the expression text has to be escaped before it is embedded in such a comment.
+    val backslashU = "\\u"
+    val predicate =
+      EqualTo(BoundReference(0, StringType, false), Literal.create(backslashU, StringType))
+    checkEvaluation(predicate, true, InternalRow(UTF8String.fromString(backslashU)))
+  }
 }
-- 
GitLab