Commit 21e366ae authored by Dongjoon Hyun, committed by Wenchen Fan

[SPARK-19912][SQL] String literals should be escaped for Hive metastore partition pruning

## What changes were proposed in this pull request?

Currently, `HiveShim`'s `convertFilters` does not escape string literals, which leads to the correctness issues shown below. This PR aims to return the correct result and to show a clearer exception message.

**BEFORE**

```scala
scala> Seq((1, "p1", "q1"), (2, "p1\" and q=\"q1", "q2")).toDF("a", "p", "q").write.partitionBy("p", "q").saveAsTable("t1")

scala> spark.table("t1").filter($"p" === "p1\" and q=\"q1").select($"a").show
+---+
|  a|
+---+
+---+

scala> spark.table("t1").filter($"p" === "'\"").select($"a").show
java.lang.RuntimeException: Caught Hive MetaException attempting to get partition metadata by filter from ...
```

**AFTER**

```scala
scala> spark.table("t1").filter($"p" === "p1\" and q=\"q1").select($"a").show
+---+
|  a|
+---+
|  2|
+---+

scala> spark.table("t1").filter($"p" === "'\"").select($"a").show
java.lang.UnsupportedOperationException: Partition filter cannot have both `"` and `'` characters
```
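The fix boils down to a quoting rule: render the literal with double quotes unless it contains one, fall back to single quotes in that case, and reject literals containing both characters (there is no way to escape them in the filter string). Below is a standalone, runnable sketch of that rule; the helper body is copied from the `quoteStringLiteral` added in `HiveShim.scala` further down, while the wrapper object and `main` are illustrative additions only.

```scala
object QuoteRuleSketch {
  // Same rule as the quoteStringLiteral helper introduced by this patch:
  // prefer double quotes, fall back to single quotes, reject literals with both.
  def quoteStringLiteral(str: String): String = {
    if (!str.contains("\"")) {
      s""""$str""""
    } else if (!str.contains("'")) {
      s"""'$str'"""
    } else {
      throw new UnsupportedOperationException(
        """Partition filter cannot have both `"` and `'` characters""")
    }
  }

  def main(args: Array[String]): Unit = {
    println(quoteStringLiteral("q1"))               // prints: "q1"
    println(quoteStringLiteral("p1\" and q=\"q1"))  // prints: 'p1" and q="q1'
  }
}
```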

## How was this patch tested?

Pass the Jenkins tests with the new test cases.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #17266 from dongjoon-hyun/SPARK-19912.
parent 7fa116f8
**HiveShim.scala**

```diff
@@ -596,13 +596,24 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
         s"$v ${op.symbol} ${a.name}"
       case op @ BinaryComparison(a: Attribute, Literal(v, _: StringType))
           if !varcharKeys.contains(a.name) =>
-        s"""${a.name} ${op.symbol} "$v""""
+        s"""${a.name} ${op.symbol} ${quoteStringLiteral(v.toString)}"""
       case op @ BinaryComparison(Literal(v, _: StringType), a: Attribute)
           if !varcharKeys.contains(a.name) =>
-        s""""$v" ${op.symbol} ${a.name}"""
+        s"""${quoteStringLiteral(v.toString)} ${op.symbol} ${a.name}"""
     }.mkString(" and ")
   }
 
+  private def quoteStringLiteral(str: String): String = {
+    if (!str.contains("\"")) {
+      s""""$str""""
+    } else if (!str.contains("'")) {
+      s"""'$str'"""
+    } else {
+      throw new UnsupportedOperationException(
+        """Partition filter cannot have both `"` and `'` characters""")
+    }
+  }
+
   override def getPartitionsByFilter(
       hive: Hive,
       table: Table,
@@ -611,6 +622,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
     // Hive getPartitionsByFilter() takes a string that represents partition
     // predicates like "str_key=\"value\" and int_key=1 ..."
     val filter = convertFilters(table, predicates)
+
     val partitions =
       if (filter.isEmpty) {
         getAllPartitionsMethod.invoke(hive, table).asInstanceOf[JSet[Partition]]
```
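To make the effect concrete, here is an illustrative before/after of the filter string pushed to the metastore for the predicate from the example above. The `after` form matches the expected string in the FiltersSuite case below; the `before` form is derived from the old interpolation, and it is why the original query returned no rows: Hive reads it as two separate comparisons and prunes to the wrong partition.

```scala
// Filter string pushed to the Hive metastore for the predicate $"p" === "p1\" and q=\"q1".
// Before the patch (old interpolation): the embedded quotes make Hive read two comparisons.
val before = "p = \"p1\" and q=\"q1\""
// After the patch (quoteStringLiteral): the literal is single-quoted and stays one comparison.
val after = "p = 'p1\" and q=\"q1'"
```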
**FiltersSuite.scala**

```diff
@@ -65,6 +65,11 @@ class FiltersSuite extends SparkFunSuite with Logging {
     (Literal("") === a("varchar", StringType)) :: Nil,
     "")
 
+  filterTest("SPARK-19912 String literals should be escaped for Hive metastore partition pruning",
+    (a("stringcol", StringType) === Literal("p1\" and q=\"q1")) ::
+    (Literal("p2\" and q=\"q2") === a("stringcol", StringType)) :: Nil,
+    """stringcol = 'p1" and q="q1' and 'p2" and q="q2' = stringcol""")
+
   private def filterTest(name: String, filters: Seq[Expression], result: String) = {
     test(name) {
       val converted = shim.convertFilters(testTable, filters)
```
**SQLQuerySuite.scala**

```diff
@@ -2057,4 +2057,20 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
       }
     }
   }
+
+  test("SPARK-19912 String literals should be escaped for Hive metastore partition pruning") {
+    withTable("spark_19912") {
+      Seq(
+        (1, "p1", "q1"),
+        (2, "'", "q2"),
+        (3, "\"", "q3"),
+        (4, "p1\" and q=\"q1", "q4")
+      ).toDF("a", "p", "q").write.partitionBy("p", "q").saveAsTable("spark_19912")
+
+      val table = spark.table("spark_19912")
+      checkAnswer(table.filter($"p" === "'").select($"a"), Row(2))
+      checkAnswer(table.filter($"p" === "\"").select($"a"), Row(3))
+      checkAnswer(table.filter($"p" === "p1\" and q=\"q1").select($"a"), Row(4))
+    }
+  }
 }
```