From 24f338ba7b34df493dd49bbc354d08f5e3afbb85 Mon Sep 17 00:00:00 2001
From: Reynold Xin <rxin@databricks.com>
Date: Wed, 20 Apr 2016 17:56:31 -0700
Subject: [PATCH] [SPARK-14775][SQL] Remove TestHiveSparkSession.rewritePaths

## What changes were proposed in this pull request?
The path rewrite in TestHiveSparkSession is pretty hacky. I think we can remove those complexity and just do a string replacement when we read the query files in. This would remove the overloading of runNativeSql in TestHive, which will simplify the removal of Hive specific variable substitution.

## How was this patch tested?
This is a small test refactoring to simplify test infrastructure.

Author: Reynold Xin <rxin@databricks.com>

Closes #12543 from rxin/SPARK-14775.
---
 .../apache/spark/sql/hive/test/TestHive.scala  | 18 ------------------
 .../test/resources/hive-test-path-helper.txt   |  1 +
 .../hive/execution/HiveComparisonTest.scala    | 14 +++++++++++++-
 .../sql/hive/execution/HiveQueryFileTest.scala |  6 +++---
 4 files changed, 17 insertions(+), 22 deletions(-)
 create mode 100644 sql/hive/src/test/resources/hive-test-path-helper.txt

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index e629099086..2bb13996c1 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -182,19 +182,6 @@ private[hive] class TestHiveSparkSession(
     Option(System.getenv(envVar)).map(new File(_))
   }
 
-  /**
-   * Replaces relative paths to the parent directory "../" with hiveDevHome since this is how the
-   * hive test cases assume the system is set up.
-   */
-  private[hive] def rewritePaths(cmd: String): String =
-    if (cmd.toUpperCase contains "LOAD DATA") {
-      val testDataLocation =
-        hiveDevHome.map(_.getCanonicalPath).getOrElse(inRepoTests.getCanonicalPath)
-      cmd.replaceAll("\\.\\./\\.\\./", testDataLocation + "/")
-    } else {
-      cmd
-    }
-
   val hiveFilesTemp = File.createTempFile("catalystHiveFiles", "")
   hiveFilesTemp.delete()
   hiveFilesTemp.mkdir()
@@ -566,11 +553,6 @@ private[hive] class TestHiveSessionState(sparkSession: TestHiveSparkSession)
   override def executePlan(plan: LogicalPlan): TestHiveQueryExecution = {
     new TestHiveQueryExecution(sparkSession, plan)
   }
-
-  // Override so we can intercept relative paths and rewrite them to point at hive.
-  override def runNativeSql(sql: String): Seq[String] = {
-    super.runNativeSql(sparkSession.rewritePaths(substitutor.substitute(hiveconf, sql)))
-  }
 }
 
 
diff --git a/sql/hive/src/test/resources/hive-test-path-helper.txt b/sql/hive/src/test/resources/hive-test-path-helper.txt
new file mode 100644
index 0000000000..356b131ea1
--- /dev/null
+++ b/sql/hive/src/test/resources/hive-test-path-helper.txt
@@ -0,0 +1 @@
+This file is here so we can match on it and find the path to the current folder.
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index bd46cb922e..994dc4a2d2 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -47,6 +47,17 @@ import org.apache.spark.sql.hive.test.{TestHive, TestHiveQueryExecution}
 abstract class HiveComparisonTest
   extends SparkFunSuite with BeforeAndAfterAll with GivenWhenThen {
 
+  /**
+   * Path to the test datasets. We find this by looking up "hive-test-path-helper.txt" file.
+   *
+   * Before we run the query in Spark, we replace "../../data" with this path.
+   */
+  private val testDataPath: String = {
+    Thread.currentThread.getContextClassLoader
+      .getResource("hive-test-path-helper.txt")
+      .getPath.replace("/hive-test-path-helper.txt", "/data")
+  }
+
   /**
    * When set, any cache files that result in test failures will be deleted.  Used when the test
    * harness or hive have been updated thus requiring new golden answers to be computed for some
@@ -386,7 +397,8 @@ abstract class HiveComparisonTest
           var query: TestHiveQueryExecution = null
           try {
             query = {
-              val originalQuery = new TestHiveQueryExecution(queryString)
+              val originalQuery = new TestHiveQueryExecution(
+                queryString.replace("../../data", testDataPath))
               val containsCommands = originalQuery.analyzed.collectFirst {
                 case _: Command => ()
                 case _: LogicalInsertIntoHiveTable => ()
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
index f96c989c46..e772324a57 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala
@@ -40,14 +40,14 @@ abstract class HiveQueryFileTest extends HiveComparisonTest {
 
   def testCases: Seq[(String, File)]
 
-  val runAll =
+  val runAll: Boolean =
     !(System.getProperty("spark.hive.alltests") == null) ||
     runOnlyDirectories.nonEmpty ||
     skipDirectories.nonEmpty
 
-  val whiteListProperty = "spark.hive.whitelist"
+  val whiteListProperty: String = "spark.hive.whitelist"
   // Allow the whiteList to be overridden by a system property
-  val realWhiteList =
+  val realWhiteList: Seq[String] =
     Option(System.getProperty(whiteListProperty)).map(_.split(",").toSeq).getOrElse(whiteList)
 
   // Go through all the test cases and add them to scala test.
-- 
GitLab