Skip to content
Snippets Groups Projects
Commit e3d2022e authored by zuotingbing's avatar zuotingbing Committed by Xiao Li
Browse files

[SPARK-20594][SQL] The staging directory should be a child directory starts...

[SPARK-20594][SQL] The staging directory should be a child directory whose name starts with "." so that it is not deleted when hive.exec.stagingdir is set under the table directory.

JIRA Issue: https://issues.apache.org/jira/browse/SPARK-20594

## What changes were proposed in this pull request?

The staging directory should be a child directory whose name starts with ".". Otherwise, when hive.exec.stagingdir is set under the table directory, the staging directory is deleted before it can be moved to the table directory.

## How was this patch tested?

Added unit tests

Author: zuotingbing <zuo.tingbing9@zte.com.cn>

Closes #17858 from zuotingbing/spark-stagingdir.
parent 0d3a6319
No related branches found
No related tags found
No related merge requests found
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
package org.apache.spark.sql.hive.execution package org.apache.spark.sql.hive.execution
import java.io.IOException import java.io.{File, IOException}
import java.net.URI import java.net.URI
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.util.{Date, Locale, Random} import java.util.{Date, Locale, Random}
...@@ -97,12 +97,24 @@ case class InsertIntoHiveTable( ...@@ -97,12 +97,24 @@ case class InsertIntoHiveTable(
val inputPathUri: URI = inputPath.toUri val inputPathUri: URI = inputPath.toUri
val inputPathName: String = inputPathUri.getPath val inputPathName: String = inputPathUri.getPath
val fs: FileSystem = inputPath.getFileSystem(hadoopConf) val fs: FileSystem = inputPath.getFileSystem(hadoopConf)
val stagingPathName: String = var stagingPathName: String =
if (inputPathName.indexOf(stagingDir) == -1) { if (inputPathName.indexOf(stagingDir) == -1) {
new Path(inputPathName, stagingDir).toString new Path(inputPathName, stagingDir).toString
} else { } else {
inputPathName.substring(0, inputPathName.indexOf(stagingDir) + stagingDir.length) inputPathName.substring(0, inputPathName.indexOf(stagingDir) + stagingDir.length)
} }
// SPARK-20594: This is a workaround for a Hive bug. The staging directory must not be
// deleted when users set hive.exec.stagingdir to a location under the table directory,
// so a staging directory under the table directory must have a name starting with ".".
if (FileUtils.isSubDir(new Path(stagingPathName), inputPath, fs) &&
!stagingPathName.stripPrefix(inputPathName).stripPrefix(File.separator).startsWith(".")) {
logDebug(s"The staging dir '$stagingPathName' should be a child directory starts " +
"with '.' to avoid being deleted if we set hive.exec.stagingdir under the table " +
"directory.")
stagingPathName = new Path(inputPathName, ".hive-staging").toString
}
val dir: Path = val dir: Path =
fs.makeQualified( fs.makeQualified(
new Path(stagingPathName + "_" + executionId + "-" + TaskRunner.getTaskRunnerID)) new Path(stagingPathName + "_" + executionId + "-" + TaskRunner.getTaskRunnerID))
......
...@@ -494,4 +494,15 @@ class InsertIntoHiveTableSuite extends QueryTest with TestHiveSingleton with Bef ...@@ -494,4 +494,15 @@ class InsertIntoHiveTableSuite extends QueryTest with TestHiveSingleton with Bef
spark.table("t").write.insertInto(tableName) spark.table("t").write.insertInto(tableName)
} }
} }
// Regression test for SPARK-20594: with hive.exec.stagingdir set to "./test", an
// INSERT OVERWRITE into a Hive table must still leave the inserted row readable.
test("SPARK-20594: hive.exec.stagingdir was deleted by Hive") {
// Set hive.exec.stagingdir to a path under the table directory that does not start with ".".
withSQLConf("hive.exec.stagingdir" -> "./test") {
withTable("test_table") {
sql("CREATE TABLE test_table (key int)")
sql("INSERT OVERWRITE TABLE test_table SELECT 1")
// The table must contain exactly the single row written by the INSERT OVERWRITE above.
checkAnswer(sql("SELECT * FROM test_table"), Row(1))
}
}
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment