diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index 61eb601a18c3228f63cbe7e70ef82631ced38479..2d29940eb8dab4a5daa894f17f1cdcd9a375c3ae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -274,7 +274,16 @@ private[state] class HDFSBackedStateStoreProvider( private def commitUpdates(newVersion: Long, map: MapType, tempDeltaFile: Path): Path = { synchronized { val finalDeltaFile = deltaFile(newVersion) - if (!fs.rename(tempDeltaFile, finalDeltaFile)) { + + // scalastyle:off + // Renaming a file atop an existing one fails on HDFS + // (http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/filesystem/filesystem.html). + // Hence we should either skip the rename step or delete the target file. Because deleting the + // target file will break speculation, skipping the rename step is the only choice. It's still + // semantically correct because Structured Streaming requires rerunning a batch should + // generate the same output. (SPARK-19677) + // scalastyle:on + if (!fs.exists(finalDeltaFile) && !fs.rename(tempDeltaFile, finalDeltaFile)) { throw new IOException(s"Failed to rename $tempDeltaFile to $finalDeltaFile") } loadedMaps.put(newVersion, map) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index 4863a4cbcf4f5708e1d9be3bc83f0d357f2cac57..21a0a10e6deab58ace2d0aa3aff265ff8209643b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -210,13 +210,6 @@ class StateStoreSuite extends SparkFunSuite with BeforeAndAfter with PrivateMeth assert(store1.commit() === 2) assert(rowsToSet(store1.iterator()) === Set("a" -> 1, "b" -> 1)) assert(getDataFromFiles(provider) === Set("a" -> 1, "b" -> 1)) - - // Overwrite the version with other data - val store2 = provider.getStore(1) - put(store2, "c", 1) - assert(store2.commit() === 2) - assert(rowsToSet(store2.iterator()) === Set("a" -> 1, "c" -> 1)) - assert(getDataFromFiles(provider) === Set("a" -> 1, "c" -> 1)) } test("snapshotting") { @@ -292,6 +285,15 @@ class StateStoreSuite extends SparkFunSuite with BeforeAndAfter with PrivateMeth assert(getDataFromFiles(provider, 19) === Set("a" -> 19)) } + test("SPARK-19677: Committing a delta file atop an existing one should not fail on HDFS") { + val conf = new Configuration() + conf.set("fs.fake.impl", classOf[RenameLikeHDFSFileSystem].getName) + conf.set("fs.default.name", "fake:///") + + val provider = newStoreProvider(hadoopConf = conf) + provider.getStore(0).commit() + provider.getStore(0).commit() + } test("corrupted file handling") { val provider = newStoreProvider(minDeltasForSnapshot = 5) @@ -681,6 +683,21 @@ private[state] object StateStoreSuite { } } +/** + * Fake FileSystem that simulates HDFS rename semantic, i.e. renaming a file atop an existing + * one should return false. + * See hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/filesystem/filesystem.html + */ +class RenameLikeHDFSFileSystem extends RawLocalFileSystem { + override def rename(src: Path, dst: Path): Boolean = { + if (exists(dst)) { + return false + } else { + return super.rename(src, dst) + } + } +} + /** * Fake FileSystem to test that the StateStore throws an exception while committing the * delta file, when `fs.rename` returns `false`.