[SPARK-2191][SQL] Make sure InsertIntoHiveTable doesn't execute more than once.

Author: Michael Armbrust <michael@databricks.com>

Closes #1129 from marmbrus/doubleCreateAs and squashes the following commits:

9c6d9e4 [Michael Armbrust] Fix typo.
5128fe2 [Michael Armbrust] Make sure InsertIntoHiveTable doesn't execute each time you ask for its result.

[SPARK-2191][SQL] Make sure InsertIntoHiveTable doesn't execute more than once.
777c5958 · Michael Armbrust · Reynold Xin · bce0897b · 777c5958 · 777c5958
Commit 777c5958, authored 10 years ago by Michael Armbrust; committed 10 years ago by Reynold Xin.
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
@@ -344,12 +344,16 @@ case class InsertIntoHiveTable(
    writer.commitJob()
  }

+  override def execute() = result
+
  /**
   * Inserts all the rows in the table into Hive.  Row objects are properly serialized with the
   * `org.apache.hadoop.hive.serde2.SerDe` and the
   * `org.apache.hadoop.mapred.OutputFormat` provided by the table definition.
+   *
+   * Note: this is run once and then kept to avoid double insertions.
   */
-  def execute() = {
+  private lazy val result: RDD[Row] = {
    val childRdd = child.execute()
    assert(childRdd != null)


--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -28,6 +28,12 @@ import org.apache.spark.sql.{SchemaRDD, execution, Row}
 */
 class HiveQuerySuite extends HiveComparisonTest {

+  test("CREATE TABLE AS runs once") {
+    hql("CREATE TABLE foo AS SELECT 1 FROM src LIMIT 1").collect()
+    assert(hql("SELECT COUNT(*) FROM foo").collect().head.getLong(0) === 1,
+      "Incorrect number of rows in created table")
+  }
+
  createQueryTest("between",
    "SELECT * FROM src WHERE key Between 1 and 2")