Skip to content
Snippets Groups Projects
Commit 7c0a648f authored by Cheng Lian's avatar Cheng Lian
Browse files

[HOTFIX] [SQL] Disables Metastore Parquet table conversion for "SQLQuerySuite.CTAS with serde"

Ideally we should convert Metastore Parquet tables with our own Parquet implementation on both read path and write path. However, the write path is not well covered, and causes this test failure. This PR is a hotfix to bring back Jenkins PR builder. A proper fix will be delivered in a follow-up PR.

<!-- Reviewable:start -->
[<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/apache/spark/4413)
<!-- Reviewable:end -->

Author: Cheng Lian <lian@databricks.com>

Closes #4413 from liancheng/hotfix-parquet-ctas and squashes the following commits:

5291289 [Cheng Lian] Hot fix for "SQLQuerySuite.CTAS with serde"
parent e8a5d50a
No related branches found
No related tags found
No related merge requests found
......@@ -17,13 +17,10 @@
package org.apache.spark.sql.hive.execution
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.Row
import org.apache.spark.sql.hive.HiveShim
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils
import org.apache.spark.sql.hive.HiveShim
import org.apache.spark.sql.{QueryTest, Row, SQLConf}
case class Nested1(f1: Nested2)
case class Nested2(f2: Nested3)
......@@ -109,28 +106,34 @@ class SQLQuerySuite extends QueryTest {
)
if (HiveShim.version =="0.13.1") {
sql(
"""CREATE TABLE ctas5
| STORED AS parquet AS
| SELECT key, value
| FROM src
| ORDER BY key, value""".stripMargin).collect
checkExistence(sql("DESC EXTENDED ctas5"), true,
"name:key", "type:string", "name:value", "ctas5",
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
"org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
"MANAGED_TABLE"
)
val default = getConf("spark.sql.hive.convertMetastoreParquet", "true")
// use the Hive SerDe for parquet tables
sql("set spark.sql.hive.convertMetastoreParquet = false")
checkAnswer(
sql("SELECT key, value FROM ctas5 ORDER BY key, value"),
sql("SELECT key, value FROM src ORDER BY key, value").collect().toSeq)
sql(s"set spark.sql.hive.convertMetastoreParquet = $default")
val origUseParquetDataSource = conf.parquetUseDataSourceApi
try {
setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, "false")
sql(
"""CREATE TABLE ctas5
| STORED AS parquet AS
| SELECT key, value
| FROM src
| ORDER BY key, value""".stripMargin).collect()
checkExistence(sql("DESC EXTENDED ctas5"), true,
"name:key", "type:string", "name:value", "ctas5",
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
"org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
"org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
"MANAGED_TABLE"
)
val default = getConf("spark.sql.hive.convertMetastoreParquet", "true")
// use the Hive SerDe for parquet tables
sql("set spark.sql.hive.convertMetastoreParquet = false")
checkAnswer(
sql("SELECT key, value FROM ctas5 ORDER BY key, value"),
sql("SELECT key, value FROM src ORDER BY key, value").collect().toSeq)
sql(s"set spark.sql.hive.convertMetastoreParquet = $default")
} finally {
setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, origUseParquetDataSource.toString)
}
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment