Skip to content
Snippets Groups Projects
Commit e8a5d50a authored by Reynold Xin's avatar Reynold Xin
Browse files

[SPARK-5638][SQL] Add a config flag to disable eager analysis of DataFrames

Author: Reynold Xin <rxin@databricks.com>

Closes #4408 from rxin/df-config-eager and squashes the following commits:

c0204cf [Reynold Xin] [SPARK-5638][SQL] Add a config flag to disable eager analysis of DataFrames.
parent 85ccee81
No related branches found
No related tags found
No related merge requests found
......@@ -53,7 +53,9 @@ private[sql] class DataFrameImpl protected[sql](
/**
 * Auxiliary constructor that wraps a logical plan in a QueryExecution.
 *
 * When `sqlContext.conf.dataFrameEagerAnalysis` is true (the default), the plan
 * is analyzed immediately so that resolution errors (e.g. references to columns
 * that do not exist) surface at DataFrame construction time rather than later,
 * when the DataFrame is first executed.
 */
def this(sqlContext: SQLContext, logicalPlan: LogicalPlan) = {
  this(sqlContext, {
    val queryExecution = sqlContext.executePlan(logicalPlan)
    if (sqlContext.conf.dataFrameEagerAnalysis) {
      // Forcing `analyzed` triggers analysis eagerly and throws on invalid plans.
      queryExecution.analyzed
    }
    queryExecution
  })
}
......
......@@ -52,6 +52,9 @@ private[spark] object SQLConf {
// This is used to set the default data source when none is specified explicitly.
val DEFAULT_DATA_SOURCE_NAME = "spark.sql.default.datasource"
// Whether to perform eager analysis on a DataFrame. When enabled, a DataFrame's
// logical plan is analyzed as soon as the DataFrame is constructed, so invalid
// plans fail fast instead of at execution time. See DataFrameImpl's auxiliary
// constructor for where this flag is consulted.
val DATAFRAME_EAGER_ANALYSIS = "spark.sql.dataframe.eagerAnalysis"
// Legacy configuration keys still recognized for backward compatibility.
object Deprecated {
// Hadoop MapReduce key for the number of reduce tasks; presumably mapped onto
// Spark SQL's own shuffle-partition setting — confirm against SQLConf usages.
val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
}
......@@ -173,6 +176,9 @@ private[sql] class SQLConf extends Serializable {
/**
 * Fully-qualified name of the data source used when the caller does not specify
 * one. Read from `spark.sql.default.datasource`; falls back to the built-in
 * Parquet source when the key is unset.
 */
private[spark] def defaultDataSourceName: String = {
  val builtInDefault = "org.apache.spark.sql.parquet"
  getConf(DEFAULT_DATA_SOURCE_NAME, builtInDefault)
}
/**
 * Whether DataFrame construction eagerly analyzes the logical plan.
 * Controlled by `spark.sql.dataframe.eagerAnalysis`; defaults to true.
 */
private[spark] def dataFrameEagerAnalysis: Boolean = {
  val configured = getConf(DATAFRAME_EAGER_ANALYSIS, "true")
  configured.toBoolean
}
/** ********************** SQLConf functionality methods ************ */
/** Set Spark SQL configuration properties. */
......
......@@ -17,19 +17,23 @@
package org.apache.spark.sql
import scala.language.postfixOps
import org.apache.spark.sql.Dsl._
import org.apache.spark.sql.types._
/* Implicits */
import org.apache.spark.sql.test.TestSQLContext
import org.apache.spark.sql.test.TestSQLContext.logicalPlanToSparkQuery
import org.apache.spark.sql.test.TestSQLContext.implicits._
import scala.language.postfixOps
class DataFrameSuite extends QueryTest {
import org.apache.spark.sql.TestData._
test("analysis error should be eagerly reported") {
val oldSetting = TestSQLContext.conf.dataFrameEagerAnalysis
// Eager analysis.
TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, "true")
intercept[Exception] { testData.select('nonExistentName) }
intercept[Exception] {
testData.groupBy('key).agg(Map("nonExistentName" -> "sum"))
......@@ -40,6 +44,13 @@ class DataFrameSuite extends QueryTest {
intercept[Exception] {
testData.groupBy($"abcd").agg(Map("key" -> "sum"))
}
// No more eager analysis once the flag is turned off
TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, "false")
testData.select('nonExistentName)
// Set the flag back to original value before this test.
TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, oldSetting.toString)
}
test("table scan") {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment