diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
new file mode 100644
index 0000000000000000000000000000000000000000..3fd1c37e71795673fc3ba2587c3cf92121b81dd0
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
@@ -0,0 +1,4 @@
+-- date time functions
+
+-- [SPARK-16836] current_date and current_timestamp literals
+select current_date = current_date(), current_timestamp = current_timestamp();
diff --git a/sql/core/src/test/resources/sql-tests/inputs/having.sql b/sql/core/src/test/resources/sql-tests/inputs/having.sql
new file mode 100644
index 0000000000000000000000000000000000000000..364c022d959dc46ccd50d5bee7584f30b5c852be
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/having.sql
@@ -0,0 +1,15 @@
+create temporary view hav as select * from values
+  ("one", 1),
+  ("two", 2),
+  ("three", 3),
+  ("one", 5)
+  as hav(k, v);
+
+-- having clause
+SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2;
+
+-- having condition contains grouping column
+SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2;
+
+-- SPARK-11032: resolve having correctly
+SELECT MIN(t.v) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(COUNT(1) > 0);
diff --git a/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql b/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql
new file mode 100644
index 0000000000000000000000000000000000000000..71a50157b766c2839ff7eefd4bfb1f0971e98e2d
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql
@@ -0,0 +1,20 @@
+create temporary view nt1 as select * from values
+  ("one", 1),
+  ("two", 2),
+  ("three", 3)
+  as nt1(k, v1);
+
+create temporary view nt2 as select * from values
+  ("one", 1),
+  ("two", 22),
+  ("one", 5)
+  as nt2(k, v2);
+
+
+SELECT * FROM nt1 natural join nt2 where k = "one";
+
+SELECT * FROM nt1 natural left join nt2 order by v1, v2;
+
+SELECT * FROM nt1 natural right join nt2 order by v1, v2;
+
+SELECT count(*) FROM nt1 natural full outer join nt2;
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
new file mode 100644
index 0000000000000000000000000000000000000000..51746579b13145b5ff83a8d3cdc26fe069a6de56
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -0,0 +1,10 @@
+-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite
+-- Number of queries: 1
+
+
+-- !query 0
+select current_date = current_date(), current_timestamp = current_timestamp()
+-- !query 0 schema
+struct<(current_date() = current_date()):boolean,(current_timestamp() = current_timestamp()):boolean>
+-- !query 0 output
+true true
diff --git a/sql/core/src/test/resources/sql-tests/results/having.sql.out b/sql/core/src/test/resources/sql-tests/results/having.sql.out
new file mode 100644
index 0000000000000000000000000000000000000000..0bc8be66be638f6ac31be55bba27f30f9b8325e8
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/having.sql.out
@@ -0,0 +1,40 @@
+-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite
+-- Number of queries: 4
+
+
+-- !query 0
+create temporary view hav as select * from values
+  ("one", 1),
+  ("two", 2),
+  ("three", 3),
+  ("one", 5)
+  as hav(k, v)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2
+-- !query 1 schema
+struct<k:string,sum(v):bigint>
+-- !query 1 output
+one 6
+three 3
+
+
+-- !query 2
+SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2
+-- !query 2 schema
+struct<count(k):bigint>
+-- !query 2 output
+1
+
+
+-- !query 3
+SELECT MIN(t.v) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(COUNT(1) > 0)
+-- !query 3 schema
+struct<min(v):int>
+-- !query 3 output
+1
diff --git a/sql/core/src/test/resources/sql-tests/results/natural-join.sql.out b/sql/core/src/test/resources/sql-tests/results/natural-join.sql.out
new file mode 100644
index 0000000000000000000000000000000000000000..d4954dabea8c9e4cfed433ee4d92bd1300fe7748
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/natural-join.sql.out
@@ -0,0 +1,64 @@
+-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite
+-- Number of queries: 6
+
+
+-- !query 0
+create temporary view nt1 as select * from values
+  ("one", 1),
+  ("two", 2),
+  ("three", 3)
+  as nt1(k, v1)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+create temporary view nt2 as select * from values
+  ("one", 1),
+  ("two", 22),
+  ("one", 5)
+  as nt2(k, v2)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+SELECT * FROM nt1 natural join nt2 where k = "one"
+-- !query 2 schema
+struct<k:string,v1:int,v2:int>
+-- !query 2 output
+one 1 1
+one 1 5
+
+
+-- !query 3
+SELECT * FROM nt1 natural left join nt2 order by v1, v2
+-- !query 3 schema
+struct<k:string,v1:int,v2:int>
+-- !query 3 output
+one 1 1
+one 1 5
+two 2 22
+three 3 NULL
+
+
+-- !query 4
+SELECT * FROM nt1 natural right join nt2 order by v1, v2
+-- !query 4 schema
+struct<k:string,v1:int,v2:int>
+-- !query 4 output
+one 1 1
+one 1 5
+two 2 22
+
+
+-- !query 5
+SELECT count(*) FROM nt1 natural full outer join nt2
+-- !query 5 schema
+struct<count(1):bigint>
+-- !query 5 output
+4
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index a0130dd48c2ff26c8a49cfd72b2616cafefa4de6..14a92973a7f823171a6a416239fa92148e3eae5e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -38,26 +38,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
 
   setupTestData()
 
-  test("having clause") {
-    withTempView("hav") {
-      Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v")
-        .createOrReplaceTempView("hav")
-      checkAnswer(
-        sql("SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2"),
-        Row("one", 6) :: Row("three", 3) :: Nil)
-    }
-  }
-
-  test("having condition contains grouping column") {
-    withTempView("hav") {
-      Seq(("one", 1), ("two", 2), ("three", 3), ("one", 5)).toDF("k", "v")
-        .createOrReplaceTempView("hav")
-      checkAnswer(
-        sql("SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2"),
-        Row(1) :: Nil)
-    }
-  }
-
   test("SPARK-8010: promote numeric to string") {
     val df = Seq((1, 1)).toDF("key", "value")
     df.createOrReplaceTempView("src")
@@ -1969,15 +1949,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     }
   }
 
-  test("SPARK-11032: resolve having correctly") {
-    withTempView("src") {
-      Seq(1 -> "a").toDF("i", "j").createOrReplaceTempView("src")
-      checkAnswer(
-        sql("SELECT MIN(t.i) FROM (SELECT * FROM src WHERE i > 0) t HAVING(COUNT(1) > 0)"),
-        Row(1))
-    }
-  }
-
   test("SPARK-11303: filter should not be pushed down into sample") {
     val df = spark.range(100)
     List(true, false).foreach { withReplacement =>
@@ -2517,30 +2488,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     }
   }
 
-  test("natural join") {
-    val df1 = Seq(("one", 1), ("two", 2), ("three", 3)).toDF("k", "v1")
-    val df2 = Seq(("one", 1), ("two", 22), ("one", 5)).toDF("k", "v2")
-    withTempView("nt1", "nt2") {
-      df1.createOrReplaceTempView("nt1")
-      df2.createOrReplaceTempView("nt2")
-      checkAnswer(
-        sql("SELECT * FROM nt1 natural join nt2 where k = \"one\""),
-        Row("one", 1, 1) :: Row("one", 1, 5) :: Nil)
-
-      checkAnswer(
-        sql("SELECT * FROM nt1 natural left join nt2 order by v1, v2"),
-        Row("one", 1, 1) :: Row("one", 1, 5) :: Row("two", 2, 22) :: Row("three", 3, null) :: Nil)
-
-      checkAnswer(
-        sql("SELECT * FROM nt1 natural right join nt2 order by v1, v2"),
-        Row("one", 1, 1) :: Row("one", 1, 5) :: Row("two", 2, 22) :: Nil)
-
-      checkAnswer(
-        sql("SELECT count(*) FROM nt1 natural full outer join nt2"),
-        Row(4) :: Nil)
-    }
-  }
-
   test("join with using clause") {
     val df1 = Seq(("r1c1", "r1c2", "t1r1c3"),
       ("r2c1", "r2c2", "t1r2c3"), ("r3c1x", "r3c2", "t1r3c3")).toDF("c1", "c2", "c3")
@@ -2991,13 +2938,4 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
       data.selectExpr("`part.col1`", "`col.1`"))
     }
   }
-
-  test("current_date and current_timestamp literals") {
-    // NOTE that I am comparing the result of the literal with the result of the function call.
-    // This is done to prevent the test from failing because we are comparing a result to an out
-    // dated timestamp (quite likely) or date (very unlikely - but equally annoying).
-    checkAnswer(
-      sql("select current_date = current_date(), current_timestamp = current_timestamp()"),
-      Seq(Row(true, true)))
-  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index 08b8432d68eb35dc9ddf4a4af91dda4a8c1954a3..14a029ed50a6921ab3942ded4b7efa8125a44414 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -20,9 +20,12 @@ package org.apache.spark.sql
 import java.io.File
 import java.util.{Locale, TimeZone}
 
+import org.apache.spark.sql.catalyst.planning.PhysicalOperation
+import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile}
 import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.sql.types.StructType
 
 /**
  * End-to-end test cases for SQL queries.
@@ -126,14 +129,18 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
       cleaned.split("(?<=[^\\\\]);").map(_.trim).filter(_ != "").toSeq
     }
 
+    // Create a local SparkSession to have stronger isolation between different test cases.
+    // This does not isolate catalog changes.
+    val localSparkSession = spark.newSession()
+
     // Run the SQL queries preparing them for comparison.
     val outputs: Seq[QueryOutput] = queries.map { sql =>
-      val df = spark.sql(sql)
+      val (schema, output) = getNormalizedResult(localSparkSession, sql)
       // We might need to do some query canonicalization in the future.
       QueryOutput(
         sql = sql,
-        schema = df.schema.catalogString,
-        output = df.queryExecution.hiveResultString().mkString("\n"))
+        schema = schema.catalogString,
+        output = output.mkString("\n"))
     }
 
     if (regenerateGoldenFiles) {
@@ -176,6 +183,23 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
     }
   }
 
+  /** Executes a query and returns the result as (schema of the output, normalized output). */
+  private def getNormalizedResult(session: SparkSession, sql: String): (StructType, Seq[String]) = {
+    // Returns true if the plan is supposed to be sorted.
+    def isSorted(plan: LogicalPlan): Boolean = plan match {
+      case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false
+      case PhysicalOperation(_, _, Sort(_, true, _)) => true
+      case _ => plan.children.iterator.exists(isSorted)
+    }
+
+    val df = session.sql(sql)
+    val schema = df.schema
+    val answer = df.queryExecution.hiveResultString()
+
+    // If the output is not pre-sorted, sort it.
+    if (isSorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted)
+  }
+
   private def listTestCases(): Seq[TestCase] = {
     listFilesRecursively(new File(inputFilePath)).map { file =>
       val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out"