Skip to content
Snippets Groups Projects
Commit f041e55e authored by Tejas Patil's avatar Tejas Patil Committed by Wenchen Fan
Browse files

[SPARK-19618][SQL] Inconsistency in the maximum number of buckets allowed by the DataFrame API vs. SQL

## What changes were proposed in this pull request?

Jira: https://issues.apache.org/jira/browse/SPARK-19618

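Moved the check that validates the number of buckets from `DataFrameWriter` to `BucketSpec` creation, so that the DataFrame API and SQL enforce the same limit (greater than 0 and less than 100000) and both report an `AnalysisException` when it is violated.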

## How was this patch tested?

- Added more unit tests

Author: Tejas Patil <tejasp@fb.com>

Closes #16948 from tejasapatil/SPARK-19618_max_buckets.
parent 8487902a
No related branches found
No related tags found
No related merge requests found
......@@ -135,8 +135,9 @@ case class BucketSpec(
numBuckets: Int,
bucketColumnNames: Seq[String],
sortColumnNames: Seq[String]) {
if (numBuckets <= 0) {
throw new AnalysisException(s"Expected positive number of buckets, but got `$numBuckets`.")
if (numBuckets <= 0 || numBuckets >= 100000) {
throw new AnalysisException(
s"Number of buckets should be greater than 0 but less than 100000. Got `$numBuckets`")
}
override def toString: String = {
......
......@@ -275,7 +275,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
}
numBuckets.map { n =>
require(n > 0 && n < 100000, "Bucket number must be greater than 0 and less than 100000.")
BucketSpec(n, bucketColumnNames.get, sortColumnNames.getOrElse(Nil))
}
}
......
......@@ -206,7 +206,7 @@ class CreateTableAsSelectSuite
}
}
test("create table using as select - with non-zero buckets") {
test("create table using as select - with valid number of buckets") {
val catalog = spark.sessionState.catalog
withTable("t") {
sql(
......@@ -222,19 +222,21 @@ class CreateTableAsSelectSuite
}
}
test("create table using as select - with zero buckets") {
test("create table using as select - with invalid number of buckets") {
withTable("t") {
val e = intercept[AnalysisException] {
sql(
s"""
|CREATE TABLE t USING PARQUET
|OPTIONS (PATH '${path.toURI}')
|CLUSTERED BY (a) SORTED BY (b) INTO 0 BUCKETS
|AS SELECT 1 AS a, 2 AS b
""".stripMargin
)
}.getMessage
assert(e.contains("Expected positive number of buckets, but got `0`"))
Seq(0, 100000).foreach(numBuckets => {
val e = intercept[AnalysisException] {
sql(
s"""
|CREATE TABLE t USING PARQUET
|OPTIONS (PATH '${path.toURI}')
|CLUSTERED BY (a) SORTED BY (b) INTO $numBuckets BUCKETS
|AS SELECT 1 AS a, 2 AS b
""".stripMargin
)
}.getMessage
assert(e.contains("Number of buckets should be greater than 0 but less than 100000"))
})
}
}
......
......@@ -38,10 +38,14 @@ class BucketedWriteSuite extends QueryTest with SQLTestUtils with TestHiveSingle
intercept[AnalysisException](df.write.bucketBy(2, "k").saveAsTable("tt"))
}
test("numBuckets not greater than 0 or less than 100000") {
test("numBuckets be greater than 0 but less than 100000") {
val df = Seq(1 -> "a", 2 -> "b").toDF("i", "j")
intercept[IllegalArgumentException](df.write.bucketBy(0, "i").saveAsTable("tt"))
intercept[IllegalArgumentException](df.write.bucketBy(100000, "i").saveAsTable("tt"))
Seq(-1, 0, 100000).foreach(numBuckets => {
val e = intercept[AnalysisException](df.write.bucketBy(numBuckets, "i").saveAsTable("tt"))
assert(
e.getMessage.contains("Number of buckets should be greater than 0 but less than 100000"))
})
}
test("specify sorting columns without bucketing columns") {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment