Skip to content
Snippets Groups Projects
Commit e42724b1 authored by sureshthalamati's avatar sureshthalamati Committed by Reynold Xin
Browse files

[SPARK-13167][SQL] Include rows with null values for partition column when...

[SPARK-13167][SQL] Include rows with null values for partition column when reading from JDBC datasources.

Rows with null values in partition column are not included in the results because none of the partition
where clause specify is null predicate on the partition column. This fix adds is null predicate on the partition column  to the first JDBC partition where clause.

Example:
JDBCPartition(THEID < 1 or THEID is null, 0),JDBCPartition(THEID >= 1 AND THEID < 2,1),
JDBCPartition(THEID >= 2, 2)

Author: sureshthalamati <suresh.thalamati@gmail.com>

Closes #11063 from sureshthalamati/nullable_jdbc_part_col_spark-13167.
parent a640c5b4
No related branches found
No related tags found
No related merge requests found
......@@ -44,6 +44,12 @@ private[sql] object JDBCRelation {
* exactly once. The parameters minValue and maxValue are advisory in that
* incorrect values may cause the partitioning to be poor, but no data
* will fail to be represented.
*
* Null value predicate is added to the first partition where clause to include
* the rows with null value for the partitions column.
*
* @param partitioning partition information to generate the where clause for each partition
* @return an array of partitions with where clause for each partition
*/
def columnPartition(partitioning: JDBCPartitioningInfo): Array[Partition] = {
if (partitioning == null) return Array[Partition](JDBCPartition(null, 0))
......@@ -66,7 +72,7 @@ private[sql] object JDBCRelation {
if (upperBound == null) {
lowerBound
} else if (lowerBound == null) {
upperBound
s"$upperBound or $column is null"
} else {
s"$lowerBound AND $upperBound"
}
......
......@@ -171,6 +171,27 @@ class JDBCSuite extends SparkFunSuite
|OPTIONS (url '$url', dbtable 'TEST.NULLTYPES', user 'testUser', password 'testPass')
""".stripMargin.replaceAll("\n", " "))
conn.prepareStatement(
"create table test.emp(name TEXT(32) NOT NULL," +
" theid INTEGER, \"Dept\" INTEGER)").executeUpdate()
conn.prepareStatement(
"insert into test.emp values ('fred', 1, 10)").executeUpdate()
conn.prepareStatement(
"insert into test.emp values ('mary', 2, null)").executeUpdate()
conn.prepareStatement(
"insert into test.emp values ('joe ''foo'' \"bar\"', 3, 30)").executeUpdate()
conn.prepareStatement(
"insert into test.emp values ('kathy', null, null)").executeUpdate()
conn.commit()
sql(
s"""
|CREATE TEMPORARY TABLE nullparts
|USING org.apache.spark.sql.jdbc
|OPTIONS (url '$url', dbtable 'TEST.EMP', user 'testUser', password 'testPass',
|partitionColumn '"Dept"', lowerBound '1', upperBound '4', numPartitions '4')
""".stripMargin.replaceAll("\n", " "))
// Untested: IDENTITY, OTHER, UUID, ARRAY, and GEOMETRY types.
}
......@@ -338,6 +359,23 @@ class JDBCSuite extends SparkFunSuite
.collect().length === 3)
}
test("Partioning on column that might have null values.") {
assert(
sqlContext.read.jdbc(urlWithUserAndPass, "TEST.EMP", "theid", 0, 4, 3, new Properties)
.collect().length === 4)
assert(
sqlContext.read.jdbc(urlWithUserAndPass, "TEST.EMP", "THEID", 0, 4, 3, new Properties)
.collect().length === 4)
// partitioning on a nullable quoted column
assert(
sqlContext.read.jdbc(urlWithUserAndPass, "TEST.EMP", """"Dept"""", 0, 4, 3, new Properties)
.collect().length === 4)
}
test("SELECT * on partitioned table with a nullable partioncolumn") {
assert(sql("SELECT * FROM nullparts").collect().size == 4)
}
test("H2 integral types") {
val rows = sql("SELECT * FROM inttypes WHERE A IS NOT NULL").collect()
assert(rows.length === 1)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment