Skip to content
Snippets Groups Projects
Commit 5f894d23 authored by Reynold Xin
Browse files

[SPARK-18760][SQL] Consistent format specification for FileFormats

## What changes were proposed in this pull request?
This patch makes the format name shown in `explain()` output consistent across file sources. Parquet and Text were the only two formats whose names differed from the rest:

Before:
```
scala> spark.read.text("test.text").explain()
== Physical Plan ==
*FileScan text [value#15] Batched: false, Format: org.apache.spark.sql.execution.datasources.text.TextFileFormat@xyz, Location: InMemoryFileIndex[file:/scratch/rxin/spark/test.text], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<value:string>
```

After:
```
scala> spark.read.text("test.text").explain()
== Physical Plan ==
*FileScan text [value#15] Batched: false, Format: Text, Location: InMemoryFileIndex[file:/scratch/rxin/spark/test.text], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<value:string>
```

Also closes #14680.

## How was this patch tested?
Verified in spark-shell.

Author: Reynold Xin <rxin@databricks.com>

Closes #16187 from rxin/SPARK-18760.
parent 26432df9
No related branches found
No related tags found
No related merge requests found
......@@ -61,7 +61,7 @@ class ParquetFileFormat
override def shortName(): String = "parquet"
override def toString: String = "ParquetFormat"
override def toString: String = "Parquet"
override def hashCode(): Int = getClass.hashCode()
......
......@@ -39,6 +39,8 @@ class TextFileFormat extends TextBasedFileFormat with DataSourceRegister {
override def shortName(): String = "text"
override def toString: String = "Text"
private def verifySchema(schema: StructType): Unit = {
if (schema.size != 1) {
throw new AnalysisException(
......
......@@ -31,7 +31,8 @@ import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils
class FileStreamSourceTest extends StreamTest with SharedSQLContext with PrivateMethodTester {
abstract class FileStreamSourceTest
extends StreamTest with SharedSQLContext with PrivateMethodTester {
import testImplicits._
......@@ -848,13 +849,13 @@ class FileStreamSourceSuite extends FileStreamSourceTest {
val explainWithoutExtended = q.explainInternal(false)
// `extended = false` only displays the physical plan.
assert("Relation.*text".r.findAllMatchIn(explainWithoutExtended).size === 0)
assert("TextFileFormat".r.findAllMatchIn(explainWithoutExtended).size === 1)
assert(": Text".r.findAllMatchIn(explainWithoutExtended).size === 1)
val explainWithExtended = q.explainInternal(true)
// `extended = true` displays 3 logical plans (Parsed/Analyzed/Optimized) and 1 physical
// plan.
assert("Relation.*text".r.findAllMatchIn(explainWithExtended).size === 3)
assert("TextFileFormat".r.findAllMatchIn(explainWithExtended).size === 1)
assert(": Text".r.findAllMatchIn(explainWithExtended).size === 1)
} finally {
q.stop()
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment