From 37b93f54e89332b6b77bb02c1c2299614338fd7c Mon Sep 17 00:00:00 2001 From: gatorsmile <gatorsmile@gmail.com> Date: Wed, 14 Sep 2016 00:37:42 +0200 Subject: [PATCH] [SPARK-17530][SQL] Add Statistics into DESCRIBE FORMATTED ### What changes were proposed in this pull request? Statistics is missing in the output of `DESCRIBE FORMATTED`. This PR is to add it. After the PR, the output will be like: ``` +----------------------------+----------------------------------------------------------------------------------------------------------------------+-------+ |col_name |data_type |comment| +----------------------------+----------------------------------------------------------------------------------------------------------------------+-------+ |key |string |null | |value |string |null | | | | | |# Detailed Table Information| | | |Database: |default | | |Owner: |xiaoli | | |Create Time: |Tue Sep 13 14:36:57 PDT 2016 | | |Last Access Time: |Wed Dec 31 16:00:00 PST 1969 | | |Location: |file:/private/var/folders/4b/sgmfldk15js406vk7lw5llzw0000gn/T/warehouse-9982e1db-df17-4376-a140-dbbee0203d83/texttable| | |Table Type: |MANAGED | | |Statistics: |sizeInBytes=5812, rowCount=500, isBroadcastable=false | | |Table Parameters: | | | | rawDataSize |-1 | | | numFiles |1 | | | transient_lastDdlTime |1473802620 | | | totalSize |5812 | | | COLUMN_STATS_ACCURATE |false | | | numRows |-1 | | | | | | |# Storage Information | | | |SerDe Library: |org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe | | |InputFormat: |org.apache.hadoop.mapred.TextInputFormat | | |OutputFormat: |org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat | | |Compressed: |No | | |Storage Desc Parameters: | | | | serialization.format |1 | | +----------------------------+----------------------------------------------------------------------------------------------------------------------+-------+ ``` Also improve the output of statistics in `DESCRIBE EXTENDED` by removing duplicate `Statistics`. Below is the example after the PR: ``` +----------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ |col_name |data_type |comment| +----------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ |key |string |null | |value |string |null | | | | | |# Detailed Table Information|CatalogTable( Table: `default`.`texttable` Owner: xiaoli Created: Tue Sep 13 14:38:43 PDT 2016 Last Access: Wed Dec 31 16:00:00 PST 1969 Type: MANAGED Schema: [StructField(key,StringType,true), StructField(value,StringType,true)] Provider: hive Properties: [rawDataSize=-1, numFiles=1, transient_lastDdlTime=1473802726, totalSize=5812, COLUMN_STATS_ACCURATE=false, numRows=-1] Statistics: sizeInBytes=5812, rowCount=500, isBroadcastable=false Storage(Location: file:/private/var/folders/4b/sgmfldk15js406vk7lw5llzw0000gn/T/warehouse-8ea5c5a0-5680-4778-91cb-c6334cf8a708/texttable, InputFormat: org.apache.hadoop.mapred.TextInputFormat, OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, Serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Properties: [serialization.format=1]))| | +----------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------+ ``` ### How was this patch tested? Manually tested. Author: gatorsmile <gatorsmile@gmail.com> Closes #15083 from gatorsmile/descFormattedStats. --- .../spark/sql/catalyst/catalog/interface.scala | 2 +- .../sql/catalyst/plans/logical/Statistics.scala | 15 ++++++++------- .../spark/sql/execution/command/tables.scala | 1 + 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index e74fa6e638..e52251f960 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -191,7 +191,7 @@ case class CatalogTable( viewText.map("View: " + _).getOrElse(""), comment.map("Comment: " + _).getOrElse(""), if (properties.nonEmpty) s"Properties: $tableProperties" else "", - if (stats.isDefined) s"Statistics: ${stats.get}" else "", + if (stats.isDefined) s"Statistics: ${stats.get.simpleString}" else "", s"$storage") output.filter(_.nonEmpty).mkString("CatalogTable(\n\t", "\n\t", ")") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala index 58fa537a18..3cf20385dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala @@ -38,12 +38,13 @@ case class Statistics( sizeInBytes: BigInt, rowCount: Option[BigInt] = None, isBroadcastable: Boolean = false) { - override def toString: String = { - val output = - Seq(s"sizeInBytes=$sizeInBytes", - if (rowCount.isDefined) s"rowCount=${rowCount.get}" else "", - s"isBroadcastable=$isBroadcastable" - ) - output.filter(_.nonEmpty).mkString("Statistics(", ", ", ")") + override def toString: String = "Statistics(" + simpleString + ")" + + /** Readable string representation for the Statistics. */ + def simpleString: String = { + Seq(s"sizeInBytes=$sizeInBytes", + if (rowCount.isDefined) s"rowCount=${rowCount.get}" else "", + s"isBroadcastable=$isBroadcastable" + ).filter(_.nonEmpty).mkString("", ", ", "") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 027f3588e2..9fbcd48b4a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -468,6 +468,7 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF append(buffer, "Last Access Time:", new Date(table.lastAccessTime).toString, "") append(buffer, "Location:", table.storage.locationUri.getOrElse(""), "") append(buffer, "Table Type:", table.tableType.name, "") + table.stats.foreach(s => append(buffer, "Statistics:", s.simpleString, "")) append(buffer, "Table Parameters:", "", "") table.properties.foreach { case (key, value) => -- GitLab