Skip to content
Snippets Groups Projects
Commit 275a0c08 authored by Daoyuan Wang's avatar Daoyuan Wang Committed by Michael Armbrust
Browse files

[SPARK-5824] [SQL] add null format in ctas and set default col comment to null

Author: Daoyuan Wang <daoyuan.wang@intel.com>

Closes #4609 from adrian-wang/ctas and squashes the following commits:

0a75d5a [Daoyuan Wang] reorder import
93d1863 [Daoyuan Wang] add null format in ctas and set default col comment to null
parent cc552e04
No related branches found
No related tags found
No related merge requests found
Showing
with 61 additions and 1 deletion
......@@ -640,6 +640,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"nonblock_op_deduplicate",
"notable_alias1",
"notable_alias2",
"nullformatCTAS",
"nullgroup",
"nullgroup2",
"nullgroup3",
......
......@@ -240,7 +240,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with
val hiveSchema: JList[FieldSchema] = if (schema == null || schema.isEmpty) {
crtTbl.getCols
} else {
schema.map(attr => new FieldSchema(attr.name, toMetastoreType(attr.dataType), ""))
schema.map(attr => new FieldSchema(attr.name, toMetastoreType(attr.dataType), null))
}
tbl.setFields(hiveSchema)
......@@ -314,6 +314,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with
if (crtTbl != null && crtTbl.getLineDelim() != null) {
tbl.setSerdeParam(serdeConstants.LINE_DELIM, crtTbl.getLineDelim())
}
HiveShim.setTblNullFormat(crtTbl, tbl)
if (crtTbl != null && crtTbl.getSerdeProps() != null) {
val iter = crtTbl.getSerdeProps().entrySet().iterator()
......
1.0 1
1.0 1
1.0 1
1.0 1
1.0 1
NULL 1
NULL NULL
1.0 NULL
1.0 1
1.0 1
1.0 1
1.0 1
1.0 1
1.0 1
1.0 1
fooNull 1
fooNull fooNull
1.0 fooNull
1.0 1
1.0 1
a string
b string
c string
d string
Detailed Table Information Table(tableName:base_tab, dbName:default, owner:animal, createTime:1423973915, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:a, type:string, comment:null), FieldSchema(name:b, type:string, comment:null), FieldSchema(name:c, type:string, comment:null), FieldSchema(name:d, type:string, comment:null)], location:file:/tmp/sparkHiveWarehouse2573474017665704744/base_tab, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{numFiles=1, transient_lastDdlTime=1423973915, COLUMN_STATS_ACCURATE=true, totalSize=130, numRows=0, rawDataSize=0}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
a string
b string
Detailed Table Information Table(tableName:null_tab3, dbName:default, owner:animal, createTime:1423973928, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:a, type:string, comment:null), FieldSchema(name:b, type:string, comment:null)], location:file:/tmp/sparkHiveWarehouse2573474017665704744/null_tab3, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.null.format=fooNull, serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{numFiles=1, transient_lastDdlTime=1423973928, COLUMN_STATS_ACCURATE=true, totalSize=80, numRows=10, rawDataSize=70}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
CREATE TABLE `null_tab3`(
`a` string,
`b` string)
ROW FORMAT DELIMITED
NULL DEFINED AS 'fooNull'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'file:/tmp/sparkHiveWarehouse2573474017665704744/null_tab3'
TBLPROPERTIES (
'numFiles'='1',
'transient_lastDdlTime'='1423973928',
'COLUMN_STATS_ACCURATE'='true',
'totalSize'='80',
'numRows'='10',
'rawDataSize'='70')
......@@ -245,6 +245,8 @@ private[hive] object HiveShim {
/**
 * Prepares a [[Writable]] before it is handed to the serde.
 *
 * No-op in this shim variant: the value is returned unchanged (the
 * counterpart shim performs real preparation work).
 *
 * @param w the writable to prepare
 * @return the same writable, untouched
 */
def prepareWritable(w: Writable): Writable = w
def setTblNullFormat(crtTbl: CreateTableDesc, tbl: Table) = {}
}
class ShimFileSinkDesc(var dir: String, var tableInfo: TableDesc, var compressed: Boolean)
......
......@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.Context
import org.apache.hadoop.hive.ql.metadata.{Table, Hive, Partition}
import org.apache.hadoop.hive.ql.plan.{CreateTableDesc, FileSinkDesc, TableDesc}
import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory
import org.apache.hadoop.hive.serde.serdeConstants
import org.apache.hadoop.hive.serde2.typeinfo.{TypeInfo, DecimalTypeInfo, TypeInfoFactory}
import org.apache.hadoop.hive.serde2.objectinspector.primitive.{HiveDecimalObjectInspector, PrimitiveObjectInspectorFactory}
import org.apache.hadoop.hive.serde2.objectinspector.{PrimitiveObjectInspector, ObjectInspector}
......@@ -410,6 +411,12 @@ private[hive] object HiveShim {
}
w
}
/**
 * Copies a CTAS `NULL DEFINED AS` format from the create-table descriptor
 * onto the table's serde properties.
 *
 * When `crtTbl` carries a non-null null-format string, it is stored under
 * the `serialization.null.format` serde parameter so the serde writes that
 * token in place of NULL values.
 *
 * @param crtTbl the create-table descriptor; may be null or have no null format
 * @param tbl    the metastore table to configure
 */
def setTblNullFormat(crtTbl: CreateTableDesc, tbl: Table): Unit = {
  // Guard both the descriptor and its null format: either may legitimately be absent.
  if (crtTbl != null && crtTbl.getNullFormat() != null) {
    tbl.setSerdeParam(serdeConstants.SERIALIZATION_NULL_FORMAT, crtTbl.getNullFormat())
  }
}
}
/*
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment