Skip to content
Snippets Groups Projects
Commit 6bdddb6f authored by Xiangrui Meng's avatar Xiangrui Meng Committed by Michael Armbrust
Browse files

[SPARK-6361][SQL] support adding a column with metadata in DF

This is used by ML pipelines to embed ML attributes in columns created by ML transformers/estimators. marmbrus

Author: Xiangrui Meng <meng@databricks.com>

Closes #5151 from mengxr/SPARK-6361 and squashes the following commits:

bb30de3 [Xiangrui Meng] support adding a column with metadata in DF
parent a1d1529d
No related branches found
No related tags found
No related merge requests found
......@@ -95,9 +95,12 @@ abstract class Attribute extends NamedExpression {
* @param name the name to be associated with the result of computing [[child]].
* @param exprId A globally unique id used to check if an [[AttributeReference]] refers to this
* alias. Auto-assigned if left blank.
* @param explicitMetadata Explicit metadata associated with this alias. When defined, it overrides
* the child's metadata; when `None`, the child's metadata is used.
*/
case class Alias(child: Expression, name: String)
(val exprId: ExprId = NamedExpression.newExprId, val qualifiers: Seq[String] = Nil)
case class Alias(child: Expression, name: String)(
val exprId: ExprId = NamedExpression.newExprId,
val qualifiers: Seq[String] = Nil,
val explicitMetadata: Option[Metadata] = None)
extends NamedExpression with trees.UnaryNode[Expression] {
override type EvaluatedType = Any
......@@ -107,9 +110,11 @@ case class Alias(child: Expression, name: String)
override def dataType = child.dataType
override def nullable = child.nullable
override def metadata: Metadata = {
child match {
case named: NamedExpression => named.metadata
case _ => Metadata.empty
explicitMetadata.getOrElse {
child match {
case named: NamedExpression => named.metadata
case _ => Metadata.empty
}
}
}
......@@ -123,11 +128,12 @@ case class Alias(child: Expression, name: String)
override def toString: String = s"$child AS $name#${exprId.id}$typeSuffix"
override protected final def otherCopyArgs = exprId :: qualifiers :: Nil
override protected final def otherCopyArgs = exprId :: qualifiers :: explicitMetadata :: Nil
override def equals(other: Any): Boolean = other match {
case a: Alias =>
name == a.name && exprId == a.exprId && child == a.child && qualifiers == a.qualifiers
name == a.name && exprId == a.exprId && child == a.child && qualifiers == a.qualifiers &&
explicitMetadata == a.explicitMetadata
case _ => false
}
}
......
......@@ -594,6 +594,19 @@ class Column(protected[sql] val expr: Expression) {
*/
def as(alias: Symbol): Column = Alias(expr, alias.name)()
/**
 * Gives the column an alias and passes the given metadata to that alias as its
 * explicit metadata.
 * {{{
 *   val metadata: Metadata = ...
 *   df.select($"colA".as("colB", metadata))
 * }}}
 *
 * @param alias the name to give the column
 * @param metadata the metadata to associate with the aliased column
 * @group expr_ops
 */
def as(alias: String, metadata: Metadata): Column = {
  // Wrap the metadata so the alias receives it as explicit metadata.
  val aliasMetadata: Option[Metadata] = Some(metadata)
  Alias(expr, alias)(explicitMetadata = aliasMetadata)
}
/**
* Casts the column to a different data type.
* {{{
......
......@@ -17,12 +17,10 @@
package org.apache.spark.sql
import org.apache.spark.sql.catalyst.expressions.NamedExpression
import org.apache.spark.sql.catalyst.plans.logical.{Project, NoRelation}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.test.TestSQLContext
import org.apache.spark.sql.test.TestSQLContext.implicits._
import org.apache.spark.sql.types.{BooleanType, IntegerType, StructField, StructType}
import org.apache.spark.sql.types._
class ColumnExpressionSuite extends QueryTest {
......@@ -322,4 +320,15 @@ class ColumnExpressionSuite extends QueryTest {
assert('key.desc == 'key.desc)
assert('key.desc != 'key.asc)
}
test("alias with metadata") {
  // Metadata expected to appear on the aliased column only.
  val builder = new MetadataBuilder()
  val metadata = builder.putString("originName", "value").build()

  // Project every original column plus "value" re-aliased as "abc" with the metadata attached.
  val aliasedColumn = col("value").as("abc", metadata)
  val projected = testData.select($"*", aliasedColumn)
  val schema = projected.schema

  // The source column's metadata must stay empty; the alias must carry the explicit metadata.
  assert(schema("value").metadata === Metadata.empty)
  assert(schema("abc").metadata === metadata)
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment