diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala
index 692cbd7c0d32c65a8e46d44aad32fedf563a8081..c2cd8951bc9c1842e759ca9e6a14f466e6453346 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala
@@ -22,11 +22,11 @@ import java.nio.ByteBuffer
 import com.google.common.primitives.{Doubles, Ints, Longs}
 
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.{InternalRow}
+import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile.{PercentileDigest}
+import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile.PercentileDigest
 import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData}
 import org.apache.spark.sql.catalyst.util.QuantileSummaries
 import org.apache.spark.sql.catalyst.util.QuantileSummaries.{defaultCompressThreshold, Stats}
@@ -71,7 +71,8 @@ case class ApproximatePercentile(
     percentageExpression: Expression,
     accuracyExpression: Expression,
     override val mutableAggBufferOffset: Int,
-    override val inputAggBufferOffset: Int) extends TypedImperativeAggregate[PercentileDigest] {
+    override val inputAggBufferOffset: Int)
+  extends TypedImperativeAggregate[PercentileDigest] with ImplicitCastInputTypes {
 
   def this(child: Expression, percentageExpression: Expression, accuracyExpression: Expression) = {
     this(child, percentageExpression, accuracyExpression, 0, 0)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala
index d523420530c2caf9b871ed9c6739df27d9576200..c423e17169e8513746772fb66b475b1674469a5b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.types._
 
 @ExpressionDescription(
   usage = "_FUNC_(expr) - Returns the mean calculated from values of a group.")
-case class Average(child: Expression) extends DeclarativeAggregate {
+case class Average(child: Expression) extends DeclarativeAggregate with ImplicitCastInputTypes {
 
   override def prettyName: String = "avg"
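Both hunks above follow the same pattern: the aggregate now mixes in `ImplicitCastInputTypes` itself, which it previously inherited from `AggregateFunction`, so implicit coercion of its arguments is preserved. A minimal spark-shell sketch of the user-visible behavior this keeps intact (the view name `t` and column `v` are invented for illustration):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("avg-cast-demo").getOrCreate()
import spark.implicits._

// Because Average still mixes in ImplicitCastInputTypes, the analyzer keeps
// inserting a cast from string to double underneath avg().
Seq("1", "2", "3").toDF("v").createOrReplaceTempView("t")
spark.sql("SELECT avg(v) FROM t").explain(true) // analyzed plan contains avg(cast(v as double))
spark.sql("SELECT avg(v) FROM t").show()        // 2.0
```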
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
index 1a93f4590331b621dc93bf8e957cea1823c4e8b0..572d29caf5bc999a6b32f180d5c602a729eec45c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
@@ -42,7 +42,8 @@ import org.apache.spark.sql.types._
  *
  * @param child to compute central moments of.
  */
-abstract class CentralMomentAgg(child: Expression) extends DeclarativeAggregate {
+abstract class CentralMomentAgg(child: Expression)
+  extends DeclarativeAggregate with ImplicitCastInputTypes {
 
   /**
    * The central moment order to be computed.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala
index 657f519d2a05efe3058fe519181592ddbf12ad41..95a4a0d5af634b781871dc40825fcc3f35f753d6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala
@@ -32,7 +32,8 @@ import org.apache.spark.sql.types._
 @ExpressionDescription(
   usage = "_FUNC_(expr1, expr2) - Returns Pearson coefficient of correlation between a set of number pairs.")
 // scalastyle:on line.size.limit
-case class Corr(x: Expression, y: Expression) extends DeclarativeAggregate {
+case class Corr(x: Expression, y: Expression)
+  extends DeclarativeAggregate with ImplicitCastInputTypes {
 
   override def children: Seq[Expression] = Seq(x, y)
 
   override def nullable: Boolean = true
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala
index bcae0dc0754c4fed33acf42dd88b53a0c4785a7a..1990f2f2f0722e93813c5d456c73a8b7cc08b375 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala
@@ -38,9 +38,6 @@ case class Count(children: Seq[Expression]) extends DeclarativeAggregate {
   // Return data type.
   override def dataType: DataType = LongType
 
-  // Expected input data type.
-  override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(AnyDataType)
-
   private lazy val count = AttributeReference("count", LongType, nullable = false)()
 
   override lazy val aggBufferAttributes = count :: Nil
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAgg.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAgg.scala
index 1bfae9e5a4d38b51e6a81cad18d3aa9fd31f0d85..f5f185f2c54287180adf24e7a1aae3ef103cbc64 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAgg.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CountMinSketchAgg.scala
@@ -22,7 +22,7 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
-import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription}
+import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, ExpressionDescription}
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
 import org.apache.spark.util.sketch.CountMinSketch
@@ -52,7 +52,8 @@ case class CountMinSketchAgg(
     confidenceExpression: Expression,
     seedExpression: Expression,
     override val mutableAggBufferOffset: Int,
-    override val inputAggBufferOffset: Int) extends TypedImperativeAggregate[CountMinSketch] {
+    override val inputAggBufferOffset: Int)
+  extends TypedImperativeAggregate[CountMinSketch] with ExpectsInputTypes {
 
   def this(
       child: Expression,
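The `inputTypes` removed from Count expected `AnyDataType` for every child, which can neither fail a check nor trigger a cast, so the override was pure boilerplate once the base class stopped requiring it. CountMinSketchAgg instead opts into `ExpectsInputTypes`, which validates the declared types but never coerces. A hedged sketch of the observable difference (reusing the `spark` session from the earlier sketch):

```scala
// count is unconstrained: any input type, including maps, still works.
spark.sql("SELECT count(map('k', id)) FROM range(3)").show() // 3

// count_min_sketch declares DoubleType for eps/confidence and, with
// ExpectsInputTypes, gets no implicit cast: the arguments must already be
// doubles, hence the D-suffixed literals.
spark.sql("SELECT count_min_sketch(id, 0.5D, 0.5D, 42) FROM range(3)").show()
```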
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala
index ae5ed779700b6d1a1eef0da114fb9dc77936a6ae..fc6c34baafdd10df913900dac049dc0bf9ea3991 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala
@@ -25,7 +25,8 @@ import org.apache.spark.sql.types._
 * Compute the covariance between two expressions.
 * When applied on empty data (i.e., count is zero), it returns NULL.
 */
-abstract class Covariance(x: Expression, y: Expression) extends DeclarativeAggregate {
+abstract class Covariance(x: Expression, y: Expression)
+  extends DeclarativeAggregate with ImplicitCastInputTypes {
 
   override def children: Seq[Expression] = Seq(x, y)
 
   override def nullable: Boolean = true
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
index 29b894798000440d7380c369b66887076bd66ae6..bfc58c22886cc41d6c51c599f756ab57cea06671 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
@@ -17,7 +17,8 @@
 
 package org.apache.spark.sql.catalyst.expressions.aggregate
 
-import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.types._
 
@@ -33,16 +34,11 @@ import org.apache.spark.sql.types._
     _FUNC_(expr[, isIgnoreNull]) - Returns the first value of `expr` for a group of rows. If
     `isIgnoreNull` is true, returns only non-null values.
   """)
-case class First(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate {
+case class First(child: Expression, ignoreNullsExpr: Expression)
+  extends DeclarativeAggregate with ExpectsInputTypes {
 
   def this(child: Expression) = this(child, Literal.create(false, BooleanType))
 
-  private val ignoreNulls: Boolean = ignoreNullsExpr match {
-    case Literal(b: Boolean, BooleanType) => b
-    case _ =>
-      throw new AnalysisException("The second argument of First should be a boolean literal.")
-  }
-
   override def children: Seq[Expression] = child :: ignoreNullsExpr :: Nil
 
   override def nullable: Boolean = true
@@ -56,6 +52,20 @@ case class First(child: Expression, ignoreNullsExpr: Expression) extends Declara
   // Expected input data type.
   override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, BooleanType)
 
+  override def checkInputDataTypes(): TypeCheckResult = {
+    val defaultCheck = super.checkInputDataTypes()
+    if (defaultCheck.isFailure) {
+      defaultCheck
+    } else if (!ignoreNullsExpr.foldable) {
+      TypeCheckFailure(
+        s"The second argument of First must be a boolean literal, but got: ${ignoreNullsExpr.sql}")
+    } else {
+      TypeCheckSuccess
+    }
+  }
+
+  private def ignoreNulls: Boolean = ignoreNullsExpr.eval().asInstanceOf[Boolean]
+
   private lazy val first = AttributeReference("first", child.dataType)()
 
   private lazy val valueSet = AttributeReference("valueSet", BooleanType)()
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala
index 77b7eb228edc5354076ee4c8b1c003503692e875..d5c9166443d739c01634993930d814335ba2f215 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala
@@ -140,8 +140,6 @@ case class HyperLogLogPlusPlus(
 
   override def dataType: DataType = LongType
 
-  override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType)
-
   override def aggBufferSchema: StructType = StructType.fromAttributes(aggBufferAttributes)
 
   /** Allocate enough words to store all registers. */
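First's validation of its second argument moves from construction time into the analyzer: `ExpectsInputTypes` checks that it is a boolean, and the new `checkInputDataTypes` override turns a non-foldable argument into a `TypeCheckFailure` instead of an `AnalysisException` thrown from the constructor; `ignoreNulls` is then evaluated lazily from the already-validated expression. Roughly, at the SQL level:

```scala
// (reusing the `spark` session from the first sketch)
spark.sql("SELECT first(id, true) FROM range(3)").show() // fine: foldable boolean

// A non-foldable second argument now fails analysis with the new message
// rather than blowing up while the expression tree is being built:
// spark.sql("SELECT first(id, id > 0) FROM range(3)")
// => ... The second argument of First must be a boolean literal, but got: ...
```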
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
index b0a363e7d6dce03482105afa8ea3f6350c88705e..96a6ec08a160a697976b9c5007bf8ea181c792cc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
@@ -17,7 +17,8 @@
 
 package org.apache.spark.sql.catalyst.expressions.aggregate
 
-import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.types._
 
@@ -33,16 +34,11 @@ import org.apache.spark.sql.types._
     _FUNC_(expr[, isIgnoreNull]) - Returns the last value of `expr` for a group of rows. If
     `isIgnoreNull` is true, returns only non-null values.
   """)
-case class Last(child: Expression, ignoreNullsExpr: Expression) extends DeclarativeAggregate {
+case class Last(child: Expression, ignoreNullsExpr: Expression)
+  extends DeclarativeAggregate with ExpectsInputTypes {
 
   def this(child: Expression) = this(child, Literal.create(false, BooleanType))
 
-  private val ignoreNulls: Boolean = ignoreNullsExpr match {
-    case Literal(b: Boolean, BooleanType) => b
-    case _ =>
-      throw new AnalysisException("The second argument of First should be a boolean literal.")
-  }
-
   override def children: Seq[Expression] = child :: ignoreNullsExpr :: Nil
 
   override def nullable: Boolean = true
@@ -56,6 +52,20 @@ case class Last(child: Expression, ignoreNullsExpr: Expression) extends Declarat
   // Expected input data type.
   override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, BooleanType)
 
+  override def checkInputDataTypes(): TypeCheckResult = {
+    val defaultCheck = super.checkInputDataTypes()
+    if (defaultCheck.isFailure) {
+      defaultCheck
+    } else if (!ignoreNullsExpr.foldable) {
+      TypeCheckFailure(
+        s"The second argument of Last must be a boolean literal, but got: ${ignoreNullsExpr.sql}")
+    } else {
+      TypeCheckSuccess
+    }
+  }
+
+  private def ignoreNulls: Boolean = ignoreNullsExpr.eval().asInstanceOf[Boolean]
+
   private lazy val last = AttributeReference("last", child.dataType)()
 
   private lazy val valueSet = AttributeReference("valueSet", BooleanType)()
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala
index f32c9c677a864108f0e5071f200878f44e579a26..58fd1d8620e16a31dffbe69d818848c9fbb8668c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Max.scala
@@ -33,9 +33,6 @@ case class Max(child: Expression) extends DeclarativeAggregate {
   // Return data type.
   override def dataType: DataType = child.dataType
 
-  // Expected input data type.
-  override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType)
-
   override def checkInputDataTypes(): TypeCheckResult =
     TypeUtils.checkForOrderingExpr(child.dataType, "function max")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala
index 9ef42b96975afcdba7caf65982d8237e587d93bf..b2724ee76827c384344de7c8c53411bd33bc101e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Min.scala
@@ -33,9 +33,6 @@ case class Min(child: Expression) extends DeclarativeAggregate {
   // Return data type.
   override def dataType: DataType = child.dataType
 
-  // Expected input data type.
-  override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType)
-
   override def checkInputDataTypes(): TypeCheckResult =
     TypeUtils.checkForOrderingExpr(child.dataType, "function min")
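Max and Min never needed a declared expectation at all: they accept any type with an ordering and reject the rest in `checkInputDataTypes` via `TypeUtils.checkForOrderingExpr`, so the `AnyDataType` declarations were dead code. A sketch of the behavior that check alone provides (again assuming the shared `spark` session):

```scala
spark.sql("SELECT max(named_struct('a', id)) FROM range(3)").show() // structs are orderable

// Maps have no ordering, so this fails analysis inside checkInputDataTypes:
// spark.sql("SELECT max(map('k', id)) FROM range(3)")
// => ... function max does not support ordering on type map ...
```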
""") case class Percentile( - child: Expression, - percentageExpression: Expression, - mutableAggBufferOffset: Int = 0, - inputAggBufferOffset: Int = 0) extends TypedImperativeAggregate[OpenHashMap[Number, Long]] { + child: Expression, + percentageExpression: Expression, + mutableAggBufferOffset: Int = 0, + inputAggBufferOffset: Int = 0) + extends TypedImperativeAggregate[OpenHashMap[Number, Long]] with ImplicitCastInputTypes { def this(child: Expression, percentageExpression: Expression) = { this(child, percentageExpression, 0, 0) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala index 087606077295f10604add844eb7567b39f4d5b33..9ad31243e4122a547cae4aa4d5aff1b768091711 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala @@ -77,8 +77,6 @@ case class PivotFirst( override val children: Seq[Expression] = pivotColumn :: valueColumn :: Nil - override lazy val inputTypes: Seq[AbstractDataType] = children.map(_.dataType) - override val nullable: Boolean = false val valueDataType = valueColumn.dataType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala index f3731d40058e33bd2af337f78cc15547c57b320e..96e8ceec6d4c3caf5341a1707697e3748cef4798 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.types._ @ExpressionDescription( usage = "_FUNC_(expr) - Returns the sum calculated from values of a group.") -case class Sum(child: Expression) extends DeclarativeAggregate { +case class Sum(child: Expression) extends DeclarativeAggregate with ImplicitCastInputTypes { override def children: Seq[Expression] = child :: Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala index d2880d58aefe1853a2820ae4d2b7cd6ba8189304..b176e2a128f43923e580e7b60de35f920fcf4fcf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala @@ -44,8 +44,6 @@ abstract class Collect extends ImperativeAggregate { override def dataType: DataType = ArrayType(child.dataType) - override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType) - override def supportsPartial: Boolean = false override def aggBufferAttributes: Seq[AttributeReference] = Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala index f3fd58bc98ef65d241f51b6dd7a7df6710f455ab..7397b60360273dc2f843d947d0d70f8d2436a9ab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala @@ -155,7 +155,7 @@ case class AggregateExpression( * 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala
index f3fd58bc98ef65d241f51b6dd7a7df6710f455ab..7397b60360273dc2f843d947d0d70f8d2436a9ab 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala
@@ -155,7 +155,7 @@ case class AggregateExpression(
 * Code which accepts [[AggregateFunction]] instances should be prepared to handle both types of
 * aggregate functions.
 */
-sealed abstract class AggregateFunction extends Expression with ImplicitCastInputTypes {
+sealed abstract class AggregateFunction extends Expression {
 
   /** An aggregate function is not foldable. */
   final override def foldable: Boolean = false
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index 3cbbcdf4a96cc7d5f6d60aec903e8610c21e1c1f..c0d6a6b92be7e9a3c0dd81a8420ece54be550750 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -443,7 +443,6 @@ abstract class AggregateWindowFunction extends DeclarativeAggregate with WindowF
 
 abstract class RowNumberLike extends AggregateWindowFunction {
   override def children: Seq[Expression] = Nil
-  override def inputTypes: Seq[AbstractDataType] = Nil
   protected val zero = Literal(0)
   protected val one = Literal(1)
   protected val rowNumber = AttributeReference("rowNumber", IntegerType, nullable = false)()
@@ -600,7 +599,6 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow
 * This documentation has been based upon similar documentation for the Hive and Presto projects.
 */
 abstract class RankLike extends AggregateWindowFunction {
-  override def inputTypes: Seq[AbstractDataType] = children.map(_ => AnyDataType)
 
   /** Store the values of the window 'order' expressions. */
   protected val orderAttrs = children.map { expr =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TypedAggregateExpression.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TypedAggregateExpression.scala
index 6f7f2f842c4262f8a73481c664586ea60a507e4a..9911c0b33a30a35bfd443b88202288d52710c654 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TypedAggregateExpression.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TypedAggregateExpression.scala
@@ -81,8 +81,6 @@ case class TypedAggregateExpression(
 
   override def references: AttributeSet = AttributeSet(inputDeserializer.toSeq)
 
-  override def inputTypes: Seq[AbstractDataType] = Nil
-
   private def aggregatorLiteral =
     Literal.create(aggregator, ObjectType(classOf[Aggregator[Any, Any, Any]]))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala
index 67760f334e406bb1a5186db1e1c3656affa2b0a2..ae5e2c6bece2a7bc1b32347fb5d7ad142506faa6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala
@@ -324,7 +324,7 @@ case class ScalaUDAF(
     udaf: UserDefinedAggregateFunction,
     mutableAggBufferOffset: Int = 0,
     inputAggBufferOffset: Int = 0)
-  extends ImperativeAggregate with NonSQLExpression with Logging {
+  extends ImperativeAggregate with NonSQLExpression with Logging with ImplicitCastInputTypes {
 
   override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate =
     copy(mutableAggBufferOffset = newMutableAggBufferOffset)
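The interfaces.scala hunk is the heart of the patch: `AggregateFunction` stops forcing every aggregate to be an `ImplicitCastInputTypes`, so each concrete function opts into exactly the contract it needs, while window functions, typed aggregates, and Hive UDAFs drop the `inputTypes` stubs they only carried to satisfy the inherited abstract member. A compilable toy model of the new shape (simplified names; not the actual Spark hierarchy):

```scala
object MixinDemo {
  sealed trait AbstractDataType
  case object NumericType extends AbstractDataType

  trait Expression
  // Declares expected input types, checked during analysis but never coerced.
  trait ExpectsInputTypes extends Expression { def inputTypes: Seq[AbstractDataType] }
  // Additionally lets the analyzer insert casts toward inputTypes.
  trait ImplicitCastInputTypes extends ExpectsInputTypes

  // After the patch: the base class imposes no input-type contract.
  abstract class AggregateFunction extends Expression

  // Aggregates that want coercion opt in explicitly...
  class Average extends AggregateFunction with ImplicitCastInputTypes {
    override def inputTypes: Seq[AbstractDataType] = Seq(NumericType)
  }
  // ...while the likes of Max, Count, RowNumberLike and HiveUDAFFunction
  // simply declare nothing instead of stubbing out inputTypes.
  class Max extends AggregateFunction

  def main(args: Array[String]): Unit =
    println(new Average().inputTypes) // List(NumericType)
}
```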
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CountMinSketchAggQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CountMinSketchAggQuerySuite.scala
index 4cc50604bc13a7677397e051b48a619d7d7eba07..3e715a393e530f63c686344bbd87201fb221dde5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CountMinSketchAggQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CountMinSketchAggQuerySuite.scala
@@ -110,9 +110,11 @@ class CountMinSketchAggQuerySuite extends QueryTest with SharedSQLContext {
     withTempView(table) {
       val rdd: RDD[Row] = spark.sparkContext.parallelize(data)
       spark.createDataFrame(rdd, schema).createOrReplaceTempView(table)
-      val cmsSql = schema.fieldNames.map(col => s"count_min_sketch($col, $eps, $confidence, $seed)")
-        .mkString(", ")
-      val result = sql(s"SELECT $cmsSql FROM $table").head()
+
+      val cmsSql = schema.fieldNames.map { col =>
+        s"count_min_sketch($col, ${eps}D, ${confidence}D, $seed)"
+      }
+      val result = sql(s"SELECT ${cmsSql.mkString(", ")} FROM $table").head()
       schema.indices.foreach { i =>
         val binaryData = result.getAs[Array[Byte]](i)
         val in = new ByteArrayInputStream(binaryData)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala
index 07599152e2ce6fbec5b349aaef5109042f3478d5..70c39518ab6e3f406e5d25903e35501af8d04712 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala
@@ -21,13 +21,13 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, Da
 
 import org.apache.spark.sql.TypedImperativeAggregateSuite.TypedMax
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.{BoundReference, Expression, GenericInternalRow, SpecificInternalRow}
+import org.apache.spark.sql.catalyst.expressions.{BoundReference, Expression, GenericInternalRow, ImplicitCastInputTypes, SpecificInternalRow}
 import org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate
 import org.apache.spark.sql.execution.aggregate.HashAggregateExec
 import org.apache.spark.sql.expressions.Window
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.test.SharedSQLContext
-import org.apache.spark.sql.types.{AbstractDataType, BinaryType, DataType, IntegerType, LongType}
+import org.apache.spark.sql.types._
 
 class TypedImperativeAggregateSuite extends QueryTest with SharedSQLContext {
@@ -231,7 +231,8 @@ object TypedImperativeAggregateSuite {
       child: Expression,
       nullable: Boolean = false,
       mutableAggBufferOffset: Int = 0,
-      inputAggBufferOffset: Int = 0) extends TypedImperativeAggregate[MaxValue] {
+      inputAggBufferOffset: Int = 0)
+    extends TypedImperativeAggregate[MaxValue] with ImplicitCastInputTypes {
 
     override def createAggregationBuffer(): MaxValue = {
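The suite change is a direct consequence of CountMinSketchAgg moving to `ExpectsInputTypes`: an unsuffixed literal such as `0.001` parses as a decimal, which would previously have been cast to double implicitly but now fails the type check, so the generated SQL must spell the eps and confidence arguments as double literals. What the interpolation now produces, for illustration (values and column name invented):

```scala
// Illustrative only: the shape of the generated query fragment.
val eps = 0.001
val confidence = 0.99
val seed = 11
val fragment = s"count_min_sketch(c1, ${eps}D, ${confidence}D, $seed)"
println(fragment) // count_min_sketch(c1, 0.001D, 0.99D, 11)
```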
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
index 90e86959cd0e47fd864316e659c8d9454410eafb..349faae40b8bfda035bc4c6c65a6cbb519ad707c 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
@@ -378,10 +378,6 @@ private[hive] case class HiveUDAFFunction(
   @transient
   private lazy val aggBufferSerDe: AggregationBufferSerDe = new AggregationBufferSerDe
 
-  // We rely on Hive to check the input data types, so use `AnyDataType` here to bypass our
-  // catalyst type checking framework.
-  override def inputTypes: Seq[AbstractDataType] = children.map(_ => AnyDataType)
-
   override def nullable: Boolean = true
 
   override def supportsPartial: Boolean = true
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/TestingTypedCount.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/TestingTypedCount.scala
index a3d48d98f886e3c1552dddfe4ab8f9f2463f5f0b..d27287bad04ca7458f91169240b527cb0ecfbd8b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/TestingTypedCount.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/TestingTypedCount.scala
@@ -71,8 +71,6 @@ case class TestingTypedCount(
     TestingTypedCount.State(dataStream.readLong())
   }
 
-  override def inputTypes: Seq[AbstractDataType] = AnyDataType :: Nil
-
   override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate =
     copy(mutableAggBufferOffset = newMutableAggBufferOffset)
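Finally, aggregates that genuinely accept anything, such as HiveUDAFFunction (where Hive performs its own argument checking) and the test-only TestingTypedCount, now declare nothing and fall back to the default type check on `Expression`, which succeeds. A compilable toy of that fallback (simplified; not the Spark source):

```scala
object DefaultCheckDemo {
  sealed trait TypeCheckResult
  case object TypeCheckSuccess extends TypeCheckResult

  abstract class Expression {
    // Default: no declared expectations, so the check trivially passes.
    def checkInputDataTypes(): TypeCheckResult = TypeCheckSuccess
  }

  // A HiveUDAFFunction-like aggregate needs no inputTypes override; bad
  // argument types surface from Hive's own function resolution instead.
  class HiveUDAFLike extends Expression

  def main(args: Array[String]): Unit =
    println(new HiveUDAFLike().checkInputDataTypes()) // TypeCheckSuccess
}
```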