Skip to content
Snippets Groups Projects
Commit 3b6ac323 authored by Burak Yavuz, committed by Josh Rosen
Browse files

[SPARK-18952][BACKPORT] Regex strings not properly escaped in codegen for aggregations

## What changes were proposed in this pull request?

Backport of #16361 to the 2.1 branch.

## How was this patch tested?

Unit tests

Author: Burak Yavuz <brkyvz@gmail.com>

Closes #16518 from brkyvz/reg-break-2.1.
parent 80a3e13e
No related branches found
No related tags found
No related merge requests found
......@@ -43,28 +43,30 @@ class RowBasedHashMapGenerator(
extends HashMapGenerator (ctx, aggregateExpressions, generatedClassName,
groupingKeySchema, bufferSchema) {
protected def initializeAggregateHashMap(): String = {
override protected def initializeAggregateHashMap(): String = {
val generatedKeySchema: String =
s"new org.apache.spark.sql.types.StructType()" +
groupingKeySchema.map { key =>
val keyName = ctx.addReferenceObj(key.name)
key.dataType match {
case d: DecimalType =>
s""".add("${key.name}", org.apache.spark.sql.types.DataTypes.createDecimalType(
s""".add("$keyName", org.apache.spark.sql.types.DataTypes.createDecimalType(
|${d.precision}, ${d.scale}))""".stripMargin
case _ =>
s""".add("${key.name}", org.apache.spark.sql.types.DataTypes.${key.dataType})"""
s""".add("$keyName", org.apache.spark.sql.types.DataTypes.${key.dataType})"""
}
}.mkString("\n").concat(";")
val generatedValueSchema: String =
s"new org.apache.spark.sql.types.StructType()" +
bufferSchema.map { key =>
val keyName = ctx.addReferenceObj(key.name)
key.dataType match {
case d: DecimalType =>
s""".add("${key.name}", org.apache.spark.sql.types.DataTypes.createDecimalType(
s""".add("$keyName", org.apache.spark.sql.types.DataTypes.createDecimalType(
|${d.precision}, ${d.scale}))""".stripMargin
case _ =>
s""".add("${key.name}", org.apache.spark.sql.types.DataTypes.${key.dataType})"""
s""".add("$keyName", org.apache.spark.sql.types.DataTypes.${key.dataType})"""
}
}.mkString("\n").concat(";")
......
......@@ -48,28 +48,30 @@ class VectorizedHashMapGenerator(
extends HashMapGenerator (ctx, aggregateExpressions, generatedClassName,
groupingKeySchema, bufferSchema) {
protected def initializeAggregateHashMap(): String = {
override protected def initializeAggregateHashMap(): String = {
val generatedSchema: String =
s"new org.apache.spark.sql.types.StructType()" +
(groupingKeySchema ++ bufferSchema).map { key =>
val keyName = ctx.addReferenceObj(key.name)
key.dataType match {
case d: DecimalType =>
s""".add("${key.name}", org.apache.spark.sql.types.DataTypes.createDecimalType(
s""".add("$keyName", org.apache.spark.sql.types.DataTypes.createDecimalType(
|${d.precision}, ${d.scale}))""".stripMargin
case _ =>
s""".add("${key.name}", org.apache.spark.sql.types.DataTypes.${key.dataType})"""
s""".add("$keyName", org.apache.spark.sql.types.DataTypes.${key.dataType})"""
}
}.mkString("\n").concat(";")
val generatedAggBufferSchema: String =
s"new org.apache.spark.sql.types.StructType()" +
bufferSchema.map { key =>
val keyName = ctx.addReferenceObj(key.name)
key.dataType match {
case d: DecimalType =>
s""".add("${key.name}", org.apache.spark.sql.types.DataTypes.createDecimalType(
s""".add("$keyName", org.apache.spark.sql.types.DataTypes.createDecimalType(
|${d.precision}, ${d.scale}))""".stripMargin
case _ =>
s""".add("${key.name}", org.apache.spark.sql.types.DataTypes.${key.dataType})"""
s""".add("$keyName", org.apache.spark.sql.types.DataTypes.${key.dataType})"""
}
}.mkString("\n").concat(";")
......
......@@ -97,6 +97,15 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
)
}
test("SPARK-18952: regexes fail codegen when used as keys due to bad forward-slash escapes") {
  // Regression check: group by an expression whose regex pattern contains an escaped '['
  // (the "\\[" in the pattern below) and verify the aggregation returns the single
  // expected group with its count.
  val input = Seq(("some[thing]", "random-string")).toDF("key", "val")
  val groupingExpr = regexp_extract('key, "([a-z]+)\\[", 1)
  checkAnswer(input.groupBy(groupingExpr).count(), Seq(Row("some", 1)))
}
test("rollup") {
checkAnswer(
courseSales.rollup("course", "year").sum("earnings"),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment