Skip to content
Snippets Groups Projects
Commit 0c140c16 authored by uncleGen, committed by Shixiong Zhu
Browse files

[SPARK-19859][SS][FOLLOW-UP] The new watermark should override the old one.


## What changes were proposed in this pull request?

A follow up to SPARK-19859:

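- extract the calculation of `delayMs` into `EventTimeWatermark.getDelayMs` and reuse it.
- update `EventTimeWatermarkExec` so that it removes any existing watermark metadata from the other output columns.
- use the correct `delayMs` (including the months component of the delay) in `EventTimeWatermark`.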

## How was this patch tested?

Jenkins.

Author: uncleGen <hustyugm@gmail.com>

Closes #17221 from uncleGen/SPARK-19859.

(cherry picked from commit eeb1d6db)
Signed-off-by: Shixiong Zhu <shixiong@databricks.com>
parent 00859e14
No related branches found
No related tags found
No related merge requests found
......@@ -24,6 +24,12 @@ import org.apache.spark.unsafe.types.CalendarInterval
object EventTimeWatermark {
  /** The [[org.apache.spark.sql.types.Metadata]] key used to hold the eventTime watermark delay. */
  val delayKey = "spark.watermarkDelayMs"

  /**
   * Converts a watermark delay into a single millisecond value.
   *
   * The result is the interval's `milliseconds` value plus its `months` value
   * converted to milliseconds, where every month counts as exactly 31 days.
   *
   * @param delay the watermark delay interval
   * @return the delay expressed in milliseconds
   */
  def getDelayMs(delay: CalendarInterval): Long = {
    // A month is treated as a fixed 31 days to keep the conversion simple.
    val millisPerDay = CalendarInterval.MICROS_PER_DAY / 1000
    val monthsAsMillis = delay.months * (millisPerDay * 31)
    monthsAsMillis + delay.milliseconds
  }
}
/**
......@@ -37,9 +43,10 @@ case class EventTimeWatermark(
// Update the metadata on the eventTime column to include the desired delay.
override val output: Seq[Attribute] = child.output.map { a =>
if (a semanticEquals eventTime) {
val delayMs = EventTimeWatermark.getDelayMs(delay)
val updatedMetadata = new MetadataBuilder()
.withMetadata(a.metadata)
.putLong(EventTimeWatermark.delayKey, delay.milliseconds)
.putLong(EventTimeWatermark.delayKey, delayMs)
.build()
a.withMetadata(updatedMetadata)
} else if (a.metadata.contains(EventTimeWatermark.delayKey)) {
......
......@@ -84,10 +84,7 @@ case class EventTimeWatermarkExec(
child: SparkPlan) extends SparkPlan {
val eventTimeStats = new EventTimeStatsAccum()
val delayMs = {
val millisPerMonth = CalendarInterval.MICROS_PER_DAY / 1000 * 31
delay.milliseconds + delay.months * millisPerMonth
}
val delayMs = EventTimeWatermark.getDelayMs(delay)
sparkContext.register(eventTimeStats)
......@@ -105,10 +102,16 @@ case class EventTimeWatermarkExec(
override val output: Seq[Attribute] = child.output.map { a =>
if (a semanticEquals eventTime) {
val updatedMetadata = new MetadataBuilder()
.withMetadata(a.metadata)
.putLong(EventTimeWatermark.delayKey, delayMs)
.build()
.withMetadata(a.metadata)
.putLong(EventTimeWatermark.delayKey, delayMs)
.build()
a.withMetadata(updatedMetadata)
} else if (a.metadata.contains(EventTimeWatermark.delayKey)) {
// Remove existing watermark
val updatedMetadata = new MetadataBuilder()
.withMetadata(a.metadata)
.remove(EventTimeWatermark.delayKey)
.build()
a.withMetadata(updatedMetadata)
} else {
a
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment