Skip to content
Snippets Groups Projects
Commit eeb1d6db authored by uncleGen's avatar uncleGen Committed by Shixiong Zhu
Browse files

[SPARK-19859][SS][FOLLOW-UP] The new watermark should override the old one.

## What changes were proposed in this pull request?

A follow-up to SPARK-19859:

- extract the calculation of `delayMs` and reuse it.
- update EventTimeWatermarkExec so that it also removes an existing watermark delay from the metadata of the other output columns
- use the correct `delayMs` in EventTimeWatermark (include the interval's months component instead of only `delay.milliseconds`)

## How was this patch tested?

Jenkins.

Author: uncleGen <hustyugm@gmail.com>

Closes #17221 from uncleGen/SPARK-19859.
parent 029e40b4
No related branches found
No related tags found
No related merge requests found
...@@ -24,6 +24,12 @@ import org.apache.spark.unsafe.types.CalendarInterval ...@@ -24,6 +24,12 @@ import org.apache.spark.unsafe.types.CalendarInterval
object EventTimeWatermark { object EventTimeWatermark {
/** The [[org.apache.spark.sql.types.Metadata]] key used to hold the eventTime watermark delay. */ /** The [[org.apache.spark.sql.types.Metadata]] key used to hold the eventTime watermark delay. */
val delayKey = "spark.watermarkDelayMs" val delayKey = "spark.watermarkDelayMs"
def getDelayMs(delay: CalendarInterval): Long = {
// We define month as `31 days` to simplify calculation.
val millisPerMonth = CalendarInterval.MICROS_PER_DAY / 1000 * 31
delay.milliseconds + delay.months * millisPerMonth
}
} }
/** /**
...@@ -37,9 +43,10 @@ case class EventTimeWatermark( ...@@ -37,9 +43,10 @@ case class EventTimeWatermark(
// Update the metadata on the eventTime column to include the desired delay. // Update the metadata on the eventTime column to include the desired delay.
override val output: Seq[Attribute] = child.output.map { a => override val output: Seq[Attribute] = child.output.map { a =>
if (a semanticEquals eventTime) { if (a semanticEquals eventTime) {
val delayMs = EventTimeWatermark.getDelayMs(delay)
val updatedMetadata = new MetadataBuilder() val updatedMetadata = new MetadataBuilder()
.withMetadata(a.metadata) .withMetadata(a.metadata)
.putLong(EventTimeWatermark.delayKey, delay.milliseconds) .putLong(EventTimeWatermark.delayKey, delayMs)
.build() .build()
a.withMetadata(updatedMetadata) a.withMetadata(updatedMetadata)
} else if (a.metadata.contains(EventTimeWatermark.delayKey)) { } else if (a.metadata.contains(EventTimeWatermark.delayKey)) {
......
...@@ -84,10 +84,7 @@ case class EventTimeWatermarkExec( ...@@ -84,10 +84,7 @@ case class EventTimeWatermarkExec(
child: SparkPlan) extends SparkPlan { child: SparkPlan) extends SparkPlan {
val eventTimeStats = new EventTimeStatsAccum() val eventTimeStats = new EventTimeStatsAccum()
val delayMs = { val delayMs = EventTimeWatermark.getDelayMs(delay)
val millisPerMonth = CalendarInterval.MICROS_PER_DAY / 1000 * 31
delay.milliseconds + delay.months * millisPerMonth
}
sparkContext.register(eventTimeStats) sparkContext.register(eventTimeStats)
...@@ -105,10 +102,16 @@ case class EventTimeWatermarkExec( ...@@ -105,10 +102,16 @@ case class EventTimeWatermarkExec(
override val output: Seq[Attribute] = child.output.map { a => override val output: Seq[Attribute] = child.output.map { a =>
if (a semanticEquals eventTime) { if (a semanticEquals eventTime) {
val updatedMetadata = new MetadataBuilder() val updatedMetadata = new MetadataBuilder()
.withMetadata(a.metadata) .withMetadata(a.metadata)
.putLong(EventTimeWatermark.delayKey, delayMs) .putLong(EventTimeWatermark.delayKey, delayMs)
.build() .build()
a.withMetadata(updatedMetadata)
} else if (a.metadata.contains(EventTimeWatermark.delayKey)) {
// Remove existing watermark
val updatedMetadata = new MetadataBuilder()
.withMetadata(a.metadata)
.remove(EventTimeWatermark.delayKey)
.build()
a.withMetadata(updatedMetadata) a.withMetadata(updatedMetadata)
} else { } else {
a a
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment