Skip to content
Snippets Groups Projects
Commit 02d9c352 authored by Davies Liu's avatar Davies Liu Committed by Davies Liu
Browse files

[SPARK-14092] [SQL] move shouldStop() to end of while loop

## What changes were proposed in this pull request?

This PR rollback some changes in #11274 , which introduced some performance regression when do a simple aggregation on parquet scan with one integer column.

Does not really understand how this change introduce this huge impact, maybe related show JIT compiler inline functions. (saw very different stats from profiling).

## How was this patch tested?

Manually run the parquet reader benchmark, before this change:
```
Intel(R) Core(TM) i7-4558U CPU  2.80GHz
Int and String Scan:                Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------
SQL Parquet Vectorized                   2391 / 3107         43.9          22.8       1.0X
```
After this change
```
Java HotSpot(TM) 64-Bit Server VM 1.7.0_60-b19 on Mac OS X 10.9.5
Intel(R) Core(TM) i7-4558U CPU  2.80GHz
Int and String Scan:                Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-------------------------------------------------------------------------------------------
SQL Parquet Vectorized                   2032 / 2626         51.6          19.4       1.0X```

Author: Davies Liu <davies@databricks.com>

Closes #11912 from davies/fix_regression.
parent 30bdb5cb
No related branches found
No related tags found
No related merge requests found
......@@ -255,11 +255,12 @@ private[sql] case class DataSourceScan(
| $numOutputRows.add(numRows);
| }
|
| while (!shouldStop() && $idx < numRows) {
| // this loop is very perf sensitive and changes to it should be measured carefully
| while ($idx < numRows) {
| int $rowidx = $idx++;
| ${consume(ctx, columns1).trim}
| if (shouldStop()) return;
| }
| if (shouldStop()) return;
|
| if (!$input.hasNext()) {
| $batch = null;
......@@ -280,7 +281,7 @@ private[sql] case class DataSourceScan(
s"""
| private void $scanRows(InternalRow $row) throws java.io.IOException {
| boolean firstRow = true;
| while (!shouldStop() && (firstRow || $input.hasNext())) {
| while (firstRow || $input.hasNext()) {
| if (firstRow) {
| firstRow = false;
| } else {
......@@ -288,6 +289,7 @@ private[sql] case class DataSourceScan(
| }
| $numOutputRows.add(1);
| ${consume(ctx, columns2, inputRow).trim}
| if (shouldStop()) return;
| }
| }""".stripMargin)
......
......@@ -103,11 +103,12 @@ trait CodegenSupport extends SparkPlan {
* # call child.produce()
* initialized = true;
* }
* while (!shouldStop() && hashmap.hasNext()) {
* while (hashmap.hasNext()) {
* row = hashmap.next();
* # build the aggregation results
* # create variables for results
* # call consume(), which will call parent.doConsume()
* if (shouldStop()) return;
* }
*/
protected def doProduce(ctx: CodegenContext): String
......@@ -251,9 +252,10 @@ case class InputAdapter(child: SparkPlan) extends UnaryNode with CodegenSupport
ctx.currentVars = null
val columns = exprs.map(_.gen(ctx))
s"""
| while (!shouldStop() && $input.hasNext()) {
| while ($input.hasNext()) {
| InternalRow $row = (InternalRow) $input.next();
| ${consume(ctx, columns, row).trim}
| if (shouldStop()) return;
| }
""".stripMargin
}
......@@ -320,7 +322,7 @@ case class WholeStageCodegen(child: SparkPlan) extends UnaryNode with CodegenSup
/** Codegened pipeline for:
* ${toCommentSafeString(child.treeString.trim)}
*/
class GeneratedIterator extends org.apache.spark.sql.execution.BufferedRowIterator {
final class GeneratedIterator extends org.apache.spark.sql.execution.BufferedRowIterator {
private Object[] references;
${ctx.declareMutableStates()}
......
......@@ -282,13 +282,14 @@ case class Range(
| }
| }
|
| while (!$overflow && $checkEnd && !shouldStop()) {
| while (!$overflow && $checkEnd) {
| long $value = $number;
| $number += ${step}L;
| if ($number < $value ^ ${step}L < 0) {
| $overflow = true;
| }
| ${consume(ctx, Seq(ev))}
| if (shouldStop()) return;
| }
""".stripMargin
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment