diff --git a/core/src/main/scala/org/apache/spark/util/Benchmark.scala b/core/src/main/scala/org/apache/spark/util/Benchmark.scala index 1bf6f821e9b315da23377c9b34b391f5a9fdfcdf..39d182931076227fde8e88c6fb1c4ee89d6fd811 100644 --- a/core/src/main/scala/org/apache/spark/util/Benchmark.scala +++ b/core/src/main/scala/org/apache/spark/util/Benchmark.scala @@ -35,7 +35,8 @@ import org.apache.commons.lang3.SystemUtils * If outputPerIteration is true, the timing for each run will be printed to stdout. */ private[spark] class Benchmark( - name: String, valuesPerIteration: Long, + name: String, + valuesPerIteration: Long, iters: Int = 5, outputPerIteration: Boolean = false) { val benchmarks = mutable.ArrayBuffer.empty[Benchmark.Case] @@ -61,7 +62,6 @@ private[spark] class Benchmark( println val firstBest = results.head.bestMs - val firstAvg = results.head.avgMs // The results are going to be processor specific so it is useful to include that. println(Benchmark.getProcessorName()) printf("%-35s %16s %12s %13s %10s\n", name + ":", "Best/Avg Time(ms)", "Rate(M/s)", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala index 184f845b4dce2f0e2254c236b88f53dd719c67e6..5a929f211aaa4b3e6d52f693550987396d17cb05 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashBenchmark.scala @@ -29,9 +29,7 @@ import org.apache.spark.util.Benchmark */ object HashBenchmark { - def test(name: String, schema: StructType, iters: Int): Unit = { - val numRows = 1024 * 8 - + def test(name: String, schema: StructType, numRows: Int, iters: Int): Unit = { val generator = RandomDataGenerator.forType(schema, nullable = false).get val encoder = RowEncoder(schema) val attrs = schema.toAttributes @@ -70,7 +68,14 @@ object HashBenchmark { def main(args: Array[String]): Unit = { val simple = new StructType().add("i", IntegerType) - test("simple", simple, 1024) + /* + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + Hash For simple: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------- + interpreted version 941 / 955 142.6 7.0 1.0X + codegen version 1737 / 1775 77.3 12.9 0.5X + */ + test("simple", simple, 1 << 13, 1 << 14) val normal = new StructType() .add("null", NullType) @@ -87,18 +92,39 @@ object HashBenchmark { .add("binary", BinaryType) .add("date", DateType) .add("timestamp", TimestampType) - test("normal", normal, 128) + /* + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + Hash For normal: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------- + interpreted version 2209 / 2271 0.9 1053.4 1.0X + codegen version 1887 / 2018 1.1 899.9 1.2X + */ + test("normal", normal, 1 << 10, 1 << 11) val arrayOfInt = ArrayType(IntegerType) val array = new StructType() .add("array", arrayOfInt) .add("arrayOfArray", ArrayType(arrayOfInt)) - test("array", array, 64) + /* + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + Hash For array: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------- + interpreted version 1481 / 1529 0.1 11301.7 1.0X + codegen version 2591 / 2636 0.1 19771.1 0.6X + */ + test("array", array, 1 << 8, 1 << 9) val mapOfInt = MapType(IntegerType, IntegerType) val map = new StructType() .add("map", mapOfInt) .add("mapOfMap", MapType(IntegerType, mapOfInt)) - test("map", map, 64) + /* + Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz + Hash For map: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative + ------------------------------------------------------------------------------------------- + interpreted version 1820 / 1861 0.0 444347.2 1.0X + codegen version 205 / 223 0.0 49936.5 8.9X + */ + test("map", map, 1 << 6, 1 << 6) } }