Skip to content

Commit 7a2ea58

Browse files
maropudongjoon-hyun
authored and committed
[SPARK-29084][SQL][TESTS] Check method bytecode size in BenchmarkQueryTest
### What changes were proposed in this pull request? This PR proposes to check the method bytecode size in `BenchmarkQueryTest`. This metric is critical for performance numbers. ### Why are the changes needed? For performance checks. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? N/A. Closes apache#25788 from maropu/CheckMethodSize. Authored-by: Takeshi Yamamuro <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 51d3509 commit 7a2ea58

File tree

3 files changed

+19
-7
lines changed

3 files changed

+19
-7
lines changed

sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
package org.apache.spark.sql
1919

2020
import org.apache.spark.internal.config.Tests.IS_TESTING
21-
import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodeGenerator}
21+
import org.apache.spark.sql.catalyst.expressions.codegen.{ByteCodeStats, CodeFormatter, CodeGenerator}
2222
import org.apache.spark.sql.catalyst.rules.RuleExecutor
2323
import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec}
2424
import org.apache.spark.sql.test.SharedSparkSession
@@ -48,7 +48,7 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession {
4848
RuleExecutor.resetMetrics()
4949
}
5050

51-
protected def checkGeneratedCode(plan: SparkPlan): Unit = {
51+
protected def checkGeneratedCode(plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = {
5252
val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]()
5353
plan foreach {
5454
case s: WholeStageCodegenExec =>
@@ -57,7 +57,7 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession {
5757
}
5858
codegenSubtrees.toSeq.foreach { subtree =>
5959
val code = subtree.doCodeGen()._2
60-
try {
60+
val (_, ByteCodeStats(maxMethodCodeSize, _, _)) = try {
6161
// Just check the generated code can be properly compiled
6262
CodeGenerator.compile(code)
6363
} catch {
@@ -72,6 +72,11 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession {
7272
""".stripMargin
7373
throw new Exception(msg, e)
7474
}
75+
76+
assert(!checkMethodCodeSize ||
77+
maxMethodCodeSize <= CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT,
78+
s"too long generated codes found in the WholeStageCodegenExec subtree (id=${subtree.id}) " +
79+
s"and JIT optimization might not work:\n${subtree.treeString}")
7580
}
7681
}
7782
}

sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,19 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSSchema {
8282
"q3", "q7", "q10", "q19", "q27", "q34", "q42", "q43", "q46", "q52", "q53", "q55", "q59",
8383
"q63", "q65", "q68", "q73", "q79", "q89", "q98", "ss_max")
8484

85+
// List the known queries whose generated code contains a function that is too large.
86+
// The JIRA ticket filed for `modified-q3` is:
87+
// [SPARK-29128] Split predicate code in OR expressions
88+
val blackListForMethodCodeSizeCheck = Set("modified-q3")
89+
8590
modifiedTPCDSQueries.foreach { name =>
8691
val queryString = resourceToString(s"tpcds-modifiedQueries/$name.sql",
8792
classLoader = Thread.currentThread().getContextClassLoader)
88-
test(s"modified-$name") {
93+
val testName = s"modified-$name"
94+
test(testName) {
8995
// check the plans can be properly generated
9096
val plan = sql(queryString).queryExecution.executedPlan
91-
checkGeneratedCode(plan)
97+
checkGeneratedCode(plan, !blackListForMethodCodeSizeCheck.contains(testName))
9298
}
9399
}
94100
}

sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ import org.apache.spark.sql.execution.window.WindowExec
3232

3333
class LogicalPlanTagInSparkPlanSuite extends TPCDSQuerySuite {
3434

35-
override protected def checkGeneratedCode(plan: SparkPlan): Unit = {
36-
super.checkGeneratedCode(plan)
35+
override protected def checkGeneratedCode(
36+
plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = {
37+
super.checkGeneratedCode(plan, checkMethodCodeSize)
3738
checkLogicalPlanTag(plan)
3839
}
3940

0 commit comments

Comments
 (0)