
Commit 4f10e54

wangyum authored and cloud-fan committed
[SPARK-29655][SQL] Read bucketed tables obeys spark.sql.shuffle.partitions
### What changes were proposed in this pull request?

To avoid frequently changing the value of `spark.sql.adaptive.shuffle.maxNumPostShufflePartitions`, we usually set `spark.sql.adaptive.shuffle.maxNumPostShufflePartitions` much larger than `spark.sql.shuffle.partitions` after enabling adaptive execution. This makes some bucket map joins lose their benefit and adds extra `ShuffleExchange` nodes.

How to reproduce:

```scala
val bucketedTableName = "bucketed_table"
spark.range(10000).write.bucketBy(500, "id").sortBy("id").mode(org.apache.spark.sql.SaveMode.Overwrite).saveAsTable(bucketedTableName)
val bucketedTable = spark.table(bucketedTableName)
val df = spark.range(8)

spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)
// Spark 2.4. spark.sql.adaptive.enabled=false
// We set spark.sql.shuffle.partitions <= 500 every time based on our data in this case.
spark.conf.set("spark.sql.shuffle.partitions", 500)
bucketedTable.join(df, "id").explain()

// Since 3.0. We enable adaptive execution and set spark.sql.adaptive.shuffle.maxNumPostShufflePartitions to a larger value to fit more cases.
spark.conf.set("spark.sql.adaptive.enabled", true)
spark.conf.set("spark.sql.adaptive.shuffle.maxNumPostShufflePartitions", 1000)
bucketedTable.join(df, "id").explain()
```

```
scala> bucketedTable.join(df, "id").explain()
== Physical Plan ==
*(4) Project [id#5L]
+- *(4) SortMergeJoin [id#5L], [id#7L], Inner
   :- *(1) Sort [id#5L ASC NULLS FIRST], false, 0
   :  +- *(1) Project [id#5L]
   :     +- *(1) Filter isnotnull(id#5L)
   :        +- *(1) ColumnarToRow
   :           +- FileScan parquet default.bucketed_table[id#5L] Batched: true, DataFilters: [isnotnull(id#5L)], Format: Parquet, Location: InMemoryFileIndex[file:/root/opensource/apache-spark/spark-3.0.0-SNAPSHOT-bin-3.2.0/spark-warehou..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct<id:bigint>, SelectedBucketsCount: 500 out of 500
   +- *(3) Sort [id#7L ASC NULLS FIRST], false, 0
      +- Exchange hashpartitioning(id#7L, 500), true, [id=#49]
         +- *(2) Range (0, 8, step=1, splits=16)
```

vs

```
scala> bucketedTable.join(df, "id").explain()
== Physical Plan ==
AdaptiveSparkPlan(isFinalPlan=false)
+- Project [id#5L]
   +- SortMergeJoin [id#5L], [id#7L], Inner
      :- Sort [id#5L ASC NULLS FIRST], false, 0
      :  +- Exchange hashpartitioning(id#5L, 1000), true, [id=#93]
      :     +- Project [id#5L]
      :        +- Filter isnotnull(id#5L)
      :           +- FileScan parquet default.bucketed_table[id#5L] Batched: true, DataFilters: [isnotnull(id#5L)], Format: Parquet, Location: InMemoryFileIndex[file:/root/opensource/apache-spark/spark-3.0.0-SNAPSHOT-bin-3.2.0/spark-warehou..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct<id:bigint>, SelectedBucketsCount: 500 out of 500
      +- Sort [id#7L ASC NULLS FIRST], false, 0
         +- Exchange hashpartitioning(id#7L, 1000), true, [id=#92]
            +- Range (0, 8, step=1, splits=16)
```

This PR makes reading bucketed tables always obey `spark.sql.shuffle.partitions`, even when adaptive execution is enabled and `spark.sql.adaptive.shuffle.maxNumPostShufflePartitions` is set, so that no extra `ShuffleExchange` is added.

### Why are the changes needed?

Do not degrade performance after enabling adaptive execution.

### Does this PR introduce any user-facing change?

No.

### How was this patch tested?

Unit test.

Closes #26409 from wangyum/SPARK-29655.

Authored-by: Yuming Wang <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
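A quick way to check the effect on the reproduce case is to count `ShuffleExchangeExec` nodes in the physical plan. The sketch below reuses `bucketedTable` and `df` from the snippet above and, for simplicity, checks the non-adaptive plan; with adaptive execution enabled, the final plan has to be unwrapped from `AdaptiveSparkPlanExec` first, as the test changes below do.

```scala
// Minimal sketch (assumes `spark`, `bucketedTable`, and `df` from the reproduce snippet above).
import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec

spark.conf.set("spark.sql.adaptive.enabled", false)
val plan = bucketedTable.join(df, "id").queryExecution.executedPlan
// With spark.sql.shuffle.partitions <= 500, only the small `df` side needs a shuffle,
// so exactly one ShuffleExchangeExec is expected in the plan.
val numExchanges = plan.collect { case e: ShuffleExchangeExec => e }.size
println(s"ShuffleExchange nodes: $numExchanges")
```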
1 parent 0c68578 commit 4f10e54

File tree: 3 files changed (+49, -4 lines)


sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala

Lines changed: 18 additions & 1 deletion
@@ -83,7 +83,24 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] {
       numPartitionsSet.headOption
     }
 
-    val targetNumPartitions = requiredNumPartitions.getOrElse(childrenNumPartitions.max)
+    // If there are non-shuffle children that satisfy the required distribution, we have
+    // some tradeoffs when picking the expected number of shuffle partitions:
+    // 1. We should avoid shuffling these children.
+    // 2. We should have a reasonable parallelism.
+    val nonShuffleChildrenNumPartitions =
+      childrenIndexes.map(children).filterNot(_.isInstanceOf[ShuffleExchangeExec])
+        .map(_.outputPartitioning.numPartitions)
+    val expectedChildrenNumPartitions = if (nonShuffleChildrenNumPartitions.nonEmpty) {
+      // Here we pick the max number of partitions among these non-shuffle children as the
+      // expected number of shuffle partitions. However, if it's smaller than
+      // `conf.numShufflePartitions`, we pick `conf.numShufflePartitions` as the
+      // expected number of shuffle partitions.
+      math.max(nonShuffleChildrenNumPartitions.max, conf.numShufflePartitions)
+    } else {
+      childrenNumPartitions.max
+    }
+
+    val targetNumPartitions = requiredNumPartitions.getOrElse(expectedChildrenNumPartitions)
 
     children = children.zip(requiredChildDistributions).zipWithIndex.map {
       case ((child, distribution), index) if childrenIndexes.contains(index) =>
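Read in isolation, the new partition-count decision can be summarized by the small function below. This is an illustrative sketch, not the actual `EnsureRequirements` code; the parameter names are hypothetical stand-ins for the values computed in the diff above.

```scala
// Sketch of the decision introduced above: children that already avoid a shuffle keep
// their partitioning, as long as that does not drop parallelism below
// spark.sql.shuffle.partitions (conf.numShufflePartitions).
def pickTargetNumPartitions(
    requiredNumPartitions: Option[Int],      // from a required distribution, if any
    nonShufflePartitionCounts: Seq[Int],     // children that are not ShuffleExchangeExec
    allChildrenPartitionCounts: Seq[Int],
    numShufflePartitions: Int): Int = {
  val expected =
    if (nonShufflePartitionCounts.nonEmpty) {
      math.max(nonShufflePartitionCounts.max, numShufflePartitions)
    } else {
      allChildrenPartitionCounts.max
    }
  requiredNumPartitions.getOrElse(expected)
}

// Hypothetical values mirroring the 500-bucket example from the commit message:
// the target stays 500, so the bucketed side is not re-shuffled to 1000 partitions.
pickTargetNumPartitions(None, Seq(500), Seq(500, 1000), 500)  // == 500
```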

sql/core/src/test/scala/org/apache/spark/sql/execution/ReduceNumShufflePartitionsSuite.scala

Lines changed: 2 additions & 1 deletion
@@ -274,6 +274,7 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA
       .setMaster("local[*]")
       .setAppName("test")
       .set(UI_ENABLED, false)
+      .set(SQLConf.SHUFFLE_PARTITIONS.key, "5")
       .set(SQLConf.SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS.key, "5")
       .set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "true")
       .set(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1")
@@ -507,7 +508,7 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA
         join,
         expectedAnswer.collect())
 
-      // Then, let's make sure we do not reduce number of ppst shuffle partitions.
+      // Then, let's make sure we do not reduce number of post shuffle partitions.
      val finalPlan = join.queryExecution.executedPlan
        .asInstanceOf[AdaptiveSparkPlanExec].executedPlan
      val shuffleReaders = finalPlan.collect {
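For context, the suite's configuration after this change looks roughly like the sketch below, written with raw config keys instead of the `SQLConf` constants (a sketch, not a copy of the suite). Pinning `spark.sql.shuffle.partitions` matters now because `EnsureRequirements` consults it whenever a non-shuffle child already satisfies the required distribution.

```scala
import org.apache.spark.SparkConf

// Rough raw-key equivalent of the test SparkConf; spark.sql.shuffle.partitions is now
// pinned explicitly so the expected plans stay deterministic.
val testConf = new SparkConf()
  .setMaster("local[*]")
  .setAppName("test")
  .set("spark.ui.enabled", "false")
  .set("spark.sql.shuffle.partitions", "5")
  .set("spark.sql.adaptive.shuffle.maxNumPostShufflePartitions", "5")
  .set("spark.sql.adaptive.enabled", "true")
  .set("spark.sql.autoBroadcastJoinThreshold", "-1")
```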

sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala

Lines changed: 29 additions & 2 deletions
@@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning
 import org.apache.spark.sql.execution.{DataSourceScanExec, SortExec}
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
 import org.apache.spark.sql.execution.datasources.BucketingUtils
 import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec
 import org.apache.spark.sql.execution.joins.SortMergeJoinExec
@@ -382,8 +383,16 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils {
         joined.sort("bucketed_table1.k", "bucketed_table2.k"),
         df1.join(df2, joinCondition(df1, df2), joinType).sort("df1.k", "df2.k"))
 
-      assert(joined.queryExecution.executedPlan.isInstanceOf[SortMergeJoinExec])
-      val joinOperator = joined.queryExecution.executedPlan.asInstanceOf[SortMergeJoinExec]
+      val joinOperator = if (joined.sqlContext.conf.adaptiveExecutionEnabled) {
+        val executedPlan =
+          joined.queryExecution.executedPlan.asInstanceOf[AdaptiveSparkPlanExec].executedPlan
+        assert(executedPlan.isInstanceOf[SortMergeJoinExec])
+        executedPlan.asInstanceOf[SortMergeJoinExec]
+      } else {
+        val executedPlan = joined.queryExecution.executedPlan
+        assert(executedPlan.isInstanceOf[SortMergeJoinExec])
+        executedPlan.asInstanceOf[SortMergeJoinExec]
+      }
 
       // check existence of shuffle
       assert(
@@ -795,4 +804,22 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils {
     }
   }
 
+  test("SPARK-29655 Read bucketed tables obeys spark.sql.shuffle.partitions") {
+    withSQLConf(
+      SQLConf.SHUFFLE_PARTITIONS.key -> "5",
+      SQLConf.SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS.key -> "7") {
+      val bucketSpec = Some(BucketSpec(6, Seq("i", "j"), Nil))
+      Seq(false, true).foreach { enableAdaptive =>
+        withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> s"$enableAdaptive") {
+          val bucketedTableTestSpecLeft = BucketedTableTestSpec(bucketSpec, expectedShuffle = false)
+          val bucketedTableTestSpecRight = BucketedTableTestSpec(None, expectedShuffle = true)
+          testBucketing(
+            bucketedTableTestSpecLeft = bucketedTableTestSpecLeft,
+            bucketedTableTestSpecRight = bucketedTableTestSpecRight,
+            joinCondition = joinCondition(Seq("i", "j"))
+          )
+        }
+      }
+    }
+  }
 }
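The scenario covered by the new test can also be replayed interactively. The sketch below is a rough user-level equivalent (the table names `t_bucketed` and `t_plain` are made up), using `spark.sql.shuffle.partitions = 5` and `maxNumPostShufflePartitions = 7` as in the test.

```scala
// Rough user-level replay of the new test (hypothetical table names). Only the
// non-bucketed side should show an Exchange in the plan: the bucketed side's 6 buckets
// already satisfy the join distribution, and max(6, 5) keeps the target at 6 partitions.
spark.conf.set("spark.sql.shuffle.partitions", 5)
spark.conf.set("spark.sql.adaptive.shuffle.maxNumPostShufflePartitions", 7)
spark.conf.set("spark.sql.adaptive.enabled", true)
spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)

spark.range(100).selectExpr("id as i", "id as j")
  .write.bucketBy(6, "i", "j").mode("overwrite").saveAsTable("t_bucketed")
spark.range(100).selectExpr("id as i", "id as j")
  .write.mode("overwrite").saveAsTable("t_plain")

spark.table("t_bucketed").join(spark.table("t_plain"), Seq("i", "j")).explain()
```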
