Skip to content

Commit 729a8e2

Browse files
Update docs to be more explicit.
1 parent 573e644 commit 729a8e2

File tree

1 file changed

+4
-7
lines changed

1 file changed

+4
-7
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -52,8 +52,10 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
5252
* the two join sides. When planning a [[execution.BroadcastHashJoin]], if one side has an
5353
* estimated physical size smaller than the user-settable threshold
5454
* `spark.sql.auto.convert.join.size`, the planner will mark it as the ''build'' relation and
55-
* mark the other relation as the ''stream'' side. If both estimates exceed the threshold,
56-
* they will instead be used to decide the build side in a [[execution.ShuffledHashJoin]].
55+
* mark the other relation as the ''stream'' side. The build table will be ''broadcast'' to
56+
* all of the executors involved in the join, as a [[org.apache.spark.broadcast.Broadcast]]
57+
* object. If both estimates exceed the threshold, they will instead be used to decide the build
58+
* side in a [[execution.ShuffledHashJoin]].
5759
*/
5860
object HashJoin extends Strategy with PredicateHelper {
5961
private[this] def broadcastHashJoin(
@@ -144,11 +146,6 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
144146
}
145147
}
146148

147-
/**
148-
* This strategy applies a simple optimization based on the estimates of the physical sizes of
149-
* the two join sides: the planner would mark the relation with the smaller estimated physical
150-
* size as the ''build'' (broadcast) relation and mark the other as the ''stream'' relation.
151-
*/
152149
object BroadcastNestedLoopJoin extends Strategy {
153150
def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
154151
case logical.Join(left, right, joinType, condition) =>

0 commit comments

Comments
 (0)