@@ -52,8 +52,10 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
    * the two join sides. When planning a [[execution.BroadcastHashJoin]], if one side has an
    * estimated physical size smaller than the user-settable threshold
    * `spark.sql.auto.convert.join.size`, the planner would mark it as the ''build'' relation and
-   * mark the other relation as the ''stream'' side. If both estimates exceed the threshold,
-   * they will instead be used to decide the build side in a [[execution.ShuffledHashJoin]].
+   * mark the other relation as the ''stream'' side. The build table will be ''broadcasted'' to
+   * all of the executors involved in the join, as a [[org.apache.spark.broadcast.Broadcast]]
+   * object. If both estimates exceed the threshold, they will instead be used to decide the build
+   * side in a [[execution.ShuffledHashJoin]].
    */
   object HashJoin extends Strategy with PredicateHelper {
     private[this] def broadcastHashJoin(
@@ -144,11 +146,6 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
      }
    }

-  /**
-   * This strategy applies a simple optimization based on the estimates of the physical sizes of
-   * the two join sides: the planner would mark the relation with the smaller estimated physical
-   * size as the ''build'' (broadcast) relation and mark the other as the ''stream'' relation.
-   */
   object BroadcastNestedLoopJoin extends Strategy {
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
       case logical.Join(left, right, joinType, condition) =>
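
The updated Scaladoc in the first hunk describes a size-based heuristic: if either join side's estimated physical size falls under the `spark.sql.auto.convert.join.size` threshold, that side becomes the ''build'' relation and is broadcast to the executors; otherwise the size estimates only pick the build side of a shuffled hash join. Below is a minimal, self-contained Scala sketch of that decision, not the actual Spark planner code; the object name `BuildSideSketch`, the `chooseJoin` helper, and the 10 MB threshold value are assumptions for illustration.

```scala
// Sketch of the build-side decision described in the Scaladoc above.
// NOT the real planner: names and the threshold value are hypothetical.
object BuildSideSketch {

  sealed trait BuildSide
  case object BuildLeft extends BuildSide
  case object BuildRight extends BuildSide

  sealed trait JoinChoice
  case class BroadcastHashJoinChoice(buildSide: BuildSide) extends JoinChoice
  case class ShuffledHashJoinChoice(buildSide: BuildSide) extends JoinChoice

  // Stand-in for `spark.sql.auto.convert.join.size` (value here is hypothetical).
  val autoConvertJoinSizeBytes: Long = 10L * 1024 * 1024

  /**
   * If either side's estimated physical size is under the threshold, broadcast
   * that side as the build relation; otherwise fall back to a shuffled hash
   * join and build on the smaller side.
   */
  def chooseJoin(leftSizeBytes: Long, rightSizeBytes: Long): JoinChoice = {
    if (rightSizeBytes <= autoConvertJoinSizeBytes) {
      BroadcastHashJoinChoice(BuildRight)
    } else if (leftSizeBytes <= autoConvertJoinSizeBytes) {
      BroadcastHashJoinChoice(BuildLeft)
    } else if (rightSizeBytes <= leftSizeBytes) {
      ShuffledHashJoinChoice(BuildRight)
    } else {
      ShuffledHashJoinChoice(BuildLeft)
    }
  }

  def main(args: Array[String]): Unit = {
    // A 1 MB dimension table joined with a 1 GB fact table: broadcast the small side.
    println(chooseJoin(leftSizeBytes = 1L << 30, rightSizeBytes = 1L << 20))
  }
}
```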