@@ -19,6 +19,8 @@ package org.apache.spark.sql.catalyst.analysis
19
19
20
20
import java .util .Locale
21
21
22
+ import scala .collection .mutable
23
+
22
24
import org .apache .spark .sql .AnalysisException
23
25
import org .apache .spark .sql .catalyst .expressions .IntegerLiteral
24
26
import org .apache .spark .sql .catalyst .plans .logical ._
@@ -28,45 +30,69 @@ import org.apache.spark.sql.internal.SQLConf
28
30
29
31
30
32
/**
31
- * Collection of rules related to hints. The only hint currently available is broadcast join hint.
33
+ * Collection of rules related to hints. The only hint currently available is join strategy hint.
32
34
*
33
35
* Note that this is separated into two rules because in the future we might introduce new hint
34
- * rules that have different ordering requirements from broadcast .
36
+ * rules that have different ordering requirements from join strategies .
35
37
*/
36
38
object ResolveHints {
37
39
38
40
/**
39
- * For broadcast hint, we accept "BROADCAST", "BROADCASTJOIN", and "MAPJOIN", and a sequence of
40
- * relation aliases can be specified in the hint. A broadcast hint plan node will be inserted
41
- * on top of any relation (that is not aliased differently), subquery, or common table expression
42
- * that match the specified name.
41
+ * The list of allowed join strategy hints is defined in [[JoinStrategyHint.strategies ]], and a
42
+ * sequence of relation aliases can be specified with a join strategy hint, e.g., "MERGE(a, c)",
43
+ * "BROADCAST(a)". A join strategy hint plan node will be inserted on top of any relation (that
44
+ * is not aliased differently), subquery, or common table expression that matches the specified
45
+ * name.
43
46
*
44
47
* The hint resolution works by recursively traversing down the query plan to find a relation or
45
- * subquery that matches one of the specified broadcast aliases. The traversal does not go past
46
- * beyond any existing broadcast hints, subquery aliases .
48
+ * subquery that matches one of the specified relation aliases. The traversal does not go
49
+ * beyond any view reference, with clause or subquery alias .
47
50
*
48
51
* This rule must happen before common table expressions.
49
52
*/
50
- class ResolveBroadcastHints (conf : SQLConf ) extends Rule [LogicalPlan ] {
51
- private val BROADCAST_HINT_NAMES = Set (" BROADCAST" , " BROADCASTJOIN" , " MAPJOIN" )
53
+ class ResolveJoinStrategyHints (conf : SQLConf ) extends Rule [LogicalPlan ] {
54
// Upper-cased names of every accepted join strategy hint alias. `apply` looks a hint up by
// `h.name.toUpperCase(Locale.ROOT)`, so the aliases are normalized the same way here; an alias
// declared in lower or mixed case would otherwise never pass the guard in `apply`.
private val STRATEGY_HINT_NAMES =
  JoinStrategyHint.strategies.flatMap(_.hintAliases.map(_.toUpperCase(Locale.ROOT)))

// Handler taken from `conf`; it is passed to `HintInfo.merge` and receives the
// `hintRelationsNotFound` call for relation names that no plan node matched.
private val hintErrorHandler = conf.hintErrorHandler

/** Name resolver taken from `conf`, used to compare hint parameters with relation names. */
def resolver: Resolver = conf.resolver
54
59
55
- private def applyBroadcastHint (plan : LogicalPlan , toBroadcast : Set [String ]): LogicalPlan = {
60
/**
 * Builds the [[HintInfo]] for the given hint name.
 *
 * The name is upper-cased with `Locale.ROOT` and compared against the upper-cased aliases of
 * every entry in `JoinStrategyHint.strategies`; the result of that lookup becomes the
 * `strategy` of the returned [[HintInfo]].
 *
 * @param hintName the hint name as written in the query, in any letter case
 */
private def createHintInfo(hintName: String): HintInfo = {
  val wanted = hintName.toUpperCase(Locale.ROOT)
  val matchingStrategy = JoinStrategyHint.strategies.find { candidate =>
    candidate.hintAliases.exists(alias => alias.toUpperCase(Locale.ROOT) == wanted)
  }
  HintInfo(strategy = matchingStrategy)
}
66
+
67
+ private def applyJoinStrategyHint (
68
+ plan : LogicalPlan ,
69
+ relations : mutable.HashSet [String ],
70
+ hintName : String ): LogicalPlan = {
56
71
// Whether to continue recursing down the tree
57
72
var recurse = true
58
73
59
74
val newNode = CurrentOrigin .withOrigin(plan.origin) {
60
75
plan match {
61
- case u : UnresolvedRelation if toBroadcast.exists(resolver(_, u.tableIdentifier.table)) =>
62
- ResolvedHint (plan, HintInfo (broadcast = true ))
63
- case r : SubqueryAlias if toBroadcast.exists(resolver(_, r.alias)) =>
64
- ResolvedHint (plan, HintInfo (broadcast = true ))
76
+ case ResolvedHint (u @ UnresolvedRelation (ident), hint)
77
+ if relations.exists(resolver(_, ident.table)) =>
78
+ relations.remove(ident.table)
79
+ ResolvedHint (u, createHintInfo(hintName).merge(hint, hintErrorHandler))
80
+
81
+ case ResolvedHint (r : SubqueryAlias , hint)
82
+ if relations.exists(resolver(_, r.alias)) =>
83
+ relations.remove(r.alias)
84
+ ResolvedHint (r, createHintInfo(hintName).merge(hint, hintErrorHandler))
85
+
86
+ case u : UnresolvedRelation if relations.exists(resolver(_, u.tableIdentifier.table)) =>
87
+ relations.remove(u.tableIdentifier.table)
88
+ ResolvedHint (plan, createHintInfo(hintName))
89
+ case r : SubqueryAlias if relations.exists(resolver(_, r.alias)) =>
90
+ relations.remove(r.alias)
91
+ ResolvedHint (plan, createHintInfo(hintName))
65
92
66
93
case _ : ResolvedHint | _ : View | _ : With | _ : SubqueryAlias =>
67
94
// Don't traverse down these nodes.
68
- // For an existing broadcast hint, there is no point going down (if we do, we either
69
- // won't change the structure, or will introduce another broadcast hint that is useless.
95
+ // For an existing strategy hint, there is no chance for a match from this point down.
70
96
// The rest (view, with, subquery) indicates different scopes that we shouldn't traverse
71
97
// down. Note that technically when this rule is executed, we haven't completed view
72
98
// resolution yet and as a result the view part should be deadcode. I'm leaving it here
@@ -80,25 +106,31 @@ object ResolveHints {
80
106
}
81
107
82
108
if ((plan fastEquals newNode) && recurse) {
83
- newNode.mapChildren(child => applyBroadcastHint (child, toBroadcast ))
109
+ newNode.mapChildren(child => applyJoinStrategyHint (child, relations, hintName ))
84
110
} else {
85
111
newNode
86
112
}
87
113
}
88
114
89
115
/**
 * Resolves every [[UnresolvedHint]] whose upper-cased name is in `STRATEGY_HINT_NAMES`.
 *
 *  - Without parameters, the hint is applied to the whole child subtree.
 *  - With parameters, each one must be a string or an [[UnresolvedAttribute]] naming a
 *    relation. The hint is pushed onto matching nodes by `applyJoinStrategyHint`, and the
 *    names still left in the set afterwards are handed to
 *    `hintErrorHandler.hintRelationsNotFound`.
 *
 * @throws AnalysisException if a hint parameter is neither a string nor an identifier
 */
def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp {
  case h: UnresolvedHint if STRATEGY_HINT_NAMES.contains(h.name.toUpperCase(Locale.ROOT)) =>
    if (h.parameters.isEmpty) {
      // If there is no table alias specified, apply the hint on the entire subtree.
      ResolvedHint(h.child, createHintInfo(h.name))
    } else {
      // Otherwise, find within the subtree query plans to apply the hint.
      val relationNames = h.parameters.map {
        case tableName: String => tableName
        case tableId: UnresolvedAttribute => tableId.name
        // The message now closes the parenthesis opened before the class name.
        case unsupported => throw new AnalysisException("Join strategy hint parameter " +
          s"should be an identifier or string but was $unsupported (${unsupported.getClass})")
      }
      // Mutable on purpose: applyJoinStrategyHint removes each name it matches, so whatever
      // remains afterwards is the set of relations the hint could not be attached to.
      val relationNameSet = mutable.HashSet.empty[String] ++= relationNames

      val applied = applyJoinStrategyHint(h.child, relationNameSet, h.name)
      hintErrorHandler.hintRelationsNotFound(h.name, h.parameters, relationNameSet.toSet)
      applied
    }
}
104
136
}
@@ -133,9 +165,14 @@ object ResolveHints {
133
165
* Removes all the hints, used to remove invalid hints provided by the user.
134
166
* This must be executed after all the other hint rules are executed.
135
167
*/
136
class RemoveAllHints(conf: SQLConf) extends Rule[LogicalPlan] {

  // Receives a `hintNotRecognized` call for every hint this rule strips from the plan.
  private val hintErrorHandler = conf.hintErrorHandler

  /**
   * Replaces each remaining [[UnresolvedHint]] with its child, after passing the hint's name
   * and parameters to the error handler.
   */
  def apply(plan: LogicalPlan): LogicalPlan = {
    plan.resolveOperatorsUp {
      case leftover: UnresolvedHint =>
        hintErrorHandler.hintNotRecognized(leftover.name, leftover.parameters)
        leftover.child
    }
  }
}
141
178
0 commit comments