@@ -109,7 +109,7 @@ class CodegenContext {
109
109
val idx = references.length
110
110
references += obj
111
111
val clsName = Option (className).getOrElse(obj.getClass.getName)
112
- addMutableState(clsName, term, s " this. $term = ( $clsName) references[ $idx]; " )
112
+ addMutableState(clsName, term, s " $term = ( $clsName) references[ $idx]; " )
113
113
term
114
114
}
115
115
@@ -198,41 +198,139 @@ class CodegenContext {
198
198
partitionInitializationStatements.mkString(" \n " )
199
199
}
200
200
201
+ /**
202
+ * Holds expressions that are equivalent. Used to perform subexpression elimination
203
+ * during codegen.
204
+ *
205
+ * For expressions that appear more than once, generate additional code to prevent
206
+ * recomputing the value.
207
+ *
208
+ * For example, consider two expressions generated from this SQL statement:
209
+ * SELECT (col1 + col2), (col1 + col2) / col3.
210
+ *
211
+ * equivalentExpressions will match the tree containing `col1 + col2` and it will only
212
+ * be evaluated once.
213
+ */
214
+ val equivalentExpressions : EquivalentExpressions = new EquivalentExpressions
215
+
216
+ // For each expression that is participating in subexpression elimination, the state to use.
217
+ val subExprEliminationExprs = mutable.HashMap .empty[Expression , SubExprEliminationState ]
218
+
219
+ // The collection of sub-expression result resetting methods that need to be called on each row.
220
+ val subexprFunctions = mutable.ArrayBuffer .empty[String ]
221
+
222
+ private val outerClassName = " OuterClass"
223
+
201
224
/**
202
- * Holding all the functions those will be added into generated class.
225
+ * Holds the class and instance names to be generated, where `OuterClass` is a placeholder
226
+ * standing for whichever class is generated as the outermost class and which will contain any
227
+ * nested sub-classes. All other classes and instance names in this list will represent private,
228
+ * nested sub-classes.
203
229
*/
204
- val addedFunctions : mutable.Map [String , String ] =
205
- mutable.Map .empty[String , String ]
230
+ private val classes : mutable.ListBuffer [(String , String )] =
231
+ mutable.ListBuffer [(String , String )](outerClassName -> null ) // seeded with the OuterClass placeholder, whose instance name is null
232
+
233
+ // A map holding the accumulated code size of each class to be generated. The size is the sum of
+ // funcCode.length over the functions added to the class, i.e. a count of source characters.
234
+ private val classSize : mutable.Map [String , Int ] =
235
+ mutable.Map [String , Int ](outerClassName -> 0 )
236
+
237
+ // Nested maps: class name -> (function name -> function code) for every class to be generated.
238
+ private val classFunctions : mutable.Map [String , mutable.Map [String , String ]] =
239
+ mutable.Map (outerClassName -> mutable.Map .empty[String , String ])
206
240
207
- def addNewFunction (funcName : String , funcCode : String ): Unit = {
208
- addedFunctions += ((funcName, funcCode))
241
+ // Returns the size of the most recently added class.
242
+ private def currClassSize (): Int = classSize(classes.head._1) // classes.head is the newest class because addClass prepends
243
+
244
+ // Returns the class name and instance name for the most recently added class.
245
+ private def currClass (): (String , String ) = classes.head // (class name, instance name); head is the newest entry since addClass prepends
246
+
247
+ // Adds a new class. Requires the class' name, and its instance name.
248
+ private def addClass (className : String , classInstance : String ): Unit = {
249
+ classes.prepend(className -> classInstance) // prepend so the new class becomes classes.head, i.e. the current class
250
+ classSize += className -> 0 // a new class starts with no accumulated code
251
+ classFunctions += className -> mutable.Map .empty[String , String ] // and with an empty function table
209
252
}
210
253
211
254
/**
212
- * Holds expressions that are equivalent. Used to perform subexpression elimination
213
- * during codegen.
214
- *
215
- * For expressions that appear more than once, generate additional code to prevent
216
- * recomputing the value.
255
+ * Adds a function to the generated class. If the code for the `OuterClass` grows too large, the
256
+ * function will be inlined into a new private, nested class, and an instance-qualified name for
257
+ * the function will be returned. Otherwise, the function will be inlined to the `OuterClass` and the
258
+ * simple `funcName` will be returned.
217
259
*
218
- * For example, consider two expression generated from this SQL statement:
219
- * SELECT (col1 + col2), (col1 + col2) / col3.
220
- *
221
- * equivalentExpressions will match the tree containing `col1 + col2` and it will only
222
- * be evaluated once.
260
+ * @param funcName the class-unqualified name of the function
261
+ * @param funcCode the body of the function
262
+ * @param inlineToOuterClass whether the given code must be inlined to the `OuterClass`. This
263
+ * can be necessary when a function is declared outside of the context
264
+ * it is eventually referenced and a returned qualified function name
265
+ * cannot otherwise be accessed.
266
+ * @return the name of the function, qualified by class if it will be inlined to a private,
267
+ * nested sub-class
223
268
*/
224
- val equivalentExpressions : EquivalentExpressions = new EquivalentExpressions
269
+ def addNewFunction (
270
+ funcName : String ,
271
+ funcCode : String ,
272
+ inlineToOuterClass : Boolean = false ): String = {
273
+ // The number of named constants that can exist in the class is limited by the Constant Pool
274
+ // limit, 65,536. We cannot know how many constants will be inserted for a class, so we use a
275
+ // threshold of 1600k on the summed source length (funcCode.length) of the current class's
276
+ // functions to decide when a function should go to a new private, nested sub-class.
277
+ val (className, classInstance) = if (inlineToOuterClass) {
278
+ outerClassName -> " " // no instance qualifier is needed: the bare funcName is returned below
279
+ } else if (currClassSize > 1600000 ) { // checked before funcCode is added, so a class may exceed the threshold by one function
280
+ val className = freshName(" NestedClass" )
281
+ val classInstance = freshName(" nestedClassInstance" )
282
+
283
+ addClass(className, classInstance)
284
+
285
+ className -> classInstance
286
+ } else {
287
+ currClass() // most recently added class; this is OuterClass until the first nested class is created
288
+ }
225
289
226
- // Foreach expression that is participating in subexpression elimination, the state to use.
227
- val subExprEliminationExprs = mutable. HashMap .empty[ Expression , SubExprEliminationState ]
290
+ classSize(className) += funcCode.length // size is tracked as source length in characters
291
+ classFunctions(className) += funcName -> funcCode // a later add with the same funcName in this class replaces the earlier code
228
292
229
- // The collection of sub-expression result resetting methods that need to be called on each row.
230
- val subexprFunctions = mutable.ArrayBuffer .empty[String ]
293
+ if (className == outerClassName) {
294
+ funcName // outer-class functions are callable by their simple name
295
+ } else {
231
296
232
- def declareAddedFunctions () : String = {
233
- addedFunctions.map { case (funcName, funcCode) => funcCode }.mkString( " \n " )
297
+ s " $classInstance . $funcName " // nested-class functions must be called through the nested class instance
298
+ }
234
299
}
235
300
301
+ /**
302
+ * Instantiates all nested, private sub-classes as objects to the `OuterClass`
303
+ */
304
+ private [sql] def initNestedClasses (): String = {
305
+ // Nested, private sub-classes have no mutable state (though they do reference the outer class'
306
+ // mutable state), so we declare and initialize them inline to the OuterClass.
307
+ classes.filter(_._1 != outerClassName).map { // skip the OuterClass placeholder; it has no instance to create
308
+ case (className, classInstance) =>
309
+ s " private $className $classInstance = new $className(); " // one field declaration per nested class
310
+ }.mkString(" \n " ) // one declaration per line; yields an empty string when there are no nested classes
311
+ }
312
+
313
+ /**
314
+ * Declares all function code that should be inlined to the `OuterClass`.
315
+ */
316
+ private [sql] def declareAddedFunctions (): String = {
317
+ classFunctions(outerClassName).values.mkString(" \n " ) // only the function code is emitted; the map keys (function names) are not used here
318
+ }
319
+
320
+ /**
321
+ * Declares all nested, private sub-classes and the function code that should be inlined to them.
322
+ */
323
+ private [sql] def declareNestedClasses (): String = {
324
+ classFunctions.filterKeys(_ != outerClassName).map { // every class except OuterClass, whose functions come from declareAddedFunctions
325
+ case (className, functions) =>
326
+ s """
327
+ |private class $className {
328
+ | ${functions.values.mkString(" \n " )}
329
+ |}
330
+ """ .stripMargin
331
+ } // end of the per-class map
332
+ }.mkString(" \n " ) // the method body is the block expression `{ ... }`; mkString joins the per-class declarations it yields
333
+
236
334
final val JAVA_BOOLEAN = " boolean"
237
335
final val JAVA_BYTE = " byte"
238
336
final val JAVA_SHORT = " short"
@@ -552,8 +650,7 @@ class CodegenContext {
552
650
return 0;
553
651
}
554
652
"""
555
- addNewFunction(compareFunc, funcCode)
556
- s " this. $compareFunc( $c1, $c2) "
653
+ s " ${addNewFunction(compareFunc, funcCode)}( $c1, $c2) "
557
654
case schema : StructType =>
558
655
val comparisons = GenerateOrdering .genComparisons(this , schema)
559
656
val compareFunc = freshName(" compareStruct" )
@@ -569,8 +666,7 @@ class CodegenContext {
569
666
return 0;
570
667
}
571
668
"""
572
- addNewFunction(compareFunc, funcCode)
573
- s " this. $compareFunc( $c1, $c2) "
669
+ s " ${addNewFunction(compareFunc, funcCode)}( $c1, $c2) "
574
670
case other if other.isInstanceOf [AtomicType ] => s " $c1.compare( $c2) "
575
671
case udt : UserDefinedType [_] => genComp(udt.sqlType, c1, c2)
576
672
case _ =>
@@ -640,7 +736,9 @@ class CodegenContext {
640
736
641
737
/**
642
738
* Splits the generated code of expressions into multiple functions, because a function has a
643
- * 64kb code size limit in JVM
739
+ * 64kb code size limit in JVM. If the class to which the function would be inlined would grow
740
+ * beyond 1600kb, we declare a private, nested sub-class, and the function is inlined to it
741
+ * instead, because classes have a constant pool limit of 65,536 named values.
644
742
*
645
743
* @param expressions the codes to evaluate expressions.
646
744
* @param funcName the split function name base.
@@ -685,7 +783,6 @@ class CodegenContext {
685
783
|}
686
784
""" .stripMargin
687
785
addNewFunction(name, code)
688
- name
689
786
}
690
787
691
788
foldFunctions(functions.map(name => s " $name( ${arguments.map(_._2).mkString(" , " )}) " ))
@@ -769,8 +866,6 @@ class CodegenContext {
769
866
|}
770
867
""" .stripMargin
771
868
772
- addNewFunction(fnName, fn)
773
-
774
869
// Add a state and a mapping of the common subexpressions that are associated with this
775
870
// state. Adding this expression to subExprEliminationExprMap means it will call `fn`
776
871
// when it is code generated. This decision should be a cost based one.
@@ -791,7 +886,7 @@ class CodegenContext {
791
886
addMutableState(javaType(expr.dataType), value,
792
887
s " $value = ${defaultValue(expr.dataType)}; " )
793
888
794
- subexprFunctions += s " $fnName( $INPUT_ROW); "
889
+ subexprFunctions += s " ${addNewFunction( fnName, fn)} ( $INPUT_ROW); "
795
890
val state = SubExprEliminationState (isNull, value)
796
891
e.foreach(subExprEliminationExprs.put(_, state))
797
892
}
0 commit comments