Skip to content

Commit e45e904

Browse files
author
Andrew Or
committed
More minor updates (wording, renaming etc.)
1 parent 8b71cdb commit e45e904

File tree

2 files changed

+66
-52
lines changed

2 files changed

+66
-52
lines changed

core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -363,24 +363,24 @@ private class ReturnStatementFinder extends ClassVisitor(ASM4) {
363363
}
364364

365365
/** Helper class to identify a method. */
366-
private case class MethodIdentifier(cls: Class[_], name: String, desc: String)
366+
private case class MethodIdentifier[T](cls: Class[T], name: String, desc: String)
367367

368368
/**
369369
* Find the fields accessed by a given class.
370370
*
371-
* The fields are stored in the mutable map passed in by the class that contains them.
371+
* The resulting fields are stored in the mutable map passed in through the constructor.
372372
* This map is assumed to have its keys already populated with the classes of interest.
373373
*
374374
* @param fields the mutable map that stores the fields to return
375-
* @param findTransitively if true, find fields indirectly referenced in other classes
376-
* @param specificMethod if not empty, visit only this method
377-
* @param visitedMethods a list of visited methods to avoid cycles
375+
* @param findTransitively if true, find fields indirectly referenced through method calls
376+
* @param specificMethod if not empty, visit only this specific method
377+
* @param visitedMethods a set of visited methods to avoid cycles
378378
*/
379379
private[util] class FieldAccessFinder(
380380
fields: Map[Class[_], Set[String]],
381381
findTransitively: Boolean,
382-
specificMethod: Option[MethodIdentifier] = None,
383-
visitedMethods: Set[MethodIdentifier] = Set.empty)
382+
specificMethod: Option[MethodIdentifier[_]] = None,
383+
visitedMethods: Set[MethodIdentifier[_]] = Set.empty)
384384
extends ClassVisitor(ASM4) {
385385

386386
override def visitMethod(
@@ -390,7 +390,7 @@ private[util] class FieldAccessFinder(
390390
sig: String,
391391
exceptions: Array[String]): MethodVisitor = {
392392

393-
// Ignore this method unless we are told to visit it
393+
// If we are told to visit only a certain method and this is not the one, ignore it
394394
if (specificMethod.isDefined &&
395395
(specificMethod.get.name != name || specificMethod.get.desc != desc)) {
396396
return null
@@ -412,7 +412,7 @@ private[util] class FieldAccessFinder(
412412
if (op == INVOKEVIRTUAL && owner.endsWith("$iwC") && !name.endsWith("$outer")) {
413413
fields(cl) += name
414414
}
415-
// Visit other methods to find fields that are transitively referenced
415+
// Optionally visit other methods to find fields that are transitively referenced
416416
if (findTransitively) {
417417
val m = MethodIdentifier(cl, name, desc)
418418
if (!visitedMethods.contains(m)) {
@@ -431,6 +431,11 @@ private[util] class FieldAccessFinder(
431431
private class InnerClosureFinder(output: Set[Class[_]]) extends ClassVisitor(ASM4) {
432432
var myName: String = null
433433

434+
// TODO: Recursively find inner closures that we indirectly reference, e.g.
435+
// val closure1 = () = { () => 1 }
436+
// val closure2 = () => { (1 to 5).map(closure1) }
437+
// The second closure technically has two inner closures, but this finder only finds one
438+
434439
override def visit(version: Int, access: Int, name: String, sig: String,
435440
superName: String, interfaces: Array[String]) {
436441
myName = name

core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite2.scala

Lines changed: 52 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ import org.apache.spark.serializer.SerializerInstance
3232
*/
3333
class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateMethodTester {
3434

35-
// Start a SparkContext so that SparkEnv.get.closureSerializer is accessible
36-
// We do not actually use this explicitly except to stop it later
35+
// Start a SparkContext so that the closure serializer is accessible
36+
// We do not actually use this explicitly otherwise
3737
private var sc: SparkContext = null
3838
private var closureSerializer: SerializerInstance = null
3939

@@ -48,7 +48,7 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
4848
closureSerializer = null
4949
}
5050

51-
// Some fields and methods that belong to this class, which is itself not serializable
51+
// Some fields and methods to reference in inner closures later
5252
private val someSerializableValue = 1
5353
private val someNonSerializableValue = new NonSerializable
5454
private def someSerializableMethod() = 1
@@ -86,19 +86,19 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
8686
assertSerializable(closure, serializableBefore)
8787
// If the resulting closure is not serializable even after
8888
// cleaning, we expect ClosureCleaner to throw a SparkException
89-
intercept[SparkException] {
89+
if (serializableAfter) {
9090
ClosureCleaner.clean(closure, checkSerializable = true, transitive)
91-
// Otherwise, if we do expect the closure to be serializable after the
92-
// clean, throw the SparkException ourselves so scalatest is happy
93-
if (serializableAfter) { throw new SparkException("no-op") }
91+
} else {
92+
intercept[SparkException] {
93+
ClosureCleaner.clean(closure, checkSerializable = true, transitive)
94+
}
9495
}
9596
assertSerializable(closure, serializableAfter)
9697
}
9798

9899
/**
99100
* Return the fields accessed by the given closure by class.
100-
* This also optionally finds the fields transitively referenced through methods
101-
* that belong to other classes.
101+
* This also optionally finds the fields transitively referenced through methods invocations.
102102
*/
103103
private def findAccessedFields(
104104
closure: AnyRef,
@@ -211,7 +211,7 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
211211
val outerObjects2 = getOuterObjects(closure2)
212212
assert(outerClasses1.size === outerObjects1.size)
213213
assert(outerClasses2.size === outerObjects2.size)
214-
// These inner closures only reference local variables, and so do not have $outer pointer
214+
// These inner closures only reference local variables, and so do not have $outer pointers
215215
assert(outerClasses1.isEmpty)
216216
assert(outerClasses2.isEmpty)
217217
}
@@ -235,8 +235,8 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
235235
// This closure references the "test2" scope because it needs to find the method `y`
236236
// Scope hierarchy: "test2" < "FunSuite#test" < ClosureCleanerSuite2
237237
assert(outerClasses2.size === 3)
238-
// This closure references the "test2" scope because it needs to find the
239-
// `localValue` defined outside of this scope
238+
// This closure references the "test2" scope because it needs to find the `localValue`
239+
// defined outside of this scope
240240
assert(outerClasses3.size === 3)
241241
assert(isClosure(outerClasses2(0)))
242242
assert(isClosure(outerClasses3(0)))
@@ -270,10 +270,12 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
270270
assert(fields2.isEmpty)
271271
assert(fields3.size === 2)
272272
// This corresponds to the "FunSuite#test" closure. This is empty because the
273-
// field `closure3` references belongs to its parent (i.e. ClosureCleanerSuite2)
273+
// `someSerializableValue` belongs to its parent (i.e. ClosureCleanerSuite2).
274274
assert(fields3(outerClasses3(0)).isEmpty)
275275
// This corresponds to the ClosureCleanerSuite2. This is also empty, however,
276-
// because we did not find fields transitively (i.e. beyond 1 enclosing scope)
276+
// because accessing a `ClosureCleanerSuite2#someSerializableValue` actually involves a
277+
// method call. Since we do not find fields transitively, we will not recursively trace
278+
// through the fields referenced by this method.
277279
assert(fields3(outerClasses3(1)).isEmpty)
278280

279281
val fields1t = findAccessedFields(closure1, outerClasses1, findTransitively = true)
@@ -283,7 +285,7 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
283285
assert(fields2t.isEmpty)
284286
assert(fields3t.size === 2)
285287
// Because we find fields transitively now, we are able to detect that we need the
286-
// $outer pointer to get the field from the ClosureCleanerSuite2.
288+
// $outer pointer to get the field from the ClosureCleanerSuite2
287289
assert(fields3t(outerClasses3(0)).size === 1)
288290
assert(fields3t(outerClasses3(0)).head === "$outer")
289291
assert(fields3t(outerClasses3(1)).size === 1)
@@ -304,31 +306,32 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
304306
val outerClasses3 = getOuterClasses(closure3)
305307
val outerClasses4 = getOuterClasses(closure4)
306308

307-
// First, find only fields the closures directly access
309+
// First, find only fields accessed directly, not transitively, by these closures
308310
val fields1 = findAccessedFields(closure1, outerClasses1, findTransitively = false)
309311
val fields2 = findAccessedFields(closure2, outerClasses2, findTransitively = false)
310312
val fields3 = findAccessedFields(closure3, outerClasses3, findTransitively = false)
311313
val fields4 = findAccessedFields(closure4, outerClasses4, findTransitively = false)
312314
assert(fields1.isEmpty)
313315
// "test1" < "FunSuite#test" < ClosureCleanerSuite2
314316
assert(fields2.size === 3)
315-
assert(fields2(outerClasses2(0)).isEmpty) // `def a` is not a field
316-
assert(fields2(outerClasses2(1)).isEmpty)
317-
assert(fields2(outerClasses2(2)).isEmpty)
317+
// Since we do not find fields transitively here, we do not look into what `def a` references
318+
assert(fields2(outerClasses2(0)).isEmpty) // This corresponds to the "test1" scope
319+
assert(fields2(outerClasses2(1)).isEmpty) // This corresponds to the "FunSuite#test" scope
320+
assert(fields2(outerClasses2(2)).isEmpty) // This corresponds to the ClosureCleanerSuite2
318321
assert(fields3.size === 3)
319-
// Note that `localValue` is a field of the "test1" closure because `def a` needs it
320-
// Further note that it is NOT a field of the "FunSuite#test" closure but a local variable
322+
// Note that `localValue` is a field of the "test1" scope because `def a` references it,
323+
// but NOT a field of the "FunSuite#test" scope because it is only a local variable there
321324
assert(fields3(outerClasses3(0)).size === 1)
322325
assert(fields3(outerClasses3(0)).head.contains("localValue"))
323326
assert(fields3(outerClasses3(1)).isEmpty)
324327
assert(fields3(outerClasses3(2)).isEmpty)
325328
assert(fields4.size === 3)
329+
// Because `val someSerializableValue` is an instance variable, even an explicit reference
330+
// here actually involves a method call to access the underlying value of the variable.
331+
// Because we are not finding fields transitively here, we do not consider the fields
332+
// accessed by this "method" (i.e. the val's accessor).
326333
assert(fields4(outerClasses4(0)).isEmpty)
327334
assert(fields4(outerClasses4(1)).isEmpty)
328-
// Because `someSerializableValue` is a val, even an explicit reference here actually
329-
// involves a method call to access the underlying value of the variable. Because we are
330-
// not finding fields transitively here, we do not consider the fields accessed by this
331-
// "method" (i.e. the val's accessor).
332335
assert(fields4(outerClasses4(2)).isEmpty)
333336

334337
// Now do the same, but find fields that the closures transitively reference
@@ -338,8 +341,7 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
338341
val fields4t = findAccessedFields(closure4, outerClasses4, findTransitively = true)
339342
assert(fields1t.isEmpty)
340343
assert(fields2t.size === 3)
341-
// This closure transitively references `localValue` because `def a` uses it
342-
assert(fields2t(outerClasses2(0)).size === 1)
344+
assert(fields2t(outerClasses2(0)).size === 1) // `def a` references `localValue`
343345
assert(fields2t(outerClasses2(0)).head.contains("localValue"))
344346
assert(fields2t(outerClasses2(1)).isEmpty)
345347
assert(fields2t(outerClasses2(2)).isEmpty)
@@ -362,11 +364,11 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
362364
}
363365

364366
test("clean basic serializable closures") {
365-
val localSerializableVal = someSerializableValue
367+
val localValue = someSerializableValue
366368
val closure1 = () => 1
367369
val closure2 = () => Array[String]("a", "b", "c")
368370
val closure3 = (s: String, arr: Array[Long]) => s + arr.mkString(", ")
369-
val closure4 = () => localSerializableVal
371+
val closure4 = () => localValue
370372
val closure5 = () => new NonSerializable(5) // we're just serializing the class information
371373
val closure1r = closure1()
372374
val closure2r = closure2()
@@ -395,7 +397,7 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
395397
val closure4 = () => someNonSerializableValue
396398
val closure2 = () => someNonSerializableMethod()
397399

398-
// These are not cleanable because they ultimately reference the `this` pointer
400+
// These are not cleanable because they ultimately reference the ClosureCleanerSuite2
399401
testClean(closure1, serializableBefore = false, serializableAfter = false)
400402
testClean(closure2, serializableBefore = false, serializableAfter = false)
401403
testClean(closure3, serializableBefore = false, serializableAfter = false)
@@ -404,13 +406,13 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
404406
}
405407

406408
test("clean basic nested serializable closures") {
407-
val localSerializableValue = someSerializableValue
409+
val localValue = someSerializableValue
408410
val closure1 = (i: Int) => {
409-
(1 to i).map { x => x + localSerializableValue } // 1 level of nesting
411+
(1 to i).map { x => x + localValue } // 1 level of nesting
410412
}
411413
val closure2 = (j: Int) => {
412414
(1 to j).flatMap { x =>
413-
(1 to x).map { y => y + localSerializableValue } // 2 levels
415+
(1 to x).map { y => y + localValue } // 2 levels
414416
}
415417
}
416418
val closure3 = (k: Int, l: Int, m: Int) => {
@@ -426,6 +428,7 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
426428
testClean(closure2, serializableBefore = true, serializableAfter = true)
427429
testClean(closure3, serializableBefore = true, serializableAfter = true)
428430

431+
// Verify that closures can still be invoked and the result still the same
429432
assert(closure1(1) === closure1r)
430433
assert(closure2(2) === closure2r)
431434
assert(closure3(3, 4, 5) === closure3r)
@@ -434,9 +437,12 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
434437
test("clean basic nested non-serializable closures") {
435438
def localSerializableMethod() = someSerializableValue
436439
val localNonSerializableValue = someNonSerializableValue
440+
// These closures ultimately reference the ClosureCleanerSuite2
441+
// Note that even accessing `val` that is an instance variable involves a method call
437442
val closure1 = (i: Int) => { (1 to i).map { x => x + someSerializableValue } }
438443
val closure2 = (j: Int) => { (1 to j).map { x => x + someSerializableMethod() } }
439444
val closure4 = (k: Int) => { (1 to k).map { x => x + localSerializableMethod() } }
445+
// This closure references a local non-serializable value
440446
val closure3 = (l: Int) => { (1 to l).map { x => localNonSerializableValue } }
441447
// This is non-serializable no matter how many levels we nest it
442448
val closure5 = (m: Int) => {
@@ -457,15 +463,18 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
457463
}
458464

459465
test("clean complicated nested serializable closures") {
460-
val localSerializableValue = someSerializableValue
466+
val localValue = someSerializableValue
467+
468+
// Here we assume that if the outer closure is serializable,
469+
// then all inner closures must also be serializable
461470

462471
// Reference local fields from all levels
463472
val closure1 = (i: Int) => {
464473
val a = 1
465474
(1 to i).flatMap { x =>
466475
val b = a + 1
467476
(1 to x).map { y =>
468-
y + a + b + localSerializableValue
477+
y + a + b + localValue
469478
}
470479
}
471480
}
@@ -479,8 +488,8 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
479488
def b2 = a2 + 1
480489
(1 to x).map { y =>
481490
// If this references a method outside the outermost closure, then it will try to pull
482-
// in the ClosureCleanerSuite2. This is why `localSerializableValue` here must be a val.
483-
y + a1 + a2 + b1 + b2 + localSerializableValue
491+
// in the ClosureCleanerSuite2. This is why `localValue` here must be a local `val`.
492+
y + a1 + a2 + b1 + b2 + localValue
484493
}
485494
}
486495
}
@@ -494,13 +503,13 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
494503
}
495504

496505
test("clean complicated nested non-serializable closures") {
497-
val localSerializableValue = someSerializableValue
506+
val localValue = someSerializableValue
498507

499-
// Note that we are not interested in cleaning the outer closures here
508+
// Note that we are not interested in cleaning the outer closures here (they are not cleanable)
500509
// The only reason why they exist is to nest the inner closures
501510

502511
val test1 = () => {
503-
val a = localSerializableValue
512+
val a = localValue
504513
val b = sc
505514
val inner1 = (x: Int) => x + a + b.hashCode()
506515
val inner2 = (x: Int) => x + a
@@ -509,15 +518,15 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
509518
// There is no way to clean it
510519
testClean(inner1, serializableBefore = false, serializableAfter = false)
511520

512-
// This closure is serializable to begin with since
513-
// it does not have a pointer to the outer closure
521+
// This closure is serializable to begin with since it does not need a pointer to
522+
// the outer closure (it only references local variables)
514523
testClean(inner2, serializableBefore = true, serializableAfter = true)
515524
}
516525

517526
// Same as above, but the `val a` becomes `def a`
518527
// The difference here is that all inner closures now have pointers to the outer closure
519528
val test2 = () => {
520-
def a = localSerializableValue
529+
def a = localValue
521530
val b = sc
522531
val inner1 = (x: Int) => x + a + b.hashCode()
523532
val inner2 = (x: Int) => x + a

0 commit comments

Comments
 (0)