Rename silly method names + add detailed comments

Andrew Or · Andrew Or · commit cd0c680161e9 · 2015-12-09T18:30:34.000-08:00
diff --git a/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala b/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala
@@ -70,15 +70,28 @@ private[memory] class ExecutionMemoryPool(
    * active tasks) before it is forced to spill. This can happen if the number of tasks increase
    * but an older task had a lot of memory already.
    *
+   * @param numBytes number of bytes to acquire
+   * @param taskAttemptId the task attempt acquiring memory
+   * @param maybeGrowPool a callback that potentially grows the size of this pool. It takes in
+   *                      one parameter (Long) that represents the desired amount of memory by
+   *                      which this pool should be expanded.
+   * @param computeMaxPoolSize a callback that returns the maximum allowable size of this pool
+   *                           at this given moment. This is not a field because the max pool
+   *                           size is variable in certain cases. For instance, in unified
+   *                           memory management, the execution pool can be expanded by evicting
+   *                           cached blocks, thereby shrinking the storage pool.
+   *
    * @return the number of bytes granted to the task.
    */
-  def acquireMemory(
+  private[memory] def acquireMemory(
       numBytes: Long,
       taskAttemptId: Long,
-      maybeResizePool: Long => Unit = (_: Long) => Unit,
-      computeDaviesThingMax: () => Long = () => poolSize): Long = lock.synchronized {
+      maybeGrowPool: Long => Unit = (additionalSpaceNeeded: Long) => (),
+      computeMaxPoolSize: () => Long = () => poolSize): Long = lock.synchronized {
     assert(numBytes > 0, s"invalid number of bytes requested: $numBytes")
 
+    // TODO: clean up this clunky method signature
+
     // Add this task to the taskMemory map just so we can keep an accurate count of the number
     // of active tasks, to let other tasks ramp down their memory in calls to `acquireMemory`
     if (!memoryForTask.contains(taskAttemptId)) {
@@ -95,23 +108,30 @@ private[memory] class ExecutionMemoryPool(
       val numActiveTasks = memoryForTask.keys.size
       val curMem = memoryForTask(taskAttemptId)
 
-      // TODO: explain me
-      maybeResizePool(numBytes - memoryFree)
-
-      // TODO: explain me
-      val daviesThingMax = computeDaviesThingMax()
-
-      // How much we can grant this task; don't let it grow to more than 1 / numActiveTasks;
-      // don't let it be negative
-      val maxToGrant = math.min(numBytes, math.max(0, (daviesThingMax / numActiveTasks) - curMem))
+      // In every iteration of this loop, we should first try to reclaim any borrowed execution
+      // space from storage. This is necessary because of the potential race condition where new
+      // storage blocks may steal the free execution memory that this task was waiting for.
+      maybeGrowPool(numBytes - memoryFree)
+
+      // Maximum size the pool would have after potentially growing the pool.
+      // This is used to compute the upper bound of how much memory each task can occupy. This
+      // must take into account potential free memory as well as the amount this pool currently
+      // occupies. Otherwise, we may run into SPARK-12155 where, in unified memory management,
+      // we did not take into account space that could have been freed by evicting cached blocks.
+      val maxPoolSize = computeMaxPoolSize()
+      val maxMemoryPerTask = maxPoolSize / numActiveTasks
+      val minMemoryPerTask = poolSize / (2 * numActiveTasks)
+
+      // How much we can grant this task; keep its share within 0 <= X <= 1 / numActiveTasks
+      val maxToGrant = math.min(numBytes, math.max(0, maxMemoryPerTask - curMem))
       // Only give it as much memory as is free, which might be none if it reached 1 / numTasks
       val toGrant = math.min(maxToGrant, memoryFree)
 
-      if (curMem < poolSize / (2 * numActiveTasks)) {
+      if (curMem < minMemoryPerTask) {
         // We want to let each task get at least 1 / (2 * numActiveTasks) before blocking;
         // if we can't give it this much now, wait for other tasks to free up memory
         // (this happens if older tasks allocated lots of memory before N grew)
-        if (memoryFree >= math.min(maxToGrant, poolSize / (2 * numActiveTasks) - curMem)) {
+        if (memoryFree >= math.min(maxToGrant, minMemoryPerTask - curMem)) {
           memoryForTask(taskAttemptId) += toGrant
           return toGrant
         } else {
diff --git a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
@@ -83,9 +83,13 @@ private[spark] class UnifiedMemoryManager private[memory] (
       case MemoryMode.ON_HEAP =>
 
         /**
-         * TODO: add comment.
+         * Grow the execution pool by evicting cached blocks, thereby shrinking the storage pool.
+         *
+         * When acquiring memory for a task, the execution pool may need to make multiple
+         * attempts. Each attempt must be able to evict storage in case another task jumps in
+         * and caches a large block between the attempts. This is called once per attempt.
          */
-        def maybeResizePool(extraMemoryNeeded: Long): Unit = {
+        def maybeGrowExecutionPool(extraMemoryNeeded: Long): Unit = {
           if (extraMemoryNeeded > 0) {
             // There is not enough free memory in the execution pool, so try to reclaim memory from
             // storage. We can reclaim any free memory from the storage pool. If the storage pool
@@ -103,14 +107,24 @@ private[spark] class UnifiedMemoryManager private[memory] (
         }
 
         /**
-         * TODO: (maxMemory - math.min(storageMemoryUsed, SF * maxMemory)
+         * The size the execution pool would have after evicting storage memory.
+         *
+         * The execution memory pool divides this quantity among the active tasks evenly to cap
+         * the execution memory allocation for each task. It is important to keep this greater
+         * than the execution pool size, which doesn't take into account potential memory that
+         * could be freed by evicting storage. Otherwise we may hit SPARK-12155.
+         *
+         * Additionally, this quantity should be kept below `maxMemory` to arbitrate fairness
+         * in execution memory allocation across tasks. Otherwise, a task may occupy more than
+         * its fair share of execution memory, mistakenly thinking that other tasks can acquire
+         * the portion of storage memory that cannot be evicted.
          */
-        def computeDaviesThingMax(): Long = {
+        def computeMaxExecutionPoolSize(): Long = {
           maxMemory - math.min(storageMemoryUsed, storageRegionSize)
         }
 
         onHeapExecutionMemoryPool.acquireMemory(
-          numBytes, taskAttemptId, maybeResizePool, computeDaviesThingMax)
+          numBytes, taskAttemptId, maybeGrowExecutionPool, computeMaxExecutionPoolSize)
 
       case MemoryMode.OFF_HEAP =>
         // For now, we only support on-heap caching of data, so we do not need to interact with
diff --git a/core/src/main/scala/org/apache/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala
@@ -92,7 +92,10 @@ private[spark] abstract class Task[T](
         Utils.tryLogNonFatalError {
           // Release memory used by this thread for unrolling blocks
           SparkEnv.get.blockManager.memoryStore.releaseUnrollMemoryForThisTask()
-          // TODO: you don't understand this?
+          // Notify any tasks waiting for execution memory to be freed to wake up and try to
+          // acquire memory again. This rules out the scenario where a task sleeps forever
+          // because there are no other tasks left to notify it. Since this is safe to do but may
+          // not be strictly necessary, we should revisit whether we can remove this in the future.
           val memoryManager = SparkEnv.get.memoryManager
           memoryManager.synchronized { memoryManager.notifyAll() }
         }