Skip to content

Commit cd0c680

Browse files
author
Andrew Or
committed
Rename silly method names + add detailed comments
1 parent 35392f5 commit cd0c680

File tree

3 files changed

+57
-20
lines changed

3 files changed

+57
-20
lines changed

core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,28 @@ private[memory] class ExecutionMemoryPool(
7070
* active tasks) before it is forced to spill. This can happen if the number of tasks increase
7171
* but an older task had a lot of memory already.
7272
*
73+
* @param numBytes number of bytes to acquire
74+
* @param taskAttemptId the task attempt acquiring memory
75+
* @param maybeGrowPool a callback that potentially grows the size of this pool. It takes in
76+
* one parameter (Long) that represents the desired amount of memory by
77+
* which this pool should be expanded.
78+
* @param computeMaxPoolSize a callback that returns the maximum allowable size of this pool
79+
* at this given moment. This is not a field because the max pool
80+
* size is variable in certain cases. For instance, in unified
81+
* memory management, the execution pool can be expanded by evicting
82+
* cached blocks, thereby shrinking the storage pool.
83+
*
7384
* @return the number of bytes granted to the task.
7485
*/
75-
def acquireMemory(
86+
private[memory] def acquireMemory(
7687
numBytes: Long,
7788
taskAttemptId: Long,
78-
maybeResizePool: Long => Unit = (_: Long) => Unit,
79-
computeDaviesThingMax: () => Long = () => poolSize): Long = lock.synchronized {
89+
maybeGrowPool: Long => Unit = (additionalSpaceNeeded: Long) => Unit,
90+
computeMaxPoolSize: () => Long = () => poolSize): Long = lock.synchronized {
8091
assert(numBytes > 0, s"invalid number of bytes requested: $numBytes")
8192

93+
// TODO: clean up this clunky method signature
94+
8295
// Add this task to the taskMemory map just so we can keep an accurate count of the number
8396
// of active tasks, to let other tasks ramp down their memory in calls to `acquireMemory`
8497
if (!memoryForTask.contains(taskAttemptId)) {
@@ -95,23 +108,30 @@ private[memory] class ExecutionMemoryPool(
95108
val numActiveTasks = memoryForTask.keys.size
96109
val curMem = memoryForTask(taskAttemptId)
97110

98-
// TODO: explain me
99-
maybeResizePool(numBytes - memoryFree)
100-
101-
// TODO: explain me
102-
val daviesThingMax = computeDaviesThingMax()
103-
104-
// How much we can grant this task; don't let it grow to more than 1 / numActiveTasks;
105-
// don't let it be negative
106-
val maxToGrant = math.min(numBytes, math.max(0, (daviesThingMax / numActiveTasks) - curMem))
111+
// In every iteration of this loop, we should first try to reclaim any borrowed execution
112+
// space from storage. This is necessary because of the potential race condition where new
113+
// storage blocks may steal the free execution memory that this task was waiting for.
114+
maybeGrowPool(numBytes - memoryFree)
115+
116+
// Maximum size the pool would have after potentially growing the pool.
117+
// This is used to compute the upper bound of how much memory each task can occupy. This
118+
// must take into account potential free memory as well as the amount this pool currently
119+
// occupies. Otherwise, we may run into SPARK-12155 where, in unified memory management,
120+
// we did not take into account space that could have been freed by evicting cached blocks.
121+
val maxPoolSize = computeMaxPoolSize()
122+
val maxMemoryPerTask = maxPoolSize / numActiveTasks
123+
val minMemoryPerTask = poolSize / (2 * numActiveTasks)
124+
125+
// How much we can grant this task; keep its share within 0 <= X <= 1 / numActiveTasks
126+
val maxToGrant = math.min(numBytes, math.max(0, maxMemoryPerTask - curMem))
107127
// Only give it as much memory as is free, which might be none if it reached 1 / numTasks
108128
val toGrant = math.min(maxToGrant, memoryFree)
109129

110-
if (curMem < poolSize / (2 * numActiveTasks)) {
130+
if (curMem < minMemoryPerTask) {
111131
// We want to let each task get at least 1 / (2 * numActiveTasks) before blocking;
112132
// if we can't give it this much now, wait for other tasks to free up memory
113133
// (this happens if older tasks allocated lots of memory before N grew)
114-
if (memoryFree >= math.min(maxToGrant, poolSize / (2 * numActiveTasks) - curMem)) {
134+
if (memoryFree >= math.min(maxToGrant, poolSize / minMemoryPerTask)) {
115135
memoryForTask(taskAttemptId) += toGrant
116136
return toGrant
117137
} else {

core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,13 @@ private[spark] class UnifiedMemoryManager private[memory] (
8383
case MemoryMode.ON_HEAP =>
8484

8585
/**
86-
* TODO: add comment.
86+
* Grow the execution pool by evicting cached blocks, thereby shrinking the storage pool.
87+
*
88+
* When acquiring memory for a task, the execution pool may need to make multiple
89+
* attempts. Each attempt must be able to evict storage in case another task jumps in
90+
* and caches a large block between the attempts. This is called once per attempt.
8791
*/
88-
def maybeResizePool(extraMemoryNeeded: Long): Unit = {
92+
def maybeGrowExecutionPool(extraMemoryNeeded: Long): Unit = {
8993
if (extraMemoryNeeded > 0) {
9094
// There is not enough free memory in the execution pool, so try to reclaim memory from
9195
// storage. We can reclaim any free memory from the storage pool. If the storage pool
@@ -103,14 +107,24 @@ private[spark] class UnifiedMemoryManager private[memory] (
103107
}
104108

105109
/**
106-
* TODO: (maxMemory - math.min(storageMemoryUsed, SF * maxMemory)
110+
* The size the execution pool would have after evicting storage memory.
111+
*
112+
* The execution memory pool divides this quantity among the active tasks evenly to cap
113+
* the execution memory allocation for each task. It is important to keep this greater
114+
* than the execution pool size, which doesn't take into account potential memory that
115+
* could be freed by evicting storage. Otherwise we may hit SPARK-12155.
116+
*
117+
* Additionally, this quantity should be kept below `maxMemory` to arbitrate fairness
118+
* in execution memory allocation across tasks, Otherwise, a task may occupy more than
119+
* its fair share of execution memory, mistakenly thinking that other tasks can acquire
120+
* the portion of storage memory that cannot be evicted.
107121
*/
108-
def computeDaviesThingMax(): Long = {
122+
def computeMaxExecutionPoolSize(): Long = {
109123
maxMemory - math.min(storageMemoryUsed, storageRegionSize)
110124
}
111125

112126
onHeapExecutionMemoryPool.acquireMemory(
113-
numBytes, taskAttemptId, maybeResizePool, computeDaviesThingMax)
127+
numBytes, taskAttemptId, maybeGrowExecutionPool, computeMaxExecutionPoolSize)
114128

115129
case MemoryMode.OFF_HEAP =>
116130
// For now, we only support on-heap caching of data, so we do not need to interact with

core/src/main/scala/org/apache/spark/scheduler/Task.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,10 @@ private[spark] abstract class Task[T](
9292
Utils.tryLogNonFatalError {
9393
// Release memory used by this thread for unrolling blocks
9494
SparkEnv.get.blockManager.memoryStore.releaseUnrollMemoryForThisTask()
95-
// TODO: you don't understand this?
95+
// Notify any tasks waiting for execution memory to be freed to wake up and try to
96+
// acquire memory again. This makes impossible the scenario where a task sleeps forever
97+
// because there are no other tasks left to notify it. Since this is safe to do but may
98+
// not be strictly necessary, we should revisit whether we can remove this in the future.
9699
val memoryManager = SparkEnv.get.memoryManager
97100
memoryManager.synchronized { memoryManager.notifyAll() }
98101
}

0 commit comments

Comments
 (0)