@@ -20,13 +20,13 @@ package org.apache.spark.storage
20
20
import java .nio .ByteBuffer
21
21
import java .util .LinkedHashMap
22
22
23
- import scala .collection .mutable
24
- import scala .collection .mutable .ArrayBuffer
25
-
26
23
import org .apache .spark .TaskContext
27
24
import org .apache .spark .memory .MemoryManager
28
- import org .apache .spark .util .{SizeEstimator , Utils }
29
25
import org .apache .spark .util .collection .SizeTrackingVector
26
+ import org .apache .spark .util .{SizeEstimator , Utils }
27
+
28
+ import scala .collection .mutable
29
+ import scala .collection .mutable .ArrayBuffer
30
30
31
31
// A single cached entry in the MemoryStore: the stored value, its estimated
// size in bytes, and whether it is held in deserialized (object) form.
private case class MemoryEntry (value : Any , size : Long , deserialized : Boolean )
32
32
@@ -59,6 +59,11 @@ private[spark] class MemoryStore(blockManager: BlockManager, memoryManager: Memo
59
59
private val unrollMemoryThreshold : Long =
60
60
conf.getLong(" spark.storage.unrollMemoryThreshold" , 1024 * 1024 )
61
61
62
// CSD flag controlling whether to apply CSD's caching block size policy.
// A value <= 0 disables the policy entirely. The limit is capped at
// Integer.MAX_VALUE because a single cached block is ultimately backed by a
// byte array, which cannot exceed 2G elements on the JVM.
private val csdCacheBlockSizeLimit: Long =
  conf.getLong("spark.storage.MemoryStore.csdCacheBlockSizeLimit", Integer.MAX_VALUE.toLong)
assert(csdCacheBlockSizeLimit <= Integer.MAX_VALUE)
62
67
/** Total amount of memory available for storage, in bytes. */
63
68
private def maxMemory : Long = memoryManager.maxStorageMemory
64
69
@@ -173,11 +178,15 @@ private[spark] class MemoryStore(blockManager: BlockManager, memoryManager: Memo
173
178
val res = putArray(blockId, arrayValues, level, returnValues)
174
179
droppedBlocks ++= res.droppedBlocks
175
180
PutResult (res.size, res.data, droppedBlocks)
176
- case Right (iteratorValues) =>
181
+ case Right ((iteratorValues, false )) =>
182
+ // big block detected when unrolling
183
+ PutResult (0 , Left (iteratorValues), droppedBlocks)
184
+ case Right ((iteratorValues, true )) =>
177
185
// Not enough space to unroll this block; drop to disk if applicable
178
186
if (level.useDisk && allowPersistToDisk) {
179
187
logWarning(s " Persisting block $blockId to disk instead. " )
180
- val res = blockManager.diskStore.putIterator(blockId, iteratorValues, level, returnValues)
188
+ val res =
189
+ blockManager.diskStore.putIterator(blockId, iteratorValues, level, returnValues)
181
190
PutResult (res.size, res.data, droppedBlocks)
182
191
} else {
183
192
PutResult (0 , Left (iteratorValues), droppedBlocks)
@@ -234,6 +243,46 @@ private[spark] class MemoryStore(blockManager: BlockManager, memoryManager: Memo
234
243
logInfo(" MemoryStore cleared" )
235
244
}
236
245
246
+ /**
247
+ * This api is used by CSD as a post process of [[unrollSafely ]] to detect
248
+ * partitions larger than 2G in estimated object size when there is not enough memory for
249
+ * unrolling.
250
+ * This api continues fetching until we "see" 2G of object in size or values exhausted,
251
+ * with the assumption that the amount of memory specified by sizeLimit parameter are available
252
+ * in spark's user memory space per thread and per operator/RDD compute.
253
+ *
254
+ * Parameter sizeLimit is at most as large as csdCacheBlockSizeLimit (which is also upper bounded
255
+ * by Integer.MAX_VALUE), we can make sure user memory space has at least 2G available
256
+ * per thread for worst case.
257
+ */
258
/**
 * CSD post-processing step for [[unrollSafely]]: when there is not enough storage
 * memory to finish unrolling a block, keep consuming the remaining input in order to
 * detect partitions whose estimated object size exceeds the configured limit
 * (which is at most 2G, see `csdCacheBlockSizeLimit`).
 *
 * Fetching continues until either the estimated size passes the limit or the input
 * is exhausted, under the assumption that the per-task, per-operator user memory
 * space can accommodate up to `csdCacheBlockSizeLimit` bytes in the worst case.
 *
 * @param blockId     block being examined (used only for the duration log line)
 * @param inputValues the values that have not yet been unrolled
 * @param valuesSeen  size-tracking buffer accumulating every value examined so far
 * @return true if the block stayed within the size limit (caller may cache it),
 *         false if an over-sized block was detected
 */
private[this] def fetchUntilCsdBlockSizeLimit[T](
    blockId: BlockId,
    inputValues: Iterator[T],
    valuesSeen: SizeTrackingVector[Any]): Boolean = {
  if (csdCacheBlockSizeLimit <= 0) {
    // Policy switched off: report the block as cacheable without examining it.
    true
  } else {
    val startMs = System.currentTimeMillis
    var estimatedSize = valuesSeen.estimateSize()
    try {
      // Size estimation is expensive, so only re-estimate every few elements.
      val sizeCheckPeriod = 16
      var examined = 0L
      while (inputValues.hasNext && estimatedSize <= csdCacheBlockSizeLimit) {
        valuesSeen += inputValues.next()
        examined += 1
        if (examined % sizeCheckPeriod == 0) {
          estimatedSize = valuesSeen.estimateSize()
        }
      }
      estimatedSize <= csdCacheBlockSizeLimit
    } finally {
      // Always record how long the scan took, even if estimateSize() throws.
      logWarning(s"fetchUntilCsdBlockSizeLimit($blockId) duration: " +
        s"${Utils.msDurationToString(System.currentTimeMillis - startMs)}")
    }
  }
}
285
+
237
286
/**
238
287
* Unroll the given block in memory safely.
239
288
*
@@ -245,12 +294,19 @@ private[spark] class MemoryStore(blockManager: BlockManager, memoryManager: Memo
245
294
*
246
295
* This method returns either an array with the contents of the entire block or an iterator
247
296
* containing the values of the block (if the array would have exceeded available memory).
297
+ *
298
+ * SPY-1394: CSD modified this API in the following way:
299
+ * 1. It returns a tuple (iterator, boolean), when short of memory.
300
+ * The boolean is an indicator on whether caller should cache to disk, based
301
+ * on detection of over-sized block.
302
+ * 2. When over-sized block is detected, terminate the unroll and tell the caller to not
303
+ * cache at all.
248
304
*/
249
305
def unrollSafely (
250
306
blockId : BlockId ,
251
307
values : Iterator [Any ],
252
308
droppedBlocks : ArrayBuffer [(BlockId , BlockStatus )])
253
- : Either [Array [Any ], Iterator [Any ]] = {
309
+ : Either [Array [Any ], ( Iterator [Any ], Boolean ) ] = {
254
310
255
311
// Number of elements unrolled so far
256
312
var elementsUnrolled = 0
@@ -281,11 +337,18 @@ private[spark] class MemoryStore(blockManager: BlockManager, memoryManager: Memo
281
337
282
338
// Unroll this block safely, checking whether we have exceeded our threshold periodically
283
339
try {
284
- while (values.hasNext && keepUnrolling) {
340
+ var currentSize = 0L
341
+ var shouldCache = true
342
+ while (values.hasNext && keepUnrolling && (csdCacheBlockSizeLimit <= 0 || shouldCache)) {
285
343
vector += values.next()
286
344
if (elementsUnrolled % memoryCheckPeriod == 0 ) {
287
345
// If our vector's size has exceeded the threshold, request more memory
288
- val currentSize = vector.estimateSize()
346
+ currentSize = vector.estimateSize()
347
+
348
+ if (csdCacheBlockSizeLimit > 0 && shouldCache && currentSize > csdCacheBlockSizeLimit) {
349
+ shouldCache = false
350
+ }
351
+
289
352
if (currentSize >= memoryThreshold) {
290
353
val amountToRequest = (currentSize * memoryGrowthFactor - memoryThreshold).toLong
291
354
keepUnrolling = reserveUnrollMemoryForThisTask(
@@ -300,13 +363,25 @@ private[spark] class MemoryStore(blockManager: BlockManager, memoryManager: Memo
300
363
elementsUnrolled += 1
301
364
}
302
365
303
- if (keepUnrolling) {
366
+ if (keepUnrolling && shouldCache ) {
304
367
// We successfully unrolled the entirety of this block
305
368
Left (vector.toArray)
306
369
} else {
307
- // We ran out of space while unrolling the values for this block
308
- logUnrollFailureMessage(blockId, vector.estimateSize())
309
- Right (vector.iterator ++ values)
370
+ if (! shouldCache) {
371
+ logBlockSizeLimitMessage(blockId, currentSize)
372
+ Right (vector.iterator ++ values, shouldCache)
373
+ } else {
374
+ // could be false positive because we have not seen enough of the values
375
+ // continue the fetching using memory from user
376
+ shouldCache = fetchUntilCsdBlockSizeLimit(blockId, values, vector)
377
+ if (! shouldCache) {
378
+ logBlockSizeLimitMessage(blockId, vector.estimateSize())
379
+ } else {
380
+ // We ran out of space while unrolling the values for this block
381
+ logUnrollFailureMessage(blockId, vector.estimateSize())
382
+ }
383
+ Right (vector.iterator ++ values, shouldCache)
384
+ }
310
385
}
311
386
312
387
} finally {
@@ -583,4 +658,12 @@ private[spark] class MemoryStore(blockManager: BlockManager, memoryManager: Memo
583
658
)
584
659
logMemoryUsage()
585
660
}
661
+
662
/**
 * Logs a warning that the given block exceeded the CSD cache block size limit,
 * including the size computed so far, then dumps the store's current memory usage.
 */
private def logBlockSizeLimitMessage(blockId: BlockId, currentSize: Long): Unit = {
  val sizeSoFar = Utils.bytesToString(currentSize)
  logWarning(s"Block size limit reached: $blockId! (computed $sizeSoFar so far)")
  logMemoryUsage()
}
586
669
}
0 commit comments