Commit 104b8d8

[SPARK-2503] Lower shuffle output buffer (spark.shuffle.file.buffer.kb) to 32KB.
This can substantially reduce memory usage during shuffle.
1 parent 05bf4e4 commit 104b8d8
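
The change touches every place the default is read, plus the documented default. For quick reference, a minimal, hypothetical driver-side sketch of how a user could still opt back into a larger buffer (the app name and value below are illustrative, not part of this commit):

import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical example: restore the old 100 KB buffer for an application
// whose shuffles are known to benefit from larger write buffers.
val conf = new SparkConf()
  .setAppName("shuffle-buffer-override")       // illustrative app name
  .set("spark.shuffle.file.buffer.kb", "100")  // value is in kilobytes
val sc = new SparkContext(conf)

// Internally, each shuffle writer resolves the setting the same way the
// patched files below do: conf.getInt("spark.shuffle.file.buffer.kb", 32) * 1024
// yields the per-stream buffer size in bytes.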

5 files changed: +5 -5 lines changed

core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala
Lines changed: 1 addition & 1 deletion

@@ -40,7 +40,7 @@ private[spark] class SortShuffleWriter[K, V, C](
   private val ser = Serializer.getSerializer(dep.serializer.orNull)

   private val conf = SparkEnv.get.conf
-  private val fileBufferSize = conf.getInt("spark.shuffle.file.buffer.kb", 100) * 1024
+  private val fileBufferSize = conf.getInt("spark.shuffle.file.buffer.kb", 32) * 1024

   private var sorter: ExternalSorter[K, V, _] = null
   private var outputFile: File = null
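
For readers unfamiliar with what the value controls: fileBufferSize ends up as the size, in bytes, of the in-memory buffer wrapped around each shuffle file output stream. A generic, self-contained sketch of that pattern (plain java.io, not the actual Spark writer code):

import java.io.{BufferedOutputStream, FileOutputStream}

// Generic illustration only: a byte-sized buffer batches many small writes
// into fewer, larger writes to the underlying file.
val fileBufferSize = 32 * 1024  // 32 KB, matching the new default
val out = new BufferedOutputStream(new FileOutputStream("/tmp/shuffle-demo.bin"), fileBufferSize)
try {
  (1 to 100000).foreach(i => out.write(i & 0xff))  // many tiny writes, few syscalls
} finally {
  out.close()
}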

core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala
Lines changed: 1 addition & 1 deletion

@@ -73,7 +73,7 @@ class ShuffleBlockManager(blockManager: BlockManager) extends Logging {
   val sortBasedShuffle =
     conf.get("spark.shuffle.manager", "") == classOf[SortShuffleManager].getName

-  private val bufferSize = conf.getInt("spark.shuffle.file.buffer.kb", 100) * 1024
+  private val bufferSize = conf.getInt("spark.shuffle.file.buffer.kb", 32) * 1024

   /**
    * Contains all the state related to a particular shuffle. This includes a pool of unused

core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
Lines changed: 1 addition & 1 deletion

@@ -107,7 +107,7 @@ class ExternalAppendOnlyMap[K, V, C](
   private var _memoryBytesSpilled = 0L
   private var _diskBytesSpilled = 0L

-  private val fileBufferSize = sparkConf.getInt("spark.shuffle.file.buffer.kb", 100) * 1024
+  private val fileBufferSize = sparkConf.getInt("spark.shuffle.file.buffer.kb", 32) * 1024
   private val keyComparator = new HashComparator[K]
   private val ser = serializer.newInstance()

core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala
Lines changed: 1 addition & 1 deletion

@@ -83,7 +83,7 @@ private[spark] class ExternalSorter[K, V, C](

   private val conf = SparkEnv.get.conf
   private val spillingEnabled = conf.getBoolean("spark.shuffle.spill", true)
-  private val fileBufferSize = conf.getInt("spark.shuffle.file.buffer.kb", 100) * 1024
+  private val fileBufferSize = conf.getInt("spark.shuffle.file.buffer.kb", 32) * 1024

   // Size of object batches when reading/writing from serializers.
   //

docs/configuration.md
Lines changed: 1 addition & 1 deletion

@@ -258,7 +258,7 @@ Apart from these, the following properties are also available, and may be useful
 </tr>
 <tr>
   <td><code>spark.shuffle.file.buffer.kb</code></td>
-  <td>100</td>
+  <td>32</td>
   <td>
     Size of the in-memory buffer for each shuffle file output stream, in kilobytes. These buffers
     reduce the number of disk seeks and system calls made in creating intermediate shuffle files.
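
Why lowering the default reduces memory: every shuffle file output stream that is open at the same time holds its own buffer of this size, so the total buffer footprint scales with the number of concurrently open streams. A rough, illustrative Scala estimate (the task and partition counts below are hypothetical, and the stream count assumes a hash-based shuffle without file consolidation):

// Hypothetical figures, for illustration only.
val concurrentMapTasks = 8      // map tasks running at once on one executor
val reducePartitions = 1000     // reduce-side partitions, one open stream each
def bufferMemoryMB(bufferKb: Int): Int =
  concurrentMapTasks * reducePartitions * bufferKb / 1024

println(bufferMemoryMB(100))    // ~781 MB of write buffers with the old default
println(bufferMemoryMB(32))     // ~250 MB with the new 32 KB default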
