@@ -17,13 +17,16 @@
 
 package org.apache.spark.sql.execution
 
-import java.io.{DataOutputStream, ByteArrayInputStream, ByteArrayOutputStream}
+import java.io.{File, DataOutputStream, ByteArrayInputStream, ByteArrayOutputStream}
 
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.executor.ShuffleWriteMetrics
+import org.apache.spark.storage.ShuffleBlockId
+import org.apache.spark.util.collection.ExternalSorter
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
 import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, UnsafeRow}
 import org.apache.spark.sql.types._
+import org.apache.spark._
 
 
 /**
@@ -87,4 +90,42 @@ class UnsafeRowSerializerSuite extends SparkFunSuite {
     assert(!deserializerIter.hasNext)
     assert(input.closed)
   }
+
+  test("SPARK-10466: external sorter spilling with unsafe row serializer") {
+    val conf = new SparkConf()
+      .set("spark.shuffle.spill.initialMemoryThreshold", "1024")
+      .set("spark.shuffle.sort.bypassMergeThreshold", "0")
+      .set("spark.shuffle.memoryFraction", "0.0001")
+    var sc: SparkContext = null
+    var outputFile: File = null
+    try {
+      sc = new SparkContext("local", "test", conf)
+      outputFile = File.createTempFile("test-unsafe-row-serializer-spill", "")
+      val data = (1 to 1000).iterator.map { i =>
+        (i, toUnsafeRow(Row(i), Array(IntegerType)))
+      }
+      val sorter = new ExternalSorter[Int, UnsafeRow, UnsafeRow](
+        partitioner = Some(new HashPartitioner(10)),
+        serializer = Some(new UnsafeRowSerializer(numFields = 1)))
+
+      // Ensure we spilled something and have to merge the spills later
+      assert(sorter.numSpills === 0)
+      sorter.insertAll(data)
+      assert(sorter.numSpills > 0)
+
+      // Merging the spilled files should not throw an assertion error
+      val taskContext = new TaskContextImpl(0, 0, 0, 0, null, null, InternalAccumulator.create(sc))
+      taskContext.taskMetrics.shuffleWriteMetrics = Some(new ShuffleWriteMetrics)
+      sorter.writePartitionedFile(ShuffleBlockId(0, 0, 0), taskContext, outputFile)
+
+    } finally {
+      // Clean up
+      if (sc != null) {
+        sc.stop()
+      }
+      if (outputFile != null) {
+        outputFile.delete()
+      }
+    }
+  }
 }
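For context, the new test relies on the `toUnsafeRow` helper already defined elsewhere in this suite (which is why the `UnsafeProjection` and `CatalystTypeConverters` imports are retained). A minimal sketch of such a helper, assuming the Spark 1.5-era `UnsafeProjection.create(Array[DataType])` and `CatalystTypeConverters.convertToCatalyst` APIs, could look like:

    // Hypothetical helper mirroring the one defined earlier in the suite:
    // projects an external Row into the UnsafeRow binary format.
    private def toUnsafeRow(row: Row, schema: Array[DataType]): UnsafeRow = {
      val converter = UnsafeProjection.create(schema)
      converter(CatalystTypeConverters.convertToCatalyst(row).asInstanceOf[InternalRow])
    }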