Skip to content

Commit 0959514

Browse files
committed
Added unit test for having many hash collisions
1 parent 892debb commit 0959514

File tree

1 file changed

+38
-1
lines changed

1 file changed

+38
-1
lines changed

core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,11 @@ class ExternalAppendOnlyMapSuite extends FunSuite with LocalSparkContext {
277277
("pomatoes", "eructation") // 568647356
278278
)
279279

280+
collisionPairs.foreach { case (w1, w2) =>
281+
// String.hashCode is documented to use a specific algorithm, but check just in case
282+
assert(w1.hashCode === w2.hashCode)
283+
}
284+
280285
(1 to 100000).map(_.toString).foreach { i => map.insert(i, i) }
281286
collisionPairs.foreach { case (w1, w2) =>
282287
map.insert(w1, w2)
@@ -296,7 +301,32 @@ class ExternalAppendOnlyMapSuite extends FunSuite with LocalSparkContext {
296301
assert(kv._2.equals(expectedValue))
297302
count += 1
298303
}
299-
assert(count == 100000 + collisionPairs.size * 2)
304+
assert(count === 100000 + collisionPairs.size * 2)
305+
}
306+
307+
// Checks that every distinct key inserted into the map comes back from the iterator exactly once
// with the fully merged value, even though all keys share only two hash codes.
test("spilling with many hash collisions") {
308+
val conf = new SparkConf(true)
309+
// NOTE(review): presumably this tiny memory fraction is what makes the map spill (per the test
// name); the spill threshold itself is not visible in this diff -- confirm.
conf.set("spark.shuffle.memoryFraction", "0.0001")
310+
sc = new SparkContext("local-cluster[1,1,512]", "test", conf)
311+
312+
// The three functions turn any first value into 1 and then sum, so with the inserts below a
// key's final value equals the number of times it was inserted.
// NOTE(review): presumably these are createCombiner / mergeValue / mergeCombiners -- confirm
// against the ExternalAppendOnlyMap constructor.
val map = new ExternalAppendOnlyMap[FixedHashObject, Int, Int](_ => 1, _ + _, _ + _)
313+
314+
// Insert 10 copies each of 10000 distinct objects whose hash codes are either 0 or 1. This causes
315+
// problems if the map fails to group together the objects with the same code (SPARK-2043).
316+
for (i <- 1 to 10) {
317+
for (j <- 1 to 10000) {
318+
// FixedHashObject.hashCode returns its second argument, so the hash here is j % 2 (0 or 1).
map.insert(FixedHashObject(j, j % 2), 1)
319+
}
320+
}
321+
322+
val it = map.iterator
323+
var count = 0
324+
while (it.hasNext) {
325+
val kv = it.next()
326+
// Each key was inserted 10 times, so its merged value must be 10.
assert(kv._2 === 10)
327+
count += 1
328+
}
329+
// One entry per distinct key: j ranges over 1 to 10000.
assert(count === 10000)
300330
}
301331

302332
test("spilling with hash collisions using the Int.MaxValue key") {
@@ -317,3 +347,10 @@ class ExternalAppendOnlyMapSuite extends FunSuite with LocalSparkContext {
317347
}
318348
}
319349
}
350+
351+
/**
352+
* A dummy class whose hashCode is fixed to the constructor argument `h` instead of being derived
* from its fields, so that instances with different `v` but the same `h` collide. This makes it
* easy to test hash collisions. equals is not overridden here, so it remains the
* compiler-generated case-class equality.
*
* NOTE(review): `val` on case-class parameters and `extends Serializable` look redundant, since
* case classes already provide both -- confirm before removing.
353+
*/
354+
case class FixedHashObject(val v: Int, val h: Int) extends Serializable {
355+
override def hashCode(): Int = h
356+
}

0 commit comments

Comments
 (0)