
Commit 3b11f43

Merge pull request alteryx#57 from aarondav/bid
Refactor BlockId into an actual type

Converts all of our BlockId strings into actual BlockId types. Here are some advantages of doing this now:

+ Type safety
+ Code clarity - it's now obvious what the key of a shuffle or rdd block is, for instance. Additionally, appearing in tuple/map type signatures is a big readability bonus. A Seq[(String, BlockStatus)] is not very clear. Further, we can now use more Scala features, like matching on BlockId types.
+ Explicit usage - we can now formally tell where various BlockIds are being used (without doing string searches); this makes updating current BlockIds a much clearer process, and compiler-supported. (I'm looking at you, shuffle file consolidation.)
+ It will only get harder to make this change as time goes on.

Downside is, of course, that this is a very invasive change touching a lot of different files, which will inevitably lead to merge conflicts for many.
2 parents 9979690 + 4a45019 commit 3b11f43
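For readers skimming the hunks below: the new org.apache.spark.storage.BlockId hierarchy itself is added elsewhere in this commit and is not among the files shown here. The following is a minimal sketch inferred from the usages in this diff, not the actual BlockId.scala. The rdd_* and shuffle_* name formats come from the string-building code being replaced; the broadcast, task-result and test formats, and the fallback parsing case, are assumptions for illustration only.

// Sketch only: a sealed family of typed block ids with a stable string name.
// Inferred from this diff; formats marked "assumed" are not confirmed by it.
package org.apache.spark.storage

sealed abstract class BlockId {
  /** String form, used wherever a plain name is still needed (file names, wire format). */
  def name: String
  override def toString = name
}

case class RDDBlockId(rddId: Int, splitIndex: Int) extends BlockId {
  def name = "rdd_" + rddId + "_" + splitIndex            // from old "rdd_%d_%d".format(...)
}

case class ShuffleBlockId(shuffleId: Int, mapId: Int, reduceId: Int) extends BlockId {
  def name = "shuffle_" + shuffleId + "_" + mapId + "_" + reduceId  // from old "shuffle_%d_%d_%d"
}

case class BroadcastBlockId(broadcastId: Long) extends BlockId {
  def name = "broadcast_" + broadcastId                    // assumed format
}

case class TaskResultBlockId(taskId: Long) extends BlockId {
  def name = "taskresult_" + taskId                        // matches old "taskresult_" + taskId
}

case class TestBlockId(id: String) extends BlockId {
  def name = "test_" + id                                  // assumed format
}

object BlockId {
  private val Rdd = "rdd_([0-9]+)_([0-9]+)".r
  private val Shuffle = "shuffle_([0-9]+)_([0-9]+)_([0-9]+)".r

  /** Parse a name back into a typed id, as FileServerHandler and ShuffleCopier do below. */
  def apply(name: String): BlockId = name match {
    case Rdd(rddId, splitIndex) => RDDBlockId(rddId.toInt, splitIndex.toInt)
    case Shuffle(shuffleId, mapId, reduceId) =>
      ShuffleBlockId(shuffleId.toInt, mapId.toInt, reduceId.toInt)
    case other => TestBlockId(other)                       // fallback for this sketch only
  }
}

With case classes in place, the regex match in BlockStoreShuffleFetcher below collapses to a plain pattern match on ShuffleBlockId, and call sites that still need a string (file names, the netty wire format) go through .name.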


44 files changed, +544 −385 lines

core/src/main/java/org/apache/spark/network/netty/FileClientHandler.java
Lines changed: 2 additions & 1 deletion

@@ -21,6 +21,7 @@
 import io.netty.channel.ChannelHandlerContext;
 import io.netty.channel.ChannelInboundByteHandlerAdapter;
 
+import org.apache.spark.storage.BlockId;
 
 abstract class FileClientHandler extends ChannelInboundByteHandlerAdapter {
 
@@ -33,7 +34,7 @@ public boolean isComplete() {
   }
 
   public abstract void handle(ChannelHandlerContext ctx, ByteBuf in, FileHeader header);
-  public abstract void handleError(String blockId);
+  public abstract void handleError(BlockId blockId);
 
   @Override
   public ByteBuf newInboundBuffer(ChannelHandlerContext ctx) {
core/src/main/java/org/apache/spark/network/netty/FileServerHandler.java
Lines changed: 4 additions & 2 deletions

@@ -24,6 +24,7 @@
 import io.netty.channel.ChannelInboundMessageHandlerAdapter;
 import io.netty.channel.DefaultFileRegion;
 
+import org.apache.spark.storage.BlockId;
 
 class FileServerHandler extends ChannelInboundMessageHandlerAdapter<String> {
 
@@ -34,8 +35,9 @@ public FileServerHandler(PathResolver pResolver){
   }
 
   @Override
-  public void messageReceived(ChannelHandlerContext ctx, String blockId) {
-    String path = pResolver.getAbsolutePath(blockId);
+  public void messageReceived(ChannelHandlerContext ctx, String blockIdString) {
+    BlockId blockId = BlockId.apply(blockIdString);
+    String path = pResolver.getAbsolutePath(blockId.name());
     // if getFilePath returns null, close the channel
     if (path == null) {
       //ctx.close();
core/src/main/scala/org/apache/spark/BlockStoreShuffleFetcher.scala
Lines changed: 5 additions & 6 deletions

@@ -22,7 +22,7 @@ import scala.collection.mutable.HashMap
 
 import org.apache.spark.executor.{ShuffleReadMetrics, TaskMetrics}
 import org.apache.spark.serializer.Serializer
-import org.apache.spark.storage.BlockManagerId
+import org.apache.spark.storage.{BlockId, BlockManagerId, ShuffleBlockId}
 import org.apache.spark.util.CompletionIterator
 
 
@@ -45,22 +45,21 @@ private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Loggin
       splitsByAddress.getOrElseUpdate(address, ArrayBuffer()) += ((index, size))
     }
 
-    val blocksByAddress: Seq[(BlockManagerId, Seq[(String, Long)])] = splitsByAddress.toSeq.map {
+    val blocksByAddress: Seq[(BlockManagerId, Seq[(BlockId, Long)])] = splitsByAddress.toSeq.map {
       case (address, splits) =>
-        (address, splits.map(s => ("shuffle_%d_%d_%d".format(shuffleId, s._1, reduceId), s._2)))
+        (address, splits.map(s => (ShuffleBlockId(shuffleId, s._1, reduceId), s._2)))
     }
 
-    def unpackBlock(blockPair: (String, Option[Iterator[Any]])) : Iterator[T] = {
+    def unpackBlock(blockPair: (BlockId, Option[Iterator[Any]])) : Iterator[T] = {
       val blockId = blockPair._1
       val blockOption = blockPair._2
       blockOption match {
         case Some(block) => {
           block.asInstanceOf[Iterator[T]]
         }
         case None => {
-          val regex = "shuffle_([0-9]*)_([0-9]*)_([0-9]*)".r
           blockId match {
-            case regex(shufId, mapId, _) =>
+            case ShuffleBlockId(shufId, mapId, _) =>
              val address = statuses(mapId.toInt)._1
              throw new FetchFailedException(address, shufId.toInt, mapId.toInt, reduceId, null)
            case _ =>
core/src/main/scala/org/apache/spark/CacheManager.scala
Lines changed: 4 additions & 4 deletions

@@ -18,7 +18,7 @@
 package org.apache.spark
 
 import scala.collection.mutable.{ArrayBuffer, HashSet}
-import org.apache.spark.storage.{BlockManager, StorageLevel}
+import org.apache.spark.storage.{BlockId, BlockManager, StorageLevel, RDDBlockId}
 import org.apache.spark.rdd.RDD
 
 
@@ -28,12 +28,12 @@ import org.apache.spark.rdd.RDD
 private[spark] class CacheManager(blockManager: BlockManager) extends Logging {
 
   /** Keys of RDD splits that are being computed/loaded. */
-  private val loading = new HashSet[String]()
+  private val loading = new HashSet[RDDBlockId]()
 
   /** Gets or computes an RDD split. Used by RDD.iterator() when an RDD is cached. */
   def getOrCompute[T](rdd: RDD[T], split: Partition, context: TaskContext, storageLevel: StorageLevel)
       : Iterator[T] = {
-    val key = "rdd_%d_%d".format(rdd.id, split.index)
+    val key = RDDBlockId(rdd.id, split.index)
     logDebug("Looking for partition " + key)
     blockManager.get(key) match {
       case Some(values) =>
@@ -73,7 +73,7 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging {
         if (context.runningLocally) { return computedValues }
         val elements = new ArrayBuffer[Any]
         elements ++= computedValues
-        blockManager.put(key, elements, storageLevel, true)
+        blockManager.put(key, elements, storageLevel, tellMaster = true)
         return elements.iterator.asInstanceOf[Iterator[T]]
       } finally {
         loading.synchronized {
core/src/main/scala/org/apache/spark/broadcast/BitTorrentBroadcast.scala
Lines changed: 2 additions & 2 deletions

@@ -26,7 +26,7 @@ import scala.collection.mutable.{ListBuffer, Map, Set}
 import scala.math
 
 import org.apache.spark._
-import org.apache.spark.storage.{BlockManager, StorageLevel}
+import org.apache.spark.storage.{BroadcastBlockId, StorageLevel}
 import org.apache.spark.util.Utils
 
 private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long)
@@ -36,7 +36,7 @@ private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal:
 
   def value = value_
 
-  def blockId: String = BlockManager.toBroadcastId(id)
+  def blockId = BroadcastBlockId(id)
 
   MultiTracker.synchronized {
     SparkEnv.get.blockManager.putSingle(blockId, value_, StorageLevel.MEMORY_AND_DISK, false)
core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala
Lines changed: 5 additions & 6 deletions

@@ -25,16 +25,15 @@ import it.unimi.dsi.fastutil.io.FastBufferedOutputStream
 
 import org.apache.spark.{HttpServer, Logging, SparkEnv}
 import org.apache.spark.io.CompressionCodec
-import org.apache.spark.storage.{BlockManager, StorageLevel}
-import org.apache.spark.util.{MetadataCleanerType, Utils, MetadataCleaner, TimeStampedHashSet}
-
+import org.apache.spark.storage.{BroadcastBlockId, StorageLevel}
+import org.apache.spark.util.{MetadataCleaner, MetadataCleanerType, TimeStampedHashSet, Utils}
 
 private[spark] class HttpBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long)
   extends Broadcast[T](id) with Logging with Serializable {
 
   def value = value_
 
-  def blockId: String = BlockManager.toBroadcastId(id)
+  def blockId = BroadcastBlockId(id)
 
   HttpBroadcast.synchronized {
     SparkEnv.get.blockManager.putSingle(blockId, value_, StorageLevel.MEMORY_AND_DISK, false)
@@ -121,7 +120,7 @@ private object HttpBroadcast extends Logging {
   }
 
   def write(id: Long, value: Any) {
-    val file = new File(broadcastDir, "broadcast-" + id)
+    val file = new File(broadcastDir, BroadcastBlockId(id).name)
     val out: OutputStream = {
       if (compress) {
         compressionCodec.compressedOutputStream(new FileOutputStream(file))
@@ -137,7 +136,7 @@ private object HttpBroadcast extends Logging {
   }
 
   def read[T](id: Long): T = {
-    val url = serverUri + "/broadcast-" + id
+    val url = serverUri + "/" + BroadcastBlockId(id).name
     val in = {
       if (compress) {
         compressionCodec.compressedInputStream(new URL(url).openStream())
core/src/main/scala/org/apache/spark/broadcast/TreeBroadcast.scala
Lines changed: 3 additions & 5 deletions

@@ -19,21 +19,19 @@ package org.apache.spark.broadcast
 
 import java.io._
 import java.net._
-import java.util.{Comparator, Random, UUID}
 
-import scala.collection.mutable.{ListBuffer, Map, Set}
-import scala.math
+import scala.collection.mutable.{ListBuffer, Set}
 
 import org.apache.spark._
-import org.apache.spark.storage.{BlockManager, StorageLevel}
+import org.apache.spark.storage.{BroadcastBlockId, StorageLevel}
 import org.apache.spark.util.Utils
 
 private[spark] class TreeBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long)
   extends Broadcast[T](id) with Logging with Serializable {
 
   def value = value_
 
-  def blockId = BlockManager.toBroadcastId(id)
+  def blockId = BroadcastBlockId(id)
 
   MultiTracker.synchronized {
     SparkEnv.get.blockManager.putSingle(blockId, value_, StorageLevel.MEMORY_AND_DISK, false)
core/src/main/scala/org/apache/spark/executor/Executor.scala
Lines changed: 2 additions & 2 deletions

@@ -27,7 +27,7 @@ import scala.collection.mutable.HashMap
 
 import org.apache.spark.scheduler._
 import org.apache.spark._
-import org.apache.spark.storage.StorageLevel
+import org.apache.spark.storage.{StorageLevel, TaskResultBlockId}
 import org.apache.spark.util.Utils
 
 /**
@@ -173,7 +173,7 @@ private[spark] class Executor(
         val serializedResult = {
           if (serializedDirectResult.limit >= akkaFrameSize - 1024) {
             logInfo("Storing result for " + taskId + " in local BlockManager")
-            val blockId = "taskresult_" + taskId
+            val blockId = TaskResultBlockId(taskId)
             env.blockManager.putBytes(
               blockId, serializedDirectResult, StorageLevel.MEMORY_AND_DISK_SER)
             ser.serialize(new IndirectTaskResult[Any](blockId))
core/src/main/scala/org/apache/spark/network/netty/FileHeader.scala
Lines changed: 10 additions & 12 deletions

@@ -20,17 +20,18 @@ package org.apache.spark.network.netty
 import io.netty.buffer._
 
 import org.apache.spark.Logging
+import org.apache.spark.storage.{TestBlockId, BlockId}
 
 private[spark] class FileHeader (
   val fileLen: Int,
-  val blockId: String) extends Logging {
+  val blockId: BlockId) extends Logging {
 
   lazy val buffer = {
     val buf = Unpooled.buffer()
     buf.capacity(FileHeader.HEADER_SIZE)
     buf.writeInt(fileLen)
-    buf.writeInt(blockId.length)
-    blockId.foreach((x: Char) => buf.writeByte(x))
+    buf.writeInt(blockId.name.length)
+    blockId.name.foreach((x: Char) => buf.writeByte(x))
     //padding the rest of header
     if (FileHeader.HEADER_SIZE - buf.readableBytes > 0 ) {
       buf.writeZero(FileHeader.HEADER_SIZE - buf.readableBytes)
@@ -57,18 +58,15 @@ private[spark] object FileHeader {
     for (i <- 1 to idLength) {
       idBuilder += buf.readByte().asInstanceOf[Char]
     }
-    val blockId = idBuilder.toString()
+    val blockId = BlockId(idBuilder.toString())
     new FileHeader(length, blockId)
   }
 
-
-  def main (args:Array[String]){
-
-    val header = new FileHeader(25,"block_0");
-    val buf = header.buffer;
-    val newheader = FileHeader.create(buf);
-    System.out.println("id="+newheader.blockId+",size="+newheader.fileLen)
-
+  def main (args:Array[String]) {
+    val header = new FileHeader(25, TestBlockId("my_block"))
+    val buf = header.buffer
+    val newHeader = FileHeader.create(buf)
+    System.out.println("id=" + newHeader.blockId + ",size=" + newHeader.fileLen)
   }
 }
core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala
Lines changed: 14 additions & 13 deletions

@@ -27,12 +27,13 @@ import org.apache.spark.Logging
 import org.apache.spark.network.ConnectionManagerId
 
 import scala.collection.JavaConverters._
+import org.apache.spark.storage.BlockId
 
 
 private[spark] class ShuffleCopier extends Logging {
 
-  def getBlock(host: String, port: Int, blockId: String,
-      resultCollectCallback: (String, Long, ByteBuf) => Unit) {
+  def getBlock(host: String, port: Int, blockId: BlockId,
+      resultCollectCallback: (BlockId, Long, ByteBuf) => Unit) {
 
     val handler = new ShuffleCopier.ShuffleClientHandler(resultCollectCallback)
     val connectTimeout = System.getProperty("spark.shuffle.netty.connect.timeout", "60000").toInt
@@ -41,7 +42,7 @@ private[spark] class ShuffleCopier extends Logging {
     try {
       fc.init()
       fc.connect(host, port)
-      fc.sendRequest(blockId)
+      fc.sendRequest(blockId.name)
      fc.waitForClose()
      fc.close()
    } catch {
@@ -53,14 +54,14 @@ private[spark] class ShuffleCopier extends Logging {
     }
   }
 
-  def getBlock(cmId: ConnectionManagerId, blockId: String,
-      resultCollectCallback: (String, Long, ByteBuf) => Unit) {
+  def getBlock(cmId: ConnectionManagerId, blockId: BlockId,
+      resultCollectCallback: (BlockId, Long, ByteBuf) => Unit) {
     getBlock(cmId.host, cmId.port, blockId, resultCollectCallback)
   }
 
   def getBlocks(cmId: ConnectionManagerId,
-    blocks: Seq[(String, Long)],
-    resultCollectCallback: (String, Long, ByteBuf) => Unit) {
+    blocks: Seq[(BlockId, Long)],
+    resultCollectCallback: (BlockId, Long, ByteBuf) => Unit) {
 
     for ((blockId, size) <- blocks) {
       getBlock(cmId, blockId, resultCollectCallback)
@@ -71,22 +72,22 @@ private[spark] class ShuffleCopier extends Logging {
 
 private[spark] object ShuffleCopier extends Logging {
 
-  private class ShuffleClientHandler(resultCollectCallBack: (String, Long, ByteBuf) => Unit)
+  private class ShuffleClientHandler(resultCollectCallBack: (BlockId, Long, ByteBuf) => Unit)
     extends FileClientHandler with Logging {
 
     override def handle(ctx: ChannelHandlerContext, in: ByteBuf, header: FileHeader) {
       logDebug("Received Block: " + header.blockId + " (" + header.fileLen + "B)");
       resultCollectCallBack(header.blockId, header.fileLen.toLong, in.readBytes(header.fileLen))
     }
 
-    override def handleError(blockId: String) {
+    override def handleError(blockId: BlockId) {
      if (!isComplete) {
        resultCollectCallBack(blockId, -1, null)
      }
    }
  }
 
-  def echoResultCollectCallBack(blockId: String, size: Long, content: ByteBuf) {
+  def echoResultCollectCallBack(blockId: BlockId, size: Long, content: ByteBuf) {
     if (size != -1) {
       logInfo("File: " + blockId + " content is : \" " + content.toString(CharsetUtil.UTF_8) + "\"")
     }
@@ -99,20 +100,20 @@ private[spark] object ShuffleCopier extends Logging {
     }
     val host = args(0)
     val port = args(1).toInt
-    val file = args(2)
+    val blockId = BlockId(args(2))
     val threads = if (args.length > 3) args(3).toInt else 10
 
     val copiers = Executors.newFixedThreadPool(80)
     val tasks = (for (i <- Range(0, threads)) yield {
       Executors.callable(new Runnable() {
        def run() {
          val copier = new ShuffleCopier()
-          copier.getBlock(host, port, file, echoResultCollectCallBack)
+          copier.getBlock(host, port, blockId, echoResultCollectCallBack)
        }
      })
    }).asJava
    copiers.invokeAll(tasks)
-    copiers.shutdown
+    copiers.shutdown()
    System.exit(0)
  }
}