Commit e446287

Author: Ilya Ganelin
Merge remote-tracking branch 'upstream/master' into SPARK-3288
2 parents: b8c05cb + 1881431

File tree: 229 files changed (+2722, -2853 lines)


core/src/main/java/org/apache/spark/TaskContext.java

Lines changed: 22 additions & 2 deletions
@@ -62,7 +62,7 @@ static void unset() {
    */
   public abstract boolean isInterrupted();
 
-  /** @deprecated: use isRunningLocally() */
+  /** @deprecated use {@link #isRunningLocally()} */
   @Deprecated
   public abstract boolean runningLocally();
 
@@ -87,19 +87,39 @@ static void unset() {
    * is for HadoopRDD to register a callback to close the input stream.
    * Will be called in any situation - success, failure, or cancellation.
    *
-   * @deprecated: use addTaskCompletionListener
+   * @deprecated use {@link #addTaskCompletionListener(scala.Function1)}
    *
    * @param f Callback function.
    */
   @Deprecated
   public abstract void addOnCompleteCallback(final Function0<Unit> f);
 
+  /**
+   * The ID of the stage that this task belong to.
+   */
   public abstract int stageId();
 
+  /**
+   * The ID of the RDD partition that is computed by this task.
+   */
   public abstract int partitionId();
 
+  /**
+   * How many times this task has been attempted. The first task attempt will be assigned
+   * attemptNumber = 0, and subsequent attempts will have increasing attempt numbers.
+   */
+  public abstract int attemptNumber();
+
+  /** @deprecated use {@link #taskAttemptId()}; it was renamed to avoid ambiguity. */
+  @Deprecated
   public abstract long attemptId();
 
+  /**
+   * An ID that is unique to this task attempt (within the same SparkContext, no two task attempts
+   * will share the same attempt ID). This is roughly equivalent to Hadoop's TaskAttemptID.
+   */
+  public abstract long taskAttemptId();
+
   /** ::DeveloperApi:: */
   @DeveloperApi
   public abstract TaskMetrics taskMetrics();
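A minimal sketch of how task code might read the new fields, assuming the TaskContext.get() accessor that Spark exposes to running tasks; the app name, object name, and data here are illustrative only:

    import org.apache.spark.{SparkConf, SparkContext, TaskContext}

    object AttemptNumberDemo {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf().setAppName("attempt-number-demo").setMaster("local[2]")
        val sc = new SparkContext(conf)
        val described = sc.parallelize(1 to 4, numSlices = 2).mapPartitions { iter =>
          val ctx = TaskContext.get()
          // attemptNumber starts at 0 and grows only when the same task is retried;
          // taskAttemptId is unique across all task attempts in this SparkContext.
          iter.map(x => s"value=$x stage=${ctx.stageId} partition=${ctx.partitionId} " +
            s"attemptNumber=${ctx.attemptNumber} taskAttemptId=${ctx.taskAttemptId}")
        }
        described.collect().foreach(println)
        sc.stop()
      }
    }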

core/src/main/scala/org/apache/spark/TaskContextImpl.scala

Lines changed: 7 additions & 2 deletions
@@ -22,14 +22,19 @@ import org.apache.spark.util.{TaskCompletionListener, TaskCompletionListenerExce
 
 import scala.collection.mutable.ArrayBuffer
 
-private[spark] class TaskContextImpl(val stageId: Int,
+private[spark] class TaskContextImpl(
+    val stageId: Int,
     val partitionId: Int,
-    val attemptId: Long,
+    override val taskAttemptId: Long,
+    override val attemptNumber: Int,
     val runningLocally: Boolean = false,
     val taskMetrics: TaskMetrics = TaskMetrics.empty)
   extends TaskContext
   with Logging {
 
+  // For backwards-compatibility; this method is now deprecated as of 1.3.0.
+  override def attemptId: Long = taskAttemptId
+
   // List of callback functions to execute when the task completes.
   @transient private val onCompleteCallbacks = new ArrayBuffer[TaskCompletionListener]

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 5 additions & 0 deletions
@@ -149,6 +149,11 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St
     // Global defaults. These should be keep to minimum to avoid confusing behavior.
     master = Option(master).getOrElse("local[*]")
 
+    // In YARN mode, app name can be set via SPARK_YARN_APP_NAME (see SPARK-5222)
+    if (master.startsWith("yarn")) {
+      name = Option(name).orElse(env.get("SPARK_YARN_APP_NAME")).orNull
+    }
+
     // Set name from main class if not given
     name = Option(name).orElse(Option(mainClass)).orNull
     if (name == null && primaryResource != null) {
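The change above simply adds one more fallback in the existing name-resolution chain. As a standalone illustration (resolveAppName is a hypothetical helper, not part of SparkSubmitArguments), the resulting precedence is: an explicitly supplied name, then SPARK_YARN_APP_NAME, then the main class:

    // Hypothetical helper mirroring the fallback order introduced above.
    def resolveAppName(
        explicitName: Option[String],
        env: Map[String, String],
        mainClass: Option[String]): Option[String] =
      explicitName.orElse(env.get("SPARK_YARN_APP_NAME")).orElse(mainClass)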

core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala

Lines changed: 2 additions & 1 deletion
@@ -71,7 +71,8 @@ private[spark] class CoarseGrainedExecutorBackend(
       val ser = env.closureSerializer.newInstance()
       val taskDesc = ser.deserialize[TaskDescription](data.value)
       logInfo("Got assigned task " + taskDesc.taskId)
-      executor.launchTask(this, taskDesc.taskId, taskDesc.name, taskDesc.serializedTask)
+      executor.launchTask(this, taskId = taskDesc.taskId, attemptNumber = taskDesc.attemptNumber,
+        taskDesc.name, taskDesc.serializedTask)
     }
 
     case KillTask(taskId, _, interruptThread) =>

core/src/main/scala/org/apache/spark/executor/Executor.scala

Lines changed: 13 additions & 4 deletions
@@ -108,8 +108,13 @@ private[spark] class Executor(
   startDriverHeartbeater()
 
   def launchTask(
-      context: ExecutorBackend, taskId: Long, taskName: String, serializedTask: ByteBuffer) {
-    val tr = new TaskRunner(context, taskId, taskName, serializedTask)
+      context: ExecutorBackend,
+      taskId: Long,
+      attemptNumber: Int,
+      taskName: String,
+      serializedTask: ByteBuffer) {
+    val tr = new TaskRunner(context, taskId = taskId, attemptNumber = attemptNumber, taskName,
+      serializedTask)
     runningTasks.put(taskId, tr)
     threadPool.execute(tr)
   }
@@ -134,7 +139,11 @@ private[spark] class Executor(
   private def gcTime = ManagementFactory.getGarbageCollectorMXBeans.map(_.getCollectionTime).sum
 
   class TaskRunner(
-      execBackend: ExecutorBackend, val taskId: Long, taskName: String, serializedTask: ByteBuffer)
+      execBackend: ExecutorBackend,
+      val taskId: Long,
+      val attemptNumber: Int,
+      taskName: String,
+      serializedTask: ByteBuffer)
     extends Runnable {
 
     @volatile private var killed = false
@@ -180,7 +189,7 @@ private[spark] class Executor(
 
         // Run the actual task and measure its runtime.
         taskStart = System.currentTimeMillis()
-        val value = task.run(taskId.toInt)
+        val value = task.run(taskAttemptId = taskId, attemptNumber = attemptNumber)
         val taskFinish = System.currentTimeMillis()
 
         // If the task has been killed, let's fail it.

core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala

Lines changed: 4 additions & 1 deletion
@@ -28,6 +28,7 @@ import org.apache.mesos.Protos.{TaskStatus => MesosTaskStatus, _}
 import org.apache.spark.{Logging, TaskState, SparkConf, SparkEnv}
 import org.apache.spark.TaskState.TaskState
 import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.scheduler.cluster.mesos.{MesosTaskLaunchData}
 import org.apache.spark.util.{SignalLogger, Utils}
 
 private[spark] class MesosExecutorBackend
@@ -77,11 +78,13 @@ private[spark] class MesosExecutorBackend
 
   override def launchTask(d: ExecutorDriver, taskInfo: TaskInfo) {
     val taskId = taskInfo.getTaskId.getValue.toLong
+    val taskData = MesosTaskLaunchData.fromByteString(taskInfo.getData)
     if (executor == null) {
       logError("Received launchTask but executor was null")
     } else {
       SparkHadoopUtil.get.runAsSparkUser { () =>
-        executor.launchTask(this, taskId, taskInfo.getName, taskInfo.getData.asReadOnlyByteBuffer)
+        executor.launchTask(this, taskId = taskId, attemptNumber = taskData.attemptNumber,
+          taskInfo.getName, taskData.serializedTask)
       }
     }
   }
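Mesos delivers the task payload as a single opaque byte blob, so the attempt number has to travel inside it alongside the serialized task. The sketch below shows one plausible packing scheme only; the real MesosTaskLaunchData lives in org.apache.spark.scheduler.cluster.mesos and its actual wire format may differ:

    import java.nio.ByteBuffer

    // Sketch only: pack an Int attempt number in front of the serialized task bytes,
    // and unpack it again on the executor side.
    case class TaskLaunchDataSketch(serializedTask: ByteBuffer, attemptNumber: Int) {
      def toByteBuffer: ByteBuffer = {
        val buf = ByteBuffer.allocate(4 + serializedTask.remaining())
        buf.putInt(attemptNumber)             // first 4 bytes: attempt number
        buf.put(serializedTask.duplicate())   // remainder: the serialized task
        buf.flip()
        buf
      }
    }

    object TaskLaunchDataSketch {
      def fromByteBuffer(bytes: ByteBuffer): TaskLaunchDataSketch = {
        val buf = bytes.duplicate()
        val attemptNumber = buf.getInt()      // read the attempt number back out
        TaskLaunchDataSketch(buf.slice(), attemptNumber)
      }
    }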

core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala

Lines changed: 3 additions & 2 deletions
@@ -95,7 +95,8 @@ private[spark] object CheckpointRDD extends Logging {
 
     val finalOutputName = splitIdToFile(ctx.partitionId)
     val finalOutputPath = new Path(outputDir, finalOutputName)
-    val tempOutputPath = new Path(outputDir, "." + finalOutputName + "-attempt-" + ctx.attemptId)
+    val tempOutputPath =
+      new Path(outputDir, "." + finalOutputName + "-attempt-" + ctx.attemptNumber)
 
     if (fs.exists(tempOutputPath)) {
       throw new IOException("Checkpoint failed: temporary path " +
@@ -119,7 +120,7 @@ private[spark] object CheckpointRDD extends Logging {
        logInfo("Deleting tempOutputPath " + tempOutputPath)
        fs.delete(tempOutputPath, false)
        throw new IOException("Checkpoint failed: failed to save output of task: "
-         + ctx.attemptId + " and final output path does not exist")
+         + ctx.attemptNumber + " and final output path does not exist")
      } else {
        // Some other copy of this task must've finished before us and renamed it
        logInfo("Final output path " + finalOutputPath + " already exists; not overwriting it")

core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala

Lines changed: 1 addition & 1 deletion
@@ -229,7 +229,7 @@ class HadoopRDD[K, V](
       var reader: RecordReader[K, V] = null
       val inputFormat = getInputFormat(jobConf)
       HadoopRDD.addLocalConfiguration(new SimpleDateFormat("yyyyMMddHHmm").format(createTime),
-        context.stageId, theSplit.index, context.attemptId.toInt, jobConf)
+        context.stageId, theSplit.index, context.attemptNumber, jobConf)
       reader = inputFormat.getRecordReader(split.inputSplit.value, jobConf, Reporter.NULL)
 
       // Register an on-task-completion callback to close the input stream.

core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala

Lines changed: 3 additions & 6 deletions
@@ -978,12 +978,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
 
     val writeShard = (context: TaskContext, iter: Iterator[(K,V)]) => {
       val config = wrappedConf.value
-      // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it
-      // around by taking a mod. We expect that no task will be attempted 2 billion times.
-      val attemptNumber = (context.attemptId % Int.MaxValue).toInt
       /* "reduce task" <split #> <attempt # = spark task #> */
       val attemptId = newTaskAttemptID(jobtrackerID, stageId, isMap = false, context.partitionId,
-        attemptNumber)
+        context.attemptNumber)
       val hadoopContext = newTaskAttemptContext(config, attemptId)
       val format = outfmt.newInstance
       format match {
@@ -1062,11 +1059,11 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
       val config = wrappedConf.value
       // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it
       // around by taking a mod. We expect that no task will be attempted 2 billion times.
-      val attemptNumber = (context.attemptId % Int.MaxValue).toInt
+      val taskAttemptId = (context.taskAttemptId % Int.MaxValue).toInt
 
       val (outputMetrics, bytesWrittenCallback) = initHadoopOutputMetrics(context, config)
 
-      writer.setup(context.stageId, context.partitionId, attemptNumber)
+      writer.setup(context.stageId, context.partitionId, taskAttemptId)
       writer.open()
       try {
         var recordsWritten = 0L
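The Hadoop-facing writer API above still takes an Int, so the 64-bit taskAttemptId is folded into Int range before use. A minimal standalone sketch of that wrap-around (toHadoopAttemptId is a hypothetical name, not a Spark method):

    // Hadoop's task attempt IDs are 32-bit, so fold the Long Spark attempt ID into Int range.
    // A SparkContext would need on the order of two billion task attempts before IDs could collide.
    def toHadoopAttemptId(taskAttemptId: Long): Int = (taskAttemptId % Int.MaxValue).toInt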

core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala

Lines changed: 2 additions & 2 deletions
@@ -635,8 +635,8 @@ class DAGScheduler(
     try {
       val rdd = job.finalStage.rdd
       val split = rdd.partitions(job.partitions(0))
-      val taskContext =
-        new TaskContextImpl(job.finalStage.id, job.partitions(0), 0, true)
+      val taskContext = new TaskContextImpl(job.finalStage.id, job.partitions(0), taskAttemptId = 0,
+        attemptNumber = 0, runningLocally = true)
       TaskContextHelper.setTaskContext(taskContext)
       try {
         val result = job.func(taskContext, rdd.iterator(split, taskContext))
