Commit 47ce99d

Merge branch 'master' into openstack

2 parents: 99f095d + 237b96b

60 files changed: 599 additions, 307 deletions (only part of the diff is reproduced below)

core/pom.xml

Lines changed: 1 addition & 0 deletions

@@ -70,6 +70,7 @@
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-math3</artifactId>
+      <version>3.3</version>
       <scope>test</scope>
     </dependency>
     <dependency>
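
For context, commons-math3 is now pinned at 3.3 and stays test-scoped, so it is visible on the test classpath only and is not bundled into the spark-core artifact. A hypothetical test-side usage sketch (the distribution and values are illustrative, not part of this commit):

    import org.apache.commons.math3.distribution.PoissonDistribution

    // Resolves because commons-math3 3.3 is on the test classpath; it would not
    // be available to main/ code since the dependency is <scope>test</scope>.
    val poisson = new PoissonDistribution(5.0)
    val sampled: Int = poisson.sample()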

core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala

Lines changed: 2 additions & 2 deletions

@@ -19,9 +19,9 @@ package org.apache.spark.deploy
 
 private[spark] object ExecutorState extends Enumeration {
 
-  val LAUNCHING, LOADING, RUNNING, KILLED, FAILED, LOST = Value
+  val LAUNCHING, LOADING, RUNNING, KILLED, FAILED, LOST, EXITED = Value
 
   type ExecutorState = Value
 
-  def isFinished(state: ExecutorState): Boolean = Seq(KILLED, FAILED, LOST).contains(state)
+  def isFinished(state: ExecutorState): Boolean = Seq(KILLED, FAILED, LOST, EXITED).contains(state)
 }
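
The new EXITED value describes an executor that terminated on its own, for example after a clean shutdown requested by the driver (see the ExecutorRunner change below), as opposed to one that was killed, failed, or lost. Since isFinished now includes it, EXITED is terminal without implying failure:

    // Illustrative checks against the enum above
    ExecutorState.isFinished(ExecutorState.EXITED)   // true: terminal, not necessarily a failure
    ExecutorState.isFinished(ExecutorState.RUNNING)  // false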

core/src/main/scala/org/apache/spark/deploy/master/Master.scala

Lines changed: 3 additions & 2 deletions

@@ -303,10 +303,11 @@ private[spark] class Master(
           appInfo.removeExecutor(exec)
           exec.worker.removeExecutor(exec)
 
+          val normalExit = exitStatus.exists(_ == 0)
           // Only retry certain number of times so we don't go into an infinite loop.
-          if (appInfo.incrementRetryCount < ApplicationState.MAX_NUM_RETRY) {
+          if (!normalExit && appInfo.incrementRetryCount < ApplicationState.MAX_NUM_RETRY) {
             schedule()
-          } else {
+          } else if (!normalExit) {
             logError("Application %s with ID %s failed %d times, removing it".format(
               appInfo.desc.name, appInfo.id, appInfo.retryCount))
             removeApplication(appInfo, ApplicationState.FAILED)
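
The effect is that an executor exiting with status 0 neither consumes one of the application's retries nor causes the application to be removed as FAILED. A minimal sketch of the decision with simplified names (not the actual Master method):

    def shouldReschedule(exitStatus: Option[Int], retryCount: Int): Boolean = {
      val normalExit = exitStatus.exists(_ == 0)  // clean, expected shutdown
      // Only abnormal exits count toward MAX_NUM_RETRY; normal exits are ignored here.
      !normalExit && retryCount < ApplicationState.MAX_NUM_RETRY
    }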

core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala

Lines changed: 3 additions & 4 deletions

@@ -154,11 +154,10 @@ private[spark] class ExecutorRunner(
       Files.write(header, stderr, Charsets.UTF_8)
       stderrAppender = FileAppender(process.getErrorStream, stderr, conf)
 
-      // Wait for it to exit; this is actually a bad thing if it happens, because we expect to run
-      // long-lived processes only. However, in the future, we might restart the executor a few
-      // times on the same machine.
+      // Wait for it to exit; executor may exit with code 0 (when driver instructs it to shutdown)
+      // or with nonzero exit code
      val exitCode = process.waitFor()
-      state = ExecutorState.FAILED
+      state = ExecutorState.EXITED
       val message = "Command exited with code " + exitCode
       worker ! ExecutorStateChanged(appId, execId, state, Some(message), Some(exitCode))
     } catch {

core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala

Lines changed: 7 additions & 4 deletions

@@ -139,10 +139,13 @@ class HadoopRDD[K, V](
       // Create a JobConf that will be cached and used across this RDD's getJobConf() calls in the
       // local process. The local cache is accessed through HadoopRDD.putCachedMetadata().
       // The caching helps minimize GC, since a JobConf can contain ~10KB of temporary objects.
-      val newJobConf = new JobConf(broadcastedConf.value.value)
-      initLocalJobConfFuncOpt.map(f => f(newJobConf))
-      HadoopRDD.putCachedMetadata(jobConfCacheKey, newJobConf)
-      newJobConf
+      // synchronize to prevent ConcurrentModificationException (Spark-1097, Hadoop-10456)
+      broadcastedConf.synchronized {
+        val newJobConf = new JobConf(broadcastedConf.value.value)
+        initLocalJobConfFuncOpt.map(f => f(newJobConf))
+        HadoopRDD.putCachedMetadata(jobConfCacheKey, newJobConf)
+        newJobConf
+      }
     }
   }
 
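Rationale: the JobConf copy constructor iterates over the broadcast Hadoop Configuration, and that iteration is not thread-safe, so two tasks cloning the conf concurrently in the same JVM can throw ConcurrentModificationException. Serializing the clone on the broadcast wrapper avoids this. A stripped-down sketch of the pattern (names shortened; not the full getJobConf method):

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.mapred.JobConf
    import org.apache.spark.SerializableWritable
    import org.apache.spark.broadcast.Broadcast

    def cloneConf(broadcastedConf: Broadcast[SerializableWritable[Configuration]]): JobConf =
      broadcastedConf.synchronized {
        // Only one thread per JVM copies the shared Configuration at a time.
        new JobConf(broadcastedConf.value.value)
      }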

core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala

Lines changed: 2 additions & 4 deletions

@@ -787,8 +787,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
     val outfmt = job.getOutputFormatClass
     val jobFormat = outfmt.newInstance
 
-    if (self.conf.getBoolean("spark.hadoop.validateOutputSpecs", true) &&
-        jobFormat.isInstanceOf[NewFileOutputFormat[_, _]]) {
+    if (self.conf.getBoolean("spark.hadoop.validateOutputSpecs", true)) {
       // FileOutputFormat ignores the filesystem parameter
       jobFormat.checkOutputSpecs(job)
     }
@@ -854,8 +853,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
     logDebug("Saving as hadoop file of type (" + keyClass.getSimpleName + ", " +
       valueClass.getSimpleName + ")")
 
-    if (self.conf.getBoolean("spark.hadoop.validateOutputSpecs", true) &&
-        outputFormatInstance.isInstanceOf[FileOutputFormat[_, _]]) {
+    if (self.conf.getBoolean("spark.hadoop.validateOutputSpecs", true)) {
       // FileOutputFormat ignores the filesystem parameter
       val ignoredFs = FileSystem.get(conf)
       conf.getOutputFormat.checkOutputSpecs(ignoredFs, conf)
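
With the isInstanceOf guard gone, checkOutputSpecs now runs for whatever OutputFormat the job configures, not only FileOutputFormat subclasses. Jobs that deliberately write to existing output can still opt out through the same configuration key used in the guard above; a hypothetical sketch (app name, master, and path are placeholders):

    import org.apache.spark.{SparkConf, SparkContext}

    val conf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("output-specs-demo")
      .set("spark.hadoop.validateOutputSpecs", "false")  // skip checkOutputSpecs on save
    val sc = new SparkContext(conf)
    sc.parallelize(Seq(("k", 1))).saveAsTextFile("/tmp/existing-output-dir")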

core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala

Lines changed: 19 additions & 12 deletions

@@ -117,6 +117,15 @@ private object ParallelCollectionRDD {
     if (numSlices < 1) {
       throw new IllegalArgumentException("Positive number of slices required")
     }
+    // Sequences need to be sliced at the same set of index positions for operations
+    // like RDD.zip() to behave as expected
+    def positions(length: Long, numSlices: Int): Iterator[(Int, Int)] = {
+      (0 until numSlices).iterator.map(i => {
+        val start = ((i * length) / numSlices).toInt
+        val end = (((i + 1) * length) / numSlices).toInt
+        (start, end)
+      })
+    }
     seq match {
       case r: Range.Inclusive => {
         val sign = if (r.step < 0) {
@@ -128,30 +137,28 @@
           r.start, r.end + sign, r.step).asInstanceOf[Seq[T]], numSlices)
       }
       case r: Range => {
-        (0 until numSlices).map(i => {
-          val start = ((i * r.length.toLong) / numSlices).toInt
-          val end = (((i + 1) * r.length.toLong) / numSlices).toInt
-          new Range(r.start + start * r.step, r.start + end * r.step, r.step)
-        }).asInstanceOf[Seq[Seq[T]]]
+        positions(r.length, numSlices).map({
+          case (start, end) =>
+            new Range(r.start + start * r.step, r.start + end * r.step, r.step)
+        }).toSeq.asInstanceOf[Seq[Seq[T]]]
       }
       case nr: NumericRange[_] => {
         // For ranges of Long, Double, BigInteger, etc
        val slices = new ArrayBuffer[Seq[T]](numSlices)
-        val sliceSize = (nr.size + numSlices - 1) / numSlices // Round up to catch everything
         var r = nr
-        for (i <- 0 until numSlices) {
+        for ((start, end) <- positions(nr.length, numSlices)) {
+          val sliceSize = end - start
          slices += r.take(sliceSize).asInstanceOf[Seq[T]]
          r = r.drop(sliceSize)
        }
        slices
       }
       case _ => {
         val array = seq.toArray // To prevent O(n^2) operations for List etc
-        (0 until numSlices).map(i => {
-          val start = ((i * array.length.toLong) / numSlices).toInt
-          val end = (((i + 1) * array.length.toLong) / numSlices).toInt
-          array.slice(start, end).toSeq
-        })
+        positions(array.length, numSlices).map({
+          case (start, end) =>
+            array.slice(start, end).toSeq
+        }).toSeq
       }
     }
   }
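
Worked example of the shared positions() helper: every collection type is now cut at the boundaries floor(i * length / numSlices), so equally sized collections produce equally sized slices and RDD.zip() pairs partitions one-to-one. A REPL-style check (the helper is reproduced here only because it is private to the object above):

    def positions(length: Long, numSlices: Int): Iterator[(Int, Int)] =
      (0 until numSlices).iterator.map { i =>
        (((i * length) / numSlices).toInt, (((i + 1) * length) / numSlices).toInt)
      }

    positions(7, 4).toList  // List((0,1), (1,3), (3,5), (5,7)) -> slice sizes 1, 2, 2, 2
    positions(2, 4).toList  // List((0,0), (0,1), (1,1), (1,2)) -> slice sizes 0, 1, 0, 1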

core/src/main/scala/org/apache/spark/storage/RDDInfo.scala

Lines changed: 3 additions & 3 deletions

@@ -26,7 +26,7 @@ class RDDInfo(
     val id: Int,
     val name: String,
     val numPartitions: Int,
-    val storageLevel: StorageLevel)
+    var storageLevel: StorageLevel)
   extends Ordered[RDDInfo] {
 
   var numCachedPartitions = 0
@@ -36,8 +36,8 @@ class RDDInfo(
 
   override def toString = {
     import Utils.bytesToString
-    ("RDD \"%s\" (%d) Storage: %s; CachedPartitions: %d; TotalPartitions: %d; MemorySize: %s; " +
-      "TachyonSize: %s; DiskSize: %s").format(
+    ("RDD \"%s\" (%d) StorageLevel: %s; CachedPartitions: %d; TotalPartitions: %d; " +
+      "MemorySize: %s; TachyonSize: %s; DiskSize: %s").format(
       name, id, storageLevel.toString, numCachedPartitions, numPartitions,
       bytesToString(memSize), bytesToString(tachyonSize), bytesToString(diskSize))
   }

core/src/main/scala/org/apache/spark/storage/StorageUtils.scala

Lines changed: 3 additions & 0 deletions

@@ -89,10 +89,13 @@ private[spark] object StorageUtils {
       // Add up memory, disk and Tachyon sizes
       val persistedBlocks =
         blocks.filter { status => status.memSize + status.diskSize + status.tachyonSize > 0 }
+      val _storageLevel =
+        if (persistedBlocks.length > 0) persistedBlocks(0).storageLevel else StorageLevel.NONE
       val memSize = persistedBlocks.map(_.memSize).reduceOption(_ + _).getOrElse(0L)
       val diskSize = persistedBlocks.map(_.diskSize).reduceOption(_ + _).getOrElse(0L)
       val tachyonSize = persistedBlocks.map(_.tachyonSize).reduceOption(_ + _).getOrElse(0L)
       rddInfoMap.get(rddId).map { rddInfo =>
+        rddInfo.storageLevel = _storageLevel
         rddInfo.numCachedPartitions = persistedBlocks.length
         rddInfo.memSize = memSize
         rddInfo.diskSize = diskSize
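
Combined with the var storageLevel change in RDDInfo above, this overwrites the reported level with the one observed on the RDD's persisted blocks, falling back to StorageLevel.NONE when nothing is cached. The new lines are equivalent to this condensed form (persistedBlocks refers to the filtered list above):

    rddInfo.storageLevel =
      persistedBlocks.headOption.map(_.storageLevel).getOrElse(StorageLevel.NONE)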

core/src/test/scala/org/apache/spark/rdd/ParallelCollectionSplitSuite.scala

Lines changed: 18 additions & 0 deletions

@@ -111,6 +111,24 @@ class ParallelCollectionSplitSuite extends FunSuite with Checkers {
     assert(slices.forall(_.isInstanceOf[Range]))
   }
 
+  test("identical slice sizes between Range and NumericRange") {
+    val r = ParallelCollectionRDD.slice(1 to 7, 4)
+    val nr = ParallelCollectionRDD.slice(1L to 7L, 4)
+    assert(r.size === 4)
+    for (i <- 0 until r.size) {
+      assert(r(i).size === nr(i).size)
+    }
+  }
+
+  test("identical slice sizes between List and NumericRange") {
+    val r = ParallelCollectionRDD.slice(List(1, 2), 4)
+    val nr = ParallelCollectionRDD.slice(1L to 2L, 4)
+    assert(r.size === 4)
+    for (i <- 0 until r.size) {
+      assert(r(i).size === nr(i).size)
+    }
+  }
+
   test("large ranges don't overflow") {
     val N = 100 * 1000 * 1000
     val data = 0 until N
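
For reference, the expected slices in the first test, worked out from the positions() boundaries (not printed by the test itself):

    // slice(1 to 7, 4)   -> [1], [2, 3], [4, 5], [6, 7]   (sizes 1, 2, 2, 2)
    // slice(1L to 7L, 4) -> [1], [2, 3], [4, 5], [6, 7]   (same sizes, so r(i).size === nr(i).size holds)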
