
Commit 0fc6a31 (2 parents: 19815ae + 8a53de1)

Merge branch 'master' of git://git.apache.org/spark into timeline-viewer-feature

163 files changed: +3189, -1184 lines. Only a subset of the changed files is shown below.

R/pkg/R/RDD.R
Lines changed: 4 additions & 12 deletions

@@ -85,7 +85,7 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)
 
   if (!inherits(prev, "PipelinedRDD") || !isPipelinable(prev)) {
     # This transformation is the first in its stage:
-    .Object@func <- func
+    .Object@func <- cleanClosure(func)
     .Object@prev_jrdd <- getJRDD(prev)
     .Object@env$prev_serializedMode <- prev@env$serializedMode
     # NOTE: We use prev_serializedMode to track the serialization mode of prev_JRDD
@@ -94,7 +94,7 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)
     pipelinedFunc <- function(split, iterator) {
       func(split, prev@func(split, iterator))
     }
-    .Object@func <- pipelinedFunc
+    .Object@func <- cleanClosure(pipelinedFunc)
     .Object@prev_jrdd <- prev@prev_jrdd # maintain the pipeline
     # Get the serialization mode of the parent RDD
     .Object@env$prev_serializedMode <- prev@env$prev_serializedMode
@@ -144,17 +144,13 @@ setMethod("getJRDD", signature(rdd = "PipelinedRDD"),
             return(rdd@env$jrdd_val)
           }
 
-          computeFunc <- function(split, part) {
-            rdd@func(split, part)
-          }
-
           packageNamesArr <- serialize(.sparkREnv[[".packages"]],
                                        connection = NULL)
 
           broadcastArr <- lapply(ls(.broadcastNames),
                                  function(name) { get(name, .broadcastNames) })
 
-          serializedFuncArr <- serialize(computeFunc, connection = NULL)
+          serializedFuncArr <- serialize(rdd@func, connection = NULL)
 
           prev_jrdd <- rdd@prev_jrdd
 
@@ -551,11 +547,7 @@ setMethod("mapPartitions",
 setMethod("lapplyPartitionsWithIndex",
           signature(X = "RDD", FUN = "function"),
           function(X, FUN) {
-            FUN <- cleanClosure(FUN)
-            closureCapturingFunc <- function(split, part) {
-              FUN(split, part)
-            }
-            PipelinedRDD(X, closureCapturingFunc)
+            PipelinedRDD(X, FUN)
           })
 
 #' @rdname lapplyPartitionsWithIndex

R/pkg/R/pairRDD.R
Lines changed: 0 additions & 4 deletions

@@ -694,10 +694,6 @@ setMethod("cogroup",
             for (i in 1:rddsLen) {
               rdds[[i]] <- lapply(rdds[[i]],
                                   function(x) { list(x[[1]], list(i, x[[2]])) })
-              # TODO(hao): As issue [SparkR-142] mentions, the right value of i
-              # will not be captured into UDF if getJRDD is not invoked.
-              # It should be resolved together with that issue.
-              getJRDD(rdds[[i]])  # Capture the closure.
             }
             union.rdd <- Reduce(unionRDD, rdds)
             group.func <- function(vlist) {

bin/spark-class
Lines changed: 10 additions & 1 deletion

@@ -82,13 +82,22 @@ if [ $(command -v "$JAR_CMD") ] ; then
   fi
 fi
 
+LAUNCH_CLASSPATH="$SPARK_ASSEMBLY_JAR"
+
+# Add the launcher build dir to the classpath if requested.
+if [ -n "$SPARK_PREPEND_CLASSES" ]; then
+  LAUNCH_CLASSPATH="$SPARK_HOME/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
+fi
+
+export _SPARK_ASSEMBLY="$SPARK_ASSEMBLY_JAR"
+
 # The launcher library will print arguments separated by a NULL character, to allow arguments with
 # characters that would be otherwise interpreted by the shell. Read that in a while loop, populating
 # an array that will be used to exec the final command.
 CMD=()
 while IFS= read -d '' -r ARG; do
   CMD+=("$ARG")
-done < <("$RUNNER" -cp "$SPARK_ASSEMBLY_JAR" org.apache.spark.launcher.Main "$@")
+done < <("$RUNNER" -cp "$LAUNCH_CLASSPATH" org.apache.spark.launcher.Main "$@")
 
 if [ "${CMD[0]}" = "usage" ]; then
   "${CMD[@]}"

bin/spark-class2.cmd
Lines changed: 10 additions & 1 deletion

@@ -46,13 +46,22 @@ if "%SPARK_ASSEMBLY_JAR%"=="0" (
   exit /b 1
 )
 
+set LAUNCH_CLASSPATH=%SPARK_ASSEMBLY_JAR%
+
+rem Add the launcher build dir to the classpath if requested.
+if not "x%SPARK_PREPEND_CLASSES%"=="x" (
+  set LAUNCH_CLASSPATH=%SPARK_HOME%\launcher\target\scala-%SPARK_SCALA_VERSION%\classes;%LAUNCH_CLASSPATH%
+)
+
+set _SPARK_ASSEMBLY=%SPARK_ASSEMBLY_JAR%
+
 rem Figure out where java is.
 set RUNNER=java
 if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
 
 rem The launcher library prints the command to be executed in a single line suitable for being
 rem executed by the batch interpreter. So read all the output of the launcher into a variable.
-for /f "tokens=*" %%i in ('cmd /C ""%RUNNER%" -cp %SPARK_ASSEMBLY_JAR% org.apache.spark.launcher.Main %*"') do (
+for /f "tokens=*" %%i in ('cmd /C ""%RUNNER%" -cp %LAUNCH_CLASSPATH% org.apache.spark.launcher.Main %*"') do (
   set SPARK_CMD=%%i
 )
 %SPARK_CMD%
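
The comment in the bin/spark-class hunk above describes the handshake both launch scripts rely on: org.apache.spark.launcher.Main emits the child command as NUL-separated arguments so that spaces and shell metacharacters survive unescaped, and the bash while loop rebuilds the argument array (the Windows script reads a single line instead). Below is a minimal Scala sketch of that NUL framing convention; the object name and sample arguments are illustrative and not part of Spark.

// A minimal sketch, assuming only what the script comment states: arguments
// arrive separated by the NUL character. Names and values are made up.
object NulProtocolSketch {
  private val NUL = 0.toChar

  // Split a NUL-separated launcher payload back into individual arguments.
  def parseArgs(launcherOutput: String): Seq[String] =
    launcherOutput.split(NUL).toSeq.filter(_.nonEmpty)

  def main(args: Array[String]): Unit = {
    val raw = Seq("java", "-cp", "/path with spaces/spark.jar", "org.example.Main")
      .mkString(NUL.toString)
    parseArgs(raw).foreach(println)  // each argument prints intact, embedded spaces included
  }
}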

core/src/main/scala/org/apache/spark/ContextCleaner.scala
Lines changed: 32 additions & 12 deletions

@@ -22,7 +22,7 @@ import java.lang.ref.{ReferenceQueue, WeakReference}
 import scala.collection.mutable.{ArrayBuffer, SynchronizedBuffer}
 
 import org.apache.spark.broadcast.Broadcast
-import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.{RDDCheckpointData, RDD}
 import org.apache.spark.util.Utils
 
 /**
@@ -33,6 +33,7 @@ private case class CleanRDD(rddId: Int) extends CleanupTask
 private case class CleanShuffle(shuffleId: Int) extends CleanupTask
 private case class CleanBroadcast(broadcastId: Long) extends CleanupTask
 private case class CleanAccum(accId: Long) extends CleanupTask
+private case class CleanCheckpoint(rddId: Int) extends CleanupTask
 
 /**
  * A WeakReference associated with a CleanupTask.
@@ -94,12 +95,12 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
   @volatile private var stopped = false
 
   /** Attach a listener object to get information of when objects are cleaned. */
-  def attachListener(listener: CleanerListener) {
+  def attachListener(listener: CleanerListener): Unit = {
     listeners += listener
   }
 
   /** Start the cleaner. */
-  def start() {
+  def start(): Unit = {
     cleaningThread.setDaemon(true)
     cleaningThread.setName("Spark Context Cleaner")
     cleaningThread.start()
@@ -108,7 +109,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
   /**
    * Stop the cleaning thread and wait until the thread has finished running its current task.
   */
-  def stop() {
+  def stop(): Unit = {
     stopped = true
     // Interrupt the cleaning thread, but wait until the current task has finished before
     // doing so. This guards against the race condition where a cleaning thread may
@@ -121,7 +122,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
   }
 
   /** Register a RDD for cleanup when it is garbage collected. */
-  def registerRDDForCleanup(rdd: RDD[_]) {
+  def registerRDDForCleanup(rdd: RDD[_]): Unit = {
     registerForCleanup(rdd, CleanRDD(rdd.id))
   }
 
@@ -130,17 +131,22 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
   }
 
   /** Register a ShuffleDependency for cleanup when it is garbage collected. */
-  def registerShuffleForCleanup(shuffleDependency: ShuffleDependency[_, _, _]) {
+  def registerShuffleForCleanup(shuffleDependency: ShuffleDependency[_, _, _]): Unit = {
     registerForCleanup(shuffleDependency, CleanShuffle(shuffleDependency.shuffleId))
   }
 
   /** Register a Broadcast for cleanup when it is garbage collected. */
-  def registerBroadcastForCleanup[T](broadcast: Broadcast[T]) {
+  def registerBroadcastForCleanup[T](broadcast: Broadcast[T]): Unit = {
     registerForCleanup(broadcast, CleanBroadcast(broadcast.id))
   }
 
+  /** Register a RDDCheckpointData for cleanup when it is garbage collected. */
+  def registerRDDCheckpointDataForCleanup[T](rdd: RDD[_], parentId: Int): Unit = {
+    registerForCleanup(rdd, CleanCheckpoint(parentId))
+  }
+
   /** Register an object for cleanup. */
-  private def registerForCleanup(objectForCleanup: AnyRef, task: CleanupTask) {
+  private def registerForCleanup(objectForCleanup: AnyRef, task: CleanupTask): Unit = {
     referenceBuffer += new CleanupTaskWeakReference(task, objectForCleanup, referenceQueue)
   }
 
@@ -164,6 +170,8 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
             doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks)
           case CleanAccum(accId) =>
             doCleanupAccum(accId, blocking = blockOnCleanupTasks)
+          case CleanCheckpoint(rddId) =>
+            doCleanCheckpoint(rddId)
         }
       }
     }
@@ -175,7 +183,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
   }
 
   /** Perform RDD cleanup. */
-  def doCleanupRDD(rddId: Int, blocking: Boolean) {
+  def doCleanupRDD(rddId: Int, blocking: Boolean): Unit = {
     try {
       logDebug("Cleaning RDD " + rddId)
       sc.unpersistRDD(rddId, blocking)
@@ -187,7 +195,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
   }
 
   /** Perform shuffle cleanup, asynchronously. */
-  def doCleanupShuffle(shuffleId: Int, blocking: Boolean) {
+  def doCleanupShuffle(shuffleId: Int, blocking: Boolean): Unit = {
     try {
       logDebug("Cleaning shuffle " + shuffleId)
       mapOutputTrackerMaster.unregisterShuffle(shuffleId)
@@ -200,7 +208,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
   }
 
   /** Perform broadcast cleanup. */
-  def doCleanupBroadcast(broadcastId: Long, blocking: Boolean) {
+  def doCleanupBroadcast(broadcastId: Long, blocking: Boolean): Unit = {
     try {
       logDebug(s"Cleaning broadcast $broadcastId")
       broadcastManager.unbroadcast(broadcastId, true, blocking)
@@ -212,7 +220,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
   }
 
   /** Perform accumulator cleanup. */
-  def doCleanupAccum(accId: Long, blocking: Boolean) {
+  def doCleanupAccum(accId: Long, blocking: Boolean): Unit = {
     try {
       logDebug("Cleaning accumulator " + accId)
       Accumulators.remove(accId)
@@ -223,6 +231,18 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
     }
   }
 
+  /** Perform checkpoint cleanup. */
+  def doCleanCheckpoint(rddId: Int): Unit = {
+    try {
+      logDebug("Cleaning rdd checkpoint data " + rddId)
+      RDDCheckpointData.clearRDDCheckpointData(sc, rddId)
+      logInfo("Cleaned rdd checkpoint data " + rddId)
+    }
+    catch {
+      case e: Exception => logError("Error cleaning rdd checkpoint data " + rddId, e)
+    }
+  }
+
   private def blockManagerMaster = sc.env.blockManager.master
   private def broadcastManager = sc.env.broadcastManager
   private def mapOutputTrackerMaster = sc.env.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster]
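
The new CleanCheckpoint task rides on the cleaner's existing weak-reference machinery: registerForCleanup wraps the registered object in a CleanupTaskWeakReference tied to a ReferenceQueue, and the cleaning thread drains that queue after garbage collection and dispatches the matching task, now including checkpoint cleanup. The following is a minimal, self-contained sketch of that pattern with illustrative names; it is not Spark's actual ContextCleaner.

// Sketch of the weak-reference cleanup pattern with CleanCheckpoint as the
// only task; class and method names are stand-ins, not Spark internals.
import java.lang.ref.{Reference, ReferenceQueue, WeakReference}
import scala.collection.mutable.ArrayBuffer

sealed trait CleanupTask
case class CleanCheckpoint(rddId: Int) extends CleanupTask

class TaskWeakRef(val task: CleanupTask, referent: AnyRef, q: ReferenceQueue[AnyRef])
  extends WeakReference[AnyRef](referent, q)

class CleanerSketch {
  private val queue = new ReferenceQueue[AnyRef]
  // Hold the wrappers strongly so they survive until the GC enqueues them.
  private val buffer = ArrayBuffer.empty[TaskWeakRef]

  def registerForCleanup(obj: AnyRef, task: CleanupTask): Unit =
    synchronized { buffer += new TaskWeakRef(task, obj, queue) }

  /** Drain whatever the GC has already enqueued and run the matching cleanup. */
  def drainOnce(): Unit = {
    var ref: Reference[_ <: AnyRef] = queue.poll()
    while (ref != null) {
      val wrapper = ref.asInstanceOf[TaskWeakRef]
      synchronized { buffer -= wrapper }
      wrapper.task match {
        case CleanCheckpoint(rddId) =>
          println(s"cleaning checkpoint data for RDD $rddId") // stand-in for clearRDDCheckpointData
      }
      ref = queue.poll()
    }
  }
}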

core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala
Lines changed: 5 additions & 4 deletions

@@ -22,26 +22,27 @@ import java.net.URI
 private[spark] class ApplicationDescription(
     val name: String,
     val maxCores: Option[Int],
-    val memoryPerSlave: Int,
+    val memoryPerExecutorMB: Int,
     val command: Command,
     var appUiUrl: String,
     val eventLogDir: Option[URI] = None,
     // short name of compression codec used when writing event logs, if any (e.g. lzf)
-    val eventLogCodec: Option[String] = None)
+    val eventLogCodec: Option[String] = None,
+    val coresPerExecutor: Option[Int] = None)
   extends Serializable {
 
   val user = System.getProperty("user.name", "<unknown>")
 
   def copy(
       name: String = name,
       maxCores: Option[Int] = maxCores,
-      memoryPerSlave: Int = memoryPerSlave,
+      memoryPerExecutorMB: Int = memoryPerExecutorMB,
       command: Command = command,
       appUiUrl: String = appUiUrl,
       eventLogDir: Option[URI] = eventLogDir,
       eventLogCodec: Option[String] = eventLogCodec): ApplicationDescription =
     new ApplicationDescription(
-      name, maxCores, memoryPerSlave, command, appUiUrl, eventLogDir, eventLogCodec)
+      name, maxCores, memoryPerExecutorMB, command, appUiUrl, eventLogDir, eventLogCodec)
 
   override def toString: String = "ApplicationDescription(" + name + ")"
 }

core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala
Lines changed: 2 additions & 2 deletions

@@ -46,7 +46,7 @@ private[deploy] object JsonProtocol {
     ("name" -> obj.desc.name) ~
     ("cores" -> obj.desc.maxCores) ~
     ("user" -> obj.desc.user) ~
-    ("memoryperslave" -> obj.desc.memoryPerSlave) ~
+    ("memoryperslave" -> obj.desc.memoryPerExecutorMB) ~
     ("submitdate" -> obj.submitDate.toString) ~
     ("state" -> obj.state.toString) ~
     ("duration" -> obj.duration)
@@ -55,7 +55,7 @@ private[deploy] object JsonProtocol {
   def writeApplicationDescription(obj: ApplicationDescription): JObject = {
     ("name" -> obj.name) ~
     ("cores" -> obj.maxCores) ~
-    ("memoryperslave" -> obj.memoryPerSlave) ~
+    ("memoryperslave" -> obj.memoryPerExecutorMB) ~
     ("user" -> obj.user) ~
     ("command" -> obj.command.toString)
   }
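
Both hunks keep the JSON key "memoryperslave" even though the Scala field is now memoryPerExecutorMB, presumably so existing consumers of the JSON output keep working. A small sketch of the same json4s tilde DSL with made-up values (it assumes json4s on the classpath, which is the library this object's DSL comes from):

// Illustrative snippet only: the app name and memory value are invented,
// the JSON key mirrors the unchanged "memoryperslave" key in the diff above.
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, render}

object MemoryKeySketch {
  def main(args: Array[String]): Unit = {
    val memoryPerExecutorMB = 2048  // renamed Scala field, unchanged JSON key
    val json = ("name" -> "example-app") ~ ("memoryperslave" -> memoryPerExecutorMB)
    println(compact(render(json)))  // {"name":"example-app","memoryperslave":2048}
  }
}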

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
Lines changed: 2 additions & 0 deletions

@@ -406,6 +406,8 @@ object SparkSubmit {
       OptionAssigner(args.jars, YARN, CLUSTER, clOption = "--addJars"),
 
       // Other options
+      OptionAssigner(args.executorCores, STANDALONE, ALL_DEPLOY_MODES,
+        sysProp = "spark.executor.cores"),
       OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, ALL_DEPLOY_MODES,
         sysProp = "spark.executor.memory"),
       OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES,
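
The added OptionAssigner routes --executor-cores into the spark.executor.cores property for standalone deployments in both deploy modes. A minimal sketch of setting the same property programmatically follows; the application name and values are illustrative, and it assumes spark-core on the classpath.

// Hedged sketch: configure cores per executor via the property that the new
// OptionAssigner maps --executor-cores to; values here are examples only.
import org.apache.spark.SparkConf

object ExecutorCoresSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("executor-cores-demo")   // hypothetical application name
      .set("spark.executor.cores", "2")    // cores per executor (standalone mapping added here)
      .set("spark.executor.memory", "2g")  // existing memory-per-executor property
    println(conf.toDebugString)
  }
}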

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
Lines changed: 4 additions & 1 deletion

@@ -482,10 +482,13 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
         | Spark standalone and Mesos only:
         |  --total-executor-cores NUM  Total cores for all executors.
         |
+        | Spark standalone and YARN only:
+        |  --executor-cores NUM        Number of cores per executor. (Default: 1 in YARN mode,
+        |                              or all available cores on the worker in standalone mode)
+        |
         | YARN-only:
         |  --driver-cores NUM          Number of cores used by the driver, only in cluster mode
         |                              (Default: 1).
-        |  --executor-cores NUM        Number of cores per executor (Default: 1).
         |  --queue QUEUE_NAME          The YARN queue to submit to (Default: "default").
         |  --num-executors NUM         Number of executors to launch (Default: 2).
         |  --archives ARCHIVES         Comma separated list of archives to be extracted into the

core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
Lines changed: 2 additions & 0 deletions

@@ -90,6 +90,8 @@ private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("")
             </span>
           </h4> ++
           appTable
+        } else if (requestedIncomplete) {
+          <h4>No incomplete applications found!</h4>
         } else {
          <h4>No completed applications found!</h4> ++
          <p>Did you specify the correct logging directory?
