
Commit 57600e2

Merge remote-tracking branch 'origin/master' into json-time-parser

2 parents: 55f2eac + 35f9163

73 files changed (+1240, -485 lines); only a subset of the changed files is shown below.


bin/docker-image-tool.sh

Lines changed: 1 addition & 0 deletions

@@ -107,6 +107,7 @@ function create_dev_build_context {(
     "$PYSPARK_CTX/kubernetes/dockerfiles"
   mkdir "$PYSPARK_CTX/python"
   cp -r "python/lib" "$PYSPARK_CTX/python/lib"
+  cp -r "python/pyspark" "$PYSPARK_CTX/python/pyspark"

   local R_CTX="$CTX_DIR/sparkr"
   mkdir -p "$R_CTX/kubernetes"

core/src/main/resources/org/apache/spark/ui/static/stagepage.js

Lines changed: 8 additions & 2 deletions

@@ -221,7 +221,10 @@ function createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTable) {
     "searching": false,
     "order": [[0, "asc"]],
     "bSort": false,
-    "bAutoWidth": false
+    "bAutoWidth": false,
+    "oLanguage": {
+      "sEmptyTable": "No tasks have reported metrics yet"
+    }
   };
   taskSummaryMetricsDataTable = $(taskMetricsTable).DataTable(taskConf);
 }

@@ -426,7 +429,10 @@ $(document).ready(function () {
       }
     ],
     "order": [[0, "asc"]],
-    "bAutoWidth": false
+    "bAutoWidth": false,
+    "oLanguage": {
+      "sEmptyTable": "No data to show yet"
+    }
   }
   var executorSummaryTableSelector =
     $("#summary-executor-table").DataTable(executorSummaryConf);

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 6 additions & 2 deletions

@@ -813,14 +813,14 @@ private[spark] class SparkSubmit extends Logging {
       mainClass = Utils.classForName(childMainClass)
     } catch {
       case e: ClassNotFoundException =>
-        logWarning(s"Failed to load $childMainClass.", e)
+        logError(s"Failed to load class $childMainClass.")
         if (childMainClass.contains("thriftserver")) {
           logInfo(s"Failed to load main class $childMainClass.")
           logInfo("You need to build Spark with -Phive and -Phive-thriftserver.")
         }
         throw new SparkUserAppException(CLASS_NOT_FOUND_EXIT_STATUS)
       case e: NoClassDefFoundError =>
-        logWarning(s"Failed to load $childMainClass: ${e.getMessage()}")
+        logError(s"Failed to load $childMainClass: ${e.getMessage()}")
         if (e.getMessage.contains("org/apache/hadoop/hive")) {
           logInfo(s"Failed to load hive class.")
           logInfo("You need to build Spark with -Phive and -Phive-thriftserver.")

@@ -915,13 +915,17 @@ object SparkSubmit extends CommandLineUtils with Logging {
           override protected def logInfo(msg: => String): Unit = self.logInfo(msg)

           override protected def logWarning(msg: => String): Unit = self.logWarning(msg)
+
+          override protected def logError(msg: => String): Unit = self.logError(msg)
         }
       }

       override protected def logInfo(msg: => String): Unit = printMessage(msg)

       override protected def logWarning(msg: => String): Unit = printMessage(s"Warning: $msg")

+      override protected def logError(msg: => String): Unit = printMessage(s"Error: $msg")
+
       override def doSubmit(args: Array[String]): Unit = {
         try {
           super.doSubmit(args)
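The second hunk follows from the first: the command-line entry point reroutes its log methods to printMessage, and once the class above starts reporting class-load failures at error level, that level needs the same rerouting or the message would miss that sink. A minimal standalone sketch of the pattern, using hypothetical names rather than Spark's actual Logging trait:

    // Stand-in for a logging mixin: each level has a default sink.
    trait MiniLogging {
      protected def logInfo(msg: => String): Unit = println(msg)
      protected def logWarning(msg: => String): Unit = println(msg)
      protected def logError(msg: => String): Unit = println(msg)
    }

    class Submit extends MiniLogging {
      private def printMessage(str: String): Unit = Console.err.println(str)

      override protected def logInfo(msg: => String): Unit = printMessage(msg)
      override protected def logWarning(msg: => String): Unit = printMessage(s"Warning: $msg")
      // Without this override, anything logged at error level would bypass
      // printMessage and fall through to the default sink.
      override protected def logError(msg: => String): Unit = printMessage(s"Error: $msg")
    }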

core/src/main/scala/org/apache/spark/status/AppStatusStore.scala

Lines changed: 51 additions & 22 deletions

@@ -148,11 +148,20 @@ private[spark] class AppStatusStore(
     // cheaper for disk stores (avoids deserialization).
     val count = {
       Utils.tryWithResource(
-        store.view(classOf[TaskDataWrapper])
-          .parent(stageKey)
-          .index(TaskIndexNames.EXEC_RUN_TIME)
-          .first(0L)
-          .closeableIterator()
+        if (store.isInstanceOf[InMemoryStore]) {
+          store.view(classOf[TaskDataWrapper])
+            .parent(stageKey)
+            .index(TaskIndexNames.STATUS)
+            .first("SUCCESS")
+            .last("SUCCESS")
+            .closeableIterator()
+        } else {
+          store.view(classOf[TaskDataWrapper])
+            .parent(stageKey)
+            .index(TaskIndexNames.EXEC_RUN_TIME)
+            .first(0L)
+            .closeableIterator()
+        }
       ) { it =>
         var _count = 0L
         while (it.hasNext()) {

@@ -221,30 +230,50 @@ private[spark] class AppStatusStore(
     // stabilize once the stage finishes. It's also slow, especially with disk stores.
     val indices = quantiles.map { q => math.min((q * count).toLong, count - 1) }

+    // TODO: Summary metrics needs to display all the successful tasks' metrics (SPARK-26119).
+    // For InMemory case, it is efficient to find using the following code. But for diskStore case
+    // we need an efficient solution to avoid deserialization time overhead. For that, we need to
+    // rework on the way indexing works, so that we can index by specific metrics for successful
+    // and failed tasks differently (would be tricky). Also would require changing the disk store
+    // version (to invalidate old stores).
     def scanTasks(index: String)(fn: TaskDataWrapper => Long): IndexedSeq[Double] = {
-      Utils.tryWithResource(
-        store.view(classOf[TaskDataWrapper])
+      if (store.isInstanceOf[InMemoryStore]) {
+        val quantileTasks = store.view(classOf[TaskDataWrapper])
           .parent(stageKey)
           .index(index)
           .first(0L)
-          .closeableIterator()
-      ) { it =>
-        var last = Double.NaN
-        var currentIdx = -1L
-        indices.map { idx =>
-          if (idx == currentIdx) {
-            last
-          } else {
-            val diff = idx - currentIdx
-            currentIdx = idx
-            if (it.skip(diff - 1)) {
-              last = fn(it.next()).toDouble
+          .asScala
+          .filter { _.status == "SUCCESS"} // Filter "SUCCESS" tasks
+          .toIndexedSeq
+
+        indices.map { index =>
+          fn(quantileTasks(index.toInt)).toDouble
+        }.toIndexedSeq
+      } else {
+        Utils.tryWithResource(
+          store.view(classOf[TaskDataWrapper])
+            .parent(stageKey)
+            .index(index)
+            .first(0L)
+            .closeableIterator()
+        ) { it =>
+          var last = Double.NaN
+          var currentIdx = -1L
+          indices.map { idx =>
+            if (idx == currentIdx) {
              last
            } else {
-              Double.NaN
+              val diff = idx - currentIdx
+              currentIdx = idx
+              if (it.skip(diff - 1)) {
+                last = fn(it.next()).toDouble
+                last
+              } else {
+                Double.NaN
+              }
            }
-          }
-        }.toIndexedSeq
+          }.toIndexedSeq
+        }
      }
    }
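For intuition on the in-memory path this hunk adds: tasks are ordered by the indexed metric, failed tasks are filtered out, and each requested quantile is turned into a position in what remains. A rough sketch with plain collections, using a hypothetical Task case class as a stand-in for TaskDataWrapper:

    object QuantileSketch extends App {
      // Stand-in type: only the fields the summary needs.
      case class Task(status: String, execRunTime: Long)

      val tasks = IndexedSeq(
        Task("SUCCESS", 10), Task("FAILED", 999), Task("SUCCESS", 20), Task("SUCCESS", 30))

      // Only successful tasks participate, mirroring the filter added in the diff,
      // ordered by the metric being summarised (what the KVStore index provides).
      val successful = tasks.filter(_.status == "SUCCESS").sortBy(_.execRunTime)
      val count = successful.size                                   // 3

      // Same index arithmetic as AppStatusStore: quantile -> position in the sorted list.
      val quantiles = Array(0.0, 0.25, 0.5, 0.75, 1.0)
      val indices = quantiles.map { q => math.min((q * count).toLong, count - 1) }
      // indices == Array(0, 0, 1, 2, 2)

      val summary = indices.map(i => successful(i.toInt).execRunTime.toDouble)
      println(summary.mkString(", "))                               // 10.0, 10.0, 20.0, 30.0, 30.0
    }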

core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala

Lines changed: 30 additions & 3 deletions

@@ -77,6 +77,34 @@ class AppStatusStoreSuite extends SparkFunSuite {
     assert(store.count(classOf[CachedQuantile]) === 2)
   }

+  test("only successfull task have taskSummary") {
+    val store = new InMemoryStore()
+    (0 until 5).foreach { i => store.write(newTaskData(i, status = "FAILED")) }
+    val appStore = new AppStatusStore(store).taskSummary(stageId, attemptId, uiQuantiles)
+    assert(appStore.size === 0)
+  }
+
+  test("summary should contain task metrics of only successfull tasks") {
+    val store = new InMemoryStore()
+
+    for (i <- 0 to 5) {
+      if (i % 2 == 1) {
+        store.write(newTaskData(i, status = "FAILED"))
+      } else {
+        store.write(newTaskData(i))
+      }
+    }
+
+    val summary = new AppStatusStore(store).taskSummary(stageId, attemptId, uiQuantiles).get
+
+    val values = Array(0.0, 2.0, 4.0)
+
+    val dist = new Distribution(values, 0, values.length).getQuantiles(uiQuantiles.sorted)
+    dist.zip(summary.executorRunTime).foreach { case (expected, actual) =>
+      assert(expected === actual)
+    }
+  }
+
   private def compareQuantiles(count: Int, quantiles: Array[Double]): Unit = {
     val store = new InMemoryStore()
     val values = (0 until count).map { i =>

@@ -93,12 +121,11 @@ class AppStatusStoreSuite extends SparkFunSuite {
     }
   }

-  private def newTaskData(i: Int): TaskDataWrapper = {
+  private def newTaskData(i: Int, status: String = "SUCCESS"): TaskDataWrapper = {
     new TaskDataWrapper(
-      i, i, i, i, i, i, i.toString, i.toString, i.toString, i.toString, false, Nil, None,
+      i, i, i, i, i, i, i.toString, i.toString, status, i.toString, false, Nil, None,
       i, i, i, i, i, i, i, i, i, i,
       i, i, i, i, i, i, i, i, i, i,
       i, i, i, i, stageId, attemptId)
   }
-
 }
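The expected values in the second test follow from newTaskData(i) filling every metric field with i: tasks 0 through 5 are written, the odd ones as FAILED, so the successful tasks' executor run times are exactly 0, 2 and 4, and the Distribution over Array(0.0, 2.0, 4.0) yields the quantiles the summary is expected to report.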

dev/.scalafmt.conf

Lines changed: 4 additions & 0 deletions

@@ -19,6 +19,10 @@ align = none
 align.openParenDefnSite = false
 align.openParenCallSite = false
 align.tokens = []
+optIn = {
+  configStyleArguments = false
+}
+danglingParentheses = false
 docstrings = JavaDoc
 maxColumn = 98
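These are scalafmt settings; as I read them (check the scalafmt docs for the pinned version), optIn.configStyleArguments = false stops the formatter from opting call sites into the one-argument-per-line "config style", and danglingParentheses = false keeps a closing parenthesis on the same line as the last argument rather than on a line of its own.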

docs/running-on-kubernetes.md

Lines changed: 7 additions & 0 deletions

@@ -944,6 +944,13 @@ specific to Spark on Kubernetes.
   <code>spark.kubernetes.executor.podTemplateFile=/path/to/executor-pod-template.yaml`</code>
   </td>
 </tr>
+<tr>
+  <td><code>spark.kubernetes.executor.deleteOnTermination</code></td>
+  <td>true</td>
+  <td>
+  Specify whether executor pods should be deleted in case of failure or normal termination.
+  </td>
+</tr>
 </table>

 #### Pod template properties
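A hedged example of how the new property would typically be set, for instance to keep executor pods around for post-mortem inspection; the master URL and image name below are placeholders, not values from this commit:

    import org.apache.spark.sql.SparkSession

    // Keep executor pods after they terminate so their logs and status remain inspectable.
    val spark = SparkSession.builder()
      .master("k8s://https://kubernetes.example.com:6443")
      .config("spark.kubernetes.container.image", "spark:testing")
      .config("spark.kubernetes.executor.deleteOnTermination", "false")
      .getOrCreate()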

docs/sql-data-sources-jdbc.md

Lines changed: 3 additions & 3 deletions

@@ -64,9 +64,9 @@ the following case-insensitive options:
        Example:<br>
        <code>
           spark.read.format("jdbc")<br>
-            &nbsp&nbsp .option("dbtable", "(select c1, c2 from t1) as subq")<br>
-            &nbsp&nbsp .option("partitionColumn", "subq.c1"<br>
-            &nbsp&nbsp .load()
+            .option("url", jdbcUrl)<br>
+            .option("query", "select c1, c2 from t1")<br>
+            .load()
        </code></li>
     </ol>
  </td>
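The corrected example reads through the query option (and adds the url the old snippet omitted) instead of wrapping a subquery in dbtable. A minimal sketch of the documented call, assuming a spark-shell style session where spark is already defined and a placeholder JDBC URL:

    val jdbcUrl = "jdbc:postgresql://db.example.com:5432/sales"  // placeholder

    val df = spark.read.format("jdbc")
      .option("url", jdbcUrl)
      .option("query", "select c1, c2 from t1")
      .load()

    df.printSchema()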

docs/sql-migration-guide-upgrade.md

Lines changed: 1 addition & 1 deletion

@@ -33,7 +33,7 @@ displayTitle: Spark SQL Upgrading Guide

   - Spark applications which are built with Spark version 2.4 and prior, and call methods of `UserDefinedFunction`, need to be re-compiled with Spark 3.0, as they are not binary compatible with Spark 3.0.

-  - Since Spark 3.0, JSON datasource uses java.time API for parsing and generating JSON content. New formatting implementation supports date/timestamp patterns conformed to ISO 8601. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`.
+  - Since Spark 3.0, CSV/JSON datasources use java.time API for parsing and generating CSV/JSON content. New formatting implementation supports date/timestamp patterns conformed to ISO 8601. To switch back to the implementation used in Spark 2.4 and earlier, set `spark.sql.legacy.timeParser.enabled` to `true`.

 ## Upgrading From Spark SQL 2.3 to 2.4
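A small sketch of the opt-out the guide describes, again assuming a spark-shell style session; the input path and timestamp pattern are placeholders:

    // Fall back to the Spark 2.4 date/timestamp parsing for CSV/JSON sources.
    spark.conf.set("spark.sql.legacy.timeParser.enabled", "true")

    val events = spark.read
      .option("timestampFormat", "yyyy/MM/dd HH:mm:ss")
      .json("/data/events.json")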

project/SparkBuild.scala

Lines changed: 7 additions & 1 deletion

@@ -494,7 +494,13 @@ object KubernetesIntegrationTests {
     dockerBuild := {
       if (shouldBuildImage) {
         val dockerTool = s"$sparkHome/bin/docker-image-tool.sh"
-        val cmd = Seq(dockerTool, "-m", "-t", imageTag.value, "build")
+        val bindingsDir = s"$sparkHome/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings"
+        val cmd = Seq(dockerTool, "-m",
+          "-t", imageTag.value,
+          "-p", s"$bindingsDir/python/Dockerfile",
+          "-R", s"$bindingsDir/R/Dockerfile",
+          "build"
+        )
         val ec = Process(cmd).!
         if (ec != 0) {
           throw new IllegalStateException(s"Process '${cmd.mkString(" ")}' exited with $ec.")
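With the extra -p and -R arguments, the integration-test build asks docker-image-tool.sh to also build the Python and R binding images from their Dockerfiles. A standalone sketch of the same invocation pattern outside sbt; the image tag and SPARK_HOME fallback are placeholders:

    import scala.sys.process._

    val sparkHome = sys.env.getOrElse("SPARK_HOME", ".")
    val dockerTool = s"$sparkHome/bin/docker-image-tool.sh"
    val bindingsDir =
      s"$sparkHome/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings"

    // Build the command as a Seq so each argument is passed as-is (no shell quoting),
    // run it, and fail on a non-zero exit code, mirroring the sbt task above.
    val cmd = Seq(dockerTool, "-m",
      "-t", "dev",
      "-p", s"$bindingsDir/python/Dockerfile",
      "-R", s"$bindingsDir/R/Dockerfile",
      "build")

    val exitCode = Process(cmd).!
    if (exitCode != 0) {
      sys.error(s"Process '${cmd.mkString(" ")}' exited with $exitCode.")
    }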
