Skip to content

Commit a14807e

Browse files
andrewor14aarondav
authored andcommitted
[SPARK-2147 / 2161] Show removed executors on the UI
This PR includes two changes - **[SPARK-2147]** When an application finishes cleanly (i.e. `sc.stop()` is called), all of its executors used to disappear from the Master UI. This no longer happens. - **[SPARK-2161]** This adds a "Removed Executors" table to Master UI, so the user can find out why their executors died from the logs, for instance. The equivalent table already existed in the Worker UI, but was hidden because of a bug (the comment `//scalastyle:off` disconnected the `Seq[Node]` that represents the HTML for table). This should go into 1.0.1 if possible. Author: Andrew Or <[email protected]> Closes #1102 from andrewor14/remember-removed-executors and squashes the following commits: 2e2298f [Andrew Or] Add hash code method to ExecutorInfo (minor) abd72e0 [Andrew Or] Merge branch 'master' of github.com:apache/spark into remember-removed-executors 792f992 [Andrew Or] Add missing equals method in ExecutorInfo 3390b49 [Andrew Or] Add executor state column to WorkerPage 161f8a2 [Andrew Or] Display finished executors table (fix bug) fbb65b8 [Andrew Or] Removed unused method c89bb6e [Andrew Or] Add table for removed executors in MasterWebUI fe47402 [Andrew Or] Show exited executors on the Master UI
1 parent 443f5e1 commit a14807e

File tree

4 files changed

+107
-87
lines changed

4 files changed

+107
-87
lines changed

core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package org.apache.spark.deploy.master
2020
import java.util.Date
2121

2222
import scala.collection.mutable
23+
import scala.collection.mutable.ArrayBuffer
2324

2425
import akka.actor.ActorRef
2526

@@ -36,6 +37,7 @@ private[spark] class ApplicationInfo(
3637

3738
@transient var state: ApplicationState.Value = _
3839
@transient var executors: mutable.HashMap[Int, ExecutorInfo] = _
40+
@transient var removedExecutors: ArrayBuffer[ExecutorInfo] = _
3941
@transient var coresGranted: Int = _
4042
@transient var endTime: Long = _
4143
@transient var appSource: ApplicationSource = _
@@ -51,6 +53,7 @@ private[spark] class ApplicationInfo(
5153
endTime = -1L
5254
appSource = new ApplicationSource(this)
5355
nextExecutorId = 0
56+
removedExecutors = new ArrayBuffer[ExecutorInfo]
5457
}
5558

5659
private def newExecutorId(useID: Option[Int] = None): Int = {
@@ -74,6 +77,7 @@ private[spark] class ApplicationInfo(
7477

7578
def removeExecutor(exec: ExecutorInfo) {
7679
if (executors.contains(exec.id)) {
80+
removedExecutors += executors(exec.id)
7781
executors -= exec.id
7882
coresGranted -= exec.cores
7983
}

core/src/main/scala/org/apache/spark/deploy/master/ExecutorInfo.scala

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,19 @@ private[spark] class ExecutorInfo(
3434
}
3535

3636
def fullId: String = application.id + "/" + id
37+
38+
override def equals(other: Any): Boolean = {
39+
other match {
40+
case info: ExecutorInfo =>
41+
fullId == info.fullId &&
42+
worker.id == info.worker.id &&
43+
cores == info.cores &&
44+
memory == info.memory
45+
case _ => false
46+
}
47+
}
48+
49+
override def toString: String = fullId
50+
51+
override def hashCode: Int = toString.hashCode()
3752
}

core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala

Lines changed: 46 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import scala.xml.Node
2525
import akka.pattern.ask
2626
import org.json4s.JValue
2727

28-
import org.apache.spark.deploy.JsonProtocol
28+
import org.apache.spark.deploy.{ExecutorState, JsonProtocol}
2929
import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState}
3030
import org.apache.spark.deploy.master.ExecutorInfo
3131
import org.apache.spark.ui.{WebUIPage, UIUtils}
@@ -57,43 +57,55 @@ private[spark] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app
5757
})
5858

5959
val executorHeaders = Seq("ExecutorID", "Worker", "Cores", "Memory", "State", "Logs")
60-
val executors = app.executors.values.toSeq
61-
val executorTable = UIUtils.listingTable(executorHeaders, executorRow, executors)
60+
val allExecutors = (app.executors.values ++ app.removedExecutors).toSet.toSeq
61+
// This includes executors that are either still running or have exited cleanly
62+
val executors = allExecutors.filter { exec =>
63+
!ExecutorState.isFinished(exec.state) || exec.state == ExecutorState.EXITED
64+
}
65+
val removedExecutors = allExecutors.diff(executors)
66+
val executorsTable = UIUtils.listingTable(executorHeaders, executorRow, executors)
67+
val removedExecutorsTable = UIUtils.listingTable(executorHeaders, executorRow, removedExecutors)
6268

6369
val content =
64-
<div class="row-fluid">
65-
<div class="span12">
66-
<ul class="unstyled">
67-
<li><strong>ID:</strong> {app.id}</li>
68-
<li><strong>Name:</strong> {app.desc.name}</li>
69-
<li><strong>User:</strong> {app.desc.user}</li>
70-
<li><strong>Cores:</strong>
71-
{
72-
if (app.desc.maxCores.isEmpty) {
73-
"Unlimited (%s granted)".format(app.coresGranted)
74-
} else {
75-
"%s (%s granted, %s left)".format(
76-
app.desc.maxCores.get, app.coresGranted, app.coresLeft)
77-
}
78-
}
79-
</li>
80-
<li>
81-
<strong>Executor Memory:</strong>
82-
{Utils.megabytesToString(app.desc.memoryPerSlave)}
83-
</li>
84-
<li><strong>Submit Date:</strong> {app.submitDate}</li>
85-
<li><strong>State:</strong> {app.state}</li>
86-
<li><strong><a href={app.desc.appUiUrl}>Application Detail UI</a></strong></li>
87-
</ul>
88-
</div>
70+
<div class="row-fluid">
71+
<div class="span12">
72+
<ul class="unstyled">
73+
<li><strong>ID:</strong> {app.id}</li>
74+
<li><strong>Name:</strong> {app.desc.name}</li>
75+
<li><strong>User:</strong> {app.desc.user}</li>
76+
<li><strong>Cores:</strong>
77+
{
78+
if (app.desc.maxCores.isEmpty) {
79+
"Unlimited (%s granted)".format(app.coresGranted)
80+
} else {
81+
"%s (%s granted, %s left)".format(
82+
app.desc.maxCores.get, app.coresGranted, app.coresLeft)
83+
}
84+
}
85+
</li>
86+
<li>
87+
<strong>Executor Memory:</strong>
88+
{Utils.megabytesToString(app.desc.memoryPerSlave)}
89+
</li>
90+
<li><strong>Submit Date:</strong> {app.submitDate}</li>
91+
<li><strong>State:</strong> {app.state}</li>
92+
<li><strong><a href={app.desc.appUiUrl}>Application Detail UI</a></strong></li>
93+
</ul>
8994
</div>
95+
</div>
9096

91-
<div class="row-fluid"> <!-- Executors -->
92-
<div class="span12">
93-
<h4> Executor Summary </h4>
94-
{executorTable}
95-
</div>
96-
</div>;
97+
<div class="row-fluid"> <!-- Executors -->
98+
<div class="span12">
99+
<h4> Executor Summary </h4>
100+
{executorsTable}
101+
{
102+
if (removedExecutors.nonEmpty) {
103+
<h4> Removed Executors </h4> ++
104+
removedExecutorsTable
105+
}
106+
}
107+
</div>
108+
</div>;
97109
UIUtils.basicSparkPage(content, "Application: " + app.desc.name)
98110
}
99111

core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala

Lines changed: 42 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -46,74 +46,62 @@ private[spark] class WorkerPage(parent: WorkerWebUI) extends WebUIPage("") {
4646
val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerStateResponse]
4747
val workerState = Await.result(stateFuture, timeout)
4848

49-
val executorHeaders = Seq("ExecutorID", "Cores", "Memory", "Job Details", "Logs")
49+
val executorHeaders = Seq("ExecutorID", "Cores", "State", "Memory", "Job Details", "Logs")
50+
val runningExecutors = workerState.executors
5051
val runningExecutorTable =
51-
UIUtils.listingTable(executorHeaders, executorRow, workerState.executors)
52+
UIUtils.listingTable(executorHeaders, executorRow, runningExecutors)
53+
val finishedExecutors = workerState.finishedExecutors
5254
val finishedExecutorTable =
53-
UIUtils.listingTable(executorHeaders, executorRow, workerState.finishedExecutors)
55+
UIUtils.listingTable(executorHeaders, executorRow, finishedExecutors)
5456

5557
val driverHeaders = Seq("DriverID", "Main Class", "State", "Cores", "Memory", "Logs", "Notes")
5658
val runningDrivers = workerState.drivers.sortBy(_.driverId).reverse
5759
val runningDriverTable = UIUtils.listingTable(driverHeaders, driverRow, runningDrivers)
5860
val finishedDrivers = workerState.finishedDrivers.sortBy(_.driverId).reverse
59-
def finishedDriverTable = UIUtils.listingTable(driverHeaders, driverRow, finishedDrivers)
61+
val finishedDriverTable = UIUtils.listingTable(driverHeaders, driverRow, finishedDrivers)
6062

6163
// For now we only show driver information if the user has submitted drivers to the cluster.
6264
// This is until we integrate the notion of drivers and applications in the UI.
63-
def hasDrivers = runningDrivers.length > 0 || finishedDrivers.length > 0
6465

6566
val content =
66-
<div class="row-fluid"> <!-- Worker Details -->
67-
<div class="span12">
68-
<ul class="unstyled">
69-
<li><strong>ID:</strong> {workerState.workerId}</li>
70-
<li><strong>
71-
Master URL:</strong> {workerState.masterUrl}
72-
</li>
73-
<li><strong>Cores:</strong> {workerState.cores} ({workerState.coresUsed} Used)</li>
74-
<li><strong>Memory:</strong> {Utils.megabytesToString(workerState.memory)}
75-
({Utils.megabytesToString(workerState.memoryUsed)} Used)</li>
76-
</ul>
77-
<p><a href={workerState.masterWebUiUrl}>Back to Master</a></p>
78-
</div>
67+
<div class="row-fluid"> <!-- Worker Details -->
68+
<div class="span12">
69+
<ul class="unstyled">
70+
<li><strong>ID:</strong> {workerState.workerId}</li>
71+
<li><strong>
72+
Master URL:</strong> {workerState.masterUrl}
73+
</li>
74+
<li><strong>Cores:</strong> {workerState.cores} ({workerState.coresUsed} Used)</li>
75+
<li><strong>Memory:</strong> {Utils.megabytesToString(workerState.memory)}
76+
({Utils.megabytesToString(workerState.memoryUsed)} Used)</li>
77+
</ul>
78+
<p><a href={workerState.masterWebUiUrl}>Back to Master</a></p>
7979
</div>
80-
81-
<div class="row-fluid"> <!-- Running Executors -->
82-
<div class="span12">
83-
<h4> Running Executors {workerState.executors.size} </h4>
84-
{runningExecutorTable}
85-
</div>
86-
</div>
87-
// scalastyle:off
88-
<div>
89-
{if (hasDrivers)
90-
<div class="row-fluid"> <!-- Running Drivers -->
91-
<div class="span12">
92-
<h4> Running Drivers {workerState.drivers.size} </h4>
93-
{runningDriverTable}
94-
</div>
95-
</div>
80+
</div>
81+
<div class="row-fluid"> <!-- Executors and Drivers -->
82+
<div class="span12">
83+
<h4> Running Executors ({runningExecutors.size}) </h4>
84+
{runningExecutorTable}
85+
{
86+
if (runningDrivers.nonEmpty) {
87+
<h4> Running Drivers ({runningDrivers.size}) </h4> ++
88+
runningDriverTable
89+
}
9690
}
97-
</div>
98-
99-
<div class="row-fluid"> <!-- Finished Executors -->
100-
<div class="span12">
101-
<h4> Finished Executors </h4>
102-
{finishedExecutorTable}
103-
</div>
104-
</div>
105-
106-
<div>
107-
{if (hasDrivers)
108-
<div class="row-fluid"> <!-- Finished Drivers -->
109-
<div class="span12">
110-
<h4> Finished Drivers </h4>
111-
{finishedDriverTable}
112-
</div>
113-
</div>
91+
{
92+
if (finishedExecutors.nonEmpty) {
93+
<h4>Finished Executors ({finishedExecutors.size}) </h4> ++
94+
finishedExecutorTable
95+
}
11496
}
115-
</div>;
116-
// scalastyle:on
97+
{
98+
if (finishedDrivers.nonEmpty) {
99+
<h4> Finished Drivers ({finishedDrivers.size}) </h4> ++
100+
finishedDriverTable
101+
}
102+
}
103+
</div>
104+
</div>;
117105
UIUtils.basicSparkPage(content, "Spark Worker at %s:%s".format(
118106
workerState.host, workerState.port))
119107
}
@@ -122,6 +110,7 @@ private[spark] class WorkerPage(parent: WorkerWebUI) extends WebUIPage("") {
122110
<tr>
123111
<td>{executor.execId}</td>
124112
<td>{executor.cores}</td>
113+
<td>{executor.state}</td>
125114
<td sorttable_customkey={executor.memory.toString}>
126115
{Utils.megabytesToString(executor.memory)}
127116
</td>

0 commit comments

Comments
 (0)