Skip to content

Commit 4fece2f

Browse files
author
Marcelo Vanzin
committed
SHS-NG M4.2: Port executors page to new backend.
The executors page is built on top of the REST API, so the page itself was easy to hook up to the new code. Some other pages depend on the `ExecutorListener` class that is being removed, though, so they needed to be modified to use data from the new store. Fortunately, all they seemed to need is the map of executor logs, so that was somewhat easy too. The executor timeline graph required some extra code to save the executor-related events in the UI store. This just implements the existing functionality, without making any changes related to efficiency or scalability of that graph. I had to change some of the test golden files because the old code would return executors in "random" order (since it used a mutable Map instead of something that returns a sorted list), and the new code returns executors in id order. The static files are still kept in the core/ resources directory since Jetty's default servlet does not handle fetching static files from multiple jars. TODO: add unit tests for the new ExecutorSummary fields being populated.
1 parent 211a1a9 commit 4fece2f

21 files changed

+382
-556
lines changed

core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717

1818
package org.apache.spark.scheduler.cluster
1919

20+
import scala.annotation.meta.getter
21+
22+
import com.fasterxml.jackson.annotation.JsonIgnore
23+
2024
import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef}
2125

2226
/**
@@ -29,9 +33,12 @@ import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef}
2933
* @param totalCores The total number of cores available to the executor
3034
*/
3135
private[cluster] class ExecutorData(
36+
@(JsonIgnore @getter)
3237
val executorEndpoint: RpcEndpointRef,
38+
@(JsonIgnore @getter)
3339
val executorAddress: RpcAddress,
3440
override val executorHost: String,
41+
@(JsonIgnore @getter)
3542
var freeCores: Int,
3643
override val totalCores: Int,
3744
override val logUrlMap: Map[String, String]

core/src/main/scala/org/apache/spark/status/AppStateListener.scala

Lines changed: 121 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ private class AppStateListener(override protected val kvstore: KVStore) extends
5454

5555
private var appId: String = null
5656
private var activeJobs: Set[Int] = Set()
57+
private var executorEventId: Long = 0L
58+
59+
private var coresPerTask: Int = 1
5760

5861
override def onApplicationStart(event: SparkListenerApplicationStart): Unit = {
5962
assert(event.appId.isDefined, "Application without IDs are not supported.")
@@ -96,6 +99,8 @@ private class AppStateListener(override protected val kvstore: KVStore) extends
9699
details("System Properties"),
97100
details("Classpath Entries"))
98101

102+
coresPerTask = envInfo.sparkProperties.toMap.get("spark.task.cpus").map(_.toInt)
103+
.getOrElse(coresPerTask)
99104
kvstore.write(new ApplicationEnvironmentInfoWrapper(envInfo))
100105
}
101106

@@ -129,9 +134,12 @@ private class AppStateListener(override protected val kvstore: KVStore) extends
129134
hostPort = event.executorInfo.executorHost,
130135
isActive = true,
131136
totalCores = event.executorInfo.totalCores,
137+
maxTasks = event.executorInfo.totalCores / coresPerTask,
132138
executorLogs = event.executorInfo.logUrlMap)
133139
new ExecutorSummaryWrapper(newInfo)
134140
}
141+
142+
writeExecutorEvent(event)
135143
}
136144

137145
override def onExecutorRemoved(event: SparkListenerExecutorRemoved): Unit = {
@@ -140,12 +148,19 @@ private class AppStateListener(override protected val kvstore: KVStore) extends
140148
isActive = false)
141149
new ExecutorSummaryWrapper(newInfo)
142150
}
151+
152+
writeExecutorEvent(event)
143153
}
144154

145155
override def onExecutorBlacklisted(event: SparkListenerExecutorBlacklisted): Unit = {
146156
updateBlackListStatus(event.executorId, true)
147157
}
148158

159+
private def writeExecutorEvent(event: SparkListenerEvent): Unit = {
160+
executorEventId += 1
161+
kvstore.write(new ExecutorEventData(executorEventId, event))
162+
}
163+
149164
override def onExecutorUnblacklisted(event: SparkListenerExecutorUnblacklisted): Unit = {
150165
updateBlackListStatus(event.executorId, false)
151166
}
@@ -342,6 +357,14 @@ private class AppStateListener(override protected val kvstore: KVStore) extends
342357
numActiveTasks = stage.info.numActiveTasks + 1)
343358
newStageDataWrapper(stage, newInfo)
344359
}
360+
361+
updateExecutorSummary(event.taskInfo.executorId) { uiexec =>
362+
val updated = newExecutorSummary(
363+
uiexec.info,
364+
activeTasks = uiexec.info.activeTasks + 1,
365+
totalTasks = uiexec.info.totalTasks + 1)
366+
new ExecutorSummaryWrapper(updated)
367+
}
345368
}
346369

347370
override def onTaskGettingResult(event: SparkListenerTaskGettingResult): Unit = {
@@ -440,6 +463,7 @@ private class AppStateListener(override protected val kvstore: KVStore) extends
440463
newExecutorStageSummary(
441464
exec,
442465
failedTasks = exec.failedTasks + failedDelta,
466+
succeededTasks = exec.succeededTasks + completedDelta,
443467
inputBytes = exec.inputBytes + metricsDelta.inputMetrics.bytesRead,
444468
outputBytes = exec.outputBytes + metricsDelta.outputMetrics.bytesWritten,
445469
shuffleRead = exec.shuffleRead + metricsDelta.shuffleReadMetrics.localBytesRead +
@@ -448,6 +472,30 @@ private class AppStateListener(override protected val kvstore: KVStore) extends
448472
memoryBytesSpilled = exec.memoryBytesSpilled + metricsDelta.memoryBytesSpilled,
449473
diskBytesSpilled = exec.diskBytesSpilled + metricsDelta.diskBytesSpilled)
450474
}
475+
476+
updateExecutorSummary(event.taskInfo.executorId) { uiexec =>
477+
val (gcTime, inputBytes, shuffleRead, shuffleWrite) = if (event.taskMetrics != null) {
478+
val readMetrics = event.taskMetrics.shuffleReadMetrics
479+
(event.taskMetrics.jvmGCTime,
480+
event.taskMetrics.inputMetrics.bytesRead,
481+
readMetrics.localBytesRead + readMetrics.remoteBytesRead,
482+
event.taskMetrics.shuffleWriteMetrics.bytesWritten)
483+
} else {
484+
(0L, 0L, 0L, 0L)
485+
}
486+
487+
val updated = newExecutorSummary(
488+
uiexec.info,
489+
activeTasks = uiexec.info.activeTasks - 1,
490+
completedTasks = uiexec.info.completedTasks + completedDelta,
491+
failedTasks = uiexec.info.failedTasks + failedDelta,
492+
totalDuration = uiexec.info.totalDuration + event.taskInfo.duration,
493+
totalGCTime = uiexec.info.totalGCTime + gcTime,
494+
totalInputBytes = uiexec.info.totalInputBytes + inputBytes,
495+
totalShuffleRead = uiexec.info.totalShuffleRead + shuffleRead,
496+
totalShuffleWrite = uiexec.info.totalShuffleWrite + shuffleWrite)
497+
new ExecutorSummaryWrapper(updated)
498+
}
451499
}
452500

453501
override def onStageCompleted(event: SparkListenerStageCompleted): Unit = {
@@ -486,10 +534,24 @@ private class AppStateListener(override protected val kvstore: KVStore) extends
486534
}
487535

488536
override def onBlockManagerAdded(event: SparkListenerBlockManagerAdded): Unit = {
537+
// This needs to set fields that are already set by onExecutorAdded because the driver is
538+
// considered an "executor" in the UI, but does not have a SparkListenerExecutorAdded event.
489539
updateExecutorSummary(event.blockManagerId.executorId) { exec =>
540+
// Only create memory metrics if the event has the info; this avoids setting bogus values
541+
// when replaying old application logs.
542+
val memMetrics = event.maxOnHeapMem.map { _ =>
543+
newMemoryMetrics(
544+
exec.info.memoryMetrics,
545+
totalOnHeapStorageMemory = event.maxOnHeapMem,
546+
totalOffHeapStorageMemory = event.maxOffHeapMem)
547+
}
548+
490549
val updated = newExecutorSummary(
491550
exec.info,
492-
maxMemory = event.maxMem)
551+
hostPort = event.blockManagerId.hostPort,
552+
isActive = true,
553+
maxMemory = event.maxMem,
554+
memoryMetrics = memMetrics)
493555
new ExecutorSummaryWrapper(updated)
494556
}
495557
}
@@ -566,6 +628,12 @@ private class AppStateListener(override protected val kvstore: KVStore) extends
566628
// Function to apply a delta to a value, but ensure that it doesn't go negative.
567629
def newValue(old: Long, delta: Long): Long = math.max(0, old + delta)
568630

631+
// Function to calculate the remaining memory after applying a delta. Assumes that if
632+
// one value is provided, all the values are.
633+
def remainingMemory(max: Option[Long], oldv: Option[Long], newv: Option[Long]): Option[Long] = {
634+
max.map { m => newValue(m, oldv.get - newv.get) }
635+
}
636+
569637
// If the storage level is NONE, then don't update the storage level of existing information.
570638
val updatedStorageLevel = if (storageLevel.useMemory || storageLevel.useDisk) {
571639
Some(storageLevel.description)
@@ -579,6 +647,7 @@ private class AppStateListener(override protected val kvstore: KVStore) extends
579647
new ExecutorSummaryWrapper(newExecutorSummary(None, id = executorId)))
580648

581649
var rddBlocksDelta = 0
650+
val memMetrics = executorInfo.info.memoryMetrics
582651

583652
// Update the block entry in the RDD info, keeping track of the deltas above so that we
584653
// can update the executor information too.
@@ -634,12 +703,31 @@ private class AppStateListener(override protected val kvstore: KVStore) extends
634703
val newDistMem = newValue(oldDist.memoryUsed, event.blockUpdatedInfo.memSize * memoryMult)
635704
val newDistDisk = newValue(oldDist.diskUsed, event.blockUpdatedInfo.diskSize * diskMult)
636705
val newDists = if (newDistMem > 0 || newDistDisk > 0) {
706+
val newOffHeap = if (storageLevel.useOffHeap) Some(newDistMem) else None
707+
val newOnHeap = if (!storageLevel.useOffHeap) Some(newDistMem) else None
708+
val remainingOffHeap = if (storageLevel.useOffHeap) {
709+
remainingMemory(memMetrics.map(_.totalOffHeapStorageMemory), oldDist.offHeapMemoryUsed,
710+
newOffHeap)
711+
} else {
712+
None
713+
}
714+
val remainingOnHeap = if (!storageLevel.useOffHeap) {
715+
remainingMemory(memMetrics.map(_.totalOnHeapStorageMemory), oldDist.onHeapMemoryUsed,
716+
newOnHeap)
717+
} else {
718+
None
719+
}
720+
637721
val newDist = newRDDDataDistribution(
638722
oldDist,
639723
memoryUsed = newDistMem,
640724
memoryRemaining = newValue(oldDist.memoryRemaining,
641725
event.blockUpdatedInfo.memSize * memoryMult * -1),
642-
diskUsed = newDistDisk)
726+
diskUsed = newDistDisk,
727+
onHeapMemoryUsed = newOnHeap,
728+
offHeapMemoryUsed = newOffHeap,
729+
onHeapMemoryRemaining = remainingOnHeap,
730+
offHeapMemoryRemaining = remainingOffHeap)
643731
Seq(newDist)
644732
} else {
645733
Nil
@@ -656,13 +744,30 @@ private class AppStateListener(override protected val kvstore: KVStore) extends
656744
}
657745

658746
// Update the ExecutorSummary for the block's manager.
747+
val updatedMemMetrics = memMetrics.map { m =>
748+
val onHeapUsed = if (!storageLevel.useOffHeap) {
749+
newValue(m.usedOnHeapStorageMemory, event.blockUpdatedInfo.memSize * memoryMult)
750+
} else {
751+
m.usedOnHeapStorageMemory
752+
}
753+
val offHeapUsed = if (storageLevel.useOffHeap) {
754+
newValue(m.usedOffHeapStorageMemory, event.blockUpdatedInfo.memSize * memoryMult)
755+
} else {
756+
m.usedOffHeapStorageMemory
757+
}
758+
newMemoryMetrics(m,
759+
usedOnHeapStorageMemory = onHeapUsed,
760+
usedOffHeapStorageMemory = offHeapUsed)
761+
}
762+
659763
val newExecSummary = newExecutorSummary(
660764
executorInfo.info,
661765
rddBlocks = newValue(executorInfo.info.rddBlocks, rddBlocksDelta).toInt,
662766
memoryUsed = newValue(executorInfo.info.memoryUsed,
663767
event.blockUpdatedInfo.memSize * memoryMult),
664768
diskUsed = newValue(executorInfo.info.diskUsed,
665-
event.blockUpdatedInfo.diskSize * diskMult))
769+
event.blockUpdatedInfo.diskSize * diskMult),
770+
memoryMetrics = updatedMemMetrics)
666771
kvstore.write(new ExecutorSummaryWrapper(newExecSummary))
667772
}
668773

@@ -819,6 +924,19 @@ private class AppStateListener(override protected val kvstore: KVStore) extends
819924
option(memoryMetrics, old.map(_.memoryMetrics)))
820925
}
821926

927+
private def newMemoryMetrics(
928+
old: Option[v1.MemoryMetrics],
929+
usedOnHeapStorageMemory: Option[Long] = None,
930+
usedOffHeapStorageMemory: Option[Long] = None,
931+
totalOnHeapStorageMemory: Option[Long] = None,
932+
totalOffHeapStorageMemory: Option[Long] = None): v1.MemoryMetrics = {
933+
new v1.MemoryMetrics(
934+
value(usedOnHeapStorageMemory, old.map(_.usedOnHeapStorageMemory), 0L),
935+
value(usedOffHeapStorageMemory, old.map(_.usedOffHeapStorageMemory), 0L),
936+
value(totalOnHeapStorageMemory, old.map(_.totalOnHeapStorageMemory), 0L),
937+
value(totalOffHeapStorageMemory, old.map(_.totalOffHeapStorageMemory), 0L))
938+
}
939+
822940
private def newJobData(
823941
old: Option[v1.JobData],
824942
jobId: Option[Int] = None,

core/src/main/scala/org/apache/spark/status/AppStateStore.scala

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import scala.collection.JavaConverters._
2424

2525
import org.apache.spark.{JobExecutionStatus, SparkConf}
2626
import org.apache.spark.kvstore.KVStore
27-
import org.apache.spark.scheduler.SparkListenerBus
27+
import org.apache.spark.scheduler.{SparkListenerBus, SparkListenerEvent}
2828
import org.apache.spark.status.api.v1
2929
import org.apache.spark.util.{Distribution, Utils}
3030

@@ -56,6 +56,15 @@ private[spark] class AppStateStore private (store: KVStore, tempStorePath: Optio
5656
.asScala.map(_.info).toSeq
5757
}
5858

59+
def executorSummary(executorId: String): Option[v1.ExecutorSummary] = {
60+
try {
61+
Some(store.read(classOf[ExecutorSummaryWrapper], executorId).info)
62+
} catch {
63+
case _: NoSuchElementException =>
64+
None
65+
}
66+
}
67+
5968
def stageList(statuses: JList[v1.StageStatus]): Seq[v1.StageData] = {
6069
val it = store.view(classOf[StageDataWrapper]).asScala.map(_.info)
6170
if (!statuses.isEmpty) {
@@ -198,6 +207,10 @@ private[spark] class AppStateStore private (store: KVStore, tempStorePath: Optio
198207
store.read(classOf[RDDStorageInfoWrapper], rddId).info
199208
}
200209

210+
def executorEvents(): Seq[SparkListenerEvent] = {
211+
store.view(classOf[ExecutorEventData]).asScala.map(_.event).toSeq
212+
}
213+
201214
def close(): Unit = {
202215
store.close()
203216
tempStorePath.foreach(Utils.deleteRecursively)

core/src/main/scala/org/apache/spark/status/api/v1/AllExecutorListResource.scala

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,22 +20,11 @@ import javax.ws.rs.{GET, Produces}
2020
import javax.ws.rs.core.MediaType
2121

2222
import org.apache.spark.ui.SparkUI
23-
import org.apache.spark.ui.exec.ExecutorsPage
2423

2524
@Produces(Array(MediaType.APPLICATION_JSON))
2625
private[v1] class AllExecutorListResource(ui: SparkUI) {
2726

2827
@GET
29-
def executorList(): Seq[ExecutorSummary] = {
30-
val listener = ui.executorsListener
31-
listener.synchronized {
32-
// The follow codes should be protected by `listener` to make sure no executors will be
33-
// removed before we query their status. See SPARK-12784.
34-
(0 until listener.activeStorageStatusList.size).map { statusId =>
35-
ExecutorsPage.getExecInfo(listener, statusId, isActive = true)
36-
} ++ (0 until listener.deadStorageStatusList.size).map { statusId =>
37-
ExecutorsPage.getExecInfo(listener, statusId, isActive = false)
38-
}
39-
}
40-
}
28+
def executorList(): Seq[ExecutorSummary] = ui.store.executorList(false)
29+
4130
}

core/src/main/scala/org/apache/spark/status/api/v1/ExecutorListResource.scala

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,11 @@ import javax.ws.rs.{GET, Produces}
2020
import javax.ws.rs.core.MediaType
2121

2222
import org.apache.spark.ui.SparkUI
23-
import org.apache.spark.ui.exec.ExecutorsPage
2423

2524
@Produces(Array(MediaType.APPLICATION_JSON))
2625
private[v1] class ExecutorListResource(ui: SparkUI) {
2726

2827
@GET
29-
def executorList(): Seq[ExecutorSummary] = {
30-
val listener = ui.executorsListener
31-
listener.synchronized {
32-
// The follow codes should be protected by `listener` to make sure no executors will be
33-
// removed before we query their status. See SPARK-12784.
34-
val storageStatusList = listener.activeStorageStatusList
35-
(0 until storageStatusList.size).map { statusId =>
36-
ExecutorsPage.getExecInfo(listener, statusId, isActive = true)
37-
}
38-
}
39-
}
28+
def executorList(): Seq[ExecutorSummary] = ui.store.executorList(true)
29+
4030
}

core/src/main/scala/org/apache/spark/status/storeTypes.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import java.lang.{Integer => JInteger, Long => JLong}
2222
import com.fasterxml.jackson.annotation.JsonIgnore
2323

2424
import org.apache.spark.kvstore.KVIndex
25+
import org.apache.spark.scheduler.SparkListenerEvent
2526
import org.apache.spark.status.api.v1._
2627
import org.apache.spark.status.KVUtils._
2728

@@ -133,3 +134,11 @@ private[spark] class ExecutorStageSummaryWrapper(
133134
def stage: Array[Int] = Array(stageId, stageAttemptId)
134135

135136
}
137+
138+
/**
139+
* Store raw executor events so that the executor timeline can be drawn. The event is wrapped
140+
* in a container so that a monotonically increasing ID can be added to it.
141+
*/
142+
private[spark] class ExecutorEventData(
143+
@KVIndexParam val id: Long,
144+
val event: SparkListenerEvent)

0 commit comments

Comments
 (0)