@@ -19,7 +19,7 @@ package org.apache.spark.streaming.ui
19
19
20
20
import javax .servlet .http .HttpServletRequest
21
21
22
- import scala .xml .{NodeSeq , Node }
22
+ import scala .xml .{NodeSeq , Node , Text }
23
23
24
24
import org .apache .commons .lang3 .StringEscapeUtils
25
25
@@ -28,6 +28,7 @@ import org.apache.spark.ui.{UIUtils, WebUIPage}
28
28
import org .apache .spark .streaming .ui .StreamingJobProgressListener .{SparkJobId , OutputOpId }
29
29
import org .apache .spark .ui .jobs .UIData .JobUIData
30
30
31
/**
 * Pairs a Spark job id with the UI data recorded for that job, when such data is available.
 *
 * @param sparkJobId id of the Spark job
 * @param jobUIData UI data of the job, or `None` when no data could be looked up for the id
 */
private final case class SparkJobIdWithUIData(
    sparkJobId: SparkJobId,
    jobUIData: Option[JobUIData])
31
32
32
33
private [ui] class BatchPage (parent : StreamingTab ) extends WebUIPage (" batch" ) {
33
34
private val streamingListener = parent.listener
@@ -44,25 +45,33 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
44
45
<th >Error </th >
45
46
}
46
47
48
/**
 * Generate the table row for one Spark job of an output op.
 *
 * When the job still carries its UI data the row is rendered by `generateNormalJobRow`;
 * otherwise only the job id is known and the row is rendered by `generateDroppedJobRow`.
 */
private def generateJobRow(
    outputOpId: OutputOpId,
    outputOpDescription: Seq[Node],
    formattedOutputOpDuration: String,
    numSparkJobRowsInOutputOp: Int,
    isFirstRow: Boolean,
    sparkJob: SparkJobIdWithUIData): Seq[Node] = {
  sparkJob.jobUIData match {
    case Some(uiData) =>
      generateNormalJobRow(outputOpId, outputOpDescription, formattedOutputOpDuration,
        numSparkJobRowsInOutputOp, isFirstRow, uiData)
    case None =>
      generateDroppedJobRow(outputOpId, outputOpDescription, formattedOutputOpDuration,
        numSparkJobRowsInOutputOp, isFirstRow, sparkJob.sparkJobId)
  }
}
63
+
47
64
/**
48
65
* Generate a row for a Spark Job. Because duplicated output op info needs to be collapsed into
49
66
* one cell, we use "rowspan" for the first row of an output op.
50
67
*/
51
- def generateJobRow (
68
+ def generateNormalJobRow (
52
69
outputOpId : OutputOpId ,
70
+ outputOpDescription : Seq [Node ],
53
71
formattedOutputOpDuration : String ,
54
72
numSparkJobRowsInOutputOp : Int ,
55
73
isFirstRow : Boolean ,
56
74
sparkJob : JobUIData ): Seq [Node ] = {
57
- val lastStageInfo = Option (sparkJob.stageIds)
58
- .filter(_.nonEmpty)
59
- .flatMap { ids => sparkListener.stageIdToInfo.get(ids.max) }
60
- val lastStageData = lastStageInfo.flatMap { s =>
61
- sparkListener.stageIdToData.get((s.stageId, s.attemptId))
62
- }
63
-
64
- val lastStageName = lastStageInfo.map(_.name).getOrElse(" (Unknown Stage Name)" )
65
- val lastStageDescription = lastStageData.flatMap(_.description).getOrElse(" " )
66
75
val duration : Option [Long ] = {
67
76
sparkJob.submissionTime.map { start =>
68
77
val end = sparkJob.completionTime.getOrElse(System .currentTimeMillis())
@@ -83,9 +92,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
83
92
if (isFirstRow) {
84
93
<td class =" output-op-id-cell" rowspan ={numSparkJobRowsInOutputOp.toString}>{outputOpId.toString}</td >
85
94
<td rowspan ={numSparkJobRowsInOutputOp.toString}>
86
- <span class =" description-input" title ={lastStageDescription}>
87
- {lastStageDescription}
88
- </span >{lastStageName}
95
+ {outputOpDescription}
89
96
</td >
90
97
<td rowspan ={numSparkJobRowsInOutputOp.toString}>{formattedOutputOpDuration}</td >
91
98
} else {
@@ -122,27 +129,97 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
122
129
</tr >
123
130
}
124
131
125
- private def generateOutputOpIdRow (
126
- outputOpId : OutputOpId , sparkJobs : Seq [JobUIData ]): Seq [Node ] = {
127
- val sparkjobDurations = sparkJobs.map(sparkJob => {
128
- sparkJob.submissionTime.map { start =>
129
- val end = sparkJob.completionTime.getOrElse(System .currentTimeMillis())
130
- end - start
132
/**
 * Generate the row for a job whose UI data is no longer available (dropped by sparkListener
 * because its retention limit was exceeded). Only the job id can be shown; the duration,
 * stages, tasks and error cells are rendered as "-".
 */
private def generateDroppedJobRow(
    outputOpId: OutputOpId,
    outputOpDescription: Seq[Node],
    formattedOutputOpDuration: String,
    numSparkJobRowsInOutputOp: Int,
    isFirstRow: Boolean,
    jobId: Int): Seq[Node] = {
  val rowSpan = numSparkJobRowsInOutputOp.toString
  val jobIdText = jobId.toString

  // Only the first row of an output op carries the output op id, description and duration
  // cells; they span all job rows of the op via "rowspan", so later rows must omit them.
  // scalastyle:off
  val leadingCells =
    if (!isFirstRow) {
      Nil
    } else {
      <td class=" output-op-id-cell" rowspan={rowSpan}>{outputOpId.toString}</td>
      <td rowspan={rowSpan}>{outputOpDescription}</td>
      <td rowspan={rowSpan}>{formattedOutputOpDuration}</td>
    }
  // scalastyle:on

  <tr>
    {leadingCells}
    <td sorttable_customkey={jobIdText}>
      {jobIdText}
    </td>
    <!-- Duration -->
    <td>-</td>
    <!-- Stages: Succeeded/Total -->
    <td>-</td>
    <!-- Tasks (for all stages): Succeeded/Total -->
    <td>-</td>
    <!-- Error -->
    <td>-</td>
  </tr>
}
171
+
172
/**
 * Generate all job rows belonging to one output op.
 *
 * The output op duration is the sum of the durations of the jobs that still have UI data;
 * it is shown as "-" when there is no such job or when any of them has no submission time.
 * The first job row is rendered with `isFirstRow = true` so that it carries the cells shared
 * by the whole output op.
 */
private def generateOutputOpIdRow(
    outputOpId: OutputOpId, sparkJobs: Seq[SparkJobIdWithUIData]): Seq[Node] = {
  // Dropped jobs have no UI data, so they do not contribute to the durations
  val sparkJobDurations = sparkJobs.flatMap(_.jobUIData).map { job =>
    job.submissionTime.map { submitted =>
      // A job without a completion time is measured up to now
      job.completionTime.getOrElse(System.currentTimeMillis()) - submitted
    }
  }

  val allDurationsKnown = sparkJobDurations.nonEmpty && sparkJobDurations.forall(_.isDefined)
  val formattedOutputOpDuration =
    if (allDurationsKnown) {
      UIUtils.formatDuration(sparkJobDurations.flatten.sum)
    } else {
      " -"
    }

  val description = generateOutputOpDescription(sparkJobs)
  val jobRowCount = sparkJobs.size

  val firstRow = generateJobRow(
    outputOpId, description, formattedOutputOpDuration, jobRowCount, true, sparkJobs.head)
  val remainingRows = sparkJobs.tail.flatMap { job =>
    generateJobRow(outputOpId, description, formattedOutputOpDuration, jobRowCount, false, job)
  }
  firstRow ++ remainingRows
}
145
199
200
/**
 * Build the description cell content of an output op.
 *
 * The description is taken from the first job that still has UI data: the stage with the
 * largest stage id of that job supplies the stage name and, if present, the description.
 * Fallback strings are used when no such job or stage can be found.
 */
private def generateOutputOpDescription(sparkJobs: Seq[SparkJobIdWithUIData]): Seq[Node] = {
  // First job whose UI data is still available
  val firstJobUIData = sparkJobs.collectFirst {
    case SparkJobIdWithUIData(_, Some(uiData)) => uiData
  }
  // Stage info of the stage with the largest id; a job without stages yields nothing
  val lastStageInfo = firstJobUIData
    .filter(_.stageIds.nonEmpty)
    .flatMap(job => sparkListener.stageIdToInfo.get(job.stageIds.max))
  val lastStageData = lastStageInfo.flatMap { info =>
    sparkListener.stageIdToData.get((info.stageId, info.attemptId))
  }

  val lastStageName = lastStageInfo.fold(" (Unknown Stage Name)")(_.name)
  val lastStageDescription = lastStageData.flatMap(_.description).getOrElse(" ")

  <span class=" description-input" title={lastStageDescription}>
    {lastStageDescription}
  </span> ++ Text(lastStageName)
}
222
+
146
223
private def failureReasonCell (failureReason : String ): Seq [Node ] = {
147
224
val isMultiline = failureReason.indexOf('\n ' ) >= 0
148
225
// Display the first line by default
@@ -187,10 +264,10 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
187
264
(outputOpId, outputOpIdAndSparkJobIds.map(_.sparkJobId).sorted)
188
265
}
189
266
sparkListener.synchronized {
190
- val outputOpIdWithJobs : Seq [(OutputOpId , Seq [JobUIData ])] =
267
+ val outputOpIdWithJobs : Seq [(OutputOpId , Seq [SparkJobIdWithUIData ])] =
191
268
outputOpIdToSparkJobIds.map { case (outputOpId, sparkJobIds) =>
192
- // Filter out spark Job ids that don't exist in sparkListener
193
- (outputOpId, sparkJobIds.flatMap( getJobData))
269
+ (outputOpId,
270
+ sparkJobIds.map(sparkJobId => SparkJobIdWithUIData (sparkJobId, getJobData(sparkJobId)) ))
194
271
}
195
272
196
273
<table id =" batch-job-table" class =" table table-bordered table-striped table-condensed" >
@@ -200,7 +277,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
200
277
<tbody >
201
278
{
202
279
outputOpIdWithJobs.map {
203
- case (outputOpId, jobs ) => generateOutputOpIdRow(outputOpId, jobs )
280
+ case (outputOpId, sparkJobIds ) => generateOutputOpIdRow(outputOpId, sparkJobIds )
204
281
}
205
282
}
206
283
</tbody >
0 commit comments