Skip to content

Commit 26c1c56

Browse files
guliangliang authored and Andrew Or committed
[SPARK-5522] Accelerate the History Server start
When starting the history server, all the log files will be fetched and parsed in order to get the applications' meta data e.g. App Name, Start Time, Duration, etc. In our production cluster, there exist 2600 log files (160G) in HDFS and it costs 3 hours to restart the history server, which is a little bit too long for us. It would be better, if the history server can show logs with missing information during start-up and fill the missing information after fetching and parsing a log file. Author: guliangliang <[email protected]> Closes apache#4525 from marsishandsome/Spark5522 and squashes the following commits: a865c11 [guliangliang] fix bug2 4340c2b [guliangliang] fix bug af92a5a [guliangliang] [SPARK-5522] Accelerate the History Server start
1 parent 6b348d9 commit 26c1c56

File tree

1 file changed

+74
-41
lines changed

1 file changed

+74
-41
lines changed

core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala

Lines changed: 74 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -18,22 +18,23 @@
1818
package org.apache.spark.deploy.history
1919

2020
import java.io.{IOException, BufferedInputStream, FileNotFoundException, InputStream}
21-
import java.util.concurrent.{Executors, TimeUnit}
21+
import java.util.concurrent.{ExecutorService, Executors, TimeUnit}
2222

2323
import scala.collection.mutable
2424
import scala.concurrent.duration.Duration
2525

2626
import com.google.common.util.concurrent.ThreadFactoryBuilder
2727

28-
import org.apache.hadoop.fs.{FileStatus, Path}
28+
import com.google.common.util.concurrent.MoreExecutors
2929
import org.apache.hadoop.fs.permission.AccessControlException
30-
31-
import org.apache.spark.{Logging, SecurityManager, SparkConf}
30+
import org.apache.hadoop.fs.{FileStatus, Path}
3231
import org.apache.spark.deploy.SparkHadoopUtil
3332
import org.apache.spark.io.CompressionCodec
3433
import org.apache.spark.scheduler._
3534
import org.apache.spark.ui.SparkUI
3635
import org.apache.spark.util.Utils
36+
import org.apache.spark.{Logging, SecurityManager, SparkConf}
37+
3738

3839
/**
3940
* A class that provides application history from event logs stored in the file system.
@@ -98,6 +99,17 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
9899
}
99100
}
100101

102+
/**
103+
* An Executor to fetch and parse log files.
104+
*/
105+
private val replayExecutor: ExecutorService = {
106+
if (!conf.contains("spark.testing")) {
107+
Executors.newSingleThreadExecutor(Utils.namedThreadFactory("log-replay-executor"))
108+
} else {
109+
MoreExecutors.sameThreadExecutor()
110+
}
111+
}
112+
101113
initialize()
102114

103115
private def initialize(): Unit = {
@@ -171,10 +183,10 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
171183
*/
172184
private[history] def checkForLogs(): Unit = {
173185
try {
174-
var newLastModifiedTime = lastModifiedTime
175186
val statusList = Option(fs.listStatus(new Path(logDir))).map(_.toSeq)
176187
.getOrElse(Seq[FileStatus]())
177-
val logInfos = statusList
188+
var newLastModifiedTime = lastModifiedTime
189+
val logInfos: Seq[FileStatus] = statusList
178190
.filter { entry =>
179191
try {
180192
getModificationTime(entry).map { time =>
@@ -189,48 +201,69 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
189201
false
190202
}
191203
}
192-
.flatMap { entry =>
193-
try {
194-
Some(replay(entry, new ReplayListenerBus()))
195-
} catch {
196-
case e: Exception =>
197-
logError(s"Failed to load application log data from $entry.", e)
198-
None
199-
}
200-
}
201-
.sortWith(compareAppInfo)
204+
.flatMap { entry => Some(entry) }
205+
.sortWith { case (entry1, entry2) =>
206+
val mod1 = getModificationTime(entry1).getOrElse(-1L)
207+
val mod2 = getModificationTime(entry2).getOrElse(-1L)
208+
mod1 >= mod2
209+
}
210+
211+
logInfos.sliding(20, 20).foreach { batch =>
212+
replayExecutor.submit(new Runnable {
213+
override def run(): Unit = mergeApplicationListing(batch)
214+
})
215+
}
202216

203217
lastModifiedTime = newLastModifiedTime
218+
} catch {
219+
case e: Exception => logError("Exception in checking for event log updates", e)
220+
}
221+
}
204222

205-
// When there are new logs, merge the new list with the existing one, maintaining
206-
// the expected ordering (descending end time). Maintaining the order is important
207-
// to avoid having to sort the list every time there is a request for the log list.
208-
if (!logInfos.isEmpty) {
209-
val newApps = new mutable.LinkedHashMap[String, FsApplicationHistoryInfo]()
210-
def addIfAbsent(info: FsApplicationHistoryInfo) = {
211-
if (!newApps.contains(info.id) ||
212-
newApps(info.id).logPath.endsWith(EventLoggingListener.IN_PROGRESS) &&
213-
!info.logPath.endsWith(EventLoggingListener.IN_PROGRESS)) {
214-
newApps += (info.id -> info)
215-
}
223+
/**
224+
* Replay the log files in the list and merge the list of old applications with new ones
225+
*/
226+
private def mergeApplicationListing(logs: Seq[FileStatus]): Unit = {
227+
val bus = new ReplayListenerBus()
228+
val newApps = logs.flatMap { fileStatus =>
229+
try {
230+
val res = replay(fileStatus, bus)
231+
logInfo(s"Application log ${res.logPath} loaded successfully.")
232+
Some(res)
233+
} catch {
234+
case e: Exception =>
235+
logError(
236+
s"Exception encountered when attempting to load application log ${fileStatus.getPath}")
237+
None
238+
}
239+
}.toSeq.sortWith(compareAppInfo)
240+
241+
// When there are new logs, merge the new list with the existing one, maintaining
242+
// the expected ordering (descending end time). Maintaining the order is important
243+
// to avoid having to sort the list every time there is a request for the log list.
244+
if (newApps.nonEmpty) {
245+
val mergedApps = new mutable.LinkedHashMap[String, FsApplicationHistoryInfo]()
246+
def addIfAbsent(info: FsApplicationHistoryInfo): Unit = {
247+
if (!mergedApps.contains(info.id) ||
248+
mergedApps(info.id).logPath.endsWith(EventLoggingListener.IN_PROGRESS) &&
249+
!info.logPath.endsWith(EventLoggingListener.IN_PROGRESS)) {
250+
mergedApps += (info.id -> info)
216251
}
252+
}
217253

218-
val newIterator = logInfos.iterator.buffered
219-
val oldIterator = applications.values.iterator.buffered
220-
while (newIterator.hasNext && oldIterator.hasNext) {
221-
if (compareAppInfo(newIterator.head, oldIterator.head)) {
222-
addIfAbsent(newIterator.next)
223-
} else {
224-
addIfAbsent(oldIterator.next)
225-
}
254+
val newIterator = newApps.iterator.buffered
255+
val oldIterator = applications.values.iterator.buffered
256+
while (newIterator.hasNext && oldIterator.hasNext) {
257+
if (compareAppInfo(newIterator.head, oldIterator.head)) {
258+
addIfAbsent(newIterator.next())
259+
} else {
260+
addIfAbsent(oldIterator.next())
226261
}
227-
newIterator.foreach(addIfAbsent)
228-
oldIterator.foreach(addIfAbsent)
229-
230-
applications = newApps
231262
}
232-
} catch {
233-
case e: Exception => logError("Exception in checking for event log updates", e)
263+
newIterator.foreach(addIfAbsent)
264+
oldIterator.foreach(addIfAbsent)
265+
266+
applications = mergedApps
234267
}
235268
}
236269

0 commit comments

Comments
 (0)