Skip to content

Commit dd8cc4b

Browse files
author
Marcelo Vanzin
committed
Standardize on using spark.history.* configuration.
Update documentation to mention the config options instead of the old command line argument, and update the startup script.
1 parent 4da3a52 commit dd8cc4b

File tree

4 files changed

+33
-44
lines changed

4 files changed

+33
-44
lines changed

core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,11 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
3535
private val UPDATE_INTERVAL_MS = conf.getInt("spark.history.fs.updateInterval",
3636
conf.getInt("spark.history.updateInterval", 10)) * 1000
3737

38-
private val logDir = conf.get("spark.history.fs.logDirectory")
38+
private val logDir = conf.get("spark.history.fs.logDirectory", null)
39+
if (logDir == null) {
40+
throw new IllegalArgumentException("Logging directory must be specified.")
41+
}
42+
3943
private val fs = Utils.getHadoopFileSystem(logDir)
4044

4145
// A timestamp of when the disk was last accessed to check for log updates

core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala

Lines changed: 7 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,14 @@
1717

1818
package org.apache.spark.deploy.history
1919

20-
import java.net.URI
21-
22-
import org.apache.hadoop.fs.Path
23-
2420
import org.apache.spark.SparkConf
2521
import org.apache.spark.util.Utils
2622

2723
/**
2824
* Command-line parser for the history server.
2925
*/
3026
private[spark] class HistoryServerArguments(conf: SparkConf, args: Array[String]) {
31-
var logDir = conf.get("spark.history.fs.logDirectory", null)
27+
private var logDir: String = null
3228

3329
parse(args.toList)
3430

@@ -46,36 +42,17 @@ private[spark] class HistoryServerArguments(conf: SparkConf, args: Array[String]
4642
case _ =>
4743
printUsageAndExit(1)
4844
}
49-
validateLogDir()
50-
conf.set("spark.history.fs.logDirectory", logDir)
51-
}
52-
53-
private def validateLogDir() {
54-
if (logDir == null) {
55-
System.err.println("Logging directory must be specified.")
56-
printUsageAndExit(1)
57-
}
58-
val fileSystem = Utils.getHadoopFileSystem(new URI(logDir))
59-
val path = new Path(logDir)
60-
if (!fileSystem.exists(path)) {
61-
System.err.println("Logging directory specified does not exist: %s".format(logDir))
62-
printUsageAndExit(1)
63-
}
64-
if (!fileSystem.getFileStatus(path).isDir) {
65-
System.err.println("Logging directory specified is not a directory: %s".format(logDir))
66-
printUsageAndExit(1)
45+
if (logDir != null) {
46+
conf.set("spark.history.fs.logDirectory", logDir)
6747
}
6848
}
6949

7050
private def printUsageAndExit(exitCode: Int) {
7151
System.err.println(
7252
"""
73-
|Usage: HistoryServer [-d logDir]
74-
|
75-
|The preferred way to pass options is to set the configuration below using
76-
|SPARK_HISTORY_OPTS. The "-d" command line argument is avalable for backwards
77-
|compatibility, and overrides "spark.history.fs.logDirectory".
53+
|Usage: HistoryServer
7854
|
55+
|Configuration options can be set by setting the corresponding JVM system property.
7956
|History Server options are always available; additional options depend on the provider.
8057
|
8158
|History Server options:
@@ -84,7 +61,8 @@ private[spark] class HistoryServerArguments(conf: SparkConf, args: Array[String]
8461
| spark.history.acls.enable Whether to enable view acls for all applications (default false)
8562
| spark.history.provider Name of history provider class (defaults to file system-based
8663
| provider)
87-
|
64+
| spark.history.retainedApplications Max number of application UIs to keep loaded in memory
65+
| (default 50)
8866
|FsHistoryProvider options:
8967
|
9068
| spark.history.fs.logDirectory Directory where app logs are stored (required)

docs/monitoring.md

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,13 @@ If Spark is run on Mesos or YARN, it is still possible to reconstruct the UI of
3535
application through Spark's history server, provided that the application's event logs exist.
3636
You can start the history server by executing:
3737

38-
./sbin/start-history-server.sh <base-logging-directory>
38+
./sbin/start-history-server.sh
3939

40-
The base logging directory must be supplied, and should contain sub-directories that each
41-
represents an application's event logs. This creates a web interface at
42-
`http://<server-url>:18080` by default. The history server can be configured as follows:
40+
When using the file-system provider class (see spark.history.provider below), the base logging
41+
directory must be supplied in the "spark.history.fs.logDirectory" configuration option, and should
42+
contain sub-directories that each represent an application's event logs. This creates a web
43+
interface at `http://<server-url>:18080` by default. The history server can be configured as
44+
follows:
4345

4446
<table class="table">
4547
<tr><th style="width:21%">Environment Variable</th><th>Meaning</th></tr>
@@ -68,6 +70,12 @@ represents an application's event logs. This creates a web interface at
6870

6971
<table class="table">
7072
<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
73+
<tr>
74+
<td>spark.history.provider</td>
75+
<td>org.apache.spark.deploy.history.FsHistoryProvider</td>
76+
<td>Name of the class implementing the application history backend. Currently there is only
77+
one implementation provided by Spark, which matches the default value.</td>
78+
</tr>
7179
<tr>
7280
<td>spark.history.fs.updateInterval</td>
7381
<td>10</td>
@@ -78,7 +86,7 @@ represents an application's event logs. This creates a web interface at
7886
</tr>
7987
<tr>
8088
<td>spark.history.retainedApplications</td>
81-
<td>250</td>
89+
<td>50</td>
8290
<td>
8391
The number of application UIs to retain. If this cap is exceeded, then the oldest
8492
applications will be removed.

sbin/start-history-server.sh

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,18 @@
1919

2020
# Starts the history server on the machine this script is executed on.
2121
#
22-
# Usage: start-history-server.sh <base-log-dir> [<web-ui-port>]
23-
# Example: ./start-history-server.sh --dir /tmp/spark-events --port 18080
22+
# Usage: start-history-server.sh
23+
#
24+
# Use the SPARK_HISTORY_OPTS environment variable to set history server configuration.
2425
#
2526

2627
sbin=`dirname "$0"`
2728
sbin=`cd "$sbin"; pwd`
2829

29-
if [ $# -lt 1 ]; then
30-
echo "Usage: ./start-history-server.sh <base-log-dir>"
31-
echo "Example: ./start-history-server.sh /tmp/spark-events"
32-
exit
30+
if [ $# != 0 ]; then
31+
echo "Using command line arguments for setting the log directory is deprecated. Please "
32+
echo "set the spark.history.fs.logDirectory configuration option instead."
33+
export SPARK_HISTORY_OPTS="$SPARK_HISTORY_OPTS -Dspark.history.fs.logDirectory=$1"
3334
fi
3435

35-
LOG_DIR=$1
36-
37-
"$sbin"/spark-daemon.sh start org.apache.spark.deploy.history.HistoryServer 1 --dir "$LOG_DIR"
36+
exec "$sbin"/spark-daemon.sh start org.apache.spark.deploy.history.HistoryServer 1

0 commit comments

Comments
 (0)