-
Notifications
You must be signed in to change notification settings - Fork 28.8k
[SPARK-2098] All Spark processes should support spark-defaults.conf, config file #1256
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,8 +17,10 @@ | |
|
||
package org.apache.spark | ||
|
||
import java.io.File | ||
import scala.collection.JavaConverters._ | ||
import scala.collection.mutable.HashMap | ||
import org.apache.spark.util.Utils | ||
|
||
/** | ||
* Configuration for a Spark application. Used to set various Spark parameters as key-value pairs. | ||
|
@@ -33,25 +35,33 @@ import scala.collection.mutable.HashMap | |
* All setter methods in this class support chaining. For example, you can write | ||
* `new SparkConf().setMaster("local").setAppName("My app")`. | ||
* | ||
* The order of precedence for options is system properties > file. | ||
* | ||
* Note that once a SparkConf object is passed to Spark, it is cloned and can no longer be modified | ||
* by the user. Spark does not support modifying the configuration at runtime. | ||
* | ||
* @param loadDefaults whether to also load values from Java system properties | ||
* @param loadDefaults whether to also load values from Java system properties and the config file. | ||
* @param fileName load properties from file | ||
*/ | ||
class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { | ||
class SparkConf(loadDefaults: Boolean, fileName: Option[String]) | ||
extends Cloneable with Logging { | ||
|
||
import SparkConf._ | ||
|
||
/** Create a SparkConf that loads defaults from system properties and the classpath */ | ||
def this() = this(true) | ||
def this() = this(true, None) | ||
|
||
/** | ||
* Create a SparkConf | ||
* @param loadDefaults whether to also load values from Java system properties | ||
*/ | ||
def this(loadDefaults: Boolean) = this(loadDefaults, None) | ||
|
||
private val settings = new HashMap[String, String]() | ||
|
||
if (loadDefaults) { | ||
// Load any spark.* system properties | ||
for ((k, v) <- System.getProperties.asScala if k.startsWith("spark.")) { | ||
settings(k) = v | ||
} | ||
fileName.foreach(f => loadPropertiesFromFile(f, isOverride = true)) | ||
loadSystemProperties() | ||
} | ||
|
||
/** Set a configuration variable. */ | ||
|
@@ -307,6 +317,27 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { | |
def toDebugString: String = { | ||
settings.toArray.sorted.map{case (k, v) => k + "=" + v}.mkString("\n") | ||
} | ||
|
||
/** Load properties from file. */ | ||
private[spark] def loadPropertiesFromFile(fileName: String, isOverride: Boolean = false) { | ||
val file = new File(fileName) | ||
if (file.isFile()) { | ||
loadProperties(Utils.getPropertiesFromFile(file.getAbsolutePath), isOverride) | ||
} | ||
} | ||
|
||
/** Load any spark.* system properties */ | ||
private[spark] def loadSystemProperties() { | ||
loadProperties(sys.props.toSeq, true) | ||
} | ||
|
||
private def loadProperties(seq: Seq[(String, String)], isOverride: Boolean) { | ||
for ((k, v) <- seq if k.startsWith("spark.")) { | ||
if (isOverride || settings.get(k).isEmpty) { | ||
settings(k) = v | ||
} | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm, I actually don't think any of this logic should exist in |
||
} | ||
|
||
private[spark] object SparkConf { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,7 @@ package org.apache.spark.util | |
import java.io._ | ||
import java.net._ | ||
import java.nio.ByteBuffer | ||
import java.util.{Locale, Random, UUID} | ||
import java.util.{Locale, Properties, Random, UUID} | ||
import java.util.concurrent.{ThreadFactory, ConcurrentHashMap, Executors, ThreadPoolExecutor} | ||
|
||
import scala.collection.JavaConversions._ | ||
|
@@ -1307,6 +1307,43 @@ private[spark] object Utils extends Logging { | |
} | ||
} | ||
|
||
/** Load properties present in the given file. */ | ||
def getPropertiesFromFile(filename: String): Seq[(String, String)] = { | ||
val file = new File(filename) | ||
require(file.exists(), s"Properties file $file does not exist") | ||
require(file.isFile(), s"Properties file $file is not a normal file") | ||
val inputStream = new FileInputStream(file) | ||
getPropertiesFromInputStream(inputStream) | ||
} | ||
|
||
/** | ||
* Load properties present in the given inputStream. | ||
* @param inputStream InputStream from where to load properties. | ||
* Expected to contain UTF-8 data. Will be closed by this method. | ||
*/ | ||
private[spark] def getPropertiesFromInputStream(inputStream: InputStream): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You don't need to put this in a separate function. We won't ever read the Spark configs from another input stream that's not a file, so this can just go into |
||
Seq[(String, String)] = { | ||
val inReader = new InputStreamReader(inputStream, "UTF-8") | ||
try { | ||
val properties = new Properties() | ||
properties.load(inReader) | ||
properties.stringPropertyNames().toSeq.map(k => (k, properties(k).trim)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is probably a little more efficient: properties.entrySet().map(e => (e.getKey().asInstanceOf[String], e.getValue().asInstanceOf[String].trim())) |
||
} catch { | ||
case e: IOException => | ||
val message = s"Failed when loading Spark properties" | ||
throw new SparkException(message, e) | ||
} finally { | ||
inReader.close() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you're keeping this, you should document that this method will close the given input stream. |
||
} | ||
} | ||
|
||
private[spark] def getDefaultConfigFile: String = { | ||
val s = File.separator | ||
Seq( | ||
sys.env.get("SPARK_CONF_DIR").map(t => new File(s"$t${s}spark-defaults.conf")), | ||
sys.env.get("SPARK_HOME").map(t => new File(s"${t}${s}conf${s}spark-defaults.conf"))). | ||
filter(_.isDefined).map(_.get).find(_.exists).map(_.getAbsolutePath).orNull | ||
} | ||
/** Return a nice string representation of the exception, including the stack trace. */ | ||
def exceptionString(e: Exception): String = { | ||
if (e == null) "" else exceptionString(getFormattedClassName(e), e.getMessage, e.getStackTrace) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this comment is necessary. The user shouldn't manually set the system properties anyway; the usual mechanism is through some `--conf`-style
option in SparkSubmit.
option in SparkSubmit.