
Commit bbfb8c9

Merge branch 'master' into stratified
2 parents ee9d260 + 9d5ecf8 commit bbfb8c9

39 files changed: +489 -180 lines

bin/compute-classpath.sh

Lines changed: 4 additions & 4 deletions
@@ -81,10 +81,10 @@ ASSEMBLY_JAR=$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)
 
   # Verify that versions of java used to build the jars and run Spark are compatible
   jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
   if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
-    echo "Loading Spark jar with '$JAR_CMD' failed. "
-    echo "This is likely because Spark was compiled with Java 7 and run "
-    echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark "
-    echo "or build Spark with Java 6."
+    echo "Loading Spark jar with '$JAR_CMD' failed. " 1>&2
+    echo "This is likely because Spark was compiled with Java 7 and run " 1>&2
+    echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark " 1>&2
+    echo "or build Spark with Java 6." 1>&2
     exit 1
   fi

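All four launcher scripts touched by this commit make the same change: user-facing diagnostics move from stdout to stderr (the trailing 1>&2), so stdout stays clean for output that other tools may capture, such as a computed classpath or the launch command printed by bin/spark-class. A minimal Scala sketch of the same stdout/stderr separation (the program and messages below are illustrative, not part of Spark):

object StdoutVsStderr {
  def main(args: Array[String]): Unit = {
    // Warnings and progress go to stderr, like `echo "..." 1>&2` in the scripts...
    System.err.println("Warning: SOME_VARIABLE is deprecated")

    // ...so stdout carries only the payload and can safely be captured,
    // e.g. RESULT=$(this-program) in a shell script.
    println("the-computed-classpath.jar")
  }
}
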
bin/pyspark

Lines changed: 3 additions & 3 deletions
@@ -26,7 +26,7 @@ export SPARK_HOME="$FWDIR"
 SCALA_VERSION=2.10
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./bin/pyspark [options]"
+  echo "Usage: ./bin/pyspark [options]" 1>&2
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
 fi
@@ -36,8 +36,8 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
   # Exit if the user hasn't compiled Spark
   ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
   if [[ $? != 0 ]]; then
-    echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
-    echo "You need to build Spark before running this program" >&2
+    echo "Failed to find Spark assembly in $FWDIR/assembly/target" 1>&2
+    echo "You need to build Spark before running this program" 1>&2
     exit 1
   fi
 fi

bin/run-example

Lines changed: 5 additions & 5 deletions
@@ -27,9 +27,9 @@ if [ -n "$1" ]; then
   EXAMPLE_CLASS="$1"
   shift
 else
-  echo "Usage: ./bin/run-example <example-class> [example-args]"
-  echo " - set MASTER=XX to use a specific master"
-  echo " - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
+  echo "Usage: ./bin/run-example <example-class> [example-args]" 1>&2
+  echo " - set MASTER=XX to use a specific master" 1>&2
+  echo " - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)" 1>&2
   exit 1
 fi
 
@@ -40,8 +40,8 @@ elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.ja
 fi
 
 if [[ -z $SPARK_EXAMPLES_JAR ]]; then
-  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" >&2
-  echo "You need to build Spark before running this program" >&2
+  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
+  echo "You need to build Spark before running this program" 1>&2
   exit 1
 fi

bin/spark-class

Lines changed: 6 additions & 7 deletions
@@ -33,13 +33,13 @@ export SPARK_HOME="$FWDIR"
 . $FWDIR/bin/load-spark-env.sh
 
 if [ -z "$1" ]; then
-  echo "Usage: spark-class <class> [<args>]" >&2
+  echo "Usage: spark-class <class> [<args>]" 1>&2
   exit 1
 fi
 
 if [ -n "$SPARK_MEM" ]; then
-  echo "Warning: SPARK_MEM is deprecated, please use a more specific config option"
-  echo "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)."
+  echo -e "Warning: SPARK_MEM is deprecated, please use a more specific config option" 1>&2
+  echo -e "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)." 1>&2
 fi
 
 # Use SPARK_MEM or 512m as the default memory, to be overridden by specific options
@@ -147,10 +147,9 @@ fi
 export CLASSPATH
 
 if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
-  echo -n "Spark Command: "
-  echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
-  echo "========================================"
-  echo
+  echo -n "Spark Command: " 1>&2
+  echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
+  echo -e "========================================\n" 1>&2
 fi
 
 exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"

core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala

Lines changed: 2 additions & 0 deletions
@@ -599,6 +599,8 @@ private class PythonAccumulatorParam(@transient serverHost: String, serverPort:
     } else {
       // This happens on the master, where we pass the updates to Python through a socket
       val socket = new Socket(serverHost, serverPort)
+      // SPARK-2282: Immediately reuse closed sockets because we create one per task.
+      socket.setReuseAddress(true)
       val in = socket.getInputStream
       val out = new DataOutputStream(new BufferedOutputStream(socket.getOutputStream, bufferSize))
       out.writeInt(val2.size)

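Per the commit's own comment, setReuseAddress(true) enables SO_REUSEADDR on the accumulator socket so that addresses still lingering in TIME_WAIT from previous per-task connections do not get in the way of new ones. A minimal, self-contained sketch of the same socket option on a plain java.net.Socket (the host, port, and payload below are placeholders, not Spark's actual configuration):

import java.net.{InetSocketAddress, Socket}

object ReuseAddressSketch {
  def main(args: Array[String]): Unit = {
    val socket = new Socket()
    // Ask the OS to allow reuse of addresses still in TIME_WAIT.
    socket.setReuseAddress(true)
    // Placeholder endpoint; substitute a real listener before running.
    socket.connect(new InetSocketAddress("localhost", 65000), 5000)
    try {
      socket.getOutputStream.write(1)
    } finally {
      socket.close()
    }
  }
}
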
core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala

Lines changed: 3 additions & 2 deletions
@@ -27,7 +27,7 @@ import org.apache.spark.{Logging, SecurityManager, SparkConf}
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.ui.{WebUI, SparkUI, UIUtils}
 import org.apache.spark.ui.JettyUtils._
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{SignalLogger, Utils}
 
 /**
  * A web server that renders SparkUIs of completed applications.
@@ -169,10 +169,11 @@ class HistoryServer(
  *
  * This launches the HistoryServer as a Spark daemon.
  */
-object HistoryServer {
+object HistoryServer extends Logging {
   private val conf = new SparkConf
 
   def main(argStrings: Array[String]) {
+    SignalLogger.register(log)
     initSecurity()
     val args = new HistoryServerArguments(conf, argStrings)
     val securityManager = new SecurityManager(conf)

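SignalLogger.register(log) makes the daemon log the signals it receives, which helps explain why a long-running HistoryServer process died. The sketch below is not Spark's SignalLogger (which is internal to Spark); it is a hedged illustration of the general idea using the JVM-internal sun.misc API, assuming a HotSpot-style JVM where that API is available:

import sun.misc.{Signal, SignalHandler}

object SignalLoggingSketch {
  def main(args: Array[String]): Unit = {
    Seq("TERM", "HUP", "INT").foreach { name =>
      val signal = new Signal(name)
      var previous: SignalHandler = null
      previous = Signal.handle(signal, new SignalHandler {
        override def handle(sig: Signal): Unit = {
          // Log the signal, then let the previously installed handler run
          // so the default behaviour (e.g. terminating on SIGTERM) is kept.
          System.err.println(s"RECEIVED SIGNAL ${sig.getNumber}: SIG${sig.getName}")
          if (previous != null) previous.handle(sig)
        }
      })
    }
    Thread.sleep(60000) // keep the JVM alive so signals can be sent to it
  }
}
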
core/src/main/scala/org/apache/spark/deploy/master/Master.scala

Lines changed: 1 addition & 1 deletion
@@ -481,7 +481,7 @@ private[spark] class Master(
     // First schedule drivers, they take strict precedence over applications
     val shuffledWorkers = Random.shuffle(workers) // Randomization helps balance drivers
     for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) {
-      for (driver <- waitingDrivers) {
+      for (driver <- List(waitingDrivers: _*)) { // iterate over a copy of waitingDrivers
         if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
           launchDriver(worker, driver)
           waitingDrivers -= driver

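The loop body removes entries from waitingDrivers, so iterating over the live buffer while mutating it can skip elements; List(waitingDrivers: _*) snapshots the buffer first and the removals then operate on the original. A small stand-alone sketch of the same pattern (the names below are illustrative, not the Master's actual fields):

import scala.collection.mutable.ArrayBuffer

object IterateOverCopySketch {
  def main(args: Array[String]): Unit = {
    val waiting = ArrayBuffer("driver-0", "driver-1", "driver-2")

    // Iterate over an immutable snapshot so removing from `waiting`
    // inside the loop cannot disturb the iteration itself.
    for (driver <- List(waiting: _*)) {
      if (driver != "driver-1") { // pretend this one cannot be launched yet
        waiting -= driver         // safe: we are not iterating over `waiting`
      }
    }
    println(waiting)              // ArrayBuffer(driver-1)
  }
}
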
core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala

Lines changed: 2 additions & 2 deletions
@@ -141,8 +141,8 @@ class HadoopRDD[K, V](
       // local process. The local cache is accessed through HadoopRDD.putCachedMetadata().
       // The caching helps minimize GC, since a JobConf can contain ~10KB of temporary objects.
       // synchronize to prevent ConcurrentModificationException (Spark-1097, Hadoop-10456)
-      broadcastedConf.synchronized {
-        val newJobConf = new JobConf(broadcastedConf.value.value)
+      conf.synchronized {
+        val newJobConf = new JobConf(conf)
         initLocalJobConfFuncOpt.map(f => f(newJobConf))
         HadoopRDD.putCachedMetadata(jobConfCacheKey, newJobConf)
         newJobConf

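Copy-constructing a Hadoop JobConf iterates over the source Configuration's properties, which is not safe while other threads touch the same object (see HADOOP-10456), so the clone is guarded by synchronizing on the Configuration instance that is actually shared. A minimal sketch of that guard, assuming hadoop-client on the classpath; the shared config and property name below are illustrative:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapred.JobConf

object SynchronizedConfCloneSketch {
  // One Configuration shared by every task running in this JVM.
  val sharedConf = new Configuration()

  def localJobConf(): JobConf =
    // Lock the shared object itself: the JobConf copy constructor iterates
    // over sharedConf's properties, which must not change mid-iteration.
    sharedConf.synchronized {
      new JobConf(sharedConf)
    }

  def main(args: Array[String]): Unit = {
    val threads = (1 to 4).map { i =>
      new Thread(new Runnable {
        override def run(): Unit = {
          val jobConf = localJobConf()
          jobConf.set("illustrative.task.id", i.toString) // mutate only the private copy
        }
      })
    }
    threads.foreach(_.start())
    threads.foreach(_.join())
  }
}
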
core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala

Lines changed: 3 additions & 1 deletion
@@ -30,6 +30,7 @@ import org.apache.spark.broadcast.HttpBroadcast
 import org.apache.spark.scheduler.MapStatus
 import org.apache.spark.storage._
 import org.apache.spark.storage.{GetBlock, GotBlock, PutBlock}
+import org.apache.spark.util.BoundedPriorityQueue
 
 import scala.reflect.ClassTag
 
@@ -183,7 +184,8 @@ private[serializer] object KryoSerializer {
     classOf[GetBlock],
     classOf[MapStatus],
     classOf[BlockManagerId],
-    classOf[Array[Byte]]
+    classOf[Array[Byte]],
+    classOf[BoundedPriorityQueue[_]]
   )
 }

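Registering a class with Kryo lets instances be written with a small numeric ID instead of the full class name, and with registration required, serializing an unregistered class fails fast. A hedged sketch of plain Kryo registration, using an illustrative Record class rather than Spark's BoundedPriorityQueue (which is internal to Spark):

import java.io.ByteArrayOutputStream

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}

// Illustrative payload type: Kryo's default FieldSerializer needs a no-arg constructor.
class Record {
  var id: Int = 0
  var payload: Array[Byte] = Array.emptyByteArray
}

object KryoRegistrationSketch {
  def main(args: Array[String]): Unit = {
    val kryo = new Kryo()
    kryo.setRegistrationRequired(true) // serializing an unregistered class now throws
    kryo.register(classOf[Record])
    kryo.register(classOf[Array[Byte]])

    val record = new Record
    record.id = 42
    record.payload = Array[Byte](1, 2, 3)

    val buffer = new ByteArrayOutputStream()
    val output = new Output(buffer)
    kryo.writeObject(output, record)
    output.close()

    val copy = kryo.readObject(new Input(buffer.toByteArray), classOf[Record])
    println(copy.id) // 42
  }
}
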
core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala

Lines changed: 11 additions & 6 deletions
@@ -28,26 +28,31 @@ import org.apache.spark.scheduler._
  */
 @DeveloperApi
 class StorageStatusListener extends SparkListener {
-  private val executorIdToStorageStatus = mutable.Map[String, StorageStatus]()
+  // This maintains only blocks that are cached (i.e. storage level is not StorageLevel.NONE)
+  private[storage] val executorIdToStorageStatus = mutable.Map[String, StorageStatus]()
 
   def storageStatusList = executorIdToStorageStatus.values.toSeq
 
   /** Update storage status list to reflect updated block statuses */
-  def updateStorageStatus(execId: String, updatedBlocks: Seq[(BlockId, BlockStatus)]) {
-    val filteredStatus = storageStatusList.find(_.blockManagerId.executorId == execId)
+  private def updateStorageStatus(execId: String, updatedBlocks: Seq[(BlockId, BlockStatus)]) {
+    val filteredStatus = executorIdToStorageStatus.get(execId)
     filteredStatus.foreach { storageStatus =>
       updatedBlocks.foreach { case (blockId, updatedStatus) =>
-        storageStatus.blocks(blockId) = updatedStatus
+        if (updatedStatus.storageLevel == StorageLevel.NONE) {
+          storageStatus.blocks.remove(blockId)
+        } else {
+          storageStatus.blocks(blockId) = updatedStatus
+        }
       }
     }
   }
 
   /** Update storage status list to reflect the removal of an RDD from the cache */
-  def updateStorageStatus(unpersistedRDDId: Int) {
+  private def updateStorageStatus(unpersistedRDDId: Int) {
     storageStatusList.foreach { storageStatus =>
       val unpersistedBlocksIds = storageStatus.rddBlocks.keys.filter(_.rddId == unpersistedRDDId)
       unpersistedBlocksIds.foreach { blockId =>
-        storageStatus.blocks(blockId) = BlockStatus(StorageLevel.NONE, 0L, 0L, 0L)
+        storageStatus.blocks.remove(blockId)
       }
     }
   }

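Instead of recording an evicted or unpersisted block as a zeroed BlockStatus with storage level NONE, the listener now drops the entry, so the per-executor block map only ever holds blocks that are actually cached. A toy sketch of that bookkeeping with plain Scala collections (the types below are stand-ins for Spark's BlockId and BlockStatus, not the real ones):

import scala.collection.mutable

object CachedBlockBookkeepingSketch {
  // Stand-in for Spark's BlockStatus.
  case class BlockStatus(storageLevel: String, memSize: Long)
  val None_ = "NONE"

  val blocks = mutable.Map[String, BlockStatus]()

  def update(blockId: String, status: BlockStatus): Unit = {
    if (status.storageLevel == None_) {
      blocks.remove(blockId)   // evicted or unpersisted: forget the block entirely
    } else {
      blocks(blockId) = status // cached: record the latest status
    }
  }

  def main(args: Array[String]): Unit = {
    update("rdd_0_0", BlockStatus("MEMORY_ONLY", 1024L))
    update("rdd_0_0", BlockStatus(None_, 0L))
    println(blocks.isEmpty)    // true: no placeholder entries are kept
  }
}
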