Commit 3773210

Merge pull request #23 from markhamstra/master-csd
SPY-350 and catching up to Apache branch-0.9
2 parents f5b3104 + bc5cb1d

File tree

48 files changed (+395, -123 lines)


CHANGES.txt

Lines changed: 208 additions & 0 deletions
@@ -1,6 +1,214 @@
 Spark Change Log
 ----------------

+Release 0.9.2
+
+  [branch-0.9] bump versions for v0.9.2 release candidate
+  Xiangrui Meng <[email protected]>
+  2014-07-16 23:53:40 -0700
+  Commit: c9a22e8, github.com/apache/spark/pull/1458
+
+  [branch-0.9] Fix github links in docs
+  Xiangrui Meng <[email protected]>
+  2014-07-16 23:39:02 -0700
+  Commit: 60f4b3b, github.com/apache/spark/pull/1456
+
+  [SPARK-1112, 2156] (0.9 edition) Use correct akka frame size and overhead amounts.
+  Patrick Wendell <[email protected]>
+  2014-07-16 21:30:50 -0700
+  Commit: 7edee34, github.com/apache/spark/pull/1455
+
+  [SPARK-2433][MLLIB] fix NaiveBayesModel.predict
+  Xiangrui Meng <[email protected]>
+  2014-07-16 20:12:09 -0700
+  Commit: 0116dee, github.com/apache/spark/pull/1453
+
+  [SPARK-2362] Fix for newFilesOnly logic in file DStream
+  Gabriele Nizzoli <[email protected]>
+  2014-07-08 14:23:38 -0700
+  Commit: 8e5604b, github.com/apache/spark/pull/1077
+
+  SPARK-2282: Reuse PySpark Accumulator sockets to avoid crashing Spark
+  Aaron Davidson <[email protected]>
+  2014-07-03 23:02:36 -0700
+  Commit: 57873ef, github.com/apache/spark/pull/1220
+
+  [SPARK-2350] Don't NPE while launching drivers
+  Aaron Davidson <[email protected]>
+  2014-07-03 22:31:41 -0700
+  Commit: c37e9ed, github.com/apache/spark/pull/1289
+
+  [SPARK-1516]Throw exception in yarn client instead of run system.exit
+  John Zhao <[email protected]>
+  2014-07-03 15:17:51 -0700
+  Commit: 0d3d5ce, github.com/apache/spark/pull/1099
+
+  HOTFIX: Removing out dated python path in testing tool.
+  Patrick Wendell <[email protected]>
+  2014-06-27 18:19:16 -0700
+  Commit: b3f4245
+
+  [SPARK-1912] fix compress memory issue during reduce
+  Wenchen Fan(Cloud) <[email protected]>
+  2014-06-03 13:18:20 -0700
+  Commit: 9509819, github.com/apache/spark/pull/860
+
+  SPARK-2241: quote command line args in ec2 script
+  Ori Kremer <[email protected]>
+  2014-06-22 20:21:23 -0700
+  Commit: ef8501d, github.com/apache/spark/pull/1169
+
+  HOTFIX: bug caused by #941
+  Patrick Wendell <[email protected]>
+  2014-06-17 15:09:24 -0700
+  Commit: 2a2eace, github.com/apache/spark/pull/1108
+
+  SPARK-1990: added compatibility for python 2.6 for ssh_read command
+
+  2014-06-16 23:42:27 -0700
+  Commit: 8e9f479, github.com/apache/spark/pull/941
+
+  [SPARK-1998] SparkFlumeEvent with body bigger than 1020 bytes are not re...
+
+  2014-06-10 17:26:17 -0700
+  Commit: 706e38f, github.com/apache/spark/pull/951
+
+  Spark 1384 - Fix spark-shell on yarn access to secure hdfs - branch-0.9 only
+  Thomas Graves <[email protected]>
+  2014-06-09 23:07:25 -0700
+  Commit: cc95d97, github.com/apache/spark/pull/287
+
+  [SPARK-1870] Made deployment with --jars work in yarn-standalone mode.
+
+  2014-06-09 22:56:24 -0700
+  Commit: 1d3aab9, github.com/apache/spark/pull/1013
+
+  SPARK-2043: ExternalAppendOnlyMap doesn't always find matching keys
+  Matei Zaharia <[email protected]>
+  2014-06-05 23:01:48 -0700
+  Commit: 51f677e, github.com/apache/spark/pull/986
+
+  SPARK-1790: Update EC2 scripts to support r3 instance types
+  Varakhedi Sujeet <[email protected]>
+  2014-06-04 16:01:56 -0700
+  Commit: 6634a34, github.com/apache/spark/pull/960
+
+  [SPARK-1468] Modify the partition function used by partitionBy.
+  Erik Selin <[email protected]>
+  2014-06-03 13:31:16 -0700
+  Commit: 41e7853, github.com/apache/spark/pull/371
+
+  SPARK-1917: fix PySpark import of scipy.special functions
+  Uri Laserson <[email protected]>
+  2014-05-31 14:59:09 -0700
+  Commit: e03af41, github.com/apache/spark/pull/866
+
+  SPARK-1935: Explicitly add commons-codec 1.5 as a dependency (for branch-0.9).
+  Yin Huai <[email protected]>
+  2014-05-30 22:12:17 -0700
+  Commit: 563bfe1, github.com/apache/spark/pull/912
+
+  SPARK-1188: Do not re-use objects in the EdgePartition/EdgeTriplet iterators.
+  Daniel Darabos <[email protected]>
+  2014-04-02 12:27:37 -0700
+  Commit: a92900c, github.com/apache/spark/pull/276
+
+  [SPARK-1712]: TaskDescription instance is too big causes Spark to hang
+
+  2014-05-28 15:57:05 -0700
+  Commit: aef6390, github.com/apache/spark/pull/694
+
+  Spark 1916
+  David Lemieux <[email protected]>
+  2014-05-28 15:50:35 -0700
+  Commit: 234a378, github.com/apache/spark/pull/865
+
+  SPARK-1145: Memory mapping with many small blocks can cause JVM allocation failures
+  Patrick Wendell <[email protected]>
+  2014-04-27 17:40:56 -0700
+  Commit: 7633949, github.com/apache/spark/pull/43
+
+  Update version to 0.9.2-SNAPSHOT in sbt
+  Matei Zaharia <[email protected]>
+  2014-05-11 16:54:54 -0700
+  Commit: c9f40d0
+
+  SPARK-1775: Unneeded lock in ShuffleMapTask.deserializeInfo
+
+  2014-05-08 22:30:17 -0700
+  Commit: bea2be3, github.com/apache/spark/pull/707
+
+  [SPARK-1685] Cancel retryTimer on restart of Worker or AppClient
+  Mark Hamstra <[email protected]>
+  2014-05-06 12:53:39 -0700
+  Commit: 9e2c59e, github.com/apache/spark/pull/602
+
+  [WIP] SPARK-1676: Cache Hadoop UGIs by default to prevent FileSystem leak
+  Thomas Graves <[email protected]>
+  2014-05-03 10:59:05 -0700
+  Commit: 45561cd, github.com/apache/spark/pull/621
+
+  version number fix
+
+  2014-04-21 23:42:47 -0700
+  Commit: 54c3b7e, github.com/apache/spark/pull/467
+
+  Small syntax error from previous backport
+  Patrick Wendell <[email protected]>
+  2014-04-13 14:32:22 -0700
+  Commit: 9e89789
+
+  Update WindowedDStream.scala
+  baishuo(白硕) <[email protected]>
+  2014-04-11 20:33:42 -0700
+  Commit: 4a325e1, github.com/apache/spark/pull/390
+
+  Fixed typo on Spark quick-start docs.
+  Tathagata Das <[email protected]>
+  2014-04-07 18:27:46 -0700
+  Commit: 19cf2f7
+
+  SPARK-1432: Make sure that all metadata fields are properly cleaned
+  Davis Shepherd <[email protected]>
+  2014-04-07 10:02:00 -0700
+  Commit: 69fc97d, github.com/apache/spark/pull/338
+
+  SPARK-1421. Make MLlib work on Python 2.6
+  Matei Zaharia <[email protected]>
+  2014-04-05 20:52:05 -0700
+  Commit: 139fc1a, github.com/apache/spark/pull/335
+
+  Update documentation for work around for SPARK-1384
+  Thomas Graves <[email protected]>
+  2014-04-04 18:26:51 -0700
+  Commit: d4df076, github.com/apache/spark/pull/314
+
+  SPARK-1337: Application web UI garbage collects newest stages
+  Patrick Wendell <[email protected]>
+  2014-04-03 22:13:56 -0700
+  Commit: 7f727cf, github.com/apache/spark/pull/320
+
+  [SPARK-1134] Fix and document passing of arguments to IPython
+  Diana Carroll <[email protected]>
+  2014-04-03 15:48:42 -0700
+  Commit: d9c7a80, github.com/apache/spark/pull/294
+
+  Spark 1162 Implemented takeOrdered in pyspark.
+  Prashant Sharma <[email protected]>
+  2014-04-03 15:42:17 -0700
+  Commit: 28e7643, github.com/apache/spark/pull/97
+
+  fix path for jar, make sed actually work on OSX
+  Nick Lanham <[email protected]>
+  2014-03-28 13:33:35 -0700
+  Commit: a6c955a, github.com/apache/spark/pull/264
+
+  Make sed do -i '' on OSX
+  Nick Lanham <[email protected]>
+  2014-03-27 22:45:00 -0700
+  Commit: 4afbd19, github.com/apache/spark/pull/258
+
+
 Release 0.9.1

 Revert "[maven-release-plugin] prepare release v0.9.1-rc2"

assembly/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>0.9.2-candidate-csd-8-SNAPSHOT</version>
+    <version>0.9.2-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

bagel/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>0.9.2-candidate-csd-8-SNAPSHOT</version>
+    <version>0.9.2-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
  </parent>

core/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>0.9.2-candidate-csd-8-SNAPSHOT</version>
+    <version>0.9.2-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala

Lines changed: 2 additions & 0 deletions
@@ -307,6 +307,8 @@ private class PythonAccumulatorParam(@transient serverHost: String, serverPort:
     } else {
       // This happens on the master, where we pass the updates to Python through a socket
       val socket = new Socket(serverHost, serverPort)
+      // SPARK-2282: Immediately reuse closed sockets because we create one per task.
+      socket.setReuseAddress(true)
       val in = socket.getInputStream
       val out = new DataOutputStream(new BufferedOutputStream(socket.getOutputStream, bufferSize))
       out.writeInt(val2.size)
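
For readers unfamiliar with the socket option used above: setReuseAddress(true) enables SO_REUSEADDR, which lets a new socket bind to a local port still in TIME_WAIT from a previously closed connection; since this code path creates one socket per task, closed sockets pile up quickly. A minimal standalone sketch of the option (the host and port here are placeholders, not Spark's accumulator server):

    import java.net.{InetSocketAddress, Socket}

    // Sketch with a placeholder endpoint; setting the option on an unconnected
    // socket ensures it is in effect before the local port is bound.
    val socket = new Socket()
    socket.setReuseAddress(true)
    socket.connect(new InetSocketAddress("localhost", 8000))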

core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala

Lines changed: 3 additions & 1 deletion
@@ -90,11 +90,13 @@ private[spark] class ApplicationInfo(
 
   def retryCount = _retryCount
 
-  def incrementRetryCount = {
+  def incrementRetryCount() = {
     _retryCount += 1
     _retryCount
   }
 
+  def resetRetryCount() = _retryCount = 0
+
   def markFinished(endState: ApplicationState.Value) {
     state = endState
     endTime = System.currentTimeMillis()
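
The parentheses added to incrementRetryCount() follow the usual Scala convention: side-effecting methods are declared and called with empty parens, while pure accessors such as retryCount omit them. A hypothetical illustration of the same convention (not code from this patch):

    class RetryTracker {
      private var _retryCount = 0
      def retryCount = _retryCount   // pure accessor: no parens
      def incrementRetryCount() = {  // mutator: parens signal a side effect
        _retryCount += 1
        _retryCount
      }
      def resetRetryCount() = _retryCount = 0
    }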

core/src/main/scala/org/apache/spark/deploy/master/Master.scala

Lines changed: 17 additions & 10 deletions
@@ -264,27 +264,34 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act
     val execOption = idToApp.get(appId).flatMap(app => app.executors.get(execId))
     execOption match {
       case Some(exec) => {
+        val appInfo = idToApp(appId)
         exec.state = state
+        if (state == ExecutorState.RUNNING) { appInfo.resetRetryCount() }
         exec.application.driver ! ExecutorUpdated(execId, state, message, exitStatus)
         if (ExecutorState.isFinished(state)) {
-          val appInfo = idToApp(appId)
           // Remove this executor from the worker and app
-          logInfo("Removing executor " + exec.fullId + " because it is " + state)
+          logInfo(s"Removing executor ${exec.fullId} because it is $state")
           appInfo.removeExecutor(exec)
           exec.worker.removeExecutor(exec)
 
+          val normalExit = exitStatus == Some(0)
           // Only retry certain number of times so we don't go into an infinite loop.
-          if (appInfo.incrementRetryCount < ApplicationState.MAX_NUM_RETRY) {
-            schedule()
-          } else {
-            logError("Application %s with ID %s failed %d times, removing it".format(
-              appInfo.desc.name, appInfo.id, appInfo.retryCount))
-            removeApplication(appInfo, ApplicationState.FAILED)
+          if (!normalExit) {
+            if (appInfo.incrementRetryCount() < ApplicationState.MAX_NUM_RETRY) {
+              schedule()
+            } else {
+              val execs = appInfo.executors.values
+              if (!execs.exists(_.state == ExecutorState.RUNNING)) {
+                logError(s"Application ${appInfo.desc.name} with ID ${appInfo.id} failed " +
+                  s"${appInfo.retryCount} times; removing it")
+                removeApplication(appInfo, ApplicationState.FAILED)
+              }
+            }
           }
         }
       }
       case None =>
-        logWarning("Got status update for unknown executor " + appId + "/" + execId)
+        logWarning(s"Got status update for unknown executor $appId/$execId")
     }
   }

@@ -450,7 +457,7 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act
     // First schedule drivers, they take strict precedence over applications
     val shuffledWorkers = Random.shuffle(workers) // Randomization helps balance drivers
     for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) {
-      for (driver <- waitingDrivers) {
+      for (driver <- List(waitingDrivers: _*)) { // iterate over a copy of waitingDrivers
        if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
           launchDriver(worker, driver)
           waitingDrivers -= driver
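
The change from iterating waitingDrivers directly to List(waitingDrivers: _*) matters because the loop body removes launched drivers from the buffer it is iterating. A minimal sketch of the pattern, with hypothetical data:

    import scala.collection.mutable.ArrayBuffer

    // Removing elements from a mutable buffer while iterating it directly can
    // skip entries; iterating an immutable snapshot of it is safe.
    val waitingDrivers = ArrayBuffer("driver-0", "driver-1", "driver-2")
    for (driver <- List(waitingDrivers: _*)) { // snapshot copy of the buffer
      if (driver != "driver-1") {
        waitingDrivers -= driver               // mutate the original, not the copy
      }
    }
    // waitingDrivers now contains only "driver-1"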

core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala

Lines changed: 4 additions & 3 deletions
@@ -139,9 +139,10 @@ private[spark] class ExecutorRunner(
     Files.write(header, stderr, Charsets.UTF_8)
     CommandUtils.redirectStream(process.getErrorStream, stderr)
 
-    // Wait for it to exit; this is actually a bad thing if it happens, because we expect to run
-    // long-lived processes only. However, in the future, we might restart the executor a few
-    // times on the same machine.
+    state = ExecutorState.RUNNING
+    worker ! ExecutorStateChanged(appId, execId, state, None, None)
+    // Wait for it to exit; executor may exit with code 0 (when driver instructs it to shutdown)
+    // or with nonzero exit code
     val exitCode = process.waitFor()
     state = ExecutorState.FAILED
     val message = "Command exited with code " + exitCode

core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala

Lines changed: 23 additions & 12 deletions
@@ -211,18 +211,29 @@ private[spark] class Worker(
     if (masterUrl != activeMasterUrl) {
       logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
     } else {
-      logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))
-      // TODO (pwendell): We shuld make sparkHome an Option[String] in
-      // ApplicationDescription to be more explicit about this.
-      val effectiveSparkHome = Option(execSparkHome_).getOrElse(sparkHome.getAbsolutePath)
-      val manager = new ExecutorRunner(appId, execId, appDesc, cores_, memory_,
-        self, workerId, host, new File(effectiveSparkHome), workDir, akkaUrl, ExecutorState.RUNNING)
-      executors(appId + "/" + execId) = manager
-      manager.start()
-      coresUsed += cores_
-      memoryUsed += memory_
-      masterLock.synchronized {
-        master ! ExecutorStateChanged(appId, execId, manager.state, None, None)
+      try {
+        logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))
+        val effectiveSparkHome = Option(execSparkHome_).getOrElse(sparkHome.getAbsolutePath)
+        val manager = new ExecutorRunner(appId, execId, appDesc, cores_, memory_,
+          self, workerId, host, new File(effectiveSparkHome), workDir, akkaUrl, ExecutorState.LOADING)
+        executors(appId + "/" + execId) = manager
+        manager.start()
+        coresUsed += cores_
+        memoryUsed += memory_
+        masterLock.synchronized {
+          master ! ExecutorStateChanged(appId, execId, manager.state, None, None)
+        }
+      } catch {
+        case e: Exception => {
+          logError("Failed to launch executor %s/%d for %s".format(appId, execId, appDesc.name))
+          if (executors.contains(appId + "/" + execId)) {
+            executors(appId + "/" + execId).kill()
+            executors -= appId + "/" + execId
+          }
+          masterLock.synchronized {
+            master ! ExecutorStateChanged(appId, execId, ExecutorState.FAILED, None, None)
+          }
+        }
       }
     }
 
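Taken together, the Worker, ExecutorRunner, and Master changes in this commit appear to form one executor-lifecycle fix: the Worker now starts each ExecutorRunner in ExecutorState.LOADING (inside a try/catch that reports FAILED to the Master if launch throws), the runner transitions to RUNNING and notifies the Worker only once the executor process has actually spawned, and the Master resets an application's retry count on a RUNNING update while counting only abnormal exits (exit status other than 0) toward ApplicationState.MAX_NUM_RETRY.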
