@@ -76,7 +76,7 @@ private[spark] class MesosClusterTaskState(
76
76
*/
77
77
private [spark] class MesosClusterSchedulerState (
78
78
val frameworkId : String ,
79
- val masterUrl : String ,
79
+ val masterUrl : Option [ String ] ,
80
80
val queuedDrivers : Iterable [MesosDriverDescription ],
81
81
val launchedDrivers : Iterable [MesosClusterTaskState ],
82
82
val finishedDrivers : Iterable [MesosClusterTaskState ],
@@ -135,7 +135,7 @@ private[spark] class MesosClusterSchedulerDriver(
135
135
// All supervised drivers that are waiting to retry after termination.
136
136
var superviseRetryList : SuperviseRetryList = _
137
137
138
- private var masterInfo : MasterInfo = _
138
+ private var masterInfo : Option [ MasterInfo ] = None
139
139
140
140
private def createDateFormat = new SimpleDateFormat (" yyyyMMddHHmmss" ) // For application IDs
141
141
private def newDriverId (submitDate : Date ): String = {
@@ -252,7 +252,7 @@ private[spark] class MesosClusterSchedulerDriver(
252
252
markRegistered()
253
253
254
254
stateLock.synchronized {
255
- this .masterInfo = masterInfo
255
+ this .masterInfo = Some ( masterInfo)
256
256
if (! launchedDrivers.pendingRecover.isEmpty) {
257
257
// Start task reconciliation if we need to recover.
258
258
val statuses = launchedDrivers.pendingRecover.collect {
@@ -350,20 +350,14 @@ private[spark] class MesosClusterSchedulerDriver(
350
350
getResource(o.getResourcesList, " cpus" ),
351
351
getResource(o.getResourcesList, " mem" ))
352
352
}
353
- logTrace(s " Received offers from Mesos: ${printOffers(currentOffers)}" )
353
+ logTrace(s " Received offers from Mesos: \n ${printOffers(currentOffers)}" )
354
354
val tasks = new mutable.HashMap [OfferID , ArrayBuffer [TaskInfo ]]()
355
355
val currentTime = new Date ()
356
-
357
356
def scheduleTasks (
358
- taskFunc : () => ( Option [ MesosDriverDescription ] , Option [RetryState ]),
357
+ tasksFunc : () => Seq [( MesosDriverDescription , Option [RetryState ])] ,
359
358
scheduledCallback : (String ) => Unit ): Unit = {
360
- var nextItem = taskFunc()
361
- // TODO: We should not stop scheduling at the very first task
362
- // that cannot be scheduled. Instead we should exhaust the
363
- // candidate list and remove drivers that cannot scheduled
364
- // over a configurable period of time.
365
- while (nextItem._1.isDefined) {
366
- val (submission, retryState) = (nextItem._1.get, nextItem._2)
359
+ val candidates = tasksFunc()
360
+ for ((submission, retryState) <- candidates) {
367
361
val driverCpu = submission.cores
368
362
val driverMem = submission.mem
369
363
logTrace(s " Finding offer to launch driver with cpu: $driverCpu, mem: $driverMem" )
@@ -374,56 +368,54 @@ private[spark] class MesosClusterSchedulerDriver(
374
368
if (offerOption.isEmpty) {
375
369
logDebug(s " Unable to find offer to launch driver id: ${submission.submissionId.get}, " +
376
370
s " cpu: $driverCpu, mem: $driverMem" )
377
- return
378
- }
379
-
380
- val offer = offerOption.get
381
- offer.cpu -= driverCpu
382
- offer.mem -= driverMem
383
- val taskId = TaskID .newBuilder().setValue(submission.submissionId.get).build()
384
- val cpuResource = Resource .newBuilder()
385
- .setName(" cpus" ).setType(Value .Type .SCALAR )
386
- .setScalar(Value .Scalar .newBuilder().setValue(driverCpu)).build()
387
- val memResource = Resource .newBuilder()
388
- .setName(" mem" ).setType(Value .Type .SCALAR )
389
- .setScalar(Value .Scalar .newBuilder().setValue(driverMem)).build()
390
- val commandInfo = buildCommand(submission)
391
- val appName = submission.schedulerProperties(" spark.app.name" )
392
- val taskInfo = TaskInfo .newBuilder()
393
- .setTaskId(taskId)
394
- .setName(s " Driver for $appName" )
395
- .setSlaveId(offer.offer.getSlaveId)
396
- .setCommand(commandInfo)
397
- .addResources(cpuResource)
398
- .addResources(memResource)
399
- .build
400
- val queuedTasks = if (! tasks.contains(offer.offer.getId)) {
401
- val buffer = new ArrayBuffer [TaskInfo ]
402
- tasks(offer.offer.getId) = buffer
403
- buffer
404
371
} else {
405
- tasks(offer.offer.getId)
372
+ val offer = offerOption.get
373
+ offer.cpu -= driverCpu
374
+ offer.mem -= driverMem
375
+ val taskId = TaskID .newBuilder().setValue(submission.submissionId.get).build()
376
+ val cpuResource = Resource .newBuilder()
377
+ .setName(" cpus" ).setType(Value .Type .SCALAR )
378
+ .setScalar(Value .Scalar .newBuilder().setValue(driverCpu)).build()
379
+ val memResource = Resource .newBuilder()
380
+ .setName(" mem" ).setType(Value .Type .SCALAR )
381
+ .setScalar(Value .Scalar .newBuilder().setValue(driverMem)).build()
382
+ val commandInfo = buildCommand(submission)
383
+ val appName = submission.schedulerProperties(" spark.app.name" )
384
+ val taskInfo = TaskInfo .newBuilder()
385
+ .setTaskId(taskId)
386
+ .setName(s " Driver for $appName" )
387
+ .setSlaveId(offer.offer.getSlaveId)
388
+ .setCommand(commandInfo)
389
+ .addResources(cpuResource)
390
+ .addResources(memResource)
391
+ .build
392
+ val queuedTasks = if (! tasks.contains(offer.offer.getId)) {
393
+ val buffer = new ArrayBuffer [TaskInfo ]
394
+ tasks(offer.offer.getId) = buffer
395
+ buffer
396
+ } else {
397
+ tasks(offer.offer.getId)
398
+ }
399
+ queuedTasks += taskInfo
400
+ logTrace(s " Using offer ${offer.offer.getId.getValue} to launch driver " +
401
+ submission.submissionId.get)
402
+
403
+ launchedDrivers.set(
404
+ submission.submissionId.get,
405
+ new MesosClusterTaskState (submission, taskId, offer.offer.getSlaveId,
406
+ None , new Date (), retryState))
407
+ scheduledCallback(submission.submissionId.get)
406
408
}
407
- queuedTasks += taskInfo
408
- logTrace(s " Using offer ${offer.offer.getId.getValue} to launch driver " +
409
- submission.submissionId.get)
410
-
411
- launchedDrivers.set(
412
- submission.submissionId.get,
413
- new MesosClusterTaskState (submission, taskId, offer.offer.getSlaveId,
414
- None , new Date (), retryState))
415
- scheduledCallback(submission.submissionId.get)
416
- nextItem = taskFunc()
417
409
}
418
410
}
419
411
420
412
stateLock.synchronized {
421
413
scheduleTasks(() => {
422
- superviseRetryList.getNextRetry (currentTime)
414
+ superviseRetryList.getNextRetries (currentTime)
423
415
}, (id : String ) => {
424
416
superviseRetryList.remove(id)
425
417
})
426
- scheduleTasks(() => ( queue.peek() , None ), (_ ) => queue.poll( ))
418
+ scheduleTasks(() => queue.drivers.map(d => (d , None )) , (id ) => queue.remove(id ))
427
419
}
428
420
429
421
tasks.foreach { case (offerId, tasks) =>
@@ -439,8 +431,8 @@ private[spark] class MesosClusterSchedulerDriver(
439
431
stateLock.synchronized {
440
432
new MesosClusterSchedulerState (
441
433
frameworkId,
442
- s " http:// ${masterInfo .getIp}: ${masterInfo .getPort}" ,
443
- queue.drivers ,
434
+ masterInfo.map(m => s " http:// ${m .getIp}: ${m .getPort}" ) ,
435
+ queue.copyDrivers ,
444
436
launchedDrivers.states,
445
437
finishedDrivers.collect { case s => s.copy() },
446
438
superviseRetryList.retries)
0 commit comments