@@ -532,70 +532,47 @@ private[master] class Master(
   }
 
   /**
-   * Schedule executors to be launched on the workers.
-   *
-   * There are two modes of launching executors. The first attempts to spread out an application's
-   * executors on as many workers as possible, while the second does the opposite (i.e. launch them
-   * on as few workers as possible). The former is usually better for data locality purposes and is
-   * the default.
-   *
-   * The number of cores assigned to each executor is configurable. When this is explicitly set,
-   * multiple executors from the same application may be launched on the same worker if the worker
-   * has enough cores and memory. Otherwise, each executor grabs all the cores available on the
-   * worker by default, in which case only one executor may be launched on each worker.
+   * Schedule and launch executors on workers
    */
   private def startExecutorsOnWorkers(): Unit = {
     // Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app
     // in the queue, then the second app, etc.
-    if (spreadOutApps) {
-      // Try to spread out each app among all the workers, until it has all its cores
-      for (app <- waitingApps if app.coresLeft > 0) {
-        val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
-          .filter(worker => worker.memoryFree >= app.desc.memoryPerExecutorMB &&
-            worker.coresFree >= app.desc.coresPerExecutor.getOrElse(1))
-          .sortBy(_.coresFree).reverse
-        val numUsable = usableWorkers.length
-        val assigned = new Array[Int](numUsable) // Number of cores to give on each node
-        var toAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)
-        var pos = 0
-        while (toAssign > 0) {
-          if (usableWorkers(pos).coresFree - assigned(pos) > 0) {
-            toAssign -= 1
-            assigned(pos) += 1
-          }
-          pos = (pos + 1) % numUsable
-        }
-        // Now that we've decided how many cores to give on each node, let's actually give them
-        for (pos <- 0 until numUsable if assigned(pos) > 0) {
-          allocateWorkerResourceToExecutors(app, assigned(pos), usableWorkers(pos))
-        }
-      }
-    } else {
-      // Pack each app into as few workers as possible until we've assigned all its cores
-      for (worker <- workers if worker.coresFree > 0 && worker.state == WorkerState.ALIVE) {
-        for (app <- waitingApps if app.coresLeft > 0) {
-          allocateWorkerResourceToExecutors(app, app.coresLeft, worker)
-        }
+    for (app <- waitingApps if app.coresLeft > 0) {
+      val coresPerExecutor: Option[Int] = app.desc.coresPerExecutor
+      // Filter out workers that don't have enough resources to launch an executor
+      val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
+        .filter(worker => worker.memoryFree >= app.desc.memoryPerExecutorMB &&
+          worker.coresFree >= coresPerExecutor.getOrElse(1))
+        .sortBy(_.coresFree).reverse
+      val assignedCores = Master.scheduleExecutorsOnWorkers(app, usableWorkers, spreadOutApps)
+
+      // Now that we've decided how many cores to allocate on each worker, let's allocate them
+      for (pos <- 0 until usableWorkers.length if assignedCores(pos) > 0) {
+        allocateWorkerResourceToExecutors(
+          app, assignedCores(pos), coresPerExecutor, usableWorkers(pos))
       }
     }
   }
 
   /**
    * Allocate a worker's resources to one or more executors.
    * @param app the info of the application which the executors belong to
-   * @param coresToAllocate cores on this worker to be allocated to this application
+   * @param assignedCores number of cores on this worker for this application
+   * @param coresPerExecutor number of cores per executor
    * @param worker the worker info
    */
   private def allocateWorkerResourceToExecutors(
       app: ApplicationInfo,
-      coresToAllocate: Int,
+      assignedCores: Int,
+      coresPerExecutor: Option[Int],
       worker: WorkerInfo): Unit = {
-    val memoryPerExecutor = app.desc.memoryPerExecutorMB
-    val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(coresToAllocate)
-    var coresLeft = coresToAllocate
-    while (coresLeft >= coresPerExecutor && worker.memoryFree >= memoryPerExecutor) {
-      val exec = app.addExecutor(worker, coresPerExecutor)
-      coresLeft -= coresPerExecutor
+    // If the number of cores per executor is specified, we divide the cores assigned
+    // to this worker evenly among the executors with no remainder.
+    // Otherwise, we launch a single executor that grabs all the assignedCores on this worker.
+    val numExecutors = coresPerExecutor.map { assignedCores / _ }.getOrElse(1)
+    val coresToAssign = coresPerExecutor.getOrElse(assignedCores)
+    for (i <- 1 to numExecutors) {
+      val exec = app.addExecutor(worker, coresToAssign)
       launchExecutor(worker, exec)
       app.state = ApplicationState.RUNNING
     }
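The executor-sizing arithmetic in the new `allocateWorkerResourceToExecutors` is worth spelling out. Below is a minimal, self-contained sketch of just that step; the object name and the numeric values are illustrative assumptions, not part of this patch:

```scala
// Sketch of the executor-sizing arithmetic above; values are hypothetical.
object ExecutorSizingSketch extends App {
  def sizeExecutors(assignedCores: Int, coresPerExecutor: Option[Int]): (Int, Int) = {
    // With coresPerExecutor set, integer division drops any remainder cores.
    val numExecutors = coresPerExecutor.map(assignedCores / _).getOrElse(1)
    // Without it, a single executor grabs every core assigned on this worker.
    val coresToAssign = coresPerExecutor.getOrElse(assignedCores)
    (numExecutors, coresToAssign)
  }

  println(sizeExecutors(10, Some(4))) // (2, 4): two 4-core executors
  println(sizeExecutors(10, None))    // (1, 10): one executor with all 10 cores
}
```

Note the integer division: with assignedCores = 10 and coresPerExecutor = Some(4), the 2 leftover cores are simply left unused on that worker rather than producing an undersized executor.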
@@ -892,7 +869,7 @@ private[master] class Master(
 
 private[deploy] object Master extends Logging {
   val systemName = "sparkMaster"
-  private val actorName = "Master"
+  val actorName = "Master"
 
   def main(argStrings: Array[String]) {
     SignalLogger.register(log)
@@ -944,4 +921,74 @@ private[deploy] object Master extends Logging {
     val portsResponse = Await.result(portsRequest, timeout).asInstanceOf[BoundPortsResponse]
     (actorSystem, boundPort, portsResponse.webUIPort, portsResponse.restPort)
   }
+
+
+  /**
+   * Schedule executors to be launched on the workers.
+   * Returns an array containing the number of cores assigned to each worker.
+   *
+   * There are two modes of launching executors. The first attempts to spread out an application's
+   * executors on as many workers as possible, while the second does the opposite (i.e. launch them
+   * on as few workers as possible). The former is usually better for data locality purposes and is
+   * the default.
+   *
+   * The number of cores assigned to each executor is configurable. When this is explicitly set,
+   * multiple executors from the same application may be launched on the same worker if the worker
+   * has enough cores and memory. Otherwise, each executor grabs all the cores available on the
+   * worker by default, in which case only one executor may be launched on each worker.
+   *
+   * It is important to allocate coresPerExecutor on each worker at a time (instead of 1 core
+   * at a time). Consider the following example: the cluster has 4 workers with 16 cores each.
+   * The user requests 3 executors (spark.cores.max = 48, spark.executor.cores = 16). If 1 core
+   * were allocated at a time, 12 cores from each worker would be assigned to each executor.
+   * Since 12 < 16, no executors would launch [SPARK-8881].
+   *
+   * Unfortunately, this must be moved out into the Master object because Akka allows
+   * neither creating actors outside of Props nor accessing the Master after setting up the
+   * actor system. Otherwise, there is no way to test it.
+   */
+  def scheduleExecutorsOnWorkers(
+      app: ApplicationInfo,
+      usableWorkers: Array[WorkerInfo],
+      spreadOutApps: Boolean): Array[Int] = {
+    // If the number of cores per executor is not specified, then we can just schedule
+    // 1 core at a time since we expect a single executor to be launched on each worker
+    val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(1)
+    val memoryPerExecutor = app.desc.memoryPerExecutorMB
+    val numUsable = usableWorkers.length
+    val assignedCores = new Array[Int](numUsable) // Number of cores to give to each worker
+    val assignedMemory = new Array[Int](numUsable) // Amount of memory to give to each worker
+    var coresToAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)
+    var freeWorkers = (0 until numUsable).toIndexedSeq
+
+    def canLaunchExecutor(pos: Int): Boolean = {
+      usableWorkers(pos).coresFree - assignedCores(pos) >= coresPerExecutor &&
+      usableWorkers(pos).memoryFree - assignedMemory(pos) >= memoryPerExecutor
+    }
+
+    while (coresToAssign >= coresPerExecutor && freeWorkers.nonEmpty) {
+      freeWorkers = freeWorkers.filter(canLaunchExecutor)
+      freeWorkers.foreach { pos =>
+        var keepScheduling = true
+        while (keepScheduling && canLaunchExecutor(pos) && coresToAssign >= coresPerExecutor) {
+          coresToAssign -= coresPerExecutor
+          assignedCores(pos) += coresPerExecutor
+          // If cores per executor is not set, we are assigning 1 core at a time
+          // without actually meaning to launch 1 executor for each core assigned
+          if (app.desc.coresPerExecutor.isDefined) {
+            assignedMemory(pos) += memoryPerExecutor
+          }
+
+          // Spreading out an application means spreading out its executors across as
+          // many workers as possible. If we are not spreading out, then we should keep
+          // scheduling executors on this worker until we use all of its resources.
+          // Otherwise, just move on to the next worker.
+          if (spreadOutApps) {
+            keepScheduling = false
+          }
+        }
+      }
+    }
+    assignedCores
+  }
 }
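To make the SPARK-8881 example in the doc comment above concrete, here is a standalone sketch of the spread-out pass of `scheduleExecutorsOnWorkers`. The `Worker` case class, memory figures, and object name are simplified stand-ins for `WorkerInfo` and the real config, assumed for illustration only:

```scala
// Reproduces the SPARK-8881 scenario: 4 workers x 16 cores,
// spark.executor.cores = 16, spark.cores.max = 48.
object SpreadOutSchedulingSketch extends App {
  case class Worker(coresFree: Int, memoryFree: Int)

  val workers = Array.fill(4)(Worker(coresFree = 16, memoryFree = 32768))
  val coresPerExecutor = 16    // spark.executor.cores
  val memoryPerExecutor = 1024
  var coresToAssign = math.min(48, workers.map(_.coresFree).sum) // capped by spark.cores.max

  val assignedCores = new Array[Int](workers.length)
  val assignedMemory = new Array[Int](workers.length)

  // A worker can take one more executor if a full executor's worth of cores
  // and memory remains after what we have already assigned to it.
  def canLaunchExecutor(pos: Int): Boolean =
    workers(pos).coresFree - assignedCores(pos) >= coresPerExecutor &&
    workers(pos).memoryFree - assignedMemory(pos) >= memoryPerExecutor

  var free: IndexedSeq[Int] = workers.indices
  while (coresToAssign >= coresPerExecutor && free.nonEmpty) {
    free = free.filter(canLaunchExecutor)
    // Spread out: allocate one executor's worth of cores per worker per pass.
    free.foreach { pos =>
      if (canLaunchExecutor(pos) && coresToAssign >= coresPerExecutor) {
        coresToAssign -= coresPerExecutor
        assignedCores(pos) += coresPerExecutor
        assignedMemory(pos) += memoryPerExecutor
      }
    }
  }

  // Prints 16, 16, 16, 0: three full 16-core executors. Allocating 1 core at a
  // time round-robin would have yielded 12, 12, 12, 12, and since 12 < 16 no
  // executor could launch.
  println(assignedCores.mkString(", "))
}
```

Because the loop claims a whole executor's worth of cores per step, it can never strand every worker at 12 of 16 cores, which is exactly the failure mode this patch fixes.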