Commit d1e28bc

Merge remote-tracking branch 'origin/master' into sql-external-sort
2 parents: cd05866 + 2d45571

69 files changed: +2922 −4060 lines

R/pkg/R/SQLContext.R

Lines changed: 3 additions & 1 deletion
@@ -86,7 +86,9 @@ infer_type <- function(x) {
 createDataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0) {
   if (is.data.frame(data)) {
     # get the names of columns, they will be put into RDD
-    schema <- names(data)
+    if (is.null(schema)) {
+      schema <- names(data)
+    }
     n <- nrow(data)
     m <- ncol(data)
     # get rid of factor type

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 11 additions & 8 deletions
@@ -532,7 +532,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
     _executorAllocationManager =
       if (dynamicAllocationEnabled) {
         assert(supportDynamicAllocation,
-          "Dynamic allocation of executors is currently only supported in YARN mode")
+          "Dynamic allocation of executors is currently only supported in YARN and Mesos mode")
         Some(new ExecutorAllocationManager(this, listenerBus, _conf))
       } else {
         None
@@ -853,7 +853,6 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       minPartitions).setName(path)
   }

-
   /**
    * :: Experimental ::
    *
@@ -1364,10 +1363,14 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli

   /**
    * Return whether dynamically adjusting the amount of resources allocated to
-   * this application is supported. This is currently only available for YARN.
+   * this application is supported. This is currently only available for YARN
+   * and Mesos coarse-grained mode.
    */
-  private[spark] def supportDynamicAllocation =
-    master.contains("yarn") || _conf.getBoolean("spark.dynamicAllocation.testing", false)
+  private[spark] def supportDynamicAllocation: Boolean = {
+    (master.contains("yarn")
+      || master.contains("mesos")
+      || _conf.getBoolean("spark.dynamicAllocation.testing", false))
+  }

   /**
    * :: DeveloperApi ::
@@ -1385,7 +1388,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
    */
   private[spark] override def requestTotalExecutors(numExecutors: Int): Boolean = {
     assert(supportDynamicAllocation,
-      "Requesting executors is currently only supported in YARN mode")
+      "Requesting executors is currently only supported in YARN and Mesos modes")
     schedulerBackend match {
       case b: CoarseGrainedSchedulerBackend =>
         b.requestTotalExecutors(numExecutors)
@@ -1403,7 +1406,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
   @DeveloperApi
   override def requestExecutors(numAdditionalExecutors: Int): Boolean = {
     assert(supportDynamicAllocation,
-      "Requesting executors is currently only supported in YARN mode")
+      "Requesting executors is currently only supported in YARN and Mesos modes")
     schedulerBackend match {
       case b: CoarseGrainedSchedulerBackend =>
         b.requestExecutors(numAdditionalExecutors)
@@ -1421,7 +1424,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
   @DeveloperApi
   override def killExecutors(executorIds: Seq[String]): Boolean = {
     assert(supportDynamicAllocation,
-      "Killing executors is currently only supported in YARN mode")
+      "Killing executors is currently only supported in YARN and Mesos modes")
     schedulerBackend match {
       case b: CoarseGrainedSchedulerBackend =>
         b.killExecutors(executorIds)

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 1 addition & 0 deletions
@@ -162,6 +162,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       .orNull
     executorCores = Option(executorCores)
       .orElse(sparkProperties.get("spark.executor.cores"))
+      .orElse(env.get("SPARK_EXECUTOR_CORES"))
       .orNull
     totalExecutorCores = Option(totalExecutorCores)
       .orElse(sparkProperties.get("spark.cores.max"))

core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala

Lines changed: 110 additions & 26 deletions
@@ -18,11 +18,14 @@
 package org.apache.spark.scheduler.cluster.mesos

 import java.io.File
-import java.util.{List => JList}
+import java.util.{List => JList, Collections}
+import java.util.concurrent.locks.ReentrantLock

 import scala.collection.JavaConversions._
 import scala.collection.mutable.{HashMap, HashSet}

+import com.google.common.collect.HashBiMap
+import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, _}
 import org.apache.mesos.{Scheduler => MScheduler, _}
 import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, _}
 import org.apache.spark.{SparkContext, SparkEnv, SparkException, TaskState}
@@ -60,9 +63,27 @@ private[spark] class CoarseMesosSchedulerBackend(

   val slaveIdsWithExecutors = new HashSet[String]

-  val taskIdToSlaveId = new HashMap[Int, String]
-  val failuresBySlaveId = new HashMap[String, Int] // How many times tasks on each slave failed
+  val taskIdToSlaveId: HashBiMap[Int, String] = HashBiMap.create[Int, String]
+  // How many times tasks on each slave failed
+  val failuresBySlaveId: HashMap[String, Int] = new HashMap[String, Int]
+
+  /**
+   * The total number of executors we aim to have. Undefined when not using dynamic allocation
+   * and before the ExecutorAllocatorManager calls [[doRequesTotalExecutors]].
+   */
+  private var executorLimitOption: Option[Int] = None
+
+  /**
+   * Return the current executor limit, which may be [[Int.MaxValue]]
+   * before properly initialized.
+   */
+  private[mesos] def executorLimit: Int = executorLimitOption.getOrElse(Int.MaxValue)
+
+  private val pendingRemovedSlaveIds = new HashSet[String]

+  // private lock object protecting mutable state above. Using the intrinsic lock
+  // may lead to deadlocks since the superclass might also try to lock
+  private val stateLock = new ReentrantLock

   val extraCoresPerSlave = conf.getInt("spark.mesos.extra.cores", 0)

@@ -86,7 +107,7 @@ private[spark] class CoarseMesosSchedulerBackend(
     startScheduler(master, CoarseMesosSchedulerBackend.this, fwInfo)
   }

-  def createCommand(offer: Offer, numCores: Int): CommandInfo = {
+  def createCommand(offer: Offer, numCores: Int, taskId: Int): CommandInfo = {
     val executorSparkHome = conf.getOption("spark.mesos.executor.home")
       .orElse(sc.getSparkHome())
       .getOrElse {
@@ -120,10 +141,6 @@ private[spark] class CoarseMesosSchedulerBackend(
     }
     val command = CommandInfo.newBuilder()
       .setEnvironment(environment)
-    val driverUrl = sc.env.rpcEnv.uriOf(
-      SparkEnv.driverActorSystemName,
-      RpcAddress(conf.get("spark.driver.host"), conf.get("spark.driver.port").toInt),
-      CoarseGrainedSchedulerBackend.ENDPOINT_NAME)

     val uri = conf.getOption("spark.executor.uri")
       .orElse(Option(System.getenv("SPARK_EXECUTOR_URI")))
@@ -133,7 +150,7 @@ private[spark] class CoarseMesosSchedulerBackend(
       command.setValue(
         "%s \"%s\" org.apache.spark.executor.CoarseGrainedExecutorBackend"
           .format(prefixEnv, runScript) +
-        s" --driver-url $driverUrl" +
+        s" --driver-url $driverURL" +
         s" --executor-id ${offer.getSlaveId.getValue}" +
         s" --hostname ${offer.getHostname}" +
         s" --cores $numCores" +
@@ -142,11 +159,12 @@ private[spark] class CoarseMesosSchedulerBackend(
       // Grab everything to the first '.'. We'll use that and '*' to
       // glob the directory "correctly".
       val basename = uri.get.split('/').last.split('.').head
+      val executorId = sparkExecutorId(offer.getSlaveId.getValue, taskId.toString)
       command.setValue(
         s"cd $basename*; $prefixEnv " +
         "./bin/spark-class org.apache.spark.executor.CoarseGrainedExecutorBackend" +
-        s" --driver-url $driverUrl" +
-        s" --executor-id ${offer.getSlaveId.getValue}" +
+        s" --driver-url $driverURL" +
+        s" --executor-id $executorId" +
         s" --hostname ${offer.getHostname}" +
         s" --cores $numCores" +
         s" --app-id $appId")
@@ -155,6 +173,17 @@ private[spark] class CoarseMesosSchedulerBackend(
     command.build()
   }

+  protected def driverURL: String = {
+    if (conf.contains("spark.testing")) {
+      "driverURL"
+    } else {
+      sc.env.rpcEnv.uriOf(
+        SparkEnv.driverActorSystemName,
+        RpcAddress(conf.get("spark.driver.host"), conf.get("spark.driver.port").toInt),
+        CoarseGrainedSchedulerBackend.ENDPOINT_NAME)
+    }
+  }
+
   override def offerRescinded(d: SchedulerDriver, o: OfferID) {}

   override def registered(d: SchedulerDriver, frameworkId: FrameworkID, masterInfo: MasterInfo) {
@@ -172,17 +201,18 @@ private[spark] class CoarseMesosSchedulerBackend(
    * unless we've already launched more than we wanted to.
    */
   override def resourceOffers(d: SchedulerDriver, offers: JList[Offer]) {
-    synchronized {
+    stateLock.synchronized {
       val filters = Filters.newBuilder().setRefuseSeconds(5).build()
       for (offer <- offers) {
         val offerAttributes = toAttributeMap(offer.getAttributesList)
         val meetsConstraints = matchesAttributeRequirements(slaveOfferConstraints, offerAttributes)
-        val slaveId = offer.getSlaveId.toString
+        val slaveId = offer.getSlaveId.getValue
         val mem = getResource(offer.getResourcesList, "mem")
         val cpus = getResource(offer.getResourcesList, "cpus").toInt
         val id = offer.getId.getValue
-        if (meetsConstraints &&
+        if (taskIdToSlaveId.size < executorLimit &&
             totalCoresAcquired < maxCores &&
+            meetsConstraints &&
             mem >= calculateTotalMemory(sc) &&
             cpus >= 1 &&
             failuresBySlaveId.getOrElse(slaveId, 0) < MAX_SLAVE_FAILURES &&
@@ -197,7 +227,7 @@ private[spark] class CoarseMesosSchedulerBackend(
           val task = MesosTaskInfo.newBuilder()
             .setTaskId(TaskID.newBuilder().setValue(taskId.toString).build())
             .setSlaveId(offer.getSlaveId)
-            .setCommand(createCommand(offer, cpusToUse + extraCoresPerSlave))
+            .setCommand(createCommand(offer, cpusToUse + extraCoresPerSlave, taskId))
             .setName("Task " + taskId)
             .addResources(createResource("cpus", cpusToUse))
             .addResources(createResource("mem", calculateTotalMemory(sc)))
@@ -209,7 +239,9 @@ private[spark] class CoarseMesosSchedulerBackend(

           // accept the offer and launch the task
           logDebug(s"Accepting offer: $id with attributes: $offerAttributes mem: $mem cpu: $cpus")
-          d.launchTasks(List(offer.getId), List(task.build()), filters)
+          d.launchTasks(
+            Collections.singleton(offer.getId),
+            Collections.singleton(task.build()), filters)
         } else {
           // Decline the offer
           logDebug(s"Declining offer: $id with attributes: $offerAttributes mem: $mem cpu: $cpus")
@@ -224,7 +256,7 @@ private[spark] class CoarseMesosSchedulerBackend(
     val taskId = status.getTaskId.getValue.toInt
     val state = status.getState
     logInfo("Mesos task " + taskId + " is now " + state)
-    synchronized {
+    stateLock.synchronized {
       if (TaskState.isFinished(TaskState.fromMesos(state))) {
         val slaveId = taskIdToSlaveId(taskId)
         slaveIdsWithExecutors -= slaveId
@@ -242,8 +274,9 @@ private[spark] class CoarseMesosSchedulerBackend(
             "is Spark installed on it?")
         }
       }
+      executorTerminated(d, slaveId, s"Executor finished with state $state")
       // In case we'd rejected everything before but have now lost a node
-      mesosDriver.reviveOffers()
+      d.reviveOffers()
      }
    }
  }
@@ -262,18 +295,39 @@ private[spark] class CoarseMesosSchedulerBackend(

   override def frameworkMessage(d: SchedulerDriver, e: ExecutorID, s: SlaveID, b: Array[Byte]) {}

-  override def slaveLost(d: SchedulerDriver, slaveId: SlaveID) {
-    logInfo("Mesos slave lost: " + slaveId.getValue)
-    synchronized {
-      if (slaveIdsWithExecutors.contains(slaveId.getValue)) {
-        // Note that the slave ID corresponds to the executor ID on that slave
-        slaveIdsWithExecutors -= slaveId.getValue
-        removeExecutor(slaveId.getValue, "Mesos slave lost")
+  /**
+   * Called when a slave is lost or a Mesos task finished. Update local view on
+   * what tasks are running and remove the terminated slave from the list of pending
+   * slave IDs that we might have asked to be killed. It also notifies the driver
+   * that an executor was removed.
+   */
+  private def executorTerminated(d: SchedulerDriver, slaveId: String, reason: String): Unit = {
+    stateLock.synchronized {
+      if (slaveIdsWithExecutors.contains(slaveId)) {
+        val slaveIdToTaskId = taskIdToSlaveId.inverse()
+        if (slaveIdToTaskId.contains(slaveId)) {
+          val taskId: Int = slaveIdToTaskId.get(slaveId)
+          taskIdToSlaveId.remove(taskId)
+          removeExecutor(sparkExecutorId(slaveId, taskId.toString), reason)
+        }
+        // TODO: This assumes one Spark executor per Mesos slave,
+        // which may no longer be true after SPARK-5095
+        pendingRemovedSlaveIds -= slaveId
+        slaveIdsWithExecutors -= slaveId
       }
     }
   }

-  override def executorLost(d: SchedulerDriver, e: ExecutorID, s: SlaveID, status: Int) {
+  private def sparkExecutorId(slaveId: String, taskId: String): String = {
+    s"$slaveId/$taskId"
+  }
+
+  override def slaveLost(d: SchedulerDriver, slaveId: SlaveID): Unit = {
+    logInfo("Mesos slave lost: " + slaveId.getValue)
+    executorTerminated(d, slaveId.getValue, "Mesos slave lost: " + slaveId.getValue)
+  }
+
+  override def executorLost(d: SchedulerDriver, e: ExecutorID, s: SlaveID, status: Int): Unit = {
     logInfo("Executor lost: %s, marking slave %s as lost".format(e.getValue, s.getValue))
     slaveLost(d, s)
   }
@@ -284,4 +338,34 @@ private[spark] class CoarseMesosSchedulerBackend(
     super.applicationId
   }

+  override def doRequestTotalExecutors(requestedTotal: Int): Boolean = {
+    // We don't truly know if we can fulfill the full amount of executors
+    // since at coarse grain it depends on the amount of slaves available.
+    logInfo("Capping the total amount of executors to " + requestedTotal)
+    executorLimitOption = Some(requestedTotal)
+    true
+  }
+
+  override def doKillExecutors(executorIds: Seq[String]): Boolean = {
+    if (mesosDriver == null) {
+      logWarning("Asked to kill executors before the Mesos driver was started.")
+      return false
+    }
+
+    val slaveIdToTaskId = taskIdToSlaveId.inverse()
+    for (executorId <- executorIds) {
+      val slaveId = executorId.split("/")(0)
+      if (slaveIdToTaskId.contains(slaveId)) {
+        mesosDriver.killTask(
+          TaskID.newBuilder().setValue(slaveIdToTaskId.get(slaveId).toString).build())
+        pendingRemovedSlaveIds += slaveId
+      } else {
+        logWarning("Unable to find executor Id '" + executorId + "' in Mesos scheduler")
+      }
+    }
+    // no need to adjust `executorLimitOption` since the AllocationManager already communicated
+    // the desired limit through a call to `doRequestTotalExecutors`.
+    // See [[o.a.s.scheduler.cluster.CoarseGrainedSchedulerBackend.killExecutors]]
+    true
+  }
 }
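Two ideas in this file are easy to miss in the diff: executor IDs become "<slaveId>/<taskId>" (sparkExecutorId), and killing an executor means mapping that ID back to a Mesos task through the inverse of taskIdToSlaveId. A small self-contained sketch of that round trip, assuming only Guava's HashBiMap on the classpath; the names are illustrative, not the backend's API:

import com.google.common.collect.HashBiMap

object ExecutorIdRoundTrip {
  // taskId -> slaveId, invertible so a slave ID can be mapped back to its task
  val taskIdToSlaveId: HashBiMap[Int, String] = HashBiMap.create[Int, String]()

  def sparkExecutorId(slaveId: String, taskId: String): String = s"$slaveId/$taskId"

  // Given an executor ID of the form "<slaveId>/<taskId>", find the Mesos task to kill.
  def taskToKill(executorId: String): Option[Int] = {
    val slaveId = executorId.split("/")(0)
    val slaveIdToTaskId = taskIdToSlaveId.inverse()
    if (slaveIdToTaskId.containsKey(slaveId)) Some(slaveIdToTaskId.get(slaveId)) else None
  }

  def main(args: Array[String]): Unit = {
    taskIdToSlaveId.put(7, "slave-42")
    val id = sparkExecutorId("slave-42", "7")   // "slave-42/7"
    println(taskToKill(id))                     // Some(7)
  }
}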

core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala

Lines changed: 2 additions & 2 deletions
@@ -24,7 +24,7 @@ import scala.collection.JavaConversions._
 import scala.util.control.NonFatal

 import com.google.common.base.Splitter
-import org.apache.mesos.{MesosSchedulerDriver, Protos, Scheduler}
+import org.apache.mesos.{MesosSchedulerDriver, SchedulerDriver, Scheduler, Protos}
 import org.apache.mesos.Protos._
 import org.apache.mesos.protobuf.GeneratedMessage
 import org.apache.spark.{Logging, SparkContext}
@@ -39,7 +39,7 @@ private[mesos] trait MesosSchedulerUtils extends Logging {
   private final val registerLatch = new CountDownLatch(1)

   // Driver for talking to Mesos
-  protected var mesosDriver: MesosSchedulerDriver = null
+  protected var mesosDriver: SchedulerDriver = null

   /**
    * Starts the MesosSchedulerDriver with the provided information. This method returns
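Widening mesosDriver from the concrete MesosSchedulerDriver to the SchedulerDriver interface is what lets tests exercise the backend without a live Mesos master. A hypothetical test fragment, assuming Mockito is on the test classpath (the real suite may wire things differently):

import org.apache.mesos.SchedulerDriver
import org.mockito.Mockito.{mock, verify}

// With the field typed as the interface, a mock can stand in for the real driver.
val driver: SchedulerDriver = mock(classOf[SchedulerDriver])
// ... hand `driver` to the backend under test, trigger resourceOffers, then e.g.
// verify(driver).killTask(...)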

core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala

Lines changed: 7 additions & 1 deletion
@@ -124,10 +124,16 @@ private[spark] class DiskBlockManager(blockManager: BlockManager, conf: SparkCon
     (blockId, getFile(blockId))
   }

+  /**
+   * Create local directories for storing block data. These directories are
+   * located inside configured local directories and won't
+   * be deleted on JVM exit when using the external shuffle service.
+   */
   private def createLocalDirs(conf: SparkConf): Array[File] = {
-    Utils.getOrCreateLocalRootDirs(conf).flatMap { rootDir =>
+    Utils.getConfiguredLocalDirs(conf).flatMap { rootDir =>
       try {
         val localDir = Utils.createDirectory(rootDir, "blockmgr")
+        Utils.chmod700(localDir)
         logInfo(s"Created local directory at $localDir")
         Some(localDir)
       } catch {
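Utils.chmod700 is Spark-internal; the effect of the added call is simply to make the freshly created block-manager directory owner-only. A generic sketch of the same restriction using only the JDK (the helper name is illustrative):

import java.io.File
import java.nio.file.Files

// Create a subdirectory and restrict it to the owner (the moral equivalent of chmod 700).
def createPrivateDir(root: File, prefix: String): File = {
  val dir = Files.createTempDirectory(root.toPath, prefix).toFile
  dir.setReadable(false, false);   dir.setReadable(true, true)    // owner-only read
  dir.setWritable(false, false);   dir.setWritable(true, true)    // owner-only write
  dir.setExecutable(false, false); dir.setExecutable(true, true)  // owner-only traverse
  dir
}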

core/src/main/scala/org/apache/spark/ui/JettyUtils.scala

Lines changed: 8 additions & 2 deletions
@@ -210,10 +210,16 @@ private[spark] object JettyUtils extends Logging {
       conf: SparkConf,
       serverName: String = ""): ServerInfo = {

-    val collection = new ContextHandlerCollection
-    collection.setHandlers(handlers.toArray)
     addFilters(handlers, conf)

+    val collection = new ContextHandlerCollection
+    val gzipHandlers = handlers.map { h =>
+      val gzipHandler = new GzipHandler
+      gzipHandler.setHandler(h)
+      gzipHandler
+    }
+    collection.setHandlers(gzipHandlers.toArray)
+
     // Bind to the given port, or throw a java.net.BindException if the port is occupied
     def connect(currentPort: Int): (Server, Int) = {
       val server = new Server(new InetSocketAddress(hostName, currentPort))
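The pattern above wraps every UI handler in a Jetty GzipHandler so responses are compressed whenever the client sends Accept-Encoding: gzip. A minimal stand-alone sketch of the same wrapping, assuming a recent Jetty where GzipHandler lives under org.eclipse.jetty.server.handler.gzip (Spark's bundled Jetty may place it elsewhere):

import javax.servlet.http.{HttpServletRequest, HttpServletResponse}
import org.eclipse.jetty.server.{Request, Server}
import org.eclipse.jetty.server.handler.AbstractHandler
import org.eclipse.jetty.server.handler.gzip.GzipHandler

object GzipSketch {
  def main(args: Array[String]): Unit = {
    val hello = new AbstractHandler {
      override def handle(target: String, baseRequest: Request,
          request: HttpServletRequest, response: HttpServletResponse): Unit = {
        response.setContentType("text/plain")
        response.getWriter.println("hello " * 1000)   // large enough to benefit from gzip
        baseRequest.setHandled(true)
      }
    }
    val gzipped = new GzipHandler
    gzipped.setHandler(hello)                          // same wrapping as in the diff

    val server = new Server(8080)
    server.setHandler(gzipped)
    server.start()
    // curl -sI -H "Accept-Encoding: gzip" http://localhost:8080/ | grep -i content-encoding
  }
}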
