
Commit 57ea52e

Merge branch 'master' into spark-2706

2 parents 9412d24 + 2b0d513

9 files changed: +43, -22 lines

.gitignore

Lines changed: 4 additions & 6 deletions

@@ -15,11 +15,10 @@ out/
 third_party/libmesos.so
 third_party/libmesos.dylib
 conf/java-opts
-conf/spark-env.sh
-conf/streaming-env.sh
-conf/log4j.properties
-conf/spark-defaults.conf
-conf/hive-site.xml
+conf/*.sh
+conf/*.properties
+conf/*.conf
+conf/*.xml
 docs/_site
 docs/api
 target/
@@ -50,7 +49,6 @@ unit-tests.log
 /lib/
 rat-results.txt
 scalastyle.txt
-conf/*.conf
 scalastyle-output.xml

 # For Hive

core/src/test/scala/org/apache/spark/ui/UISuite.scala

Lines changed: 5 additions & 9 deletions

@@ -23,7 +23,6 @@ import javax.servlet.http.HttpServletRequest
 import scala.io.Source
 import scala.util.{Failure, Success, Try}

-import org.eclipse.jetty.server.Server
 import org.eclipse.jetty.servlet.ServletContextHandler
 import org.scalatest.FunSuite
 import org.scalatest.concurrent.Eventually._
@@ -108,14 +107,8 @@ class UISuite extends FunSuite {
   }

   test("jetty selects different port under contention") {
-    val startPort = 4040
-    val server = new Server(startPort)
-
-    Try { server.start() } match {
-      case Success(s) =>
-      case Failure(e) =>
-      // Either case server port is busy hence setup for test complete
-    }
+    val server = new ServerSocket(0)
+    val startPort = server.getLocalPort
     val serverInfo1 = JettyUtils.startJettyServer(
       "0.0.0.0", startPort, Seq[ServletContextHandler](), new SparkConf)
     val serverInfo2 = JettyUtils.startJettyServer(
@@ -126,6 +119,9 @@ class UISuite extends FunSuite {
     assert(boundPort1 != startPort)
     assert(boundPort2 != startPort)
     assert(boundPort1 != boundPort2)
+    serverInfo1.server.stop()
+    serverInfo2.server.stop()
+    server.close()
   }

   test("jetty binds to port 0 correctly") {

docs/_layouts/global.html

Lines changed: 2 additions & 1 deletion

@@ -111,6 +111,7 @@
             <li class="divider"></li>
             <li><a href="building-spark.html">Building Spark</a></li>
             <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark">Contributing to Spark</a></li>
+            <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Supplemental+Spark+Projects">Supplemental Projects</a></li>
         </ul>
     </li>
 </ul>
@@ -151,7 +152,7 @@ <h1 class="title">{{ page.title }}</h1>
     MathJax.Hub.Config({
         tex2jax: {
             inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ],
-            displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
+            displayMath: [ ["$$","$$"], ["\\[", "\\]"] ],
             processEscapes: true,
             skipTags: ['script', 'noscript', 'style', 'textarea', 'pre']
         }

docs/configuration.md

Lines changed: 2 additions & 2 deletions

@@ -520,10 +520,10 @@ Apart from these, the following properties are also available, and may be useful
 </tr>
 <tr>
   <td><code>spark.files.fetchTimeout</code></td>
-  <td>false</td>
+  <td>60</td>
   <td>
     Communication timeout to use when fetching files added through SparkContext.addFile() from
-    the driver.
+    the driver, in seconds.
   </td>
 </tr>
 <tr>

docs/ec2-scripts.md

Lines changed: 1 addition & 1 deletion

@@ -156,6 +156,6 @@ If you have a patch or suggestion for one of these limitations, feel free to

 # Accessing Data in S3

-Spark's file interface allows it to process data in Amazon S3 using the same URI formats that are supported for Hadoop. You can specify a path in S3 as input through a URI of the form `s3n://<bucket>/path`. You will also need to set your Amazon security credentials, either by setting the environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` before your program or through `SparkContext.hadoopConfiguration`. Full instructions on S3 access using the Hadoop input libraries can be found on the [Hadoop S3 page](http://wiki.apache.org/hadoop/AmazonS3).
+Spark's file interface allows it to process data in Amazon S3 using the same URI formats that are supported for Hadoop. You can specify a path in S3 as input through a URI of the form `s3n://<bucket>/path`. To provide AWS credentials for S3 access, launch the Spark cluster with the option `--copy-aws-credentials`. Full instructions on S3 access using the Hadoop input libraries can be found on the [Hadoop S3 page](http://wiki.apache.org/hadoop/AmazonS3).

 In addition to using a single input file, you can also use a directory of files as input by simply giving the path to the directory.
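
Once credentials are in place (copied at launch with --copy-aws-credentials, or set by hand), an S3 path behaves like any other Hadoop-supported input. A brief sketch; the bucket, key prefix, and application name are made up for illustration:

    import org.apache.spark.{SparkConf, SparkContext}

    val sc = new SparkContext(new SparkConf().setAppName("s3-read-example"))

    // s3n:// URIs are resolved through Hadoop's S3 filesystem support.
    val logs = sc.textFile("s3n://my-example-bucket/logs/2014-09-01/*.log")

    // If credentials were not copied at cluster launch, they can also be set
    // on the Hadoop configuration directly (standard s3n property names):
    // sc.hadoopConfiguration.set("fs.s3n.awsAccessKeyId", sys.env("AWS_ACCESS_KEY_ID"))
    // sc.hadoopConfiguration.set("fs.s3n.awsSecretAccessKey", sys.env("AWS_SECRET_ACCESS_KEY"))

    println(logs.count())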

docs/index.md

Lines changed: 1 addition & 0 deletions

@@ -107,6 +107,7 @@ options for deployment:
 * [OpenStack Swift](storage-openstack-swift.html)
 * [Building Spark](building-spark.html): build Spark using the Maven system
 * [Contributing to Spark](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark)
+* [Supplemental Projects](https://cwiki.apache.org/confluence/display/SPARK/Supplemental+Spark+Projects): related third party Spark projects

 **External Resources:**

ec2/deploy.generic/root/spark-ec2/ec2-variables.sh

Lines changed: 2 additions & 0 deletions

@@ -30,3 +30,5 @@ export HADOOP_MAJOR_VERSION="{{hadoop_major_version}}"
 export SWAP_MB="{{swap}}"
 export SPARK_WORKER_INSTANCES="{{spark_worker_instances}}"
 export SPARK_MASTER_OPTS="{{spark_master_opts}}"
+export AWS_ACCESS_KEY_ID="{{aws_access_key_id}}"
+export AWS_SECRET_ACCESS_KEY="{{aws_secret_access_key}}"

ec2/spark_ec2.py

Lines changed: 10 additions & 0 deletions

@@ -158,6 +158,9 @@ def parse_args():
     parser.add_option(
         "--additional-security-group", type="string", default="",
         help="Additional security group to place the machines in")
+    parser.add_option(
+        "--copy-aws-credentials", action="store_true", default=False,
+        help="Add AWS credentials to hadoop configuration to allow Spark to access S3")

     (opts, args) = parser.parse_args()
     if len(args) != 2:
@@ -714,6 +717,13 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
         "spark_master_opts": opts.master_opts
     }

+    if opts.copy_aws_credentials:
+        template_vars["aws_access_key_id"] = conn.aws_access_key_id
+        template_vars["aws_secret_access_key"] = conn.aws_secret_access_key
+    else:
+        template_vars["aws_access_key_id"] = ""
+        template_vars["aws_secret_access_key"] = ""
+
     # Create a temp directory in which we will place all the files to be
     # deployed after we substitue template parameters in them
     tmp_dir = tempfile.mkdtemp()

yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala

Lines changed: 16 additions & 3 deletions

@@ -38,6 +38,7 @@ import org.scalatest.Matchers

 import scala.collection.JavaConversions._
 import scala.collection.mutable.{ HashMap => MutableHashMap }
+import scala.reflect.ClassTag
 import scala.util.Try

 import org.apache.spark.{SparkException, SparkConf}
@@ -200,9 +201,10 @@ class ClientBaseSuite extends FunSuite with Matchers {


   val knownDefMRAppCP: Seq[String] =
-    getFieldValue[String, Seq[String]](classOf[MRJobConfig],
-      "DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH",
-      Seq[String]())(a => a.split(","))
+    getFieldValue2[String, Array[String], Seq[String]](
+      classOf[MRJobConfig],
+      "DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH",
+      Seq[String]())(a => a.split(","))(a => a.toSeq)

   val knownYARNAppCP = Some(Seq("/known/yarn/path"))

@@ -232,6 +234,17 @@ class ClientBaseSuite extends FunSuite with Matchers {
   def getFieldValue[A, B](clazz: Class[_], field: String, defaults: => B)(mapTo: A => B): B =
     Try(clazz.getField(field)).map(_.get(null).asInstanceOf[A]).toOption.map(mapTo).getOrElse(defaults)

+  def getFieldValue2[A: ClassTag, A1: ClassTag, B](
+      clazz: Class[_],
+      field: String,
+      defaults: => B)(mapTo: A => B)(mapTo1: A1 => B): B = {
+    Try(clazz.getField(field)).map(_.get(null)).map {
+      case v: A => mapTo(v)
+      case v1: A1 => mapTo1(v1)
+      case _ => defaults
+    }.toOption.getOrElse(defaults)
+  }
+
   private class DummyClient(
     val args: ClientArguments,
     val conf: Configuration,
