
Commit 61b8e0f: merge master

2 parents 7e5a322 + 9c439d3
64 files changed: 1680 additions, 1143 deletions


core/src/main/scala/org/apache/spark/ui/UIUtils.scala

Lines changed: 4 additions & 2 deletions
@@ -216,8 +216,10 @@ private[spark] object UIUtils extends Logging {
     <div class="row-fluid">
       <div class="span12">
         <h3 style="vertical-align: middle; display: inline-block;">
-          <img src={prependBaseUri("/static/spark-logo-77x50px-hd.png")}
-               style="margin-right: 15px;" />
+          <a style="text-decoration: none" href={prependBaseUri("/")}>
+            <img src={prependBaseUri("/static/spark-logo-77x50px-hd.png")}
+                 style="margin-right: 15px;" />
+          </a>
           {title}
         </h3>
       </div>

dev/run-tests

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@ function handle_error () {
 elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.0" ]; then
   export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=2.0.0-mr1-cdh4.1.1"
 elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.2" ]; then
-  export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Dhadoop.version=2.2.0"
+  export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0"
 elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.3" ]; then
   export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
 fi

dev/scalastyle

Lines changed: 2 additions & 0 deletions
@@ -26,6 +26,8 @@ echo -e "q\n" | sbt/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 yarn/scalasty
   >> scalastyle.txt

 ERRORS=$(cat scalastyle.txt | grep -e "\<error\>")
+rm scalastyle.txt
+
 if test ! -z "$ERRORS"; then
     echo -e "Scalastyle checks failed at following occurrences:\n$ERRORS"
     exit 1

docs/README.md

Lines changed: 4 additions & 4 deletions
@@ -54,19 +54,19 @@ phase, use the following sytax:
 // supported languages too.
 {% endhighlight %}

-## API Docs (Scaladoc and Epydoc)
+## API Docs (Scaladoc and Sphinx)

 You can build just the Spark scaladoc by running `sbt/sbt doc` from the SPARK_PROJECT_ROOT directory.

-Similarly, you can build just the PySpark epydoc by running `epydoc --config epydoc.conf` from the
-SPARK_PROJECT_ROOT/pyspark directory. Documentation is only generated for classes that are listed as
+Similarly, you can build just the PySpark docs by running `make html` from the
+SPARK_PROJECT_ROOT/python/docs directory. Documentation is only generated for classes that are listed as
 public in `__init__.py`.

 When you run `jekyll` in the `docs` directory, it will also copy over the scaladoc for the various
 Spark subprojects into the `docs` directory (and then also into the `_site` directory). We use a
 jekyll plugin to run `sbt/sbt doc` before building the site so if you haven't run it (recently) it
 may take some time as it generates all of the scaladoc. The jekyll plugin also generates the
-PySpark docs using [epydoc](http://epydoc.sourceforge.net/).
+PySpark docs [Sphinx](http://sphinx-doc.org/).

 NOTE: To skip the step of building and copying over the Scala and Python API docs, run `SKIP_API=1
 jekyll`.

docs/_config.yml

Lines changed: 3 additions & 0 deletions
@@ -8,6 +8,9 @@ gems:
 kramdown:
   entity_output: numeric

+include:
+  - _static
+
 # These allow the documentation to be updated with nerw releases
 # of Spark, Scala, and Mesos.
 SPARK_VERSION: 1.0.0-SNAPSHOT

docs/_plugins/copy_api_dirs.rb

Lines changed: 10 additions & 9 deletions
@@ -63,19 +63,20 @@
   puts "cp -r " + source + "/. " + dest
   cp_r(source + "/.", dest)

-  # Build Epydoc for Python
-  puts "Moving to python directory and building epydoc."
-  cd("../python")
-  puts `epydoc --config epydoc.conf`
+  # Build Sphinx docs for Python

-  puts "Moving back into docs dir."
-  cd("../docs")
+  puts "Moving to python/docs directory and building sphinx."
+  cd("../python/docs")
+  puts `make html`
+
+  puts "Moving back into home dir."
+  cd("../../")

   puts "Making directory api/python"
-  mkdir_p "api/python"
+  mkdir_p "docs/api/python"

-  puts "cp -r ../python/docs/. api/python"
-  cp_r("../python/docs/.", "api/python")
+  puts "cp -r python/docs/_build/html/. docs/api/python"
+  cp_r("python/docs/_build/html/.", "docs/api/python")

   cd("..")
 end

ec2/spark_ec2.py

Lines changed: 87 additions & 26 deletions
@@ -32,6 +32,7 @@
 import tempfile
 import time
 import urllib2
+import warnings
 from optparse import OptionParser
 from sys import stderr
 import boto

@@ -61,8 +62,8 @@ def parse_args():
         "-s", "--slaves", type="int", default=1,
         help="Number of slaves to launch (default: %default)")
     parser.add_option(
-        "-w", "--wait", type="int", default=120,
-        help="Seconds to wait for nodes to start (default: %default)")
+        "-w", "--wait", type="int",
+        help="DEPRECATED (no longer necessary) - Seconds to wait for nodes to start")
     parser.add_option(
         "-k", "--key-pair",
         help="Key pair to use on instances")

@@ -195,18 +196,6 @@ def get_or_make_group(conn, name):
     return conn.create_security_group(name, "Spark EC2 group")


-# Wait for a set of launched instances to exit the "pending" state
-# (i.e. either to start running or to fail and be terminated)
-def wait_for_instances(conn, instances):
-    while True:
-        for i in instances:
-            i.update()
-        if len([i for i in instances if i.state == 'pending']) > 0:
-            time.sleep(5)
-        else:
-            return
-
-
 # Check whether a given EC2 instance object is in a state we consider active,
 # i.e. not terminating or terminated. We count both stopping and stopped as
 # active since we can restart stopped clusters.

@@ -594,7 +583,7 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):

     # NOTE: We should clone the repository before running deploy_files to
     # prevent ec2-variables.sh from being overwritten
-    ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git -b v3")
+    ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git -b v4")

     print "Deploying files to master..."
     deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes, modules)

@@ -619,14 +608,64 @@ def setup_spark_cluster(master, opts):
     print "Ganglia started at http://%s:5080/ganglia" % master


-# Wait for a whole cluster (masters, slaves and ZooKeeper) to start up
-def wait_for_cluster(conn, wait_secs, master_nodes, slave_nodes):
-    print "Waiting for instances to start up..."
-    time.sleep(5)
-    wait_for_instances(conn, master_nodes)
-    wait_for_instances(conn, slave_nodes)
-    print "Waiting %d more seconds..." % wait_secs
-    time.sleep(wait_secs)
+def is_ssh_available(host, opts):
+    "Checks if SSH is available on the host."
+    try:
+        with open(os.devnull, 'w') as devnull:
+            ret = subprocess.check_call(
+                ssh_command(opts) + ['-t', '-t', '-o', 'ConnectTimeout=3',
+                                     '%s@%s' % (opts.user, host), stringify_command('true')],
+                stdout=devnull,
+                stderr=devnull
+            )
+        return ret == 0
+    except subprocess.CalledProcessError as e:
+        return False
+
+
+def is_cluster_ssh_available(cluster_instances, opts):
+    for i in cluster_instances:
+        if not is_ssh_available(host=i.ip_address, opts=opts):
+            return False
+    else:
+        return True
+
+
+def wait_for_cluster_state(cluster_instances, cluster_state, opts):
+    """
+    cluster_instances: a list of boto.ec2.instance.Instance
+    cluster_state: a string representing the desired state of all the instances in the cluster
+        value can be 'ssh-ready' or a valid value from boto.ec2.instance.InstanceState such as
+        'running', 'terminated', etc.
+        (would be nice to replace this with a proper enum: http://stackoverflow.com/a/1695250)
+    """
+    sys.stdout.write(
+        "Waiting for all instances in cluster to enter '{s}' state.".format(s=cluster_state)
+    )
+    sys.stdout.flush()
+
+    num_attempts = 0
+
+    while True:
+        time.sleep(3 * num_attempts)
+
+        for i in cluster_instances:
+            s = i.update()  # capture output to suppress print to screen in newer versions of boto
+
+        if cluster_state == 'ssh-ready':
+            if all(i.state == 'running' for i in cluster_instances) and \
+               is_cluster_ssh_available(cluster_instances, opts):
+                break
+        else:
+            if all(i.state == cluster_state for i in cluster_instances):
+                break
+
+        num_attempts += 1
+
+        sys.stdout.write(".")
+        sys.stdout.flush()
+
+    sys.stdout.write("\n")


 # Get number of local disks available for a given EC2 instance type.

@@ -868,6 +907,16 @@ def real_main():
     (opts, action, cluster_name) = parse_args()

     # Input parameter validation
+    if opts.wait is not None:
+        # NOTE: DeprecationWarnings are silent in 2.7+ by default.
+        # To show them, run Python with the -Wdefault switch.
+        # See: https://docs.python.org/3.5/whatsnew/2.7.html
+        warnings.warn(
+            "This option is deprecated and has no effect. "
+            "spark-ec2 automatically waits as long as necessary for clusters to startup.",
+            DeprecationWarning
+        )
+
     if opts.ebs_vol_num > 8:
         print >> stderr, "ebs-vol-num cannot be greater than 8"
         sys.exit(1)

@@ -890,7 +939,11 @@ def real_main():
             (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name)
         else:
             (master_nodes, slave_nodes) = launch_cluster(conn, opts, cluster_name)
-        wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes)
+        wait_for_cluster_state(
+            cluster_instances=(master_nodes + slave_nodes),
+            cluster_state='ssh-ready',
+            opts=opts
+        )
         setup_cluster(conn, master_nodes, slave_nodes, opts, True)

     elif action == "destroy":

@@ -919,7 +972,11 @@ def real_main():
         else:
             group_names = [opts.security_group_prefix + "-master",
                            opts.security_group_prefix + "-slaves"]
-
+        wait_for_cluster_state(
+            cluster_instances=(master_nodes + slave_nodes),
+            cluster_state='terminated',
+            opts=opts
+        )
         attempt = 1
         while attempt <= 3:
             print "Attempt %d" % attempt

@@ -1019,7 +1076,11 @@ def real_main():
         for inst in master_nodes:
             if inst.state not in ["shutting-down", "terminated"]:
                 inst.start()
-        wait_for_cluster(conn, opts.wait, master_nodes, slave_nodes)
+        wait_for_cluster_state(
+            cluster_instances=(master_nodes + slave_nodes),
+            cluster_state='ssh-ready',
+            opts=opts
+        )
         setup_cluster(conn, master_nodes, slave_nodes, opts, False)

     else:

examples/src/main/scala/org/apache/spark/examples/mllib/AbstractParams.scala

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib
+
+import scala.reflect.runtime.universe._
+
+/**
+ * Abstract class for parameter case classes.
+ * This overrides the [[toString]] method to print all case class fields by name and value.
+ * @tparam T Concrete parameter class.
+ */
+abstract class AbstractParams[T: TypeTag] {
+
+  private def tag: TypeTag[T] = typeTag[T]
+
+  /**
+   * Finds all case class fields in concrete class instance, and outputs them in JSON-style format:
+   * {
+   *   [field name]:\t[field value]\n
+   *   [field name]:\t[field value]\n
+   *   ...
+   * }
+   */
+  override def toString: String = {
+    val tpe = tag.tpe
+    val allAccessors = tpe.declarations.collect {
+      case m: MethodSymbol if m.isCaseAccessor => m
+    }
+    val mirror = runtimeMirror(getClass.getClassLoader)
+    val instanceMirror = mirror.reflect(this)
+    allAccessors.map { f =>
+      val paramName = f.name.toString
+      val fieldMirror = instanceMirror.reflectField(f)
+      val paramValue = fieldMirror.get
+      s" $paramName:\t$paramValue"
+    }.mkString("{\n", ",\n", "\n}")
+  }
+}
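
For context, the sketch below shows how a parameter case class picks up the reflective toString from AbstractParams. ExampleParams and ExampleParamsDemo are hypothetical names used only for illustration and are not part of this commit; they mirror the pattern that the Params classes in BinaryClassification.scala and Correlations.scala adopt in the changes that follow.

import org.apache.spark.examples.mllib.AbstractParams

// Hypothetical parameter class for illustration only (not part of this commit).
case class ExampleParams(
    input: String = "data/mllib/sample_linear_regression_data.txt",
    numIterations: Int = 100,
    regParam: Double = 0.1) extends AbstractParams[ExampleParams]

object ExampleParamsDemo {
  def main(args: Array[String]): Unit = {
    // The inherited toString reflects over the case accessors, so every field is
    // printed by name and value without any hand-written formatting code.
    println(ExampleParams(numIterations = 50))
    // Prints roughly:
    // {
    //   input:   data/mllib/sample_linear_regression_data.txt,
    //   numIterations:   50,
    //   regParam:   0.1
    // }
  }
}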

examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ object BinaryClassification {
       stepSize: Double = 1.0,
       algorithm: Algorithm = LR,
       regType: RegType = L2,
-      regParam: Double = 0.1)
+      regParam: Double = 0.1) extends AbstractParams[Params]

   def main(args: Array[String]) {
     val defaultParams = Params()

examples/src/main/scala/org/apache/spark/examples/mllib/Correlations.scala

Lines changed: 1 addition & 0 deletions
@@ -35,6 +35,7 @@ import org.apache.spark.{SparkConf, SparkContext}
 object Correlations {

   case class Params(input: String = "data/mllib/sample_linear_regression_data.txt")
+    extends AbstractParams[Params]

   def main(args: Array[String]) {
