
Commit c6d7478

Author: Marcelo Vanzin
Merge branch 'master' into yarn-hs-link-2
Conflicts:
    core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
    core/src/main/scala/org/apache/spark/scheduler/ApplicationEventListener.scala
    core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala
    core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala
    core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
    core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
    yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
2 parents: 4e3483f + ba28a8f

576 files changed, +28737 -4956 lines


.rat-excludes

Lines changed: 1 addition & 0 deletions
@@ -55,3 +55,4 @@ dist/*
 .*ipr
 .*iws
 logs
+.*scalastyle-output.xml

LICENSE

Lines changed: 3 additions & 2 deletions
@@ -272,7 +272,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 ========================================================================
-For Py4J (python/lib/py4j0.7.egg and files in assembly/lib/net/sf/py4j):
+For Py4J (python/lib/py4j-0.8.2.1-src.zip)
 ========================================================================
 
 Copyright (c) 2009-2011, Barthelemy Dagenais All rights reserved.
@@ -532,7 +532,7 @@ The following components are provided under a BSD-style license. See project lin
 (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
 (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
 (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
-(The New BSD License) Py4J (net.sf.py4j:py4j:0.8.1 - http://py4j.sourceforge.net/)
+(The New BSD License) Py4J (net.sf.py4j:py4j:0.8.2.1 - http://py4j.sourceforge.net/)
 (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
 (ISC/BSD License) jbcrypt (org.mindrot:jbcrypt:0.3m - http://www.mindrot.org/)
 
@@ -549,3 +549,4 @@ The following components are provided under the MIT License. See project link fo
 (MIT License) pyrolite (org.spark-project:pyrolite:2.0.1 - http://pythonhosted.org/Pyro4/)
 (MIT License) scopt (com.github.scopt:scopt_2.10:3.2.0 - https://github.com/scopt/scopt)
 (The MIT License) Mockito (org.mockito:mockito-all:1.8.5 - http://www.mockito.org)
+(MIT License) jquery (https://jquery.org/license/)

bin/beeline

Lines changed: 7 additions & 22 deletions
@@ -17,29 +17,14 @@
 # limitations under the License.
 #
 
-# Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+#
+# Shell script for starting BeeLine
 
-# Find the java binary
-if [ -n "${JAVA_HOME}" ]; then
-  RUNNER="${JAVA_HOME}/bin/java"
-else
-  if [ `command -v java` ]; then
-    RUNNER="java"
-  else
-    echo "JAVA_HOME is not set" >&2
-    exit 1
-  fi
-fi
+# Enter posix mode for bash
+set -o posix
 
-# Compute classpath using external script
-classpath_output=$($FWDIR/bin/compute-classpath.sh)
-if [[ "$?" != "0" ]]; then
-  echo "$classpath_output"
-  exit 1
-else
-  CLASSPATH=$classpath_output
-fi
+# Figure out where Spark is installed
+FWDIR="$(cd `dirname $0`/..; pwd)"
 
 CLASS="org.apache.hive.beeline.BeeLine"
-exec "$RUNNER" -cp "$CLASSPATH" $CLASS "$@"
+exec "$FWDIR/bin/spark-class" $CLASS "$@"

bin/pyspark

Lines changed: 15 additions & 5 deletions
@@ -23,12 +23,18 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
+source $FWDIR/bin/utils.sh
+
 SCALA_VERSION=2.10
 
-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+function usage() {
   echo "Usage: ./bin/pyspark [options]" 1>&2
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
+}
+
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  usage
 fi
 
 # Exit if the user hasn't compiled Spark
@@ -52,7 +58,7 @@ export PYSPARK_PYTHON
 
 # Add the PySpark classes to the Python path:
 export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH
-export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.1-src.zip:$PYTHONPATH
+export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH
 
 # Load the PySpark shell.py script when ./pyspark is used interactively:
 export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
@@ -66,10 +72,11 @@ fi
 # Build up arguments list manually to preserve quotes and backslashes.
 # We export Spark submit arguments as an environment variable because shell.py must run as a
 # PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
-
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
 PYSPARK_SUBMIT_ARGS=""
 whitespace="[[:space:]]"
-for i in "$@"; do
+for i in "${SUBMISSION_OPTS[@]}"; do
   if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
   if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
   PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
@@ -90,7 +97,10 @@ fi
 if [[ "$1" =~ \.py$ ]]; then
   echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
   echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
-  exec $FWDIR/bin/spark-submit "$@"
+  primary=$1
+  shift
+  gatherSparkSubmitOpts "$@"
+  exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
else
   # Only use ipython if no command line arguments were provided [SPARK-1134]
   if [[ "$IPYTHON" = "1" ]]; then

bin/pyspark2.cmd

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ rem Figure out which Python to use.
 if [%PYSPARK_PYTHON%] == [] set PYSPARK_PYTHON=python
 
 set PYTHONPATH=%FWDIR%python;%PYTHONPATH%
-set PYTHONPATH=%FWDIR%python\lib\py4j-0.8.1-src.zip;%PYTHONPATH%
+set PYTHONPATH=%FWDIR%python\lib\py4j-0.8.2.1-src.zip;%PYTHONPATH%
 
 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%FWDIR%python\pyspark\shell.py

bin/run-example

Lines changed: 2 additions & 1 deletion
@@ -29,7 +29,8 @@ if [ -n "$1" ]; then
 else
   echo "Usage: ./bin/run-example <example-class> [example-args]" 1>&2
   echo "  - set MASTER=XX to use a specific master" 1>&2
-  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)" 1>&2
+  echo "  - can use abbreviated example class name relative to com.apache.spark.examples" 1>&2
+  echo "     (e.g. SparkPi, mllib.LinearRegression, streaming.KinesisWordCountASL)" 1>&2
   exit 1
 fi
 

bin/run-example2.cmd

Lines changed: 2 additions & 1 deletion
@@ -32,7 +32,8 @@ rem Test that an argument was given
 if not "x%1"=="x" goto arg_given
   echo Usage: run-example ^<example-class^> [example-args]
   echo   - set MASTER=XX to use a specific master
-  echo   - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)
+  echo   - can use abbreviated example class name relative to com.apache.spark.examples
+  echo      (e.g. SparkPi, mllib.LinearRegression, streaming.KinesisWordCountASL)
   goto exit
 :arg_given
 

bin/spark-shell

Lines changed: 14 additions & 6 deletions
@@ -31,13 +31,21 @@ set -o posix
 ## Global script variables
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
+function usage() {
+  echo "Usage: ./bin/spark-shell [options]"
+  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  exit 0
+}
+
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./bin/spark-shell [options]"
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
-  exit 0
+  usage
 fi
 
-function main(){
+source $FWDIR/bin/utils.sh
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
+
+function main() {
   if $cygwin; then
     # Workaround for issue involving JLine and Cygwin
     # (see http://sourceforge.net/p/jline/bugs/40/).
@@ -46,11 +54,11 @@ function main(){
     # (see https://github.com/sbt/sbt/issues/562).
     stty -icanon min 1 -echo > /dev/null 2>&1
     export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
     stty icanon echo > /dev/null 2>&1
   else
     export SPARK_SUBMIT_OPTS
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
   fi
 }
 
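
With "${SUBMISSION_OPTS[@]}" spliced in before the spark-shell application name, submit flags now reach spark-submit instead of being treated as application arguments. An illustrative call (the master URL and memory value below are hypothetical):

    ./bin/spark-shell --master spark://host:7077 --driver-memory 2g
    # becomes, in effect:
    #   spark-submit --class org.apache.spark.repl.Main \
    #     --master spark://host:7077 --driver-memory 2g spark-shell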

bin/spark-sql

Lines changed: 62 additions & 4 deletions
@@ -23,14 +23,72 @@
 # Enter posix mode for bash
 set -o posix
 
+CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
+
 # Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./sbin/spark-sql [options]"
+function usage {
+  echo "Usage: ./bin/spark-sql [options] [cli option]"
+  pattern="usage"
+  pattern+="\|Spark assembly has been built with Hive"
+  pattern+="\|NOTE: SPARK_PREPEND_CLASSES is set"
+  pattern+="\|Spark Command: "
+  pattern+="\|--help"
+  pattern+="\|======="
+
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  echo
+  echo "CLI options:"
+  $FWDIR/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
+}
+
+function ensure_arg_number {
+  arg_number=$1
+  at_least=$2
+
+  if [[ $arg_number -lt $at_least ]]; then
+    usage
+    exit 1
+  fi
+}
+
+if [[ "$@" = --help ]] || [[ "$@" = -h ]]; then
+  usage
   exit 0
 fi
 
-CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
-exec "$FWDIR"/bin/spark-submit --class $CLASS spark-internal $@
+CLI_ARGS=()
+SUBMISSION_ARGS=()
+
+while (($#)); do
+  case $1 in
+    -d | --define | --database | -f | -h | --hiveconf | --hivevar | -i | -p)
+      ensure_arg_number $# 2
+      CLI_ARGS+=($1); shift
+      CLI_ARGS+=($1); shift
+      ;;
+
+    -e)
+      ensure_arg_number $# 2
+      CLI_ARGS+=($1); shift
+      CLI_ARGS+=(\"$1\"); shift
+      ;;
+
+    -s | --silent)
+      CLI_ARGS+=($1); shift
+      ;;
+
+    -v | --verbose)
+      # Both SparkSubmit and SparkSQLCLIDriver recognizes -v | --verbose
+      CLI_ARGS+=($1)
+      SUBMISSION_ARGS+=($1); shift
+      ;;
+
+    *)
+      SUBMISSION_ARGS+=($1); shift
+      ;;
+  esac
+done
+
+eval exec "$FWDIR"/bin/spark-submit --class $CLASS ${SUBMISSION_ARGS[*]} spark-internal ${CLI_ARGS[*]}

bin/utils.sh

Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Gather all all spark-submit options into SUBMISSION_OPTS
+function gatherSparkSubmitOpts() {
+
+  if [ -z "$SUBMIT_USAGE_FUNCTION" ]; then
+    echo "Function for printing usage of $0 is not set." 1>&2
+    echo "Please set usage function to shell variable 'SUBMIT_USAGE_FUNCTION' in $0" 1>&2
+    exit 1
+  fi
+
+  # NOTE: If you add or remove spark-sumbmit options,
+  # modify NOT ONLY this script but also SparkSubmitArgument.scala
+  SUBMISSION_OPTS=()
+  APPLICATION_OPTS=()
+  while (($#)); do
+    case "$1" in
+      --master | --deploy-mode | --class | --name | --jars | --py-files | --files | \
+      --conf | --properties-file | --driver-memory | --driver-java-options | \
+      --driver-library-path | --driver-class-path | --executor-memory | --driver-cores | \
+      --total-executor-cores | --executor-cores | --queue | --num-executors | --archives)
+        if [[ $# -lt 2 ]]; then
+          "$SUBMIT_USAGE_FUNCTION"
+          exit 1;
+        fi
+        SUBMISSION_OPTS+=("$1"); shift
+        SUBMISSION_OPTS+=("$1"); shift
+        ;;
+
+      --verbose | -v | --supervise)
+        SUBMISSION_OPTS+=("$1"); shift
+        ;;
+
+      *)
+        APPLICATION_OPTS+=("$1"); shift
+        ;;
+    esac
+  done
+
+  export SUBMISSION_OPTS
+  export APPLICATION_OPTS
+}
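
A minimal sketch of a consumer of this helper, mirroring how bin/pyspark and bin/spark-shell use it in this commit (the wrapper script and tool name are hypothetical):

    #!/usr/bin/env bash
    # Hypothetical wrapper script, for illustration only
    FWDIR="$(cd `dirname $0`/..; pwd)"
    source $FWDIR/bin/utils.sh

    function usage() {
      echo "Usage: ./bin/my-tool [options]"  # hypothetical tool name
      exit 0
    }
    # gatherSparkSubmitOpts exits with an error unless this is set first
    SUBMIT_USAGE_FUNCTION=usage

    gatherSparkSubmitOpts "$@"
    # spark-submit flags (e.g. --master, with their values) land in SUBMISSION_OPTS;
    # everything else lands in APPLICATION_OPTS.
    echo "submit opts: ${SUBMISSION_OPTS[@]}"
    echo "app opts:    ${APPLICATION_OPTS[@]}"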
