
Commit f5aa841

Merge pull request #3 from apache/branch-1.1

merge upstream changes from branch-1.1

2 parents: 407ea9f + e136312

File tree

383 files changed: +26023 −4489 lines


CHANGES.txt

Lines changed: 14575 additions & 0 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 4 additions & 5 deletions

@@ -69,7 +69,7 @@ Many of the example programs print usage help if no params are given.
 Testing first requires [building Spark](#building-spark). Once Spark is built, tests
 can be run using:
 
-    ./sbt/sbt test
+    ./dev/run-tests
 
 ## A Note About Hadoop Versions
 

@@ -118,11 +118,10 @@ If your project is built with Maven, add this to your POM file's `<dependencies>
 ## A Note About Thrift JDBC server and CLI for Spark SQL
 
 Spark SQL supports Thrift JDBC server and CLI.
-See sql-programming-guide.md for more information about those features.
-You can use those features by setting `-Phive-thriftserver` when building Spark as follows.
-
-    $ sbt/sbt -Phive-thriftserver assembly
+See sql-programming-guide.md for more information about using the JDBC server and CLI.
+You can use those features by setting `-Phive` when building Spark as follows.
 
+    $ sbt/sbt -Phive assembly
 
 ## Configuration

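Both commands touched by this README change are run from the repository root; for quick reference, this just restates the two lines the diff itself introduces:

    # New test entry point, replacing ./sbt/sbt test
    ./dev/run-tests

    # One profile now covers Hive support plus the Thrift JDBC server and CLI
    sbt/sbt -Phive assembly
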
assembly/pom.xml

Lines changed: 0 additions & 5 deletions

@@ -163,11 +163,6 @@
         <artifactId>spark-hive_${scala.binary.version}</artifactId>
         <version>${project.version}</version>
       </dependency>
-    </dependencies>
-  </profile>
-  <profile>
-    <id>hive-thriftserver</id>
-    <dependencies>
       <dependency>
         <groupId>org.apache.spark</groupId>
         <artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>

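Removing the separate `hive-thriftserver` profile folds the `spark-hive-thriftserver` dependency into the existing `hive` profile, which is what makes the single `-Phive` flag in the README sufficient. The Maven equivalent would activate the same profile; the goals here are the usual Spark build invocation, stated as an assumption rather than taken from this diff:

    mvn -Phive -DskipTests clean package
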
bin/compute-classpath.cmd

Lines changed: 2 additions & 1 deletion

@@ -36,7 +36,8 @@ rem Load environment variables from conf\spark-env.cmd, if it exists
 if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
 
 rem Build up classpath
-set CLASSPATH=%FWDIR%conf
+set CLASSPATH=%SPARK_CLASSPATH%;%SPARK_SUBMIT_CLASSPATH%;%FWDIR%conf
+
 if exist "%FWDIR%RELEASE" (
   for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
     set ASSEMBLY_JAR=%%d

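The Windows script now seeds the classpath from SPARK_CLASSPATH and SPARK_SUBMIT_CLASSPATH ahead of Spark's own conf directory, matching the Unix bin/compute-classpath.sh; from memory the corresponding Unix line looks roughly like this (approximate, not part of this diff):

    # bin/compute-classpath.sh (Unix counterpart, approximate)
    CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH:$FWDIR/conf"
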
bin/pyspark

Lines changed: 2 additions & 0 deletions

@@ -102,6 +102,8 @@ if [[ "$1" =~ \.py$ ]]; then
   gatherSparkSubmitOpts "$@"
   exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
 else
+  # PySpark shell requires special handling downstream
+  export PYSPARK_SHELL=1
   # Only use ipython if no command line arguments were provided [SPARK-1134]
   if [[ "$IPYTHON" = "1" ]]; then
     exec ipython $IPYTHON_OPTS

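The surrounding if separates the two entry points, so PYSPARK_SHELL=1 is exported only in the interactive case, presumably for the driver-bootstrapping path added to bin/spark-class below (the diff itself only says the shell "requires special handling downstream"). The two entry points, for reference (the example script path is from the Spark tree):

    # Script branch: a .py argument goes straight to spark-submit
    ./bin/pyspark examples/src/main/python/pi.py 10

    # Shell branch: no script argument; PYSPARK_SHELL=1 is now set here
    ./bin/pyspark
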
bin/spark-class

Lines changed: 38 additions & 11 deletions

@@ -17,6 +17,8 @@
 # limitations under the License.
 #
 
+# NOTE: Any changes to this file must be reflected in SparkSubmitDriverBootstrapper.scala!
+
 cygwin=false
 case "`uname`" in
     CYGWIN*) cygwin=true;;

@@ -39,7 +41,7 @@ fi
 
 if [ -n "$SPARK_MEM" ]; then
   echo -e "Warning: SPARK_MEM is deprecated, please use a more specific config option" 1>&2
-  echo -e "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)." 1>&2
+  echo -e "(e.g., spark.executor.memory or spark.driver.memory)." 1>&2
 fi
 
 # Use SPARK_MEM or 512m as the default memory, to be overridden by specific options

@@ -73,11 +75,17 @@ case "$1" in
     OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
     ;;
 
-  # Spark submit uses SPARK_SUBMIT_OPTS and SPARK_JAVA_OPTS
-  'org.apache.spark.deploy.SparkSubmit')
-    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS \
-      -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
+  # Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_OPTS +
+  # SPARK_DRIVER_MEMORY + SPARK_SUBMIT_DRIVER_MEMORY.
+  'org.apache.spark.deploy.SparkSubmit')
+    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS"
     OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
+    if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then
+      OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
+    fi
+    if [ -n "$SPARK_SUBMIT_DRIVER_MEMORY" ]; then
+      OUR_JAVA_MEM="$SPARK_SUBMIT_DRIVER_MEMORY"
+    fi
     ;;
 
   *)

@@ -101,11 +109,12 @@ fi
 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
 JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
+
 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e "$FWDIR/conf/java-opts" ] ; then
   JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
 fi
-export JAVA_OPTS
+
 # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
 
 TOOLS_DIR="$FWDIR"/tools

@@ -146,10 +155,28 @@ if $cygwin; then
 fi
 export CLASSPATH
 
-if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
-  echo -n "Spark Command: " 1>&2
-  echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
-  echo -e "========================================\n" 1>&2
+# In Spark submit client mode, the driver is launched in the same JVM as Spark submit itself.
+# Here we must parse the properties file for relevant "spark.driver.*" configs before launching
+# the driver JVM itself. Instead of handling this complexity in Bash, we launch a separate JVM
+# to prepare the launch environment of this driver JVM.
+
+if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
+  # This is used only if the properties file actually contains these special configs
+  # Export the environment variables needed by SparkSubmitDriverBootstrapper
+  export RUNNER
+  export CLASSPATH
+  export JAVA_OPTS
+  export OUR_JAVA_MEM
+  export SPARK_CLASS=1
+  shift # Ignore main class (org.apache.spark.deploy.SparkSubmit) and use our own
+  exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@"
+else
+  # Note: The format of this command is closely echoed in SparkSubmitDriverBootstrapper.scala
+  if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
+    echo -n "Spark Command: " 1>&2
+    echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
+    echo -e "========================================\n" 1>&2
+  fi
+  exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
 fi
-
-exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"

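Two quick checks for the new behavior, using only variables that appear in this diff (the exact output format is not guaranteed):

    # Print the final JVM invocation; any non-empty value now triggers it (-n test)
    SPARK_PRINT_LAUNCH_COMMAND=1 ./bin/spark-class org.apache.spark.deploy.SparkSubmit --help

    # For SparkSubmit, SPARK_SUBMIT_DRIVER_MEMORY overrides SPARK_DRIVER_MEMORY
    SPARK_DRIVER_MEMORY=1g SPARK_SUBMIT_DRIVER_MEMORY=2g \
      SPARK_PRINT_LAUNCH_COMMAND=1 ./bin/spark-class org.apache.spark.deploy.SparkSubmit --help
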
bin/spark-class2.cmd

File mode changed: 100755 → 100644

Lines changed: 39 additions & 7 deletions

@@ -17,6 +17,8 @@ rem See the License for the specific language governing permissions and
 rem limitations under the License.
 rem
 
+rem Any changes to this file must be reflected in SparkSubmitDriverBootstrapper.scala!
+
 setlocal enabledelayedexpansion
 
 set SCALA_VERSION=2.10

@@ -38,7 +40,7 @@ if not "x%1"=="x" goto arg_given
 
 if not "x%SPARK_MEM%"=="x" (
   echo Warning: SPARK_MEM is deprecated, please use a more specific config option
-  echo e.g., spark.executor.memory or SPARK_DRIVER_MEMORY.
+  echo e.g., spark.executor.memory or spark.driver.memory.
 )
 
 rem Use SPARK_MEM or 512m as the default memory, to be overridden by specific options

@@ -67,18 +69,26 @@ rem Executors use SPARK_JAVA_OPTS + SPARK_EXECUTOR_MEMORY.
   set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS% %SPARK_EXECUTOR_OPTS%
   if not "x%SPARK_EXECUTOR_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_EXECUTOR_MEMORY%
 
-rem All drivers use SPARK_JAVA_OPTS + SPARK_DRIVER_MEMORY. The repl also uses SPARK_REPL_OPTS.
-) else if "%1"=="org.apache.spark.repl.Main" (
-  set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS% %SPARK_REPL_OPTS%
+rem Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_OPTS +
+rem SPARK_DRIVER_MEMORY + SPARK_SUBMIT_DRIVER_MEMORY.
+rem The repl also uses SPARK_REPL_OPTS.
+) else if "%1"=="org.apache.spark.deploy.SparkSubmit" (
+  set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS% %SPARK_SUBMIT_OPTS% %SPARK_REPL_OPTS%
+  if not "x%SPARK_SUBMIT_LIBRARY_PATH%"=="x" (
+    set OUR_JAVA_OPTS=!OUR_JAVA_OPTS! -Djava.library.path=%SPARK_SUBMIT_LIBRARY_PATH%
+  ) else if not "x%SPARK_LIBRARY_PATH%"=="x" (
+    set OUR_JAVA_OPTS=!OUR_JAVA_OPTS! -Djava.library.path=%SPARK_LIBRARY_PATH%
+  )
   if not "x%SPARK_DRIVER_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_DRIVER_MEMORY%
+  if not "x%SPARK_SUBMIT_DRIVER_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_SUBMIT_DRIVER_MEMORY%
 ) else (
   set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS%
   if not "x%SPARK_DRIVER_MEMORY%"=="x" set OUR_JAVA_MEM=%SPARK_DRIVER_MEMORY%
 )
 
-rem Set JAVA_OPTS to be able to load native libraries and to set heap size
-set JAVA_OPTS=-XX:MaxPermSize=128m %OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
 rem Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
+rem Set JAVA_OPTS to be able to load native libraries and to set heap size
+set JAVA_OPTS=-XX:MaxPermSize=128m %OUR_JAVA_OPTS% -Xms%OUR_JAVA_MEM% -Xmx%OUR_JAVA_MEM%
 
 rem Test whether the user has built Spark
 if exist "%FWDIR%RELEASE" goto skip_build_test

@@ -109,5 +119,27 @@ rem Figure out where java is.
 set RUNNER=java
 if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
 
-"%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
+rem In Spark submit client mode, the driver is launched in the same JVM as Spark submit itself.
+rem Here we must parse the properties file for relevant "spark.driver.*" configs before launching
+rem the driver JVM itself. Instead of handling this complexity here, we launch a separate JVM
+rem to prepare the launch environment of this driver JVM.
+
+rem In this case, leave out the main class (org.apache.spark.deploy.SparkSubmit) and use our own.
+rem Leaving out the first argument is surprisingly difficult to do in Windows. Note that this must
+rem be done here because the Windows "shift" command does not work in a conditional block.
+set BOOTSTRAP_ARGS=
+shift
+:start_parse
+if "%~1" == "" goto end_parse
+set BOOTSTRAP_ARGS=%BOOTSTRAP_ARGS% %~1
+shift
+goto start_parse
+:end_parse
+
+if not [%SPARK_SUBMIT_BOOTSTRAP_DRIVER%] == [] (
+  set SPARK_CLASS=1
+  "%RUNNER%" org.apache.spark.deploy.SparkSubmitDriverBootstrapper %BOOTSTRAP_ARGS%
+) else (
+  "%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
+)
 :exit

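The :start_parse loop exists because, as the rem comments note, cmd's shift cannot be used inside a conditional block, and shift also never rewrites %*, so the argument list minus the main class has to be re-collected by hand. The Unix bin/spark-class above needs just two lines for the same step:

    shift # Ignore main class (org.apache.spark.deploy.SparkSubmit) and use our own
    exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@"
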
bin/spark-shell

Lines changed: 18 additions & 18 deletions

@@ -22,7 +22,7 @@
 
 cygwin=false
 case "`uname`" in
-    CYGWIN*) cygwin=true;;
+  CYGWIN*) cygwin=true;;
 esac
 
 # Enter posix mode for bash

@@ -32,9 +32,9 @@ set -o posix
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
 function usage() {
-    echo "Usage: ./bin/spark-shell [options]"
-    $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
-    exit 0
+  echo "Usage: ./bin/spark-shell [options]"
+  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  exit 0
 }
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then

@@ -46,20 +46,20 @@ SUBMIT_USAGE_FUNCTION=usage
 gatherSparkSubmitOpts "$@"
 
 function main() {
-    if $cygwin; then
-        # Workaround for issue involving JLine and Cygwin
-        # (see http://sourceforge.net/p/jline/bugs/40/).
-        # If you're using the Mintty terminal emulator in Cygwin, may need to set the
-        # "Backspace sends ^H" setting in "Keys" section of the Mintty options
-        # (see https://github.com/sbt/sbt/issues/562).
-        stty -icanon min 1 -echo > /dev/null 2>&1
-        export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-        $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
-        stty icanon echo > /dev/null 2>&1
-    else
-        export SPARK_SUBMIT_OPTS
-        $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
-    fi
+  if $cygwin; then
+    # Workaround for issue involving JLine and Cygwin
+    # (see http://sourceforge.net/p/jline/bugs/40/).
+    # If you're using the Mintty terminal emulator in Cygwin, may need to set the
+    # "Backspace sends ^H" setting in "Keys" section of the Mintty options
+    # (see https://github.com/sbt/sbt/issues/562).
+    stty -icanon min 1 -echo > /dev/null 2>&1
+    export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
+    stty icanon echo > /dev/null 2>&1
+  else
+    export SPARK_SUBMIT_OPTS
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
+  fi
 }
 
 # Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in

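This file only moves from four-space to two-space indentation; the before and after lines are otherwise identical, which is why the addition and deletion counts match. Behavior is unchanged, so submit flags still pass through the gatherSparkSubmitOpts split as before:

    ./bin/spark-shell --master local[2]
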
bin/spark-sql

Lines changed: 13 additions & 42 deletions

@@ -24,6 +24,7 @@
 set -o posix
 
 CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
+CLASS_NOT_FOUND_EXIT_STATUS=1
 
 # Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"

@@ -43,52 +44,22 @@ function usage {
   $FWDIR/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
 }
 
-function ensure_arg_number {
-  arg_number=$1
-  at_least=$2
-
-  if [[ $arg_number -lt $at_least ]]; then
-    usage
-    exit 1
-  fi
-}
-
-if [[ "$@" = --help ]] || [[ "$@" = -h ]]; then
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
   usage
   exit 0
 fi
 
-CLI_ARGS=()
-SUBMISSION_ARGS=()
-
-while (($#)); do
-  case $1 in
-    -d | --define | --database | -f | -h | --hiveconf | --hivevar | -i | -p)
-      ensure_arg_number $# 2
-      CLI_ARGS+=("$1"); shift
-      CLI_ARGS+=("$1"); shift
-      ;;
+source $FWDIR/bin/utils.sh
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
 
-    -e)
-      ensure_arg_number $# 2
-      CLI_ARGS+=("$1"); shift
-      CLI_ARGS+=("$1"); shift
-      ;;
+"$FWDIR"/bin/spark-submit --class $CLASS "${SUBMISSION_OPTS[@]}" spark-internal "${APPLICATION_OPTS[@]}"
+exit_status=$?
 
-    -s | --silent)
-      CLI_ARGS+=("$1"); shift
-      ;;
-
-    -v | --verbose)
-      # Both SparkSubmit and SparkSQLCLIDriver recognizes -v | --verbose
-      CLI_ARGS+=("$1")
-      SUBMISSION_ARGS+=("$1"); shift
-      ;;
-
-    *)
-      SUBMISSION_ARGS+=("$1"); shift
-      ;;
-  esac
-done
+if [[ exit_status -eq CLASS_NOT_FOUND_EXIT_STATUS ]]; then
+  echo
+  echo "Failed to load Spark SQL CLI main class $CLASS."
+  echo "You need to build Spark with -Phive."
+fi
 
-exec "$FWDIR"/bin/spark-submit --class $CLASS "${SUBMISSION_ARGS[@]}" spark-internal "${CLI_ARGS[@]}"
+exit $exit_status

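The hand-rolled option parser (and its ensure_arg_number helper) gives way to the shared gatherSparkSubmitOpts from bin/utils.sh, and a class-not-found exit now prints a hint to rebuild with -Phive. A typical invocation mixing a submit option with a CLI option (the -e flag belongs to SparkSQLCLIDriver, as the deleted parser shows):

    ./bin/spark-sql --master local -e "SELECT 1"

One subtlety carried over as-is: inside [[ ... -eq ... ]] Bash evaluates the bare names exit_status and CLASS_NOT_FOUND_EXIT_STATUS arithmetically, so the comparison works even without a leading $.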