
Commit 9dbdabe
merge from master
2 parents: 719d009 + 8db0f7e

379 files changed: +9,412 -4,583 lines


.gitignore

Lines changed: 1 addition & 0 deletions

@@ -18,6 +18,7 @@ conf/java-opts
 conf/spark-env.sh
 conf/streaming-env.sh
 conf/log4j.properties
+conf/spark-defaults.conf
 docs/_site
 docs/api
 target/

.rat-excludes

Lines changed: 1 addition & 0 deletions

@@ -11,6 +11,7 @@ RELEASE
 control
 docs
 fairscheduler.xml.template
+spark-defaults.conf.template
 log4j.properties
 log4j.properties.template
 metrics.properties.template
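
Taken together, these two entries cover the new per-site configuration file: the checked-in spark-defaults.conf.template is excluded from the RAT license check, while a user's populated conf/spark-defaults.conf stays out of version control. As a minimal sketch of what such a file holds (the format is whitespace-separated key/value pairs; the values and master URL below are placeholders, not part of this commit):

  # conf/spark-defaults.conf - illustrative values only
  spark.master            spark://master.example.com:7077
  spark.executor.memory   512m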

assembly/pom.xml

Lines changed: 1 addition & 1 deletion

@@ -33,7 +33,7 @@

   <properties>
     <spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
-    <spark.jar.basename>${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
+    <spark.jar.basename>spark-assembly-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
     <spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>
     <deb.pkg.name>spark</deb.pkg.name>
     <deb.install.path>/usr/share/spark</deb.install.path>
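
The basename now hard-codes the spark-assembly prefix instead of deriving it from ${project.artifactId}, which carries a Scala-version suffix (such as spark-assembly_2.10), so the jar name stays stable across Scala binary versions. With illustrative values of 1.0.0 for ${project.version} and 2.2.0 for ${hadoop.version} (placeholders, not taken from this commit):

  before: spark-assembly_2.10-1.0.0-hadoop2.2.0.jar
  after:  spark-assembly-1.0.0-hadoop2.2.0.jar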

bagel/pom.xml

Lines changed: 0 additions & 14 deletions

@@ -31,20 +31,6 @@
   <name>Spark Project Bagel</name>
   <url>http://spark.apache.org/</url>

-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

bin/compute-classpath.cmd

Lines changed: 88 additions & 69 deletions

@@ -1,69 +1,88 @@
-@echo off
-
-rem
-rem Licensed to the Apache Software Foundation (ASF) under one or more
-rem contributor license agreements.  See the NOTICE file distributed with
-rem this work for additional information regarding copyright ownership.
-rem The ASF licenses this file to You under the Apache License, Version 2.0
-rem (the "License"); you may not use this file except in compliance with
-rem the License.  You may obtain a copy of the License at
-rem
-rem    http://www.apache.org/licenses/LICENSE-2.0
-rem
-rem Unless required by applicable law or agreed to in writing, software
-rem distributed under the License is distributed on an "AS IS" BASIS,
-rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-rem See the License for the specific language governing permissions and
-rem limitations under the License.
-rem
-
-rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
-rem script and the ExecutorRunner in standalone cluster mode.
-
-set SCALA_VERSION=2.10
-
-rem Figure out where the Spark framework is installed
-set FWDIR=%~dp0..\
-
-rem Load environment variables from conf\spark-env.cmd, if it exists
-if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
-
-rem Build up classpath
-set CLASSPATH=%FWDIR%conf
-if exist "%FWDIR%RELEASE" (
-  for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
-    set ASSEMBLY_JAR=%%d
-  )
-) else (
-  for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
-    set ASSEMBLY_JAR=%%d
-  )
-)
-set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
-
-if "x%SPARK_TESTING%"=="x1" (
-  rem Add test clases to path
-  set CLASSPATH=%CLASSPATH%;%FWDIR%core\target\scala-%SCALA_VERSION%\test-classes
-  set CLASSPATH=%CLASSPATH%;%FWDIR%repl\target\scala-%SCALA_VERSION%\test-classes
-  set CLASSPATH=%CLASSPATH%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\test-classes
-  set CLASSPATH=%CLASSPATH%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\test-classes
-  set CLASSPATH=%CLASSPATH%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\test-classes
-)
-
-rem Add hadoop conf dir - else FileSystem.*, etc fail
-rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
-rem the configurtion files.
-if "x%HADOOP_CONF_DIR%"=="x" goto no_hadoop_conf_dir
-  set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR%
-:no_hadoop_conf_dir
-
-if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
-  set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
-:no_yarn_conf_dir
-
-rem A bit of a hack to allow calling this script within run2.cmd without seeing output
-if "%DONT_PRINT_CLASSPATH%"=="1" goto exit
-
-echo %CLASSPATH%
-
-:exit
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
+rem script and the ExecutorRunner in standalone cluster mode.
+
+set SCALA_VERSION=2.10
+
+rem Figure out where the Spark framework is installed
+set FWDIR=%~dp0..\
+
+rem Load environment variables from conf\spark-env.cmd, if it exists
+if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
+
+rem Build up classpath
+set CLASSPATH=%FWDIR%conf
+if exist "%FWDIR%RELEASE" (
+  for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
+    set ASSEMBLY_JAR=%%d
+  )
+) else (
+  for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
+    set ASSEMBLY_JAR=%%d
+  )
+)
+
+set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
+
+set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%graphx\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%tools\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\catalyst\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\core\target\scala-%SCALA_VERSION%\classes
+set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\hive\target\scala-%SCALA_VERSION%\classes
+
+set SPARK_TEST_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%graphx\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\catalyst\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\core\target\scala-%SCALA_VERSION%\test-classes
+set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\hive\target\scala-%SCALA_VERSION%\test-classes
+
+if "x%SPARK_TESTING%"=="x1" (
+  rem Add test clases to path - note, add SPARK_CLASSES and SPARK_TEST_CLASSES before CLASSPATH
+  rem so that local compilation takes precedence over assembled jar
+  set CLASSPATH=%SPARK_CLASSES%;%SPARK_TEST_CLASSES%;%CLASSPATH%
+)
+
+rem Add hadoop conf dir - else FileSystem.*, etc fail
+rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
+rem the configurtion files.
+if "x%HADOOP_CONF_DIR%"=="x" goto no_hadoop_conf_dir
+  set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR%
+:no_hadoop_conf_dir
+
+if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
+  set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
+:no_yarn_conf_dir
+
+rem A bit of a hack to allow calling this script within run2.cmd without seeing output
+if "%DONT_PRINT_CLASSPATH%"=="1" goto exit
+
+echo %CLASSPATH%
+
+:exit
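
As the new comment in the SPARK_TESTING block notes, SPARK_CLASSES and SPARK_TEST_CLASSES are placed before CLASSPATH so that locally compiled classes take precedence over the assembly jar. Schematically, running with SPARK_TESTING=1 yields an ordering of (using the script's own variables):

  %SPARK_CLASSES%;%SPARK_TEST_CLASSES%;%FWDIR%conf;%ASSEMBLY_JAR%

followed by any HADOOP_CONF_DIR and YARN_CONF_DIR entries.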

bin/compute-classpath.sh

Lines changed: 3 additions & 3 deletions

@@ -28,7 +28,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 . $FWDIR/bin/load-spark-env.sh
 
 # Build up classpath
-CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf"
+CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH:$FWDIR/conf"
 
 ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION"
 
@@ -50,9 +50,9 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
 else
   # Else use spark-assembly jar from either RELEASE or assembly directory
   if [ -f "$FWDIR/RELEASE" ]; then
-    ASSEMBLY_JAR=`ls "$FWDIR"/jars/spark*-assembly*.jar`
+    ASSEMBLY_JAR=`ls "$FWDIR"/lib/spark-assembly*hadoop*.jar`
   else
-    ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*.jar`
+    ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar`
   fi
   CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
 fi
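
The new SPARK_SUBMIT_CLASSPATH slot lets submission tooling prepend entries after SPARK_CLASSPATH but ahead of $FWDIR/conf and the assembly jar, without clobbering the user-facing SPARK_CLASSPATH. A hypothetical direct invocation, with placeholder paths, would print a classpath carrying the extra entries near the front:

  SPARK_SUBMIT_CLASSPATH=/opt/extra/conf:/opt/extra/lib/extra.jar ./bin/compute-classpath.sh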

bin/run-example

Lines changed: 7 additions & 5 deletions

@@ -40,12 +40,15 @@ fi
 # Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
 # to avoid the -sources and -doc packages that are built by publish-local.
 EXAMPLES_DIR="$FWDIR"/examples
-SPARK_EXAMPLES_JAR=""
-if [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar ]; then
-  export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar`
+
+if [ -f "$FWDIR/RELEASE" ]; then
+  export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
+elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
+  export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar`
 fi
+
 if [[ -z $SPARK_EXAMPLES_JAR ]]; then
-  echo "Failed to find Spark examples assembly in $FWDIR/examples/target" >&2
+  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" >&2
   echo "You need to build Spark with sbt/sbt assembly before running this program" >&2
   exit 1
 fi

@@ -75,7 +78,6 @@ fi
 
 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 JAVA_OPTS="$SPARK_JAVA_OPTS"
-JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e "$FWDIR/conf/java-opts" ] ; then
   JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"

bin/spark-class

Lines changed: 5 additions & 4 deletions

@@ -73,11 +73,13 @@ case "$1" in
     OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
     ;;
 
-  # All drivers use SPARK_JAVA_OPTS + SPARK_DRIVER_MEMORY. The repl also uses SPARK_REPL_OPTS.
-  'org.apache.spark.repl.Main')
-    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_REPL_OPTS"
+  # Spark submit uses SPARK_SUBMIT_OPTS and SPARK_JAVA_OPTS
+  'org.apache.spark.deploy.SparkSubmit')
+    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS \
+      -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
     OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
     ;;
+
   *)
     OUR_JAVA_OPTS="$SPARK_JAVA_OPTS"
     OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}

@@ -98,7 +100,6 @@ fi
 
 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 JAVA_OPTS="$OUR_JAVA_OPTS"
-JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
 JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e "$FWDIR/conf/java-opts" ] ; then
