Commit da4b221

Merge branch 'master' of https://github.com/tgravescs/spark into SPARK1198
2 parents: 61be271 + 47ebea5

11,777 files changed: +264,788 / -4,258 lines

.gitignore

Lines changed: 3 additions & 0 deletions
@@ -7,6 +7,7 @@
 sbt/*.jar
 .settings
 .cache
+.mima-excludes
 /build/
 work/
 out/
@@ -45,3 +46,5 @@ dist/
 spark-*-bin.tar.gz
 unit-tests.log
 /lib/
+rat-results.txt
+scalastyle.txt

.rat-excludes

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+target
+.gitignore
+.project
+.classpath
+.mima-excludes
+.rat-excludes
+.*md
+derby.log
+TAGS
+RELEASE
+control
+docs
+fairscheduler.xml.template
+log4j.properties
+log4j.properties.template
+metrics.properties.template
+slaves
+spark-env.sh
+spark-env.sh.template
+log4j-defaults.properties
+sorttable.js
+.*txt
+.*data
+.*log
+cloudpickle.py
+join.py
+SparkExprTyper.scala
+SparkILoop.scala
+SparkILoopInit.scala
+SparkIMain.scala
+SparkImports.scala
+SparkJLineCompletion.scala
+SparkJLineReader.scala
+SparkMemberHandlers.scala
+sbt
+sbt-launch-lib.bash
+plugins.sbt
+work
+.*\.q
+golden
+test.out/*
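
For context: this exclude list feeds the Apache RAT (Release Audit Tool) license checker, one pattern per line. A minimal sketch of an invocation, assuming a locally downloaded RAT jar (the jar name here is a placeholder):

# Audit the tree for missing license headers, skipping the patterns above;
# write the report to rat-results.txt (which .gitignore now ignores).
java -jar apache-rat.jar -E .rat-excludes -d . > rat-results.txt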

.travis.yml

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+language: scala
+scala:
+  - "2.10.3"
+jdk:
+  - oraclejdk7
+env:
+  matrix:
+    - TEST="scalastyle assembly/assembly"
+    - TEST="catalyst/test sql/test streaming/test mllib/test graphx/test bagel/test"
+    - TEST=hive/test
+cache:
+  directories:
+    - $HOME/.m2
+    - $HOME/.ivy2
+    - $HOME/.sbt
+script:
+  - "sbt ++$TRAVIS_SCALA_VERSION $TEST"

NOTICE

Lines changed: 10 additions & 1 deletion
@@ -1,5 +1,14 @@
 Apache Spark
-Copyright 2013 The Apache Software Foundation.
+Copyright 2014 The Apache Software Foundation.
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
+
+In addition, this product includes:
+
+ - JUnit (http://www.junit.org) is a testing framework for Java. We included it
+   under the terms of the Eclipse Public License v1.0.
+
+ - JTransforms (https://sites.google.com/site/piotrwendykier/software/jtransforms)
+   provides fast transforms in Java. It is tri-licensed, and we included it under
+   the terms of the Mozilla Public License v1.1.

assembly/pom.xml

Lines changed: 5 additions & 0 deletions
@@ -79,6 +79,11 @@
       <artifactId>spark-graphx_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>

bin/compute-classpath.sh

Lines changed: 28 additions & 8 deletions
@@ -25,31 +25,48 @@ SCALA_VERSION=2.10
 # Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
-# Load environment variables from conf/spark-env.sh, if it exists
-if [ -e "$FWDIR/conf/spark-env.sh" ] ; then
-  . $FWDIR/conf/spark-env.sh
-fi
+. $FWDIR/bin/load-spark-env.sh
 
 # Build up classpath
 CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf"
 
+# Support for interacting with Hive. Since hive pulls in a lot of dependencies that might break
+# existing Spark applications, it is not included in the standard spark assembly. Instead, we only
+# include it in the classpath if the user has explicitly requested it by running "sbt hive/assembly"
+# Hopefully we will find a way to avoid uber-jars entirely and deploy only the needed packages in
+# the future.
+if [ -f "$FWDIR"/sql/hive/target/scala-$SCALA_VERSION/spark-hive-assembly-*.jar ]; then
+
+  # Datanucleus jars do not work if only included in the uberjar as plugin.xml metadata is lost.
+  DATANUCLEUSJARS=$(JARS=("$FWDIR/lib_managed/jars"/datanucleus-*.jar); IFS=:; echo "${JARS[*]}")
+  CLASSPATH=$CLASSPATH:$DATANUCLEUSJARS
+
+  ASSEMBLY_DIR="$FWDIR/sql/hive/target/scala-$SCALA_VERSION/"
+else
+  ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION/"
+fi
+
 # First check if we have a dependencies jar. If so, include binary classes with the deps jar
-if [ -f "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*-deps.jar ]; then
+if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
   CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
 
-  DEPS_ASSEMBLY_JAR=`ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*-deps.jar`
+  DEPS_ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*-deps.jar`
   CLASSPATH="$CLASSPATH:$DEPS_ASSEMBLY_JAR"
 else
   # Else use spark-assembly jar from either RELEASE or assembly directory
   if [ -f "$FWDIR/RELEASE" ]; then
-    ASSEMBLY_JAR=`ls "$FWDIR"/jars/spark-assembly*.jar`
+    ASSEMBLY_JAR=`ls "$FWDIR"/jars/spark*-assembly*.jar`
   else
-    ASSEMBLY_JAR=`ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar`
+    ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*.jar`
   fi
   CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
 fi
@@ -62,6 +79,9 @@ if [[ $SPARK_TESTING == 1 ]]; then
   CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/test-classes"
   CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/test-classes"
   CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/test-classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/test-classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/test-classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/test-classes"
 fi
 
 # Add hadoop conf dir if given -- otherwise FileSystem.*, etc fail !
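
The DATANUCLEUSJARS line above packs a glob of jars into a colon-separated string using a compact bash idiom: expand the glob into an array inside a command substitution, then join the elements by setting IFS to ':' in that subshell (so the caller's IFS is untouched). A standalone sketch, with a hypothetical directory:

# Join every matching jar with ':'; /opt/jars is a placeholder path.
JOINED=$(JARS=(/opt/jars/datanucleus-*.jar); IFS=:; echo "${JARS[*]}")
echo "$JOINED"   # e.g. /opt/jars/datanucleus-api.jar:/opt/jars/datanucleus-core.jar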

bin/load-spark-env.sh

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This script loads spark-env.sh if it exists, and ensures it is only loaded once.
+# spark-env.sh is loaded from SPARK_CONF_DIR if set, or within the current directory's
+# conf/ subdirectory.
+
+if [ -z "$SPARK_ENV_LOADED" ]; then
+  export SPARK_ENV_LOADED=1
+
+  # Returns the parent of the directory this script lives in.
+  parent_dir="$(cd `dirname $0`/..; pwd)"
+
+  use_conf_dir=${SPARK_CONF_DIR:-"$parent_dir/conf"}
+
+  if [ -f "${use_conf_dir}/spark-env.sh" ]; then
+    . "${use_conf_dir}/spark-env.sh"
+  fi
+fi
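
Because SPARK_ENV_LOADED is exported on first load, the script is idempotent: scripts that invoke each other (spark-class runs compute-classpath.sh, and both now source this file) still evaluate spark-env.sh only once. A minimal sketch, assuming SPARK_HOME points at a Spark checkout:

. "$SPARK_HOME/bin/load-spark-env.sh"   # first call: sources conf/spark-env.sh if present
. "$SPARK_HOME/bin/load-spark-env.sh"   # no-op: SPARK_ENV_LOADED is already set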

bin/pyspark

Lines changed: 1 addition & 4 deletions
@@ -36,10 +36,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
   fi
 fi
 
-# Load environment variables from conf/spark-env.sh, if it exists
-if [ -e "$FWDIR/conf/spark-env.sh" ] ; then
-  . $FWDIR/conf/spark-env.sh
-fi
+. $FWDIR/bin/load-spark-env.sh
 
 # Figure out which Python executable to use
 if [ -z "$PYSPARK_PYTHON" ] ; then

bin/run-example

Lines changed: 1 addition & 4 deletions
@@ -30,10 +30,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
-# Load environment variables from conf/spark-env.sh, if it exists
-if [ -e "$FWDIR/conf/spark-env.sh" ] ; then
-  . $FWDIR/conf/spark-env.sh
-fi
+. $FWDIR/bin/load-spark-env.sh
 
 if [ -z "$1" ]; then
   echo "Usage: run-example <example-class> [<args>]" >&2

bin/spark-class

Lines changed: 2 additions & 6 deletions
@@ -30,10 +30,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
-# Load environment variables from conf/spark-env.sh, if it exists
-if [ -e "$FWDIR/conf/spark-env.sh" ] ; then
-  . $FWDIR/conf/spark-env.sh
-fi
+. $FWDIR/bin/load-spark-env.sh
 
 if [ -z "$1" ]; then
   echo "Usage: spark-class <class> [<args>]" >&2
@@ -137,8 +134,7 @@ fi
 
 # Compute classpath using external script
 CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
-
-if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
+if [[ "$1" =~ org.apache.spark.tools.* ]]; then
   CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR"
 fi
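
The equality check on a single tool class is broadened here to a bash regex match: [[ ... =~ ... ]] performs an unanchored ERE match, so any class under org.apache.spark.tools now gets SPARK_TOOLS_JAR on its classpath. A quick illustration:

# Hypothetical class name; the pattern matches anything in the tools package.
cls="org.apache.spark.tools.JavaAPICompletenessChecker"
if [[ "$cls" =~ org.apache.spark.tools.* ]]; then
  echo "appending SPARK_TOOLS_JAR to CLASSPATH"
fi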
