Commit da4b221

Merge branch 'master' of https://github.com/tgravescs/spark into SPARK1198
2 parents: 61be271 + 47ebea5

11,777 files changed: +264,788 / -4,258 lines

.gitignore

Lines changed: 3 additions & 0 deletions
@@ -7,6 +7,7 @@
 sbt/*.jar
 .settings
 .cache
+.mima-excludes
 /build/
 work/
 out/
@@ -45,3 +46,5 @@ dist/
 spark-*-bin.tar.gz
 unit-tests.log
 /lib/
+rat-results.txt
+scalastyle.txt

.rat-excludes

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
+target
+.gitignore
+.project
+.classpath
+.mima-excludes
+.rat-excludes
+.*md
+derby.log
+TAGS
+RELEASE
+control
+docs
+fairscheduler.xml.template
+log4j.properties
+log4j.properties.template
+metrics.properties.template
+slaves
+spark-env.sh
+spark-env.sh.template
+log4j-defaults.properties
+sorttable.js
+.*txt
+.*data
+.*log
+cloudpickle.py
+join.py
+SparkExprTyper.scala
+SparkILoop.scala
+SparkILoopInit.scala
+SparkIMain.scala
+SparkImports.scala
+SparkJLineCompletion.scala
+SparkJLineReader.scala
+SparkMemberHandlers.scala
+sbt
+sbt-launch-lib.bash
+plugins.sbt
+work
+.*\.q
+golden
+test.out/*
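
For context: this exclude list feeds the Apache RAT (Release Audit Tool) license checker, one pattern per line. A minimal sketch of an invocation, assuming a locally downloaded RAT jar (the jar name here is a placeholder):

# Audit the tree for missing license headers, skipping the patterns above;
# write the report to rat-results.txt (which .gitignore now ignores).
java -jar apache-rat.jar -E .rat-excludes -d . > rat-results.txt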

.travis.yml

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+language: scala
+scala:
+  - "2.10.3"
+jdk:
+  - oraclejdk7
+env:
+  matrix:
+    - TEST="scalastyle assembly/assembly"
+    - TEST="catalyst/test sql/test streaming/test mllib/test graphx/test bagel/test"
+    - TEST=hive/test
+cache:
+  directories:
+    - $HOME/.m2
+    - $HOME/.ivy2
+    - $HOME/.sbt
+script:
+  - "sbt ++$TRAVIS_SCALA_VERSION $TEST"

NOTICE

Lines changed: 10 additions & 1 deletion
@@ -1,5 +1,14 @@
 Apache Spark
-Copyright 2013 The Apache Software Foundation.
+Copyright 2014 The Apache Software Foundation.
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
+
+In addition, this product includes:
+
+ - JUnit (http://www.junit.org) is a testing framework for Java. We included it
+   under the terms of the Eclipse Public License v1.0.
+
+ - JTransforms (https://sites.google.com/site/piotrwendykier/software/jtransforms)
+   provides fast transforms in Java. It is tri-licensed, and we included it under
+   the terms of the Mozilla Public License v1.1.

assembly/pom.xml

Lines changed: 5 additions & 0 deletions
@@ -79,6 +79,11 @@
       <artifactId>spark-graphx_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>

bin/compute-classpath.sh

Lines changed: 28 additions & 8 deletions
@@ -25,31 +25,48 @@ SCALA_VERSION=2.10
 # Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
-# Load environment variables from conf/spark-env.sh, if it exists
-if [ -e "$FWDIR/conf/spark-env.sh" ] ; then
-  . $FWDIR/conf/spark-env.sh
-fi
+. $FWDIR/bin/load-spark-env.sh
 
 # Build up classpath
 CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf"
 
+# Support for interacting with Hive. Since hive pulls in a lot of dependencies that might break
+# existing Spark applications, it is not included in the standard spark assembly. Instead, we only
+# include it in the classpath if the user has explicitly requested it by running "sbt hive/assembly"
+# Hopefully we will find a way to avoid uber-jars entirely and deploy only the needed packages in
+# the future.
+if [ -f "$FWDIR"/sql/hive/target/scala-$SCALA_VERSION/spark-hive-assembly-*.jar ]; then
+
+  # Datanucleus jars do not work if only included in the uberjar as plugin.xml metadata is lost.
+  DATANUCLEUSJARS=$(JARS=("$FWDIR/lib_managed/jars"/datanucleus-*.jar); IFS=:; echo "${JARS[*]}")
+  CLASSPATH=$CLASSPATH:$DATANUCLEUSJARS
+
+  ASSEMBLY_DIR="$FWDIR/sql/hive/target/scala-$SCALA_VERSION/"
+else
+  ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION/"
+fi
+
 # First check if we have a dependencies jar. If so, include binary classes with the deps jar
-if [ -f "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*-deps.jar ]; then
+if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
   CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
 
-  DEPS_ASSEMBLY_JAR=`ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*-deps.jar`
+  DEPS_ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*-deps.jar`
   CLASSPATH="$CLASSPATH:$DEPS_ASSEMBLY_JAR"
 else
   # Else use spark-assembly jar from either RELEASE or assembly directory
   if [ -f "$FWDIR/RELEASE" ]; then
-    ASSEMBLY_JAR=`ls "$FWDIR"/jars/spark-assembly*.jar`
+    ASSEMBLY_JAR=`ls "$FWDIR"/jars/spark*-assembly*.jar`
   else
-    ASSEMBLY_JAR=`ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar`
+    ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*.jar`
   fi
   CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
 fi
@@ -62,6 +79,9 @@ if [[ $SPARK_TESTING == 1 ]]; then
   CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/test-classes"
   CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/test-classes"
   CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/test-classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/test-classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/test-classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/test-classes"
 fi
 
 # Add hadoop conf dir if given -- otherwise FileSystem.*, etc fail !
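
The DATANUCLEUSJARS line above packs a glob of jars into a colon-separated string using a compact bash idiom: expand the glob into an array inside a command substitution, then join the elements by setting IFS to ':' in that subshell (so the caller's IFS is untouched). A standalone sketch, with a hypothetical directory:

# Join every matching jar with ':'; /opt/jars is a placeholder path.
JOINED=$(JARS=(/opt/jars/datanucleus-*.jar); IFS=:; echo "${JARS[*]}")
echo "$JOINED"   # e.g. /opt/jars/datanucleus-api.jar:/opt/jars/datanucleus-core.jar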

bin/load-spark-env.sh

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This script loads spark-env.sh if it exists, and ensures it is only loaded once.
+# spark-env.sh is loaded from SPARK_CONF_DIR if set, or within the current directory's
+# conf/ subdirectory.
+
+if [ -z "$SPARK_ENV_LOADED" ]; then
+  export SPARK_ENV_LOADED=1
+
+  # Returns the parent of the directory this script lives in.
+  parent_dir="$(cd `dirname $0`/..; pwd)"
+
+  use_conf_dir=${SPARK_CONF_DIR:-"$parent_dir/conf"}
+
+  if [ -f "${use_conf_dir}/spark-env.sh" ]; then
+    . "${use_conf_dir}/spark-env.sh"
+  fi
+fi
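
Because SPARK_ENV_LOADED is exported on first load, the script is idempotent: scripts that invoke each other (spark-class runs compute-classpath.sh, and both now source this file) still evaluate spark-env.sh only once. A minimal sketch, assuming SPARK_HOME points at a Spark checkout:

. "$SPARK_HOME/bin/load-spark-env.sh"   # first call: sources conf/spark-env.sh if present
. "$SPARK_HOME/bin/load-spark-env.sh"   # no-op: SPARK_ENV_LOADED is already set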

bin/pyspark

Lines changed: 1 addition & 4 deletions
@@ -36,10 +36,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
   fi
 fi
 
-# Load environment variables from conf/spark-env.sh, if it exists
-if [ -e "$FWDIR/conf/spark-env.sh" ] ; then
-  . $FWDIR/conf/spark-env.sh
-fi
+. $FWDIR/bin/load-spark-env.sh
 
 # Figure out which Python executable to use
 if [ -z "$PYSPARK_PYTHON" ] ; then

bin/run-example

Lines changed: 1 addition & 4 deletions
@@ -30,10 +30,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
-# Load environment variables from conf/spark-env.sh, if it exists
-if [ -e "$FWDIR/conf/spark-env.sh" ] ; then
-  . $FWDIR/conf/spark-env.sh
-fi
+. $FWDIR/bin/load-spark-env.sh
 
 if [ -z "$1" ]; then
   echo "Usage: run-example <example-class> [<args>]" >&2

bin/spark-class

Lines changed: 2 additions & 6 deletions
@@ -30,10 +30,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
-# Load environment variables from conf/spark-env.sh, if it exists
-if [ -e "$FWDIR/conf/spark-env.sh" ] ; then
-  . $FWDIR/conf/spark-env.sh
-fi
+. $FWDIR/bin/load-spark-env.sh
 
 if [ -z "$1" ]; then
   echo "Usage: spark-class <class> [<args>]" >&2
@@ -137,8 +134,7 @@ fi
 
 # Compute classpath using external script
 CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
-
-if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
+if [[ "$1" =~ org.apache.spark.tools.* ]]; then
   CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR"
 fi
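
The equality check on a single tool class is broadened here to a bash regex match: [[ ... =~ ... ]] performs an unanchored ERE match, so any class under org.apache.spark.tools now gets SPARK_TOOLS_JAR on its classpath. A quick illustration:

# Hypothetical class name; the pattern matches anything in the tools package.
cls="org.apache.spark.tools.JavaAPICompletenessChecker"
if [[ "$cls" =~ org.apache.spark.tools.* ]]; then
  echo "appending SPARK_TOOLS_JAR to CLASSPATH"
fi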
