
Commit d559ec5

Merge remote-tracking branch 'upstream/master' into mllib_pmml_model_export_SPARK-1406

2 parents: 8fe12bb + 75fdccc

File tree: 1,209 files changed (+64,958 / −20,362 lines)


.gitignore

Lines changed: 7 additions & 3 deletions
@@ -8,16 +8,19 @@
 *.pyc
 .idea/
 .idea_modules/
-sbt/*.jar
+build/*.jar
 .settings
 .cache
+cache
 .generated-mima*
-/build/
 work/
 out/
 .DS_Store
 third_party/libmesos.so
 third_party/libmesos.dylib
+build/apache-maven*
+build/zinc*
+build/scala*
 conf/java-opts
 conf/*.sh
 conf/*.cmd
@@ -51,10 +54,11 @@ checkpoint
 derby.log
 dist/
 dev/create-release/*txt
-dev/create-release/*new
+dev/create-release/*final
 spark-*-bin-*.tgz
 unit-tests.log
 /lib/
+ec2/lib/
 rat-results.txt
 scalastyle.txt
 scalastyle-output.xml

.rat-excludes

Lines changed: 1 addition & 0 deletions
@@ -64,3 +64,4 @@ dist/*
 logs
 .*scalastyle-output.xml
 .*dependency-reduced-pom.xml
+known_translations

LICENSE

Lines changed: 2 additions & 1 deletion
@@ -646,7 +646,8 @@ THE SOFTWARE.

 ========================================================================
 For Scala Interpreter classes (all .scala files in repl/src/main/scala
-except for Main.Scala, SparkHelper.scala and ExecutorClassLoader.scala):
+except for Main.Scala, SparkHelper.scala and ExecutorClassLoader.scala),
+and for SerializableMapWrapper in JavaUtils.scala:
 ========================================================================

 Copyright (c) 2002-2013 EPFL

README.md

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ To build Spark and its example programs, run:

 (You do not need to do this if you downloaded a pre-built package.)
 More detailed documentation is available from the project site, at
-["Building Spark with Maven"](http://spark.apache.org/docs/latest/building-with-maven.html).
+["Building Spark"](http://spark.apache.org/docs/latest/building-spark.html).

 ## Interactive Scala Shell

assembly/pom.xml

Lines changed: 24 additions & 34 deletions
@@ -39,16 +39,10 @@
     <deb.pkg.name>spark</deb.pkg.name>
     <deb.install.path>/usr/share/spark</deb.install.path>
     <deb.user>root</deb.user>
-    <deb.bin.filemode>744</deb.bin.filemode>
+    <deb.bin.filemode>755</deb.bin.filemode>
   </properties>

   <dependencies>
-    <!-- Promote Guava to compile scope in this module so it's included while shading. -->
-    <dependency>
-      <groupId>com.google.guava</groupId>
-      <artifactId>guava</artifactId>
-      <scope>compile</scope>
-    </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -133,20 +127,6 @@
               <goal>shade</goal>
             </goals>
             <configuration>
-              <relocations>
-                <relocation>
-                  <pattern>com.google</pattern>
-                  <shadedPattern>org.spark-project.guava</shadedPattern>
-                  <includes>
-                    <include>com.google.common.**</include>
-                  </includes>
-                  <excludes>
-                    <exclude>com/google/common/base/Absent*</exclude>
-                    <exclude>com/google/common/base/Optional*</exclude>
-                    <exclude>com/google/common/base/Present*</exclude>
-                  </excludes>
-                </relocation>
-              </relocations>
               <transformers>
                 <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
                 <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
@@ -169,16 +149,6 @@
   </build>

   <profiles>
-    <profile>
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.spark</groupId>
-          <artifactId>spark-yarn-alpha_${scala.binary.version}</artifactId>
-          <version>${project.version}</version>
-        </dependency>
-      </dependencies>
-    </profile>
     <profile>
       <id>yarn</id>
       <dependencies>
@@ -310,7 +280,7 @@
             <user>${deb.user}</user>
             <group>${deb.user}</group>
             <prefix>${deb.install.path}/conf</prefix>
-            <filemode>744</filemode>
+            <filemode>${deb.bin.filemode}</filemode>
           </mapper>
         </data>
         <data>
@@ -332,7 +302,7 @@
             <user>${deb.user}</user>
             <group>${deb.user}</group>
             <prefix>${deb.install.path}/sbin</prefix>
-            <filemode>744</filemode>
+            <filemode>${deb.bin.filemode}</filemode>
           </mapper>
         </data>
         <data>
@@ -343,7 +313,7 @@
             <user>${deb.user}</user>
             <group>${deb.user}</group>
             <prefix>${deb.install.path}/python</prefix>
-            <filemode>744</filemode>
+            <filemode>${deb.bin.filemode}</filemode>
           </mapper>
         </data>
       </dataSet>
@@ -364,5 +334,25 @@
         </dependency>
       </dependencies>
     </profile>
+
+    <!-- Profiles that disable inclusion of certain dependencies. -->
+    <profile>
+      <id>hadoop-provided</id>
+      <properties>
+        <hadoop.deps.scope>provided</hadoop.deps.scope>
+      </properties>
+    </profile>
+    <profile>
+      <id>hive-provided</id>
+      <properties>
+        <hive.deps.scope>provided</hive.deps.scope>
+      </properties>
+    </profile>
+    <profile>
+      <id>parquet-provided</id>
+      <properties>
+        <parquet.deps.scope>provided</parquet.deps.scope>
+      </properties>
+    </profile>
   </profiles>
 </project>
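The three new *-provided profiles do nothing on their own beyond setting a property; the effect comes from dependency declarations elsewhere in the build using that property as their scope. A hedged sketch of how a distributor would invoke them (the exact property wiring lives in the parent POM, which is not shown in this diff):

  # Build an assembly that expects Hadoop, Hive, and Parquet jars to be
  # supplied by the target environment instead of being bundled.
  mvn -DskipTests -Phadoop-provided -Phive-provided -Pparquet-provided package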

bagel/pom.xml

Lines changed: 0 additions & 15 deletions
@@ -40,15 +40,6 @@
       <artifactId>spark-core_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.eclipse.jetty</groupId>
-      <artifactId>jetty-server</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_${scala.binary.version}</artifactId>
-      <scope>test</scope>
-    </dependency>
     <dependency>
       <groupId>org.scalacheck</groupId>
       <artifactId>scalacheck_${scala.binary.version}</artifactId>
@@ -58,11 +49,5 @@
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-    <plugins>
-      <plugin>
-        <groupId>org.scalatest</groupId>
-        <artifactId>scalatest-maven-plugin</artifactId>
-      </plugin>
-    </plugins>
   </build>
 </project>

bagel/src/test/resources/log4j.properties

Lines changed: 2 additions & 2 deletions
@@ -15,10 +15,10 @@
 # limitations under the License.
 #

-# Set everything to be logged to the file bagel/target/unit-tests.log
+# Set everything to be logged to the file target/unit-tests.log
 log4j.rootCategory=INFO, file
 log4j.appender.file=org.apache.log4j.FileAppender
-log4j.appender.file.append=false
+log4j.appender.file.append=true
 log4j.appender.file.file=target/unit-tests.log
 log4j.appender.file.layout=org.apache.log4j.PatternLayout
 log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n

bin/compute-classpath.cmd

Lines changed: 7 additions & 0 deletions
@@ -109,6 +109,13 @@ if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
 set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
 :no_yarn_conf_dir

+rem To allow for distributions to append needed libraries to the classpath (e.g. when
+rem using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
+rem append it to tbe final classpath.
+if not "x%$SPARK_DIST_CLASSPATH%"=="x" (
+  set CLASSPATH=%CLASSPATH%;%SPARK_DIST_CLASSPATH%
+)
+
 rem A bit of a hack to allow calling this script within run2.cmd without seeing output
 if "%DONT_PRINT_CLASSPATH%"=="1" goto exit
bin/compute-classpath.sh

Lines changed: 31 additions & 15 deletions
@@ -25,7 +25,11 @@ FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

 . "$FWDIR"/bin/load-spark-env.sh

-CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH"
+if [ -n "$SPARK_CLASSPATH" ]; then
+  CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH"
+else
+  CLASSPATH="$SPARK_SUBMIT_CLASSPATH"
+fi

 # Build up classpath
 if [ -n "$SPARK_CONF_DIR" ]; then
@@ -46,8 +50,8 @@ fi
 if [ -n "$SPARK_PREPEND_CLASSES" ]; then
   echo "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark"\
     "classes ahead of assembly." >&2
+  # Spark classes
   CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SPARK_SCALA_VERSION/classes"
-  CLASSPATH="$CLASSPATH:$FWDIR/core/target/jars/*"
   CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SPARK_SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SPARK_SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SPARK_SCALA_VERSION/classes"
@@ -59,6 +63,8 @@ if [ -n "$SPARK_PREPEND_CLASSES" ]; then
   CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SPARK_SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/sql/hive-thriftserver/target/scala-$SPARK_SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/yarn/stable/target/scala-$SPARK_SCALA_VERSION/classes"
+  # Jars for shaded deps in their original form (copied here during build)
+  CLASSPATH="$CLASSPATH:$FWDIR/core/target/jars/*"
 fi

 # Use spark-assembly jar from either RELEASE or assembly directory
@@ -68,22 +74,25 @@ else
   assembly_folder="$ASSEMBLY_DIR"
 fi

-num_jars="$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)"
-if [ "$num_jars" -eq "0" ]; then
-  echo "Failed to find Spark assembly in $assembly_folder"
-  echo "You need to build Spark before running this program."
-  exit 1
-fi
+num_jars=0
+
+for f in ${assembly_folder}/spark-assembly*hadoop*.jar; do
+  if [[ ! -e "$f" ]]; then
+    echo "Failed to find Spark assembly in $assembly_folder" 1>&2
+    echo "You need to build Spark before running this program." 1>&2
+    exit 1
+  fi
+  ASSEMBLY_JAR="$f"
+  num_jars=$((num_jars+1))
+done
+
 if [ "$num_jars" -gt "1" ]; then
-  jars_list=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*.jar")
-  echo "Found multiple Spark assembly jars in $assembly_folder:"
-  echo "$jars_list"
-  echo "Please remove all but one jar."
+  echo "Found multiple Spark assembly jars in $assembly_folder:" 1>&2
+  ls ${assembly_folder}/spark-assembly*hadoop*.jar 1>&2
+  echo "Please remove all but one jar." 1>&2
   exit 1
 fi

-ASSEMBLY_JAR="$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)"
-
 # Verify that versions of java used to build the jars and run Spark are compatible
 jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
 if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
@@ -108,7 +117,7 @@ else
   datanucleus_dir="$FWDIR"/lib_managed/jars
 fi

-datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")"
+datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar$")"
 datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)"

 if [ -n "$datanucleus_jars" ]; then
@@ -142,4 +151,11 @@ if [ -n "$YARN_CONF_DIR" ]; then
   CLASSPATH="$CLASSPATH:$YARN_CONF_DIR"
 fi

+# To allow for distributions to append needed libraries to the classpath (e.g. when
+# using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
+# append it to tbe final classpath.
+if [ -n "$SPARK_DIST_CLASSPATH" ]; then
+  CLASSPATH="$CLASSPATH:$SPARK_DIST_CLASSPATH"
+fi
+
 echo "$CLASSPATH"
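The SPARK_DIST_CLASSPATH hook is what makes the new *-provided profiles usable at runtime: jars kept out of the assembly have to come from somewhere on the host. A hypothetical usage sketch (the variable name is from this diff; `hadoop classpath` is the stock Hadoop CLI helper, not part of this commit):

  # In a distribution's spark-env.sh, hand Spark the host's Hadoop jars.
  export SPARK_DIST_CLASSPATH=$(hadoop classpath)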

bin/run-example

Lines changed: 21 additions & 6 deletions
@@ -35,17 +35,32 @@ else
 fi

 if [ -f "$FWDIR/RELEASE" ]; then
-  export SPARK_EXAMPLES_JAR="`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`"
-elif [ -e "$EXAMPLES_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
-  export SPARK_EXAMPLES_JAR="`ls "$EXAMPLES_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-examples-*hadoop*.jar`"
+  JAR_PATH="${FWDIR}/lib"
+else
+  JAR_PATH="${EXAMPLES_DIR}/target/scala-${SPARK_SCALA_VERSION}"
 fi

-if [[ -z "$SPARK_EXAMPLES_JAR" ]]; then
-  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
-  echo "You need to build Spark before running this program" 1>&2
+JAR_COUNT=0
+
+for f in ${JAR_PATH}/spark-examples-*hadoop*.jar; do
+  if [[ ! -e "$f" ]]; then
+    echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
+    echo "You need to build Spark before running this program" 1>&2
+    exit 1
+  fi
+  SPARK_EXAMPLES_JAR="$f"
+  JAR_COUNT=$((JAR_COUNT+1))
+done
+
+if [ "$JAR_COUNT" -gt "1" ]; then
+  echo "Found multiple Spark examples assembly jars in ${JAR_PATH}" 1>&2
+  ls ${JAR_PATH}/spark-examples-*hadoop*.jar 1>&2
+  echo "Please remove all but one jar." 1>&2
   exit 1
 fi

+export SPARK_EXAMPLES_JAR
+
 EXAMPLE_MASTER=${MASTER:-"local[*]"}

 if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
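Both this script and compute-classpath.sh now lean on the same bash idiom: when a glob matches nothing, bash leaves the literal pattern in the loop variable, so a single -e test catches the "no jar built" case without spawning an `ls | grep | wc` pipeline. A standalone sketch of the idiom (the directory and pattern are illustrative only):

  count=0
  for f in jars/example-*.jar; do
    # An unmatched glob stays literal, so -e fails on the first iteration.
    [[ -e "$f" ]] || { echo "no jar found" 1>&2; exit 1; }
    found="$f"
    count=$((count + 1))
  done
  echo "found $count jar(s); last candidate: $found"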
