Skip to content

Commit a9eae7e

Browse files
committed
Merge branch 'master' of github.com:apache/spark
2 parents d5154da + 21109fb commit a9eae7e

File tree

126 files changed

+2876
-926
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

126 files changed

+2876
-926
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,4 @@ dist/
4545
spark-*-bin.tar.gz
4646
unit-tests.log
4747
/lib/
48+
rat-results.txt

.rat-excludes

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
target
2+
.gitignore
3+
.project
4+
.classpath
5+
.rat-excludes
6+
.*md
7+
derby.log
8+
TAGS
9+
RELEASE
10+
control
11+
docs
12+
fairscheduler.xml.template
13+
log4j.properties
14+
log4j.properties.template
15+
metrics.properties.template
16+
slaves
17+
spark-env.sh
18+
spark-env.sh.template
19+
log4j-defaults.properties
20+
sorttable.js
21+
.*txt
22+
.*data
23+
.*log
24+
cloudpickle.py
25+
join.py
26+
SparkExprTyper.scala
27+
SparkILoop.scala
28+
SparkILoopInit.scala
29+
SparkIMain.scala
30+
SparkImports.scala
31+
SparkJLineCompletion.scala
32+
SparkJLineReader.scala
33+
SparkMemberHandlers.scala
34+
sbt
35+
sbt-launch-lib.bash
36+
plugins.sbt
37+
work
38+
.*\.q
39+
golden

NOTICE

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,12 @@ Copyright 2014 The Apache Software Foundation.
33

44
This product includes software developed at
55
The Apache Software Foundation (http://www.apache.org/).
6+
7+
In addition, this product includes:
8+
9+
- JUnit (http://www.junit.org) is a testing framework for Java. We included it
10+
under the terms of the Eclipse Public License v1.0.
11+
12+
- JTransforms (https://sites.google.com/site/piotrwendykier/software/jtransforms)
13+
provides fast transforms in Java. It is tri-licensed, and we included it under
14+
the terms of the Mozilla Public License v1.1.

core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ private[ui] class StageTable(stages: Seq[StageInfo], parent: JobProgressUI) {
6363
val startWidth = "width: %s%%".format((started.toDouble/total)*100)
6464

6565
<div class="progress">
66-
<span style="text-align:center; position:absolute; width:100%;">
66+
<span style="text-align:center; position:absolute; width:100%; left:0;">
6767
{completed}/{total} {failed}
6868
</span>
6969
<div class="bar bar-completed" style={completeWidth}></div>

dev/check-license

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/env bash
2+
3+
#
4+
# Licensed to the Apache Software Foundation (ASF) under one or more
5+
# contributor license agreements. See the NOTICE file distributed with
6+
# this work for additional information regarding copyright ownership.
7+
# The ASF licenses this file to You under the Apache License, Version 2.0
8+
# (the "License"); you may not use this file except in compliance with
9+
# the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
20+
21+
acquire_rat_jar () {
22+
23+
URL1="http://search.maven.org/remotecontent?filepath=org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar"
24+
URL2="http://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar"
25+
26+
JAR=$rat_jar
27+
28+
if [[ ! -f "$rat_jar" ]]; then
29+
# Download the Apache RAT jar if it hasn't been downloaded yet
30+
if [ ! -f ${JAR} ]; then
31+
# Download
32+
printf "Attempting to fetch rat\n"
33+
JAR_DL=${JAR}.part
34+
if hash curl 2>/dev/null; then
35+
(curl --progress-bar ${URL1} > ${JAR_DL} || curl --progress-bar ${URL2} > ${JAR_DL}) && mv ${JAR_DL} ${JAR}
36+
elif hash wget 2>/dev/null; then
37+
(wget --progress=bar ${URL1} -O ${JAR_DL} || wget --progress=bar ${URL2} -O ${JAR_DL}) && mv ${JAR_DL} ${JAR}
38+
else
39+
printf "You do not have curl or wget installed, please install rat manually.\n"
40+
exit -1
41+
fi
42+
fi
43+
if [ ! -f ${JAR} ]; then
44+
# We failed to download
45+
printf "Our attempt to download rat locally to ${JAR} failed. Please install rat manually.\n"
46+
exit -1
47+
fi
48+
printf "Launching rat from ${JAR}\n"
49+
fi
50+
}
51+
52+
# Go to the Spark project root directory
53+
FWDIR="$(cd `dirname $0`/..; pwd)"
54+
cd $FWDIR
55+
56+
if test -x "$JAVA_HOME/bin/java"; then
57+
declare java_cmd="$JAVA_HOME/bin/java"
58+
else
59+
declare java_cmd=java
60+
fi
61+
62+
export RAT_VERSION=0.10
63+
export rat_jar=$FWDIR/lib/apache-rat-${RAT_VERSION}.jar
64+
mkdir -p $FWDIR/lib
65+
66+
[[ -f "$rat_jar" ]] || acquire_rat_jar || {
67+
echo "Download failed. Obtain the rat jar manually and place it at $rat_jar"
68+
exit 1
69+
}
70+
71+
$java_cmd -jar $rat_jar -E $FWDIR/.rat-excludes -d $FWDIR > rat-results.txt
72+
73+
ERRORS=$(cat rat-results.txt | grep -e "??")
74+
75+
if test ! -z "$ERRORS"; then
76+
echo "Could not find Apache license headers in the following files:"
77+
echo "$ERRORS"
78+
exit 1
79+
else
80+
echo -e "RAT checks passed."
81+
fi

dev/run-tests

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,13 @@ else
3434
fi
3535

3636
JAVA_VERSION=$($java_cmd -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
37-
[ "$JAVA_VERSION" -ge 18 ] && echo "" || echo "[Warn] Java 8 tests will not run, because JDK version is < 1.8."
37+
[ "$JAVA_VERSION" -ge 18 ] && echo "" || echo "[Warn] Java 8 tests will not run because JDK version is < 1.8."
3838

39+
echo "========================================================================="
40+
echo "Running Apache RAT checks"
41+
echo "========================================================================="
42+
43+
dev/check-license
3944

4045
echo "========================================================================="
4146
echo "Running Scala style checks"

examples/src/main/java/org/apache/spark/examples/JavaKMeans.java

Lines changed: 0 additions & 138 deletions
This file was deleted.

examples/src/main/java/org/apache/spark/mllib/examples/JavaKMeans.java

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,32 +17,33 @@
1717

1818
package org.apache.spark.mllib.examples;
1919

20+
import java.util.regex.Pattern;
21+
2022
import org.apache.spark.api.java.JavaRDD;
2123
import org.apache.spark.api.java.JavaSparkContext;
2224
import org.apache.spark.api.java.function.Function;
2325

2426
import org.apache.spark.mllib.clustering.KMeans;
2527
import org.apache.spark.mllib.clustering.KMeansModel;
26-
27-
import java.util.Arrays;
28-
import java.util.regex.Pattern;
28+
import org.apache.spark.mllib.linalg.Vector;
29+
import org.apache.spark.mllib.linalg.Vectors;
2930

3031
/**
3132
* Example using MLLib KMeans from Java.
3233
*/
3334
public final class JavaKMeans {
3435

35-
static class ParsePoint implements Function<String, double[]> {
36+
private static class ParsePoint implements Function<String, Vector> {
3637
private static final Pattern SPACE = Pattern.compile(" ");
3738

3839
@Override
39-
public double[] call(String line) {
40+
public Vector call(String line) {
4041
String[] tok = SPACE.split(line);
4142
double[] point = new double[tok.length];
4243
for (int i = 0; i < tok.length; ++i) {
4344
point[i] = Double.parseDouble(tok[i]);
4445
}
45-
return point;
46+
return Vectors.dense(point);
4647
}
4748
}
4849

@@ -65,15 +66,15 @@ public static void main(String[] args) {
6566

6667
JavaSparkContext sc = new JavaSparkContext(args[0], "JavaKMeans",
6768
System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(JavaKMeans.class));
68-
JavaRDD<String> lines = sc.textFile(args[1]);
69+
JavaRDD<String> lines = sc.textFile(inputFile);
6970

70-
JavaRDD<double[]> points = lines.map(new ParsePoint());
71+
JavaRDD<Vector> points = lines.map(new ParsePoint());
7172

72-
KMeansModel model = KMeans.train(points.rdd(), k, iterations, runs);
73+
KMeansModel model = KMeans.train(points.rdd(), k, iterations, runs, KMeans.K_MEANS_PARALLEL());
7374

7475
System.out.println("Cluster centers:");
75-
for (double[] center : model.clusterCenters()) {
76-
System.out.println(" " + Arrays.toString(center));
76+
for (Vector center : model.clusterCenters()) {
77+
System.out.println(" " + center);
7778
}
7879
double cost = model.computeCost(points.rdd());
7980
System.out.println("Cost: " + cost);

mllib/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@
6060
<artifactId>jblas</artifactId>
6161
<version>1.2.3</version>
6262
</dependency>
63+
<dependency>
64+
<groupId>org.scalanlp</groupId>
65+
<artifactId>breeze_${scala.binary.version}</artifactId>
66+
<version>0.7</version>
67+
</dependency>
6368
<dependency>
6469
<groupId>org.scalatest</groupId>
6570
<artifactId>scalatest_${scala.binary.version}</artifactId>

0 commit comments

Comments
 (0)