diff --git a/assembly/pom.xml b/assembly/pom.xml
index 884447a7b4943..ba2350b5c17f5 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/bagel/pom.xml b/bagel/pom.xml
index d19118c418fcb..fbde3e22bb85f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/core/pom.xml b/core/pom.xml
index 4e62d294e07a2..436ac62b06152 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
index 06e616220c706..f413c1d37fbb6 100644
--- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
@@ -210,10 +210,16 @@ private[spark] object JettyUtils extends Logging {
conf: SparkConf,
serverName: String = ""): ServerInfo = {
- val collection = new ContextHandlerCollection
- collection.setHandlers(handlers.toArray)
addFilters(handlers, conf)
+ val collection = new ContextHandlerCollection
+ val gzipHandlers = handlers.map { h =>
+ val gzipHandler = new GzipHandler
+ gzipHandler.setHandler(h)
+ gzipHandler
+ }
+ collection.setHandlers(gzipHandlers.toArray)
+
// Bind to the given port, or throw a java.net.BindException if the port is occupied
def connect(currentPort: Int): (Server, Int) = {
val server = new Server(new InetSocketAddress(hostName, currentPort))
diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh
index 54274a83f6d66..30190dcd41ec5 100755
--- a/dev/create-release/create-release.sh
+++ b/dev/create-release/create-release.sh
@@ -118,13 +118,13 @@ if [[ ! "$@" =~ --skip-publish ]]; then
rm -rf $SPARK_REPO
- build/mvn -DskipTests -Pyarn -Phive \
+ build/mvn -DskipTests -Pyarn -Phive -Prelease\
-Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
clean install
./dev/change-version-to-2.11.sh
- build/mvn -DskipTests -Pyarn -Phive \
+ build/mvn -DskipTests -Pyarn -Phive -Prelease\
-Dscala-2.11 -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
clean install
diff --git a/docs/configuration.md b/docs/configuration.md
index affcd21514d88..19f3b7eaf7748 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1007,9 +1007,9 @@ Apart from these, the following properties are also available, and may be useful
   <td><code>spark.rpc.numRetries</code></td>
   <td>3</td>
+  <td>
     Number of times to retry before an RPC task gives up.
     An RPC task will run at most times of this number.
-  </td>
   </td>
@@ -1029,8 +1029,8 @@ Apart from these, the following properties are also available, and may be useful
   <td><code>spark.rpc.lookupTimeout</code></td>
   <td>120s</td>
-  Duration for an RPC remote endpoint lookup operation to wait before timing out.
+  <td>Duration for an RPC remote endpoint lookup operation to wait before timing out.
   </td>
diff --git a/docs/sparkr.md b/docs/sparkr.md
index 095ea4308cfeb..4385a4eeacd5c 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -68,7 +68,7 @@ you can specify the packages with the `packages` argument.
{% highlight r %}
-sc <- sparkR.init(packages="com.databricks:spark-csv_2.11:1.0.3")
+sc <- sparkR.init(sparkPackages="com.databricks:spark-csv_2.11:1.0.3")
sqlContext <- sparkRSQL.init(sc)
{% endhighlight %}
@@ -116,7 +116,7 @@ sql(hiveContext, "CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
sql(hiveContext, "LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src")
# Queries can be expressed in HiveQL.
-results <- hiveContext.sql("FROM src SELECT key, value")
+results <- sql(hiveContext, "FROM src SELECT key, value")
# results is now a DataFrame
head(results)
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 572c6788f2f5c..79111a74b0c25 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -828,7 +828,7 @@ using this syntax.
{% highlight scala %}
val df = sqlContext.read.format("json").load("examples/src/main/resources/people.json")
-df.select("name", "age").write.format("json").save("namesAndAges.json")
+df.select("name", "age").write.format("parquet").save("namesAndAges.parquet")
{% endhighlight %}
@@ -1518,7 +1518,7 @@ sql(sqlContext, "CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
sql(sqlContext, "LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src")
# Queries can be expressed in HiveQL.
-results = sqlContext.sql("FROM src SELECT key, value").collect()
+results <- collect(sql(sqlContext, "FROM src SELECT key, value"))
{% endhighlight %}
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index 1eb3b30332e4f..33b279cf3492f 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -854,6 +854,8 @@ it with new information. To use this, you will have to do two steps.
1. Define the state update function - Specify with a function how to update the state using the
previous state and the new values from an input stream.
+In every batch, Spark will apply the state update function for all existing keys, regardless of whether they have new data in a batch or not. If the update function returns `None` then the key-value pair will be eliminated.
+
Let's illustrate this with an example. Say you want to maintain a running count of each word
seen in a text data stream. Here, the running count is the state and it is an integer. We
define the update function as:
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 05fa47f188c19..3880c2d5962e1 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -127,7 +127,7 @@ def setup_external_libs(libs):
)
with open(tgz_file_path, "wb") as tgz_file:
tgz_file.write(download_stream.read())
- with open(tgz_file_path) as tar:
+ with open(tgz_file_path, "rb") as tar:
if hashlib.md5(tar.read()).hexdigest() != lib["md5"]:
print("ERROR: Got wrong md5sum for {lib}.".format(lib=lib["name"]), file=stderr)
sys.exit(1)
@@ -1111,8 +1111,8 @@ def ssh(host, opts, command):
# If this was an ssh failure, provide the user with hints.
if e.returncode == 255:
raise UsageError(
- "Failed to SSH to remote host {0}.\n" +
- "Please check that you have provided the correct --identity-file and " +
+ "Failed to SSH to remote host {0}.\n"
+ "Please check that you have provided the correct --identity-file and "
"--key-pair parameters and try again.".format(host))
else:
raise e
diff --git a/examples/pom.xml b/examples/pom.xml
index 4c4a40bed4d98..25c0e4a8f56f0 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index 052aeba88cf97..5feb8460fb585 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index ea5f07766cba0..d5ec8112d189c 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/external/kafka-assembly/pom.xml b/external/kafka-assembly/pom.xml
index 2cde6966a7372..a9a96bd87a033 100644
--- a/external/kafka-assembly/pom.xml
+++ b/external/kafka-assembly/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 9b107766ad061..f97511ebbc4ba 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
index 65d51d87f8486..c88b5db3f491a 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
+++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
@@ -402,7 +402,7 @@ object KafkaCluster {
}
Seq("zookeeper.connect", "group.id").foreach { s =>
- if (!props.contains(s)) {
+ if (!props.containsKey(s)) {
props.setProperty(s, "")
}
}
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 143b2a21f9b0b..00f0b2cae129d 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index d8d624479a4ad..e55ec4b9349ec 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index aac75e0d6cb53..acdd5e447b25c 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/extras/kinesis-asl/pom.xml b/extras/kinesis-asl/pom.xml
index 4bfb500f43326..96c0f6c5f3b20 100644
--- a/extras/kinesis-asl/pom.xml
+++ b/extras/kinesis-asl/pom.xml
@@ -40,6 +40,13 @@
       <artifactId>spark-streaming_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-streaming_${scala.binary.version}</artifactId>
diff --git a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala
index 6c262624833cd..2103dca6b766f 100644
--- a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala
+++ b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala
@@ -26,23 +26,18 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionIn
import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason
import com.amazonaws.services.kinesis.model.Record
import org.mockito.Mockito._
-// scalastyle:off
-// To avoid introducing a dependency on Spark core tests, simply use scalatest's FunSuite
-// here instead of our own SparkFunSuite. Introducing the dependency has caused problems
-// in the past (SPARK-8781) that are complicated by bugs in the maven shade plugin (MSHADE-148).
-import org.scalatest.{BeforeAndAfter, FunSuite, Matchers}
+import org.scalatest.{BeforeAndAfter, Matchers}
import org.scalatest.mock.MockitoSugar
import org.apache.spark.storage.StorageLevel
-import org.apache.spark.streaming.{Milliseconds, Seconds, StreamingContext}
+import org.apache.spark.streaming.{Milliseconds, Seconds, StreamingContext, TestSuiteBase}
import org.apache.spark.util.{Clock, ManualClock, Utils}
/**
* Suite of Kinesis streaming receiver tests focusing mostly on the KinesisRecordProcessor
*/
-class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter
- with MockitoSugar {
-// scalastyle:on
+class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAfter
+ with MockitoSugar {
val app = "TestKinesisReceiver"
val stream = "mySparkStream"
@@ -62,7 +57,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter
var checkpointStateMock: KinesisCheckpointState = _
var currentClockMock: Clock = _
- before {
+ override def beforeFunction(): Unit = {
receiverMock = mock[KinesisReceiver]
checkpointerMock = mock[IRecordProcessorCheckpointer]
checkpointClockMock = mock[ManualClock]
@@ -70,7 +65,8 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter
currentClockMock = mock[Clock]
}
- after {
+ override def afterFunction(): Unit = {
+ super.afterFunction()
// Since this suite was originally written using EasyMock, add this to preserve the old
// mocking semantics (see SPARK-5735 for more details)
verifyNoMoreInteractions(receiverMock, checkpointerMock, checkpointClockMock,
@@ -78,7 +74,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter
}
test("KinesisUtils API") {
- val ssc = new StreamingContext("local[2]", getClass.getSimpleName, Seconds(1))
+ val ssc = new StreamingContext(master, framework, batchDuration)
// Tests the API, does not actually test data receiving
val kinesisStream1 = KinesisUtils.createStream(ssc, "mySparkStream",
"https://kinesis.us-west-2.amazonaws.com", Seconds(2),
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 7a5e0d92059cf..986612d7f31c0 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/launcher/pom.xml b/launcher/pom.xml
index 973f7cf021619..757dcc94a915d 100644
--- a/launcher/pom.xml
+++ b/launcher/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
index b8f02b961113d..b2591c2ec4af6 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
@@ -133,7 +133,7 @@ void addPermGenSizeOpt(List<String> cmd) {
}
}
- cmd.add("-XX:MaxPermSize=128m");
+ cmd.add("-XX:MaxPermSize=256m");
}
void addOptionString(List<String> cmd, String options) {
diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
index 97043a76cc612..7329ac9f7fb8c 100644
--- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
+++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
@@ -194,7 +194,7 @@ private void testCmdBuilder(boolean isDriver) throws Exception {
if (isDriver) {
assertEquals("-XX:MaxPermSize=256m", arg);
} else {
- assertEquals("-XX:MaxPermSize=128m", arg);
+ assertEquals("-XX:MaxPermSize=256m", arg);
}
}
}
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 7f365a04de9ea..0fe3ac62f79c5 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/network/common/pom.xml b/network/common/pom.xml
index 0fc0a536ec0c9..90a0c3d6ce7ac 100644
--- a/network/common/pom.xml
+++ b/network/common/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/network/shuffle/pom.xml b/network/shuffle/pom.xml
index 612c32e58db8f..563d8c0bf014f 100644
--- a/network/shuffle/pom.xml
+++ b/network/shuffle/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/network/yarn/pom.xml b/network/yarn/pom.xml
index a9ba04ec5b759..afbb5e84326be 100644
--- a/network/yarn/pom.xml
+++ b/network/yarn/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/pom.xml b/pom.xml
index dd639e13f5418..29c0e5640b7a0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-parent_2.10</artifactId>
-  <version>1.4.0-csd-5-SNAPSHOT</version>
+  <version>1.4.1-csd-1-SNAPSHOT</version>
   <packaging>pom</packaging>
   <name>Spark Project Parent POM</name>
   <url>http://spark.apache.org/</url>
@@ -164,6 +164,8 @@
     <fasterxml.jackson.version>2.4.4</fasterxml.jackson.version>
     <snappy.version>1.1.1.7</snappy.version>
     <netlib.java.version>1.1.2</netlib.java.version>
+
+    <create.dependency.reduced.pom>false</create.dependency.reduced.pom>
     <test.java.home>${java.home}</test.java.home>
@@ -1312,6 +1314,7 @@
               <spark.ui.enabled>false</spark.ui.enabled>
               <spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
               <spark.driver.allowMultipleContexts>true</spark.driver.allowMultipleContexts>
+              <spark.unsafe.exceptionOnMemoryLeak>true</spark.unsafe.exceptionOnMemoryLeak>
@@ -1459,6 +1462,8 @@
           <version>2.3</version>
           <shadedArtifactAttached>false</shadedArtifactAttached>
+
+          <createDependencyReducedPom>${create.dependency.reduced.pom}</createDependencyReducedPom>
@@ -1843,6 +1848,26 @@
+
+    <profile>
+      <id>release</id>
+
+      <properties>
+        <create.dependency.reduced.pom>true</create.dependency.reduced.pom>
+      </properties>
+
+