diff --git a/assembly/pom.xml b/assembly/pom.xml
index 884447a7b4943..ba2350b5c17f5 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/bagel/pom.xml b/bagel/pom.xml
index d19118c418fcb..fbde3e22bb85f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/core/pom.xml b/core/pom.xml
index 4e62d294e07a2..436ac62b06152 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
index 06e616220c706..f413c1d37fbb6 100644
--- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
@@ -210,10 +210,16 @@ private[spark] object JettyUtils extends Logging {
       conf: SparkConf,
       serverName: String = ""): ServerInfo = {
 
-    val collection = new ContextHandlerCollection
-    collection.setHandlers(handlers.toArray)
     addFilters(handlers, conf)
 
+    val collection = new ContextHandlerCollection
+    val gzipHandlers = handlers.map { h =>
+      val gzipHandler = new GzipHandler
+      gzipHandler.setHandler(h)
+      gzipHandler
+    }
+    collection.setHandlers(gzipHandlers.toArray)
+
     // Bind to the given port, or throw a java.net.BindException if the port is occupied
     def connect(currentPort: Int): (Server, Int) = {
       val server = new Server(new InetSocketAddress(hostName, currentPort))
diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh
index 54274a83f6d66..30190dcd41ec5 100755
--- a/dev/create-release/create-release.sh
+++ b/dev/create-release/create-release.sh
@@ -118,13 +118,13 @@ if [[ ! "$@" =~ --skip-publish ]]; then
 
   rm -rf $SPARK_REPO
 
-  build/mvn -DskipTests -Pyarn -Phive \
+  build/mvn -DskipTests -Pyarn -Phive -Prelease\
     -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
     clean install
 
   ./dev/change-version-to-2.11.sh
 
-  build/mvn -DskipTests -Pyarn -Phive \
+  build/mvn -DskipTests -Pyarn -Phive -Prelease\
     -Dscala-2.11 -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
     clean install
 
diff --git a/docs/configuration.md b/docs/configuration.md
index affcd21514d88..19f3b7eaf7748 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1007,9 +1007,9 @@ Apart from these, the following properties are also available, and may be useful
 <tr>
   <td><code>spark.rpc.numRetries</code></td>
   <td>3</td>
+  <td>
     Number of times to retry before an RPC task gives up.
     An RPC task will run at most times of this number.
-  <td>
   </td>
 </tr>
@@ -1029,8 +1029,8 @@ Apart from these, the following properties are also available, and may be useful
 <tr>
   <td><code>spark.rpc.lookupTimeout</code></td>
   <td>120s</td>
-  Duration for an RPC remote endpoint lookup operation to wait before timing out.
   <td>
+  Duration for an RPC remote endpoint lookup operation to wait before timing out.
   </td>
 </tr>
diff --git a/docs/sparkr.md b/docs/sparkr.md
index 095ea4308cfeb..4385a4eeacd5c 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -68,7 +68,7 @@ you can specify the packages with the `packages` argument.
 
 <div data-lang="r" markdown="1">
 {% highlight r %}
-sc <- sparkR.init(packages="com.databricks:spark-csv_2.11:1.0.3")
+sc <- sparkR.init(sparkPackages="com.databricks:spark-csv_2.11:1.0.3")
 sqlContext <- sparkRSQL.init(sc)
 {% endhighlight %}
 </div>
@@ -116,7 +116,7 @@ sql(hiveContext, "CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
 sql(hiveContext, "LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src")
 
 # Queries can be expressed in HiveQL.
-results <- hiveContext.sql("FROM src SELECT key, value")
+results <- sql(hiveContext, "FROM src SELECT key, value")
 
 # results is now a DataFrame
 head(results)
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 572c6788f2f5c..79111a74b0c25 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -828,7 +828,7 @@ using this syntax.
 
 {% highlight scala %}
 val df = sqlContext.read.format("json").load("examples/src/main/resources/people.json")
-df.select("name", "age").write.format("json").save("namesAndAges.json")
+df.select("name", "age").write.format("parquet").save("namesAndAges.parquet")
 {% endhighlight %}
 
 </div>
@@ -1518,7 +1518,7 @@ sql(sqlContext, "CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
 sql(sqlContext, "LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src")
 
 # Queries can be expressed in HiveQL.
-results = sqlContext.sql("FROM src SELECT key, value").collect()
+results <- collect(sql(sqlContext, "FROM src SELECT key, value"))
 
 {% endhighlight %}
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index 1eb3b30332e4f..33b279cf3492f 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -854,6 +854,8 @@ it with new information. To use this, you will have to do two steps.
 
 1. Define the state update function - Specify with a function how to update the state using the previous state and the new values from an input stream.
 
+In every batch, Spark will apply the state update function for all existing keys, regardless of whether they have new data in a batch or not. If the update function returns `None` then the key-value pair will be eliminated.
+
 Let's illustrate this with an example. Say you want to maintain a running count of each word seen in a text data stream. Here, the running count is the state and it is an integer. We define the update function as:
 
 <div class="codetabs">
diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 05fa47f188c19..3880c2d5962e1 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -127,7 +127,7 @@ def setup_external_libs(libs):
             )
             with open(tgz_file_path, "wb") as tgz_file:
                 tgz_file.write(download_stream.read())
-            with open(tgz_file_path) as tar:
+            with open(tgz_file_path, "rb") as tar:
                 if hashlib.md5(tar.read()).hexdigest() != lib["md5"]:
                     print("ERROR: Got wrong md5sum for {lib}.".format(lib=lib["name"]), file=stderr)
                     sys.exit(1)
@@ -1111,8 +1111,8 @@ def ssh(host, opts, command):
                 # If this was an ssh failure, provide the user with hints.
                 if e.returncode == 255:
                     raise UsageError(
-                        "Failed to SSH to remote host {0}.\n" +
-                        "Please check that you have provided the correct --identity-file and " +
+                        "Failed to SSH to remote host {0}.\n"
+                        "Please check that you have provided the correct --identity-file and "
                         "--key-pair parameters and try again.".format(host))
                 else:
                     raise e
diff --git a/examples/pom.xml b/examples/pom.xml
index 4c4a40bed4d98..25c0e4a8f56f0 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
    <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index 052aeba88cf97..5feb8460fb585 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index ea5f07766cba0..d5ec8112d189c 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/external/kafka-assembly/pom.xml b/external/kafka-assembly/pom.xml
index 2cde6966a7372..a9a96bd87a033 100644
--- a/external/kafka-assembly/pom.xml
+++ b/external/kafka-assembly/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
index 9b107766ad061..f97511ebbc4ba 100644
--- a/external/kafka/pom.xml
+++ b/external/kafka/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
index 65d51d87f8486..c88b5db3f491a 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
+++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
@@ -402,7 +402,7 @@ object KafkaCluster {
     }
 
     Seq("zookeeper.connect", "group.id").foreach { s =>
-      if (!props.contains(s)) {
+      if (!props.containsKey(s)) {
         props.setProperty(s, "")
       }
     }
diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
index 143b2a21f9b0b..00f0b2cae129d 100644
--- a/external/mqtt/pom.xml
+++ b/external/mqtt/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
index d8d624479a4ad..e55ec4b9349ec 100644
--- a/external/twitter/pom.xml
+++ b/external/twitter/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index aac75e0d6cb53..acdd5e447b25c 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/extras/kinesis-asl/pom.xml b/extras/kinesis-asl/pom.xml
index 4bfb500f43326..96c0f6c5f3b20 100644
--- a/extras/kinesis-asl/pom.xml
+++ b/extras/kinesis-asl/pom.xml
@@ -40,6 +40,13 @@
       <artifactId>spark-streaming_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-streaming_${scala.binary.version}</artifactId>
diff --git a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala
index 6c262624833cd..2103dca6b766f 100644
--- a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala
+++ b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala
@@ -26,23 +26,18 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
 import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason
 import com.amazonaws.services.kinesis.model.Record
 import org.mockito.Mockito._
-// scalastyle:off
-// To avoid introducing a dependency on Spark core tests, simply use scalatest's FunSuite
-// here instead of our own SparkFunSuite. Introducing the dependency has caused problems
-// in the past (SPARK-8781) that are complicated by bugs in the maven shade plugin (MSHADE-148).
-import org.scalatest.{BeforeAndAfter, FunSuite, Matchers}
+import org.scalatest.{BeforeAndAfter, Matchers}
 import org.scalatest.mock.MockitoSugar
 
 import org.apache.spark.storage.StorageLevel
-import org.apache.spark.streaming.{Milliseconds, Seconds, StreamingContext}
+import org.apache.spark.streaming.{Milliseconds, Seconds, StreamingContext, TestSuiteBase}
 import org.apache.spark.util.{Clock, ManualClock, Utils}
 
 /**
  * Suite of Kinesis streaming receiver tests focusing mostly on the KinesisRecordProcessor
  */
-class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter
-  with MockitoSugar {
-// scalastyle:on
+class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAfter
+    with MockitoSugar {
 
   val app = "TestKinesisReceiver"
   val stream = "mySparkStream"
@@ -62,7 +57,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter
   var checkpointStateMock: KinesisCheckpointState = _
   var currentClockMock: Clock = _
 
-  before {
+  override def beforeFunction(): Unit = {
     receiverMock = mock[KinesisReceiver]
     checkpointerMock = mock[IRecordProcessorCheckpointer]
     checkpointClockMock = mock[ManualClock]
@@ -70,7 +65,8 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter
     currentClockMock = mock[Clock]
   }
 
-  after {
+  override def afterFunction(): Unit = {
+    super.afterFunction()
     // Since this suite was originally written using EasyMock, add this to preserve the old
     // mocking semantics (see SPARK-5735 for more details)
     verifyNoMoreInteractions(receiverMock, checkpointerMock, checkpointClockMock,
@@ -78,7 +74,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter
   }
 
   test("KinesisUtils API") {
-    val ssc = new StreamingContext("local[2]", getClass.getSimpleName, Seconds(1))
+    val ssc = new StreamingContext(master, framework, batchDuration)
     // Tests the API, does not actually test data receiving
     val kinesisStream1 = KinesisUtils.createStream(ssc, "mySparkStream",
       "https://kinesis.us-west-2.amazonaws.com", Seconds(2),
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 7a5e0d92059cf..986612d7f31c0 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/launcher/pom.xml b/launcher/pom.xml
index 973f7cf021619..757dcc94a915d 100644
--- a/launcher/pom.xml
+++ b/launcher/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
index b8f02b961113d..b2591c2ec4af6 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
@@ -133,7 +133,7 @@ void addPermGenSizeOpt(List<String> cmd) {
       }
     }
 
-    cmd.add("-XX:MaxPermSize=128m");
+    cmd.add("-XX:MaxPermSize=256m");
   }
 
   void addOptionString(List<String> cmd, String options) {
diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
index 97043a76cc612..7329ac9f7fb8c 100644
--- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
+++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
@@ -194,7 +194,7 @@ private void testCmdBuilder(boolean isDriver) throws Exception {
       if (isDriver) {
         assertEquals("-XX:MaxPermSize=256m", arg);
       } else {
-        assertEquals("-XX:MaxPermSize=128m", arg);
+        assertEquals("-XX:MaxPermSize=256m", arg);
       }
     }
   }
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 7f365a04de9ea..0fe3ac62f79c5 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
diff --git a/network/common/pom.xml b/network/common/pom.xml
index 0fc0a536ec0c9..90a0c3d6ce7ac 100644
--- a/network/common/pom.xml
+++ b/network/common/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/network/shuffle/pom.xml b/network/shuffle/pom.xml
index 612c32e58db8f..563d8c0bf014f 100644
--- a/network/shuffle/pom.xml
+++ b/network/shuffle/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/network/yarn/pom.xml b/network/yarn/pom.xml
index a9ba04ec5b759..afbb5e84326be 100644
--- a/network/yarn/pom.xml
+++ b/network/yarn/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
diff --git a/pom.xml b/pom.xml
index dd639e13f5418..29c0e5640b7a0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -25,7 +25,7 @@
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-parent_2.10</artifactId>
-  <version>1.4.0-csd-5-SNAPSHOT</version>
+  <version>1.4.1-csd-1-SNAPSHOT</version>
   <packaging>pom</packaging>
   <name>Spark Project Parent POM</name>
   <url>http://spark.apache.org/</url>
@@ -164,6 +164,8 @@
     <fasterxml.jackson.version>2.4.4</fasterxml.jackson.version>
     <snappy.version>1.1.1.7</snappy.version>
     <netlib.java.version>1.1.2</netlib.java.version>
+
+    <create.dependency.reduced.pom>false</create.dependency.reduced.pom>
 
     <test.java.home>${java.home}</test.java.home>
@@ -1312,6 +1314,7 @@
               false
               false
               true
+              true
@@ -1459,6 +1462,8 @@
           <version>2.3</version>
           <configuration>
             <shadedArtifactAttached>false</shadedArtifactAttached>
+
+            <createDependencyReducedPom>${create.dependency.reduced.pom}</createDependencyReducedPom>
@@ -1843,6 +1848,26 @@
+    <profile>
+      <id>release</id>
+      <properties>
+        <create.dependency.reduced.pom>true</create.dependency.reduced.pom>
+      </properties>
+    </profile>