diff --git a/.gitignore b/.gitignore index e1f64a113390a..3d178992123da 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,10 @@ *~ *.swp +*.ipr *.iml +*.iws .idea/ +sbt/*.jar .settings .cache /build/ @@ -41,3 +44,4 @@ derby.log dist/ spark-*-bin.tar.gz unit-tests.log +/lib/ diff --git a/CHANGES.txt b/CHANGES.txt index 66b754dd78b87..8c78d55ccd862 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,929 +1,3047 @@ Spark Change Log -Release 0.8.1-incubating +Release 0.9.0-incubating - d03589d Mon Dec 9 23:10:00 2013 -0800 - Merge pull request #248 from colorant/branch-0.8 - [Fix POM file for mvn assembly on hadoop 2.2 Yarn] + d0a105d Thu Jan 23 20:53:31 2014 -0800 + Merge pull request #505 from JoshRosen/SPARK-1026 + [Deprecate mapPartitionsWithSplit in PySpark (SPARK-1026)] - 3e1f78c Sun Dec 8 21:34:12 2013 -0800 + e66d4c2 Thu Jan 23 19:47:16 2014 -0800 + Merge pull request #503 from pwendell/master + [Fix bug on read-side of external sort when using Snappy.] + + e8d3f2b Thu Jan 23 19:20:22 2014 -0800 + Merge pull request #502 from pwendell/clone-1 + [Remove Hadoop object cloning and warn users making Hadoop RDD's.] + + 7a62353 Thu Jan 23 19:09:25 2014 -0800 + Merge pull request #501 from JoshRosen/cartesian-rdd-fixes + [Fix two bugs in PySpark cartesian(): SPARK-978 and SPARK-1034] + + 51960b8 Wed Jan 22 19:37:50 2014 -0800 + Merge pull request #496 from pwendell/master + [Fix bug in worker clean-up in UI] + + 828f7b4 Wed Jan 22 15:45:18 2014 -0800 + Merge pull request #495 from srowen/GraphXCommonsMathDependency + [Fix graphx Commons Math dependency] + + dc5857a Wed Jan 22 14:33:25 2014 -0800 + Merge pull request #492 from skicavs/master + [fixed job name and usage information for the JavaSparkPi example] + + dd533c9 Wed Jan 22 14:15:58 2014 -0800 + Merge pull request #478 from sryza/sandy-spark-1033 + [SPARK-1033. Ask for cores in Yarn container requests] + + b6fd3cd Tue Jan 21 00:12:01 2014 -0800 + Merge pull request #480 from pwendell/0.9-fixes + [Handful of 0.9 fixes] + + e5f8917 Mon Jan 20 23:35:07 2014 -0800 + Merge pull request #484 from tdas/run-example-fix + [Made run-example respect SPARK_JAVA_OPTS and SPARK_MEM.] + + 410ba06 Mon Jan 20 22:26:14 2014 -0800 + Merge pull request #482 from tdas/streaming-example-fix + [Added StreamingContext.awaitTermination to streaming examples] + + f137947 Mon Jan 20 22:24:07 2014 -0800 + Merge pull request #483 from pwendell/gitignore + [Restricting /lib to top level directory in .gitignore] + + 94ae25d Sun Jan 19 11:33:51 2014 -0800 + Merge pull request #470 from tgravescs/fix_spark_examples_yarn + [Only log error on missing jar to allow spark examples to jar.] + + 0f077b5 Sun Jan 19 10:30:29 2014 -0800 + Merge pull request #458 from tdas/docs-update + [Updated java API docs for streaming, along with very minor changes in the code examples.] 
+ + 03019d1 Sat Jan 18 16:29:43 2014 -0800 + Merge pull request #459 from srowen/UpdaterL2Regularization + [Correct L2 regularized weight update with canonical form] + + 76147a2 Sat Jan 18 16:24:16 2014 -0800 + Merge pull request #437 from mridulm/master + [Minor api usability changes] + + 4ac8cab Sat Jan 18 16:22:46 2014 -0800 + Merge pull request #426 from mateiz/py-ml-tests + [Re-enable Python MLlib tests (require Python 2.7 and NumPy 1.7+)] + + 34e911c Sat Jan 18 16:17:34 2014 -0800 + Merge pull request #462 from mateiz/conf-file-fix + [Remove Typesafe Config usage and conf files to fix nested property names] + + ff7201c Sat Jan 18 12:50:02 2014 -0800 + Merge pull request #461 from pwendell/master + [Use renamed shuffle spill config in CoGroupedRDD.scala] + + 7b0d5a5 Thu Jan 16 23:18:48 2014 -0800 + Merge pull request #451 from Qiuzhuang/master + [Fixed Window spark shell launch script error.] + + 4ccedb3 Wed Jan 15 14:26:48 2014 -0800 + Merge pull request #444 from mateiz/py-version + [Clarify that Python 2.7 is only needed for MLlib] + + e3fa36f Wed Jan 15 13:56:04 2014 -0800 + Merge pull request #442 from pwendell/standalone + [Workers should use working directory as spark home if it's not specified] + + 29c76d9 Wed Jan 15 13:55:48 2014 -0800 + Merge pull request #443 from tdas/filestream-fix + [Made some classes private[stremaing] and deprecated a method in JavaStreamingContext.] + + aca40aa Wed Jan 15 11:15:47 2014 -0800 + Merge pull request #441 from pwendell/graphx-build + [GraphX shouldn't list Spark as provided.] + + e12c374 Wed Jan 15 10:01:43 2014 -0800 + Merge pull request #433 from markhamstra/debFix + [Updated Debian packaging] + + 2f015c2 Tue Jan 14 23:17:28 2014 -0800 + Merge pull request #436 from ankurdave/VertexId-case + [Rename VertexID -> VertexId in GraphX] + + 2859cab Tue Jan 14 23:08:19 2014 -0800 + Merge pull request #435 from tdas/filestream-fix + [Fixed the flaky tests by making SparkConf not serializable] + + fbfbb33 Tue Jan 14 23:06:29 2014 -0800 + Merge pull request #434 from rxin/graphxmaven + [Fixed SVDPlusPlusSuite in Maven build.] + + 2c6c07f Tue Jan 14 21:53:05 2014 -0800 + Merge pull request #424 from jegonzal/GraphXProgrammingGuide + [Additional edits for clarity in the graphx programming guide.] + + 6fa4e02 Tue Jan 14 21:51:25 2014 -0800 + Merge pull request #431 from ankurdave/graphx-caching-doc + [Describe caching and uncaching in GraphX programming guide] + + 2f930d5 Tue Jan 14 15:00:11 2014 -0800 + Merge pull request #428 from pwendell/writeable-objects + [Don't clone records for text files] + + 329c9df Tue Jan 14 14:53:36 2014 -0800 + Merge pull request #429 from ankurdave/graphx-examples-pom.xml + [Add GraphX dependency to examples/pom.xml] + + a14933d Tue Jan 14 14:52:42 2014 -0800 + Merge pull request #427 from pwendell/deprecate-aggregator + [Deprecate rather than remove old combineValuesByKey function] + + 119b6c5 Tue Jan 14 13:29:08 2014 -0800 + Merge pull request #425 from rxin/scaladoc + [API doc update & make Broadcast public] + + bf3b150 Tue Jan 14 09:45:22 2014 -0800 + Merge pull request #423 from jegonzal/GraphXProgrammingGuide + [Improving the graphx-programming-guide] + + 1b4adc2 Tue Jan 14 01:19:24 2014 -0800 + Merge pull request #420 from pwendell/header-files + [Add missing header files] + + b60840e Tue Jan 14 00:48:34 2014 -0800 + Merge pull request #418 from pwendell/0.9-versions + [Version changes for release 0.9.0.] 
+ + 980250b Tue Jan 14 00:05:37 2014 -0800 + Merge pull request #416 from tdas/filestream-fix + [Removed unnecessary DStream operations and updated docs] + + 055be5c Mon Jan 13 23:26:44 2014 -0800 + Merge pull request #415 from pwendell/shuffle-compress + [Enable compression by default for spills] + + fdaabdc Mon Jan 13 23:08:26 2014 -0800 + Merge pull request #380 from mateiz/py-bayes + [Add Naive Bayes to Python MLlib, and some API fixes] + + 4a805af Mon Jan 13 22:58:38 2014 -0800 + Merge pull request #367 from ankurdave/graphx + [GraphX: Unifying Graphs and Tables] + + 945fe7a Mon Jan 13 22:56:12 2014 -0800 + Merge pull request #408 from pwendell/external-serializers + [Improvements to external sorting] + + 68641bc Mon Jan 13 22:54:13 2014 -0800 + Merge pull request #413 from rxin/scaladoc + [Adjusted visibility of various components and documentation for 0.9.0 release.] + + 0ca0d4d Mon Jan 13 22:32:21 2014 -0800 + Merge pull request #401 from andrewor14/master + [External sorting - Add number of bytes spilled to Web UI] + + 08b9fec Mon Jan 13 22:29:03 2014 -0800 + Merge pull request #409 from tdas/unpersist + [Automatically unpersisting RDDs that have been cleaned up from DStreams] + + b07bc02 Mon Jan 13 20:45:22 2014 -0800 + Merge pull request #412 from harveyfeng/master + [Add default value for HadoopRDD's `cloneRecords` constructor arg] + + a2fee38 Mon Jan 13 19:45:26 2014 -0800 + Merge pull request #411 from tdas/filestream-fix + [Improved logic of finding new files in FileInputDStream] + + 01c0d72 Mon Jan 13 16:24:30 2014 -0800 + Merge pull request #410 from rxin/scaladoc1 + [Updated JavaStreamingContext to make scaladoc compile.] + + 8038da2 Mon Jan 13 14:59:30 2014 -0800 + Merge pull request #2 from jegonzal/GraphXCCIssue + [Improving documentation and identifying potential bug in CC calculation.] + + b93f9d4 Mon Jan 13 12:18:05 2014 -0800 + Merge pull request #400 from tdas/dstream-move + [Moved DStream and PairDSream to org.apache.spark.streaming.dstream] + + e6ed13f Sun Jan 12 22:35:14 2014 -0800 + Merge pull request #397 from pwendell/host-port + [Remove now un-needed hostPort option] + + 0b96d85 Sun Jan 12 21:31:43 2014 -0800 + Merge pull request #399 from pwendell/consolidate-off + [Disable shuffle file consolidation by default] + + 0ab505a Sun Jan 12 21:31:04 2014 -0800 + Merge pull request #395 from hsaputra/remove_simpleredundantreturn_scala + [Remove simple redundant return statements for Scala methods/functions] + + 405bfe8 Sun Jan 12 20:04:21 2014 -0800 + Merge pull request #394 from tdas/error-handling + [Better error handling in Spark Streaming and more API cleanup] + + 28a6b0c Sun Jan 12 19:49:36 2014 -0800 + Merge pull request #398 from pwendell/streaming-api + [Rename DStream.foreach to DStream.foreachRDD] + + 074f502 Sun Jan 12 17:01:13 2014 -0800 + Merge pull request #396 from pwendell/executor-env + [Setting load defaults to true in executor] + + 82e2b92 Sun Jan 12 16:55:11 2014 -0800 + Merge pull request #392 from rxin/listenerbus + [Stop SparkListenerBus daemon thread when DAGScheduler is stopped.] + + 288a878 Sat Jan 11 21:53:19 2014 -0800 + Merge pull request #389 from rxin/clone-writables + [Minor update for clone writables and more documentation.] + + dbc11df Sat Jan 11 18:07:13 2014 -0800 + Merge pull request #388 from pwendell/master + [Fix UI bug introduced in #244.] 
+ + 409866b Sat Jan 11 17:12:06 2014 -0800 + Merge pull request #393 from pwendell/revert-381 + [Revert PR 381] + + 6510f04 Sat Jan 11 12:48:26 2014 -0800 + Merge pull request #387 from jerryshao/conf-fix + [Fix configure didn't work small problem in ALS] + + ee6e7f9 Sat Jan 11 12:07:55 2014 -0800 + Merge pull request #359 from ScrapCodes/clone-writables + [We clone hadoop key and values by default and reuse objects if asked to.] + + 4216178 Sat Jan 11 09:46:48 2014 -0800 + Merge pull request #373 from jerryshao/kafka-upgrade + [Upgrade Kafka dependecy to 0.8.0 release version] + + 92ad18b Fri Jan 10 23:25:15 2014 -0800 + Merge pull request #376 from prabeesh/master + [Change clientId to random clientId] + + 0b5ce7a Fri Jan 10 23:23:21 2014 -0800 + Merge pull request #386 from pwendell/typo-fix + [Small typo fix] + + 1d7bef0 Fri Jan 10 18:53:03 2014 -0800 + Merge pull request #381 from mateiz/default-ttl + [Fix default TTL for metadata cleaner] + + 44d6a8e Fri Jan 10 17:51:50 2014 -0800 + Merge pull request #382 from RongGu/master + [Fix a type error in comment lines] + + 88faa30 Fri Jan 10 17:14:22 2014 -0800 + Merge pull request #385 from shivaram/add-i2-instances + [Add i2 instance types to Spark EC2.] + + f265531 Fri Jan 10 16:25:44 2014 -0800 + Merge pull request #383 from tdas/driver-test + [API for automatic driver recovery for streaming programs and other bug fixes] + + d37408f Fri Jan 10 16:25:01 2014 -0800 + Merge pull request #377 from andrewor14/master + [External Sorting for Aggregator and CoGroupedRDDs (Revisited)] + + 0eaf01c Fri Jan 10 15:32:19 2014 -0800 + Merge pull request #369 from pillis/master + [SPARK-961 Add a Vector.random() method] + + 7cef843 Fri Jan 10 15:34:15 2014 -0600 + Merge pull request #371 from tgravescs/yarn_client_addjar_misc_fixes + [Yarn client addjar and misc fixes] + + 7b58f11 Fri Jan 10 12:47:46 2014 -0800 + Merge pull request #384 from pwendell/debug-logs + [Make DEBUG-level logs consummable.] + + 23d2995 Fri Jan 10 10:20:02 2014 -0800 + Merge pull request #1 from jegonzal/graphx + [ProgrammingGuide] + + 0ebc973 Thu Jan 9 23:58:49 2014 -0800 + Merge pull request #375 from mateiz/option-fix + [Fix bug added when we changed AppDescription.maxCores to an Option] + + dd03cea Thu Jan 9 23:38:03 2014 -0800 + Merge pull request #378 from pwendell/consolidate_on + [Enable shuffle consolidation by default.] + + 997c830 Thu Jan 9 22:22:20 2014 -0800 + Merge pull request #363 from pwendell/streaming-logs + [Set default logging to WARN for Spark streaming examples.] + + 300eaa9 Thu Jan 9 20:29:51 2014 -0800 + Merge pull request #353 from pwendell/ipython-simplify + [Simplify and fix pyspark script.] + + 4b074fa Thu Jan 9 19:03:55 2014 -0800 + Merge pull request #374 from mateiz/completeness + [Add some missing Java API methods] + + a9d5333 Thu Jan 9 18:46:46 2014 -0800 + Merge pull request #294 from RongGu/master + [Bug fixes for updating the RDD block's memory and disk usage information] + + d86a85e Thu Jan 9 18:37:52 2014 -0800 + Merge pull request #293 from pwendell/standalone-driver + [SPARK-998: Support Launching Driver Inside of Standalone Mode] + + 26cdb5f Thu Jan 9 17:16:34 2014 -0800 + Merge pull request #372 from pwendell/log4j-fix-1 + [Send logs to stderr by default (instead of stdout).] + + 12f414e Thu Jan 9 15:31:30 2014 -0800 + Merge pull request #362 from mateiz/conf-getters + [Use typed getters for configuration settings] + + 365cac9 Thu Jan 9 00:56:16 2014 -0800 + Merge pull request #361 from rxin/clean + [Minor style cleanup. 
Mostly on indenting & line width changes.] + + 73c724e Thu Jan 9 00:32:19 2014 -0800 + Merge pull request #368 from pwendell/sbt-fix + [Don't delegate to users `sbt`.] + + dceedb4 Wed Jan 8 23:19:28 2014 -0800 + Merge pull request #364 from pwendell/fix + [Fixing config option "retained_stages" => "retainedStages".] + + 04d83fc Wed Jan 8 11:55:37 2014 -0800 + Merge pull request #360 from witgo/master + [fix make-distribution.sh show version: command not found] + + 56ebfea Wed Jan 8 11:50:06 2014 -0800 + Merge pull request #357 from hsaputra/set_boolean_paramname + [Set boolean param name for call to SparkHadoopMapReduceUtil.newTaskAttemptID] + + bdeaeaf Wed Jan 8 11:48:39 2014 -0800 + Merge pull request #358 from pwendell/add-cdh + [Add CDH Repository to Maven Build] + + 5cae05f Wed Jan 8 11:47:28 2014 -0800 + Merge pull request #356 from hsaputra/remove_deprecated_cleanup_method + [Remove calls to deprecated mapred's OutputCommitter.cleanupJob] + + 6eef78d Wed Jan 8 08:49:20 2014 -0600 + Merge pull request #345 from colorant/yarn + [support distributing extra files to worker for yarn client mode] + + bb6a39a Tue Jan 7 22:32:18 2014 -0800 + Merge pull request #322 from falaki/MLLibDocumentationImprovement + [SPARK-1009 Updated MLlib docs to show how to use it in Python] + + cb1b927 Tue Jan 7 22:26:28 2014 -0800 + Merge pull request #355 from ScrapCodes/patch-1 + [Update README.md] + + c0f0155 Tue Jan 7 22:21:52 2014 -0800 + Merge pull request #313 from tdas/project-refactor + [Refactored the streaming project to separate external libraries like Twitter, Kafka, Flume, etc.] + + f5f12dc Tue Jan 7 21:56:35 2014 -0800 + Merge pull request #336 from liancheng/akka-remote-lookup + [Get rid of `Either[ActorRef, ActorSelection]'] + + 11891e6 Wed Jan 8 00:32:18 2014 -0500 + Merge pull request #327 from lucarosellini/master + [Added ā€˜-i’ command line option to Spark REPL] + + 7d0aac9 Wed Jan 8 00:30:45 2014 -0500 + Merge pull request #354 from hsaputra/addasfheadertosbt + [Add ASF header to the new sbt script.] + + d75dc42 Wed Jan 8 00:30:03 2014 -0500 + Merge pull request #350 from mateiz/standalone-limit + [Add way to limit default # of cores used by apps in standalone mode] + + 61674bc Tue Jan 7 18:32:13 2014 -0800 + Merge pull request #352 from markhamstra/oldArch + [Don't leave os.arch unset after BlockManagerSuite] + + b2e690f Tue Jan 7 16:57:08 2014 -0800 + Merge pull request #328 from falaki/MatrixFactorizationModel-fix + [SPARK-1012: DAGScheduler Exception Fix] + + 6ccf8ce Tue Jan 7 15:49:14 2014 -0800 + Merge pull request #351 from pwendell/maven-fix + [Add log4j exclusion rule to maven.] 
+ + 7d5fa17 Tue Jan 7 11:31:34 2014 -0800 + Merge pull request #337 from yinxusen/mllib-16-bugfix + [Mllib 16 bugfix] + + 71fc113 Tue Jan 7 11:30:35 2014 -0800 + Merge pull request #349 from CodingCat/support-worker_dir + [add the comments about SPARK_WORKER_DIR] + + 15d9534 Tue Jan 7 08:10:02 2014 -0800 + Merge pull request #318 from srowen/master + [Suggested small changes to Java code for slightly more standard style, encapsulation and in some cases performance] + + 468af0f Tue Jan 7 08:09:01 2014 -0800 + Merge pull request #348 from prabeesh/master + [spark -> org.apache.spark] + + c3cf047 Tue Jan 7 00:54:25 2014 -0800 + Merge pull request #339 from ScrapCodes/conf-improvements + [Conf improvements] + + a862caf Tue Jan 7 00:18:20 2014 -0800 + Merge pull request #331 from holdenk/master + [Add a script to download sbt if not present on the system] + + b97ef21 Mon Jan 6 20:12:57 2014 -0800 + Merge pull request #346 from sproblvem/patch-1 + [Update stop-slaves.sh] + + 7210257 Mon Jan 6 18:25:44 2014 -0800 + Merge pull request #128 from adamnovak/master + [Fix failing "sbt/sbt publish-local" by adding a no-argument PrimitiveKeyOpenHashMap constructor ] + + e4d6057 Mon Jan 6 14:56:54 2014 -0800 + Merge pull request #343 from pwendell/build-fix + [Fix test breaking downstream builds] + + 93bf962 Mon Jan 6 11:42:41 2014 -0800 + Merge pull request #340 from ScrapCodes/sbt-fixes + [Made java options to be applied during tests so that they become self explanatory.] + + 60edeb3 Mon Jan 6 11:40:32 2014 -0800 + Merge pull request #338 from ScrapCodes/ning-upgrade + [SPARK-1005 Ning upgrade] + + c708e81 Mon Jan 6 11:35:48 2014 -0800 + Merge pull request #341 from ash211/patch-5 + [Clarify spark.cores.max in docs] + + 33fcb91 Mon Jan 6 11:19:23 2014 -0800 + Merge pull request #342 from tgravescs/fix_maven_protobuf + [Change protobuf version for yarn alpha back to 2.4.1] + + 357083c Mon Jan 6 10:29:04 2014 -0800 + Merge pull request #330 from tgravescs/fix_addjars_null_handling + [Fix handling of empty SPARK_EXAMPLES_JAR] + + a2e7e04 Sun Jan 5 22:37:36 2014 -0800 + Merge pull request #333 from pwendell/logging-silence + [Quiet ERROR-level Akka Logs] + + 5b0986a Sun Jan 5 19:25:09 2014 -0800 + Merge pull request #334 from pwendell/examples-fix + [Removing SPARK_EXAMPLES_JAR in the code] + + f4b924f Sun Jan 5 17:11:47 2014 -0800 + Merge pull request #335 from rxin/ser + [Fall back to zero-arg constructor for Serializer initialization if there is no constructor that accepts SparkConf.] 
+ + d43ad3e Sat Jan 4 16:29:30 2014 -0800 + Merge pull request #292 from soulmachine/naive-bayes + [standard Naive Bayes classifier] + + 86404da Sat Jan 4 14:55:54 2014 -0800 + Merge pull request #127 from jegonzal/MapByPartition + [Adding mapEdges and mapTriplets by Partition] + + e68cdb1 Sat Jan 4 13:46:02 2014 -0800 + Merge pull request #124 from jianpingjwang/master + [refactor and bug fix] + + 280ddf6 Sat Jan 4 12:54:41 2014 -0800 + Merge pull request #121 from ankurdave/more-simplify + [Simplify GraphImpl internals further] + + 10fe23b Fri Jan 3 23:50:14 2014 -0800 + Merge pull request #329 from pwendell/remove-binaries + [SPARK-1002: Remove Binaries from Spark Source] + + c4d6145 Fri Jan 3 16:30:53 2014 -0800 + Merge pull request #325 from witgo/master + [Modify spark on yarn to create SparkConf process] + + 4ae101f Fri Jan 3 11:24:35 2014 -0800 + Merge pull request #317 from ScrapCodes/spark-915-segregate-scripts + [Spark-915 segregate scripts] + + 87248bd Fri Jan 3 00:45:31 2014 -0800 + Merge pull request #1 from apache/master + [Merge latest Spark changes] + + 30b9db0 Thu Jan 2 23:15:55 2014 -0800 + Merge pull request #285 from colorant/yarn-refactor + [Yarn refactor] + + 498a5f0 Thu Jan 2 19:06:40 2014 -0800 + Merge pull request #323 from tgravescs/sparkconf_yarn_fix + [fix spark on yarn after the sparkConf changes] + + 0475ca8 Thu Jan 2 15:17:08 2014 -0800 + Merge pull request #320 from kayousterhout/erroneous_failed_msg + [Remove erroneous FAILED state for killed tasks.] + + 588a169 Thu Jan 2 13:20:54 2014 -0800 + Merge pull request #297 from tdas/window-improvement + [Improvements to DStream window ops and refactoring of Spark's CheckpointSuite] + + 5e67cdc Thu Jan 2 12:56:28 2014 -0800 + Merge pull request #319 from kayousterhout/remove_error_method + [Removed redundant TaskSetManager.error() function.] + + ca67909 Thu Jan 2 15:54:54 2014 -0500 + Merge pull request #311 from tmyklebu/master + [SPARK-991: Report information gleaned from a Python stacktrace in the UI] + + 3713f81 Wed Jan 1 21:29:12 2014 -0800 + Merge pull request #309 from mateiz/conf2 + [SPARK-544. 
Migrate configuration to a SparkConf class] + + c1d928a Wed Jan 1 17:03:48 2014 -0800 + Merge pull request #312 from pwendell/log4j-fix-2 + [SPARK-1008: Logging improvments] + + dc9cb83 Wed Jan 1 13:28:34 2014 -0800 + Merge pull request #126 from jegonzal/FixingPersist + [Fixing Persist Behavior] + + 9a0ff72 Tue Dec 31 21:50:24 2013 -0800 + Merge pull request #314 from witgo/master + [restore core/pom.xml file modification] + + 8b8e70e Tue Dec 31 17:48:24 2013 -0800 + Merge pull request #73 from falaki/ApproximateDistinctCount + [Approximate distinct count] + + 63b411d Tue Dec 31 14:31:28 2013 -0800 + Merge pull request #238 from ngbinh/upgradeNetty + [upgrade Netty from 4.0.0.Beta2 to 4.0.13.Final] + + 32d6ae9 Tue Dec 31 13:51:07 2013 -0800 + Merge pull request #120 from ankurdave/subgraph-reuses-view + [Reuse VTableReplicated in GraphImpl.subgraph] + + 55b7e2f Tue Dec 31 10:12:51 2013 -0800 + Merge pull request #289 from tdas/filestream-fix + [Bug fixes for file input stream and checkpointing] + + 2b71ab9 Mon Dec 30 11:01:30 2013 -0800 + Merge pull request from aarondav: Utilize DiskBlockManager pathway for temp file writing + [This gives us a couple advantages:] + + 50e3b8e Mon Dec 30 07:44:26 2013 -0800 + Merge pull request #308 from kayousterhout/stage_naming + [Changed naming of StageCompleted event to be consistent] + + 72a17b6 Sat Dec 28 21:25:40 2013 -1000 + Revert "Merge pull request #310 from jyunfan/master" + [This reverts commit 79b20e4dbe3dcd8559ec8316784d3334bb55868b, reversing] + + 79b20e4 Sat Dec 28 21:13:36 2013 -1000 + Merge pull request #310 from jyunfan/master + [Fix typo in the Accumulators section] + + 7375047 Sat Dec 28 13:25:06 2013 -0800 + Merge pull request #304 from kayousterhout/remove_unused + [Removed unused failed and causeOfFailure variables (in TaskSetManager)] + + ad3dfd1 Fri Dec 27 22:10:14 2013 -0500 + Merge pull request #307 from kayousterhout/other_failure + [Removed unused OtherFailure TaskEndReason.] + + b579b83 Fri Dec 27 22:09:04 2013 -0500 + Merge pull request #306 from kayousterhout/remove_pending + [Remove unused hasPendingTasks methods] + + 19672dc Fri Dec 27 13:37:10 2013 -0800 + Merge pull request #305 from kayousterhout/line_spacing + [Fixed >100char lines in DAGScheduler.scala] + + 7be1e57 Thu Dec 26 23:41:40 2013 -1000 + Merge pull request #298 from aarondav/minor + [Minor: Decrease margin of left side of Log page] + + 7d811ba Thu Dec 26 23:39:58 2013 -1000 + Merge pull request #302 from pwendell/SPARK-1007 + [SPARK-1007: spark-class2.cmd should change SCALA_VERSION to be 2.10] + + 5e69fc5 Thu Dec 26 19:10:39 2013 -0500 + Merge pull request #295 from markhamstra/JobProgressListenerNPE + [Avoid a lump of coal (NPE) in JobProgressListener's stocking.] + + da20270 Thu Dec 26 12:11:52 2013 -0800 + Merge pull request #1 from aarondav/driver + [Refactor DriverClient to be more Actor-based] + + e240bad Thu Dec 26 12:30:48 2013 -0500 + Merge pull request #296 from witgo/master + [Renamed ClusterScheduler to TaskSchedulerImpl for yarn and new-yarn package] + + c344ed0 Thu Dec 26 01:31:06 2013 -0500 + Merge pull request #283 from tmyklebu/master + [Python bindings for mllib] + + 56094bc Wed Dec 25 13:14:33 2013 -0500 + Merge pull request #290 from ash211/patch-3 + [Typo: avaiable -> available] + + 4842a07 Wed Dec 25 01:52:15 2013 -0800 + Merge pull request #287 from azuryyu/master + [Fixed job name in the java streaming example.] 
+ + 85a344b Tue Dec 24 16:35:06 2013 -0800 + Merge pull request #127 from kayousterhout/consolidate_schedulers + [Deduplicate Local and Cluster schedulers.] + + c2dd6bc Tue Dec 24 14:36:47 2013 -0800 + Merge pull request #279 from aarondav/shuffle-cleanup0 + [Clean up shuffle files once their metadata is gone] + + 3bf7c70 Tue Dec 24 16:37:13 2013 -0500 + Merge pull request #275 from ueshin/wip/changeclasspathorder + [Change the order of CLASSPATH.] + + d63856c Mon Dec 23 22:07:26 2013 -0800 + Merge pull request #286 from rxin/build + [Show full stack trace and time taken in unit tests.] + + 23a9ae6 Tue Dec 24 00:08:48 2013 -0500 + Merge pull request #277 from tdas/scheduler-update + [Refactored the streaming scheduler and added StreamingListener interface] + + 11107c9 Mon Dec 23 10:38:20 2013 -0800 + Merge pull request #244 from leftnoteasy/master + [Added SPARK-968 implementation for review] + + 44e4205 Sun Dec 22 11:44:18 2013 -0800 + Merge pull request #116 from jianpingjwang/master + [remove unused variables and fix a bug] + + 4797c22 Fri Dec 20 13:30:39 2013 -0800 + Merge pull request #118 from ankurdave/VertexPartitionSuite + [Test VertexPartition and fix bugs] + + 0bc57c5 Fri Dec 20 11:56:54 2013 -0800 + Merge pull request #280 from aarondav/minor + [Minor cleanup for standalone scheduler] + + ac70b8f Fri Dec 20 10:56:10 2013 -0800 + Merge pull request #117 from ankurdave/more-tests + [More tests] + + 45310d4 Thu Dec 19 22:08:20 2013 -0800 + Merge pull request #115 from ankurdave/test-reorg + [Reorganize unit tests; improve GraphSuite test coverage] + + 9228ec8 Thu Dec 19 21:37:15 2013 -0800 + Merge pull request #1 from aarondav/127 + [Merge master into 127] + + eca68d4 Thu Dec 19 18:12:22 2013 -0800 + Merge pull request #272 from tmyklebu/master + [Track and report task result serialisation time.] + + 7990c56 Thu Dec 19 13:35:09 2013 -0800 + Merge pull request #276 from shivaram/collectPartition + [Add collectPartition to JavaRDD interface.] + + 440e531 Thu Dec 19 10:38:56 2013 -0800 + Merge pull request #278 from MLnick/java-python-tostring + [Add toString to Java RDD, and __repr__ to Python RDD] + + d8d3f3e Thu Dec 19 00:06:43 2013 -0800 + Merge pull request #183 from aarondav/spark-959 + [[SPARK-959] Explicitly depend on org.eclipse.jetty.orbit jar] + + bfba532 Wed Dec 18 22:22:21 2013 -0800 + Merge pull request #247 from aarondav/minor + [Increase spark.akka.askTimeout default to 30 seconds] + + da301b5 Wed Dec 18 20:03:29 2013 -0800 + Merge pull request #112 from amatsukawa/scc + [Strongly connected component algorithm] + + c64a53a Wed Dec 18 16:56:26 2013 -0800 + Merge pull request #267 from JoshRosen/cygwin + [Fix Cygwin support in several scripts.] + + a645ef6 Wed Dec 18 16:07:52 2013 -0800 + Merge pull request #48 from amatsukawa/add_project_to_graph + [Add mask operation on graph and filter graph primitive] + + d7ebff0 Wed Dec 18 15:38:48 2013 -0800 + Merge pull request #1 from ankurdave/add_project_to_graph + [Merge current master and reimplement Graph.mask using innerJoin] + + 5ea1872 Wed Dec 18 15:27:24 2013 -0800 + Merge pull request #274 from azuryy/master + [Fixed the example link in the Scala programing guid.] 
+ + 3fd2e09 Wed Dec 18 12:52:36 2013 -0800 + Merge pull request #104 from jianpingjwang/master + [SVD++ demo] + + f4effb3 Tue Dec 17 22:26:21 2013 -0800 + Merge pull request #273 from rxin/top + [Fixed a performance problem in RDD.top and BoundedPriorityQueue] + + 1b5eacb Tue Dec 17 13:49:17 2013 -0800 + Merge pull request #102 from ankurdave/clustered-edge-index + [Add clustered index on edges by source vertex] + + 7a8169b Mon Dec 16 22:42:21 2013 -0800 + Merge pull request #268 from pwendell/shaded-protobuf + [Add support for 2.2. to master (via shaded jars)] + + 0476c84 Mon Dec 16 17:19:25 2013 -0800 + Merge pull request #100 from ankurdave/mrTriplets-active-set + [Support activeSet option in mapReduceTriplets] + + 964a3b6 Mon Dec 16 15:23:51 2013 -0800 + Merge pull request #270 from ewencp/really-force-ssh-pseudo-tty-master + [Force pseudo-tty allocation in spark-ec2 script.] + + 5192ef3 Mon Dec 16 15:08:08 2013 -0800 + Merge pull request #94 from ankurdave/load-edges-columnar + [Load edges in columnar format] + + 883e034 Mon Dec 16 14:16:02 2013 -0800 + Merge pull request #245 from gregakespret/task-maxfailures-fix + [Fix for spark.task.maxFailures not enforced correctly.] + + a51f340 Sun Dec 15 22:02:30 2013 -0800 + Merge pull request #265 from markhamstra/scala.binary.version + [DRY out the POMs with scala.binary.version] + + ded10ce Sun Dec 15 17:25:33 2013 -0800 + Merge pull request #103 from amplab/optimizations + [Optimizations cherry-picked from SIGMOD branches] + + d2ced6d Sun Dec 15 14:11:34 2013 -0800 + Merge pull request #256 from MLnick/master + [Fix 'IPYTHON=1 ./pyspark' throwing ValueError] + + c55e698 Sun Dec 15 12:49:02 2013 -0800 + Merge pull request #257 from tgravescs/sparkYarnFixName + [Fix the --name option for Spark on Yarn] + + ab85f88 Sun Dec 15 12:48:32 2013 -0800 + Merge pull request #264 from shivaram/spark-class-fix + [Use CoarseGrainedExecutorBackend in spark-class] + + 8a56c1f Sat Dec 14 16:29:24 2013 -0800 + Merge pull request #84 from amatsukawa/graphlab_enhancements + [GraphLab bug fix & set start vertex] + + 7db9165 Sat Dec 14 14:16:34 2013 -0800 + Merge pull request #251 from pwendell/master + [Fix list rendering in YARN markdown docs.] + + 2fd781d Sat Dec 14 12:59:37 2013 -0800 + Merge pull request #249 from ngbinh/partitionInJavaSortByKey + [Expose numPartitions parameter in JavaPairRDD.sortByKey()] + + 9bf192b Sat Dec 14 12:52:18 2013 -0800 + Merge pull request #91 from amplab/standalone-pagerank + [Standalone PageRank] + + 840af5e Sat Dec 14 12:51:51 2013 -0800 + Merge pull request #99 from ankurdave/only-dynamic-pregel + [Remove static Pregel; take maxIterations in dynamic Pregel] + + 97ac060 Sat Dec 14 00:22:45 2013 -0800 + Merge pull request #259 from pwendell/scala-2.10 + [Migration to Scala 2.10] + + 7ac944f Fri Dec 13 23:22:08 2013 -0800 + Merge pull request #262 from pwendell/mvn-fix + [Fix maven build issues in 2.10 branch] + + 6defb06 Fri Dec 13 21:18:57 2013 -0800 + Merge pull request #261 from ScrapCodes/scala-2.10 + [Added a comment about ActorRef and ActorSelection difference.] + + 76566b1 Fri Dec 13 10:11:02 2013 -0800 + Merge pull request #260 from ScrapCodes/scala-2.10 + [Review comments on the PR for scala 2.10 migration.] + + 0aeb182 Thu Dec 12 21:14:42 2013 -0800 + Merge pull request #255 from ScrapCodes/scala-2.10 + [Disabled yarn 2.2 in sbt and mvn build and added a message in the sbt build.] 
+ + 2e89398 Wed Dec 11 23:10:53 2013 -0800 + Merge pull request #254 from ScrapCodes/scala-2.10 + [Scala 2.10 migration] + + ce6ca4e Wed Dec 11 22:30:54 2013 -0800 + Merge pull request #97 from dcrankshaw/fix-rddtop + [Added BoundedPriorityQueue kryo registrator. Fixes top issue.] + + d2efe13 Tue Dec 10 13:01:26 2013 -0800 + Merge pull request #250 from pwendell/master + [README incorrectly suggests build sources spark-env.sh] + + 6169fe1 Mon Dec 9 16:51:36 2013 -0800 + Merge pull request #246 from pwendell/master + [Add missing license headers] + + d992ec6 Sun Dec 8 20:49:20 2013 -0800 Merge pull request #195 from dhardy92/fix_DebScriptPackage [[Deb] fix package of Spark classes adding org.apache prefix in scripts embeded in .deb] - c14f373 Sat Dec 7 22:35:31 2013 -0800 - Merge pull request #241 from pwendell/master - [Update broken links and add HDP 2.0 version string] + 1f4a4bc Sat Dec 7 22:34:34 2013 -0800 + Merge pull request #242 from pwendell/master + [Update broken links and add HDP 2.0 version string] + + 6494d62 Sat Dec 7 11:56:16 2013 -0800 + Merge pull request #240 from pwendell/master + [SPARK-917 Improve API links in nav bar] + + f466f79 Sat Dec 7 11:51:52 2013 -0800 + Merge pull request #239 from aarondav/nit + [Correct spellling error in configuration.md] + + 3abfbfb Sat Dec 7 11:24:19 2013 -0800 + Merge pull request #92 from ankurdave/rdd-names + [Set RDD names for easy debugging] + + 31e8a14 Fri Dec 6 21:49:55 2013 -0800 + Merge pull request #90 from amplab/pregel-replicate-changed + [Replicate only changed vertices] + + 10c3c0c Fri Dec 6 20:29:45 2013 -0800 + Merge pull request #237 from pwendell/formatting-fix + [Formatting fix] + + 1b38f5f Fri Dec 6 20:16:15 2013 -0800 + Merge pull request #236 from pwendell/shuffle-docs + [Adding disclaimer for shuffle file consolidation] + + e5d5728 Fri Dec 6 20:14:56 2013 -0800 + Merge pull request #235 from pwendell/master + [Minor doc fixes and updating README] + + 241336a Fri Dec 6 17:29:03 2013 -0800 + Merge pull request #234 from alig/master + [Updated documentation about the YARN v2.2 build process] + + e039234 Fri Dec 6 11:49:59 2013 -0800 + Merge pull request #190 from markhamstra/Stages4Jobs + [stageId <--> jobId mapping in DAGScheduler] + + bfa6860 Fri Dec 6 11:04:03 2013 -0800 + Merge pull request #233 from hsaputra/changecontexttobackend + [Change the name of input argument in ClusterScheduler#initialize from context to backend.] + + 3fb302c Fri Dec 6 11:03:32 2013 -0800 + Merge pull request #205 from kayousterhout/logging + [Added logging of scheduler delays to UI] + + 87676a6 Fri Dec 6 11:01:42 2013 -0800 + Merge pull request #220 from rxin/zippart + [Memoize preferred locations in ZippedPartitionsBaseRDD] + + 0780498 Thu Dec 5 23:29:42 2013 -0800 + Merge pull request #232 from markhamstra/FiniteWait + [jobWaiter.synchronized before jobWaiter.wait] + + 1c8500e Thu Dec 5 16:25:44 2013 -0800 + Merge pull request #88 from amplab/varenc + [Fixed a bug that variable encoding doesn't work for ints that use all 64 bits.] + + e0bcaa0 Thu Dec 5 12:37:02 2013 -0800 + Merge pull request #86 from ankurdave/vid-varenc + [Finish work on #85] + + 5d46025 Thu Dec 5 12:31:24 2013 -0800 + Merge pull request #228 from pwendell/master + [Document missing configs and set shuffle consolidation to false.] 
+ + 3e96b9a Thu Dec 5 12:07:36 2013 -0800 + Merge pull request #85 from ankurdave/vid-varenc + [Always write Vids using variable encoding] + + 72b6961 Wed Dec 4 23:33:04 2013 -0800 + Merge pull request #199 from harveyfeng/yarn-2.2 + [Hadoop 2.2 migration] + + e0347ba Wed Dec 4 17:38:06 2013 -0800 + Merge pull request #83 from ankurdave/fix-tests + [Fix compile errors in GraphSuite and SerializerSuite] + + 182f9ba Wed Dec 4 15:52:07 2013 -0800 + Merge pull request #227 from pwendell/master + [Fix small bug in web UI and minor clean-up.] + + cbd3b75 Wed Dec 4 15:35:26 2013 -0800 + Merge pull request #81 from amplab/clean1 + [Codebase refactoring] + + b9e7609 Wed Dec 4 14:42:09 2013 -0800 + Merge pull request #225 from ash211/patch-3 + [Add missing space after "Serialized" in StorageLevel] + + 055462c Wed Dec 4 14:02:11 2013 -0800 + Merge pull request #226 from ash211/patch-4 + [Typo: applicaton] + + d6e5473 Wed Dec 4 10:28:50 2013 -0800 + Merge pull request #223 from rxin/transient + [Mark partitioner, name, and generator field in RDD as @transient.] + + 8a3475a Tue Dec 3 14:21:40 2013 -0800 + Merge pull request #218 from JoshRosen/spark-970-pyspark-unicode-error + [Fix UnicodeEncodeError in PySpark saveAsTextFile() (SPARK-970)] + + 46b87b8 Tue Dec 3 00:41:11 2013 -0800 + Merge pull request #2 from colorant/yarn-client-2.2 + [Fix pom.xml for maven build] + + 58d9bbc Mon Dec 2 21:58:53 2013 -0800 + Merge pull request #217 from aarondav/mesos-urls + [Re-enable zk:// urls for Mesos SparkContexts] + + 740922f Sun Dec 1 12:46:58 2013 -0800 + Merge pull request #219 from sundeepn/schedulerexception + [Scheduler quits when newStage fails] + + 60e23a5 Sat Nov 30 23:38:49 2013 -0800 + Merge pull request #216 from liancheng/fix-spark-966 + [Bugfix: SPARK-965 & SPARK-966] + + 34ee814 Sat Nov 30 15:10:30 2013 -0800 + Merged Ankur's pull request #80 and fixed subgraph. + [] + + 743a31a Wed Nov 27 18:24:39 2013 -0800 + Merge pull request #210 from haitaoyao/http-timeout + [add http timeout for httpbroadcast] + + 993e293 Wed Nov 27 00:57:54 2013 -0800 + Merge pull request #1 from colorant/yarn-client-2.2 + [Port yarn-client mode for new-yarn] + + fb6875d Tue Nov 26 20:55:40 2013 -0800 + Merge pull request #146 from JoshRosen/pyspark-custom-serializers + [Custom Serializers for PySpark] + + 330ada1 Tue Nov 26 19:08:33 2013 -0800 + Merge pull request #207 from henrydavidge/master + [Log a warning if a task's serialized size is very big] + + 615213f Tue Nov 26 19:07:20 2013 -0800 + Merge pull request #212 from markhamstra/SPARK-963 + [[SPARK-963] Fixed races in JobLoggerSuite] + + cb976df Tue Nov 26 10:23:19 2013 -0800 + Merge pull request #209 from pwendell/better-docs + [Improve docs for shuffle instrumentation] + + 18d6df0 Tue Nov 26 00:00:07 2013 -0800 + Merge pull request #86 from holdenk/master + [Add histogram functionality to DoubleRDDFunctions] + + 0e2109d Mon Nov 25 20:48:37 2013 -0800 + Merge pull request #204 from rxin/hash + [OpenHashSet fixes] + + c46067f Mon Nov 25 19:09:31 2013 -0800 + Merge pull request #206 from ash211/patch-2 + [Update tuning.md] + + 14bb465 Mon Nov 25 18:50:18 2013 -0800 + Merge pull request #201 from rxin/mappartitions + [Use the proper partition index in mapPartitionsWIthIndex] + + eb4296c Mon Nov 25 15:25:29 2013 -0800 + Merge pull request #101 from colorant/yarn-client-scheduler + [For SPARK-527, Support spark-shell when running on YARN] + + 466fd06 Mon Nov 25 18:27:26 2013 +0800 + Incorporated ideas from pull request #200. 
- Use Murmur Hash 3 finalization step to scramble the bits of HashCode instead of the simpler version in java.util.HashMap; the latter one had trouble with ranges of consecutive integers. Murmur Hash 3 is used by fastutil. + [- Don't check keys for equality when re-inserting due to growing the] + + 088995f Mon Nov 25 00:57:51 2013 -0800 + Merge pull request #77 from amplab/upgrade + [Sync with Spark master] + + 62889c4 Mon Nov 25 11:27:45 2013 +0800 + Merge pull request #203 from witgo/master + [ Fix Maven build for metrics-graphite] + + 6af03ed Sun Nov 24 16:42:37 2013 -0800 + Merge pull request #76 from dcrankshaw/fix_partitioners + [Actually use partitioner command line args in Analytics.] + + 859d62d Sun Nov 24 16:19:51 2013 -0800 + Merge pull request #151 from russellcardullo/add-graphite-sink + [Add graphite sink for metrics] + + 65de73c Sun Nov 24 15:52:33 2013 -0800 + Merge pull request #185 from mkolod/random-number-generator + [XORShift RNG with unit tests and benchmark] + + 972171b Mon Nov 25 07:50:46 2013 +0800 + Merge pull request #197 from aarondav/patrick-fix + [Fix 'timeWriting' stat for shuffle files] + + a1a7e36 Sun Nov 24 05:15:09 2013 -0800 + Merge pull request #75 from amplab/simplify + [Simplify GraphImpl internals] + + 718cc80 Sun Nov 24 11:02:02 2013 +0800 + Merge pull request #200 from mateiz/hash-fix + [AppendOnlyMap fixes] + + 51aa9d6 Sat Nov 23 19:46:46 2013 +0800 + Merge pull request #198 from ankurdave/zipPartitions-preservesPartitioning + [Support preservesPartitioning in RDD.zipPartitions] + + 18ce7e9 Fri Nov 22 17:02:40 2013 -0800 + Merge pull request #73 from jegonzal/TriangleCount + [Triangle count] + + 086b097 Fri Nov 22 10:26:39 2013 +0800 + Merge pull request #193 from aoiwelle/patch-1 + [Fix Kryo Serializer buffer documentation inconsistency] + + f20093c Fri Nov 22 10:12:13 2013 +0800 + Merge pull request #196 from pwendell/master + [TimeTrackingOutputStream should pass on calls to close() and flush().] 
+ + 4b89501 Wed Nov 20 10:36:10 2013 -0800 + Merge pull request #191 from hsaputra/removesemicolonscala + [Cleanup to remove semicolons (;) from Scala code] + + 202f8e6 Wed Nov 20 03:26:08 2013 -0800 + Merge pull request #74 from dcrankshaw/remove_sleep + [Removed sleep from pagerank in Analytics] + + 74ade9e Tue Nov 19 16:53:58 2013 -0800 + Merge pull request #62 from dcrankshaw/partitioners + [Allow user to choose a partitioner at runtime] + + f568912 Tue Nov 19 16:11:31 2013 -0800 + Merge pull request #181 from BlackNiuza/fix_tasks_number + [correct number of tasks in ExecutorsUI] + + aa638ed Tue Nov 19 16:05:44 2013 -0800 + Merge pull request #189 from tgravescs/sparkYarnErrorHandling + [Impove Spark on Yarn Error handling] + + 5592580 Tue Nov 19 16:04:01 2013 -0800 + Merge pull request #187 from aarondav/example-bcast-test + [Enable the Broadcast examples to work in a cluster setting] + + 99cfe89 Mon Nov 18 22:00:36 2013 -0500 + Updates to reflect pull request code review + [] + + e2ebc3a Sun Nov 17 18:42:18 2013 -0800 + Merge pull request #182 from rxin/vector + [Slightly enhanced PrimitiveVector:] + + 26f616d Sun Nov 17 18:18:16 2013 -0800 + Merge pull request #3 from aarondav/pv-test + [Add PrimitiveVectorSuite and fix bug in resize()] + + 1b5b358 Sat Nov 16 11:44:10 2013 -0800 + Merge pull request #178 from hsaputra/simplecleanupcode + [Simple cleanup on Spark's Scala code] + + 62a2a71 Fri Nov 15 13:12:07 2013 -0800 + Merge pull request #65 from amplab/varenc + [Use variable encoding for ints, longs, and doubles in the specialized serializers.] + + f6b2e59 Thu Nov 14 23:04:55 2013 -0800 + Merge pull request #1 from aarondav/scala210-master + [Various merge corrections] + + 96e0fb4 Thu Nov 14 22:29:28 2013 -0800 + Merge pull request #173 from kayousterhout/scheduler_hang + [Fix bug where scheduler could hang after task failure.] + + dfd40e9 Thu Nov 14 19:44:50 2013 -0800 + Merge pull request #175 from kayousterhout/no_retry_not_serializable + [Don't retry tasks when they fail due to a NotSerializableException] + + ed25105 Thu Nov 14 19:43:55 2013 -0800 + Merge pull request #174 from ahirreddy/master + [Write Spark UI url to driver file on HDFS] + + 1a4cfbe Thu Nov 14 10:32:11 2013 -0800 + Merge pull request #169 from kayousterhout/mesos_fix + [Don't ignore spark.cores.max when using Mesos Coarse mode] + + 5a4f483 Thu Nov 14 10:30:36 2013 -0800 + Merge pull request #170 from liancheng/hadooprdd-doc-typo + [Fixed a scaladoc typo in HadoopRDD.scala] + + d76f520 Thu Nov 14 10:25:48 2013 -0800 + Merge pull request #171 from RIA-pierre-borckmans/master + [Fixed typos in the CDH4 distributions version codes.] 
+ + 2c39d80 Wed Nov 13 23:28:01 2013 -0800 + Merge pull request #69 from jegonzal/MissingVertices + [Addressing issue in Graph creation] + + 33b2dea Wed Nov 13 17:55:58 2013 -0800 + Merge pull request #1 from ankurdave/MissingVertices + [During graph creation, create eTable earlier] + + 2054c61 Wed Nov 13 16:49:55 2013 -0800 + Merge pull request #159 from liancheng/dagscheduler-actor-refine + [Migrate the daemon thread started by DAGScheduler to Akka actor] + + 9290e5b Wed Nov 13 16:48:44 2013 -0800 + Merge pull request #165 from NathanHowell/kerberos-master + [spark-assembly.jar fails to authenticate with YARN ResourceManager] + + a81fcb7 Wed Nov 13 10:41:01 2013 -0800 + Merge pull request #68 from jegonzal/BitSetSetUntilBug + [Addressing bug in BitSet.setUntil(ind)] + + 39af914 Wed Nov 13 08:39:05 2013 -0800 + Merge pull request #166 from ahirreddy/simr-spark-ui + [SIMR Backend Scheduler will now write Spark UI URL to HDFS, which is to ...] + + f49ea28 Tue Nov 12 19:13:39 2013 -0800 + Merge pull request #137 from tgravescs/sparkYarnJarsHdfsRebase + [Allow spark on yarn to be run from HDFS.] + + 87f2f4e Tue Nov 12 16:26:09 2013 -0800 + Merge pull request #153 from ankurdave/stop-spot-cluster + [Enable stopping and starting a spot cluster] + + b8bf04a Tue Nov 12 16:19:50 2013 -0800 + Merge pull request #160 from xiajunluan/JIRA-923 + [Fix bug JIRA-923] + + dfd1ebc Tue Nov 12 09:10:05 2013 -0800 + Merge pull request #164 from tdas/kafka-fix + [Made block generator thread safe to fix Kafka bug.] + + 2e8d450 Mon Nov 11 17:34:09 2013 -0800 + Merge pull request #63 from jegonzal/VertexSetCleanup + [Cleanup of VertexSetRDD] + + b8e294a Mon Nov 11 16:25:42 2013 -0800 + Merge pull request #61 from ankurdave/pid2vid + [Shuffle replicated vertex attributes efficiently in columnar format] + + 3d7277c Mon Nov 11 15:49:28 2013 -0800 + Merge pull request #55 from ankurdave/aggregateNeighbors-variants + [Specialize mapReduceTriplets for accessing subsets of vertex attributes] + + 23b53ef Mon Nov 11 12:30:02 2013 -0800 + Merge pull request #156 from haoyuan/master + [add tachyon module] + + 1a06f70 Sun Nov 10 10:54:44 2013 -0800 + Merge pull request #60 from amplab/rxin + [Looks good to me.] + + 58d4f6c Sun Nov 10 09:23:56 2013 -0800 + Merge pull request #157 from rxin/kryo + [3 Kryo related changes.] + + 3efc019 Sat Nov 9 17:53:49 2013 -0800 + Merge pull request #147 from JoshRosen/fix-java-api-completeness-checker + [Add spark-tools assembly to spark-class'ss classpath] + + 87954d4 Sat Nov 9 17:53:25 2013 -0800 + Merge pull request #154 from soulmachine/ClusterScheduler + [Replace the thread inside ClusterScheduler.start() with an Akka scheduler] + + f6c9462 Sat Nov 9 16:14:45 2013 -0800 + Merge pull request #58 from jegonzal/KryoMessages + [Kryo messages] + + 83bf192 Sat Nov 9 15:40:29 2013 -0800 + Merge pull request #155 from rxin/jobgroup + [Don't reset job group when a new job description is set.] + + 8af99f2 Sat Nov 9 13:48:00 2013 -0800 + Merge pull request #149 from tgravescs/fixSecureHdfsAccess + [Fix secure hdfs access for spark on yarn] + + 72a601e Sat Nov 9 11:55:16 2013 -0800 + Merge pull request #152 from rxin/repl + [Propagate SparkContext local properties from spark-repl caller thread to the repl execution thread.] 
+ + 6ee05be Thu Nov 7 19:12:41 2013 -0800 + Merge pull request #49 from jegonzal/graphxshell + [GraphX Console with Logo Text] + + a9f96b5 Thu Nov 7 18:56:56 2013 -0800 + Merge pull request #56 from jegonzal/PregelAPIChanges + [Changing Pregel API to use mapReduceTriplets instead of aggregateNeighbors] + + 5907137 Thu Nov 7 16:58:31 2013 -0800 + Merge pull request #54 from amplab/rxin + [Converted for loops to while loops in EdgePartition.] + + edf4164 Thu Nov 7 16:22:43 2013 -0800 + Merge pull request #53 from amplab/rxin + [Added GraphX to classpath.] + + c379e10 Thu Nov 7 16:01:47 2013 -0800 + Merge pull request #51 from jegonzal/VertexSetRDD + [Reverting to Array based (materialized) output in VertexSetRDD] + + 3d4ad84 Thu Nov 7 11:08:27 2013 -0800 + Merge pull request #148 from squito/include_appId + [Include appId in executor cmd line args] + + be7e8da Wed Nov 6 23:22:47 2013 -0800 + Merge pull request #23 from jerryshao/multi-user + [Add Spark multi-user support for standalone mode and Mesos] + + aadeda5 Wed Nov 6 13:27:47 2013 -0800 + Merge pull request #144 from liancheng/runjob-clean + [Removed unused return value in SparkContext.runJob] + + 951024f Wed Nov 6 09:36:14 2013 -0800 + Merge pull request #145 from aarondav/sls-fix + [Attempt to fix SparkListenerSuite breakage] + + bf4e613 Tue Nov 5 23:14:09 2013 -0800 + Merge pull request #143 from rxin/scheduler-hang + [Ignore a task update status if the executor doesn't exist anymore.] + + 9f7b9bb Tue Nov 5 10:42:19 2013 -0800 + Merge pull request #142 from liancheng/dagscheduler-pattern-matching + [Using case class deep match to simplify code in DAGScheduler.processEvent] + + ca44b51 Tue Nov 5 01:32:55 2013 -0800 + Merge pull request #50 from amplab/mergemerge + [Merge Spark master into graphx] + + 8106532 Mon Nov 4 20:47:14 2013 -0800 + Merge pull request #139 from aarondav/shuffle-next + [Never store shuffle blocks in BlockManager] + + 0b26a39 Mon Nov 4 18:22:06 2013 -0800 + Merge pull request #128 from shimingfei/joblogger-doc + [add javadoc to JobLogger, and some small fix] + + 7a26104 Mon Nov 4 17:54:06 2013 -0800 + Merge pull request #130 from aarondav/shuffle + [Memory-optimized shuffle file consolidation] + + b5dc339 Sun Nov 3 20:43:15 2013 -0800 + Merge pull request #70 from rxin/hash1 + [Fast, memory-efficient hash set, hash table implementations optimized for primitive data types.] + + 41ead7a Sat Nov 2 14:41:50 2013 -0700 + Merge pull request #133 from Mistobaan/link_fix + [update default github] + + d407c07 Sat Nov 2 14:36:37 2013 -0700 + Merge pull request #134 from rxin/readme + [Fixed a typo in Hadoop version in README.] 
+ + e7c7b80 Fri Nov 1 17:58:10 2013 -0700 + Merge pull request #132 from Mistobaan/doc_fix + [fix persistent-hdfs] + + d6d11c2 Fri Nov 1 15:40:33 2013 -0700 + Merge pull request #129 from velvia/2013-11/document-local-uris + [Document & finish support for local: URIs] + + 99bfcc9 Thu Oct 31 21:38:10 2013 -0700 + Merge pull request #46 from jegonzal/VertexSetWithHashSet + [Switched VertexSetRDD and GraphImpl to use OpenHashSet] + + fcaaf86 Thu Oct 31 18:27:30 2013 -0700 + Merge pull request #44 from jegonzal/rxinBitSet + [Switching to VertexSetRDD to use @rxin BitSet and OpenHash ] + + 3f3c727 Thu Oct 31 09:52:25 2013 -0700 + Merge pull request #41 from jegonzal/LineageTracking + [Optimizing Graph Lineage] + + 944f6b8 Thu Oct 31 09:40:35 2013 -0700 + Merge pull request #43 from amplab/FixBitSetCastException + [Fix BitSet cast exception] + + 8f1098a Wed Oct 30 20:11:48 2013 -0700 + Merge pull request #117 from stephenh/avoid_concurrent_modification_exception + [Handle ConcurrentModificationExceptions in SparkContext init.] + + dc9ce16 Wed Oct 30 17:01:56 2013 -0700 + Merge pull request #126 from kayousterhout/local_fix + [Fixed incorrect log message in local scheduler] + + 33de11c Wed Oct 30 16:58:27 2013 -0700 + Merge pull request #124 from tgravescs/sparkHadoopUtilFix + [Pull SparkHadoopUtil out of SparkEnv (jira SPARK-886)] + + a0c86c3 Wed Oct 30 15:34:39 2013 -0700 + Merge pull request #38 from jegonzal/Documentation + [Improving Documentation] + + 618c1f6 Wed Oct 30 12:03:44 2013 -0700 + Merge pull request #125 from velvia/2013-10/local-jar-uri + [Add support for local:// URI scheme for addJars()] + + 745dc42 Tue Oct 29 23:47:10 2013 -0700 + Merge pull request #118 from JoshRosen/blockinfo-memory-usage + [Reduce the memory footprint of BlockInfo objects] + + 06adf63 Tue Oct 29 16:43:46 2013 -0700 + Merge pull request #33 from kellrott/master + [Fixing graph/pom.xml] + + 098768e Tue Oct 29 15:08:36 2013 -0700 + Merge pull request #37 from jegonzal/AnalyticsCleanup + [Updated Connected Components and Pregel Docs] + + f0e23a0 Tue Oct 29 01:41:44 2013 -0400 + Merge pull request #119 from soulmachine/master + [A little revise for the document] + + aec9bf9 Sun Oct 27 19:32:00 2013 -0700 + Merge pull request #112 from kayousterhout/ui_task_attempt_id + [Display both task ID and task attempt ID in UI, and rename taskId to taskAttemptId] + + d4df474 Sun Oct 27 22:11:21 2013 -0400 + Merge pull request #115 from aarondav/shuffle-fix + [Eliminate extra memory usage when shuffle file consolidation is disabled] + + e018f2d Sat Oct 26 11:39:15 2013 -0700 + Merge pull request #113 from pwendell/master + [Improve error message when multiple assembly jars are present.] + + 662ee9f Sat Oct 26 11:35:59 2013 -0700 + Merge pull request #114 from soulmachine/master + [A little revise for the document] + + bab496c Fri Oct 25 18:28:43 2013 -0700 + Merge pull request #108 from alig/master + [Changes to enable executing by using HDFS as a synchronization point between driver and executors, as well as ensuring executors exit properly.] + + d307db6 Fri Oct 25 17:26:06 2013 -0700 + Merge pull request #102 from tdas/transform + [Added new Spark Streaming operations] + + 85e2cab Fri Oct 25 14:46:06 2013 -0700 + Merge pull request #111 from kayousterhout/ui_name + [Properly display the name of a stage in the UI.] + + ab35ec4 Fri Oct 25 10:16:18 2013 -0700 + Merge pull request #110 from pwendell/master + [Exclude jopt from kafka dependency.] 
+ + 4f2c943 Thu Oct 24 22:32:02 2013 -0700 + Merge pull request #109 from pwendell/master + [Adding Java/Java Streaming versions of `repartition` with associated tests] + + 99ad4a6 Thu Oct 24 17:08:39 2013 -0700 + Merge pull request #106 from pwendell/master + [Add a `repartition` operator.] + + 5429d62 Thu Oct 24 11:15:55 2013 -0700 + Merge pull request #107 from ScrapCodes/scala-2.10 + [Updating to latest akka 2.2.3, which fixes our only failing test Driver Suite.] + + 6f82c42 Thu Oct 24 11:09:46 2013 -0700 + Merge pull request #34 from jegonzal/AnalyticsCleanup + [Analytics Cleanup] + + 1dc776b Wed Oct 23 22:05:52 2013 -0700 + Merge pull request #93 from kayousterhout/ui_new_state + [Show "GETTING_RESULTS" state in UI.] + + c4b187d Wed Oct 23 21:56:18 2013 -0700 + Merge pull request #105 from pwendell/doc-fix + [Fixing broken links in programming guide] + + a098438 Wed Oct 23 18:03:08 2013 -0700 + Merge pull request #103 from JoshRosen/unpersist-fix + [Add unpersist() to JavaDoubleRDD and JavaPairRDD.] + + dd65964 Wed Oct 23 15:07:59 2013 -0700 + Merge pull request #64 from prabeesh/master + [MQTT Adapter for Spark Streaming] + + 452aa36 Tue Oct 22 23:15:33 2013 -0700 + Merge pull request #97 from ewencp/pyspark-system-properties + [Add classmethod to SparkContext to set system properties.] + + 9dfcf53 Tue Oct 22 16:01:42 2013 -0700 + Merge pull request #100 from JoshRosen/spark-902 + [Remove redundant Java Function call() definitions] + + 49d5cda Tue Oct 22 15:38:02 2013 -0700 + Merge pull request #30 from jegonzal/VertexSetRDD_Tests + [Testing and Documenting VertexSetRDD] + + 97184de Tue Oct 22 13:10:14 2013 -0700 + Merge pull request #99 from pwendell/master + [Use correct formatting for comments in StoragePerfTester] + + c404adb Tue Oct 22 11:30:19 2013 -0700 + Merge pull request #90 from pwendell/master + [SPARK-940: Do not directly pass Stage objects to SparkListener.] + + aa9019f Tue Oct 22 10:30:02 2013 -0700 + Merge pull request #98 from aarondav/docs + [Docs: Fix links to RDD API documentation] + + a0e08f0 Tue Oct 22 10:20:43 2013 -0700 + Merge pull request #82 from JoshRosen/map-output-tracker-refactoring + [Split MapOutputTracker into Master/Worker classes] + + b84193c Mon Oct 21 23:35:13 2013 -0700 + Merge pull request #92 from tgravescs/sparkYarnFixClasspath + [Fix the Worker to use CoarseGrainedExecutorBackend and modify classpath ...] + + 731c94e Mon Oct 21 23:31:38 2013 -0700 + Merge pull request #56 from jerryshao/kafka-0.8-dev + [Upgrade Kafka 0.7.2 to Kafka 0.8.0-beta1 for Spark Streaming] + + 48952d6 Mon Oct 21 22:45:00 2013 -0700 + Merge pull request #87 from aarondav/shuffle-base + [Basic shuffle file consolidation] + + a51359c Mon Oct 21 20:33:29 2013 -0700 + Merge pull request #95 from aarondav/perftest + [Minor: Put StoragePerfTester in org/apache/] + + 39d2e9b Mon Oct 21 18:58:48 2013 -0700 + Merge pull request #94 from aarondav/mesos-fix + [Fix mesos urls] + + aa61bfd Mon Oct 21 11:57:05 2013 -0700 + Merge pull request #88 from rxin/clean + [Made the following traits/interfaces/classes non-public:] + + 35886f3 Sun Oct 20 22:20:32 2013 -0700 + Merge pull request #41 from pwendell/shuffle-benchmark + [Provide Instrumentation for Shuffle Write Performance] + + 5b9380e Sun Oct 20 21:03:51 2013 -0700 + Merge pull request #89 from rxin/executor + [Don't setup the uncaught exception handler in local mode.] + + 261bcf2 Sun Oct 20 17:59:51 2013 -0700 + Merge pull request #80 from rxin/build + [Exclusion rules for Maven build files.] 
+ + edc5e3f Sun Oct 20 17:18:06 2013 -0700 + Merge pull request #75 from JoshRosen/block-manager-cleanup + [Code de-duplication in BlockManager] + + 2a7ae17 Sun Oct 20 11:45:21 2013 -0700 + Merge pull request #84 from rxin/kill1 + [Added documentation for setJobGroup. Also some minor cleanup in SparkContext.] + + e4abb75 Sun Oct 20 09:38:37 2013 -0700 + Merge pull request #85 from rxin/clean + [Moved the top level spark package object from spark to org.apache.spark] + + 136b9b3 Sun Oct 20 02:58:26 2013 -0700 + Basic shuffle file consolidation + [The Spark shuffle phase can produce a large number of files, as one file is created] + + 747f538 Sat Oct 19 23:40:40 2013 -0700 + Merge pull request #83 from ewencp/pyspark-accumulator-add-method + [Add an add() method to pyspark accumulators.] + + 6511bbe Sat Oct 19 11:34:56 2013 -0700 + Merge pull request #78 from mosharaf/master + [Removed BitTorrentBroadcast and TreeBroadcast.] + + f628804 Fri Oct 18 23:19:42 2013 -0700 + Merge pull request #76 from pwendell/master + [Clarify compression property.] + + 599dcb0 Fri Oct 18 22:49:00 2013 -0700 + Merge pull request #74 from rxin/kill + [Job cancellation via job group id.] + + 9cf43cf Fri Oct 18 22:07:21 2013 -0700 + Merge pull request #28 from jegonzal/VertexSetRDD + [Refactoring IndexedRDD to VertexSetRDD.] + + f888a5b Fri Oct 18 22:06:58 2013 -0700 + Merge pull request #29 from ankurdave/unit-tests + [Unit tests for Graph and GraphOps] + + 8de9706 Fri Oct 18 20:32:39 2013 -0700 + Merge pull request #66 from shivaram/sbt-assembly-deps + [Add SBT target to assemble dependencies] + + e5316d0 Fri Oct 18 20:30:56 2013 -0700 + Merge pull request #68 from mosharaf/master + [Faster and stable/reliable broadcast] + + 8d528af Fri Oct 18 20:24:10 2013 -0700 + Merge pull request #71 from aarondav/scdefaults + [Spark shell exits if it cannot create SparkContext] + + 0794bd7 Fri Oct 18 18:59:58 2013 -0700 + Merge pull request #27 from jegonzal/removed_indexedrdd_from_core + [Removing IndexedRDD changes for spark/core] + + 099977f Thu Oct 17 14:17:08 2013 -0700 + Merge pull request #26 from ankurdave/split-vTableReplicated + [Great work!] + + fc26e5b Thu Oct 17 13:21:07 2013 -0700 + Merge pull request #69 from KarthikTunga/master + [Fix for issue SPARK-627. Implementing --config argument in the scripts.] + + cf64f63 Thu Oct 17 11:12:28 2013 -0700 + Merge pull request #67 from kayousterhout/remove_tsl + [Removed TaskSchedulerListener interface.] + + f9973ca Wed Oct 16 15:58:41 2013 -0700 + Merge pull request #65 from tgravescs/fixYarn + [Fix yarn build] + + 28e9c2a Tue Oct 15 23:59:56 2013 -0700 + Merge pull request #63 from pwendell/master + [Fixing spark streaming example and a bug in examples build.] + + 4e46fde Tue Oct 15 23:14:27 2013 -0700 + Merge pull request #62 from harveyfeng/master + [Make TaskContext's stageId publicly accessible.] + + b534606 Tue Oct 15 21:25:03 2013 -0700 + Merge pull request #8 from vchekan/checkpoint-ttl-restore + [Serialize and restore spark.cleaner.ttl to savepoint] + + 6dbd220 Tue Oct 15 19:02:57 2013 -0700 + Merge pull request #34 from kayousterhout/rename + [Renamed StandaloneX to CoarseGrainedX.] + + 983b83f Tue Oct 15 19:02:46 2013 -0700 + Merge pull request #61 from kayousterhout/daemon_thread + [Unified daemon thread pools] + + 3249e0e Tue Oct 15 14:12:33 2013 -0700 + Merge pull request #59 from rxin/warning + [Bump up logging level to warning for failed tasks.] 
+ + 678dec6 Tue Oct 15 10:51:46 2013 -0700 + Merge pull request #58 from hsaputra/update-pom-asf + [Update pom.xml to use version 13 of the ASF parent pom] + + e33b183 Mon Oct 14 22:25:47 2013 -0700 + Merge pull request #29 from rxin/kill + [Job killing] + + 3b11f43 Mon Oct 14 14:20:01 2013 -0700 + Merge pull request #57 from aarondav/bid + [Refactor BlockId into an actual type] + + 9979690 Sat Oct 12 21:23:26 2013 -0700 + Merge pull request #52 from harveyfeng/hadoop-closure + [Add an optional closure parameter to HadoopRDD instantiation to use when creating local JobConfs.] + + dca8009 Fri Oct 11 16:08:15 2013 -0700 + Merge pull request #54 from aoiwelle/remove_unused_imports + [Remove unnecessary mutable imports] + + 0e5052b Fri Oct 11 15:45:16 2013 -0700 + Merge pull request #51 from ScrapCodes/scala-2.10 + [Scala 2.10] + + fb25f32 Fri Oct 11 15:44:43 2013 -0700 + Merge pull request #53 from witgo/master + [Add a zookeeper compile dependency to fix build in maven] + + d6ead47 Fri Oct 11 15:43:01 2013 -0700 + Merge pull request #32 from mridulm/master + [Address review comments, move to incubator spark] + + c71499b Thu Oct 10 17:16:42 2013 -0700 + Merge pull request #19 from aarondav/master-zk + [Standalone Scheduler fault tolerance using ZooKeeper] + + 5867a82 Thu Oct 10 14:02:37 2013 -0700 + Merge pull request #19 from dcrankshaw/master + [Merge canonical 2d partitioner and group edges into benchmarks] + + cd08f73 Thu Oct 10 13:55:47 2013 -0700 + Merge pull request #44 from mateiz/fast-map + [A fast and low-memory append-only map for shuffle operations] + + 4b46d51 Thu Oct 10 13:35:36 2013 -0700 + Merge pull request #17 from amplab/product2 + [product 2 change] + + 320418f Wed Oct 9 16:55:30 2013 -0700 + Merge pull request #49 from mateiz/kryo-fix-2 + [Fix Chill serialization of Range objects] + + 215238c Wed Oct 9 16:49:44 2013 -0700 + Merge pull request #50 from kayousterhout/SPARK-908 + [Fix race condition in SparkListenerSuite (fixes SPARK-908).] 
+ + 7827efc Wed Oct 9 15:07:25 2013 -0700 + Merge pull request #46 from mateiz/py-sort-update + [Fix PySpark docs and an overly long line of code after #38] + + 7b3ae04 Wed Oct 9 12:14:19 2013 -0700 + Merge pull request #45 from pwendell/metrics_units + [Use standard abbreviation in metrics description (MBytes -> MB)] - 9c9e71e Sat Dec 7 12:47:26 2013 -0800 - Merge pull request #241 from pwendell/branch-0.8 - [Fix race condition in JobLoggerSuite [0.8 branch]] + b4fa11f Wed Oct 9 11:59:47 2013 -0700 + Merge pull request #38 from AndreSchumacher/pyspark_sorting + [SPARK-705: implement sortByKey() in PySpark] - 92597c0 Sat Dec 7 11:58:00 2013 -0800 - Merge pull request #240 from pwendell/master - [SPARK-917 Improve API links in nav bar] + 19d445d Wed Oct 9 11:08:34 2013 -0700 + Merge pull request #22 from GraceH/metrics-naming + [SPARK-900 Use coarser grained naming for metrics] - cfca70e Sat Dec 7 01:15:20 2013 -0800 - Merge pull request #236 from pwendell/shuffle-docs - [Adding disclaimer for shuffle file consolidation] + 7d50f9f Wed Oct 9 10:32:42 2013 -0700 + Merge pull request #35 from MartinWeindel/scala-2.10 + [Fixing inconsistencies and warnings on Scala 2.10 branch] - 80cc4ff Sat Dec 7 01:15:20 2013 -0800 - Merge pull request #237 from pwendell/formatting-fix - [Formatting fix] + 3218fa7 Tue Oct 8 23:44:55 2013 -0700 + Merge pull request #4 from MLnick/implicit-als + [Adding algorithm for implicit feedback data to ALS] - 4a6aae3 Sat Dec 7 01:15:20 2013 -0800 - Merge pull request #235 from pwendell/master - [Minor doc fixes and updating README] + e67d5b9 Tue Oct 8 22:57:38 2013 -0700 + Merge pull request #43 from mateiz/kryo-fix + [Don't allocate Kryo buffers unless needed] - 2642312 Sat Dec 7 01:15:20 2013 -0800 - Merge pull request #234 from alig/master - [Updated documentation about the YARN v2.2 build process] + ea34c52 Mon Oct 7 20:45:58 2013 -0700 + Merge pull request #42 from pwendell/shuffle-read-perf + [Fix inconsistent and incorrect log messages in shuffle read path] - 2d3eae2 Sat Dec 7 01:15:19 2013 -0800 - Merge pull request #199 from harveyfeng/yarn-2.2 - [Hadoop 2.2 migration] + 02f37ee Mon Oct 7 15:48:52 2013 -0700 + Merge pull request #39 from pwendell/master + [Adding Shark 0.7.1 to EC2 scripts] - 1e9d084 Sat Dec 7 01:15:19 2013 -0800 - Merge pull request #101 from colorant/yarn-client-scheduler - [For SPARK-527, Support spark-shell when running on YARN] + 213b70a Mon Oct 7 10:54:22 2013 -0700 + Merge pull request #31 from sundeepn/branch-0.8 + [Resolving package conflicts with hadoop 0.23.9] - 20d1f8b Sat Dec 7 01:15:09 2013 -0800 - Merge pull request #191 from hsaputra/removesemicolonscala - [Cleanup to remove semicolons (;) from Scala code] + d585613 Sat Oct 5 22:57:05 2013 -0700 + Merge pull request #37 from pwendell/merge-0.8 + [merge in remaining changes from `branch-0.8`] - 2b76315 Sat Dec 7 00:02:14 2013 -0800 - Merge pull request #178 from hsaputra/simplecleanupcode - [Simple cleanup on Spark's Scala code] + 4a25b11 Sat Oct 5 19:28:55 2013 -0700 + Merge pull request #20 from harveyfeng/hadoop-config-cache + [Allow users to pass broadcasted Configurations and cache InputFormats across Hadoop file reads.] - ee22be0 Fri Dec 6 23:29:38 2013 -0800 - Merge pull request #189 from tgravescs/sparkYarnErrorHandling - [Impove Spark on Yarn Error handling] + 8fc68d0 Sat Oct 5 17:24:35 2013 -0700 + Merge pull request #36 from pwendell/versions + [Bumping EC2 default version in master to .] 
- d77c337 Thu Dec 5 23:30:11 2013 -0800 - Merge pull request #232 from markhamstra/FiniteWait - [jobWaiter.synchronized before jobWaiter.wait] + 100222b Sat Oct 5 13:38:59 2013 -0700 + Merge pull request #27 from davidmccauley/master + [SPARK-920/921 - JSON endpoint updates] - 17ca8a1 Thu Dec 5 14:32:01 2013 -0800 - Merge pull request #231 from pwendell/branch-0.8 - [Bumping version numbers for 0.8.1 release] + 0864193 Sat Oct 5 13:25:18 2013 -0700 + Merge pull request #33 from AndreSchumacher/pyspark_partition_key_change + [Fixing SPARK-602: PythonPartitioner] - 47fce43 Thu Dec 5 12:33:02 2013 -0800 - Merge pull request #228 from pwendell/master - [Document missing configs and set shuffle consolidation to false.] + 61ffcde Fri Oct 4 10:52:17 2013 -0700 + Merge pull request #15 from dcrankshaw/master + [Add synthetic generators] - cc33f9f Wed Dec 4 15:57:47 2013 -0800 - Merge pull request #227 from pwendell/master - [Fix small bug in web UI and minor clean-up.] + 3fe12cc Fri Oct 4 10:51:28 2013 -0700 + Merge pull request #946 from ScrapCodes/scala-2.10 + [Fixed non termination of Executor backend, when sc.stop is not called and system.exit instead.] - ba44f21 Wed Dec 4 15:56:58 2013 -0800 - Merge pull request #223 from rxin/transient - [Mark partitioner, name, and generator field in RDD as @transient.] + 232765f Thu Oct 3 12:00:48 2013 -0700 + Merge pull request #26 from Du-Li/master + [fixed a wildcard bug in make-distribution.sh; ask sbt to check local] - 31da065 Wed Dec 4 14:01:13 2013 -0800 - Merge pull request #95 from aarondav/perftest - [Minor: Put StoragePerfTester in org/apache/] + 405e69b Thu Oct 3 10:52:41 2013 -0700 + Merge pull request #25 from CruncherBigData/master + [Update README: updated the link] - daaaee1 Tue Dec 3 14:22:05 2013 -0800 - Merge pull request #218 from JoshRosen/spark-970-pyspark-unicode-error - [Fix UnicodeEncodeError in PySpark saveAsTextFile() (SPARK-970)] + 49dbfcc Thu Oct 3 10:52:06 2013 -0700 + Merge pull request #28 from tgravescs/sparYarnAppName + [Allow users to set the application name for Spark on Yarn] - 8b091fe Mon Dec 2 21:28:13 2013 -0800 - Merge pull request #181 from BlackNiuza/fix_tasks_number - [correct number of tasks in ExecutorsUI] + e597ea3 Wed Oct 2 21:14:24 2013 -0700 + Merge pull request #10 from kayousterhout/results_through-bm + [Send Task results through the block manager when larger than Akka frame size (fixes SPARK-669).] - d21266e Sun Dec 1 12:47:30 2013 -0800 - Merge pull request #219 from sundeepn/schedulerexception - [Scheduler quits when newStage fails] + 714fdab Thu Sep 26 14:28:55 2013 -0700 + Merge pull request #17 from rxin/optimize + [Remove -optimize flag] - be9c176 Tue Nov 26 10:27:41 2013 -0800 - Merge pull request #201 from rxin/mappartitions - [Use the proper partition index in mapPartitionsWIthIndex] + 13eced7 Thu Sep 26 14:18:19 2013 -0700 + Merge pull request #16 from pwendell/master + [Bug fix in master build] - 9949561 Mon Nov 25 07:51:23 2013 +0800 - Merge pull request #197 from aarondav/patrick-fix - [Fix 'timeWriting' stat for shuffle files] + 70a0b99 Thu Sep 26 14:11:54 2013 -0700 + Merge pull request #14 from kayousterhout/untangle_scheduler + [Improved organization of scheduling packages.] 
- c59ce18 Sun Nov 24 11:04:00 2013 +0800 - Merge pull request #200 from mateiz/hash-fix - [AppendOnlyMap fixes] + afd03b2 Thu Sep 26 14:09:55 2013 -0700 + Merge pull request #943 from ScrapCodes/scala-2.10 + [Scala 2.10 with akka 2.2] - d7ab87e Fri Nov 22 10:27:16 2013 +0800 - Merge pull request #193 from aoiwelle/patch-1 - [Fix Kryo Serializer buffer documentation inconsistency] + 76677b8 Thu Sep 26 14:03:46 2013 -0700 + Merge pull request #670 from jey/ec2-ssh-improvements + [EC2 SSH improvements] - d7c6a00 Fri Nov 22 10:13:37 2013 +0800 - Merge pull request #196 from pwendell/master - [TimeTrackingOutputStream should pass on calls to close() and flush().] + c514cd1 Thu Sep 26 13:48:20 2013 -0700 + Merge pull request #930 from holdenk/master + [Add mapPartitionsWithIndex] - 37126e8 Sun Nov 17 18:53:16 2013 -0800 - Merge pull request #174 from ahirreddy/master - [Write Spark UI url to driver file on HDFS] + 560ee5c Thu Sep 26 11:27:34 2013 -0700 + Merge pull request #7 from wannabeast/memorystore-fixes + [some minor fixes to MemoryStore] - 8823057 Sun Nov 17 18:53:04 2013 -0800 - Merge pull request #166 from ahirreddy/simr-spark-ui - [SIMR Backend Scheduler will now write Spark UI URL to HDFS, which is to ...] + 6566a19 Thu Sep 26 08:01:04 2013 -0700 + Merge pull request #9 from rxin/limit + [Smarter take/limit implementation.] - e134ed5 Sun Nov 17 18:51:19 2013 -0800 - Merge pull request #137 from tgravescs/sparkYarnJarsHdfsRebase - [Allow spark on yarn to be run from HDFS.] + 834686b Sun Sep 22 15:06:48 2013 -0700 + Merge pull request #928 from jerryshao/fairscheduler-refactor + [Refactor FairSchedulableBuilder] - af98fbc Sun Nov 17 18:50:23 2013 -0800 - Merge pull request #165 from NathanHowell/kerberos-master - [spark-assembly.jar fails to authenticate with YARN ResourceManager] + a2ea069 Sat Sep 21 23:04:42 2013 -0700 + Merge pull request #937 from jerryshao/localProperties-fix + [Fix PR926 local properties issues in Spark Streaming like scenarios] - a64397b Sun Nov 17 18:49:40 2013 -0800 - Merge pull request #153 from ankurdave/stop-spot-cluster - [Enable stopping and starting a spot cluster] + f06f2da Sat Sep 21 22:43:34 2013 -0700 + Merge pull request #941 from ilikerps/master + [Add "org.apache." prefix to packages in spark-class] - 6c60768 Sun Nov 17 18:46:40 2013 -0800 - Merge pull request #160 from xiajunluan/JIRA-923 - [Fix bug JIRA-923] + 7bb12a2 Sat Sep 21 22:42:46 2013 -0700 + Merge pull request #940 from ankurdave/clear-port-properties-after-tests + [After unit tests, clear port properties unconditionally] - f0d350a Sun Nov 17 18:44:49 2013 -0800 - Merge pull request #175 from kayousterhout/no_retry_not_serializable - [Don't retry tasks when they fail due to a NotSerializableException] + a00317b Fri Sep 20 11:29:31 2013 -0700 + Merge pull request #1 from ankurdave/aggregateNeighbors-returns-graph + [Return Graph from Graph.aggregateNeighbors] - 9d56371 Sun Nov 17 18:43:59 2013 -0800 - Merge pull request #173 from kayousterhout/scheduler_hang - [Fix bug where scheduler could hang after task failure.] 
+ 6a5e665 Thu Sep 19 22:41:44 2013 -0700 + Merge pull request #3 from ankurdave/clear-port-properties-after-tests + [After unit tests, clear port properties unconditionally ] - 24e238b Sun Nov 17 18:42:41 2013 -0800 - Merge pull request #182 from rxin/vector - [Slightly enhanced PrimitiveVector:] + 68ad33a Thu Sep 19 21:30:27 2013 -0700 + Merge pull request #2 from ankurdave/package-fixes + [Package fixes (spark.graph -> org.apache.spark.graph)] - e7927ad Thu Nov 14 10:32:27 2013 -0800 - Merge pull request #169 from kayousterhout/mesos_fix - [Don't ignore spark.cores.max when using Mesos Coarse mode] + cd7222c Thu Sep 19 14:21:24 2013 -0700 + Merge pull request #938 from ilikerps/master + [Fix issue with spark_ec2 seeing empty security groups] - 41dc566 Thu Nov 14 10:30:57 2013 -0800 - Merge pull request #170 from liancheng/hadooprdd-doc-typo - [Fixed a scaladoc typo in HadoopRDD.scala] + e0dd24d Sat Aug 31 17:54:15 2013 -0700 + Merge pull request #879 from AndreSchumacher/scala-2.10 + [PySpark: replacing class manifest by class tag for Scala 2.10.2 in rdd.py] - 333859f Thu Nov 14 10:26:27 2013 -0800 - Merge pull request #171 from RIA-pierre-borckmans/master - [Fixed typos in the CDH4 distributions version codes.] + ad61349 Thu Jul 18 13:53:48 2013 -0700 + Merge pull request #709 from ScrapCodes/scala-2.10 + [Fixed warnings in scala 2.10 branch.] - c856651 Tue Nov 12 10:22:18 2013 -0800 - Merge pull request #164 from tdas/kafka-fix - [Made block generator thread safe to fix Kafka bug.] + a289ded Mon Jul 15 15:59:43 2013 -0700 + Merge pull request #700 from ScrapCodes/scala-2.10 + [Scala 2.10 ] - 30786c6 Sun Nov 10 11:58:58 2013 -0800 - Merge pull request #157 from rxin/kryo - [3 Kryo related changes.] + 1044a95 Fri Jun 14 20:04:24 2013 -0700 + Merge pull request #652 from ScrapCodes/scala-2.10 + [Fixed maven build without netty fix] - 5ce6c75 Sat Nov 9 22:31:59 2013 -0800 - Merge pull request #147 from JoshRosen/fix-java-api-completeness-checker - [Add spark-tools assembly to spark-class'ss classpath] + 4b57f83 Sat Apr 20 10:40:07 2013 -0700 + Merge pull request #535 from ScrapCodes/scala-2.10-repl-port + [porting of repl to scala-2.10] - 1d52b50 Sat Nov 9 15:47:40 2013 -0800 - Merge pull request #149 from tgravescs/fixSecureHdfsAccess - [Fix secure hdfs access for spark on yarn] + 73b3fee Sun Jan 20 10:11:49 2013 -0800 + Merge pull request #388 from folone/master + [Updated maven build configuration for Scala 2.10] - 32a0c4f Sat Nov 9 15:40:54 2013 -0800 - Merge pull request #155 from rxin/jobgroup - [Don't reset job group when a new job description is set.] + 20adf27 Tue Jan 15 11:03:49 2013 -0800 + Merge pull request #371 from folone/master + [Scala 2.10.0] - 07ae524 Sat Nov 9 11:56:46 2013 -0800 - Merge pull request #152 from rxin/repl - [Propagate SparkContext local properties from spark-repl caller thread to the repl execution thread.] 
+Release 0.8.0-incubating - a5916b9 Thu Nov 7 11:08:44 2013 -0800 - Merge pull request #148 from squito/include_appId - [Include appId in executor cmd line args] + 2aff798 Sun Sep 15 14:05:04 2013 -0700 + Merge pull request #933 from jey/yarn-typo-fix + [Fix typo in Maven build docs] - d5ae953 Wed Nov 6 23:23:12 2013 -0800 - Merge pull request #23 from jerryshao/multi-user - [Add Spark multi-user support for standalone mode and Mesos] + dbd2c4f Sun Sep 15 13:20:41 2013 -0700 + Merge pull request #932 from pwendell/mesos-version + [Bumping Mesos version to 0.13.0] - 1d9412b Wed Nov 6 13:28:02 2013 -0800 - Merge pull request #144 from liancheng/runjob-clean - [Removed unused return value in SparkContext.runJob] + 9fb0b9d Sun Sep 15 13:02:53 2013 -0700 + Merge pull request #931 from pwendell/yarn-docs + [Explain yarn.version in Maven build docs] - c8e0c0d Wed Nov 6 09:40:00 2013 -0800 - Merge pull request #145 from aarondav/sls-fix - [Attempt to fix SparkListenerSuite breakage] + c4c1db2 Fri Sep 13 19:52:12 2013 -0700 + Merge pull request #929 from pwendell/master + [Use different Hadoop version for YARN artifacts.] - 0f62786 Tue Nov 5 23:14:28 2013 -0800 - Merge pull request #143 from rxin/scheduler-hang - [Ignore a task update status if the executor doesn't exist anymore.] + a310de6 Wed Sep 11 19:36:11 2013 -0700 + Merge pull request #926 from kayousterhout/dynamic + [Changed localProperties to use ThreadLocal (not DynamicVariable).] - 96670e7 Tue Nov 5 10:43:10 2013 -0800 - Merge pull request #140 from aarondav/merge-75 - [Manually merge BlockManager refactor #75] + 58c7d8b Wed Sep 11 17:33:42 2013 -0700 + Merge pull request #927 from benh/mesos-docs + [Updated Spark on Mesos documentation.] - 0848167 Tue Nov 5 10:42:36 2013 -0800 - Merge pull request #142 from liancheng/dagscheduler-pattern-matching - [Using case class deep match to simplify code in DAGScheduler.processEvent] + 91a59e6 Wed Sep 11 10:21:48 2013 -0700 + Merge pull request #919 from mateiz/jets3t + [Add explicit jets3t dependency, which is excluded in hadoop-client] - 07b3f01 Mon Nov 4 23:32:56 2013 -0800 - Merge pull request #75 from JoshRosen/block-manager-cleanup - [Code de-duplication in BlockManager] + b9128d3 Wed Sep 11 10:03:06 2013 -0700 + Merge pull request #922 from pwendell/port-change + [Change default port number from 3030 to 4030.] - e80d1cf Mon Nov 4 20:47:44 2013 -0800 - Merge pull request #139 from aarondav/shuffle-next - [Never store shuffle blocks in BlockManager] + e07eef8 Wed Sep 11 07:35:39 2013 -0700 + Merge pull request #925 from davidmccauley/master + [SPARK-894 - Not all WebUI fields delivered VIA JSON] - 518cf22 Mon Nov 4 18:21:27 2013 -0800 - Merge pull request #128 from shimingfei/joblogger-doc - [add javadoc to JobLogger, and some small fix] + 8432f27 Tue Sep 10 23:19:53 2013 -0700 + Merge pull request #923 from haoyuan/master + [fix run-example script] - 7e00dee Mon Nov 4 17:54:35 2013 -0800 - Merge pull request #130 from aarondav/shuffle - [Memory-optimized shuffle file consolidation] + d40f140 Tue Sep 10 23:05:29 2013 -0700 + Merge pull request #921 from pwendell/master + [Fix HDFS access bug with assembly build.] - 1d11e43 Mon Nov 4 16:30:30 2013 -0800 - Merge pull request #138 from marmbrus/branch-0.8 - [Fix build break caused by BlockInfo refactor] + 0a6c051 Mon Sep 9 23:37:57 2013 -0700 + Merge pull request #918 from pwendell/branch-0.8 + [Update versions for 0.8.0 release.] 
- a3544ee Sun Nov 3 23:49:19 2013 -0800 - Merge pull request #70 from rxin/hash1 - [Fast, memory-efficient hash set, hash table implementations optimized for primitive data types.] + 8c14f4b Mon Sep 9 22:07:58 2013 -0700 + Merge pull request #917 from pwendell/master + [Document libgfortran dependency for MLBase] - e094daf Sun Nov 3 23:48:40 2013 -0800 - Merge pull request #129 from velvia/2013-11/document-local-uris - [Document & finish support for local: URIs] + c81377b Mon Sep 9 20:16:19 2013 -0700 + Merge pull request #915 from ooyala/master + [Get rid of / improve ugly NPE when Utils.deleteRecursively() fails] - a9e7787 Sun Nov 3 23:48:26 2013 -0800 - Merge pull request #125 from velvia/2013-10/local-jar-uri - [Add support for local:// URI scheme for addJars()] + 61d2a01 Mon Sep 9 18:21:01 2013 -0700 + Merge pull request #916 from mateiz/mkdist-fix + [Fix copy issue in https://github.com/mesos/spark/pull/899] - 57fdb3f Sun Nov 3 23:46:18 2013 -0800 - Merge pull request #117 from stephenh/avoid_concurrent_modification_exception - [Handle ConcurrentModificationExceptions in SparkContext init.] + a85758c Mon Sep 9 13:45:40 2013 -0700 + Merge pull request #907 from stephenh/document_coalesce_shuffle + [Add better docs for coalesce.] - ec0e4f0 Sun Nov 3 23:45:23 2013 -0800 - Merge pull request #124 from tgravescs/sparkHadoopUtilFix - [Pull SparkHadoopUtil out of SparkEnv (jira SPARK-886)] + 084fc36 Mon Sep 9 12:01:35 2013 -0700 + Merge pull request #912 from tgravescs/ganglia-pom + [Add metrics-ganglia to core pom file] - ba0e858 Sun Nov 3 23:43:48 2013 -0800 - Merge pull request #126 from kayousterhout/local_fix - [Fixed incorrect log message in local scheduler] + 0456384 Mon Sep 9 09:57:54 2013 -0700 + Merge pull request #911 from pwendell/ganglia-sink + [Adding Manen dependency for Ganglia] - 504fe74 Sun Nov 3 23:43:03 2013 -0800 - Merge pull request #118 from JoshRosen/blockinfo-memory-usage - [Reduce the memory footprint of BlockInfo objects] + bf984e2 Sun Sep 8 23:50:24 2013 -0700 + Merge pull request #890 from mridulm/master + [Fix hash bug] - 25fa229 Sun Nov 3 23:41:19 2013 -0800 - Merge pull request #112 from kayousterhout/ui_task_attempt_id - [Display both task ID and task attempt ID in UI, and rename taskId to taskAttemptId] + e9d4f44 Sun Sep 8 23:36:48 2013 -0700 + Merge pull request #909 from mateiz/exec-id-fix + [Fix an instance where full standalone mode executor IDs were passed to] + + 2447b1c Sun Sep 8 22:27:49 2013 -0700 + Merge pull request #910 from mateiz/ml-doc-tweaks + [Small tweaks to MLlib docs] + + 7d3204b Sun Sep 8 21:39:12 2013 -0700 + Merge pull request #905 from mateiz/docs2 + [Job scheduling and cluster mode docs] + + f1f8371 Sun Sep 8 21:26:11 2013 -0700 + Merge pull request #896 from atalwalkar/master + [updated content] + + f68848d Sun Sep 8 18:32:16 2013 -0700 + Merge pull request #906 from pwendell/ganglia-sink + [Clean-up of Metrics Code/Docs and Add Ganglia Sink] + + 0b95799 Sun Sep 8 15:30:16 2013 -0700 + Merge pull request #908 from pwendell/master + [Fix target JVM version in scala build] + + 04cfb3a Sun Sep 8 10:33:20 2013 -0700 + Merge pull request #898 from ilikerps/660 + [SPARK-660: Add StorageLevel support in Python] + + 38488ac Sun Sep 8 00:28:53 2013 -0700 + Merge pull request #900 from pwendell/cdh-docs + [Provide docs to describe running on CDH/HDP cluster.] 
+ + a8e376e Sat Sep 7 21:16:01 2013 -0700 + Merge pull request #904 from pwendell/master + [Adding Apache license to two files] + + cfde85e Sat Sep 7 13:53:08 2013 -0700 + Merge pull request #901 from ooyala/2013-09/0.8-doc-changes + [0.8 Doc changes for make-distribution.sh] + + 4a7813a Sat Sep 7 13:52:24 2013 -0700 + Merge pull request #903 from rxin/resulttask + [Fixed the bug that ResultTask was not properly deserializing outputId.] + + afe46ba Sat Sep 7 07:28:51 2013 -0700 + Merge pull request #892 from jey/fix-yarn-assembly + [YARN build fixes] + + 2eebeff Fri Sep 6 15:25:22 2013 -0700 + Merge pull request #897 from pwendell/master + [Docs describing Spark monitoring and instrumentation] + + ddcb9d3 Thu Sep 5 23:54:09 2013 -0700 + Merge pull request #895 from ilikerps/821 + [SPARK-821: Don't cache results when action run locally on driver] + + 699c331 Thu Sep 5 20:21:53 2013 -0700 + Merge pull request #891 from xiajunluan/SPARK-864 + [[SPARK-864]DAGScheduler Exception if we delete Worker and StandaloneExecutorBackend then add Worker] + + 5c7494d Wed Sep 4 22:47:03 2013 -0700 + Merge pull request #893 from ilikerps/master + [SPARK-884: Add unit test to validate Spark JSON output] + + a547866 Wed Sep 4 21:11:56 2013 -0700 + Merge pull request #894 from c0s/master + [Updating assembly README to reflect recent changes in the build.] + + 19f7027 Tue Sep 3 14:29:10 2013 -0700 + Merge pull request #878 from tgravescs/yarnUILink + [Link the Spark UI up to the Yarn UI ] + + 68df246 Tue Sep 3 13:01:17 2013 -0700 + Merge pull request #889 from alig/master + [Return the port the WebUI is bound to (useful if port 0 was used)] + + d3dd48f Mon Sep 2 16:44:54 2013 -0700 + Merge pull request #887 from mateiz/misc-fixes + [Miscellaneous fixes for 0.8] + + 636fc0c Mon Sep 2 11:20:39 2013 -0700 + Merge pull request #886 from mateiz/codec + [Fix spark.io.compression.codec and change default codec to LZF] + + d9a53b9 Sun Sep 1 22:12:30 2013 -0700 + Merge pull request #885 from mateiz/win-py + [Allow PySpark to run on Windows] + + 3c520fe Sun Sep 1 17:26:55 2013 -0700 + Merge pull request #884 from mateiz/win-fixes + [Run script fixes for Windows after package & assembly change] + + f957c26 Sun Sep 1 14:53:57 2013 -0700 + Merge pull request #882 from mateiz/package-rename + [Rename spark package to org.apache.spark] + + a30fac1 Sun Sep 1 12:27:50 2013 -0700 + Merge pull request #883 from alig/master + [Don't require the spark home environment variable to be set for standalone mode (change needed by SIMR)] + + 03cc765 Sun Sep 1 10:20:56 2013 -0700 + Merge pull request #881 from pwendell/master + [Extend QuickStart to include next steps] + + 0e9565a Sat Aug 31 18:55:41 2013 -0700 + Merge pull request #880 from mateiz/ui-tweaks + [Various UI tweaks] + + 2b29a1d Sat Aug 31 17:49:45 2013 -0700 + Merge pull request #877 from mateiz/docs + [Doc improvements for 0.8] + + 6edef9c Sat Aug 31 13:39:24 2013 -0700 + Merge pull request #861 from AndreSchumacher/pyspark_sampling_function + [Pyspark sampling function] + + fd89835 Sat Aug 31 13:18:12 2013 -0700 + Merge pull request #870 from JoshRosen/spark-885 + [Don't send SIGINT / ctrl-c to Py4J gateway subprocess] + + 618f0ec Fri Aug 30 18:17:13 2013 -0700 + Merge pull request #869 from AndreSchumacher/subtract + [PySpark: implementing subtractByKey(), subtract() and keyBy()] + + 94bb7fd Fri Aug 30 12:05:13 2013 -0700 + Merge pull request #876 from mbautin/master_hadoop_rdd_conf + [Make HadoopRDD's configuration accessible] + + 9e17e45 Fri Aug 30 00:22:53 2013 -0700 + 
Merge pull request #875 from shivaram/build-fix + [Fix broken build by removing addIntercept] + + 016787d Thu Aug 29 22:15:14 2013 -0700 + Merge pull request #863 from shivaram/etrain-ridge + [Adding linear regression and refactoring Ridge regression to use SGD] + + 852d810 Thu Aug 29 22:13:15 2013 -0700 + Merge pull request #819 from shivaram/sgd-cleanup + [Change SVM to use {0,1} labels] + + ca71620 Thu Aug 29 21:51:14 2013 -0700 + Merge pull request #857 from mateiz/assembly + [Change build and run instructions to use assemblies] + + 1528776 Thu Aug 29 21:30:47 2013 -0700 + Merge pull request #874 from jerryshao/fix-report-bug + [Fix removed block zero size log reporting] + + abdbacf Wed Aug 28 21:11:31 2013 -0700 + Merge pull request #871 from pwendell/expose-local + [Expose `isLocal` in SparkContext.] + + afcade3 Wed Aug 28 20:15:40 2013 -0700 + Merge pull request #873 from pwendell/master + [Hot fix for command runner] + + baa84e7 Wed Aug 28 12:44:46 2013 -0700 + Merge pull request #865 from tgravescs/fixtmpdir + [Spark on Yarn should use yarn approved directories for spark.local.dir and tmp] + + cd043cf Tue Aug 27 19:50:32 2013 -0700 + Merge pull request #867 from tgravescs/yarnenvconfigs + [Spark on Yarn allow users to specify environment variables ] + + 898da7e Mon Aug 26 20:40:49 2013 -0700 + Merge pull request #859 from ianbuss/sbt_opts + [Pass SBT_OPTS environment through to sbt_launcher] + + 17bafea Mon Aug 26 11:59:32 2013 -0700 + Merge pull request #864 from rxin/json1 + [Revert json library change] + + f9fc5c1 Sat Aug 24 15:19:56 2013 -0700 + Merge pull request #603 from pwendell/ec2-updates + [Several Improvements to EC2 Scripts] + + d282c1e Fri Aug 23 11:20:20 2013 -0700 + Merge pull request #860 from jey/sbt-ide-fixes + [Fix IDE project generation under SBT] + + 5a6ac12 Thu Aug 22 22:08:03 2013 -0700 + Merge pull request #701 from ScrapCodes/documentation-suggestions + [Documentation suggestions for spark streaming.] + + 46ea0c1 Thu Aug 22 15:57:28 2013 -0700 + Merge pull request #814 from holdenk/master + [Create less instances of the random class during ALS initialization.] + + 9ac3d62 Thu Aug 22 15:51:10 2013 -0700 + Merge pull request #856 from jey/sbt-fix-hadoop-0.23.9 + [Re-add removed dependency to fix build under Hadoop 0.23.9] + + ae8ba83 Thu Aug 22 10:14:54 2013 -0700 + Merge pull request #855 from jey/update-build-docs + [Update build docs] + + 8a36fd0 Thu Aug 22 10:13:35 2013 -0700 + Merge pull request #854 from markhamstra/pomUpdate + [Synced sbt and maven builds to use the same dependencies, etc.] 
+ + c2d00f1 Thu Aug 22 10:13:03 2013 -0700 + Merge pull request #832 from alig/coalesce + [Coalesced RDD with locality] + + e6d66c8 Wed Aug 21 17:44:31 2013 -0700 + Merge pull request #853 from AndreSchumacher/double_rdd + [Implementing SPARK-838: Add DoubleRDDFunctions methods to PySpark] + + 2905611 Tue Aug 20 17:36:14 2013 -0700 + Merge pull request #851 from markhamstra/MutablePairTE + [Removed meaningless types] + + d61337f Tue Aug 20 10:06:06 2013 -0700 + Merge pull request #844 from markhamstra/priorityRename + [Renamed 'priority' to 'jobId' and assorted minor changes] + + 8cae72e Mon Aug 19 23:40:04 2013 -0700 + Merge pull request #828 from mateiz/sched-improvements + [Scheduler fixes and improvements] + + efeb142 Mon Aug 19 19:23:50 2013 -0700 + Merge pull request #849 from mateiz/web-fixes + [Small fixes to web UI] + + abdc1f8 Mon Aug 19 18:30:56 2013 -0700 + Merge pull request #847 from rxin/rdd + [Allow subclasses of Product2 in all key-value related classes] + + 8fa0747 Sun Aug 18 17:02:54 2013 -0700 + Merge pull request #840 from AndreSchumacher/zipegg + [Implementing SPARK-878 for PySpark: adding zip and egg files to context ...] + + 1e137a5 Sat Aug 17 22:22:32 2013 -0700 + Merge pull request #846 from rxin/rdd + [Two minor RDD refactoring] + + e89ffc7 Fri Aug 16 14:02:34 2013 -0700 + Merge pull request #839 from jegonzal/zip_partitions + [Currying RDD.zipPartitions ] + + 1fb1b09 Thu Aug 15 22:15:05 2013 -0700 + Merge pull request #841 from rxin/json + [Use the JSON formatter from Scala library and removed dependency on lift-json.] + + c69c489 Thu Aug 15 20:55:09 2013 -0700 + Merge pull request #843 from Reinvigorate/bug-879 + [fixing typo in conf/slaves] + + 230ab27 Thu Aug 15 17:45:17 2013 -0700 + Merge pull request #834 from Daemoen/master + [Updated json output to allow for display of worker state] + + 659553b Thu Aug 15 16:56:31 2013 -0700 + Merge pull request #836 from pwendell/rename + [Rename `memoryBytesToString` and `memoryMegabytesToString`] + + 28369ff Thu Aug 15 16:44:02 2013 -0700 + Merge pull request #829 from JoshRosen/pyspark-unit-tests-python-2.6 + [Fix PySpark unit tests on Python 2.6] + + 1a13460 Thu Aug 15 15:50:44 2013 -0700 + Merge pull request #833 from rxin/ui + [Various UI improvements.] + + 044a088 Wed Aug 14 20:43:49 2013 -0700 + Merge pull request #831 from rxin/scheduler + [A few small scheduler / job description changes.] + + 839f2d4 Wed Aug 14 16:17:23 2013 -0700 + Merge pull request #822 from pwendell/ui-features + [Adding GC Stats to TaskMetrics (and three small fixes)] + + 63446f9 Wed Aug 14 00:17:07 2013 -0700 + Merge pull request #826 from kayousterhout/ui_fix + [Fixed 2 bugs in executor UI (incl. SPARK-877)] + + 3f14cba Tue Aug 13 20:09:51 2013 -0700 + Merge pull request #825 from shivaram/maven-repl-fix + [Set SPARK_CLASSPATH for maven repl tests] + + 596adc6 Tue Aug 13 19:41:34 2013 -0700 + Merge pull request #824 from mateiz/mesos-0.12.1 + [Update to Mesos 0.12.1] + + d316af9 Tue Aug 13 15:31:01 2013 -0700 + Merge pull request #821 from pwendell/print-launch-command + [Print run command to stderr rather than stdout] + + 1f79d21 Tue Aug 13 15:23:54 2013 -0700 + Merge pull request #818 from kayousterhout/killed_fix + [Properly account for killed tasks.] 
+ + 622f83c Tue Aug 13 09:58:52 2013 -0700 + Merge pull request #817 from pwendell/pr_784 + [Minor clean-up in metrics servlet code] + + a0133bf Tue Aug 13 09:28:18 2013 -0700 + Merge pull request #784 from jerryshao/dev-metrics-servlet + [Add MetricsServlet for Spark metrics system] + + e2fdac6 Mon Aug 12 21:26:59 2013 -0700 + Merge pull request #802 from stayhf/SPARK-760-Python + [Simple PageRank algorithm implementation in Python for SPARK-760] + + d3525ba Mon Aug 12 21:02:39 2013 -0700 + Merge pull request #813 from AndreSchumacher/add_files_pyspark + [Implementing SPARK-865: Add the equivalent of ADD_JARS to PySpark] + + 9e02da2 Mon Aug 12 20:22:27 2013 -0700 + Merge pull request #812 from shivaram/maven-mllib-tests + [Create SparkContext in beforeAll for MLLib tests] + + 65d0d91 Mon Aug 12 19:00:57 2013 -0700 + Merge pull request #807 from JoshRosen/guava-optional + [Change scala.Option to Guava Optional in Java APIs] + + 4346f0a Mon Aug 12 12:12:12 2013 -0700 + Merge pull request #809 from shivaram/sgd-cleanup + [Clean up scaladoc in ML Lib.] + + ea1b4ba Mon Aug 12 08:09:58 2013 -0700 + Merge pull request #806 from apivovarov/yarn-205 + [Changed yarn.version to 2.0.5 in pom.xml] + + 2a39d2c Sun Aug 11 20:35:09 2013 -0700 + Merge pull request #810 from pwendell/dead_doc_code + [Remove now dead code inside of docs] + + e5b9ed2 Sun Aug 11 17:22:47 2013 -0700 + Merge pull request #808 from pwendell/ui_compressed_bytes + [Report compressed bytes read when calculating TaskMetrics] + + 3796486 Sun Aug 11 14:51:47 2013 -0700 + Merge pull request #805 from woggle/hadoop-rdd-jobconf + [Use new Configuration() instead of slower new JobConf() in SerializableWritable] + + ff9ebfa Sun Aug 11 10:52:55 2013 -0700 + Merge pull request #762 from shivaram/sgd-cleanup + [Refactor SGD options into a new class.] + + 95c62ca Sun Aug 11 10:30:52 2013 -0700 + Merge pull request #804 from apivovarov/master + [Fixed path to JavaALS.java and JavaKMeans.java, fixed hadoop2-yarn profi...] + + 06e4f2a Sat Aug 10 18:06:23 2013 -0700 + Merge pull request #789 from MLnick/master + [Adding Scala version of PageRank example] + + 71c63de Sat Aug 10 10:21:20 2013 -0700 + Merge pull request #795 from mridulm/master + [Fix bug reported in PR 791 : a race condition in ConnectionManager and Connection] + + d17eeb9 Sat Aug 10 09:02:27 2013 -0700 + Merge pull request #785 from anfeng/master + [expose HDFS file system stats via Executor metrics] + + dce5e47 Fri Aug 9 21:53:45 2013 -0700 + Merge pull request #800 from dlyubimov/HBASE_VERSION + [Pull HBASE_VERSION in the head of sbt build] + + cd247ba Fri Aug 9 20:41:13 2013 -0700 + Merge pull request #786 from shivaram/mllib-java + [Java fixes, tests and examples for ALS, KMeans] + + b09d4b7 Fri Aug 9 13:17:08 2013 -0700 + Merge pull request #799 from woggle/sync-fix + [Remove extra synchronization in ResultTask] + + 0bc63bf Fri Aug 9 13:16:25 2013 -0700 + Merge pull request #801 from pwendell/print-launch-command + [Print launch command [Branch 0.8 version]] + + cc6b92e Fri Aug 9 13:00:33 2013 -0700 + Merge pull request #775 from pwendell/print-launch-command + [Log the launch command for Spark daemons] + + f94fc75 Fri Aug 9 10:04:03 2013 -0700 + Merge pull request #788 from shane-huang/sparkjavaopts + [For standalone mode, add worker local env setting of SPARK_JAVA_OPTS as ...] 
+ + 63b6e02 Thu Aug 8 14:02:02 2013 -0700 + Merge pull request #797 from mateiz/chill-0.3.1 + [Update to Chill 0.3.1] - 57ea854 Sun Nov 3 23:38:37 2013 -0800 - Merge pull request #102 from tdas/transform - [Added new Spark Streaming operations] + 9955e5a Thu Aug 8 11:03:38 2013 -0700 + Merge pull request #796 from pwendell/bootstrap-design + [Bootstrap re-design] - 5b45c9b Sat Nov 2 14:42:22 2013 -0700 - Merge pull request #133 from Mistobaan/link_fix - [update default github] + 5133e4b Wed Aug 7 15:50:45 2013 -0700 + Merge pull request #790 from kayousterhout/fix_throughput + [Fixed issue in UI that decreased scheduler throughput by 5x or more] - 054d97b Sat Nov 2 14:37:35 2013 -0700 - Merge pull request #134 from rxin/readme - [Fixed a typo in Hadoop version in README.] + 3c8478e Tue Aug 6 23:25:03 2013 -0700 + Merge pull request #747 from mateiz/improved-lr + [Update the Python logistic regression example] - 87d4e1c Fri Nov 1 17:58:38 2013 -0700 - Merge pull request #132 from Mistobaan/doc_fix - [fix persistent-hdfs] + 6b043a6 Tue Aug 6 22:31:02 2013 -0700 + Merge pull request #724 from dlyubimov/SPARK-826 + [SPARK-826: fold(), reduce(), collect() always attempt to use java serialization] - 3db505c Tue Oct 29 01:42:07 2013 -0400 - Merge pull request #119 from soulmachine/master - [A little revise for the document] + de6c4c9 Tue Aug 6 17:09:50 2013 -0700 + Merge pull request #787 from ash211/master + [Update spark-standalone.md] - abeca01 Sun Oct 27 22:11:39 2013 -0400 - Merge pull request #115 from aarondav/shuffle-fix - [Eliminate extra memory usage when shuffle file consolidation is disabled] + df4d10d Tue Aug 6 15:44:05 2013 -0700 + Merge pull request #779 from adatao/adatao-global-SparkEnv + [[HOTFIX] Extend thread safety for SparkEnv.get()] - 79e5c50 Sat Oct 26 13:05:40 2013 -0700 - Merge pull request #108 from alig/master - [Changes to enable executing by using HDFS as a synchronization point between driver and executors, as well as ensuring executors exit properly.] + d2b0f0c Tue Aug 6 14:49:39 2013 -0700 + Merge pull request #770 from stayhf/SPARK-760-Java + [Simple PageRank algorithm implementation in Java for SPARK-760] - cb24278 Sat Oct 26 11:41:18 2013 -0700 - Merge pull request #113 from pwendell/master - [Improve error message when multiple assembly jars are present.] + d031f73 Mon Aug 5 22:33:00 2013 -0700 + Merge pull request #782 from WANdisco/master + [SHARK-94 Log the files computed by HadoopRDD and NewHadoopRDD] - 3ec2f51 Sat Oct 26 11:39:29 2013 -0700 - Merge pull request #111 from kayousterhout/ui_name - [Properly display the name of a stage in the UI.] + 1b63dea Mon Aug 5 22:21:26 2013 -0700 + Merge pull request #769 from markhamstra/NegativeCores + [SPARK-847 + SPARK-845: Zombie workers and negative cores] - 7d47704 Sat Oct 26 11:36:23 2013 -0700 - Merge pull request #114 from soulmachine/master - [A little revise for the document] + 828aff7 Mon Aug 5 21:37:33 2013 -0700 + Merge pull request #776 from gingsmith/master + [adding matrix factorization data generator] - 148509f Thu Oct 24 22:32:23 2013 -0700 - Merge pull request #109 from pwendell/master - [Adding Java/Java Streaming versions of `repartition` with associated tests] + 8b27789 Mon Aug 5 19:14:52 2013 -0700 + Merge pull request #774 from pwendell/job-description + [Show user-defined job name in UI] - c018c61 Thu Oct 24 17:27:28 2013 -0700 - Merge pull request #97 from ewencp/pyspark-system-properties - [Add classmethod to SparkContext to set system properties.] 
+ 550b0cf Mon Aug 5 12:10:32 2013 -0700 + Merge pull request #780 from cybermaster/master + [SPARK-850] - 3500b66 Thu Oct 24 17:27:11 2013 -0700 - Merge pull request #93 from kayousterhout/ui_new_state - [Show "GETTING_RESULTS" state in UI.] + 22abbc1 Fri Aug 2 16:37:59 2013 -0700 + Merge pull request #772 from karenfeng/ui-843 + [Show app duration] - f5eff85 Thu Oct 24 17:26:44 2013 -0700 - Merge pull request #83 from ewencp/pyspark-accumulator-add-method - [Add an add() method to pyspark accumulators.] + 9d7dfd2 Thu Aug 1 17:41:58 2013 -0700 + Merge pull request #743 from pwendell/app-metrics + [Add application metrics to standalone master] - 59d6f06 Thu Oct 24 17:09:05 2013 -0700 - Merge pull request #106 from pwendell/master - [Add a `repartition` operator.] + 6d7afd7 Thu Aug 1 17:13:28 2013 -0700 + Merge pull request #768 from pwendell/pr-695 + [Minor clean-up of fair scheduler UI] - 76f3c2f Wed Oct 23 18:03:26 2013 -0700 - Merge pull request #103 from JoshRosen/unpersist-fix - [Add unpersist() to JavaDoubleRDD and JavaPairRDD.] + 5e7b38f Thu Aug 1 14:59:33 2013 -0700 + Merge pull request #695 from xiajunluan/pool_ui + [Enhance job ui in spark ui system with adding pool information] - 534bab2 Tue Oct 22 16:02:45 2013 -0700 - Merge pull request #100 from JoshRosen/spark-902 - [Remove redundant Java Function call() definitions] + 0a96493 Thu Aug 1 11:27:17 2013 -0700 + Merge pull request #760 from karenfeng/heading-update + [Clean up web UI page headers] - fa9a0e4 Tue Oct 22 12:23:17 2013 -0700 - Merge pull request #90 from pwendell/master - [SPARK-940: Do not directly pass Stage objects to SparkListener.] + cb7dd86 Thu Aug 1 11:06:10 2013 -0700 + Merge pull request #758 from pwendell/master-json + [Add JSON path to master index page] - c449ee1 Tue Oct 22 00:00:35 2013 -0700 - Merge pull request #92 from tgravescs/sparkYarnFixClasspath - [Fix the Worker to use CoarseGrainedExecutorBackend and modify classpath ...] + 58756b7 Wed Jul 31 23:45:41 2013 -0700 + Merge pull request #761 from mateiz/kmeans-generator + [Add data generator for K-means] - 498cc6b Mon Oct 21 22:45:31 2013 -0700 - Merge pull request #87 from aarondav/shuffle-base - [Basic shuffle file consolidation] + ecab635 Wed Jul 31 18:16:55 2013 -0700 + Merge pull request #763 from c0s/assembly + [SPARK-842. Maven assembly is including examples libs and dependencies] - e3ad6a5 Mon Oct 21 20:20:42 2013 -0700 - Revert "Merge pull request #94 from aarondav/mesos-fix" - [This reverts commit fe974ba1484dbe6fffdae8680b3b37b26de83145.] + 39c75f3 Wed Jul 31 15:52:36 2013 -0700 + Merge pull request #757 from BlackNiuza/result_task_generation + [Bug fix: SPARK-837] - fe974ba Mon Oct 21 20:15:30 2013 -0700 - Merge pull request #94 from aarondav/mesos-fix - [Fix mesos urls] + b2b86c2 Wed Jul 31 15:51:39 2013 -0700 + Merge pull request #753 from shivaram/glm-refactor + [Build changes for ML lib] - 1c3f4bd Mon Oct 21 11:58:22 2013 -0700 - Merge pull request #88 from rxin/clean - [Made the following traits/interfaces/classes non-public:] + 14bf2fe Wed Jul 31 14:18:16 2013 -0700 + Merge pull request #749 from benh/spark-executor-uri + [Added property 'spark.executor.uri' for launching on Mesos.] 
- 1a50c79 Sun Oct 20 22:27:29 2013 -0700 - Merge pull request #41 from pwendell/shuffle-benchmark - [Provide Instrumentation for Shuffle Write Performance] + 4ba4c3f Wed Jul 31 13:14:49 2013 -0700 + Merge pull request #759 from mateiz/split-fix + [Use the Char version of split() instead of the String one in MLUtils] - 37a755c Sun Oct 20 21:04:33 2013 -0700 - Merge pull request #89 from rxin/executor - [Don't setup the uncaught exception handler in local mode.] + a386ced Wed Jul 31 11:22:50 2013 -0700 + Merge pull request #754 from rxin/compression + [Compression codec change] - ec74428 Sun Oct 20 11:46:14 2013 -0700 - Merge pull request #84 from rxin/kill1 - [Added documentation for setJobGroup. Also some minor cleanup in SparkContext.] + 0be071a Wed Jul 31 11:11:59 2013 -0700 + Merge pull request #756 from cdshines/patch-1 + [Refactored Vector.apply(length, initializer) replacing excessive code with library method] - 52d13a6 Sun Oct 20 10:50:54 2013 -0700 - Merge pull request #85 from rxin/clean - [Moved the top level spark package object from spark to org.apache.spark] + d4556f4 Wed Jul 31 08:48:14 2013 -0700 + Merge pull request #751 from cdshines/master + [Cleaned Partitioner & PythonPartitioner source by taking out non-related logic to Utils] - 919c557 Sat Oct 19 11:29:00 2013 -0700 - Merge pull request #79 from aarondav/scdefaults0.8 - [Add deprecation warning to unrecognized MASTER urls] + 29b8cd3 Tue Jul 30 21:30:33 2013 -0700 + Merge pull request #755 from jerryshao/add-apache-header + [Add Apache license header to metrics system] - f3de2ce Fri Oct 18 23:20:16 2013 -0700 - Merge pull request #76 from pwendell/master - [Clarify compression property.] + e87de03 Tue Jul 30 15:00:08 2013 -0700 + Merge pull request #744 from karenfeng/bootstrap-update + [Use Bootstrap progress bars in web UI] - f181560 Fri Oct 18 23:14:28 2013 -0700 - Merge pull request #68 from mosharaf/master - [Faster and stable/reliable broadcast] + ae57020 Tue Jul 30 14:56:41 2013 -0700 + Merge pull request #752 from rxin/master + [Minor mllib cleanup] - eaa2150 Fri Oct 18 23:08:47 2013 -0700 - Merge pull request #74 from rxin/kill - [Job cancellation via job group id.] + 8aee118 Tue Jul 30 10:27:54 2013 -0700 + Merge pull request #748 from atalwalkar/master + [made SimpleUpdater consistent with other updaters] - df21ac8 Thu Oct 17 18:38:46 2013 -0700 - Merge pull request #69 from KarthikTunga/master - [Fix for issue SPARK-627. Implementing --config argument in the scripts.] + 468a36c Mon Jul 29 19:44:33 2013 -0700 + Merge pull request #746 from rxin/cleanup + [Internal cleanup] - b531552 Thu Oct 17 18:37:22 2013 -0700 - Merge pull request #67 from kayousterhout/remove_tsl - [Removed TaskSchedulerListener interface.] + 1e1ffb1 Mon Jul 29 19:26:19 2013 -0700 + Merge pull request #745 from shivaram/loss-update-fix + [Remove duplicate loss history in Gradient Descent] - 5a73ab7 Thu Oct 17 18:36:36 2013 -0700 - Merge pull request #62 from harveyfeng/master - [Make TaskContext's stageId publicly accessible.] 
+ c99b674 Mon Jul 29 16:32:55 2013 -0700 + Merge pull request #735 from karenfeng/ui-807 + [Totals for shuffle data and CPU time] - b6ce111 Thu Oct 17 18:35:33 2013 -0700 - Merge pull request #61 from kayousterhout/daemon_thread - [Unified daemon thread pools] + fe7298b Mon Jul 29 14:01:00 2013 -0700 + Merge pull request #741 from pwendell/usability + [Fix two small usability issues] - 2760055 Thu Oct 17 18:34:56 2013 -0700 - Merge pull request #59 from rxin/warning - [Bump up logging level to warning for failed tasks.] + c34c0f6 Mon Jul 29 13:18:10 2013 -0700 + Merge pull request #731 from pxinghao/master + [Adding SVM and Lasso] - 1e67234 Thu Oct 17 18:33:21 2013 -0700 - Merge pull request #65 from tgravescs/fixYarn - [Fix yarn build] + f3d72ff Fri Jul 26 17:19:27 2013 -0700 + Merge pull request #739 from markhamstra/toolsPom + [Missing tools/pom.xml scalatest dependency] - d0c9d41 Thu Oct 17 18:32:54 2013 -0700 - Merge pull request #34 from kayousterhout/rename - [Renamed StandaloneX to CoarseGrainedX.] + cb36677 Fri Jul 26 16:59:30 2013 -0700 + Merge pull request #738 from harsha2010/pruning + [Fix bug in Partition Pruning.] - cee3b43 Wed Oct 16 10:37:55 2013 -0700 - Merge pull request #63 from pwendell/master - [Fixing spark streaming example and a bug in examples build.] + f3cf094 Thu Jul 25 14:53:21 2013 -0700 + Merge pull request #734 from woggle/executor-env2 + [Get more env vars from driver rather than worker] - 1d92983 Tue Oct 15 10:52:15 2013 -0700 - Merge pull request #58 from hsaputra/update-pom-asf - [Update pom.xml to use version 13 of the ASF parent pom] + 51c2427 Thu Jul 25 00:03:11 2013 -0700 + Merge pull request #732 from ryanlecompte/master + [Refactor Kryo serializer support to use chill/chill-java] - c50b016 Mon Oct 14 23:18:37 2013 -0700 - Merge pull request #29 from rxin/kill - [Job killing] + 52723b9 Wed Jul 24 14:33:02 2013 -0700 + Merge pull request #728 from jey/examples-jar-env + [Fix setting of SPARK_EXAMPLES_JAR] - 616ea6f Mon Oct 14 20:59:31 2013 -0700 - Merge pull request #57 from aarondav/bid - [Refactor BlockId into an actual type] + 20338c2 Wed Jul 24 14:32:24 2013 -0700 + Merge pull request #729 from karenfeng/ui-811 + [Stage Page updates] - e306de8 Mon Oct 14 20:58:48 2013 -0700 - Merge pull request #53 from witgo/master - [Add a zookeeper compile dependency to fix build in maven] + 5584ebc Wed Jul 24 11:46:46 2013 -0700 + Merge pull request #675 from c0s/assembly + [Building spark assembly for further consumption of the Spark project with a deployed cluster] - d66c01f Mon Oct 14 20:58:17 2013 -0700 - Merge pull request #19 from aarondav/master-zk - [Standalone Scheduler fault tolerance using ZooKeeper] + a73f3ee Wed Jul 24 08:59:14 2013 -0700 + Merge pull request #671 from jerryshao/master + [Add metrics system for Spark] - 0fcb234 Mon Oct 14 20:54:34 2013 -0700 - Merge pull request #46 from mateiz/py-sort-update - [Fix PySpark docs and an overly long line of code after #38] + b011329 Tue Jul 23 22:50:09 2013 -0700 + Merge pull request #727 from rxin/scheduler + [Scheduler code style cleanup.] 
- f94aa52 Mon Oct 14 20:53:15 2013 -0700 - Merge pull request #44 from mateiz/fast-map - [A fast and low-memory append-only map for shuffle operations] + 876125b Tue Jul 23 22:28:21 2013 -0700 + Merge pull request #726 from rxin/spark-826 + [SPARK-829: scheduler shouldn't hang if a task contains unserializable objects in its closure] - 36f5b08 Mon Oct 14 20:50:28 2013 -0700 - Merge pull request #33 from AndreSchumacher/pyspark_partition_key_change - [Fixing SPARK-602: PythonPartitioner] + 2f1736c Tue Jul 23 15:53:30 2013 -0700 + Merge pull request #725 from karenfeng/task-start + [Creates task start events] - d615b14 Mon Oct 14 20:48:45 2013 -0700 - Merge pull request #32 from mridulm/master - [Address review comments, move to incubator spark] + 5364f64 Tue Jul 23 13:40:34 2013 -0700 + Merge pull request #723 from rxin/mllib + [Made RegressionModel serializable and added unit tests to make sure predict methods would work.] - 7084217 Mon Oct 14 19:31:22 2013 -0700 - Merge pull request #27 from davidmccauley/master - [SPARK-920/921 - JSON endpoint updates] + f369e0e Tue Jul 23 13:22:27 2013 -0700 + Merge pull request #720 from ooyala/2013-07/persistent-rdds-api + [Add a public method getCachedRdds to SparkContext] - a35259a Mon Oct 14 19:28:46 2013 -0700 - Merge pull request #26 from Du-Li/master - [fixed a wildcard bug in make-distribution.sh; ask sbt to check local] + 401aac8 Mon Jul 22 16:57:16 2013 -0700 + Merge pull request #719 from karenfeng/ui-808 + [Creates Executors tab for Jobs UI] - 62ce4ae Mon Oct 14 19:28:09 2013 -0700 - Merge pull request #25 from CruncherBigData/master - [Update README: updated the link] + 8ae1436 Mon Jul 22 16:03:04 2013 -0700 + Merge pull request #722 from JoshRosen/spark-825 + [Fix bug: DoubleRDDFunctions.sampleStdev() computed non-sample stdev()] - 6961744 Mon Oct 14 18:55:41 2013 -0700 - Merge pull request #10 from kayousterhout/results_through-bm - [Send Task results through the block manager when larger than Akka frame size (fixes SPARK-669).] + 15fb394 Sun Jul 21 10:33:38 2013 -0700 + Merge pull request #716 from c0s/webui-port + [Regression: default webui-port can't be set via command line "--webui-port" anymore] - ce364c0 Mon Oct 14 18:54:35 2013 -0700 - Merge pull request #4 from MLnick/implicit-als - [Adding algorithm for implicit feedback data to ALS] + c40f0f2 Fri Jul 19 13:33:04 2013 -0700 + Merge pull request #711 from shivaram/ml-generators + [Move ML lib data generator files to util/] - f94bd3f Mon Oct 14 15:10:59 2013 -0700 - Merge pull request #28 from tgravescs/sparYarnAppName - [Allow users to set the application name for Spark on Yarn] + 413b841 Fri Jul 19 13:31:38 2013 -0700 + Merge pull request #717 from viirya/dev1 + [Do not copy local jars given to SparkContext in yarn mode] - dc2c90d Mon Oct 14 15:03:34 2013 -0700 - Merge pull request #38 from AndreSchumacher/pyspark_sorting - [SPARK-705: implement sortByKey() in PySpark] + 0d0a47c Thu Jul 18 12:06:37 2013 -0700 + Merge pull request #710 from shivaram/ml-updates + [Updates to LogisticRegression] - 00a7551 Sat Oct 12 21:26:22 2013 -0700 - Merge pull request #52 from harveyfeng/hadoop-closure - [Add an optional closure parameter to HadoopRDD instantiation to use when creating local JobConfs.] 
+ c6235b5 Thu Jul 18 11:43:48 2013 -0700 + Merge pull request #714 from adatao/master + [[BUGFIX] Fix for sbt/sbt script SPARK_HOME setting] - 5383a5a Sat Oct 12 21:26:11 2013 -0700 - Merge pull request #20 from harveyfeng/hadoop-config-cache - [Allow users to pass broadcasted Configurations and cache InputFormats across Hadoop file reads.] + 009c79e Thu Jul 18 11:41:52 2013 -0700 + Merge pull request #715 from viirya/dev1 + [fix a bug in build process that pulls in two versions of ASM.] - 4a2e76a Fri Oct 11 16:09:07 2013 -0700 - Merge pull request #54 from aoiwelle/remove_unused_imports - [Remove unnecessary mutable imports] + 985a9e3 Wed Jul 17 22:27:19 2013 -0700 + Merge pull request #712 from stayhf/SPARK-817 + [Consistently invoke bash with /usr/bin/env bash in scripts to make code ...] - 64fae16 Wed Oct 9 21:42:18 2013 -0700 - Merge pull request #47 from xiliu82/branch-0.8 - [add Function3 and WrappedFunction3] + cad48ed Tue Jul 16 21:41:28 2013 -0700 + Merge pull request #708 from ScrapCodes/dependencies-upgrade + [Dependency upgrade Akka 2.0.3 -> 2.0.5] - dfc62e2 Wed Oct 9 16:55:58 2013 -0700 - Merge pull request #49 from mateiz/kryo-fix-2 - [Fix Chill serialization of Range objects] + 8a8a8f2 Mon Jul 15 23:09:21 2013 -0700 + Merge pull request #705 from rxin/errormessages + [Throw a more meaningful message when runJob is called to launch tasks on non-existent partitions.] - 0b6f047 Wed Oct 9 16:53:31 2013 -0700 - Merge pull request #50 from kayousterhout/SPARK-908 - [Fix race condition in SparkListenerSuite (fixes SPARK-908).] + ed8415b Mon Jul 15 16:41:04 2013 -0700 + Merge pull request #703 from karenfeng/ui-802 + [Link to job UI from standalone deploy cluster web UI] - f930dd4 Tue Oct 8 22:58:35 2013 -0700 - Merge pull request #43 from mateiz/kryo-fix - [Don't allocate Kryo buffers unless needed] + e3d3e6f Mon Jul 15 14:59:44 2013 -0700 + Merge pull request #702 from karenfeng/ui-fixes + [Adds app name in HTML page titles on job web UI] - 8e9bd93 Mon Oct 7 20:47:09 2013 -0700 - Merge pull request #42 from pwendell/shuffle-read-perf - [Fix inconsistent and incorrect log messages in shuffle read path] + c7877d5 Sun Jul 14 12:58:13 2013 -0700 + Merge pull request #689 from BlackNiuza/application_status + [Bug fix: SPARK-796] - f2cdcc4 Mon Oct 7 15:49:32 2013 -0700 - Merge pull request #40 from pwendell/branch-0.8 - [Adding Shark 0.7.1 to EC2 scripts [branch 0.8 version]] + 10c0593 Sun Jul 14 11:45:18 2013 -0700 + Merge pull request #699 from pwendell/ui-env + [Add `Environment` tab to SparkUI.] - 023e3fd Mon Oct 7 10:47:45 2013 -0700 - Merge pull request #31 from sundeepn/branch-0.8 - [Resolving package conflicts with hadoop 0.23.9] + 89e8549 Sat Jul 13 16:11:08 2013 -0700 + Merge pull request #698 from Reinvigorate/sm-deps-change + [changing com.google.code.findbugs maven coordinates] - 3cb9040 Thu Sep 26 14:37:06 2013 -0700 - Merge pull request #17 from rxin/optimize - [Remove -optimize flag] + 77c69ae Fri Jul 12 23:05:21 2013 -0700 + Merge pull request #697 from pwendell/block-locations + [Show block locations in Web UI.] - 35bcf32 Thu Sep 26 14:17:00 2013 -0700 - Merge pull request #930 from holdenk/master - [Add mapPartitionsWithIndex] + 5a7835c Fri Jul 12 20:28:21 2013 -0700 + Merge pull request #691 from karenfeng/logpaging + [Create log pages] - 976fe60 Thu Sep 26 14:16:17 2013 -0700 - Merge pull request #14 from kayousterhout/untangle_scheduler - [Improved organization of scheduling packages.] 
+ 71ccca0 Fri Jul 12 20:25:06 2013 -0700 + Merge pull request #696 from woggle/executor-env + [Pass executor env vars (e.g. SPARK_CLASSPATH) to compute-classpath.sh] - 8cbc96b Thu Sep 26 13:16:05 2013 -0700 - Merge pull request #7 from wannabeast/memorystore-fixes - [some minor fixes to MemoryStore] + 90fc3f3 Fri Jul 12 20:23:36 2013 -0700 + Merge pull request #692 from Reinvigorate/takeOrdered + [adding takeOrdered() to RDD] - 240ca93 Thu Sep 26 13:12:06 2013 -0700 - Merge pull request #9 from rxin/limit - [Smarter take/limit implementation.] + 018d04c Thu Jul 11 12:48:37 2013 -0700 + Merge pull request #684 from woggle/mesos-classloader + [Explicitly set class loader for MesosSchedulerDriver callbacks.] - a186792 Thu Sep 26 13:10:59 2013 -0700 - Merge pull request #937 from jerryshao/localProperties-fix - [Fix PR926 local properties issues in Spark Streaming like scenarios] + bc19477 Wed Jul 10 22:29:41 2013 -0700 + Merge pull request #693 from c0s/readme + [Updating README to reflect Scala 2.9.3 requirements] - f3c60c9 Thu Sep 26 13:10:24 2013 -0700 - Merge pull request #941 from ilikerps/master - [Add "org.apache." prefix to packages in spark-class] + 7dcda9a Mon Jul 8 23:24:23 2013 -0700 + Merge pull request #688 from markhamstra/scalaDependencies + [Fixed SPARK-795 with explicit dependencies] - d5a8dbf Thu Sep 26 13:09:30 2013 -0700 - Merge pull request #928 from jerryshao/fairscheduler-refactor - [Refactor FairSchedulableBuilder] + 638927b Mon Jul 8 22:58:50 2013 -0700 + Merge pull request #683 from shivaram/sbt-test-fix + [Remove some stack traces from sbt test output] -Release 0.8.0-incubating ->>>>>>> ba05afd29c81e152a84461f95b0e61a783897d7a + 3c13178 Mon Jul 8 14:50:34 2013 -0700 + Merge pull request #687 from atalwalkar/master + [Added "Labeled" to util functions for labeled data] - cc33f9f Wed Dec 4 15:57:47 2013 -0800 - Merge pull request #227 from pwendell/master + 744da8e Sun Jul 7 17:42:25 2013 -0700 + Merge pull request #679 from ryanlecompte/master + [Make binSearch method tail-recursive for RidgeRegression] - ba44f21 Wed Dec 4 15:56:58 2013 -0800 - Merge pull request #223 from rxin/transient + 3cc6818 Sat Jul 6 19:51:20 2013 -0700 + Merge pull request #668 from shimingfei/guava-14.0.1 + [update guava version from 11.0.1 to 14.0.1] - 31da065 Wed Dec 4 14:01:13 2013 -0800 - Merge pull request #95 from aarondav/perftest + 2216188 Sat Jul 6 16:18:15 2013 -0700 + Merge pull request #676 from c0s/asf-avro + [Use standard ASF published avro module instead of a proprietory built one] - daaaee1 Tue Dec 3 14:22:05 2013 -0800 - Merge pull request #218 from JoshRosen/spark-970-pyspark-unicode-error + 94871e4 Sat Jul 6 15:26:19 2013 -0700 + Merge pull request #655 from tgravescs/master + [Add support for running Spark on Yarn on a secure Hadoop Cluster] - 8b091fe Mon Dec 2 21:28:13 2013 -0800 - Merge pull request #181 from BlackNiuza/fix_tasks_number + 3f918b3 Sat Jul 6 12:45:18 2013 -0700 + Merge pull request #672 from holdenk/master + [s/ActorSystemImpl/ExtendedActorSystem/ as ActorSystemImpl results in a warning] - d21266e Sun Dec 1 12:47:30 2013 -0800 - Merge pull request #219 from sundeepn/schedulerexception + 2a36e54 Sat Jul 6 12:43:21 2013 -0700 + Merge pull request #673 from xiajunluan/master + [Add config template file for fair scheduler feature] - be9c176 Tue Nov 26 10:27:41 2013 -0800 - Merge pull request #201 from rxin/mappartitions + 7ba7fa1 Sat Jul 6 11:45:08 2013 -0700 + Merge pull request #674 from liancheng/master + [Bug fix: SPARK-789] - 9949561 Mon Nov 25 07:51:23 
2013 +0800 - Merge pull request #197 from aarondav/patrick-fix + f4416a1 Sat Jul 6 11:41:58 2013 -0700 + Merge pull request #681 from BlackNiuza/memory_leak + [Remove active job from idToActiveJob when job finished or aborted] - c59ce18 Sun Nov 24 11:04:00 2013 +0800 - Merge pull request #200 from mateiz/hash-fix + e063e29 Fri Jul 5 21:54:52 2013 -0700 + Merge pull request #680 from tdas/master + [Fixed major performance bug in Network Receiver] - d7ab87e Fri Nov 22 10:27:16 2013 +0800 - Merge pull request #193 from aoiwelle/patch-1 + bf1311e Fri Jul 5 17:32:44 2013 -0700 + Merge pull request #678 from mateiz/ml-examples + [Start of ML package] - d7c6a00 Fri Nov 22 10:13:37 2013 +0800 - Merge pull request #196 from pwendell/master + 6ad85d0 Thu Jul 4 21:32:29 2013 -0700 + Merge pull request #677 from jerryshao/fix_stage_clean + [Clean StageToInfos periodically when spark.cleaner.ttl is enabled] - 37126e8 Sun Nov 17 18:53:16 2013 -0800 - Merge pull request #174 from ahirreddy/master + 2e32fc8 Thu Jul 4 12:18:20 2013 -0700 + Merge pull request #666 from c0s/master + [hbase dependency is missed in hadoop2-yarn profile of examples module +] - 8823057 Sun Nov 17 18:53:04 2013 -0800 - Merge pull request #166 from ahirreddy/simr-spark-ui + 6d60fe5 Mon Jul 1 18:24:03 2013 -0700 + Merge pull request #666 from c0s/master + [hbase dependency is missed in hadoop2-yarn profile of examples module] - e134ed5 Sun Nov 17 18:51:19 2013 -0800 - Merge pull request #137 from tgravescs/sparkYarnJarsHdfsRebase + ccfe953 Sat Jun 29 17:57:53 2013 -0700 + Merge pull request #577 from skumargithub/master + [Example of cumulative counting using updateStateByKey] - af98fbc Sun Nov 17 18:50:23 2013 -0800 - Merge pull request #165 from NathanHowell/kerberos-master + 50ca176 Thu Jun 27 22:24:52 2013 -0700 + Merge pull request #664 from pwendell/test-fix + [Removing incorrect test statement] - a64397b Sun Nov 17 18:49:40 2013 -0800 - Merge pull request #153 from ankurdave/stop-spot-cluster + e49bc8c Wed Jun 26 11:13:33 2013 -0700 + Merge pull request #663 from stephenh/option_and_getenv + [Be cute with Option and getenv.] 
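(Editorial aside, not part of the patch: a one-line illustration of the Option/getenv cleanup noted in the entry above; the environment variable and the fallback value are hypothetical.)

    // Wrapping getenv in Option avoids an explicit null check.
    val sparkHome = Option(System.getenv("SPARK_HOME")).getOrElse("/opt/spark")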
- 6c60768 Sun Nov 17 18:46:40 2013 -0800 - Merge pull request #160 from xiajunluan/JIRA-923 + f5e32ed Tue Jun 25 09:16:57 2013 -0700 + Merge pull request #661 from mesos/streaming + [Kafka fixes and DStream.count fix for master] - f0d350a Sun Nov 17 18:44:49 2013 -0800 - Merge pull request #175 from kayousterhout/no_retry_not_serializable + 1249e91 Mon Jun 24 21:46:33 2013 -0700 + Merge pull request #572 from Reinvigorate/sm-block-interval + [Adding spark.streaming.blockInterval property] - 9d56371 Sun Nov 17 18:43:59 2013 -0800 - Merge pull request #173 from kayousterhout/scheduler_hang + cfcda95 Mon Jun 24 21:44:50 2013 -0700 + Merge pull request #571 from Reinvigorate/sm-kafka-serializers + [Surfacing decoders on KafkaInputDStream] - 24e238b Sun Nov 17 18:42:41 2013 -0800 - Merge pull request #182 from rxin/vector + 575aff6 Mon Jun 24 21:35:50 2013 -0700 + Merge pull request #567 from Reinvigorate/sm-count-fix + [Fixing count() in Spark Streaming] - e7927ad Thu Nov 14 10:32:27 2013 -0800 - Merge pull request #169 from kayousterhout/mesos_fix + 3e61bef Sat Jun 22 16:22:47 2013 -0700 + Merge pull request #648 from shivaram/netty-dbg + [Shuffle fixes and cleanup] - 41dc566 Thu Nov 14 10:30:57 2013 -0800 - Merge pull request #170 from liancheng/hadooprdd-doc-typo + 1ef5d0d Sat Jun 22 09:35:57 2013 -0700 + Merge pull request #644 from shimingfei/joblogger + [add Joblogger to Spark (on new Spark code)] - 333859f Thu Nov 14 10:26:27 2013 -0800 - Merge pull request #171 from RIA-pierre-borckmans/master + 7e4b266 Sat Jun 22 07:53:18 2013 -0700 + Merge pull request #563 from jey/python-optimization + [Optimize PySpark worker invocation] - c856651 Tue Nov 12 10:22:18 2013 -0800 - Merge pull request #164 from tdas/kafka-fix + 71030ba Wed Jun 19 15:21:03 2013 -0700 + Merge pull request #654 from lyogavin/enhance_pipe + [fix typo and coding style in #638] - 30786c6 Sun Nov 10 11:58:58 2013 -0800 - Merge pull request #157 from rxin/kryo + 73f4c7d Tue Jun 18 04:21:17 2013 -0700 + Merge pull request #605 from esjewett/SPARK-699 + [Add hBase example (retry of pull request #596)] - 5ce6c75 Sat Nov 9 22:31:59 2013 -0800 - Merge pull request #147 from JoshRosen/fix-java-api-completeness-checker + 9933836 Tue Jun 18 02:41:10 2013 -0700 + Merge pull request #647 from jerryshao/master + [Reduce ZippedPartitionsRDD's getPreferredLocations complexity from O(2^2n) to O(2^n)] - 1d52b50 Sat Nov 9 15:47:40 2013 -0800 - Merge pull request #149 from tgravescs/fixSecureHdfsAccess + db42451 Mon Jun 17 15:26:36 2013 -0700 + Merge pull request #643 from adatao/master + [Bug fix: Zero-length partitions result in NaN for overall mean & variance] - 32a0c4f Sat Nov 9 15:40:54 2013 -0800 - Merge pull request #155 from rxin/jobgroup + e82a2ff Mon Jun 17 15:13:15 2013 -0700 + Merge pull request #653 from rxin/logging + [SPARK-781: Log the temp directory path when Spark says "Failed to create temp directory."] - 07ae524 Sat Nov 9 11:56:46 2013 -0800 - Merge pull request #152 from rxin/repl + e6d1277 Mon Jun 17 12:56:25 2013 -0700 + Merge pull request #638 from lyogavin/enhance_pipe + [Enhance pipe to support more features we can do in hadoop streaming] - a5916b9 Thu Nov 7 11:08:44 2013 -0800 - Merge pull request #148 from squito/include_appId + f961aac Sat Jun 15 00:53:41 2013 -0700 + Merge pull request #649 from ryanlecompte/master + [Add top K method to RDD using a bounded priority queue] - d5ae953 Wed Nov 6 23:23:12 2013 -0800 - Merge pull request #23 from jerryshao/multi-user + 6602d94 Fri Jun 14 10:41:31 2013 -0700 + Merge 
pull request #651 from rxin/groupbykey + [SPARK-772 / SPARK-774: groupByKey and cogroup should disable map side combine] - 1d9412b Wed Nov 6 13:28:02 2013 -0800 - Merge pull request #144 from liancheng/runjob-clean + d93851a Thu Jun 13 13:38:45 2013 -0700 + Merge pull request #645 from pwendell/compression + [Adding compression to Hadoop save functions] - c8e0c0d Wed Nov 6 09:40:00 2013 -0800 - Merge pull request #145 from aarondav/sls-fix + f1da591 Wed Jun 12 17:55:08 2013 -0700 + Merge pull request #646 from markhamstra/jvmArgs + [Fixed jvmArgs in maven build.] - 0f62786 Tue Nov 5 23:14:28 2013 -0800 - Merge pull request #143 from rxin/scheduler-hang + 0e94b73 Mon Jun 10 13:00:31 2013 -0700 + Merge pull request #625 from stephenh/fix-start-slave + [Fix start-slave not passing instance number to spark-daemon.] - 96670e7 Tue Nov 5 10:43:10 2013 -0800 - Merge pull request #140 from aarondav/merge-75 + 74b91d5 Sat Jun 8 01:19:40 2013 -0700 + Merge pull request #629 from c0s/master + [Sometime Maven build runs out of PermGen space.] - 0848167 Tue Nov 5 10:42:36 2013 -0800 - Merge pull request #142 from liancheng/dagscheduler-pattern-matching + c8fc423 Fri Jun 7 22:43:18 2013 -0700 + Merge pull request #631 from jerryshao/master + [Fix block manager UI display issue when enable spark.cleaner.ttl] - 07b3f01 Mon Nov 4 23:32:56 2013 -0800 - Merge pull request #75 from JoshRosen/block-manager-cleanup + 1ae60bc Fri Jun 7 22:39:06 2013 -0700 + Merge pull request #634 from xiajunluan/master + [[Spark-753] Fix ClusterSchedulSuite unit test failed ] - e80d1cf Mon Nov 4 20:47:44 2013 -0800 - Merge pull request #139 from aarondav/shuffle-next + fff3728 Tue Jun 4 16:09:50 2013 -0700 + Merge pull request #640 from pwendell/timeout-update + [Fixing bug in BlockManager timeout] - 518cf22 Mon Nov 4 18:21:27 2013 -0800 - Merge pull request #128 from shimingfei/joblogger-doc + f420d4f Tue Jun 4 15:25:58 2013 -0700 + Merge pull request #639 from pwendell/timeout-update + [Bump akka and blockmanager timeouts to 60 seconds] - 7e00dee Mon Nov 4 17:54:35 2013 -0800 - Merge pull request #130 from aarondav/shuffle + 84530ba Fri May 31 17:06:13 2013 -0700 + Merge pull request #636 from rxin/unpersist + [Unpersist More block manager cleanup.] - 1d11e43 Mon Nov 4 16:30:30 2013 -0800 - Merge pull request #138 from marmbrus/branch-0.8 + ef77bb7 Thu May 30 14:50:06 2013 -0700 + Merge pull request #627 from shivaram/master + [Netty and shuffle bug fixes] - a3544ee Sun Nov 3 23:49:19 2013 -0800 - Merge pull request #70 from rxin/hash1 + 8cb8178 Thu May 30 14:17:44 2013 -0700 + Merge pull request #628 from shivaram/zero-block-size + [Skip fetching zero-sized blocks in NIO.] - e094daf Sun Nov 3 23:48:40 2013 -0800 - Merge pull request #129 from velvia/2013-11/document-local-uris + 6ed7139 Wed May 29 10:14:22 2013 -0700 + Merge pull request #626 from stephenh/remove-add-if-no-port + [Remove unused addIfNoPort.] 
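(Editorial aside, not part of the patch: a minimal sketch of the RDD additions referenced in the surrounding entries, top/takeOrdered and compressed Hadoop output. It assumes an existing SparkContext `sc`; the data and output path are made up.)

    import org.apache.hadoop.io.compress.GzipCodec

    // Assumes an existing SparkContext `sc`; the output path is illustrative.
    val nums = sc.parallelize(1 to 1000)

    // top(k): the k largest elements (implemented with a bounded priority queue);
    // takeOrdered(k): the k smallest under the implicit ordering.
    val largest  = nums.top(5)
    val smallest = nums.takeOrdered(5)

    // Per the "Adding compression to Hadoop save functions" entry above,
    // saveAsTextFile can take a compression codec.
    nums.map(_.toString).saveAsTextFile("/tmp/nums-gz", classOf[GzipCodec])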
- a9e7787 Sun Nov 3 23:48:26 2013 -0800 - Merge pull request #125 from velvia/2013-10/local-jar-uri + 41d230c Tue May 28 23:35:24 2013 -0700 + Merge pull request #611 from squito/classloader + [Use default classloaders for akka & deserializing task results] - 57fdb3f Sun Nov 3 23:46:18 2013 -0800 - Merge pull request #117 from stephenh/avoid_concurrent_modification_exception + 3db1e17 Mon May 27 21:31:43 2013 -0700 + Merge pull request #620 from jerryshao/master + [Fix CheckpointRDD java.io.FileNotFoundException when calling getPreferredLocations] - ec0e4f0 Sun Nov 3 23:45:23 2013 -0800 - Merge pull request #124 from tgravescs/sparkHadoopUtilFix + 3d4891d Sat May 25 23:38:05 2013 -0700 + Merge pull request #621 from JoshRosen/spark-613 + [Use ec2-metadata in start-slave.sh to detect if running on EC2] - ba0e858 Sun Nov 3 23:43:48 2013 -0800 - Merge pull request #126 from kayousterhout/local_fix + e8d4b6c Sat May 25 21:09:03 2013 -0700 + Merge pull request #529 from xiajunluan/master + [[SPARK-663]Implement Fair Scheduler in Spark Cluster Scheduler ] - 504fe74 Sun Nov 3 23:43:03 2013 -0800 - Merge pull request #118 from JoshRosen/blockinfo-memory-usage + 9a3c344 Sat May 25 17:53:43 2013 -0700 + Merge pull request #624 from rxin/master + [NonJavaSerializableClass should not be Java serializable...] - 25fa229 Sun Nov 3 23:41:19 2013 -0800 - Merge pull request #112 from kayousterhout/ui_task_attempt_id + 24e41aa Fri May 24 16:48:52 2013 -0700 + Merge pull request #623 from rxin/master + [Automatically configure Netty port.] - 57ea854 Sun Nov 3 23:38:37 2013 -0800 - Merge pull request #102 from tdas/transform + 69161f9 Fri May 24 14:42:13 2013 -0700 + Merge pull request #622 from rxin/master + [bug fix: Shuffle block iterator is ignoring the shuffle serializer setting.] 
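(Editorial aside, not part of the patch: a hedged sketch of enabling the fair scheduler referenced above in [SPARK-663]. Property names follow the documented spark.scheduler.* settings of this era; the master URL, app name, and pool name are made up.)

    import org.apache.spark.SparkContext

    // Switch the in-application scheduler from FIFO to FAIR before creating the context.
    System.setProperty("spark.scheduler.mode", "FAIR")
    val sc = new SparkContext("local[2]", "fair-scheduler-demo")

    // Jobs submitted from this thread are assigned to a named pool (name is illustrative).
    sc.setLocalProperty("spark.scheduler.pool", "production")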
- 5b45c9b Sat Nov 2 14:42:22 2013 -0700 - Merge pull request #133 from Mistobaan/link_fix + dbbedfc Thu May 23 23:11:06 2013 -0700 + Merge pull request #616 from jey/maven-netty-exclusion + [Exclude old versions of Netty from Maven-based build] - 054d97b Sat Nov 2 14:37:35 2013 -0700 - Merge pull request #134 from rxin/readme + a2b0a79 Tue May 21 18:16:20 2013 -0700 + Merge pull request #619 from woggling/adjust-sampling + [Use ARRAY_SAMPLE_SIZE constant instead of hard-coded 100.0 in SizeEstimator] - 87d4e1c Fri Nov 1 17:58:38 2013 -0700 - Merge pull request #132 from Mistobaan/doc_fix + 66dac44 Tue May 21 11:41:42 2013 -0700 + Merge pull request #618 from woggling/dead-code-disttest + [DistributedSuite: remove dead code] - 3db505c Tue Oct 29 01:42:07 2013 -0400 - Merge pull request #119 from soulmachine/master + 5912cc4 Fri May 17 19:58:40 2013 -0700 + Merge pull request #610 from JoshRosen/spark-747 + [Throw exception if TaskResult exceeds Akka frame size] - abeca01 Sun Oct 27 22:11:39 2013 -0400 - Merge pull request #115 from aarondav/shuffle-fix + 6c27c38 Thu May 16 17:33:56 2013 -0700 + Merge pull request #615 from rxin/build-fix + [Maven build fix & two other small changes] - 79e5c50 Sat Oct 26 13:05:40 2013 -0700 - Merge pull request #108 from alig/master + 2f576ab Wed May 15 18:06:24 2013 -0700 + Merge pull request #602 from rxin/shufflemerge + [Manual merge & cleanup of Shane's Shuffle Performance Optimization] - cb24278 Sat Oct 26 11:41:18 2013 -0700 - Merge pull request #113 from pwendell/master + 48c6f46 Wed May 15 10:47:19 2013 -0700 + Merge pull request #612 from ash211/patch-4 + [Docs: Mention spark shell's default for MASTER] - 3ec2f51 Sat Oct 26 11:39:29 2013 -0700 - Merge pull request #111 from kayousterhout/ui_name + 203d7b7 Wed May 15 00:47:20 2013 -0700 + Merge pull request #593 from squito/driver_ui_link + [Master UI has link to Application UI] - 7d47704 Sat Oct 26 11:36:23 2013 -0700 - Merge pull request #114 from soulmachine/master + 016ac86 Mon May 13 21:45:36 2013 -0700 + Merge pull request #601 from rxin/emptyrdd-master + [EmptyRDD (master branch 0.8)] - 148509f Thu Oct 24 22:32:23 2013 -0700 - Merge pull request #109 from pwendell/master + 4b354e0 Mon May 13 17:39:19 2013 -0700 + Merge pull request #589 from mridulm/master + [Add support for instance local scheduling] - c018c61 Thu Oct 24 17:27:28 2013 -0700 - Merge pull request #97 from ewencp/pyspark-system-properties + 5dbc9b2 Sun May 12 11:03:10 2013 -0700 + Merge pull request #608 from pwendell/SPARK-738 + [SPARK-738: Spark should detect and wrap nonserializable exceptions] - 3500b66 Thu Oct 24 17:27:11 2013 -0700 - Merge pull request #93 from kayousterhout/ui_new_state + 63e1999 Fri May 10 13:54:03 2013 -0700 + Merge pull request #606 from markhamstra/foreachPartition_fix + [Actually use the cleaned closure in foreachPartition] - f5eff85 Thu Oct 24 17:26:44 2013 -0700 - Merge pull request #83 from ewencp/pyspark-accumulator-add-method + 42bbe89 Wed May 8 22:30:31 2013 -0700 + Merge pull request #599 from JoshRosen/spark-670 + [Fix SPARK-670: EC2 'start' command should require -i option.] 
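(Editorial aside, not part of the patch: a minimal sketch of RDD.unpersist from the entry above; it assumes an existing SparkContext `sc` and an illustrative input path.)

    // Cache an RDD, use it, then release the cached blocks explicitly.
    val lines = sc.textFile("/tmp/input.txt").cache()
    println(lines.count())
    lines.unpersist()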
- 59d6f06 Thu Oct 24 17:09:05 2013 -0700 - Merge pull request #106 from pwendell/master + 0f1b7a0 Wed May 8 13:38:50 2013 -0700 + Merge pull request #596 from esjewett/master + [hBase example] - 76f3c2f Wed Oct 23 18:03:26 2013 -0700 - Merge pull request #103 from JoshRosen/unpersist-fix + 7af92f2 Sat May 4 22:29:17 2013 -0700 + Merge pull request #597 from JoshRosen/webui-fixes + [Two minor bug fixes for Spark Web UI] - 534bab2 Tue Oct 22 16:02:45 2013 -0700 - Merge pull request #100 from JoshRosen/spark-902 + c74ce60 Sat May 4 22:26:35 2013 -0700 + Merge pull request #598 from rxin/blockmanager + [Fixed flaky unpersist test in DistributedSuite.] - fa9a0e4 Tue Oct 22 12:23:17 2013 -0700 - Merge pull request #90 from pwendell/master + 3bf2c86 Fri May 3 18:27:30 2013 -0700 + Merge pull request #594 from shivaram/master + [Add zip partitions to Java API] - c449ee1 Tue Oct 22 00:00:35 2013 -0700 - Merge pull request #92 from tgravescs/sparkYarnFixClasspath + 2484ad7 Fri May 3 17:08:55 2013 -0700 + Merge pull request #587 from rxin/blockmanager + [A set of shuffle map output related changes] - 498cc6b Mon Oct 21 22:45:31 2013 -0700 - Merge pull request #87 from aarondav/shuffle-base + 6fe9d4e Thu May 2 21:33:56 2013 -0700 + Merge pull request #592 from woggling/localdir-fix + [Don't accept generated local directory names that can't be created] - e3ad6a5 Mon Oct 21 20:20:42 2013 -0700 - Revert "Merge pull request #94 from aarondav/mesos-fix" + 538ee75 Thu May 2 09:01:42 2013 -0700 + Merge pull request #581 from jerryshao/master + [fix [SPARK-740] block manage UI throws exception when enabling Spark Streaming] - fe974ba Mon Oct 21 20:15:30 2013 -0700 - Merge pull request #94 from aarondav/mesos-fix + 9abcbcc Wed May 1 22:45:10 2013 -0700 + Merge pull request #591 from rxin/removerdd + [RDD.unpersist: probably the most desired feature of Spark] - 1c3f4bd Mon Oct 21 11:58:22 2013 -0700 - Merge pull request #88 from rxin/clean + aa8fe1a Tue Apr 30 22:30:18 2013 -0700 + Merge pull request #586 from mridulm/master + [Pull request to address issues Reynold Xin reported] - 1a50c79 Sun Oct 20 22:27:29 2013 -0700 - Merge pull request #41 from pwendell/shuffle-benchmark + f708dda Tue Apr 30 07:51:40 2013 -0700 + Merge pull request #585 from pwendell/listener-perf + [[Fix SPARK-742] Task Metrics should not employ per-record timing by default] - 37a755c Sun Oct 20 21:04:33 2013 -0700 - Merge pull request #89 from rxin/executor + 68c07ea Sun Apr 28 20:19:33 2013 -0700 + Merge pull request #582 from shivaram/master + [Add zip partitions interface] - ec74428 Sun Oct 20 11:46:14 2013 -0700 - Merge pull request #84 from rxin/kill1 + f6ee9a8 Sun Apr 28 15:36:04 2013 -0700 + Merge pull request #583 from mridulm/master + [Fix issues with streaming test cases after yarn branch merge] - 52d13a6 Sun Oct 20 10:50:54 2013 -0700 - Merge pull request #85 from rxin/clean + cf54b82 Thu Apr 25 11:45:58 2013 -0700 + Merge pull request #580 from pwendell/quickstart + [SPARK-739 Have quickstart standlone job use README] - 919c557 Sat Oct 19 11:29:00 2013 -0700 - Merge pull request #79 from aarondav/scdefaults0.8 + 118a6c7 Wed Apr 24 08:42:30 2013 -0700 + Merge pull request #575 from mridulm/master + [Manual merge of yarn branch to trunk] - f3de2ce Fri Oct 18 23:20:16 2013 -0700 - Merge pull request #76 from pwendell/master + 5d8a71c Tue Apr 16 19:48:02 2013 -0700 + Merge pull request #570 from jey/increase-codecache-size + [Increase ReservedCodeCacheSize for sbt] - f181560 Fri Oct 18 23:14:28 2013 -0700 - Merge pull request #68 
from mosharaf/master + ec5e553 Sun Apr 14 08:20:13 2013 -0700 + Merge pull request #558 from ash211/patch-jackson-conflict + [Don't pull in old versions of Jackson via hadoop-core] - eaa2150 Fri Oct 18 23:08:47 2013 -0700 - Merge pull request #74 from rxin/kill + c1c219e Sun Apr 14 08:11:23 2013 -0700 + Merge pull request #564 from maspotts/master + [Allow latest scala in PATH, with SCALA_HOME as override (instead of vice-versa)] - df21ac8 Thu Oct 17 18:38:46 2013 -0700 - Merge pull request #69 from KarthikTunga/master + 7c10b3e Fri Apr 12 20:55:22 2013 -0700 + Merge pull request #565 from andyk/master + [Update wording of section on RDD operations in quick start guide in docs] - b531552 Thu Oct 17 18:37:22 2013 -0700 - Merge pull request #67 from kayousterhout/remove_tsl + 077ae0a Thu Apr 11 19:34:14 2013 -0700 + Merge pull request #561 from ash211/patch-4 + [Add details when BlockManager heartbeats time out] - 5a73ab7 Thu Oct 17 18:36:36 2013 -0700 - Merge pull request #62 from harveyfeng/master + c91ff8d Wed Apr 10 15:08:23 2013 -0700 + Merge pull request #560 from ash211/patch-3 + [Typos: cluser -> cluster] - b6ce111 Thu Oct 17 18:35:33 2013 -0700 - Merge pull request #61 from kayousterhout/daemon_thread + 7cd83bf Tue Apr 9 22:07:35 2013 -0700 + Merge pull request #559 from ash211/patch-example-whitespace + [Uniform whitespace across scala examples] - 2760055 Thu Oct 17 18:34:56 2013 -0700 - Merge pull request #59 from rxin/warning + 271a4f3 Tue Apr 9 22:04:52 2013 -0700 + Merge pull request #555 from holdenk/master + [Retry failed ssh commands in the ec2 python script.] - 1e67234 Thu Oct 17 18:33:21 2013 -0700 - Merge pull request #65 from tgravescs/fixYarn + 8ac9efb Tue Apr 9 13:50:50 2013 -0700 + Merge pull request #527 from Reinvigorate/sm-kafka-cleanup + [KafkaInputDStream fixes and improvements] - d0c9d41 Thu Oct 17 18:32:54 2013 -0700 - Merge pull request #34 from kayousterhout/rename + eed54a2 Mon Apr 8 09:44:30 2013 -0700 + Merge pull request #553 from pwendell/akka-standalone + [SPARK-724 - Have Akka logging enabled by default for standalone daemons] - cee3b43 Wed Oct 16 10:37:55 2013 -0700 - Merge pull request #63 from pwendell/master + b362df3 Sun Apr 7 17:17:52 2013 -0700 + Merge pull request #552 from MLnick/master + [Bumping version for Twitter Algebird to latest] - 1d92983 Tue Oct 15 10:52:15 2013 -0700 - Merge pull request #58 from hsaputra/update-pom-asf + 4b30190 Sun Apr 7 17:15:10 2013 -0700 + Merge pull request #554 from andyk/scala2.9.3 + [Fixes SPARK-723 - Update build to Scala 2.9.3] - c50b016 Mon Oct 14 23:18:37 2013 -0700 - Merge pull request #29 from rxin/kill + dfe98ca Tue Apr 2 19:24:12 2013 -0700 + Merge pull request #550 from erikvanoosten/master + [corrected Algebird example] - 616ea6f Mon Oct 14 20:59:31 2013 -0700 - Merge pull request #57 from aarondav/bid + b5d7830 Tue Apr 2 19:23:45 2013 -0700 + Merge pull request #551 from jey/python-bugfixes + [Python bugfixes] - e306de8 Mon Oct 14 20:58:48 2013 -0700 - Merge pull request #53 from witgo/master + 2be2295 Sun Mar 31 18:09:14 2013 -0700 + Merge pull request #548 from markhamstra/getWritableClass_filter + [Fixed broken filter in getWritableClass[T]] - d66c01f Mon Oct 14 20:58:17 2013 -0700 - Merge pull request #19 from aarondav/master-zk + 9831bc1 Fri Mar 29 22:16:22 2013 -0700 + Merge pull request #539 from cgrothaus/fix-webui-workdirpath + [Bugfix: WorkerWebUI must respect workDirPath from Worker] - 0fcb234 Mon Oct 14 20:54:34 2013 -0700 - Merge pull request #46 from mateiz/py-sort-update + 3cc8ab6 
Fri Mar 29 22:14:07 2013 -0700 + Merge pull request #541 from stephenh/shufflecoalesce + [Add a shuffle parameter to coalesce.] - f94aa52 Mon Oct 14 20:53:15 2013 -0700 - Merge pull request #44 from mateiz/fast-map + cad507a Fri Mar 29 22:13:12 2013 -0700 + Merge pull request #547 from jey/maven-streaming-tests-initialization-fix + [Move streaming test initialization into 'before' blocks] - 36f5b08 Mon Oct 14 20:50:28 2013 -0700 - Merge pull request #33 from AndreSchumacher/pyspark_partition_key_change + a98996d Fri Mar 29 22:12:15 2013 -0700 + Merge pull request #545 from ash211/patch-1 + [Don't use deprecated Application in example] - d615b14 Mon Oct 14 20:48:45 2013 -0700 - Merge pull request #32 from mridulm/master + 104c694 Fri Mar 29 22:11:50 2013 -0700 + Merge pull request #546 from ash211/patch-2 + [Update tuning.md] - 7084217 Mon Oct 14 19:31:22 2013 -0700 - Merge pull request #27 from davidmccauley/master + bc36ee4 Tue Mar 26 15:05:13 2013 -0700 + Merge pull request #543 from holdenk/master + [Re-enable deprecation warnings and fix deprecated warning.] - a35259a Mon Oct 14 19:28:46 2013 -0700 - Merge pull request #26 from Du-Li/master + b8949ca Sat Mar 23 07:19:34 2013 -0700 + Merge pull request #505 from stephenh/volatile + [Make Executor fields volatile since they're read from the thread pool.] - 62ce4ae Mon Oct 14 19:28:09 2013 -0700 - Merge pull request #25 from CruncherBigData/master + fd53f2f Sat Mar 23 07:13:21 2013 -0700 + Merge pull request #510 from markhamstra/WithThing + [mapWith, flatMapWith and filterWith] - 6961744 Mon Oct 14 18:55:41 2013 -0700 - Merge pull request #10 from kayousterhout/results_through-bm + 4c5efcf Wed Mar 20 19:29:23 2013 -0700 + Merge pull request #532 from andyk/master + [SPARK-715: Adds instructions for building with Maven to documentation] - ce364c0 Mon Oct 14 18:54:35 2013 -0700 - Merge pull request #4 from MLnick/implicit-als + 3558849 Wed Mar 20 19:27:47 2013 -0700 + Merge pull request #538 from rxin/cogroup + [Added mapSideCombine flag to CoGroupedRDD. Added unit test for CoGroupedRDD.] - f94bd3f Mon Oct 14 15:10:59 2013 -0700 - Merge pull request #28 from tgravescs/sparYarnAppName + ca4d083 Wed Mar 20 11:22:36 2013 -0700 + Merge pull request #528 from MLnick/java-examples + [[SPARK-707] Adding Java versions of Pi, LogQuery and K-Means examples] - dc2c90d Mon Oct 14 15:03:34 2013 -0700 - Merge pull request #38 from AndreSchumacher/pyspark_sorting + b812e6b Wed Mar 20 11:21:02 2013 -0700 + Merge pull request #526 from markhamstra/foldByKey + [Add foldByKey] - 00a7551 Sat Oct 12 21:26:22 2013 -0700 - Merge pull request #52 from harveyfeng/hadoop-closure + 945d1e7 Tue Mar 19 21:59:06 2013 -0700 + Merge pull request #536 from sasurfer/master + [CoalescedRDD for many partitions] - 5383a5a Sat Oct 12 21:26:11 2013 -0700 - Merge pull request #20 from harveyfeng/hadoop-config-cache + 1cbbe94 Tue Mar 19 21:34:34 2013 -0700 + Merge pull request #534 from stephenh/removetrycatch + [Remove try/catch block that can't be hit.] - 4a2e76a Fri Oct 11 16:09:07 2013 -0700 - Merge pull request #54 from aoiwelle/remove_unused_imports + 71e53f8 Tue Mar 19 21:31:41 2013 -0700 + Merge pull request #537 from wishbear/configurableInputFormat + [call setConf from input format if it is Configurable] - 64fae16 Wed Oct 9 21:42:18 2013 -0700 - Merge pull request #47 from xiliu82/branch-0.8 + c1e9cdc Sat Mar 16 11:47:45 2013 -0700 + Merge pull request #525 from stephenh/subtractByKey + [Add PairRDDFunctions.subtractByKey.] 
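(Editorial aside, not part of the patch: a minimal sketch of the pair-RDD additions mentioned in the entries above, foldByKey, subtractByKey, and the shuffle flag on coalesce. It assumes an existing SparkContext `sc`; the data and names are made up.)

    import org.apache.spark.SparkContext._  // brings in the pair-RDD implicits

    // Hypothetical data; assumes an existing SparkContext `sc`.
    val clicks  = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))
    val blocked = sc.parallelize(Seq(("b", ())))

    // foldByKey: per-key fold from a zero value (here, a per-key sum).
    val totals = clicks.foldByKey(0)(_ + _)

    // subtractByKey: drop pairs whose key appears in `blocked`.
    val allowed = clicks.subtractByKey(blocked)

    // coalesce with shuffle = true redistributes data instead of only merging partitions.
    val rebalanced = allowed.coalesce(4, shuffle = true)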
- dfc62e2 Wed Oct 9 16:55:58 2013 -0700 - Merge pull request #49 from mateiz/kryo-fix-2 + cdbfd1e Fri Mar 15 15:13:28 2013 -0700 + Merge pull request #516 from squito/fix_local_metrics + [Fix local metrics] - 0b6f047 Wed Oct 9 16:53:31 2013 -0700 - Merge pull request #50 from kayousterhout/SPARK-908 + f9fa2ad Fri Mar 15 15:12:43 2013 -0700 + Merge pull request #530 from mbautin/master-update-log4j-and-make-compile-in-IntelliJ + [Add a log4j compile dependency to fix build in IntelliJ] - f930dd4 Tue Oct 8 22:58:35 2013 -0700 - Merge pull request #43 from mateiz/kryo-fix + 4032beb Wed Mar 13 19:29:46 2013 -0700 + Merge pull request #521 from stephenh/earlyclose + [Close the reader in HadoopRDD as soon as iteration end.] - 8e9bd93 Mon Oct 7 20:47:09 2013 -0700 - Merge pull request #42 from pwendell/shuffle-read-perf + 3c97276 Wed Mar 13 19:25:08 2013 -0700 + Merge pull request #524 from andyk/master + [Fix broken link to YARN documentation] - f2cdcc4 Mon Oct 7 15:49:32 2013 -0700 - Merge pull request #40 from pwendell/branch-0.8 + 1c3d981 Wed Mar 13 19:23:48 2013 -0700 + Merge pull request #517 from Reinvigorate/sm-build-fixes + [Build fixes for streaming /w SBT] - 023e3fd Mon Oct 7 10:47:45 2013 -0700 - Merge pull request #31 from sundeepn/branch-0.8 + 2d477fd Wed Mar 13 06:49:16 2013 -0700 + Merge pull request #523 from andyk/master + [Fix broken link in Quick Start] - 3cb9040 Thu Sep 26 14:37:06 2013 -0700 - Merge pull request #17 from rxin/optimize + 00c4d23 Tue Mar 12 22:19:00 2013 -0700 + Merge pull request #518 from woggling/long-bm-sizes + [Send block sizes as longs in BlockManager updates] - 35bcf32 Thu Sep 26 14:17:00 2013 -0700 - Merge pull request #930 from holdenk/master + cbf8f0d Mon Mar 11 00:23:57 2013 -0700 + Merge pull request #513 from MLnick/bagel-caching + [Adds choice of persistence level to Bagel.] - 976fe60 Thu Sep 26 14:16:17 2013 -0700 - Merge pull request #14 from kayousterhout/untangle_scheduler + 91a9d09 Sun Mar 10 15:48:23 2013 -0700 + Merge pull request #512 from patelh/fix-kryo-serializer + [Fix reference bug in Kryo serializer, add test, update version] - 8cbc96b Thu Sep 26 13:16:05 2013 -0700 - Merge pull request #7 from wannabeast/memorystore-fixes + 557cfd0 Sun Mar 10 15:44:57 2013 -0700 + Merge pull request #515 from woggling/deploy-app-death + [Notify standalone deploy client of application death.] - 240ca93 Thu Sep 26 13:12:06 2013 -0700 - Merge pull request #9 from rxin/limit + 04fb81f Sun Mar 3 17:20:07 2013 -0800 + Merge pull request #506 from rxin/spark-706 + [Fixed SPARK-706: Failures in block manager put leads to read task hanging.] - a186792 Thu Sep 26 13:10:59 2013 -0700 - Merge pull request #937 from jerryshao/localProperties-fix + 6cf4be4 Sun Mar 3 17:16:22 2013 -0800 + Merge pull request #462 from squito/stageInfo + [Track assorted metrics for each task, report summaries to user at stage completion] - f3c60c9 Thu Sep 26 13:10:24 2013 -0700 - Merge pull request #941 from ilikerps/master + 6bfc7ca Sat Mar 2 22:14:49 2013 -0800 + Merge pull request #504 from mosharaf/master + [Worker address was getting removed when removing an app.] 
- d5a8dbf Thu Sep 26 13:09:30 2013 -0700 - Merge pull request #928 from jerryshao/fairscheduler-refactor + 94b3db1 Sat Mar 2 22:13:52 2013 -0800 + Merge pull request #508 from markhamstra/TestServerInUse + [Avoid bind failure in InputStreamsSuite] + + 25c71d3 Fri Mar 1 08:00:18 2013 -0800 + Merge pull request #507 from markhamstra/poms271 + [bump version to 0.7.1-SNAPSHOT in the subproject poms] diff --git a/README.md b/README.md index 363fbf443c16d..c840a68f76b17 100644 --- a/README.md +++ b/README.md @@ -12,22 +12,23 @@ This README file only contains basic setup instructions. ## Building -Spark requires Scala 2.9.3 (Scala 2.10 is not yet supported). The project is -built using Simple Build Tool (SBT), which is packaged with it. To build -Spark and its example programs, run: +Spark requires Scala 2.10. The project is built using Simple Build Tool (SBT), +which can be obtained [here](http://www.scala-sbt.org). If SBT is installed we +will use the system version of sbt otherwise we will attempt to download it +automatically. To build Spark and its example programs, run: - sbt/sbt assembly + ./sbt/sbt assembly Once you've built Spark, the easiest way to start using it is the shell: - ./spark-shell + ./bin/spark-shell -Or, for the Python API, the Python shell (`./pyspark`). +Or, for the Python API, the Python shell (`./bin/pyspark`). Spark also comes with several sample programs in the `examples` directory. -To run one of them, use `./run-example `. For example: +To run one of them, use `./bin/run-example `. For example: - ./run-example org.apache.spark.examples.SparkLR local[2] + ./bin/run-example org.apache.spark.examples.SparkLR local[2] will run the Logistic Regression example locally on 2 CPUs. @@ -37,7 +38,13 @@ All of the Spark samples take a `` parameter that is the cluster URL to connect to. This can be a mesos:// or spark:// URL, or "local" to run locally with one thread, or "local[N]" to run locally with N threads. +## Running tests +Testing first requires [Building](#building) Spark. Once Spark is built, tests +can be run using: + +`./sbt/sbt test` + ## A Note About Hadoop Versions Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported @@ -55,7 +62,7 @@ versions without YARN, use: # Cloudera CDH 4.2.0 with MapReduce v1 $ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt/sbt assembly -For Apache Hadoop 2.0.X, 2.1.X, 2.2.X, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions +For Apache Hadoop 2.2.X, 2.1.X, 2.0.X, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with YARN, also set `SPARK_YARN=true`: # Apache Hadoop 2.0.5-alpha @@ -64,7 +71,7 @@ with YARN, also set `SPARK_YARN=true`: # Cloudera CDH 4.2.0 with MapReduce v2 $ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_YARN=true sbt/sbt assembly - # Apache Hadoop 2.2.0 with YARN + # Apache Hadoop 2.2.X and newer $ SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true sbt/sbt assembly When developing a Spark application, specify the Hadoop version by adding the diff --git a/assembly/lib/PY4J_LICENSE.txt b/assembly/lib/PY4J_LICENSE.txt deleted file mode 100644 index a70279ca14ae3..0000000000000 --- a/assembly/lib/PY4J_LICENSE.txt +++ /dev/null @@ -1,27 +0,0 @@ - -Copyright (c) 2009-2011, Barthelemy Dagenais All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. 
- -- Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -- The name of the author may not be used to endorse or promote products -derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. diff --git a/assembly/lib/PY4J_VERSION.txt b/assembly/lib/PY4J_VERSION.txt deleted file mode 100644 index 04a0cd52a8d9c..0000000000000 --- a/assembly/lib/PY4J_VERSION.txt +++ /dev/null @@ -1 +0,0 @@ -b7924aabe9c5e63f0a4d8bbd17019534c7ec014e diff --git a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar deleted file mode 100644 index 73b7ddb7d115b..0000000000000 Binary files a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar and /dev/null differ diff --git a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom deleted file mode 100644 index 1c730e19b4b2f..0000000000000 --- a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom +++ /dev/null @@ -1,9 +0,0 @@ - - - 4.0.0 - net.sf.py4j - py4j - 0.7 - POM was created from install:install-file - diff --git a/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml b/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml deleted file mode 100644 index 6942ff45e75f1..0000000000000 --- a/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - net.sf.py4j - py4j - - 0.7 - - 0.7 - - 20130828020333 - - diff --git a/assembly/pom.xml b/assembly/pom.xml index 05e5b25c57ec1..6c501b5e6cc84 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,17 +21,17 @@ org.apache.spark spark-parent - 0.8.2-candidate-csd-3-SNAPSHOT + 0.9.1-csd-1-SNAPSHOT ../pom.xml org.apache.spark - spark-assembly_2.9.3 + spark-assembly_2.10 Spark Project Assembly http://spark.incubator.apache.org/ - ${project.build.directory}/scala-${scala.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar + ${project.build.directory}/scala-${scala.binary.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar spark /usr/share/spark root @@ -47,51 +47,39 @@ - ${akka.group} - akka-actor - - - ${akka.group} - akka-remote - - - ${akka.group} - akka-slf4j + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} - - ${akka.group} - akka-zeromq - - org.apache.spark - spark-core_2.9.3 + spark-bagel_${scala.binary.version} ${project.version} org.apache.spark - spark-bagel_2.9.3 + spark-mllib_${scala.binary.version} ${project.version} org.apache.spark - spark-mllib_2.9.3 + spark-repl_${scala.binary.version} ${project.version} org.apache.spark - spark-repl_2.9.3 + spark-streaming_${scala.binary.version} ${project.version} 
org.apache.spark - spark-streaming_2.9.3 + spark-graphx_${scala.binary.version} ${project.version} net.sf.py4j py4j - 0.7 + 0.8.1 @@ -132,12 +120,12 @@ META-INF/services/org.apache.hadoop.fs.FileSystem - - - reference.conf + + log4j.properties + @@ -148,26 +136,25 @@ - hadoop2-yarn + yarn-alpha org.apache.spark - spark-yarn_2.9.3 + spark-yarn-alpha_${scala.binary.version} ${project.version} - new-yarn + yarn org.apache.spark - spark-yarn_2.9.3 + spark-yarn_${scala.binary.version} ${project.version} - bigtop-dist - 4.0.0 - - org.apache.spark - spark-parent - 0.8.2-candidate-csd-3-SNAPSHOT - ../pom.xml - + 4.0.0 + + org.apache.spark + spark-parent + 0.9.1-csd-1-SNAPSHOT + ../pom.xml + - org.apache.spark - spark-core_2.9.3 - jar - Spark Project Core - http://spark.incubator.apache.org/ + org.apache.spark + spark-core_2.10 + jar + Spark Project Core + http://spark.incubator.apache.org/ - - - org.apache.hadoop - hadoop-client - - - net.java.dev.jets3t - jets3t - - - org.apache.avro - avro - - - org.apache.avro - avro-ipc - - - org.apache.zookeeper - zookeeper - - - org.eclipse.jetty - jetty-server - - - com.google.guava - guava - - - com.google.code.findbugs - jsr305 - - - org.slf4j - slf4j-api - - - com.ning - compress-lzf - - - org.xerial.snappy - snappy-java - - - org.ow2.asm - asm - - - com.google.protobuf - protobuf-java - - - com.twitter - chill_2.9.3 - 0.3.1 - - - com.twitter - chill-java - 0.3.1 - - - ${akka.group} - akka-actor - - - ${akka.group} - akka-remote - - - ${akka.group} - akka-slf4j - - - org.scala-lang - scalap - - - org.scala-lang - scala-library - - - net.liftweb - lift-json_2.9.2 - - - it.unimi.dsi - fastutil - - - colt - colt - - - org.apache.mesos - mesos - - - io.netty - netty-all - - - log4j - log4j - - - com.codahale.metrics - metrics-core - - - com.codahale.metrics - metrics-jvm - - - com.codahale.metrics - metrics-json - - - com.codahale.metrics - metrics-ganglia - - - org.apache.derby - derby - test - - - org.scalatest - scalatest_2.9.3 - test - - - org.scalacheck - scalacheck_2.9.3 - test - - - org.easymock - easymock - test - - - com.novocode - junit-interface - test - - - org.slf4j - slf4j-log4j12 - test - - - - target/scala-${scala.version}/classes - target/scala-${scala.version}/test-classes - - - org.apache.maven.plugins - maven-antrun-plugin - - - test - - run - - - true - - - - - - - - - - - - - - - - - - - - org.scalatest - scalatest-maven-plugin - - - ${basedir}/.. 
- 1 - ${spark.classpath} - - - - - + + + org.apache.hadoop + hadoop-client + + + net.java.dev.jets3t + jets3t + + + org.apache.avro + avro + + + org.apache.avro + avro-ipc + + + org.apache.zookeeper + zookeeper + + + org.eclipse.jetty + jetty-server + + + com.google.guava + guava + + + com.google.code.findbugs + jsr305 + + + org.slf4j + slf4j-api + + + com.ning + compress-lzf + + + org.xerial.snappy + snappy-java + + + org.ow2.asm + asm + + + com.twitter + chill_${scala.binary.version} + 0.3.1 + + + com.twitter + chill-java + 0.3.1 + + + ${akka.group} + akka-remote_${scala.binary.version} + + + ${akka.group} + akka-slf4j_${scala.binary.version} + + + ${akka.group} + akka-testkit_${scala.binary.version} + test + + + org.scala-lang + scala-library + + + net.liftweb + lift-json_${scala.binary.version} + + + it.unimi.dsi + fastutil + + + colt + colt + + + org.apache.mesos + mesos + + + io.netty + netty-all + + + log4j + log4j + + + com.clearspring.analytics + stream + + + com.codahale.metrics + metrics-core + + + com.codahale.metrics + metrics-jvm + + + com.codahale.metrics + metrics-json + + + com.codahale.metrics + metrics-ganglia + + + com.codahale.metrics + metrics-graphite + + + org.apache.derby + derby + test + + + commons-io + commons-io + test + + + org.scalatest + scalatest_${scala.binary.version} + test + + + org.mockito + mockito-all + test + + + org.scalacheck + scalacheck_${scala.binary.version} + test + + + org.easymock + easymock + test + + + com.novocode + junit-interface + test + + + org.slf4j + slf4j-log4j12 + test + + + + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes + + + org.apache.maven.plugins + maven-antrun-plugin + + + test + + run + + + true + + + + + + + + + + + + + + + + + + + + org.scalatest + scalatest-maven-plugin + + + ${basedir}/.. 
+ 1 + ${spark.classpath} + + + + + diff --git a/core/src/main/java/org/apache/spark/network/netty/FileClient.java b/core/src/main/java/org/apache/spark/network/netty/FileClient.java index 20a7a3aa8c122..d2d778b7567bc 100644 --- a/core/src/main/java/org/apache/spark/network/netty/FileClient.java +++ b/core/src/main/java/org/apache/spark/network/netty/FileClient.java @@ -19,31 +19,36 @@ import io.netty.bootstrap.Bootstrap; import io.netty.channel.Channel; -import io.netty.channel.ChannelFuture; -import io.netty.channel.ChannelFutureListener; import io.netty.channel.ChannelOption; +import io.netty.channel.EventLoopGroup; import io.netty.channel.oio.OioEventLoopGroup; import io.netty.channel.socket.oio.OioSocketChannel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.concurrent.TimeUnit; + class FileClient { - private Logger LOG = LoggerFactory.getLogger(this.getClass().getName()); - private FileClientHandler handler = null; + private static final Logger LOG = LoggerFactory.getLogger(FileClient.class.getName()); + + private final FileClientHandler handler; private Channel channel = null; private Bootstrap bootstrap = null; - private int connectTimeout = 60*1000; // 1 min + private EventLoopGroup group = null; + private final int connectTimeout; + private final int sendTimeout = 60; // 1 min - public FileClient(FileClientHandler handler, int connectTimeout) { + FileClient(FileClientHandler handler, int connectTimeout) { this.handler = handler; this.connectTimeout = connectTimeout; } public void init() { + group = new OioEventLoopGroup(); bootstrap = new Bootstrap(); - bootstrap.group(new OioEventLoopGroup()) + bootstrap.group(group) .channel(OioSocketChannel.class) .option(ChannelOption.SO_KEEPALIVE, true) .option(ChannelOption.TCP_NODELAY, true) @@ -58,6 +63,7 @@ public void connect(String host, int port) { // ChannelFuture cf = channel.closeFuture(); //cf.addListener(new ChannelCloseListener(this)); } catch (InterruptedException e) { + LOG.warn("FileClient interrupted while trying to connect", e); close(); } } @@ -73,16 +79,21 @@ public void waitForClose() { public void sendRequest(String file) { //assert(file == null); //assert(channel == null); - channel.write(file + "\r\n"); + try { + // Should be able to send the message to network link channel. 
+ boolean bSent = channel.writeAndFlush(file + "\r\n").await(sendTimeout, TimeUnit.SECONDS); + if (!bSent) { + throw new RuntimeException("Failed to send"); + } + } catch (InterruptedException e) { + LOG.error("Error", e); + } } public void close() { - if(channel != null) { - channel.close(); - channel = null; - } - if ( bootstrap!=null) { - bootstrap.shutdown(); + if (group != null) { + group.shutdownGracefully(); + group = null; bootstrap = null; } } diff --git a/core/src/main/java/org/apache/spark/network/netty/FileClientChannelInitializer.java b/core/src/main/java/org/apache/spark/network/netty/FileClientChannelInitializer.java index 65ee15d63b854..264cf97d0209f 100644 --- a/core/src/main/java/org/apache/spark/network/netty/FileClientChannelInitializer.java +++ b/core/src/main/java/org/apache/spark/network/netty/FileClientChannelInitializer.java @@ -17,17 +17,15 @@ package org.apache.spark.network.netty; -import io.netty.buffer.BufType; import io.netty.channel.ChannelInitializer; import io.netty.channel.socket.SocketChannel; import io.netty.handler.codec.string.StringEncoder; - class FileClientChannelInitializer extends ChannelInitializer { - private FileClientHandler fhandler; + private final FileClientHandler fhandler; - public FileClientChannelInitializer(FileClientHandler handler) { + FileClientChannelInitializer(FileClientHandler handler) { fhandler = handler; } @@ -35,7 +33,7 @@ public FileClientChannelInitializer(FileClientHandler handler) { public void initChannel(SocketChannel channel) { // file no more than 2G channel.pipeline() - .addLast("encoder", new StringEncoder(BufType.BYTE)) + .addLast("encoder", new StringEncoder()) .addLast("handler", fhandler); } } diff --git a/core/src/main/java/org/apache/spark/network/netty/FileClientHandler.java b/core/src/main/java/org/apache/spark/network/netty/FileClientHandler.java index 8a09210245fff..63d3d927255f9 100644 --- a/core/src/main/java/org/apache/spark/network/netty/FileClientHandler.java +++ b/core/src/main/java/org/apache/spark/network/netty/FileClientHandler.java @@ -19,11 +19,11 @@ import io.netty.buffer.ByteBuf; import io.netty.channel.ChannelHandlerContext; -import io.netty.channel.ChannelInboundByteHandlerAdapter; +import io.netty.channel.SimpleChannelInboundHandler; import org.apache.spark.storage.BlockId; -abstract class FileClientHandler extends ChannelInboundByteHandlerAdapter { +abstract class FileClientHandler extends SimpleChannelInboundHandler { private FileHeader currentHeader = null; @@ -37,13 +37,7 @@ public boolean isComplete() { public abstract void handleError(BlockId blockId); @Override - public ByteBuf newInboundBuffer(ChannelHandlerContext ctx) { - // Use direct buffer if possible. 
- return ctx.alloc().ioBuffer(); - } - - @Override - public void inboundBufferUpdated(ChannelHandlerContext ctx, ByteBuf in) { + public void channelRead0(ChannelHandlerContext ctx, ByteBuf in) { // get header if (currentHeader == null && in.readableBytes() >= FileHeader.HEADER_SIZE()) { currentHeader = FileHeader.create(in.readBytes(FileHeader.HEADER_SIZE())); diff --git a/core/src/main/java/org/apache/spark/network/netty/FileServer.java b/core/src/main/java/org/apache/spark/network/netty/FileServer.java index 666432474dc75..c93425e2787dc 100644 --- a/core/src/main/java/org/apache/spark/network/netty/FileServer.java +++ b/core/src/main/java/org/apache/spark/network/netty/FileServer.java @@ -20,34 +20,35 @@ import java.net.InetSocketAddress; import io.netty.bootstrap.ServerBootstrap; -import io.netty.channel.Channel; import io.netty.channel.ChannelFuture; import io.netty.channel.ChannelOption; +import io.netty.channel.EventLoopGroup; import io.netty.channel.oio.OioEventLoopGroup; import io.netty.channel.socket.oio.OioServerSocketChannel; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; - /** * Server that accept the path of a file an echo back its content. */ class FileServer { - private Logger LOG = LoggerFactory.getLogger(this.getClass().getName()); + private static final Logger LOG = LoggerFactory.getLogger(FileServer.class.getName()); - private ServerBootstrap bootstrap = null; + private EventLoopGroup bossGroup = null; + private EventLoopGroup workerGroup = null; private ChannelFuture channelFuture = null; private int port = 0; - private Thread blockingThread = null; - public FileServer(PathResolver pResolver, int port) { + FileServer(PathResolver pResolver, int port) { InetSocketAddress addr = new InetSocketAddress(port); // Configure the server. - bootstrap = new ServerBootstrap(); - bootstrap.group(new OioEventLoopGroup(), new OioEventLoopGroup()) + bossGroup = new OioEventLoopGroup(); + workerGroup = new OioEventLoopGroup(); + + ServerBootstrap bootstrap = new ServerBootstrap(); + bootstrap.group(bossGroup, workerGroup) .channel(OioServerSocketChannel.class) .option(ChannelOption.SO_BACKLOG, 100) .option(ChannelOption.SO_RCVBUF, 1500) @@ -68,7 +69,8 @@ public FileServer(PathResolver pResolver, int port) { * Start the file server asynchronously in a new thread. */ public void start() { - blockingThread = new Thread() { + Thread blockingThread = new Thread() { + @Override public void run() { try { channelFuture.channel().closeFuture().sync(); @@ -90,13 +92,19 @@ public int getPort() { public void stop() { // Close the bound channel. if (channelFuture != null) { - channelFuture.channel().close(); + channelFuture.channel().close().awaitUninterruptibly(); channelFuture = null; } - // Shutdown bootstrap. - if (bootstrap != null) { - bootstrap.shutdown(); - bootstrap = null; + + // Shutdown event groups + if (bossGroup != null) { + bossGroup.shutdownGracefully(); + bossGroup = null; + } + + if (workerGroup != null) { + workerGroup.shutdownGracefully(); + workerGroup = null; } // TODO: Shutdown all accepted channels as well ? 
} diff --git a/core/src/main/java/org/apache/spark/network/netty/FileServerChannelInitializer.java b/core/src/main/java/org/apache/spark/network/netty/FileServerChannelInitializer.java index 833af1632de9c..46efec8f8d963 100644 --- a/core/src/main/java/org/apache/spark/network/netty/FileServerChannelInitializer.java +++ b/core/src/main/java/org/apache/spark/network/netty/FileServerChannelInitializer.java @@ -23,12 +23,11 @@ import io.netty.handler.codec.Delimiters; import io.netty.handler.codec.string.StringDecoder; - class FileServerChannelInitializer extends ChannelInitializer { - PathResolver pResolver; + private final PathResolver pResolver; - public FileServerChannelInitializer(PathResolver pResolver) { + FileServerChannelInitializer(PathResolver pResolver) { this.pResolver = pResolver; } @@ -36,7 +35,7 @@ public FileServerChannelInitializer(PathResolver pResolver) { public void initChannel(SocketChannel channel) { channel.pipeline() .addLast("framer", new DelimiterBasedFrameDecoder(8192, Delimiters.lineDelimiter())) - .addLast("strDecoder", new StringDecoder()) + .addLast("stringDecoder", new StringDecoder()) .addLast("handler", new FileServerHandler(pResolver)); } } diff --git a/core/src/main/java/org/apache/spark/network/netty/FileServerHandler.java b/core/src/main/java/org/apache/spark/network/netty/FileServerHandler.java index 172c6e4b1cce1..3ac045f9444f2 100644 --- a/core/src/main/java/org/apache/spark/network/netty/FileServerHandler.java +++ b/core/src/main/java/org/apache/spark/network/netty/FileServerHandler.java @@ -21,22 +21,26 @@ import java.io.FileInputStream; import io.netty.channel.ChannelHandlerContext; -import io.netty.channel.ChannelInboundMessageHandlerAdapter; +import io.netty.channel.SimpleChannelInboundHandler; import io.netty.channel.DefaultFileRegion; import org.apache.spark.storage.BlockId; import org.apache.spark.storage.FileSegment; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -class FileServerHandler extends ChannelInboundMessageHandlerAdapter { +class FileServerHandler extends SimpleChannelInboundHandler { - PathResolver pResolver; + private static final Logger LOG = LoggerFactory.getLogger(FileServerHandler.class.getName()); - public FileServerHandler(PathResolver pResolver){ + private final PathResolver pResolver; + + FileServerHandler(PathResolver pResolver){ this.pResolver = pResolver; } @Override - public void messageReceived(ChannelHandlerContext ctx, String blockIdString) { + public void channelRead0(ChannelHandlerContext ctx, String blockIdString) { BlockId blockId = BlockId.apply(blockIdString); FileSegment fileSegment = pResolver.getBlockLocation(blockId); // if getBlockLocation returns null, close the channel @@ -57,13 +61,13 @@ public void messageReceived(ChannelHandlerContext ctx, String blockIdString) { ctx.flush(); return; } - int len = new Long(length).intValue(); + int len = (int) length; ctx.write((new FileHeader(len, blockId)).buffer()); try { - ctx.sendFile(new DefaultFileRegion(new FileInputStream(file) + ctx.write(new DefaultFileRegion(new FileInputStream(file) .getChannel(), fileSegment.offset(), fileSegment.length())); } catch (Exception e) { - e.printStackTrace(); + LOG.error("Exception: ", e); } } else { ctx.write(new FileHeader(0, blockId).buffer()); @@ -73,7 +77,7 @@ public void messageReceived(ChannelHandlerContext ctx, String blockIdString) { @Override public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { - cause.printStackTrace(); + LOG.error("Exception: ", cause); ctx.close(); } } diff --git 
a/core/src/main/java/org/apache/spark/network/netty/PathResolver.java b/core/src/main/java/org/apache/spark/network/netty/PathResolver.java index 9f7ced44cf93d..7ad8d03efbadc 100755 --- a/core/src/main/java/org/apache/spark/network/netty/PathResolver.java +++ b/core/src/main/java/org/apache/spark/network/netty/PathResolver.java @@ -1,26 +1,26 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.network.netty; - -import org.apache.spark.storage.BlockId; -import org.apache.spark.storage.FileSegment; - -public interface PathResolver { - /** Get the file segment in which the given block resides. */ - public FileSegment getBlockLocation(BlockId blockId); -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.netty; + +import org.apache.spark.storage.BlockId; +import org.apache.spark.storage.FileSegment; + +public interface PathResolver { + /** Get the file segment in which the given block resides. 
*/ + FileSegment getBlockLocation(BlockId blockId); +} diff --git a/core/src/main/resources/org/apache/spark/log4j-defaults.properties b/core/src/main/resources/org/apache/spark/log4j-defaults.properties new file mode 100644 index 0000000000000..f7f853559468a --- /dev/null +++ b/core/src/main/resources/org/apache/spark/log4j-defaults.properties @@ -0,0 +1,11 @@ +# Set everything to be logged to the console +log4j.rootCategory=INFO, console +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +# Settings to quiet third party logs that are too verbose +log4j.logger.org.eclipse.jetty=WARN +log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO +log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO diff --git a/core/src/main/scala/org/apache/spark/Accumulators.scala b/core/src/main/scala/org/apache/spark/Accumulators.scala index 6e922a612a079..de811eaf0d4bd 100644 --- a/core/src/main/scala/org/apache/spark/Accumulators.scala +++ b/core/src/main/scala/org/apache/spark/Accumulators.scala @@ -17,17 +17,17 @@ package org.apache.spark -import java.io._ +import java.io.{ObjectInputStream, Serializable} import scala.collection.mutable.Map import scala.collection.generic.Growable import org.apache.spark.serializer.JavaSerializer /** - * A datatype that can be accumulated, i.e. has an commutative and associative "add" operation, + * A data type that can be accumulated, ie has an commutative and associative "add" operation, * but where the result type, `R`, may be different from the element type being added, `T`. * - * You must define how to add data, and how to merge two of these together. For some datatypes, + * You must define how to add data, and how to merge two of these together. For some data types, * such as a counter, these might be the same operation. In that case, you can use the simpler * [[org.apache.spark.Accumulator]]. They won't always be the same, though -- e.g., imagine you are * accumulating a set. You will add items to the set, and you will union two sets together. @@ -41,11 +41,11 @@ class Accumulable[R, T] ( @transient initialValue: R, param: AccumulableParam[R, T]) extends Serializable { - + val id = Accumulators.newId @transient private var value_ = initialValue // Current value on master val zero = param.zero(initialValue) // Zero value to be passed to workers - var deserialized = false + private var deserialized = false Accumulators.register(this, true) @@ -113,7 +113,7 @@ class Accumulable[R, T] ( def setValue(newValue: R) { this.value = newValue } - + // Called by Java when deserializing an object private def readObject(in: ObjectInputStream) { in.defaultReadObject() @@ -127,7 +127,7 @@ class Accumulable[R, T] ( /** * Helper object defining how to accumulate values of a particular type. An implicit - * AccumulableParam needs to be available when you create Accumulables of a specific type. + * AccumulableParam needs to be available when you create [[Accumulable]]s of a specific type. * * @tparam R the full accumulated data (result type) * @tparam T partial data that can be added in @@ -177,7 +177,7 @@ class GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Ser def zero(initialValue: R): R = { // We need to clone initialValue, but it's hard to specify that R should also be Cloneable. 
// Instead we'll serialize it to a buffer and load it back. - val ser = new JavaSerializer().newInstance() + val ser = new JavaSerializer(new SparkConf(false)).newInstance() val copy = ser.deserialize[R](ser.serialize(initialValue)) copy.clear() // In case it contained stuff copy @@ -185,8 +185,30 @@ class GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Ser } /** - * A simpler value of [[org.apache.spark.Accumulable]] where the result type being accumulated is the same - * as the types of elements being merged. + * A simpler value of [[Accumulable]] where the result type being accumulated is the same + * as the types of elements being merged, i.e. variables that are only "added" to through an + * associative operation and can therefore be efficiently supported in parallel. They can be used + * to implement counters (as in MapReduce) or sums. Spark natively supports accumulators of numeric + * value types, and programmers can add support for new types. + * + * An accumulator is created from an initial value `v` by calling [[SparkContext#accumulator]]. + * Tasks running on the cluster can then add to it using the [[Accumulable#+=]] operator. + * However, they cannot read its value. Only the driver program can read the accumulator's value, + * using its value method. + * + * The interpreter session below shows an accumulator being used to add up the elements of an array: + * + * {{{ + * scala> val accum = sc.accumulator(0) + * accum: spark.Accumulator[Int] = 0 + * + * scala> sc.parallelize(Array(1, 2, 3, 4)).foreach(x => accum += x) + * ... + * 10/09/29 18:41:08 INFO SparkContext: Tasks finished in 0.317106 s + * + * scala> accum.value + * res2: Int = 10 + * }}} * * @param initialValue initial value of accumulator * @param param helper object defining how to add elements of type `T` @@ -196,9 +218,9 @@ class Accumulator[T](@transient initialValue: T, param: AccumulatorParam[T]) extends Accumulable[T,T](initialValue, param) /** - * A simpler version of [[org.apache.spark.AccumulableParam]] where the only datatype you can add in is the same type - * as the accumulated value. An implicit AccumulatorParam object needs to be available when you create - * Accumulators of a specific type. + * A simpler version of [[org.apache.spark.AccumulableParam]] where the only data type you can add + * in is the same type as the accumulated value. An implicit AccumulatorParam object needs to be + * available when you create Accumulators of a specific type. * * @tparam T type of value to accumulate */ @@ -215,10 +237,10 @@ private object Accumulators { val originals = Map[Long, Accumulable[_, _]]() val localAccums = Map[Thread, Map[Long, Accumulable[_, _]]]() var lastId: Long = 0 - + def newId: Long = synchronized { lastId += 1 - return lastId + lastId } def register(a: Accumulable[_, _], original: Boolean): Unit = synchronized { diff --git a/core/src/main/scala/org/apache/spark/Aggregator.scala b/core/src/main/scala/org/apache/spark/Aggregator.scala index 1a2ec55876c35..c4579cf6ad560 100644 --- a/core/src/main/scala/org/apache/spark/Aggregator.scala +++ b/core/src/main/scala/org/apache/spark/Aggregator.scala @@ -17,7 +17,9 @@ package org.apache.spark -import org.apache.spark.util.AppendOnlyMap +import scala.{Option, deprecated} + +import org.apache.spark.util.collection.{AppendOnlyMap, ExternalAppendOnlyMap} /** * A set of functions used to aggregate data. 
@@ -31,30 +33,64 @@ case class Aggregator[K, V, C] ( mergeValue: (C, V) => C, mergeCombiners: (C, C) => C) { - def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]]) : Iterator[(K, C)] = { - val combiners = new AppendOnlyMap[K, C] - var kv: Product2[K, V] = null - val update = (hadValue: Boolean, oldValue: C) => { - if (hadValue) mergeValue(oldValue, kv._2) else createCombiner(kv._2) - } - while (iter.hasNext) { - kv = iter.next() - combiners.changeValue(kv._1, update) + private val externalSorting = SparkEnv.get.conf.getBoolean("spark.shuffle.spill", true) + + @deprecated("use combineValuesByKey with TaskContext argument", "0.9.0") + def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]]): Iterator[(K, C)] = + combineValuesByKey(iter, null) + + def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]], + context: TaskContext): Iterator[(K, C)] = { + if (!externalSorting) { + val combiners = new AppendOnlyMap[K,C] + var kv: Product2[K, V] = null + val update = (hadValue: Boolean, oldValue: C) => { + if (hadValue) mergeValue(oldValue, kv._2) else createCombiner(kv._2) + } + while (iter.hasNext) { + kv = iter.next() + combiners.changeValue(kv._1, update) + } + combiners.iterator + } else { + val combiners = new ExternalAppendOnlyMap[K, V, C](createCombiner, mergeValue, mergeCombiners) + while (iter.hasNext) { + val (k, v) = iter.next() + combiners.insert(k, v) + } + // TODO: Make this non optional in a future release + Option(context).foreach(c => c.taskMetrics.memoryBytesSpilled = combiners.memoryBytesSpilled) + Option(context).foreach(c => c.taskMetrics.diskBytesSpilled = combiners.diskBytesSpilled) + combiners.iterator } - combiners.iterator } - def combineCombinersByKey(iter: Iterator[(K, C)]) : Iterator[(K, C)] = { - val combiners = new AppendOnlyMap[K, C] - var kc: (K, C) = null - val update = (hadValue: Boolean, oldValue: C) => { - if (hadValue) mergeCombiners(oldValue, kc._2) else kc._2 - } - while (iter.hasNext) { - kc = iter.next() - combiners.changeValue(kc._1, update) + @deprecated("use combineCombinersByKey with TaskContext argument", "0.9.0") + def combineCombinersByKey(iter: Iterator[(K, C)]) : Iterator[(K, C)] = + combineCombinersByKey(iter, null) + + def combineCombinersByKey(iter: Iterator[(K, C)], context: TaskContext) : Iterator[(K, C)] = { + if (!externalSorting) { + val combiners = new AppendOnlyMap[K,C] + var kc: Product2[K, C] = null + val update = (hadValue: Boolean, oldValue: C) => { + if (hadValue) mergeCombiners(oldValue, kc._2) else kc._2 + } + while (iter.hasNext) { + kc = iter.next() + combiners.changeValue(kc._1, update) + } + combiners.iterator + } else { + val combiners = new ExternalAppendOnlyMap[K, C, C](identity, mergeCombiners, mergeCombiners) + while (iter.hasNext) { + val (k, c) = iter.next() + combiners.insert(k, c) + } + // TODO: Make this non optional in a future release + Option(context).foreach(c => c.taskMetrics.memoryBytesSpilled = combiners.memoryBytesSpilled) + Option(context).foreach(c => c.taskMetrics.diskBytesSpilled = combiners.diskBytesSpilled) + combiners.iterator } - combiners.iterator } } - diff --git a/core/src/main/scala/org/apache/spark/CacheManager.scala b/core/src/main/scala/org/apache/spark/CacheManager.scala index 519ecde50a163..8e5dd8a85020d 100644 --- a/core/src/main/scala/org/apache/spark/CacheManager.scala +++ b/core/src/main/scala/org/apache/spark/CacheManager.scala @@ -38,7 +38,7 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging { blockManager.get(key) match { case Some(values) => 
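The Aggregator changed here carries the same three functions a caller passes to `combineByKey`; the externalSorting flag only changes where the combiners live (an in-memory AppendOnlyMap versus a spillable ExternalAppendOnlyMap). A minimal word-count-shaped sketch of those functions, assuming a pair RDD named `words` (illustrative, not from this patch):

{{{
// The three combiner functions for summing counts per key, mirroring what
// PairRDDFunctions.combineByKey hands to an Aggregator[String, Int, Int].
val createCombiner = (v: Int) => v                   // first value seen for a key
val mergeValue     = (c: Int, v: Int) => c + v       // fold another value into a combiner
val mergeCombiners = (c1: Int, c2: Int) => c1 + c2   // merge combiners from different partitions

// val counts = words.map(w => (w, 1)).combineByKey(createCombiner, mergeValue, mergeCombiners)
}}}

Whether the aggregation spills to disk is then controlled by `spark.shuffle.spill` (default true), not by anything in the user code.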
// Partition is already materialized, so just return its values - return new InterruptibleIterator(context, values.asInstanceOf[Iterator[T]]) + new InterruptibleIterator(context, values.asInstanceOf[Iterator[T]]) case None => // Mark the split as loading (unless someone else marks it first) @@ -74,7 +74,7 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging { val elements = new ArrayBuffer[Any] elements ++= computedValues blockManager.put(key, elements, storageLevel, tellMaster = true) - return elements.iterator.asInstanceOf[Iterator[T]] + elements.iterator.asInstanceOf[Iterator[T]] } finally { loading.synchronized { loading.remove(key) diff --git a/core/src/main/scala/org/apache/spark/FutureAction.scala b/core/src/main/scala/org/apache/spark/FutureAction.scala index c6b4ac5192d14..d7d10285dadcb 100644 --- a/core/src/main/scala/org/apache/spark/FutureAction.scala +++ b/core/src/main/scala/org/apache/spark/FutureAction.scala @@ -27,8 +27,8 @@ import org.apache.spark.rdd.RDD /** - * A future for the result of an action. This is an extension of the Scala Future interface to - * support cancellation. + * A future for the result of an action to support cancellation. This is an extension of the + * Scala Future interface to support cancellation. */ trait FutureAction[T] extends Future[T] { // Note that we redefine methods of the Future trait here explicitly so we can specify a different @@ -86,7 +86,7 @@ trait FutureAction[T] extends Future[T] { /** - * The future holding the result of an action that triggers a single job. Examples include + * A [[FutureAction]] holding the result of an action that triggers a single job. Examples include * count, collect, reduce. */ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc: => T) @@ -150,7 +150,7 @@ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc: /** - * A FutureAction for actions that could trigger multiple Spark jobs. Examples include take, + * A [[FutureAction]] for actions that could trigger multiple Spark jobs. Examples include take, * takeSample. Cancellation works by setting the cancelled flag to true and interrupting the * action thread if it is being blocked by a job. 
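Since FutureAction extends the Scala Future, callers can block on it or cancel it like any other future. A hedged sketch, assuming the async RDD actions that return FutureAction (e.g. `countAsync` in `org.apache.spark.rdd.AsyncRDDActions`) and a SparkContext named `sc`:

{{{
import scala.concurrent.Await
import scala.concurrent.duration._
import org.apache.spark.SparkContext._   // implicit wrapper that provides the *Async actions

val future = sc.parallelize(1 to 1000000).countAsync()   // FutureAction[Long]
// future.cancel()                 // sets the cancelled flag and interrupts the running job
val count = Await.result(future, 30.seconds)
}}}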
*/ diff --git a/core/src/main/scala/org/apache/spark/HttpFileServer.scala b/core/src/main/scala/org/apache/spark/HttpFileServer.scala index ad1ee20045f46..a885898ad48d4 100644 --- a/core/src/main/scala/org/apache/spark/HttpFileServer.scala +++ b/core/src/main/scala/org/apache/spark/HttpFileServer.scala @@ -47,17 +47,17 @@ private[spark] class HttpFileServer extends Logging { def addFile(file: File) : String = { addFileToDir(file, fileDir) - return serverUri + "/files/" + file.getName + serverUri + "/files/" + file.getName } def addJar(file: File) : String = { addFileToDir(file, jarDir) - return serverUri + "/jars/" + file.getName + serverUri + "/jars/" + file.getName } def addFileToDir(file: File, dir: File) : String = { Files.copy(file, new File(dir, file.getName)) - return dir + "/" + file.getName + dir + "/" + file.getName } } diff --git a/core/src/main/scala/org/apache/spark/HttpServer.scala b/core/src/main/scala/org/apache/spark/HttpServer.scala index cdfc9dd54e06a..69a738dc4446a 100644 --- a/core/src/main/scala/org/apache/spark/HttpServer.scala +++ b/core/src/main/scala/org/apache/spark/HttpServer.scala @@ -46,6 +46,7 @@ private[spark] class HttpServer(resourceBase: File) extends Logging { if (server != null) { throw new ServerStateException("Server is already started") } else { + logInfo("Starting HTTP Server") server = new Server() val connector = new SocketConnector connector.setMaxIdleTime(60*1000) diff --git a/core/src/main/scala/org/apache/spark/InterruptibleIterator.scala b/core/src/main/scala/org/apache/spark/InterruptibleIterator.scala index 56e0b8d2c0b9b..9b1601d5b95fa 100644 --- a/core/src/main/scala/org/apache/spark/InterruptibleIterator.scala +++ b/core/src/main/scala/org/apache/spark/InterruptibleIterator.scala @@ -19,7 +19,7 @@ package org.apache.spark /** * An iterator that wraps around an existing iterator to provide task killing functionality. - * It works by checking the interrupted flag in TaskContext. + * It works by checking the interrupted flag in [[TaskContext]]. */ class InterruptibleIterator[+T](val context: TaskContext, val delegate: Iterator[T]) extends Iterator[T] { diff --git a/core/src/main/scala/org/apache/spark/Logging.scala b/core/src/main/scala/org/apache/spark/Logging.scala index 6a973ea4951c3..b749e5414dab6 100644 --- a/core/src/main/scala/org/apache/spark/Logging.scala +++ b/core/src/main/scala/org/apache/spark/Logging.scala @@ -17,8 +17,8 @@ package org.apache.spark -import org.slf4j.Logger -import org.slf4j.LoggerFactory +import org.apache.log4j.{LogManager, PropertyConfigurator} +import org.slf4j.{Logger, LoggerFactory} /** * Utility trait for classes that want to log data. Creates a SLF4J logger for the class and allows @@ -33,6 +33,7 @@ trait Logging { // Method to get or create the logger for this object protected def log: Logger = { if (log_ == null) { + initializeIfNecessary() var className = this.getClass.getName // Ignore trailing $'s in the class names for Scala objects if (className.endsWith("$")) { @@ -40,7 +41,7 @@ trait Logging { } log_ = LoggerFactory.getLogger(className) } - return log_ + log_ } // Log methods that take only a String @@ -89,7 +90,39 @@ trait Logging { log.isTraceEnabled } - // Method for ensuring that logging is initialized, to avoid having multiple - // threads do it concurrently (as SLF4J initialization is not thread safe). 
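With the initialization change below, the first log call made through the Logging trait loads `org/apache/spark/log4j-defaults.properties` when nothing else has configured log4j. A small sketch of mixing the trait into user code (the class name is illustrative):

{{{
import org.apache.spark.Logging

class IngestJob extends Logging {
  def run() {
    logInfo("starting ingest")   // logger name is derived from the class name, trailing $ stripped
  }
}
}}}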
- protected def initLogging() { log } + private def initializeIfNecessary() { + if (!Logging.initialized) { + Logging.initLock.synchronized { + if (!Logging.initialized) { + initializeLogging() + } + } + } + } + + private def initializeLogging() { + // If Log4j doesn't seem initialized, load a default properties file + val log4jInitialized = LogManager.getRootLogger.getAllAppenders.hasMoreElements + if (!log4jInitialized) { + val defaultLogProps = "org/apache/spark/log4j-defaults.properties" + val classLoader = this.getClass.getClassLoader + Option(classLoader.getResource(defaultLogProps)) match { + case Some(url) => + PropertyConfigurator.configure(url) + log.info(s"Using Spark's default log4j profile: $defaultLogProps") + case None => + System.err.println(s"Spark was unable to load $defaultLogProps") + } + } + Logging.initialized = true + + // Force a call into slf4j to initialize it. Avoids this happening from mutliple threads + // and triggering this: http://mailman.qos.ch/pipermail/slf4j-dev/2010-April/002956.html + log + } +} + +private object Logging { + @volatile private var initialized = false + val initLock = new Object() } diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 4be338e4c56d0..30d182b008930 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -20,31 +20,29 @@ package org.apache.spark import java.io._ import java.util.zip.{GZIPInputStream, GZIPOutputStream} -import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet +import scala.concurrent.Await +import scala.concurrent.duration._ import akka.actor._ -import akka.dispatch._ import akka.pattern.ask -import akka.remote._ -import akka.util.Duration - import org.apache.spark.scheduler.MapStatus import org.apache.spark.storage.BlockManagerId -import org.apache.spark.util.{MetadataCleanerType, Utils, MetadataCleaner, TimeStampedHashMap} - +import org.apache.spark.util.{AkkaUtils, MetadataCleaner, MetadataCleanerType, TimeStampedHashMap, Utils} private[spark] sealed trait MapOutputTrackerMessage -private[spark] case class GetMapOutputStatuses(shuffleId: Int, requester: String) +private[spark] case class GetMapOutputStatuses(shuffleId: Int) extends MapOutputTrackerMessage private[spark] case object StopMapOutputTracker extends MapOutputTrackerMessage -private[spark] class MapOutputTrackerActor(tracker: MapOutputTracker) extends Actor with Logging { +private[spark] class MapOutputTrackerMasterActor(tracker: MapOutputTrackerMaster) + extends Actor with Logging { def receive = { - case GetMapOutputStatuses(shuffleId: Int, requester: String) => - logInfo("Asked to send map output locations for shuffle " + shuffleId + " to " + requester) - sender ! tracker.getSerializedLocations(shuffleId) + case GetMapOutputStatuses(shuffleId: Int) => + val hostPort = sender.path.address.hostPort + logInfo("Asked to send map output locations for shuffle " + shuffleId + " to " + hostPort) + sender ! 
tracker.getSerializedMapOutputStatuses(shuffleId) case StopMapOutputTracker => logInfo("MapOutputTrackerActor stopped!") @@ -53,32 +51,29 @@ private[spark] class MapOutputTrackerActor(tracker: MapOutputTracker) extends Ac } } -private[spark] class MapOutputTracker extends Logging { +private[spark] class MapOutputTracker(conf: SparkConf) extends Logging { + + private val timeout = AkkaUtils.askTimeout(conf) - private val timeout = Duration.create(System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds") - // Set to the MapOutputTrackerActor living on the driver var trackerActor: ActorRef = _ - private var mapStatuses = new TimeStampedHashMap[Int, Array[MapStatus]] + protected val mapStatuses = new TimeStampedHashMap[Int, Array[MapStatus]] // Incremented every time a fetch fails so that client nodes know to clear // their cache of map output locations if this happens. - private var epoch: Long = 0 - private val epochLock = new java.lang.Object + protected var epoch: Long = 0 + protected val epochLock = new java.lang.Object - // Cache a serialized version of the output statuses for each shuffle to send them out faster - var cacheEpoch = epoch - private val cachedSerializedStatuses = new TimeStampedHashMap[Int, Array[Byte]] - - val metadataCleaner = new MetadataCleaner(MetadataCleanerType.MAP_OUTPUT_TRACKER, this.cleanup) + private val metadataCleaner = + new MetadataCleaner(MetadataCleanerType.MAP_OUTPUT_TRACKER, this.cleanup, conf) // Send a message to the trackerActor and get its result within a default timeout, or // throw a SparkException if this fails. - def askTracker(message: Any): Any = { + private def askTracker(message: Any): Any = { try { val future = trackerActor.ask(message)(timeout) - return Await.result(future, timeout) + Await.result(future, timeout) } catch { case e: Exception => throw new SparkException("Error communicating with MapOutputTracker", e) @@ -86,50 +81,12 @@ private[spark] class MapOutputTracker extends Logging { } // Send a one-way message to the trackerActor, to which we expect it to reply with true. 
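The ask timeout used by `askTracker` now comes from the SparkConf via `AkkaUtils.askTimeout(conf)` instead of a raw system property; the key is the same `spark.akka.askTimeout` (in seconds) that the removed line read. A sketch of tuning it:

{{{
val conf = new SparkConf()
  .setMaster("local[2]")
  .setAppName("timeout-demo")
  .set("spark.akka.askTimeout", "60")   // seconds, consumed through AkkaUtils.askTimeout(conf)
}}}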
- def communicate(message: Any) { + private def communicate(message: Any) { if (askTracker(message) != true) { throw new SparkException("Error reply received from MapOutputTracker") } } - def registerShuffle(shuffleId: Int, numMaps: Int) { - if (mapStatuses.putIfAbsent(shuffleId, new Array[MapStatus](numMaps)).isDefined) { - throw new IllegalArgumentException("Shuffle ID " + shuffleId + " registered twice") - } - } - - def registerMapOutput(shuffleId: Int, mapId: Int, status: MapStatus) { - var array = mapStatuses(shuffleId) - array.synchronized { - array(mapId) = status - } - } - - def registerMapOutputs( - shuffleId: Int, - statuses: Array[MapStatus], - changeEpoch: Boolean = false) { - mapStatuses.put(shuffleId, Array[MapStatus]() ++ statuses) - if (changeEpoch) { - incrementEpoch() - } - } - - def unregisterMapOutput(shuffleId: Int, mapId: Int, bmAddress: BlockManagerId) { - var arrayOpt = mapStatuses.get(shuffleId) - if (arrayOpt.isDefined && arrayOpt.get != null) { - var array = arrayOpt.get - array.synchronized { - if (array(mapId) != null && array(mapId).location == bmAddress) { - array(mapId) = null - } - } - incrementEpoch() - } else { - throw new SparkException("unregisterMapOutput called for nonexistent shuffle ID") - } - } - // Remembers which map output locations are currently being fetched on a worker private val fetching = new HashSet[Int] @@ -159,16 +116,15 @@ private[spark] class MapOutputTracker extends Logging { fetching += shuffleId } } - + if (fetchedStatuses == null) { // We won the race to fetch the output locs; do so logInfo("Doing the fetch; tracker actor = " + trackerActor) - val hostPort = Utils.localHostPort() // This try-finally prevents hangs due to timeouts: try { val fetchedBytes = - askTracker(GetMapOutputStatuses(shuffleId, hostPort)).asInstanceOf[Array[Byte]] - fetchedStatuses = deserializeStatuses(fetchedBytes) + askTracker(GetMapOutputStatuses(shuffleId)).asInstanceOf[Array[Byte]] + fetchedStatuses = MapOutputTracker.deserializeMapStatuses(fetchedBytes) logInfo("Got the output locations") mapStatuses.put(shuffleId, fetchedStatuses) } finally { @@ -183,10 +139,10 @@ private[spark] class MapOutputTracker extends Logging { return MapOutputTracker.convertMapStatuses(shuffleId, reduceId, fetchedStatuses) } } - else{ + else { throw new FetchFailedException(null, shuffleId, -1, reduceId, new Exception("Missing all output locations for shuffle " + shuffleId)) - } + } } else { statuses.synchronized { return MapOutputTracker.convertMapStatuses(shuffleId, reduceId, statuses) @@ -194,9 +150,8 @@ private[spark] class MapOutputTracker extends Logging { } } - private def cleanup(cleanupTime: Long) { + protected def cleanup(cleanupTime: Long) { mapStatuses.clearOldValues(cleanupTime) - cachedSerializedStatuses.clearOldValues(cleanupTime) } def stop() { @@ -206,15 +161,7 @@ private[spark] class MapOutputTracker extends Logging { trackerActor = null } - // Called on master to increment the epoch number - def incrementEpoch() { - epochLock.synchronized { - epoch += 1 - logDebug("Increasing epoch to " + epoch) - } - } - - // Called on master or workers to get current epoch number + // Called to get current epoch number def getEpoch: Long = { epochLock.synchronized { return epoch @@ -228,14 +175,63 @@ private[spark] class MapOutputTracker extends Logging { epochLock.synchronized { if (newEpoch > epoch) { logInfo("Updating epoch to " + newEpoch + " and clearing cache") - // mapStatuses = new TimeStampedHashMap[Int, Array[MapStatus]] - mapStatuses.clear() epoch = newEpoch + 
mapStatuses.clear() + } + } + } +} + +private[spark] class MapOutputTrackerMaster(conf: SparkConf) + extends MapOutputTracker(conf) { + + // Cache a serialized version of the output statuses for each shuffle to send them out faster + private var cacheEpoch = epoch + private val cachedSerializedStatuses = new TimeStampedHashMap[Int, Array[Byte]] + + def registerShuffle(shuffleId: Int, numMaps: Int) { + if (mapStatuses.putIfAbsent(shuffleId, new Array[MapStatus](numMaps)).isDefined) { + throw new IllegalArgumentException("Shuffle ID " + shuffleId + " registered twice") + } + } + + def registerMapOutput(shuffleId: Int, mapId: Int, status: MapStatus) { + val array = mapStatuses(shuffleId) + array.synchronized { + array(mapId) = status + } + } + + def registerMapOutputs(shuffleId: Int, statuses: Array[MapStatus], changeEpoch: Boolean = false) { + mapStatuses.put(shuffleId, Array[MapStatus]() ++ statuses) + if (changeEpoch) { + incrementEpoch() + } + } + + def unregisterMapOutput(shuffleId: Int, mapId: Int, bmAddress: BlockManagerId) { + val arrayOpt = mapStatuses.get(shuffleId) + if (arrayOpt.isDefined && arrayOpt.get != null) { + val array = arrayOpt.get + array.synchronized { + if (array(mapId) != null && array(mapId).location == bmAddress) { + array(mapId) = null + } } + incrementEpoch() + } else { + throw new SparkException("unregisterMapOutput called for nonexistent shuffle ID") + } + } + + def incrementEpoch() { + epochLock.synchronized { + epoch += 1 + logDebug("Increasing epoch to " + epoch) } } - def getSerializedLocations(shuffleId: Int): Array[Byte] = { + def getSerializedMapOutputStatuses(shuffleId: Int): Array[Byte] = { var statuses: Array[MapStatus] = null var epochGotten: Long = -1 epochLock.synchronized { @@ -252,8 +248,8 @@ private[spark] class MapOutputTracker extends Logging { } } // If we got here, we failed to find the serialized locations in the cache, so we pulled - // out a snapshot of the locations as "locs"; let's serialize and return that - val bytes = serializeStatuses(statuses) + // out a snapshot of the locations as "statuses"; let's serialize and return that + val bytes = MapOutputTracker.serializeMapStatuses(statuses) logInfo("Size of output statuses for shuffle %d is %d bytes".format(shuffleId, bytes.length)) // Add them into the table only if the epoch hasn't changed while we were working epochLock.synchronized { @@ -264,14 +260,32 @@ private[spark] class MapOutputTracker extends Logging { bytes } + protected override def cleanup(cleanupTime: Long) { + super.cleanup(cleanupTime) + cachedSerializedStatuses.clearOldValues(cleanupTime) + } + + override def stop() { + super.stop() + cachedSerializedStatuses.clear() + } + + override def updateEpoch(newEpoch: Long) { + // This might be called on the MapOutputTrackerMaster if we're running in local mode. + } + def has(shuffleId: Int): Boolean = { cachedSerializedStatuses.get(shuffleId).isDefined || mapStatuses.contains(shuffleId) } - +} + +private[spark] object MapOutputTracker { + private val LOG_BASE = 1.1 + // Serialize an array of map output locations into an efficient byte format so that we can send // it to reduce tasks. We do this by compressing the serialized bytes using GZIP. They will // generally be pretty compressible because many map outputs will be on the same hostname. 
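The serialization scheme described in the comment above is plain Java serialization wrapped in GZIP. A generic sketch of the same pattern with standard JDK streams (method names are illustrative):

{{{
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

def serializeCompressed(obj: AnyRef): Array[Byte] = {
  val out = new ByteArrayOutputStream
  val objOut = new ObjectOutputStream(new GZIPOutputStream(out))
  objOut.writeObject(obj)
  objOut.close()                 // finishes the GZIP stream and flushes into `out`
  out.toByteArray
}

def deserializeCompressed[T](bytes: Array[Byte]): T = {
  val objIn = new ObjectInputStream(new GZIPInputStream(new ByteArrayInputStream(bytes)))
  objIn.readObject().asInstanceOf[T]
}
}}}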
- private def serializeStatuses(statuses: Array[MapStatus]): Array[Byte] = { + def serializeMapStatuses(statuses: Array[MapStatus]): Array[Byte] = { val out = new ByteArrayOutputStream val objOut = new ObjectOutputStream(new GZIPOutputStream(out)) // Since statuses can be modified in parallel, sync on it @@ -282,18 +296,11 @@ private[spark] class MapOutputTracker extends Logging { out.toByteArray } - // Opposite of serializeStatuses. - def deserializeStatuses(bytes: Array[Byte]): Array[MapStatus] = { + // Opposite of serializeMapStatuses. + def deserializeMapStatuses(bytes: Array[Byte]): Array[MapStatus] = { val objIn = new ObjectInputStream(new GZIPInputStream(new ByteArrayInputStream(bytes))) - objIn.readObject(). - // // drop all null's from status - not sure why they are occuring though. Causes NPE downstream in slave if present - // comment this out - nulls could be due to missing location ? - asInstanceOf[Array[MapStatus]] // .filter( _ != null ) + objIn.readObject().asInstanceOf[Array[MapStatus]] } -} - -private[spark] object MapOutputTracker { - private val LOG_BASE = 1.1 // Convert an array of MapStatuses to locations and sizes for a given reduce ID. If // any of the statuses is null (indicating a missing location due to a failed mapper), @@ -304,7 +311,7 @@ private[spark] object MapOutputTracker { statuses: Array[MapStatus]): Array[(BlockManagerId, Long)] = { assert (statuses != null) statuses.map { - status => + status => if (status == null) { throw new FetchFailedException(null, shuffleId, -1, reduceId, new Exception("Missing an output location for shuffle " + shuffleId)) diff --git a/core/src/main/scala/org/apache/spark/Partitioner.scala b/core/src/main/scala/org/apache/spark/Partitioner.scala index 0e2c987a598ec..cfba43dec3111 100644 --- a/core/src/main/scala/org/apache/spark/Partitioner.scala +++ b/core/src/main/scala/org/apache/spark/Partitioner.scala @@ -17,8 +17,10 @@ package org.apache.spark -import org.apache.spark.util.Utils +import scala.reflect.ClassTag + import org.apache.spark.rdd.RDD +import org.apache.spark.util.Utils /** * An object that defines how the elements in a key-value pair RDD are partitioned by key. @@ -39,7 +41,7 @@ object Partitioner { * spark.default.parallelism is set, then we'll use the value from SparkContext * defaultParallelism, otherwise we'll use the max number of upstream partitions. * - * Unless spark.default.parallelism is set, He number of partitions will be the + * Unless spark.default.parallelism is set, the number of partitions will be the * same as the number of partitions in the largest upstream RDD, as this should * be least likely to cause out-of-memory errors. * @@ -47,19 +49,20 @@ object Partitioner { */ def defaultPartitioner(rdd: RDD[_], others: RDD[_]*): Partitioner = { val bySize = (Seq(rdd) ++ others).sortBy(_.partitions.size).reverse - for (r <- bySize if r.partitioner != None) { + for (r <- bySize if r.partitioner.isDefined) { return r.partitioner.get } - if (System.getProperty("spark.default.parallelism") != null) { - return new HashPartitioner(rdd.context.defaultParallelism) + if (rdd.context.conf.contains("spark.default.parallelism")) { + new HashPartitioner(rdd.context.defaultParallelism) } else { - return new HashPartitioner(bySize.head.partitions.size) + new HashPartitioner(bySize.head.partitions.size) } } } /** - * A [[org.apache.spark.Partitioner]] that implements hash-based partitioning using Java's `Object.hashCode`. 
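HashPartitioner places a key in partition `nonNegativeMod(key.hashCode, numPartitions)`. A short usage sketch, assuming a SparkContext named `sc`:

{{{
import org.apache.spark.HashPartitioner
import org.apache.spark.SparkContext._   // implicit conversion to pair-RDD functions

val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))
val partitioned = pairs.partitionBy(new HashPartitioner(4))
// every record for a given key lands in the same one of the 4 partitions
}}}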
+ * A [[org.apache.spark.Partitioner]] that implements hash-based partitioning using + * Java's `Object.hashCode`. * * Java arrays have hashCodes that are based on the arrays' identities rather than their contents, * so attempting to partition an RDD[Array[_]] or RDD[(Array[_], _)] using a HashPartitioner will @@ -72,7 +75,7 @@ class HashPartitioner(partitions: Int) extends Partitioner { case null => 0 case _ => Utils.nonNegativeMod(key.hashCode, numPartitions) } - + override def equals(other: Any): Boolean = other match { case h: HashPartitioner => h.numPartitions == numPartitions @@ -82,13 +85,13 @@ class HashPartitioner(partitions: Int) extends Partitioner { } /** - * A [[org.apache.spark.Partitioner]] that partitions sortable records by range into roughly equal ranges. - * Determines the ranges by sampling the RDD passed in. + * A [[org.apache.spark.Partitioner]] that partitions sortable records by range into roughly + * equal ranges. The ranges are determined by sampling the content of the RDD passed in. */ -class RangePartitioner[K <% Ordered[K]: ClassManifest, V]( +class RangePartitioner[K <% Ordered[K]: ClassTag, V]( partitions: Int, @transient rdd: RDD[_ <: Product2[K,V]], - private val ascending: Boolean = true) + private val ascending: Boolean = true) extends Partitioner { // An array of upper bounds for the first (partitions - 1) partitions diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala new file mode 100644 index 0000000000000..45d19bcbfa6f2 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import scala.collection.JavaConverters._ +import scala.collection.mutable.HashMap + +import java.io.{ObjectInputStream, ObjectOutputStream, IOException} + +/** + * Configuration for a Spark application. Used to set various Spark parameters as key-value pairs. + * + * Most of the time, you would create a SparkConf object with `new SparkConf()`, which will load + * values from any `spark.*` Java system properties set in your application as well. In this case, + * parameters you set directly on the `SparkConf` object take priority over system properties. + * + * For unit tests, you can also call `new SparkConf(false)` to skip loading external settings and + * get the same configuration no matter what the system properties are. + * + * All setter methods in this class support chaining. For example, you can write + * `new SparkConf().setMaster("local").setAppName("My app")`. + * + * Note that once a SparkConf object is passed to Spark, it is cloned and can no longer be modified + * by the user. Spark does not support modifying the configuration at runtime. 
+ * + * @param loadDefaults whether to also load values from Java system properties + */ +class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { + + /** Create a SparkConf that loads defaults from system properties and the classpath */ + def this() = this(true) + + private val settings = new HashMap[String, String]() + + if (loadDefaults) { + // Load any spark.* system properties + for ((k, v) <- System.getProperties.asScala if k.startsWith("spark.")) { + settings(k) = v + } + } + + /** Set a configuration variable. */ + def set(key: String, value: String): SparkConf = { + if (key == null) { + throw new NullPointerException("null key") + } + if (value == null) { + throw new NullPointerException("null value") + } + settings(key) = value + this + } + + /** + * The master URL to connect to, such as "local" to run locally with one thread, "local[4]" to + * run locally with 4 cores, or "spark://master:7077" to run on a Spark standalone cluster. + */ + def setMaster(master: String): SparkConf = { + set("spark.master", master) + } + + /** Set a name for your application. Shown in the Spark web UI. */ + def setAppName(name: String): SparkConf = { + set("spark.app.name", name) + } + + /** Set JAR files to distribute to the cluster. */ + def setJars(jars: Seq[String]): SparkConf = { + for (jar <- jars if (jar == null)) logWarning("null jar passed to SparkContext constructor") + set("spark.jars", jars.filter(_ != null).mkString(",")) + } + + /** Set JAR files to distribute to the cluster. (Java-friendly version.) */ + def setJars(jars: Array[String]): SparkConf = { + setJars(jars.toSeq) + } + + /** + * Set an environment variable to be used when launching executors for this application. + * These variables are stored as properties of the form spark.executorEnv.VAR_NAME + * (for example spark.executorEnv.PATH) but this method makes them easier to set. + */ + def setExecutorEnv(variable: String, value: String): SparkConf = { + set("spark.executorEnv." + variable, value) + } + + /** + * Set multiple environment variables to be used when launching executors. + * These variables are stored as properties of the form spark.executorEnv.VAR_NAME + * (for example spark.executorEnv.PATH) but this method makes them easier to set. + */ + def setExecutorEnv(variables: Seq[(String, String)]): SparkConf = { + for ((k, v) <- variables) { + setExecutorEnv(k, v) + } + this + } + + /** + * Set multiple environment variables to be used when launching executors. + * (Java-friendly version.) + */ + def setExecutorEnv(variables: Array[(String, String)]): SparkConf = { + setExecutorEnv(variables.toSeq) + } + + /** + * Set the location where Spark is installed on worker nodes. 
+ */ + def setSparkHome(home: String): SparkConf = { + set("spark.home", home) + } + + /** Set multiple parameters together */ + def setAll(settings: Traversable[(String, String)]) = { + this.settings ++= settings + this + } + + /** Set a parameter if it isn't already configured */ + def setIfMissing(key: String, value: String): SparkConf = { + if (!settings.contains(key)) { + settings(key) = value + } + this + } + + /** Remove a parameter from the configuration */ + def remove(key: String): SparkConf = { + settings.remove(key) + this + } + + /** Get a parameter; throws a NoSuchElementException if it's not set */ + def get(key: String): String = { + settings.getOrElse(key, throw new NoSuchElementException(key)) + } + + /** Get a parameter, falling back to a default if not set */ + def get(key: String, defaultValue: String): String = { + settings.getOrElse(key, defaultValue) + } + + /** Get a parameter as an Option */ + def getOption(key: String): Option[String] = { + settings.get(key) + } + + /** Get all parameters as a list of pairs */ + def getAll: Array[(String, String)] = settings.clone().toArray + + /** Get a parameter as an integer, falling back to a default if not set */ + def getInt(key: String, defaultValue: Int): Int = { + getOption(key).map(_.toInt).getOrElse(defaultValue) + } + + /** Get a parameter as a long, falling back to a default if not set */ + def getLong(key: String, defaultValue: Long): Long = { + getOption(key).map(_.toLong).getOrElse(defaultValue) + } + + /** Get a parameter as a double, falling back to a default if not set */ + def getDouble(key: String, defaultValue: Double): Double = { + getOption(key).map(_.toDouble).getOrElse(defaultValue) + } + + /** Get a parameter as a boolean, falling back to a default if not set */ + def getBoolean(key: String, defaultValue: Boolean): Boolean = { + getOption(key).map(_.toBoolean).getOrElse(defaultValue) + } + + /** Get all executor environment variables set on this SparkConf */ + def getExecutorEnv: Seq[(String, String)] = { + val prefix = "spark.executorEnv." + getAll.filter{case (k, v) => k.startsWith(prefix)} + .map{case (k, v) => (k.substring(prefix.length), v)} + } + + /** Get all akka conf variables set on this SparkConf */ + def getAkkaConf: Seq[(String, String)] = + /* This is currently undocumented. If we want to make this public we should consider + * nesting options under the spark namespace to avoid conflicts with user akka options. + * Otherwise users configuring their own akka code via system properties could mess up + * spark's akka options. + * + * E.g. spark.akka.option.x.y.x = "value" + */ + getAll.filter {case (k, v) => k.startsWith("akka.")} + + /** Does the configuration contain a given parameter? */ + def contains(key: String): Boolean = settings.contains(key) + + /** Copy this object */ + override def clone: SparkConf = { + new SparkConf(false).setAll(settings) + } + + /** + * Return a string listing all keys and values, one per line. This is useful to print the + * configuration out for debugging. 
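A minimal sketch of the SparkConf workflow this class enables: chained setters, typed getters with defaults, and a debug dump. Names and values are illustrative:

{{{
val conf = new SparkConf()                   // also picks up spark.* system properties
  .setMaster("local[2]")
  .setAppName("ConfExample")
  .set("spark.shuffle.spill", "true")

val spill = conf.getBoolean("spark.shuffle.spill", true)
println(conf.toDebugString)                  // one key=value per line, sorted

val sc = new SparkContext(conf)              // conf is cloned here; later changes are not seen
}}}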
+ */ + def toDebugString: String = { + settings.toArray.sorted.map{case (k, v) => k + "=" + v}.mkString("\n") + } +} diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index eb5bb17539fb6..566472e597958 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -19,35 +19,23 @@ package org.apache.spark import java.io._ import java.net.URI -import java.util.Properties +import java.util.{UUID, Properties} import java.util.concurrent.atomic.AtomicInteger -import scala.collection.Map +import scala.collection.{Map, Set} import scala.collection.generic.Growable -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap + +import scala.collection.mutable.{ArrayBuffer, HashMap} +import scala.reflect.{ClassTag, classTag} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.hadoop.io.ArrayWritable -import org.apache.hadoop.io.BooleanWritable -import org.apache.hadoop.io.BytesWritable -import org.apache.hadoop.io.DoubleWritable -import org.apache.hadoop.io.FloatWritable -import org.apache.hadoop.io.IntWritable -import org.apache.hadoop.io.LongWritable -import org.apache.hadoop.io.NullWritable -import org.apache.hadoop.io.Text -import org.apache.hadoop.io.Writable -import org.apache.hadoop.mapred.FileInputFormat -import org.apache.hadoop.mapred.InputFormat -import org.apache.hadoop.mapred.JobConf -import org.apache.hadoop.mapred.SequenceFileInputFormat -import org.apache.hadoop.mapred.TextInputFormat -import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} -import org.apache.hadoop.mapreduce.{Job => NewHadoopJob} +import org.apache.hadoop.io.{ArrayWritable, BooleanWritable, BytesWritable, DoubleWritable, + FloatWritable, IntWritable, LongWritable, NullWritable, Text, Writable} +import org.apache.hadoop.mapred.{FileInputFormat, InputFormat, JobConf, SequenceFileInputFormat, + TextInputFormat} +import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat, Job => NewHadoopJob} import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat => NewFileInputFormat} - import org.apache.mesos.MesosNativeLibrary import org.apache.spark.deploy.{LocalSparkCluster, SparkHadoopUtil} @@ -55,59 +43,106 @@ import org.apache.spark.partial.{ApproximateEvaluator, PartialResult} import org.apache.spark.rdd._ import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.{CoarseGrainedSchedulerBackend, - SparkDeploySchedulerBackend, ClusterScheduler, SimrSchedulerBackend} + SparkDeploySchedulerBackend, SimrSchedulerBackend} import org.apache.spark.scheduler.cluster.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend} -import org.apache.spark.scheduler.local.LocalScheduler -import org.apache.spark.scheduler.StageInfo +import org.apache.spark.scheduler.local.LocalBackend import org.apache.spark.storage.{BlockManagerSource, RDDInfo, StorageStatus, StorageUtils} import org.apache.spark.ui.SparkUI -import org.apache.spark.util.{ClosureCleaner, MetadataCleaner, MetadataCleanerType, - TimeStampedHashMap, Utils} +import org.apache.spark.util.{Utils, TimeStampedHashMap, MetadataCleaner, MetadataCleanerType, + ClosureCleaner} /** * Main entry point for Spark functionality. A SparkContext represents the connection to a Spark * cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster. * - * @param master Cluster URL to connect to (e.g. 
mesos://host:port, spark://host:port, local[4]). - * @param appName A name for your application, to display on the cluster web UI. - * @param sparkHome Location where Spark is installed on cluster nodes. - * @param jars Collection of JARs to send to the cluster. These can be paths on the local file - * system or HDFS, HTTP, HTTPS, or FTP URLs. - * @param environment Environment variables to set on worker nodes. + * @param config a Spark Config object describing the application configuration. Any settings in + * this config overrides the default configs as well as system properties. + * @param preferredNodeLocationData used in YARN mode to select nodes to launch containers on. Can + * be generated using [[org.apache.spark.scheduler.InputFormatInfo.computePreferredLocations]] + * from a list of input files or InputFormats for the application. */ class SparkContext( - val master: String, - val appName: String, - val sparkHome: String = null, - val jars: Seq[String] = Nil, - val environment: Map[String, String] = Map(), + config: SparkConf, // This is used only by YARN for now, but should be relevant to other cluster types (Mesos, etc) - // too. This is typically generated from InputFormatInfo.computePreferredLocations .. host, set - // of data-local splits on host - val preferredNodeLocationData: scala.collection.Map[String, scala.collection.Set[SplitInfo]] = - scala.collection.immutable.Map()) + // too. This is typically generated from InputFormatInfo.computePreferredLocations. It contains + // a map from hostname to a list of input format splits on the host. + val preferredNodeLocationData: Map[String, Set[SplitInfo]] = Map()) extends Logging { - // Ensure logging is initialized before we spawn any threads - initLogging() + /** + * Alternative constructor that allows setting common Spark properties directly + * + * @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]). + * @param appName A name for your application, to display on the cluster web UI + * @param conf a [[org.apache.spark.SparkConf]] object specifying other Spark parameters + */ + def this(master: String, appName: String, conf: SparkConf) = + this(SparkContext.updatedConf(conf, master, appName)) - // Set Spark driver host and port system properties - if (System.getProperty("spark.driver.host") == null) { - System.setProperty("spark.driver.host", Utils.localHostName()) + /** + * Alternative constructor that allows setting common Spark properties directly + * + * @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]). + * @param appName A name for your application, to display on the cluster web UI. + * @param sparkHome Location where Spark is installed on cluster nodes. + * @param jars Collection of JARs to send to the cluster. These can be paths on the local file + * system or HDFS, HTTP, HTTPS, or FTP URLs. + * @param environment Environment variables to set on worker nodes. + */ + def this( + master: String, + appName: String, + sparkHome: String = null, + jars: Seq[String] = Nil, + environment: Map[String, String] = Map(), + preferredNodeLocationData: Map[String, Set[SplitInfo]] = Map()) = + { + this(SparkContext.updatedConf(new SparkConf(), master, appName, sparkHome, jars, environment), + preferredNodeLocationData) + } + + private[spark] val conf = config.clone() + + /** + * Return a copy of this SparkContext's configuration. The configuration ''cannot'' be + * changed at runtime. 
+ */ + def getConf: SparkConf = conf.clone() + + if (!conf.contains("spark.master")) { + throw new SparkException("A master URL must be set in your configuration") } - if (System.getProperty("spark.driver.port") == null) { - System.setProperty("spark.driver.port", "0") + if (!conf.contains("spark.app.name")) { + throw new SparkException("An application must be set in your configuration") } + if (conf.getBoolean("spark.logConf", false)) { + logInfo("Spark configuration:\n" + conf.toDebugString) + } + + // Set Spark driver host and port system properties + conf.setIfMissing("spark.driver.host", Utils.localHostName()) + conf.setIfMissing("spark.driver.port", "0") + + val jars: Seq[String] = if (conf.contains("spark.jars")) { + conf.get("spark.jars").split(",").filter(_.size != 0) + } else { + null + } + + val master = conf.get("spark.master") + val appName = conf.get("spark.app.name") + val isLocal = (master == "local" || master.startsWith("local[")) // Create the Spark execution environment (cache, map output tracker, etc) - private[spark] val env = SparkEnv.createFromSystemProperties( + private[spark] val env = SparkEnv.create( + conf, "", - System.getProperty("spark.driver.host"), - System.getProperty("spark.driver.port").toInt, - true, - isLocal) + conf.get("spark.driver.host"), + conf.get("spark.driver.port").toInt, + isDriver = true, + isLocal = isLocal) SparkEnv.set(env) // Used to store a URL for each static file/jar together with the file's local timestamp @@ -116,7 +151,8 @@ class SparkContext( // Keeps track of all persisted RDDs private[spark] val persistentRdds = new TimeStampedHashMap[Int, RDD[_]] - private[spark] val metadataCleaner = new MetadataCleaner(MetadataCleanerType.SPARK_CONTEXT, this.cleanup) + private[spark] val metadataCleaner = + new MetadataCleaner(MetadataCleanerType.SPARK_CONTEXT, this.cleanup, conf) // Initialize the Spark UI private[spark] val ui = new SparkUI(this) @@ -126,23 +162,30 @@ class SparkContext( // Add each JAR given through the constructor if (jars != null) { - jars.foreach { addJar(_) } + jars.foreach(addJar) } + private[spark] val executorMemory = conf.getOption("spark.executor.memory") + .orElse(Option(System.getenv("SPARK_MEM"))) + .map(Utils.memoryStringToMb) + .getOrElse(512) + // Environment variables to pass to our executors private[spark] val executorEnvs = HashMap[String, String]() // Note: SPARK_MEM is included for Mesos, but overwritten for standalone mode in ExecutorRunner - for (key <- Seq("SPARK_CLASSPATH", "SPARK_LIBRARY_PATH", "SPARK_JAVA_OPTS", "SPARK_TESTING")) { - val value = System.getenv(key) - if (value != null) { - executorEnvs(key) = value - } + for (key <- Seq("SPARK_CLASSPATH", "SPARK_LIBRARY_PATH", "SPARK_JAVA_OPTS"); + value <- Option(System.getenv(key))) { + executorEnvs(key) = value } - // Since memory can be set with a system property too, use that - executorEnvs("SPARK_MEM") = SparkContext.executorMemoryRequested + "m" - if (environment != null) { - executorEnvs ++= environment + // Convert java options to env vars as a work around + // since we can't set env vars directly in sbt. + for { (envKey, propKey) <- Seq(("SPARK_HOME", "spark.home"), ("SPARK_TESTING", "spark.testing")) + value <- Option(System.getenv(envKey)).orElse(Option(System.getProperty(propKey)))} { + executorEnvs(envKey) = value } + // Since memory can be set with a system property too, use that + executorEnvs("SPARK_MEM") = executorMemory + "m" + executorEnvs ++= conf.getExecutorEnv // Set SPARK_USER for user who is running SparkContext. 
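Executor memory and executor environment variables are now read from the SparkConf as well: `spark.executor.memory` (falling back to the SPARK_MEM environment variable) and the `spark.executorEnv.*` keys. A hedged sketch with illustrative values:

{{{
val conf = new SparkConf()
  .setMaster("spark://master:7077")          // hypothetical standalone master URL
  .setAppName("MemoryDemo")
  .set("spark.executor.memory", "2g")        // parsed with Utils.memoryStringToMb
  .setExecutorEnv("MY_ENV_VAR", "value")     // stored as spark.executorEnv.MY_ENV_VAR
}}}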
val sparkUser = Option { @@ -153,123 +196,7 @@ class SparkContext( executorEnvs("SPARK_USER") = sparkUser // Create and start the scheduler - private[spark] var taskScheduler: TaskScheduler = { - // Regular expression used for local[N] master format - val LOCAL_N_REGEX = """local\[([0-9]+)\]""".r - // Regular expression for local[N, maxRetries], used in tests with failing tasks - val LOCAL_N_FAILURES_REGEX = """local\[([0-9]+)\s*,\s*([0-9]+)\]""".r - // Regular expression for simulating a Spark cluster of [N, cores, memory] locally - val LOCAL_CLUSTER_REGEX = """local-cluster\[\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*]""".r - // Regular expression for connecting to Spark deploy clusters - val SPARK_REGEX = """spark://(.*)""".r - //Regular expression for connection to Mesos cluster - val MESOS_REGEX = """(mesos://.*)""".r - // Regular expression for connection to Simr cluster - val SIMR_REGEX = """simr://(.*)""".r - - master match { - case "local" => - new LocalScheduler(1, 0, this) - - case LOCAL_N_REGEX(threads) => - new LocalScheduler(threads.toInt, 0, this) - - case LOCAL_N_FAILURES_REGEX(threads, maxFailures) => - new LocalScheduler(threads.toInt, maxFailures.toInt, this) - - case SPARK_REGEX(sparkUrl) => - val scheduler = new ClusterScheduler(this) - val masterUrls = sparkUrl.split(",").map("spark://" + _) - val backend = new SparkDeploySchedulerBackend(scheduler, this, masterUrls, appName) - scheduler.initialize(backend) - scheduler - - case SIMR_REGEX(simrUrl) => - val scheduler = new ClusterScheduler(this) - val backend = new SimrSchedulerBackend(scheduler, this, simrUrl) - scheduler.initialize(backend) - scheduler - - case LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, memoryPerSlave) => - // Check to make sure memory requested <= memoryPerSlave. Otherwise Spark will just hang. - val memoryPerSlaveInt = memoryPerSlave.toInt - if (SparkContext.executorMemoryRequested > memoryPerSlaveInt) { - throw new SparkException( - "Asked to launch cluster with %d MB RAM / worker but requested %d MB/worker".format( - memoryPerSlaveInt, SparkContext.executorMemoryRequested)) - } - - val scheduler = new ClusterScheduler(this) - val localCluster = new LocalSparkCluster( - numSlaves.toInt, coresPerSlave.toInt, memoryPerSlaveInt) - val masterUrls = localCluster.start() - val backend = new SparkDeploySchedulerBackend(scheduler, this, masterUrls, appName) - scheduler.initialize(backend) - backend.shutdownCallback = (backend: SparkDeploySchedulerBackend) => { - localCluster.stop() - } - scheduler - - case "yarn-standalone" => - val scheduler = try { - val clazz = Class.forName("org.apache.spark.scheduler.cluster.YarnClusterScheduler") - val cons = clazz.getConstructor(classOf[SparkContext]) - cons.newInstance(this).asInstanceOf[ClusterScheduler] - } catch { - // TODO: Enumerate the exact reasons why it can fail - // But irrespective of it, it means we cannot proceed ! 
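The master strings matched by the removed block above (and by the factored-out `SparkContext.createTaskScheduler`) keep their existing forms. A sketch of the main ones, per the regexes shown; host names and ports are illustrative:

{{{
new SparkConf().setMaster("local")                    // one thread
new SparkConf().setMaster("local[8]")                 // 8 threads
new SparkConf().setMaster("local[8,3]")               // 8 threads, up to 3 task failures
new SparkConf().setMaster("local-cluster[2,2,512]")   // 2 local workers, 2 cores, 512 MB each
new SparkConf().setMaster("spark://host:7077")        // standalone cluster
new SparkConf().setMaster("mesos://host:5050")        // Mesos
}}}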
- case th: Throwable => { - throw new SparkException("YARN mode not available ?", th) - } - } - val backend = new CoarseGrainedSchedulerBackend(scheduler, this.env.actorSystem) - scheduler.initialize(backend) - scheduler - - case "yarn-client" => - val scheduler = try { - val clazz = Class.forName("org.apache.spark.scheduler.cluster.YarnClientClusterScheduler") - val cons = clazz.getConstructor(classOf[SparkContext]) - cons.newInstance(this).asInstanceOf[ClusterScheduler] - - } catch { - case th: Throwable => { - throw new SparkException("YARN mode not available ?", th) - } - } - - val backend = try { - val clazz = Class.forName("org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend") - val cons = clazz.getConstructor(classOf[ClusterScheduler], classOf[SparkContext]) - cons.newInstance(scheduler, this).asInstanceOf[CoarseGrainedSchedulerBackend] - } catch { - case th: Throwable => { - throw new SparkException("YARN mode not available ?", th) - } - } - - scheduler.initialize(backend) - scheduler - - case _ => - if (MESOS_REGEX.findFirstIn(master).isEmpty) { - logWarning("Master %s does not match expected format, parsing as Mesos URL".format(master)) - logWarning("Deprecation warning: Mesos URLs not preceded by mesos:// are deprecated " + - "and will no longer be supported in Spark 0.9.") - } - MesosNativeLibrary.load() - val scheduler = new ClusterScheduler(this) - val coarseGrained = System.getProperty("spark.mesos.coarse", "false").toBoolean - val masterWithoutProtocol = master.replaceFirst("^mesos://", "") // Strip initial mesos:// - val backend = if (coarseGrained) { - new CoarseMesosSchedulerBackend(scheduler, this, masterWithoutProtocol, appName) - } else { - new MesosSchedulerBackend(scheduler, this, masterWithoutProtocol, appName) - } - scheduler.initialize(backend) - scheduler - } - } + private[spark] var taskScheduler = SparkContext.createTaskScheduler(this, master, appName) taskScheduler.start() @volatile private[spark] var dagScheduler = new DAGScheduler(taskScheduler) @@ -280,24 +207,24 @@ class SparkContext( /** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. 
*/ val hadoopConfiguration = { val env = SparkEnv.get - val conf = SparkHadoopUtil.get.newConfiguration() + val hadoopConf = SparkHadoopUtil.get.newConfiguration() // Explicitly check for S3 environment variables if (System.getenv("AWS_ACCESS_KEY_ID") != null && System.getenv("AWS_SECRET_ACCESS_KEY") != null) { - conf.set("fs.s3.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID")) - conf.set("fs.s3n.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID")) - conf.set("fs.s3.awsSecretAccessKey", System.getenv("AWS_SECRET_ACCESS_KEY")) - conf.set("fs.s3n.awsSecretAccessKey", System.getenv("AWS_SECRET_ACCESS_KEY")) + hadoopConf.set("fs.s3.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID")) + hadoopConf.set("fs.s3n.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID")) + hadoopConf.set("fs.s3.awsSecretAccessKey", System.getenv("AWS_SECRET_ACCESS_KEY")) + hadoopConf.set("fs.s3n.awsSecretAccessKey", System.getenv("AWS_SECRET_ACCESS_KEY")) } // Copy any "spark.hadoop.foo=bar" system properties into conf as "foo=bar" - Utils.getSystemProperties.foreach { case (key, value) => + conf.getAll.foreach { case (key, value) => if (key.startsWith("spark.hadoop.")) { - conf.set(key.substring("spark.hadoop.".length), value) + hadoopConf.set(key.substring("spark.hadoop.".length), value) } } - val bufferSize = System.getProperty("spark.buffer.size", "65536") - conf.set("io.file.buffer.size", bufferSize) - conf + val bufferSize = conf.get("spark.buffer.size", "65536") + hadoopConf.set("io.file.buffer.size", bufferSize) + hadoopConf } private[spark] var checkpointDir: Option[String] = None @@ -307,7 +234,7 @@ class SparkContext( override protected def childValue(parent: Properties): Properties = new Properties(parent) } - private[spark] def getLocalProperties(): Properties = localProperties.get() + private[spark] def getLocalProperties: Properties = localProperties.get() private[spark] def setLocalProperties(props: Properties) { localProperties.set(props) @@ -317,6 +244,10 @@ class SparkContext( localProperties.set(new Properties()) } + /** + * Set a local property that affects jobs submitted from this thread, such as the + * Spark fair scheduler pool. + */ def setLocalProperty(key: String, value: String) { if (localProperties.get() == null) { localProperties.set(new Properties()) @@ -328,6 +259,10 @@ class SparkContext( } } + /** + * Get a local property set in this thread, or null if it is missing. See + * [[org.apache.spark.SparkContext.setLocalProperty]]. + */ def getLocalProperty(key: String): String = Option(localProperties.get).map(_.getProperty(key)).getOrElse(null) @@ -338,7 +273,7 @@ class SparkContext( } /** - * Assigns a group id to all the jobs started by this thread until the group id is set to a + * Assigns a group ID to all the jobs started by this thread until the group ID is set to a * different value or cleared. * * Often, a unit of execution in an application consists of multiple Spark actions or jobs. @@ -361,7 +296,7 @@ class SparkContext( setLocalProperty(SparkContext.SPARK_JOB_GROUP_ID, groupId) } - /** Clear the job group id and its description. */ + /** Clear the current thread's job group ID and its description. */ def clearJobGroup() { setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, null) setLocalProperty(SparkContext.SPARK_JOB_GROUP_ID, null) @@ -383,19 +318,19 @@ class SparkContext( // Methods for creating RDDs /** Distribute a local Scala collection to form an RDD. 
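Any `spark.hadoop.*` key on the SparkConf is copied into the shared Hadoop Configuration with the prefix stripped, and `spark.buffer.size` becomes `io.file.buffer.size`. A hedged sketch (the credential value is a placeholder):

{{{
val conf = new SparkConf()
  .setMaster("local[2]").setAppName("HadoopConfDemo")
  .set("spark.hadoop.fs.s3n.awsAccessKeyId", "MY_ACCESS_KEY")   // hypothetical credential
  .set("spark.buffer.size", "131072")                           // becomes io.file.buffer.size

val sc = new SparkContext(conf)
// sc.hadoopConfiguration.get("fs.s3n.awsAccessKeyId") now returns "MY_ACCESS_KEY"
}}}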
*/ - def parallelize[T: ClassManifest](seq: Seq[T], numSlices: Int = defaultParallelism): RDD[T] = { + def parallelize[T: ClassTag](seq: Seq[T], numSlices: Int = defaultParallelism): RDD[T] = { new ParallelCollectionRDD[T](this, seq, numSlices, Map[Int, Seq[String]]()) } /** Distribute a local Scala collection to form an RDD. */ - def makeRDD[T: ClassManifest](seq: Seq[T], numSlices: Int = defaultParallelism): RDD[T] = { + def makeRDD[T: ClassTag](seq: Seq[T], numSlices: Int = defaultParallelism): RDD[T] = { parallelize(seq, numSlices) } /** Distribute a local Scala collection to form an RDD, with one or more * location preferences (hostnames of Spark nodes) for each object. * Create a new partition for each collection item. */ - def makeRDD[T: ClassManifest](seq: Seq[(T, Seq[String])]): RDD[T] = { + def makeRDD[T: ClassTag](seq: Seq[(T, Seq[String])]): RDD[T] = { val indexToPrefs = seq.zipWithIndex.map(t => (t._2, t._1._2)).toMap new ParallelCollectionRDD[T](this, seq.map(_._1), seq.size, indexToPrefs) } @@ -405,14 +340,25 @@ class SparkContext( * Hadoop-supported file system URI, and return it as an RDD of Strings. */ def textFile(path: String, minSplits: Int = defaultMinSplits): RDD[String] = { - hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], minSplits) - .map(pair => pair._2.toString) + hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], + minSplits).map(pair => pair._2.toString) } /** - * Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf given its InputFormat and any - * other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable, - * etc). + * Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf given its InputFormat and other + * necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable), + * using the older MapReduce API (`org.apache.hadoop.mapred`). + * + * @param conf JobConf for setting up the dataset + * @param inputFormatClass Class of the [[InputFormat]] + * @param keyClass Class of the keys + * @param valueClass Class of the values + * @param minSplits Minimum number of Hadoop Splits to generate. + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. */ def hadoopRDD[K, V]( conf: JobConf, @@ -426,7 +372,13 @@ class SparkContext( new HadoopRDD(this, conf, inputFormatClass, keyClass, valueClass, minSplits) } - /** Get an RDD for a Hadoop file with an arbitrary InputFormat */ + /** Get an RDD for a Hadoop file with an arbitrary InputFormat + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. + * */ def hadoopFile[K, V]( path: String, inputFormatClass: Class[_ <: InputFormat[K, V]], @@ -448,48 +400,64 @@ class SparkContext( } /** - * Smarter version of hadoopFile() that uses class manifests to figure out the classes of keys, + * Smarter version of hadoopFile() that uses class tags to figure out the classes of keys, * values and the InputFormat so that users don't need to pass them directly. 
Instead, callers * can just write, for example, * {{{ * val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path, minSplits) * }}} + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. */ - def hadoopFile[K, V, F <: InputFormat[K, V]](path: String, minSplits: Int) - (implicit km: ClassManifest[K], vm: ClassManifest[V], fm: ClassManifest[F]) - : RDD[(K, V)] = { + def hadoopFile[K, V, F <: InputFormat[K, V]] + (path: String, minSplits: Int) + (implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] = { hadoopFile(path, - fm.erasure.asInstanceOf[Class[F]], - km.erasure.asInstanceOf[Class[K]], - vm.erasure.asInstanceOf[Class[V]], - minSplits) + fm.runtimeClass.asInstanceOf[Class[F]], + km.runtimeClass.asInstanceOf[Class[K]], + vm.runtimeClass.asInstanceOf[Class[V]], + minSplits) } /** - * Smarter version of hadoopFile() that uses class manifests to figure out the classes of keys, + * Smarter version of hadoopFile() that uses class tags to figure out the classes of keys, * values and the InputFormat so that users don't need to pass them directly. Instead, callers * can just write, for example, * {{{ * val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path) * }}} + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. */ def hadoopFile[K, V, F <: InputFormat[K, V]](path: String) - (implicit km: ClassManifest[K], vm: ClassManifest[V], fm: ClassManifest[F]): RDD[(K, V)] = + (implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] = hadoopFile[K, V, F](path, defaultMinSplits) /** Get an RDD for a Hadoop file with an arbitrary new API InputFormat. */ - def newAPIHadoopFile[K, V, F <: NewInputFormat[K, V]](path: String) - (implicit km: ClassManifest[K], vm: ClassManifest[V], fm: ClassManifest[F]): RDD[(K, V)] = { + def newAPIHadoopFile[K, V, F <: NewInputFormat[K, V]] + (path: String) + (implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] = { newAPIHadoopFile( - path, - fm.erasure.asInstanceOf[Class[F]], - km.erasure.asInstanceOf[Class[K]], - vm.erasure.asInstanceOf[Class[V]]) + path, + fm.runtimeClass.asInstanceOf[Class[F]], + km.runtimeClass.asInstanceOf[Class[K]], + vm.runtimeClass.asInstanceOf[Class[V]]) } /** * Get an RDD for a given Hadoop file with an arbitrary new API InputFormat * and extra configuration options to pass to the input format. + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. */ def newAPIHadoopFile[K, V, F <: NewInputFormat[K, V]]( path: String, @@ -506,6 +474,11 @@ class SparkContext( /** * Get an RDD for a given Hadoop file with an arbitrary new API InputFormat * and extra configuration options to pass to the input format. 
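The Writable-reuse caveat added throughout these methods means records should be copied out of Hadoop's reused objects before the RDD is cached. A sketch using the ClassTag-based `hadoopFile` shown above (the path and split count are illustrative):

{{{
import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapred.TextInputFormat

val raw = sc.hadoopFile[LongWritable, Text, TextInputFormat]("hdfs:///data/logs", 8)
// The RecordReader reuses the same Writable instances, so copy the contents before caching:
val cached = raw.map { case (offset, line) => (offset.get, line.toString) }.cache()
}}}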
+ * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. */ def newAPIHadoopRDD[K, V, F <: NewInputFormat[K, V]]( conf: Configuration = hadoopConfiguration, @@ -515,7 +488,13 @@ class SparkContext( new NewHadoopRDD(this, fClass, kClass, vClass, conf) } - /** Get an RDD for a Hadoop SequenceFile with given key and value types. */ + /** Get an RDD for a Hadoop SequenceFile with given key and value types. + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. + */ def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V], @@ -525,8 +504,15 @@ class SparkContext( hadoopFile(path, inputFormatClass, keyClass, valueClass, minSplits) } - /** Get an RDD for a Hadoop SequenceFile with given key and value types. */ - def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V]): RDD[(K, V)] = + /** Get an RDD for a Hadoop SequenceFile with given key and value types. + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. + * */ + def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V] + ): RDD[(K, V)] = sequenceFile(path, keyClass, valueClass, defaultMinSplits) /** @@ -542,12 +528,18 @@ class SparkContext( * IntWritable). The most natural thing would've been to have implicit objects for the * converters, but then we couldn't have an object for every subclass of Writable (you can't * have a parameterized singleton object). We use functions instead to create a new converter - * for the appropriate type. In addition, we pass the converter a ClassManifest of its type to + * for the appropriate type. In addition, we pass the converter a ClassTag of its type to * allow it to figure out the Writable class to use in the subclass case. + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. 
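The recurring '''Note:''' above is easiest to see in a concrete sketch (editorial only; assumes a SparkContext named sc and a placeholder path): copy the data out of the re-used Writables with a map before caching.

    import org.apache.hadoop.io.{LongWritable, Text}
    import org.apache.hadoop.mapred.TextInputFormat

    // The RecordReader re-uses one LongWritable and one Text instance per partition,
    // so convert to plain Scala values before calling cache().
    val cached = sc.hadoopFile[LongWritable, Text, TextInputFormat]("hdfs:///data/events")
      .map { case (offset, line) => (offset.get, line.toString) } // copies the contents
      .cache()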
*/ - def sequenceFile[K, V](path: String, minSplits: Int = defaultMinSplits) - (implicit km: ClassManifest[K], vm: ClassManifest[V], - kcf: () => WritableConverter[K], vcf: () => WritableConverter[V]) + def sequenceFile[K, V] + (path: String, minSplits: Int = defaultMinSplits) + (implicit km: ClassTag[K], vm: ClassTag[V], + kcf: () => WritableConverter[K], vcf: () => WritableConverter[V]) : RDD[(K, V)] = { val kc = kcf() val vc = vcf() @@ -555,7 +547,7 @@ class SparkContext( val writables = hadoopFile(path, format, kc.writableClass(km).asInstanceOf[Class[Writable]], vc.writableClass(vm).asInstanceOf[Class[Writable]], minSplits) - writables.map{case (k,v) => (kc.convert(k), vc.convert(v))} + writables.map { case (k, v) => (kc.convert(k), vc.convert(v)) } } /** @@ -565,7 +557,7 @@ class SparkContext( * slow if you use the default serializer (Java serialization), though the nice thing about it is * that there's very little effort required to save arbitrary objects. */ - def objectFile[T: ClassManifest]( + def objectFile[T: ClassTag]( path: String, minSplits: Int = defaultMinSplits ): RDD[T] = { @@ -574,31 +566,31 @@ class SparkContext( } - protected[spark] def checkpointFile[T: ClassManifest]( + protected[spark] def checkpointFile[T: ClassTag]( path: String ): RDD[T] = { new CheckpointRDD[T](this, path) } /** Build the union of a list of RDDs. */ - def union[T: ClassManifest](rdds: Seq[RDD[T]]): RDD[T] = new UnionRDD(this, rdds) + def union[T: ClassTag](rdds: Seq[RDD[T]]): RDD[T] = new UnionRDD(this, rdds) /** Build the union of a list of RDDs passed as variable-length arguments. */ - def union[T: ClassManifest](first: RDD[T], rest: RDD[T]*): RDD[T] = + def union[T: ClassTag](first: RDD[T], rest: RDD[T]*): RDD[T] = new UnionRDD(this, Seq(first) ++ rest) // Methods for creating shared variables /** - * Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can "add" values - * to using the `+=` method. Only the driver can access the accumulator's `value`. + * Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can "add" + * values to using the `+=` method. Only the driver can access the accumulator's `value`. */ def accumulator[T](initialValue: T)(implicit param: AccumulatorParam[T]) = new Accumulator(initialValue, param) /** - * Create an [[org.apache.spark.Accumulable]] shared variable, to which tasks can add values with `+=`. - * Only the driver can access the accumuable's `value`. + * Create an [[org.apache.spark.Accumulable]] shared variable, to which tasks can add values + * with `+=`. Only the driver can access the accumuable's `value`. * @tparam T accumulator type * @tparam R type that can be added to the accumulator */ @@ -611,14 +603,16 @@ class SparkContext( * Growable and TraversableOnce are the standard APIs that guarantee += and ++=, implemented by * standard mutable collections. So you can use this with mutable Map, Set, etc. */ - def accumulableCollection[R <% Growable[T] with TraversableOnce[T] with Serializable, T](initialValue: R) = { + def accumulableCollection[R <% Growable[T] with TraversableOnce[T] with Serializable, T] + (initialValue: R) = { val param = new GrowableAccumulableParam[R,T] new Accumulable(initialValue, param) } /** - * Broadcast a read-only variable to the cluster, returning a [[org.apache.spark.broadcast.Broadcast]] object for - * reading it in distributed functions. The variable will be sent to each cluster only once. 
+ * Broadcast a read-only variable to the cluster, returning a + * [[org.apache.spark.broadcast.Broadcast]] object for reading it in distributed functions. + * The variable will be sent to each cluster only once. */ def broadcast[T](value: T) = env.broadcastManager.newBroadcast[T](value, isLocal) @@ -637,10 +631,8 @@ class SparkContext( } addedFiles(key) = System.currentTimeMillis - // Fetch the file locally in case a job is executed locally. - // Jobs that run through LocalScheduler will already fetch the required dependencies, - // but jobs run in DAGScheduler.runLocally() will not so we must fetch the files here. - Utils.fetchFile(path, new File(SparkFiles.getRootDirectory)) + // Fetch the file locally in case a job is executed using DAGScheduler.runLocally(). + Utils.fetchFile(path, new File(SparkFiles.getRootDirectory), conf) logInfo("Added file " + path + " at " + key + " with timestamp " + addedFiles(key)) } @@ -742,17 +734,20 @@ class SparkContext( key = uri.getScheme match { // A JAR file which exists only on the driver node case null | "file" => - if (SparkHadoopUtil.get.isYarnMode()) { - // In order for this to work on yarn the user must specify the --addjars option to - // the client to upload the file into the distributed cache to make it show up in the - // current working directory. + if (SparkHadoopUtil.get.isYarnMode() && master == "yarn-standalone") { + // In order for this to work in yarn standalone mode the user must specify the + // --addjars option to the client to upload the file into the distributed cache + // of the AM to make it show up in the current working directory. val fileName = new Path(uri.getPath).getName() try { env.httpFileServer.addJar(new File(fileName)) } catch { case e: Exception => { + // For now just log an error but allow to go through so spark examples work. + // The spark examples don't really need the jar distributed since its also + // the app jar. logError("Error adding jar (" + e + "), was the --addJars option used?") - throw e + null } } } else { @@ -765,8 +760,10 @@ class SparkContext( path } } - addedJars(key) = System.currentTimeMillis - logInfo("Added JAR " + path + " at " + key + " with timestamp " + addedJars(key)) + if (key != null) { + addedJars(key) = System.currentTimeMillis + logInfo("Added JAR " + path + " at " + key + " with timestamp " + addedJars(key)) + } } } @@ -810,14 +807,29 @@ class SparkContext( * (in that order of preference). If neither of these is set, return None. */ private[spark] def getSparkHome(): Option[String] = { - if (sparkHome != null) { - Some(sparkHome) - } else if (System.getProperty("spark.home") != null) { - Some(System.getProperty("spark.home")) - } else if (System.getenv("SPARK_HOME") != null) { - Some(System.getenv("SPARK_HOME")) + conf.getOption("spark.home").orElse(Option(System.getenv("SPARK_HOME"))) + } + + /** + * Support function for API backtraces. + */ + def setCallSite(site: String) { + setLocalProperty("externalCallSite", site) + } + + /** + * Support function for API backtraces. + */ + def clearCallSite() { + setLocalProperty("externalCallSite", null) + } + + private[spark] def getCallSite(): String = { + val callSite = getLocalProperty("externalCallSite") + if (callSite == null) { + Utils.formatSparkCallSite } else { - None + callSite } } @@ -827,13 +839,13 @@ class SparkContext( * flag specifies whether the scheduler can run the computation on the driver rather than * shipping it out to the cluster, for short actions like first(). 
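A brief sketch of the shared-variable APIs documented above (illustrative, not from the patch; assumes a SparkContext named sc): broadcast values are read on the executors, accumulator values are read only on the driver.

    import org.apache.spark.SparkContext._   // brings IntAccumulatorParam into scope

    // Shipped to each node once; tasks read it via .value.
    val stopWords = sc.broadcast(Set("the", "a", "of"))
    // Tasks add to it with +=; only the driver may read .value.
    val dropped = sc.accumulator(0)

    val kept = sc.parallelize(Seq("the", "quick", "fox")).filter { w =>
      val keep = !stopWords.value.contains(w)
      if (!keep) dropped += 1
      keep
    }
    kept.count()                              // forces evaluation so the accumulator is populated
    println("dropped " + dropped.value + " words")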
*/ - def runJob[T, U: ClassManifest]( + def runJob[T, U: ClassTag]( rdd: RDD[T], func: (TaskContext, Iterator[T]) => U, partitions: Seq[Int], allowLocal: Boolean, resultHandler: (Int, U) => Unit) { - val callSite = Utils.formatSparkCallSite + val callSite = getCallSite val cleanedFunc = clean(func) logInfo("Starting job: " + callSite) val start = System.nanoTime @@ -848,7 +860,7 @@ class SparkContext( * allowLocal flag specifies whether the scheduler can run the computation on the driver rather * than shipping it out to the cluster, for short actions like first(). */ - def runJob[T, U: ClassManifest]( + def runJob[T, U: ClassTag]( rdd: RDD[T], func: (TaskContext, Iterator[T]) => U, partitions: Seq[Int], @@ -863,7 +875,7 @@ class SparkContext( * Run a job on a given set of partitions of an RDD, but take a function of type * `Iterator[T] => U` instead of `(TaskContext, Iterator[T]) => U`. */ - def runJob[T, U: ClassManifest]( + def runJob[T, U: ClassTag]( rdd: RDD[T], func: Iterator[T] => U, partitions: Seq[Int], @@ -875,21 +887,21 @@ class SparkContext( /** * Run a job on all partitions in an RDD and return the results in an array. */ - def runJob[T, U: ClassManifest](rdd: RDD[T], func: (TaskContext, Iterator[T]) => U): Array[U] = { + def runJob[T, U: ClassTag](rdd: RDD[T], func: (TaskContext, Iterator[T]) => U): Array[U] = { runJob(rdd, func, 0 until rdd.partitions.size, false) } /** * Run a job on all partitions in an RDD and return the results in an array. */ - def runJob[T, U: ClassManifest](rdd: RDD[T], func: Iterator[T] => U): Array[U] = { + def runJob[T, U: ClassTag](rdd: RDD[T], func: Iterator[T] => U): Array[U] = { runJob(rdd, func, 0 until rdd.partitions.size, false) } /** * Run a job on all partitions in an RDD and pass the results to a handler function. */ - def runJob[T, U: ClassManifest]( + def runJob[T, U: ClassTag]( rdd: RDD[T], processPartition: (TaskContext, Iterator[T]) => U, resultHandler: (Int, U) => Unit) @@ -900,7 +912,7 @@ class SparkContext( /** * Run a job on all partitions in an RDD and pass the results to a handler function. */ - def runJob[T, U: ClassManifest]( + def runJob[T, U: ClassTag]( rdd: RDD[T], processPartition: Iterator[T] => U, resultHandler: (Int, U) => Unit) @@ -917,7 +929,7 @@ class SparkContext( func: (TaskContext, Iterator[T]) => U, evaluator: ApproximateEvaluator[U, R], timeout: Long): PartialResult[R] = { - val callSite = Utils.formatSparkCallSite + val callSite = getCallSite logInfo("Starting job: " + callSite) val start = System.nanoTime val result = dagScheduler.runApproximateJob(rdd, func, evaluator, callSite, timeout, @@ -937,7 +949,7 @@ class SparkContext( resultFunc: => R): SimpleFutureAction[R] = { val cleanF = clean(processPartition) - val callSite = Utils.formatSparkCallSite + val callSite = getCallSite val waiter = dagScheduler.submitJob( rdd, (context: TaskContext, iter: Iterator[T]) => cleanF(iter), @@ -968,29 +980,24 @@ class SparkContext( */ private[spark] def clean[F <: AnyRef](f: F): F = { ClosureCleaner.clean(f) - return f + f } /** * Set the directory under which RDDs are going to be checkpointed. The directory must - * be a HDFS path if running on a cluster. If the directory does not exist, it will - * be created. If the directory exists and useExisting is set to true, then the - * exisiting directory will be used. Otherwise an exception will be thrown to - * prevent accidental overriding of checkpoint files in the existing directory. + * be a HDFS path if running on a cluster. 
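For orientation, a minimal use of the runJob overloads above (a sketch, not part of the change; assumes a SparkContext named sc): run a function over every partition and collect one result per partition on the driver.

    val data = sc.parallelize(1 to 100, numSlices = 4)
    // One Int per partition, returned as an Array[Int] in partition order.
    val perPartitionCounts: Array[Int] = sc.runJob(data, (iter: Iterator[Int]) => iter.size)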
*/ - def setCheckpointDir(dir: String, useExisting: Boolean = false) { - val path = new Path(dir) - val fs = path.getFileSystem(SparkHadoopUtil.get.newConfiguration()) - if (!useExisting) { - if (fs.exists(path)) { - throw new Exception("Checkpoint directory '" + path + "' already exists.") - } else { - fs.mkdirs(path) - } + def setCheckpointDir(directory: String) { + checkpointDir = Option(directory).map { dir => + val path = new Path(dir, UUID.randomUUID().toString) + val fs = path.getFileSystem(hadoopConfiguration) + fs.mkdirs(path) + fs.getFileStatus(path).getPath.toString } - checkpointDir = Some(dir) } + def getCheckpointDir = checkpointDir + /** Default level of parallelism to use when not given by user (e.g. parallelize and makeRDD). */ def defaultParallelism: Int = taskScheduler.defaultParallelism @@ -1046,16 +1053,16 @@ object SparkContext { // TODO: Add AccumulatorParams for other types, e.g. lists and strings - implicit def rddToPairRDDFunctions[K: ClassManifest, V: ClassManifest](rdd: RDD[(K, V)]) = + implicit def rddToPairRDDFunctions[K: ClassTag, V: ClassTag](rdd: RDD[(K, V)]) = new PairRDDFunctions(rdd) - implicit def rddToAsyncRDDActions[T: ClassManifest](rdd: RDD[T]) = new AsyncRDDActions(rdd) + implicit def rddToAsyncRDDActions[T: ClassTag](rdd: RDD[T]) = new AsyncRDDActions(rdd) - implicit def rddToSequenceFileRDDFunctions[K <% Writable: ClassManifest, V <% Writable: ClassManifest]( - rdd: RDD[(K, V)]) = + implicit def rddToSequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable: ClassTag]( + rdd: RDD[(K, V)]) = new SequenceFileRDDFunctions(rdd) - implicit def rddToOrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest]( + implicit def rddToOrderedRDDFunctions[K <% Ordered[K]: ClassTag, V: ClassTag]( rdd: RDD[(K, V)]) = new OrderedRDDFunctions[K, V, (K, V)](rdd) @@ -1080,16 +1087,17 @@ object SparkContext { implicit def stringToText(s: String) = new Text(s) - private implicit def arrayToArrayWritable[T <% Writable: ClassManifest](arr: Traversable[T]): ArrayWritable = { + private implicit def arrayToArrayWritable[T <% Writable: ClassTag](arr: Traversable[T]) + : ArrayWritable = { def anyToWritable[U <% Writable](u: U): Writable = u - new ArrayWritable(classManifest[T].erasure.asInstanceOf[Class[Writable]], + new ArrayWritable(classTag[T].runtimeClass.asInstanceOf[Class[Writable]], arr.map(x => anyToWritable(x)).toArray) } // Helper objects for converting common types to Writable - private def simpleWritableConverter[T, W <: Writable: ClassManifest](convert: W => T) = { - val wClass = classManifest[W].erasure.asInstanceOf[Class[W]] + private def simpleWritableConverter[T, W <: Writable: ClassTag](convert: W => T) = { + val wClass = classTag[W].runtimeClass.asInstanceOf[Class[W]] new WritableConverter[T](_ => wClass, x => convert(x.asInstanceOf[W])) } @@ -1103,23 +1111,26 @@ object SparkContext { implicit def booleanWritableConverter() = simpleWritableConverter[Boolean, BooleanWritable](_.get) - implicit def bytesWritableConverter() = simpleWritableConverter[Array[Byte], BytesWritable](_.getBytes) + implicit def bytesWritableConverter() = { + simpleWritableConverter[Array[Byte], BytesWritable](_.getBytes) + } implicit def stringWritableConverter() = simpleWritableConverter[String, Text](_.toString) implicit def writableWritableConverter[T <: Writable]() = - new WritableConverter[T](_.erasure.asInstanceOf[Class[T]], _.asInstanceOf[T]) + new WritableConverter[T](_.runtimeClass.asInstanceOf[Class[T]], _.asInstanceOf[T]) /** * Find the JAR from which a 
given class was loaded, to make it easy for users to pass - * their JARs to SparkContext + * their JARs to SparkContext. */ def jarOfClass(cls: Class[_]): Seq[String] = { val uri = cls.getResource("/" + cls.getName.replace('.', '/') + ".class") if (uri != null) { val uriStr = uri.toString if (uriStr.startsWith("jar:file:")) { - // URI will be of the form "jar:file:/path/foo.jar!/package/cls.class", so pull out the /path/foo.jar + // URI will be of the form "jar:file:/path/foo.jar!/package/cls.class", + // so pull out the /path/foo.jar List(uriStr.substring("jar:file:".length, uriStr.indexOf('!'))) } else { Nil @@ -1129,28 +1140,181 @@ object SparkContext { } } - /** Find the JAR that contains the class of a particular object */ + /** + * Find the JAR that contains the class of a particular object, to make it easy for users + * to pass their JARs to SparkContext. In most cases you can call jarOfObject(this) in + * your driver program. + */ def jarOfObject(obj: AnyRef): Seq[String] = jarOfClass(obj.getClass) - /** Get the amount of memory per executor requested through system properties or SPARK_MEM */ - private[spark] val executorMemoryRequested = { - // TODO: Might need to add some extra memory for the non-heap parts of the JVM - Option(System.getProperty("spark.executor.memory")) - .orElse(Option(System.getenv("SPARK_MEM"))) - .map(Utils.memoryStringToMb) - .getOrElse(512) + /** + * Creates a modified version of a SparkConf with the parameters that can be passed separately + * to SparkContext, to make it easier to write SparkContext's constructors. This ignores + * parameters that are passed as the default value of null, instead of throwing an exception + * like SparkConf would. + */ + private[spark] def updatedConf( + conf: SparkConf, + master: String, + appName: String, + sparkHome: String = null, + jars: Seq[String] = Nil, + environment: Map[String, String] = Map()): SparkConf = + { + val res = conf.clone() + res.setMaster(master) + res.setAppName(appName) + if (sparkHome != null) { + res.setSparkHome(sparkHome) + } + if (jars != null && !jars.isEmpty) { + res.setJars(jars) + } + res.setExecutorEnv(environment.toSeq) + res + } + + /** Creates a task scheduler based on a given master URL. Extracted for testing. */ + private def createTaskScheduler(sc: SparkContext, master: String, appName: String) + : TaskScheduler = + { + // Regular expression used for local[N] master format + val LOCAL_N_REGEX = """local\[([0-9]+)\]""".r + // Regular expression for local[N, maxRetries], used in tests with failing tasks + val LOCAL_N_FAILURES_REGEX = """local\[([0-9]+)\s*,\s*([0-9]+)\]""".r + // Regular expression for simulating a Spark cluster of [N, cores, memory] locally + val LOCAL_CLUSTER_REGEX = """local-cluster\[\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*]""".r + // Regular expression for connecting to Spark deploy clusters + val SPARK_REGEX = """spark://(.*)""".r + // Regular expression for connection to Mesos cluster by mesos:// or zk:// url + val MESOS_REGEX = """(mesos|zk)://.*""".r + // Regular expression for connection to Simr cluster + val SIMR_REGEX = """simr://(.*)""".r + + // When running locally, don't try to re-execute tasks on failure. 
+ val MAX_LOCAL_TASK_FAILURES = 1 + + master match { + case "local" => + val scheduler = new TaskSchedulerImpl(sc, MAX_LOCAL_TASK_FAILURES, isLocal = true) + val backend = new LocalBackend(scheduler, 1) + scheduler.initialize(backend) + scheduler + + case LOCAL_N_REGEX(threads) => + val scheduler = new TaskSchedulerImpl(sc, MAX_LOCAL_TASK_FAILURES, isLocal = true) + val backend = new LocalBackend(scheduler, threads.toInt) + scheduler.initialize(backend) + scheduler + + case LOCAL_N_FAILURES_REGEX(threads, maxFailures) => + val scheduler = new TaskSchedulerImpl(sc, maxFailures.toInt, isLocal = true) + val backend = new LocalBackend(scheduler, threads.toInt) + scheduler.initialize(backend) + scheduler + + case SPARK_REGEX(sparkUrl) => + val scheduler = new TaskSchedulerImpl(sc) + val masterUrls = sparkUrl.split(",").map("spark://" + _) + val backend = new SparkDeploySchedulerBackend(scheduler, sc, masterUrls, appName) + scheduler.initialize(backend) + scheduler + + case LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, memoryPerSlave) => + // Check to make sure memory requested <= memoryPerSlave. Otherwise Spark will just hang. + val memoryPerSlaveInt = memoryPerSlave.toInt + if (sc.executorMemory > memoryPerSlaveInt) { + throw new SparkException( + "Asked to launch cluster with %d MB RAM / worker but requested %d MB/worker".format( + memoryPerSlaveInt, sc.executorMemory)) + } + + val scheduler = new TaskSchedulerImpl(sc) + val localCluster = new LocalSparkCluster( + numSlaves.toInt, coresPerSlave.toInt, memoryPerSlaveInt) + val masterUrls = localCluster.start() + val backend = new SparkDeploySchedulerBackend(scheduler, sc, masterUrls, appName) + scheduler.initialize(backend) + backend.shutdownCallback = (backend: SparkDeploySchedulerBackend) => { + localCluster.stop() + } + scheduler + + case "yarn-standalone" => + val scheduler = try { + val clazz = Class.forName("org.apache.spark.scheduler.cluster.YarnClusterScheduler") + val cons = clazz.getConstructor(classOf[SparkContext]) + cons.newInstance(sc).asInstanceOf[TaskSchedulerImpl] + } catch { + // TODO: Enumerate the exact reasons why it can fail + // But irrespective of it, it means we cannot proceed ! 
+ case th: Throwable => { + throw new SparkException("YARN mode not available ?", th) + } + } + val backend = new CoarseGrainedSchedulerBackend(scheduler, sc.env.actorSystem) + scheduler.initialize(backend) + scheduler + + case "yarn-client" => + val scheduler = try { + val clazz = Class.forName("org.apache.spark.scheduler.cluster.YarnClientClusterScheduler") + val cons = clazz.getConstructor(classOf[SparkContext]) + cons.newInstance(sc).asInstanceOf[TaskSchedulerImpl] + + } catch { + case th: Throwable => { + throw new SparkException("YARN mode not available ?", th) + } + } + + val backend = try { + val clazz = Class.forName("org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend") + val cons = clazz.getConstructor(classOf[TaskSchedulerImpl], classOf[SparkContext]) + cons.newInstance(scheduler, sc).asInstanceOf[CoarseGrainedSchedulerBackend] + } catch { + case th: Throwable => { + throw new SparkException("YARN mode not available ?", th) + } + } + + scheduler.initialize(backend) + scheduler + + case mesosUrl @ MESOS_REGEX(_) => + MesosNativeLibrary.load() + val scheduler = new TaskSchedulerImpl(sc) + val coarseGrained = sc.conf.getBoolean("spark.mesos.coarse", false) + val url = mesosUrl.stripPrefix("mesos://") // strip scheme from raw Mesos URLs + val backend = if (coarseGrained) { + new CoarseMesosSchedulerBackend(scheduler, sc, url, appName) + } else { + new MesosSchedulerBackend(scheduler, sc, url, appName) + } + scheduler.initialize(backend) + scheduler + + case SIMR_REGEX(simrUrl) => + val scheduler = new TaskSchedulerImpl(sc) + val backend = new SimrSchedulerBackend(scheduler, sc, simrUrl) + scheduler.initialize(backend) + scheduler + + case _ => + throw new SparkException("Could not parse Master URL: '" + master + "'") + } } } /** * A class encapsulating how to convert some type T to Writable. It stores both the Writable class * corresponding to T (e.g. IntWritable for Int) and a function for doing the conversion. - * The getter for the writable class takes a ClassManifest[T] in case this is a generic object + * The getter for the writable class takes a ClassTag[T] in case this is a generic object * that doesn't know the type of T when it is created. This sounds strange but is necessary to * support converting subclasses of Writable to themselves (writableWritableConverter). */ private[spark] class WritableConverter[T]( - val writableClass: ClassManifest[T] => Class[_ <: Writable], + val writableClass: ClassTag[T] => Class[_ <: Writable], val convert: Writable => T) extends Serializable diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index 062852ae22992..ed788560e79f1 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -17,11 +17,10 @@ package org.apache.spark -import collection.mutable -import serializer.Serializer +import scala.collection.mutable +import scala.concurrent.Await -import akka.actor.{Actor, ActorRef, Props, ActorSystemImpl, ActorSystem} -import akka.remote.RemoteActorRefProvider +import akka.actor._ import org.apache.spark.broadcast.BroadcastManager import org.apache.spark.metrics.MetricsSystem @@ -40,7 +39,7 @@ import com.google.common.collect.MapMaker * objects needs to have the right SparkEnv set. You can get the current environment with * SparkEnv.get (e.g. after creating a SparkContext) and set it with SparkEnv.set. 
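To summarize the dispatch above, here are representative master strings and where they land (editorial notes keyed to the regexes in the patch; the final line is a sketch assuming a local run):

    //   "local"                          -> 1-thread LocalBackend, no task retries
    //   "local[8]"                       -> LocalBackend with 8 threads
    //   "local[8,3]"                     -> 8 threads, up to 3 failures per task (test use)
    //   "local-cluster[2,1,512]"         -> in-process cluster: 2 workers, 1 core, 512 MB each
    //   "spark://host1:7077,host2:7077"  -> standalone deploy cluster (multiple masters allowed)
    //   "mesos://host:5050" or "zk://..."-> Mesos; coarse-grained if spark.mesos.coarse=true
    //   "simr://..."                     -> Spark-in-MapReduce backend
    //   "yarn-standalone", "yarn-client" -> YARN schedulers, loaded reflectively
    import org.apache.spark.{SparkConf, SparkContext}
    val sc = new SparkContext(new SparkConf().setMaster("local[4]").setAppName("scheduler-demo"))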
*/ -class SparkEnv ( +class SparkEnv private[spark] ( val executorId: String, val actorSystem: ActorSystem, val serializerManager: SerializerManager, @@ -54,7 +53,12 @@ class SparkEnv ( val connectionManager: ConnectionManager, val httpFileServer: HttpFileServer, val sparkFilesDir: String, - val metricsSystem: MetricsSystem) { + val metricsSystem: MetricsSystem, + val conf: SparkConf) extends Logging { + + // A mapping of thread ID to amount of memory used for shuffle in bytes + // All accesses should be manually synchronized + val shuffleMemoryMap = mutable.HashMap[Long, Long]() private val pythonWorkers = mutable.HashMap[(String, Map[String, String]), PythonWorkerFactory]() @@ -62,7 +66,7 @@ class SparkEnv ( // (e.g., HadoopFileRDD uses this to cache JobConfs and InputFormats). private[spark] val hadoopJobMetadata = new MapMaker().softValues().makeMap[String, Any]() - def stop() { + private[spark] def stop() { pythonWorkers.foreach { case(key, worker) => worker.stop() } httpFileServer.stop() mapOutputTracker.stop() @@ -74,9 +78,11 @@ class SparkEnv ( actorSystem.shutdown() // Unfortunately Akka's awaitTermination doesn't actually wait for the Netty server to shut // down, but let's call it anyway in case it gets fixed in a later release - actorSystem.awaitTermination() + // UPDATE: In Akka 2.1.x, this hangs if there are remote actors, so we can't call it. + //actorSystem.awaitTermination() } + private[spark] def createPythonWorker(pythonExec: String, envVars: Map[String, String]): java.net.Socket = { synchronized { val key = (pythonExec, envVars) @@ -105,33 +111,25 @@ object SparkEnv extends Logging { /** * Returns the ThreadLocal SparkEnv. */ - def getThreadLocal : SparkEnv = { + def getThreadLocal: SparkEnv = { env.get() } - def createFromSystemProperties( + private[spark] def create( + conf: SparkConf, executorId: String, hostname: String, port: Int, isDriver: Boolean, isLocal: Boolean): SparkEnv = { - val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, port) + val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, port, + conf = conf) // Bit of a hack: If this is the driver and our port was 0 (meaning bind to any free port), // figure out which port number Akka actually bound to and set spark.driver.port to it. if (isDriver && port == 0) { - System.setProperty("spark.driver.port", boundPort.toString) - } - - // set only if unset until now. 
- if (System.getProperty("spark.hostPort", null) == null) { - if (!isDriver){ - // unexpected - Utils.logErrorWithStack("Unexpected NOT to have spark.hostPort set") - } - Utils.checkHost(hostname) - System.setProperty("spark.hostPort", hostname + ":" + boundPort) + conf.set("spark.driver.port", boundPort.toString) } val classLoader = Thread.currentThread.getContextClassLoader @@ -139,61 +137,67 @@ object SparkEnv extends Logging { // Create an instance of the class named by the given Java system property, or by // defaultClassName if the property is not set, and return it as a T def instantiateClass[T](propertyName: String, defaultClassName: String): T = { - val name = System.getProperty(propertyName, defaultClassName) + val name = conf.get(propertyName, defaultClassName) Class.forName(name, true, classLoader).newInstance().asInstanceOf[T] } val serializerManager = new SerializerManager val serializer = serializerManager.setDefault( - System.getProperty("spark.serializer", "org.apache.spark.serializer.JavaSerializer")) + conf.get("spark.serializer", "org.apache.spark.serializer.JavaSerializer"), conf) val closureSerializer = serializerManager.get( - System.getProperty("spark.closure.serializer", "org.apache.spark.serializer.JavaSerializer")) + conf.get("spark.closure.serializer", "org.apache.spark.serializer.JavaSerializer"), + conf) def registerOrLookup(name: String, newActor: => Actor): ActorRef = { if (isDriver) { logInfo("Registering " + name) actorSystem.actorOf(Props(newActor), name = name) } else { - val driverHost: String = System.getProperty("spark.driver.host", "localhost") - val driverPort: Int = System.getProperty("spark.driver.port", "7077").toInt + val driverHost: String = conf.get("spark.driver.host", "localhost") + val driverPort: Int = conf.getInt("spark.driver.port", 7077) Utils.checkHost(driverHost, "Expected hostname") - val url = "akka://spark@%s:%s/user/%s".format(driverHost, driverPort, name) - logInfo("Connecting to " + name + ": " + url) - actorSystem.actorFor(url) + val url = s"akka.tcp://spark@$driverHost:$driverPort/user/$name" + val timeout = AkkaUtils.lookupTimeout(conf) + logInfo(s"Connecting to $name: $url") + Await.result(actorSystem.actorSelection(url).resolveOne(timeout), timeout) } } val blockManagerMaster = new BlockManagerMaster(registerOrLookup( "BlockManagerMaster", - new BlockManagerMasterActor(isLocal))) - val blockManager = new BlockManager(executorId, actorSystem, blockManagerMaster, serializer) + new BlockManagerMasterActor(isLocal, conf)), conf) + val blockManager = new BlockManager(executorId, actorSystem, blockManagerMaster, serializer, conf) val connectionManager = blockManager.connectionManager - val broadcastManager = new BroadcastManager(isDriver) + val broadcastManager = new BroadcastManager(isDriver, conf) val cacheManager = new CacheManager(blockManager) // Have to assign trackerActor after initialization as MapOutputTrackerActor // requires the MapOutputTracker itself - val mapOutputTracker = new MapOutputTracker() + val mapOutputTracker = if (isDriver) { + new MapOutputTrackerMaster(conf) + } else { + new MapOutputTracker(conf) + } mapOutputTracker.trackerActor = registerOrLookup( "MapOutputTracker", - new MapOutputTrackerActor(mapOutputTracker)) + new MapOutputTrackerMasterActor(mapOutputTracker.asInstanceOf[MapOutputTrackerMaster])) val shuffleFetcher = instantiateClass[ShuffleFetcher]( "spark.shuffle.fetcher", "org.apache.spark.BlockStoreShuffleFetcher") val httpFileServer = new HttpFileServer() httpFileServer.initialize() - 
System.setProperty("spark.fileserver.uri", httpFileServer.serverUri) + conf.set("spark.fileserver.uri", httpFileServer.serverUri) val metricsSystem = if (isDriver) { - MetricsSystem.createMetricsSystem("driver") + MetricsSystem.createMetricsSystem("driver", conf) } else { - MetricsSystem.createMetricsSystem("executor") + MetricsSystem.createMetricsSystem("executor", conf) } metricsSystem.start() @@ -207,7 +211,7 @@ object SparkEnv extends Logging { } // Warn about deprecated spark.cache.class property - if (System.getProperty("spark.cache.class") != null) { + if (conf.contains("spark.cache.class")) { logWarning("The spark.cache.class property is no longer being used! Specify storage " + "levels using the RDD.persist() method instead.") } @@ -226,6 +230,7 @@ object SparkEnv extends Logging { connectionManager, httpFileServer, sparkFilesDir, - metricsSystem) + metricsSystem, + conf) } } diff --git a/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala index 103a1c205185f..4e63117a51334 100644 --- a/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala +++ b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala @@ -127,10 +127,6 @@ class SparkHadoopWriter(@transient jobConf: JobConf) cmtr.commitJob(getJobContext()) } - def cleanup() { - getOutputCommitter().cleanupJob(getJobContext()) - } - // ********* Private Functions ********* private def getOutputFormat(): OutputFormat[AnyRef,AnyRef] = { @@ -138,28 +134,28 @@ class SparkHadoopWriter(@transient jobConf: JobConf) format = conf.value.getOutputFormat() .asInstanceOf[OutputFormat[AnyRef,AnyRef]] } - return format + format } private def getOutputCommitter(): OutputCommitter = { if (committer == null) { committer = conf.value.getOutputCommitter } - return committer + committer } private def getJobContext(): JobContext = { if (jobContext == null) { jobContext = newJobContext(conf.value, jID.value) } - return jobContext + jobContext } private def getTaskContext(): TaskAttemptContext = { if (taskContext == null) { taskContext = newTaskAttemptContext(conf.value, taID.value) } - return taskContext + taskContext } private def setIDs(jobid: Int, splitid: Int, attemptid: Int) { @@ -186,19 +182,18 @@ object SparkHadoopWriter { def createJobID(time: Date, id: Int): JobID = { val formatter = new SimpleDateFormat("yyyyMMddHHmm") val jobtrackerID = formatter.format(new Date()) - return new JobID(jobtrackerID, id) + new JobID(jobtrackerID, id) } def createPathFromString(path: String, conf: JobConf): Path = { if (path == null) { throw new IllegalArgumentException("Output path is null") } - var outputPath = new Path(path) + val outputPath = new Path(path) val fs = outputPath.getFileSystem(conf) if (outputPath == null || fs == null) { throw new IllegalArgumentException("Incorrectly formatted output path") } - outputPath = outputPath.makeQualified(fs) - return outputPath + outputPath.makeQualified(fs) } } diff --git a/core/src/main/scala/org/apache/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala index c1e5e04b31e60..3fd6f5eb472f4 100644 --- a/core/src/main/scala/org/apache/spark/TaskEndReason.scala +++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala @@ -54,4 +54,15 @@ private[spark] case object TaskResultLost extends TaskEndReason private[spark] case object TaskKilled extends TaskEndReason -private[spark] case class OtherFailure(message: String) extends TaskEndReason +/** + * The task failed because the executor that it was running 
on was lost. This may happen because + * the task crashed the JVM. + */ +private[spark] case object ExecutorLostFailure extends TaskEndReason + +/** + * We don't know why the task ended -- for example, because of a ClassNotFound exception when + * deserializing the task result. + */ +private[spark] case object UnknownReason extends TaskEndReason + diff --git a/core/src/main/scala/org/apache/spark/TaskState.scala b/core/src/main/scala/org/apache/spark/TaskState.scala index 19ce8369d90c7..0bf1e4a5e2ccd 100644 --- a/core/src/main/scala/org/apache/spark/TaskState.scala +++ b/core/src/main/scala/org/apache/spark/TaskState.scala @@ -19,8 +19,7 @@ package org.apache.spark import org.apache.mesos.Protos.{TaskState => MesosTaskState} -private[spark] object TaskState - extends Enumeration("LAUNCHING", "RUNNING", "FINISHED", "FAILED", "KILLED", "LOST") { +private[spark] object TaskState extends Enumeration { val LAUNCHING, RUNNING, FINISHED, FAILED, KILLED, LOST = Value diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala index 043cb183bad17..b0dedc6f4eb13 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala @@ -17,18 +17,23 @@ package org.apache.spark.api.java +import scala.reflect.ClassTag + import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext.doubleRDDToDoubleRDDFunctions import org.apache.spark.api.java.function.{Function => JFunction} import org.apache.spark.util.StatCounter import org.apache.spark.partial.{BoundedDouble, PartialResult} import org.apache.spark.storage.StorageLevel + import java.lang.Double import org.apache.spark.Partitioner +import scala.collection.JavaConverters._ + class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[Double, JavaDoubleRDD] { - override val classManifest: ClassManifest[Double] = implicitly[ClassManifest[Double]] + override val classTag: ClassTag[Double] = implicitly[ClassTag[Double]] override val rdd: RDD[Double] = srdd.map(x => Double.valueOf(x)) @@ -42,7 +47,7 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[Double, Jav /** Persist this RDD with the default storage level (`MEMORY_ONLY`). */ def cache(): JavaDoubleRDD = fromRDD(srdd.cache()) - /** + /** * Set this RDD's storage level to persist its values across operations after the first time * it is computed. Can only be called once on each RDD. */ @@ -106,7 +111,7 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[Double, Jav /** * Return an RDD with the elements from `this` that are not in `other`. - * + * * Uses `this` partitioner/partition size, because even if `other` is huge, the resulting * RDD will be <= us. */ @@ -182,6 +187,50 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[Double, Jav /** (Experimental) Approximate operation to return the sum within a timeout. */ def sumApprox(timeout: Long): PartialResult[BoundedDouble] = srdd.sumApprox(timeout) + + /** + * Compute a histogram of the data using bucketCount number of buckets evenly + * spaced between the minimum and maximum of the RDD. For example if the min + * value is 0 and the max is 100 and there are two buckets the resulting + * buckets will be [0,50) [50,100]. bucketCount must be at least 1 + * If the RDD contains infinity, NaN throws an exception + * If the elements in RDD do not vary (max == min) always returns a single bucket. 
+ */ + def histogram(bucketCount: Int): Pair[Array[scala.Double], Array[Long]] = { + val result = srdd.histogram(bucketCount) + (result._1, result._2) + } + + /** + * Compute a histogram using the provided buckets. The buckets are all open + * to the left except for the last which is closed + * e.g. for the array + * [1,10,20,50] the buckets are [1,10) [10,20) [20,50] + * e.g 1<=x<10 , 10<=x<20, 20<=x<50 + * And on the input of 1 and 50 we would have a histogram of 1,0,0 + * + * Note: if your histogram is evenly spaced (e.g. [0, 10, 20, 30]) this can be switched + * from an O(log n) inseration to O(1) per element. (where n = # buckets) if you set evenBuckets + * to true. + * buckets must be sorted and not contain any duplicates. + * buckets array must be at least two elements + * All NaN entries are treated the same. If you have a NaN bucket it must be + * the maximum value of the last position and all NaN entries will be counted + * in that bucket. + */ + def histogram(buckets: Array[scala.Double]): Array[Long] = { + srdd.histogram(buckets, false) + } + + def histogram(buckets: Array[Double], evenBuckets: Boolean): Array[Long] = { + srdd.histogram(buckets.map(_.toDouble), evenBuckets) + } + + /** Assign a name to this RDD */ + def setName(name: String): JavaDoubleRDD = { + srdd.setName(name) + this + } } object JavaDoubleRDD { diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index 2142fd73278ac..f430a33db1e4a 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -22,6 +22,7 @@ import java.util.Comparator import scala.Tuple2 import scala.collection.JavaConversions._ +import scala.reflect.ClassTag import com.google.common.base.Optional import org.apache.hadoop.io.compress.CompressionCodec @@ -43,13 +44,12 @@ import org.apache.spark.rdd.OrderedRDDFunctions import org.apache.spark.storage.StorageLevel -class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManifest[K], - implicit val vManifest: ClassManifest[V]) extends JavaRDDLike[(K, V), JavaPairRDD[K, V]] { +class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kClassTag: ClassTag[K], + implicit val vClassTag: ClassTag[V]) extends JavaRDDLike[(K, V), JavaPairRDD[K, V]] { override def wrapRDD(rdd: RDD[(K, V)]): JavaPairRDD[K, V] = JavaPairRDD.fromRDD(rdd) - override val classManifest: ClassManifest[(K, V)] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[Tuple2[K, V]]] + override val classTag: ClassTag[(K, V)] = rdd.elementClassTag import JavaPairRDD._ @@ -58,7 +58,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif /** Persist this RDD with the default storage level (`MEMORY_ONLY`). */ def cache(): JavaPairRDD[K, V] = new JavaPairRDD[K, V](rdd.cache()) - /** + /** * Set this RDD's storage level to persist its values across operations after the first time * it is computed. Can only be called once on each RDD. */ @@ -138,14 +138,14 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif override def first(): (K, V) = rdd.first() // Pair RDD functions - + /** - * Generic function to combine the elements for each key using a custom set of aggregation - * functions. 
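A short sketch of the new histogram API (illustrative; shown on the underlying Scala RDD[Double] that JavaDoubleRDD wraps, assuming a SparkContext named sc):

    import org.apache.spark.SparkContext._   // doubleRDDToDoubleRDDFunctions

    val values = sc.parallelize(Seq(1.0, 3.0, 7.5, 9.0, 25.0))
    // Evenly spaced buckets between min and max; returns (bucket edges, counts).
    val (edges, counts) = values.histogram(2)
    // Caller-supplied edges: [0,5), [5,10), [10,50]; the last bucket is closed.
    val custom: Array[Long] = values.histogram(Array(0.0, 5.0, 10.0, 50.0), false)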
Turns a JavaPairRDD[(K, V)] into a result of type JavaPairRDD[(K, C)], for a - * "combined type" C * Note that V and C can be different -- for example, one might group an - * RDD of type (Int, Int) into an RDD of type (Int, List[Int]). Users provide three + * Generic function to combine the elements for each key using a custom set of aggregation + * functions. Turns a JavaPairRDD[(K, V)] into a result of type JavaPairRDD[(K, C)], for a + * "combined type" C * Note that V and C can be different -- for example, one might group an + * RDD of type (Int, Int) into an RDD of type (Int, List[Int]). Users provide three * functions: - * + * * - `createCombiner`, which turns a V into a C (e.g., creates a one-element list) * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list) * - `mergeCombiners`, to combine two C's into a single one. @@ -157,8 +157,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif mergeValue: JFunction2[C, V, C], mergeCombiners: JFunction2[C, C, C], partitioner: Partitioner): JavaPairRDD[K, C] = { - implicit val cm: ClassManifest[C] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[C]] + implicit val cm: ClassTag[C] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[C]] fromRDD(rdd.combineByKey( createCombiner, mergeValue, @@ -195,14 +194,14 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif /** Count the number of elements for each key, and return the result to the master as a Map. */ def countByKey(): java.util.Map[K, Long] = mapAsJavaMap(rdd.countByKey()) - /** + /** * (Experimental) Approximate version of countByKey that can return a partial result if it does * not finish within a timeout. */ def countByKeyApprox(timeout: Long): PartialResult[java.util.Map[K, BoundedDouble]] = rdd.countByKeyApprox(timeout).map(mapAsJavaMap) - /** + /** * (Experimental) Approximate version of countByKey that can return a partial result if it does * not finish within a timeout. */ @@ -258,7 +257,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif /** * Return an RDD with the elements from `this` that are not in `other`. - * + * * Uses `this` partitioner/partition size, because even if `other` is huge, the resulting * RDD will be <= us. */ @@ -315,15 +314,14 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif fromRDD(joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)}) } - /** + /** * Simplified version of combineByKey that hash-partitions the resulting RDD using the existing * partitioner/parallelism level. */ def combineByKey[C](createCombiner: JFunction[V, C], mergeValue: JFunction2[C, V, C], mergeCombiners: JFunction2[C, C, C]): JavaPairRDD[K, C] = { - implicit val cm: ClassManifest[C] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[C]] + implicit val cm: ClassTag[C] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[C]] fromRDD(combineByKey(createCombiner, mergeValue, mergeCombiners, defaultPartitioner(rdd))) } @@ -414,8 +412,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * this also retains the original RDD's partitioning. 
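The three-function contract described above is easiest to see in a worked sketch (editorial, in Scala on a plain pair RDD; assumes a SparkContext named sc): computing a per-key average.

    import org.apache.spark.SparkContext._   // pair RDD functions

    val pairs = sc.parallelize(Seq(("a", 1), ("a", 3), ("b", 4)))
    val avgByKey = pairs.combineByKey(
        (v: Int) => (v, 1),                                           // createCombiner: V => C
        (acc: (Int, Int), v: Int) => (acc._1 + v, acc._2 + 1),        // mergeValue: (C, V) => C
        (a: (Int, Int), b: (Int, Int)) => (a._1 + b._1, a._2 + b._2)  // mergeCombiners: (C, C) => C
      ).mapValues { case (sum, n) => sum.toDouble / n }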
*/ def mapValues[U](f: JFunction[V, U]): JavaPairRDD[K, U] = { - implicit val cm: ClassManifest[U] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[U]] + implicit val cm: ClassTag[U] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[U]] fromRDD(rdd.mapValues(f)) } @@ -426,8 +423,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif def flatMapValues[U](f: JFunction[V, java.lang.Iterable[U]]): JavaPairRDD[K, U] = { import scala.collection.JavaConverters._ def fn = (x: V) => f.apply(x).asScala - implicit val cm: ClassManifest[U] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[U]] + implicit val cm: ClassTag[U] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[U]] fromRDD(rdd.flatMapValues(fn)) } @@ -591,6 +587,20 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).sortByKey(ascending)) } + /** + * Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling + * `collect` or `save` on the resulting RDD will return or output an ordered list of records + * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in + * order of the keys). + */ + def sortByKey(comp: Comparator[K], ascending: Boolean, numPartitions: Int): JavaPairRDD[K, V] = { + class KeyOrdering(val a: K) extends Ordered[K] { + override def compare(b: K) = comp.compare(a, b) + } + implicit def toOrdered(x: K): Ordered[K] = new KeyOrdering(x) + fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).sortByKey(ascending, numPartitions)) + } + /** * Return an RDD with the keys of each tuple. */ @@ -600,25 +610,67 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * Return an RDD with the values of each tuple. */ def values(): JavaRDD[V] = JavaRDD.fromRDD[V](rdd.map(_._2)) + + /** + * Return approximate number of distinct values for each key in this RDD. + * The accuracy of approximation can be controlled through the relative standard deviation + * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in + * more accurate counts but increase the memory footprint and vise versa. Uses the provided + * Partitioner to partition the output RDD. + */ + def countApproxDistinctByKey(relativeSD: Double, partitioner: Partitioner): JavaRDD[(K, Long)] = { + rdd.countApproxDistinctByKey(relativeSD, partitioner) + } + + /** + * Return approximate number of distinct values for each key this RDD. + * The accuracy of approximation can be controlled through the relative standard deviation + * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in + * more accurate counts but increase the memory footprint and vise versa. The default value of + * relativeSD is 0.05. Hash-partitions the output RDD using the existing partitioner/parallelism + * level. + */ + def countApproxDistinctByKey(relativeSD: Double = 0.05): JavaRDD[(K, Long)] = { + rdd.countApproxDistinctByKey(relativeSD) + } + + + /** + * Return approximate number of distinct values for each key in this RDD. + * The accuracy of approximation can be controlled through the relative standard deviation + * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in + * more accurate counts but increase the memory footprint and vise versa. HashPartitions the + * output RDD into numPartitions. 
+ * + */ + def countApproxDistinctByKey(relativeSD: Double, numPartitions: Int): JavaRDD[(K, Long)] = { + rdd.countApproxDistinctByKey(relativeSD, numPartitions) + } + + /** Assign a name to this RDD */ + def setName(name: String): JavaPairRDD[K, V] = { + rdd.setName(name) + this + } } object JavaPairRDD { - def groupByResultToJava[K, T](rdd: RDD[(K, Seq[T])])(implicit kcm: ClassManifest[K], - vcm: ClassManifest[T]): RDD[(K, JList[T])] = + def groupByResultToJava[K, T](rdd: RDD[(K, Seq[T])])(implicit kcm: ClassTag[K], + vcm: ClassTag[T]): RDD[(K, JList[T])] = rddToPairRDDFunctions(rdd).mapValues(seqAsJavaList _) - def cogroupResultToJava[W, K, V](rdd: RDD[(K, (Seq[V], Seq[W]))])(implicit kcm: ClassManifest[K], - vcm: ClassManifest[V]): RDD[(K, (JList[V], JList[W]))] = rddToPairRDDFunctions(rdd).mapValues((x: (Seq[V], - Seq[W])) => (seqAsJavaList(x._1), seqAsJavaList(x._2))) + def cogroupResultToJava[W, K, V](rdd: RDD[(K, (Seq[V], Seq[W]))])(implicit kcm: ClassTag[K], + vcm: ClassTag[V]): RDD[(K, (JList[V], JList[W]))] = rddToPairRDDFunctions(rdd) + .mapValues((x: (Seq[V], Seq[W])) => (seqAsJavaList(x._1), seqAsJavaList(x._2))) def cogroupResult2ToJava[W1, W2, K, V](rdd: RDD[(K, (Seq[V], Seq[W1], - Seq[W2]))])(implicit kcm: ClassManifest[K]) : RDD[(K, (JList[V], JList[W1], + Seq[W2]))])(implicit kcm: ClassTag[K]) : RDD[(K, (JList[V], JList[W1], JList[W2]))] = rddToPairRDDFunctions(rdd).mapValues( (x: (Seq[V], Seq[W1], Seq[W2])) => (seqAsJavaList(x._1), seqAsJavaList(x._2), seqAsJavaList(x._3))) - def fromRDD[K: ClassManifest, V: ClassManifest](rdd: RDD[(K, V)]): JavaPairRDD[K, V] = + def fromRDD[K: ClassTag, V: ClassTag](rdd: RDD[(K, V)]): JavaPairRDD[K, V] = new JavaPairRDD[K, V](rdd) implicit def toRDD[K, V](rdd: JavaPairRDD[K, V]): RDD[(K, V)] = rdd.rdd @@ -626,10 +678,8 @@ object JavaPairRDD { /** Convert a JavaRDD of key-value pairs to JavaPairRDD. 
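A sketch of the approximate-distinct counters above (illustrative, in Scala on the wrapped pair RDD; assumes a SparkContext named sc and the default 5% relative standard deviation):

    import org.apache.spark.SparkContext._   // pair RDD functions

    val visits = sc.parallelize(Seq(("page1", "userA"), ("page1", "userB"), ("page2", "userA")))
    // Approximate number of distinct users per page; cheaper than a full distinct().count().
    val approxUsersPerPage = visits.countApproxDistinctByKey(0.05)
    // Whole-RDD variant: approximate count of distinct (page, user) pairs.
    val approxDistinctPairs = visits.countApproxDistinct(0.05)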
*/ def fromJavaRDD[K, V](rdd: JavaRDD[(K, V)]): JavaPairRDD[K, V] = { - implicit val cmk: ClassManifest[K] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[K]] - implicit val cmv: ClassManifest[V] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[V]] + implicit val cmk: ClassTag[K] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K]] + implicit val cmv: ClassTag[V] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]] new JavaPairRDD[K, V](rdd.rdd) } diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala index 3b359a8fd6094..7d48ce01cf2cc 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala @@ -17,12 +17,14 @@ package org.apache.spark.api.java +import scala.reflect.ClassTag + import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.api.java.function.{Function => JFunction} import org.apache.spark.storage.StorageLevel -class JavaRDD[T](val rdd: RDD[T])(implicit val classManifest: ClassManifest[T]) extends +class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) extends JavaRDDLike[T, JavaRDD[T]] { override def wrapRDD(rdd: RDD[T]): JavaRDD[T] = JavaRDD.fromRDD(rdd) @@ -123,12 +125,19 @@ JavaRDDLike[T, JavaRDD[T]] { */ def subtract(other: JavaRDD[T], p: Partitioner): JavaRDD[T] = wrapRDD(rdd.subtract(other, p)) + + override def toString = rdd.toString + + /** Assign a name to this RDD */ + def setName(name: String): JavaRDD[T] = { + rdd.setName(name) + this + } } object JavaRDD { - implicit def fromRDD[T: ClassManifest](rdd: RDD[T]): JavaRDD[T] = new JavaRDD[T](rdd) + implicit def fromRDD[T: ClassTag](rdd: RDD[T]): JavaRDD[T] = new JavaRDD[T](rdd) implicit def toRDD[T](rdd: JavaRDD[T]): RDD[T] = rdd.rdd } - diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index 2283a8e23a40b..0818ee4dbc801 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -20,6 +20,7 @@ package org.apache.spark.api.java import java.util.{List => JList, Comparator} import scala.Tuple2 import scala.collection.JavaConversions._ +import scala.reflect.ClassTag import com.google.common.base.Optional import org.apache.hadoop.io.compress.CompressionCodec @@ -35,7 +36,7 @@ import org.apache.spark.storage.StorageLevel trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { def wrapRDD(rdd: RDD[T]): This - implicit val classManifest: ClassManifest[T] + implicit val classTag: ClassTag[T] def rdd: RDD[T] @@ -71,7 +72,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Return a new RDD by applying a function to each partition of this RDD, while tracking the index * of the original partition. */ - def mapPartitionsWithIndex[R: ClassManifest]( + def mapPartitionsWithIndex[R: ClassTag]( f: JFunction2[Int, java.util.Iterator[T], java.util.Iterator[R]], preservesPartitioning: Boolean = false): JavaRDD[R] = new JavaRDD(rdd.mapPartitionsWithIndex(((a,b) => f(a,asJavaIterator(b))), @@ -87,7 +88,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Return a new RDD by applying a function to all elements of this RDD. 
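A small sketch of mapPartitionsWithIndex as surfaced above (editorial; Scala form, assuming a SparkContext named sc): tag each element with the index of the partition it came from.

    val tagged = sc.parallelize(1 to 10, 3).mapPartitionsWithIndex { (idx, iter) =>
      iter.map(x => (idx, x))        // e.g. (0,1), (0,2), (0,3), (1,4), ...
    }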
*/ def map[K2, V2](f: PairFunction[T, K2, V2]): JavaPairRDD[K2, V2] = { - def cm = implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[Tuple2[K2, V2]]] + def cm = implicitly[ClassTag[Tuple2[_, _]]].asInstanceOf[ClassTag[Tuple2[K2, V2]]] new JavaPairRDD(rdd.map(f)(cm))(f.keyType(), f.valueType()) } @@ -118,7 +119,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { def flatMap[K2, V2](f: PairFlatMapFunction[T, K2, V2]): JavaPairRDD[K2, V2] = { import scala.collection.JavaConverters._ def fn = (x: T) => f.apply(x).asScala - def cm = implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[Tuple2[K2, V2]]] + def cm = implicitly[ClassTag[Tuple2[_, _]]].asInstanceOf[ClassTag[Tuple2[K2, V2]]] JavaPairRDD.fromRDD(rdd.flatMap(fn)(cm))(f.keyType(), f.valueType()) } @@ -158,18 +159,16 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * elements (a, b) where a is in `this` and b is in `other`. */ def cartesian[U](other: JavaRDDLike[U, _]): JavaPairRDD[T, U] = - JavaPairRDD.fromRDD(rdd.cartesian(other.rdd)(other.classManifest))(classManifest, - other.classManifest) + JavaPairRDD.fromRDD(rdd.cartesian(other.rdd)(other.classTag))(classTag, other.classTag) /** * Return an RDD of grouped elements. Each group consists of a key and a sequence of elements * mapping to that key. */ def groupBy[K](f: JFunction[T, K]): JavaPairRDD[K, JList[T]] = { - implicit val kcm: ClassManifest[K] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[K]] - implicit val vcm: ClassManifest[JList[T]] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[JList[T]]] + implicit val kcm: ClassTag[K] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K]] + implicit val vcm: ClassTag[JList[T]] = + implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[JList[T]]] JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f)(f.returnType)))(kcm, vcm) } @@ -178,10 +177,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * mapping to that key. */ def groupBy[K](f: JFunction[T, K], numPartitions: Int): JavaPairRDD[K, JList[T]] = { - implicit val kcm: ClassManifest[K] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[K]] - implicit val vcm: ClassManifest[JList[T]] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[JList[T]]] + implicit val kcm: ClassTag[K] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K]] + implicit val vcm: ClassTag[JList[T]] = + implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[JList[T]]] JavaPairRDD.fromRDD(groupByResultToJava(rdd.groupBy(f, numPartitions)(f.returnType)))(kcm, vcm) } @@ -209,7 +207,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * a map on the other). 
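For the pairing operations above, a compact sketch (editorial; Scala API, assuming a SparkContext named sc). zip requires the two RDDs to have the same number of partitions and the same number of elements in each partition; cartesian does not.

    val nums  = sc.parallelize(1 to 4, 2)
    val words = sc.parallelize(Seq("a", "b", "c", "d"), 2)
    val zipped  = nums.zip(words)        // RDD[(Int, String)]: (1,"a"), (2,"b"), (3,"c"), (4,"d")
    val crossed = nums.cartesian(words)  // all 16 (Int, String) combinations
    val grouped = words.groupBy(_.head)  // key each word by its first character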
*/ def zip[U](other: JavaRDDLike[U, _]): JavaPairRDD[T, U] = { - JavaPairRDD.fromRDD(rdd.zip(other.rdd)(other.classManifest))(classManifest, other.classManifest) + JavaPairRDD.fromRDD(rdd.zip(other.rdd)(other.classTag))(classTag, other.classTag) } /** @@ -224,7 +222,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { def fn = (x: Iterator[T], y: Iterator[U]) => asScalaIterator( f.apply(asJavaIterator(x), asJavaIterator(y)).iterator()) JavaRDD.fromRDD( - rdd.zipPartitions(other.rdd)(fn)(other.classManifest, f.elementType()))(f.elementType()) + rdd.zipPartitions(other.rdd)(fn)(other.classTag, f.elementType()))(f.elementType()) } // Actions (launch a job to return a value to the user program) @@ -246,6 +244,11 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { new java.util.ArrayList(arr) } + /** + * Return an array that contains all of the elements in this RDD. + */ + def toArray(): JList[T] = collect() + /** * Return an array that contains all of the elements in a specific partition of this RDD. */ @@ -367,7 +370,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Creates tuples of the elements in this RDD by applying `f`. */ def keyBy[K](f: JFunction[T, K]): JavaPairRDD[K, T] = { - implicit val kcm: ClassManifest[K] = implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[K]] + implicit val kcm: ClassTag[K] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K]] JavaPairRDD.fromRDD(rdd.keyBy(f)) } @@ -446,4 +449,16 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { val comp = com.google.common.collect.Ordering.natural().asInstanceOf[Comparator[T]] takeOrdered(num, comp) } + + /** + * Return approximate number of distinct elements in the RDD. + * + * The accuracy of approximation can be controlled through the relative standard deviation + * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in + * more accurate counts but increase the memory footprint and vise versa. The default value of + * relativeSD is 0.05. + */ + def countApproxDistinct(relativeSD: Double = 0.05): Long = rdd.countApproxDistinct(relativeSD) + + def name(): String = rdd.name } diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala index 8869e072bf1ce..c777472cd7310 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala @@ -21,6 +21,7 @@ import java.util.{Map => JMap} import scala.collection.JavaConversions import scala.collection.JavaConversions._ +import scala.reflect.ClassTag import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapred.InputFormat @@ -28,17 +29,22 @@ import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import com.google.common.base.Optional -import org.apache.spark.{Accumulable, AccumulableParam, Accumulator, AccumulatorParam, SparkContext} +import org.apache.spark._ import org.apache.spark.SparkContext.IntAccumulatorParam import org.apache.spark.SparkContext.DoubleAccumulatorParam import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD + /** - * A Java-friendly version of [[org.apache.spark.SparkContext]] that returns [[org.apache.spark.api.java.JavaRDD]]s and - * works with Java collections instead of Scala ones. 
+ * A Java-friendly version of [[org.apache.spark.SparkContext]] that returns + * [[org.apache.spark.api.java.JavaRDD]]s and works with Java collections instead of Scala ones. */ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWorkaround { + /** + * @param conf a [[org.apache.spark.SparkConf]] object specifying Spark parameters + */ + def this(conf: SparkConf) = this(new SparkContext(conf)) /** * @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]). @@ -46,6 +52,14 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork */ def this(master: String, appName: String) = this(new SparkContext(master, appName)) + /** + * @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]). + * @param appName A name for your application, to display on the cluster web UI + * @param conf a [[org.apache.spark.SparkConf]] object specifying other Spark parameters + */ + def this(master: String, appName: String, conf: SparkConf) = + this(conf.setMaster(master).setAppName(appName)) + /** * @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]). * @param appName A name for your application, to display on the cluster web UI @@ -82,8 +96,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork /** Distribute a local Scala collection to form an RDD. */ def parallelize[T](list: java.util.List[T], numSlices: Int): JavaRDD[T] = { - implicit val cm: ClassManifest[T] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]] + implicit val cm: ClassTag[T] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]] sc.parallelize(JavaConversions.asScalaBuffer(list), numSlices) } @@ -94,10 +107,8 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork /** Distribute a local Scala collection to form an RDD. */ def parallelizePairs[K, V](list: java.util.List[Tuple2[K, V]], numSlices: Int) : JavaPairRDD[K, V] = { - implicit val kcm: ClassManifest[K] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[K]] - implicit val vcm: ClassManifest[V] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[V]] + implicit val kcm: ClassTag[K] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[K]] + implicit val vcm: ClassTag[V] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]] JavaPairRDD.fromRDD(sc.parallelize(JavaConversions.asScalaBuffer(list), numSlices)) } @@ -126,22 +137,34 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork */ def textFile(path: String, minSplits: Int): JavaRDD[String] = sc.textFile(path, minSplits) - /**Get an RDD for a Hadoop SequenceFile with given key and value types. */ + /** Get an RDD for a Hadoop SequenceFile with given key and value types. + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. 
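The constructors added above let a JavaSparkContext be created directly from a SparkConf. A minimal sketch, with placeholder master URL and application name:
{{{
import org.apache.spark.SparkConf
import org.apache.spark.api.java.JavaSparkContext

val conf = new SparkConf().setMaster("local[2]").setAppName("ConfExample")  // placeholder values
val jsc = new JavaSparkContext(conf)
// new JavaSparkContext("local[2]", "ConfExample", new SparkConf()) sets master/appName on the conf for you
}}}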
+ * */ def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V], minSplits: Int ): JavaPairRDD[K, V] = { - implicit val kcm = ClassManifest.fromClass(keyClass) - implicit val vcm = ClassManifest.fromClass(valueClass) + implicit val kcm: ClassTag[K] = ClassTag(keyClass) + implicit val vcm: ClassTag[V] = ClassTag(valueClass) new JavaPairRDD(sc.sequenceFile(path, keyClass, valueClass, minSplits)) } - /**Get an RDD for a Hadoop SequenceFile. */ + /** Get an RDD for a Hadoop SequenceFile. + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. + */ def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V]): JavaPairRDD[K, V] = { - implicit val kcm = ClassManifest.fromClass(keyClass) - implicit val vcm = ClassManifest.fromClass(valueClass) + implicit val kcm: ClassTag[K] = ClassTag(keyClass) + implicit val vcm: ClassTag[V] = ClassTag(valueClass) new JavaPairRDD(sc.sequenceFile(path, keyClass, valueClass)) } @@ -153,8 +176,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork * that there's very little effort required to save arbitrary objects. */ def objectFile[T](path: String, minSplits: Int): JavaRDD[T] = { - implicit val cm: ClassManifest[T] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]] + implicit val cm: ClassTag[T] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]] sc.objectFile(path, minSplits)(cm) } @@ -166,8 +188,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork * that there's very little effort required to save arbitrary objects. */ def objectFile[T](path: String): JavaRDD[T] = { - implicit val cm: ClassManifest[T] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]] + implicit val cm: ClassTag[T] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]] sc.objectFile(path)(cm) } @@ -175,6 +196,11 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork * Get an RDD for a Hadoop-readable dataset from a Hadooop JobConf giving its InputFormat and any * other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable, * etc). + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. */ def hadoopRDD[K, V, F <: InputFormat[K, V]]( conf: JobConf, @@ -183,8 +209,8 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork valueClass: Class[V], minSplits: Int ): JavaPairRDD[K, V] = { - implicit val kcm = ClassManifest.fromClass(keyClass) - implicit val vcm = ClassManifest.fromClass(valueClass) + implicit val kcm: ClassTag[K] = ClassTag(keyClass) + implicit val vcm: ClassTag[V] = ClassTag(valueClass) new JavaPairRDD(sc.hadoopRDD(conf, inputFormatClass, keyClass, valueClass, minSplits)) } @@ -192,6 +218,11 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork * Get an RDD for a Hadoop-readable dataset from a Hadooop JobConf giving its InputFormat and any * other necessary info (e.g. 
file name for a filesystem-based dataset, table name for HyperTable, * etc). + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. */ def hadoopRDD[K, V, F <: InputFormat[K, V]]( conf: JobConf, @@ -199,12 +230,18 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork keyClass: Class[K], valueClass: Class[V] ): JavaPairRDD[K, V] = { - implicit val kcm = ClassManifest.fromClass(keyClass) - implicit val vcm = ClassManifest.fromClass(valueClass) + implicit val kcm: ClassTag[K] = ClassTag(keyClass) + implicit val vcm: ClassTag[V] = ClassTag(valueClass) new JavaPairRDD(sc.hadoopRDD(conf, inputFormatClass, keyClass, valueClass)) } - /** Get an RDD for a Hadoop file with an arbitrary InputFormat */ + /** Get an RDD for a Hadoop file with an arbitrary InputFormat. + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. + */ def hadoopFile[K, V, F <: InputFormat[K, V]]( path: String, inputFormatClass: Class[F], @@ -212,20 +249,26 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork valueClass: Class[V], minSplits: Int ): JavaPairRDD[K, V] = { - implicit val kcm = ClassManifest.fromClass(keyClass) - implicit val vcm = ClassManifest.fromClass(valueClass) + implicit val kcm: ClassTag[K] = ClassTag(keyClass) + implicit val vcm: ClassTag[V] = ClassTag(valueClass) new JavaPairRDD(sc.hadoopFile(path, inputFormatClass, keyClass, valueClass, minSplits)) } - /** Get an RDD for a Hadoop file with an arbitrary InputFormat */ + /** Get an RDD for a Hadoop file with an arbitrary InputFormat + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. + */ def hadoopFile[K, V, F <: InputFormat[K, V]]( path: String, inputFormatClass: Class[F], keyClass: Class[K], valueClass: Class[V] ): JavaPairRDD[K, V] = { - implicit val kcm = ClassManifest.fromClass(keyClass) - implicit val vcm = ClassManifest.fromClass(valueClass) + implicit val kcm: ClassTag[K] = ClassTag(keyClass) + implicit val vcm: ClassTag[V] = ClassTag(valueClass) new JavaPairRDD(sc.hadoopFile(path, inputFormatClass, keyClass, valueClass)) } @@ -233,6 +276,11 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork /** * Get an RDD for a given Hadoop file with an arbitrary new API InputFormat * and extra configuration options to pass to the input format. + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. 
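The '''Note:''' repeated above warns that Hadoop's RecordReader recycles a single Writable per record, so caching the RDD as returned would cache many references to one mutable object. A sketch of the copy-before-cache pattern it recommends, written here against the Scala SparkContext with a placeholder path and illustrative types:
{{{
import org.apache.hadoop.io.{IntWritable, Text}

// `sc` is assumed to be a plain SparkContext; the path is a placeholder.
val raw = sc.sequenceFile("hdfs://namenode/path/to/data", classOf[IntWritable], classOf[Text])
val copied = raw.map { case (k, v) => (k.get, v.toString) }  // materialize plain values per record
copied.cache()
}}}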
*/ def newAPIHadoopFile[K, V, F <: NewInputFormat[K, V]]( path: String, @@ -240,29 +288,34 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork kClass: Class[K], vClass: Class[V], conf: Configuration): JavaPairRDD[K, V] = { - implicit val kcm = ClassManifest.fromClass(kClass) - implicit val vcm = ClassManifest.fromClass(vClass) + implicit val kcm: ClassTag[K] = ClassTag(kClass) + implicit val vcm: ClassTag[V] = ClassTag(vClass) new JavaPairRDD(sc.newAPIHadoopFile(path, fClass, kClass, vClass, conf)) } /** * Get an RDD for a given Hadoop file with an arbitrary new API InputFormat * and extra configuration options to pass to the input format. + * + * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * record, directly caching the returned RDD will create many references to the same object. + * If you plan to directly cache Hadoop writable objects, you should first copy them using + * a `map` function. */ def newAPIHadoopRDD[K, V, F <: NewInputFormat[K, V]]( conf: Configuration, fClass: Class[F], kClass: Class[K], vClass: Class[V]): JavaPairRDD[K, V] = { - implicit val kcm = ClassManifest.fromClass(kClass) - implicit val vcm = ClassManifest.fromClass(vClass) + implicit val kcm: ClassTag[K] = ClassTag(kClass) + implicit val vcm: ClassTag[V] = ClassTag(vClass) new JavaPairRDD(sc.newAPIHadoopRDD(conf, fClass, kClass, vClass)) } /** Build the union of two or more RDDs. */ override def union[T](first: JavaRDD[T], rest: java.util.List[JavaRDD[T]]): JavaRDD[T] = { val rdds: Seq[RDD[T]] = (Seq(first) ++ asScalaBuffer(rest)).map(_.rdd) - implicit val cm: ClassManifest[T] = first.classManifest + implicit val cm: ClassTag[T] = first.classTag sc.union(rdds)(cm) } @@ -270,9 +323,9 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork override def union[K, V](first: JavaPairRDD[K, V], rest: java.util.List[JavaPairRDD[K, V]]) : JavaPairRDD[K, V] = { val rdds: Seq[RDD[(K, V)]] = (Seq(first) ++ asScalaBuffer(rest)).map(_.rdd) - implicit val cm: ClassManifest[(K, V)] = first.classManifest - implicit val kcm: ClassManifest[K] = first.kManifest - implicit val vcm: ClassManifest[V] = first.vManifest + implicit val cm: ClassTag[(K, V)] = first.classTag + implicit val kcm: ClassTag[K] = first.kClassTag + implicit val vcm: ClassTag[V] = first.vClassTag new JavaPairRDD(sc.union(rdds)(cm))(kcm, vcm) } @@ -324,8 +377,9 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork sc.accumulable(initialValue)(param) /** - * Broadcast a read-only variable to the cluster, returning a [[org.apache.spark.Broadcast]] object for - * reading it in distributed functions. The variable will be sent to each cluster only once. + * Broadcast a read-only variable to the cluster, returning a + * [[org.apache.spark.broadcast.Broadcast]] object for reading it in distributed functions. + * The variable will be sent to each cluster only once. */ def broadcast[T](value: T): Broadcast[T] = sc.broadcast(value) @@ -385,34 +439,101 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork /** * Set the directory under which RDDs are going to be checkpointed. The directory must - * be a HDFS path if running on a cluster. If the directory does not exist, it will - * be created. If the directory exists and useExisting is set to true, then the - * exisiting directory will be used. 
Otherwise an exception will be thrown to - * prevent accidental overriding of checkpoint files in the existing directory. - */ - def setCheckpointDir(dir: String, useExisting: Boolean) { - sc.setCheckpointDir(dir, useExisting) - } - - /** - * Set the directory under which RDDs are going to be checkpointed. The directory must - * be a HDFS path if running on a cluster. If the directory does not exist, it will - * be created. If the directory exists, an exception will be thrown to prevent accidental - * overriding of checkpoint files. + * be a HDFS path if running on a cluster. */ def setCheckpointDir(dir: String) { sc.setCheckpointDir(dir) } + def getCheckpointDir = JavaUtils.optionToOptional(sc.getCheckpointDir) + protected def checkpointFile[T](path: String): JavaRDD[T] = { - implicit val cm: ClassManifest[T] = - implicitly[ClassManifest[AnyRef]].asInstanceOf[ClassManifest[T]] + implicit val cm: ClassTag[T] = + implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]] new JavaRDD(sc.checkpointFile(path)) } + + /** + * Return a copy of this JavaSparkContext's configuration. The configuration ''cannot'' be + * changed at runtime. + */ + def getConf: SparkConf = sc.getConf + + /** + * Pass-through to SparkContext.setCallSite. For API support only. + */ + def setCallSite(site: String) { + sc.setCallSite(site) + } + + /** + * Pass-through to SparkContext.setCallSite. For API support only. + */ + def clearCallSite() { + sc.clearCallSite() + } + + /** + * Set a local property that affects jobs submitted from this thread, such as the + * Spark fair scheduler pool. + */ + def setLocalProperty(key: String, value: String): Unit = sc.setLocalProperty(key, value) + + /** + * Get a local property set in this thread, or null if it is missing. See + * [[org.apache.spark.api.java.JavaSparkContext.setLocalProperty]]. + */ + def getLocalProperty(key: String): String = sc.getLocalProperty(key) + + /** + * Assigns a group ID to all the jobs started by this thread until the group ID is set to a + * different value or cleared. + * + * Often, a unit of execution in an application consists of multiple Spark actions or jobs. + * Application programmers can use this method to group all those jobs together and give a + * group description. Once set, the Spark web UI will associate such jobs with this group. + * + * The application can also use [[org.apache.spark.api.java.JavaSparkContext.cancelJobGroup]] + * to cancel all running jobs in this group. For example, + * {{{ + * // In the main thread: + * sc.setJobGroup("some_job_to_cancel", "some job description"); + * rdd.map(...).count(); + * + * // In a separate thread: + * sc.cancelJobGroup("some_job_to_cancel"); + * }}} + */ + def setJobGroup(groupId: String, description: String): Unit = sc.setJobGroup(groupId, description) + + /** Clear the current thread's job group ID and its description. */ + def clearJobGroup(): Unit = sc.clearJobGroup() + + /** + * Cancel active jobs for the specified group. See + * [[org.apache.spark.api.java.JavaSparkContext.setJobGroup]] for more information. + */ + def cancelJobGroup(groupId: String): Unit = sc.cancelJobGroup(groupId) + + /** Cancel all jobs that have been scheduled or are running. 
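setCheckpointDir above drops its useExisting variant; checkpointing itself is unchanged. A small usage sketch, with a placeholder context name and path:
{{{
// `jsc` is assumed to be an existing JavaSparkContext; the path is a placeholder.
jsc.setCheckpointDir("hdfs://namenode/checkpoints")
val nums = jsc.parallelize(java.util.Arrays.asList(1, 2, 3))
nums.checkpoint()  // written to the checkpoint dir the next time a job runs on this RDD
nums.count()
}}}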
*/ + def cancelAllJobs(): Unit = sc.cancelAllJobs() } object JavaSparkContext { implicit def fromSparkContext(sc: SparkContext): JavaSparkContext = new JavaSparkContext(sc) implicit def toSparkContext(jsc: JavaSparkContext): SparkContext = jsc.sc + + /** + * Find the JAR from which a given class was loaded, to make it easy for users to pass + * their JARs to SparkContext. + */ + def jarOfClass(cls: Class[_]): Array[String] = SparkContext.jarOfClass(cls).toArray + + /** + * Find the JAR that contains the class of a particular object, to make it easy for users + * to pass their JARs to SparkContext. In most cases you can call jarOfObject(this) in + * your driver program. + */ + def jarOfObject(obj: AnyRef): Array[String] = SparkContext.jarOfObject(obj).toArray } diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java index c9cbce5624afc..2090efd3b9990 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java @@ -17,7 +17,6 @@ package org.apache.spark.api.java; -import java.util.Arrays; import java.util.ArrayList; import java.util.List; diff --git a/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala index 2dfda8b09a880..bdb01f7670356 100644 --- a/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala +++ b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala @@ -17,9 +17,11 @@ package org.apache.spark.api.java.function +import scala.reflect.ClassTag + /** * A function that returns zero or more output records from each input record. */ abstract class FlatMapFunction[T, R] extends Function[T, java.lang.Iterable[R]] { - def elementType() : ClassManifest[R] = ClassManifest.Any.asInstanceOf[ClassManifest[R]] + def elementType(): ClassTag[R] = ClassTag.Any.asInstanceOf[ClassTag[R]] } diff --git a/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala index 528e1c0a7cd64..aae1349c5e17c 100644 --- a/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala +++ b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala @@ -17,9 +17,11 @@ package org.apache.spark.api.java.function +import scala.reflect.ClassTag + /** * A function that takes two inputs and returns zero or more output records. 
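The jarOfClass and jarOfObject helpers added above exist so a driver can locate its own jar and ship it to the cluster. A sketch assuming the code sits inside the user's driver object, with placeholder master URL and application name:
{{{
import org.apache.spark.SparkConf
import org.apache.spark.api.java.JavaSparkContext

// Inside a hypothetical driver object, `this` is the driver itself.
val jars = JavaSparkContext.jarOfObject(this)
val conf = new SparkConf().setMaster("spark://host:7077").setAppName("MyDriver").setJars(jars)
val jsc = new JavaSparkContext(conf)
}}}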
*/ abstract class FlatMapFunction2[A, B, C] extends Function2[A, B, java.lang.Iterable[C]] { - def elementType() : ClassManifest[C] = ClassManifest.Any.asInstanceOf[ClassManifest[C]] + def elementType() : ClassTag[C] = ClassTag.Any.asInstanceOf[ClassTag[C]] } diff --git a/core/src/main/scala/org/apache/spark/api/java/function/Function.java b/core/src/main/scala/org/apache/spark/api/java/function/Function.java index ce368ee01b295..537439ef53888 100644 --- a/core/src/main/scala/org/apache/spark/api/java/function/Function.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/Function.java @@ -17,8 +17,8 @@ package org.apache.spark.api.java.function; -import scala.reflect.ClassManifest; -import scala.reflect.ClassManifest$; +import scala.reflect.ClassTag; +import scala.reflect.ClassTag$; import java.io.Serializable; @@ -29,8 +29,8 @@ * when mapping RDDs of other types. */ public abstract class Function extends WrappedFunction1 implements Serializable { - public ClassManifest returnType() { - return (ClassManifest) ClassManifest$.MODULE$.fromClass(Object.class); + public ClassTag returnType() { + return ClassTag$.MODULE$.apply(Object.class); } } diff --git a/core/src/main/scala/org/apache/spark/api/java/function/Function2.java b/core/src/main/scala/org/apache/spark/api/java/function/Function2.java index 44ad559d48474..a2d1214fb4692 100644 --- a/core/src/main/scala/org/apache/spark/api/java/function/Function2.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/Function2.java @@ -17,8 +17,8 @@ package org.apache.spark.api.java.function; -import scala.reflect.ClassManifest; -import scala.reflect.ClassManifest$; +import scala.reflect.ClassTag; +import scala.reflect.ClassTag$; import java.io.Serializable; @@ -28,8 +28,8 @@ public abstract class Function2 extends WrappedFunction2 implements Serializable { - public ClassManifest returnType() { - return (ClassManifest) ClassManifest$.MODULE$.fromClass(Object.class); + public ClassTag returnType() { + return (ClassTag) ClassTag$.MODULE$.apply(Object.class); } } diff --git a/core/src/main/scala/org/apache/spark/api/java/function/Function3.java b/core/src/main/scala/org/apache/spark/api/java/function/Function3.java index 51c2f7e0afd55..fb1deceab5b56 100644 --- a/core/src/main/scala/org/apache/spark/api/java/function/Function3.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/Function3.java @@ -17,20 +17,20 @@ package org.apache.spark.api.java.function; -import scala.reflect.ClassManifest; -import scala.reflect.ClassManifest$; +import scala.reflect.ClassTag; +import scala.reflect.ClassTag$; +import scala.runtime.AbstractFunction2; import java.io.Serializable; /** - * A three-argument function that takes arguments of type T1, T2, and T3 and returns an R. + * A three-argument function that takes arguments of type T1, T2 and T3 and returns an R. 
*/ public abstract class Function3 extends WrappedFunction3 - implements Serializable { + implements Serializable { - public abstract R call(T1 t1, T2 t2, T3 t3) throws Exception; - - public ClassManifest returnType() { - return (ClassManifest) ClassManifest$.MODULE$.fromClass(Object.class); - } + public ClassTag returnType() { + return (ClassTag) ClassTag$.MODULE$.apply(Object.class); + } } + diff --git a/core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.java index 6d76a8f970f7c..ca485b3cc2d93 100644 --- a/core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.java @@ -18,8 +18,8 @@ package org.apache.spark.api.java.function; import scala.Tuple2; -import scala.reflect.ClassManifest; -import scala.reflect.ClassManifest$; +import scala.reflect.ClassTag; +import scala.reflect.ClassTag$; import java.io.Serializable; @@ -33,11 +33,11 @@ public abstract class PairFlatMapFunction extends WrappedFunction1>> implements Serializable { - public ClassManifest keyType() { - return (ClassManifest) ClassManifest$.MODULE$.fromClass(Object.class); + public ClassTag keyType() { + return (ClassTag) ClassTag$.MODULE$.apply(Object.class); } - public ClassManifest valueType() { - return (ClassManifest) ClassManifest$.MODULE$.fromClass(Object.class); + public ClassTag valueType() { + return (ClassTag) ClassTag$.MODULE$.apply(Object.class); } } diff --git a/core/src/main/scala/org/apache/spark/api/java/function/PairFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/PairFunction.java index ede7ceefb5d29..cbe2306026a61 100644 --- a/core/src/main/scala/org/apache/spark/api/java/function/PairFunction.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/PairFunction.java @@ -18,8 +18,8 @@ package org.apache.spark.api.java.function; import scala.Tuple2; -import scala.reflect.ClassManifest; -import scala.reflect.ClassManifest$; +import scala.reflect.ClassTag; +import scala.reflect.ClassTag$; import java.io.Serializable; @@ -31,11 +31,11 @@ public abstract class PairFunction extends WrappedFunction1> implements Serializable { - public ClassManifest keyType() { - return (ClassManifest) ClassManifest$.MODULE$.fromClass(Object.class); + public ClassTag keyType() { + return (ClassTag) ClassTag$.MODULE$.apply(Object.class); } - public ClassManifest valueType() { - return (ClassManifest) ClassManifest$.MODULE$.fromClass(Object.class); + public ClassTag valueType() { + return (ClassTag) ClassTag$.MODULE$.apply(Object.class); } } diff --git a/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction3.scala b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction3.scala index 129da30bb1df6..d314dbdf1d980 100644 --- a/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction3.scala +++ b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction3.scala @@ -31,3 +31,4 @@ private[spark] abstract class WrappedFunction3[T1, T2, T3, R] final def apply(t1: T1, t2: T2, t3: T3): R = call(t1, t2, t3) } + diff --git a/core/src/main/scala/org/apache/spark/api/java/package.scala b/core/src/main/scala/org/apache/spark/api/java/package.scala new file mode 100644 index 0000000000000..8ec770046abe9 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/api/java/package.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api + +/** Spark Java programming APIs. */ +package object java { + // For package docs only +} diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 12b4d94a567ce..9cbd26b607b24 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -22,18 +22,17 @@ import java.net._ import java.util.{List => JList, ArrayList => JArrayList, Map => JMap, Collections} import scala.collection.JavaConversions._ +import scala.reflect.ClassTag import org.apache.spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} import org.apache.spark.broadcast.Broadcast import org.apache.spark._ import org.apache.spark.rdd.RDD -import org.apache.spark.rdd.PipedRDD import org.apache.spark.util.Utils - -private[spark] class PythonRDD[T: ClassManifest]( +private[spark] class PythonRDD[T: ClassTag]( parent: RDD[T], - command: Seq[String], + command: Array[Byte], envVars: JMap[String, String], pythonIncludes: JList[String], preservePartitoning: Boolean, @@ -42,28 +41,19 @@ private[spark] class PythonRDD[T: ClassManifest]( accumulator: Accumulator[JList[Array[Byte]]]) extends RDD[Array[Byte]](parent) { - val bufferSize = System.getProperty("spark.buffer.size", "65536").toInt - - // Similar to Runtime.exec(), if we are given a single string, split it into words - // using a standard StringTokenizer (i.e. 
by spaces) - def this(parent: RDD[T], command: String, envVars: JMap[String, String], - pythonIncludes: JList[String], - preservePartitoning: Boolean, pythonExec: String, - broadcastVars: JList[Broadcast[Array[Byte]]], - accumulator: Accumulator[JList[Array[Byte]]]) = - this(parent, PipedRDD.tokenize(command), envVars, pythonIncludes, preservePartitoning, pythonExec, - broadcastVars, accumulator) + val bufferSize = conf.getInt("spark.buffer.size", 65536) override def getPartitions = parent.partitions override val partitioner = if (preservePartitoning) parent.partitioner else None - override def compute(split: Partition, context: TaskContext): Iterator[Array[Byte]] = { val startTime = System.currentTimeMillis val env = SparkEnv.get val worker = env.createPythonWorker(pythonExec, envVars.toMap) + @volatile var readerException: Exception = null + // Start a thread to feed the process input from our parent's iterator new Thread("stdin writer for " + pythonExec) { override def run() { @@ -71,11 +61,10 @@ private[spark] class PythonRDD[T: ClassManifest]( SparkEnv.set(env) val stream = new BufferedOutputStream(worker.getOutputStream, bufferSize) val dataOut = new DataOutputStream(stream) - val printOut = new PrintWriter(stream) // Partition index dataOut.writeInt(split.index) // sparkFilesDir - PythonRDD.writeAsPickle(SparkFiles.getRootDirectory, dataOut) + PythonRDD.writeUTF(SparkFiles.getRootDirectory, dataOut) // Broadcast variables dataOut.writeInt(broadcastVars.length) for (broadcast <- broadcastVars) { @@ -85,23 +74,22 @@ private[spark] class PythonRDD[T: ClassManifest]( } // Python includes (*.zip and *.egg files) dataOut.writeInt(pythonIncludes.length) - for (f <- pythonIncludes) { - PythonRDD.writeAsPickle(f, dataOut) + for (include <- pythonIncludes) { + PythonRDD.writeUTF(include, dataOut) } dataOut.flush() - // Serialized user code - for (elem <- command) { - printOut.println(elem) - } - printOut.flush() + // Serialized command: + dataOut.writeInt(command.length) + dataOut.write(command) // Data values - for (elem <- parent.iterator(split, context)) { - PythonRDD.writeAsPickle(elem, dataOut) - } + PythonRDD.writeIteratorToStream(parent.iterator(split, context), dataOut) dataOut.flush() - printOut.flush() worker.shutdownOutput() } catch { + case e: java.io.FileNotFoundException => + readerException = e + // Kill the Python worker process: + worker.shutdownOutput() case e: IOException => // This can happen for legitimate reasons if the Python code stops returning data before we are done // passing elements through, e.g., for take(). Just log a message to say it happened. 
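One small behavioral change above: spark.buffer.size is now read from the job's SparkConf via conf.getInt rather than from a JVM system property, so it can be set per application. Illustration only; the value is arbitrary:
{{{
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .setAppName("PySparkBufferTuning")    // placeholder name
  .set("spark.buffer.size", "131072")   // PythonRDD reads this with conf.getInt("spark.buffer.size", 65536)
}}}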
@@ -113,7 +101,7 @@ private[spark] class PythonRDD[T: ClassManifest]( // Return an iterator that read lines from the process's stdout val stream = new DataInputStream(new BufferedInputStream(worker.getInputStream, bufferSize)) - return new Iterator[Array[Byte]] { + val stdoutIterator = new Iterator[Array[Byte]] { def next(): Array[Byte] = { val obj = _nextObj if (hasNext) { @@ -126,13 +114,16 @@ private[spark] class PythonRDD[T: ClassManifest]( } private def read(): Array[Byte] = { + if (readerException != null) { + throw readerException + } try { stream.readInt() match { case length if length > 0 => val obj = new Array[Byte](length) stream.readFully(obj) obj - case -3 => + case SpecialLengths.TIMING_DATA => // Timing data from worker val bootTime = stream.readLong() val initTime = stream.readLong() @@ -143,30 +134,30 @@ private[spark] class PythonRDD[T: ClassManifest]( val total = finishTime - startTime logInfo("Times: total = %s, boot = %s, init = %s, finish = %s".format(total, boot, init, finish)) read - case -2 => + case SpecialLengths.PYTHON_EXCEPTION_THROWN => // Signals that an exception has been thrown in python val exLength = stream.readInt() val obj = new Array[Byte](exLength) stream.readFully(obj) throw new PythonException(new String(obj)) - case -1 => + case SpecialLengths.END_OF_DATA_SECTION => // We've finished the data section of the output, but we can still - // read some accumulator updates; let's do that, breaking when we - // get a negative length record. - var len2 = stream.readInt() - while (len2 >= 0) { - val update = new Array[Byte](len2) + // read some accumulator updates: + val numAccumulatorUpdates = stream.readInt() + (1 to numAccumulatorUpdates).foreach { _ => + val updateLen = stream.readInt() + val update = new Array[Byte](updateLen) stream.readFully(update) accumulator += Collections.singletonList(update) - len2 = stream.readInt() + } - new Array[Byte](0) + Array.empty[Byte] } } catch { case eof: EOFException => { throw new SparkException("Python worker exited unexpectedly (crashed)", eof) } - case e => throw e + case e: Throwable => throw e } } @@ -174,6 +165,7 @@ private[spark] class PythonRDD[T: ClassManifest]( def hasNext = _nextObj.length != 0 } + stdoutIterator } val asJavaRDD : JavaRDD[Array[Byte]] = JavaRDD.fromRDD(this) @@ -197,62 +189,15 @@ private class PairwiseRDD(prev: RDD[Array[Byte]]) extends val asJavaPairRDD : JavaPairRDD[Long, Array[Byte]] = JavaPairRDD.fromRDD(this) } -private[spark] object PythonRDD { - - /** Strips the pickle PROTO and STOP opcodes from the start and end of a pickle */ - def stripPickle(arr: Array[Byte]) : Array[Byte] = { - arr.slice(2, arr.length - 1) - } +private object SpecialLengths { + val END_OF_DATA_SECTION = -1 + val PYTHON_EXCEPTION_THROWN = -2 + val TIMING_DATA = -3 +} - /** - * Write strings, pickled Python objects, or pairs of pickled objects to a data output stream. - * The data format is a 32-bit integer representing the pickled object's length (in bytes), - * followed by the pickled data. 
- * - * Pickle module: - * - * http://docs.python.org/2/library/pickle.html - * - * The pickle protocol is documented in the source of the `pickle` and `pickletools` modules: - * - * http://hg.python.org/cpython/file/2.6/Lib/pickle.py - * http://hg.python.org/cpython/file/2.6/Lib/pickletools.py - * - * @param elem the object to write - * @param dOut a data output stream - */ - def writeAsPickle(elem: Any, dOut: DataOutputStream) { - if (elem.isInstanceOf[Array[Byte]]) { - val arr = elem.asInstanceOf[Array[Byte]] - dOut.writeInt(arr.length) - dOut.write(arr) - } else if (elem.isInstanceOf[scala.Tuple2[Array[Byte], Array[Byte]]]) { - val t = elem.asInstanceOf[scala.Tuple2[Array[Byte], Array[Byte]]] - val length = t._1.length + t._2.length - 3 - 3 + 4 // stripPickle() removes 3 bytes - dOut.writeInt(length) - dOut.writeByte(Pickle.PROTO) - dOut.writeByte(Pickle.TWO) - dOut.write(PythonRDD.stripPickle(t._1)) - dOut.write(PythonRDD.stripPickle(t._2)) - dOut.writeByte(Pickle.TUPLE2) - dOut.writeByte(Pickle.STOP) - } else if (elem.isInstanceOf[String]) { - // For uniformity, strings are wrapped into Pickles. - val s = elem.asInstanceOf[String].getBytes("UTF-8") - val length = 2 + 1 + 4 + s.length + 1 - dOut.writeInt(length) - dOut.writeByte(Pickle.PROTO) - dOut.writeByte(Pickle.TWO) - dOut.write(Pickle.BINUNICODE) - dOut.writeInt(Integer.reverseBytes(s.length)) - dOut.write(s) - dOut.writeByte(Pickle.STOP) - } else { - throw new SparkException("Unexpected RDD type") - } - } +private[spark] object PythonRDD { - def readRDDFromPickleFile(sc: JavaSparkContext, filename: String, parallelism: Int) : + def readRDDFromFile(sc: JavaSparkContext, filename: String, parallelism: Int): JavaRDD[Array[Byte]] = { val file = new DataInputStream(new FileInputStream(filename)) val objs = new collection.mutable.ArrayBuffer[Array[Byte]] @@ -265,39 +210,68 @@ private[spark] object PythonRDD { } } catch { case eof: EOFException => {} - case e => throw e + case e: Throwable => throw e } JavaRDD.fromRDD(sc.sc.parallelize(objs, parallelism)) } - def writeIteratorToPickleFile[T](items: java.util.Iterator[T], filename: String) { + def writeIteratorToStream[T](iter: Iterator[T], dataOut: DataOutputStream) { + // The right way to implement this would be to use TypeTags to get the full + // type of T. 
Since I don't want to introduce breaking changes throughout the + // entire Spark API, I have to use this hacky approach: + if (iter.hasNext) { + val first = iter.next() + val newIter = Seq(first).iterator ++ iter + first match { + case arr: Array[Byte] => + newIter.asInstanceOf[Iterator[Array[Byte]]].foreach { bytes => + dataOut.writeInt(bytes.length) + dataOut.write(bytes) + } + case string: String => + newIter.asInstanceOf[Iterator[String]].foreach { str => + writeUTF(str, dataOut) + } + case pair: Tuple2[_, _] => + pair._1 match { + case bytePair: Array[Byte] => + newIter.asInstanceOf[Iterator[Tuple2[Array[Byte], Array[Byte]]]].foreach { pair => + dataOut.writeInt(pair._1.length) + dataOut.write(pair._1) + dataOut.writeInt(pair._2.length) + dataOut.write(pair._2) + } + case stringPair: String => + newIter.asInstanceOf[Iterator[Tuple2[String, String]]].foreach { pair => + writeUTF(pair._1, dataOut) + writeUTF(pair._2, dataOut) + } + case other => + throw new SparkException("Unexpected Tuple2 element type " + pair._1.getClass) + } + case other => + throw new SparkException("Unexpected element type " + first.getClass) + } + } + } + + def writeUTF(str: String, dataOut: DataOutputStream) { + val bytes = str.getBytes("UTF-8") + dataOut.writeInt(bytes.length) + dataOut.write(bytes) + } + + def writeToFile[T](items: java.util.Iterator[T], filename: String) { import scala.collection.JavaConverters._ - writeIteratorToPickleFile(items.asScala, filename) + writeToFile(items.asScala, filename) } - def writeIteratorToPickleFile[T](items: Iterator[T], filename: String) { + def writeToFile[T](items: Iterator[T], filename: String) { val file = new DataOutputStream(new FileOutputStream(filename)) - for (item <- items) { - writeAsPickle(item, file) - } + writeIteratorToStream(items, file) file.close() } - def takePartition[T](rdd: RDD[T], partition: Int): Iterator[T] = { - implicit val cm : ClassManifest[T] = rdd.elementClassManifest - rdd.context.runJob(rdd, ((x: Iterator[T]) => x.toArray), Seq(partition), true).head.iterator - } -} - -private object Pickle { - val PROTO: Byte = 0x80.toByte - val TWO: Byte = 0x02.toByte - val BINUNICODE: Byte = 'X' - val STOP: Byte = '.' 
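writeIteratorToStream and writeUTF above replace the old pickle framing with plain length-prefixed records, and the negative SpecialLengths values are reserved as control codes on the same channel. A standalone sketch of that framing; the buffer and strings are illustrative:
{{{
import java.io.{ByteArrayOutputStream, DataOutputStream}

val buffer = new ByteArrayOutputStream()
val out = new DataOutputStream(buffer)
for (s <- Seq("spark", "python")) {
  val bytes = s.getBytes("UTF-8")
  out.writeInt(bytes.length)  // 4-byte frame header, as writeUTF does
  out.write(bytes)            // frame payload
}
out.writeInt(-1)              // SpecialLengths.END_OF_DATA_SECTION sentinel
out.flush()
}}}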
- val TUPLE2: Byte = 0x86.toByte - val EMPTY_LIST: Byte = ']' - val MARK: Byte = '(' - val APPENDS: Byte = 'e' } private class BytesToString extends org.apache.spark.api.java.function.Function[Array[Byte], String] { @@ -313,7 +287,7 @@ private class PythonAccumulatorParam(@transient serverHost: String, serverPort: Utils.checkHost(serverHost, "Expected hostname") - val bufferSize = System.getProperty("spark.buffer.size", "65536").toInt + val bufferSize = SparkEnv.get.conf.getInt("spark.buffer.size", 65536) override def zero(value: JList[Array[Byte]]): JList[Array[Byte]] = new JArrayList diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala index 67d45723badd8..f291266fcf17c 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala @@ -64,7 +64,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String startDaemon() new Socket(daemonHost, daemonPort) } - case e => throw e + case e: Throwable => throw e } } } @@ -198,7 +198,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String } }.start() } catch { - case e => { + case e: Throwable => { stopDaemon() throw e } diff --git a/core/src/main/scala/org/apache/spark/broadcast/BitTorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/BitTorrentBroadcast.scala deleted file mode 100644 index 82ce2826d2de6..0000000000000 --- a/core/src/main/scala/org/apache/spark/broadcast/BitTorrentBroadcast.scala +++ /dev/null @@ -1,1063 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.broadcast - -import java.io._ -import java.net._ -import java.util.{BitSet, Comparator, Timer, TimerTask, UUID} -import java.util.concurrent.atomic.AtomicInteger - -import scala.collection.mutable.{ListBuffer, Map, Set} -import scala.math - -import org.apache.spark._ -import org.apache.spark.storage.{BroadcastBlockId, StorageLevel} -import org.apache.spark.util.Utils - -@deprecated("Use TorrentBroadcast", "0.8.1") -private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long) - extends Broadcast[T](id) - with Logging - with Serializable { - - logWarning("BitTorrentBroadcast is deprecated. 
Use TorrentBroadcast.") - - def value = value_ - - def blockId = BroadcastBlockId(id) - - MultiTracker.synchronized { - SparkEnv.get.blockManager.putSingle(blockId, value_, StorageLevel.MEMORY_AND_DISK, false) - } - - @transient var arrayOfBlocks: Array[BroadcastBlock] = null - @transient var hasBlocksBitVector: BitSet = null - @transient var numCopiesSent: Array[Int] = null - @transient var totalBytes = -1 - @transient var totalBlocks = -1 - @transient var hasBlocks = new AtomicInteger(0) - - // Used ONLY by driver to track how many unique blocks have been sent out - @transient var sentBlocks = new AtomicInteger(0) - - @transient var listenPortLock = new Object - @transient var guidePortLock = new Object - @transient var totalBlocksLock = new Object - - @transient var listOfSources = ListBuffer[SourceInfo]() - - @transient var serveMR: ServeMultipleRequests = null - - // Used only in driver - @transient var guideMR: GuideMultipleRequests = null - - // Used only in Workers - @transient var ttGuide: TalkToGuide = null - - @transient var hostAddress = Utils.localIpAddress - @transient var listenPort = -1 - @transient var guidePort = -1 - - @transient var stopBroadcast = false - - // Must call this after all the variables have been created/initialized - if (!isLocal) { - sendBroadcast() - } - - def sendBroadcast() { - logInfo("Local host address: " + hostAddress) - - // Create a variableInfo object and store it in valueInfos - var variableInfo = MultiTracker.blockifyObject(value_) - - // Prepare the value being broadcasted - arrayOfBlocks = variableInfo.arrayOfBlocks - totalBytes = variableInfo.totalBytes - totalBlocks = variableInfo.totalBlocks - hasBlocks.set(variableInfo.totalBlocks) - - // Guide has all the blocks - hasBlocksBitVector = new BitSet(totalBlocks) - hasBlocksBitVector.set(0, totalBlocks) - - // Guide still hasn't sent any block - numCopiesSent = new Array[Int](totalBlocks) - - guideMR = new GuideMultipleRequests - guideMR.setDaemon(true) - guideMR.start() - logInfo("GuideMultipleRequests started...") - - // Must always come AFTER guideMR is created - while (guidePort == -1) { - guidePortLock.synchronized { guidePortLock.wait() } - } - - serveMR = new ServeMultipleRequests - serveMR.setDaemon(true) - serveMR.start() - logInfo("ServeMultipleRequests started...") - - // Must always come AFTER serveMR is created - while (listenPort == -1) { - listenPortLock.synchronized { listenPortLock.wait() } - } - - // Must always come AFTER listenPort is created - val driverSource = - SourceInfo(hostAddress, listenPort, totalBlocks, totalBytes) - hasBlocksBitVector.synchronized { - driverSource.hasBlocksBitVector = hasBlocksBitVector - } - - // In the beginning, this is the only known source to Guide - listOfSources += driverSource - - // Register with the Tracker - MultiTracker.registerBroadcast(id, - SourceInfo(hostAddress, guidePort, totalBlocks, totalBytes)) - } - - private def readObject(in: ObjectInputStream) { - in.defaultReadObject() - MultiTracker.synchronized { - SparkEnv.get.blockManager.getSingle(blockId) match { - case Some(x) => - value_ = x.asInstanceOf[T] - - case None => - logInfo("Started reading broadcast variable " + id) - // Initializing everything because driver will only send null/0 values - // Only the 1st worker in a node can be here. 
Others will get from cache - initializeWorkerVariables() - - logInfo("Local host address: " + hostAddress) - - // Start local ServeMultipleRequests thread first - serveMR = new ServeMultipleRequests - serveMR.setDaemon(true) - serveMR.start() - logInfo("ServeMultipleRequests started...") - - val start = System.nanoTime - - val receptionSucceeded = receiveBroadcast(id) - if (receptionSucceeded) { - value_ = MultiTracker.unBlockifyObject[T](arrayOfBlocks, totalBytes, totalBlocks) - SparkEnv.get.blockManager.putSingle( - blockId, value_, StorageLevel.MEMORY_AND_DISK, false) - } else { - logError("Reading broadcast variable " + id + " failed") - } - - val time = (System.nanoTime - start) / 1e9 - logInfo("Reading broadcast variable " + id + " took " + time + " s") - } - } - } - - // Initialize variables in the worker node. Driver sends everything as 0/null - private def initializeWorkerVariables() { - arrayOfBlocks = null - hasBlocksBitVector = null - numCopiesSent = null - totalBytes = -1 - totalBlocks = -1 - hasBlocks = new AtomicInteger(0) - - listenPortLock = new Object - totalBlocksLock = new Object - - serveMR = null - ttGuide = null - - hostAddress = Utils.localIpAddress - listenPort = -1 - - listOfSources = ListBuffer[SourceInfo]() - - stopBroadcast = false - } - - private def getLocalSourceInfo: SourceInfo = { - // Wait till hostName and listenPort are OK - while (listenPort == -1) { - listenPortLock.synchronized { listenPortLock.wait() } - } - - // Wait till totalBlocks and totalBytes are OK - while (totalBlocks == -1) { - totalBlocksLock.synchronized { totalBlocksLock.wait() } - } - - var localSourceInfo = SourceInfo( - hostAddress, listenPort, totalBlocks, totalBytes) - - localSourceInfo.hasBlocks = hasBlocks.get - - hasBlocksBitVector.synchronized { - localSourceInfo.hasBlocksBitVector = hasBlocksBitVector - } - - return localSourceInfo - } - - // Add new SourceInfo to the listOfSources. Update if it exists already. 
- // Optimizing just by OR-ing the BitVectors was BAD for performance - private def addToListOfSources(newSourceInfo: SourceInfo) { - listOfSources.synchronized { - if (listOfSources.contains(newSourceInfo)) { - listOfSources = listOfSources - newSourceInfo - } - listOfSources += newSourceInfo - } - } - - private def addToListOfSources(newSourceInfos: ListBuffer[SourceInfo]) { - newSourceInfos.foreach { newSourceInfo => - addToListOfSources(newSourceInfo) - } - } - - class TalkToGuide(gInfo: SourceInfo) - extends Thread with Logging { - override def run() { - - // Keep exchaning information until all blocks have been received - while (hasBlocks.get < totalBlocks) { - talkOnce - Thread.sleep(MultiTracker.ranGen.nextInt( - MultiTracker.MaxKnockInterval - MultiTracker.MinKnockInterval) + - MultiTracker.MinKnockInterval) - } - - // Talk one more time to let the Guide know of reception completion - talkOnce - } - - // Connect to Guide and send this worker's information - private def talkOnce { - var clientSocketToGuide: Socket = null - var oosGuide: ObjectOutputStream = null - var oisGuide: ObjectInputStream = null - - clientSocketToGuide = new Socket(gInfo.hostAddress, gInfo.listenPort) - oosGuide = new ObjectOutputStream(clientSocketToGuide.getOutputStream) - oosGuide.flush() - oisGuide = new ObjectInputStream(clientSocketToGuide.getInputStream) - - // Send local information - oosGuide.writeObject(getLocalSourceInfo) - oosGuide.flush() - - // Receive source information from Guide - var suitableSources = - oisGuide.readObject.asInstanceOf[ListBuffer[SourceInfo]] - logDebug("Received suitableSources from Driver " + suitableSources) - - addToListOfSources(suitableSources) - - oisGuide.close() - oosGuide.close() - clientSocketToGuide.close() - } - } - - def receiveBroadcast(variableID: Long): Boolean = { - val gInfo = MultiTracker.getGuideInfo(variableID) - - if (gInfo.listenPort == SourceInfo.TxOverGoToDefault) { - return false - } - - // Wait until hostAddress and listenPort are created by the - // ServeMultipleRequests thread - while (listenPort == -1) { - listenPortLock.synchronized { listenPortLock.wait() } - } - - // Setup initial states of variables - totalBlocks = gInfo.totalBlocks - arrayOfBlocks = new Array[BroadcastBlock](totalBlocks) - hasBlocksBitVector = new BitSet(totalBlocks) - numCopiesSent = new Array[Int](totalBlocks) - totalBlocksLock.synchronized { totalBlocksLock.notifyAll() } - totalBytes = gInfo.totalBytes - - // Start ttGuide to periodically talk to the Guide - var ttGuide = new TalkToGuide(gInfo) - ttGuide.setDaemon(true) - ttGuide.start() - logInfo("TalkToGuide started...") - - // Start pController to run TalkToPeer threads - var pcController = new PeerChatterController - pcController.setDaemon(true) - pcController.start() - logInfo("PeerChatterController started...") - - // FIXME: Must fix this. This might never break if broadcast fails. - // We should be able to break and send false. Also need to kill threads - while (hasBlocks.get < totalBlocks) { - Thread.sleep(MultiTracker.MaxKnockInterval) - } - - return true - } - - class PeerChatterController - extends Thread with Logging { - private var peersNowTalking = ListBuffer[SourceInfo]() - // TODO: There is a possible bug with blocksInRequestBitVector when a - // certain bit is NOT unset upon failure resulting in an infinite loop. 
- private var blocksInRequestBitVector = new BitSet(totalBlocks) - - override def run() { - var threadPool = Utils.newDaemonFixedThreadPool( - MultiTracker.MaxChatSlots, "Bit Torrent Chatter") - - while (hasBlocks.get < totalBlocks) { - var numThreadsToCreate = 0 - listOfSources.synchronized { - numThreadsToCreate = math.min(listOfSources.size, MultiTracker.MaxChatSlots) - - threadPool.getActiveCount - } - - while (hasBlocks.get < totalBlocks && numThreadsToCreate > 0) { - var peerToTalkTo = pickPeerToTalkToRandom - - if (peerToTalkTo != null) - logDebug("Peer chosen: " + peerToTalkTo + " with " + peerToTalkTo.hasBlocksBitVector) - else - logDebug("No peer chosen...") - - if (peerToTalkTo != null) { - threadPool.execute(new TalkToPeer(peerToTalkTo)) - - // Add to peersNowTalking. Remove in the thread. We have to do this - // ASAP, otherwise pickPeerToTalkTo picks the same peer more than once - peersNowTalking.synchronized { peersNowTalking += peerToTalkTo } - } - - numThreadsToCreate = numThreadsToCreate - 1 - } - - // Sleep for a while before starting some more threads - Thread.sleep(MultiTracker.MinKnockInterval) - } - // Shutdown the thread pool - threadPool.shutdown() - } - - // Right now picking the one that has the most blocks this peer wants - // Also picking peer randomly if no one has anything interesting - private def pickPeerToTalkToRandom: SourceInfo = { - var curPeer: SourceInfo = null - var curMax = 0 - - logDebug("Picking peers to talk to...") - - // Find peers that are not connected right now - var peersNotInUse = ListBuffer[SourceInfo]() - listOfSources.synchronized { - peersNowTalking.synchronized { - peersNotInUse = listOfSources -- peersNowTalking - } - } - - // Select the peer that has the most blocks that this receiver does not - peersNotInUse.foreach { eachSource => - var tempHasBlocksBitVector: BitSet = null - hasBlocksBitVector.synchronized { - tempHasBlocksBitVector = hasBlocksBitVector.clone.asInstanceOf[BitSet] - } - tempHasBlocksBitVector.flip(0, tempHasBlocksBitVector.size) - tempHasBlocksBitVector.and(eachSource.hasBlocksBitVector) - - if (tempHasBlocksBitVector.cardinality > curMax) { - curPeer = eachSource - curMax = tempHasBlocksBitVector.cardinality - } - } - - // Always picking randomly - if (curPeer == null && peersNotInUse.size > 0) { - // Pick uniformly the i'th required peer - var i = MultiTracker.ranGen.nextInt(peersNotInUse.size) - - var peerIter = peersNotInUse.iterator - curPeer = peerIter.next - - while (i > 0) { - curPeer = peerIter.next - i = i - 1 - } - } - - return curPeer - } - - // Picking peer with the weight of rare blocks it has - private def pickPeerToTalkToRarestFirst: SourceInfo = { - // Find peers that are not connected right now - var peersNotInUse = ListBuffer[SourceInfo]() - listOfSources.synchronized { - peersNowTalking.synchronized { - peersNotInUse = listOfSources -- peersNowTalking - } - } - - // Count the number of copies of each block in the neighborhood - var numCopiesPerBlock = Array.tabulate [Int](totalBlocks)(_ => 0) - - listOfSources.synchronized { - listOfSources.foreach { eachSource => - for (i <- 0 until totalBlocks) { - numCopiesPerBlock(i) += - ( if (eachSource.hasBlocksBitVector.get(i)) 1 else 0 ) - } - } - } - - // A block is considered rare if there are at most 2 copies of that block - // This CONSTANT could be a function of the neighborhood size - var rareBlocksIndices = ListBuffer[Int]() - for (i <- 0 until totalBlocks) { - if (numCopiesPerBlock(i) > 0 && numCopiesPerBlock(i) <= 2) { - rareBlocksIndices += i 
- } - } - - // Find peers with rare blocks - var peersWithRareBlocks = ListBuffer[(SourceInfo, Int)]() - var totalRareBlocks = 0 - - peersNotInUse.foreach { eachPeer => - var hasRareBlocks = 0 - rareBlocksIndices.foreach { rareBlock => - if (eachPeer.hasBlocksBitVector.get(rareBlock)) { - hasRareBlocks += 1 - } - } - - if (hasRareBlocks > 0) { - peersWithRareBlocks += ((eachPeer, hasRareBlocks)) - } - totalRareBlocks += hasRareBlocks - } - - // Select a peer from peersWithRareBlocks based on weight calculated from - // unique rare blocks - var selectedPeerToTalkTo: SourceInfo = null - - if (peersWithRareBlocks.size > 0) { - // Sort the peers based on how many rare blocks they have - peersWithRareBlocks.sortBy(_._2) - - var randomNumber = MultiTracker.ranGen.nextDouble - var tempSum = 0.0 - - var i = 0 - do { - tempSum += (1.0 * peersWithRareBlocks(i)._2 / totalRareBlocks) - if (tempSum >= randomNumber) { - selectedPeerToTalkTo = peersWithRareBlocks(i)._1 - } - i += 1 - } while (i < peersWithRareBlocks.size && selectedPeerToTalkTo == null) - } - - if (selectedPeerToTalkTo == null) { - selectedPeerToTalkTo = pickPeerToTalkToRandom - } - - return selectedPeerToTalkTo - } - - class TalkToPeer(peerToTalkTo: SourceInfo) - extends Thread with Logging { - private var peerSocketToSource: Socket = null - private var oosSource: ObjectOutputStream = null - private var oisSource: ObjectInputStream = null - - override def run() { - // TODO: There is a possible bug here regarding blocksInRequestBitVector - var blockToAskFor = -1 - - // Setup the timeout mechanism - var timeOutTask = new TimerTask { - override def run() { - cleanUpConnections() - } - } - - var timeOutTimer = new Timer - timeOutTimer.schedule(timeOutTask, MultiTracker.MaxKnockInterval) - - logInfo("TalkToPeer started... => " + peerToTalkTo) - - try { - // Connect to the source - peerSocketToSource = - new Socket(peerToTalkTo.hostAddress, peerToTalkTo.listenPort) - oosSource = - new ObjectOutputStream(peerSocketToSource.getOutputStream) - oosSource.flush() - oisSource = - new ObjectInputStream(peerSocketToSource.getInputStream) - - // Receive latest SourceInfo from peerToTalkTo - var newPeerToTalkTo = oisSource.readObject.asInstanceOf[SourceInfo] - // Update listOfSources - addToListOfSources(newPeerToTalkTo) - - // Turn the timer OFF, if the sender responds before timeout - timeOutTimer.cancel() - - // Send the latest SourceInfo - oosSource.writeObject(getLocalSourceInfo) - oosSource.flush() - - var keepReceiving = true - - while (hasBlocks.get < totalBlocks && keepReceiving) { - blockToAskFor = - pickBlockRandom(newPeerToTalkTo.hasBlocksBitVector) - - // No block to request - if (blockToAskFor < 0) { - // Nothing to receive from newPeerToTalkTo - keepReceiving = false - } else { - // Let other threads know that blockToAskFor is being requested - blocksInRequestBitVector.synchronized { - blocksInRequestBitVector.set(blockToAskFor) - } - - // Start with sending the blockID - oosSource.writeObject(blockToAskFor) - oosSource.flush() - - // CHANGED: Driver might send some other block than the one - // requested to ensure fast spreading of all blocks. 
- val recvStartTime = System.currentTimeMillis - val bcBlock = oisSource.readObject.asInstanceOf[BroadcastBlock] - val receptionTime = (System.currentTimeMillis - recvStartTime) - - logDebug("Received block: " + bcBlock.blockID + " from " + peerToTalkTo + " in " + receptionTime + " millis.") - - if (!hasBlocksBitVector.get(bcBlock.blockID)) { - arrayOfBlocks(bcBlock.blockID) = bcBlock - - // Update the hasBlocksBitVector first - hasBlocksBitVector.synchronized { - hasBlocksBitVector.set(bcBlock.blockID) - hasBlocks.getAndIncrement - } - - // Some block(may NOT be blockToAskFor) has arrived. - // In any case, blockToAskFor is not in request any more - blocksInRequestBitVector.synchronized { - blocksInRequestBitVector.set(blockToAskFor, false) - } - - // Reset blockToAskFor to -1. Else it will be considered missing - blockToAskFor = -1 - } - - // Send the latest SourceInfo - oosSource.writeObject(getLocalSourceInfo) - oosSource.flush() - } - } - } catch { - // EOFException is expected to happen because sender can break - // connection due to timeout - case eofe: java.io.EOFException => { } - case e: Exception => { - logError("TalktoPeer had a " + e) - // FIXME: Remove 'newPeerToTalkTo' from listOfSources - // We probably should have the following in some form, but not - // really here. This exception can happen if the sender just breaks connection - // listOfSources.synchronized { - // logInfo("Exception in TalkToPeer. Removing source: " + peerToTalkTo) - // listOfSources = listOfSources - peerToTalkTo - // } - } - } finally { - // blockToAskFor != -1 => there was an exception - if (blockToAskFor != -1) { - blocksInRequestBitVector.synchronized { - blocksInRequestBitVector.set(blockToAskFor, false) - } - } - - cleanUpConnections() - } - } - - // Right now it picks a block uniformly that this peer does not have - private def pickBlockRandom(txHasBlocksBitVector: BitSet): Int = { - var needBlocksBitVector: BitSet = null - - // Blocks already present - hasBlocksBitVector.synchronized { - needBlocksBitVector = hasBlocksBitVector.clone.asInstanceOf[BitSet] - } - - // Include blocks already in transmission ONLY IF - // MultiTracker.EndGameFraction has NOT been achieved - if ((1.0 * hasBlocks.get / totalBlocks) < MultiTracker.EndGameFraction) { - blocksInRequestBitVector.synchronized { - needBlocksBitVector.or(blocksInRequestBitVector) - } - } - - // Find blocks that are neither here nor in transit - needBlocksBitVector.flip(0, needBlocksBitVector.size) - - // Blocks that should/can be requested - needBlocksBitVector.and(txHasBlocksBitVector) - - if (needBlocksBitVector.cardinality == 0) { - return -1 - } else { - // Pick uniformly the i'th required block - var i = MultiTracker.ranGen.nextInt(needBlocksBitVector.cardinality) - var pickedBlockIndex = needBlocksBitVector.nextSetBit(0) - - while (i > 0) { - pickedBlockIndex = - needBlocksBitVector.nextSetBit(pickedBlockIndex + 1) - i -= 1 - } - - return pickedBlockIndex - } - } - - // Pick the block that seems to be the rarest across sources - private def pickBlockRarestFirst(txHasBlocksBitVector: BitSet): Int = { - var needBlocksBitVector: BitSet = null - - // Blocks already present - hasBlocksBitVector.synchronized { - needBlocksBitVector = hasBlocksBitVector.clone.asInstanceOf[BitSet] - } - - // Include blocks already in transmission ONLY IF - // MultiTracker.EndGameFraction has NOT been achieved - if ((1.0 * hasBlocks.get / totalBlocks) < MultiTracker.EndGameFraction) { - blocksInRequestBitVector.synchronized { - 
needBlocksBitVector.or(blocksInRequestBitVector) - } - } - - // Find blocks that are neither here nor in transit - needBlocksBitVector.flip(0, needBlocksBitVector.size) - - // Blocks that should/can be requested - needBlocksBitVector.and(txHasBlocksBitVector) - - if (needBlocksBitVector.cardinality == 0) { - return -1 - } else { - // Count the number of copies for each block across all sources - var numCopiesPerBlock = Array.tabulate [Int](totalBlocks)(_ => 0) - - listOfSources.synchronized { - listOfSources.foreach { eachSource => - for (i <- 0 until totalBlocks) { - numCopiesPerBlock(i) += - ( if (eachSource.hasBlocksBitVector.get(i)) 1 else 0 ) - } - } - } - - // Find the minimum - var minVal = Integer.MAX_VALUE - for (i <- 0 until totalBlocks) { - if (numCopiesPerBlock(i) > 0 && numCopiesPerBlock(i) < minVal) { - minVal = numCopiesPerBlock(i) - } - } - - // Find the blocks with the least copies that this peer does not have - var minBlocksIndices = ListBuffer[Int]() - for (i <- 0 until totalBlocks) { - if (needBlocksBitVector.get(i) && numCopiesPerBlock(i) == minVal) { - minBlocksIndices += i - } - } - - // Now select a random index from minBlocksIndices - if (minBlocksIndices.size == 0) { - return -1 - } else { - // Pick uniformly the i'th index - var i = MultiTracker.ranGen.nextInt(minBlocksIndices.size) - return minBlocksIndices(i) - } - } - } - - private def cleanUpConnections() { - if (oisSource != null) { - oisSource.close() - } - if (oosSource != null) { - oosSource.close() - } - if (peerSocketToSource != null) { - peerSocketToSource.close() - } - - // Delete from peersNowTalking - peersNowTalking.synchronized { peersNowTalking -= peerToTalkTo } - } - } - } - - class GuideMultipleRequests - extends Thread with Logging { - // Keep track of sources that have completed reception - private var setOfCompletedSources = Set[SourceInfo]() - - override def run() { - var threadPool = Utils.newDaemonCachedThreadPool("Bit torrent guide multiple requests") - var serverSocket: ServerSocket = null - - serverSocket = new ServerSocket(0) - guidePort = serverSocket.getLocalPort - logInfo("GuideMultipleRequests => " + serverSocket + " " + guidePort) - - guidePortLock.synchronized { guidePortLock.notifyAll() } - - try { - while (!stopBroadcast) { - var clientSocket: Socket = null - try { - serverSocket.setSoTimeout(MultiTracker.ServerSocketTimeout) - clientSocket = serverSocket.accept() - } catch { - case e: Exception => { - // Stop broadcast if at least one worker has connected and - // everyone connected so far are done. Comparing with - // listOfSources.size - 1, because it includes the Guide itself - listOfSources.synchronized { - setOfCompletedSources.synchronized { - if (listOfSources.size > 1 && - setOfCompletedSources.size == listOfSources.size - 1) { - stopBroadcast = true - logInfo("GuideMultipleRequests Timeout. 
stopBroadcast == true.") - } - } - } - } - } - if (clientSocket != null) { - logDebug("Guide: Accepted new client connection:" + clientSocket) - try { - threadPool.execute(new GuideSingleRequest(clientSocket)) - } catch { - // In failure, close the socket here; else, thread will close it - case ioe: IOException => { - clientSocket.close() - } - } - } - } - - // Shutdown the thread pool - threadPool.shutdown() - - logInfo("Sending stopBroadcast notifications...") - sendStopBroadcastNotifications - - MultiTracker.unregisterBroadcast(id) - } finally { - if (serverSocket != null) { - logInfo("GuideMultipleRequests now stopping...") - serverSocket.close() - } - } - } - - private def sendStopBroadcastNotifications() { - listOfSources.synchronized { - listOfSources.foreach { sourceInfo => - - var guideSocketToSource: Socket = null - var gosSource: ObjectOutputStream = null - var gisSource: ObjectInputStream = null - - try { - // Connect to the source - guideSocketToSource = new Socket(sourceInfo.hostAddress, sourceInfo.listenPort) - gosSource = new ObjectOutputStream(guideSocketToSource.getOutputStream) - gosSource.flush() - gisSource = new ObjectInputStream(guideSocketToSource.getInputStream) - - // Throw away whatever comes in - gisSource.readObject.asInstanceOf[SourceInfo] - - // Send stopBroadcast signal. listenPort = SourceInfo.StopBroadcast - gosSource.writeObject(SourceInfo("", SourceInfo.StopBroadcast)) - gosSource.flush() - } catch { - case e: Exception => { - logError("sendStopBroadcastNotifications had a " + e) - } - } finally { - if (gisSource != null) { - gisSource.close() - } - if (gosSource != null) { - gosSource.close() - } - if (guideSocketToSource != null) { - guideSocketToSource.close() - } - } - } - } - } - - class GuideSingleRequest(val clientSocket: Socket) - extends Thread with Logging { - private val oos = new ObjectOutputStream(clientSocket.getOutputStream) - oos.flush() - private val ois = new ObjectInputStream(clientSocket.getInputStream) - - private var sourceInfo: SourceInfo = null - private var selectedSources: ListBuffer[SourceInfo] = null - - override def run() { - try { - logInfo("new GuideSingleRequest is running") - // Connecting worker is sending in its information - sourceInfo = ois.readObject.asInstanceOf[SourceInfo] - - // Select a suitable source and send it back to the worker - selectedSources = selectSuitableSources(sourceInfo) - logDebug("Sending selectedSources:" + selectedSources) - oos.writeObject(selectedSources) - oos.flush() - - // Add this source to the listOfSources - addToListOfSources(sourceInfo) - } catch { - case e: Exception => { - // Assuming exception caused by receiver failure: remove - if (listOfSources != null) { - listOfSources.synchronized { listOfSources -= sourceInfo } - } - } - } finally { - logInfo("GuideSingleRequest is closing streams and sockets") - ois.close() - oos.close() - clientSocket.close() - } - } - - // Randomly select some sources to send back - private def selectSuitableSources(skipSourceInfo: SourceInfo): ListBuffer[SourceInfo] = { - var selectedSources = ListBuffer[SourceInfo]() - - // If skipSourceInfo.hasBlocksBitVector has all bits set to 'true' - // then add skipSourceInfo to setOfCompletedSources. Return blank. 
- if (skipSourceInfo.hasBlocks == totalBlocks) { - setOfCompletedSources.synchronized { setOfCompletedSources += skipSourceInfo } - return selectedSources - } - - listOfSources.synchronized { - if (listOfSources.size <= MultiTracker.MaxPeersInGuideResponse) { - selectedSources = listOfSources.clone - } else { - var picksLeft = MultiTracker.MaxPeersInGuideResponse - var alreadyPicked = new BitSet(listOfSources.size) - - while (picksLeft > 0) { - var i = -1 - - do { - i = MultiTracker.ranGen.nextInt(listOfSources.size) - } while (alreadyPicked.get(i)) - - var peerIter = listOfSources.iterator - var curPeer = peerIter.next - - // Set the BitSet before i is decremented - alreadyPicked.set(i) - - while (i > 0) { - curPeer = peerIter.next - i = i - 1 - } - - selectedSources += curPeer - - picksLeft = picksLeft - 1 - } - } - } - - // Remove the receiving source (if present) - selectedSources = selectedSources - skipSourceInfo - - return selectedSources - } - } - } - - class ServeMultipleRequests - extends Thread with Logging { - // Server at most MultiTracker.MaxChatSlots peers - var threadPool = Utils.newDaemonFixedThreadPool( - MultiTracker.MaxChatSlots, "Bit torrent serve multiple requests") - - override def run() { - var serverSocket = new ServerSocket(0) - listenPort = serverSocket.getLocalPort - - logInfo("ServeMultipleRequests started with " + serverSocket) - - listenPortLock.synchronized { listenPortLock.notifyAll() } - - try { - while (!stopBroadcast) { - var clientSocket: Socket = null - try { - serverSocket.setSoTimeout(MultiTracker.ServerSocketTimeout) - clientSocket = serverSocket.accept() - } catch { - case e: Exception => { } - } - if (clientSocket != null) { - logDebug("Serve: Accepted new client connection:" + clientSocket) - try { - threadPool.execute(new ServeSingleRequest(clientSocket)) - } catch { - // In failure, close socket here; else, the thread will close it - case ioe: IOException => clientSocket.close() - } - } - } - } finally { - if (serverSocket != null) { - logInfo("ServeMultipleRequests now stopping...") - serverSocket.close() - } - } - // Shutdown the thread pool - threadPool.shutdown() - } - - class ServeSingleRequest(val clientSocket: Socket) - extends Thread with Logging { - private val oos = new ObjectOutputStream(clientSocket.getOutputStream) - oos.flush() - private val ois = new ObjectInputStream(clientSocket.getInputStream) - - logInfo("new ServeSingleRequest is running") - - override def run() { - try { - // Send latest local SourceInfo to the receiver - // In the case of receiver timeout and connection close, this will - // throw a java.net.SocketException: Broken pipe - oos.writeObject(getLocalSourceInfo) - oos.flush() - - // Receive latest SourceInfo from the receiver - var rxSourceInfo = ois.readObject.asInstanceOf[SourceInfo] - - if (rxSourceInfo.listenPort == SourceInfo.StopBroadcast) { - stopBroadcast = true - } else { - addToListOfSources(rxSourceInfo) - } - - val startTime = System.currentTimeMillis - var curTime = startTime - var keepSending = true - var numBlocksToSend = MultiTracker.MaxChatBlocks - - while (!stopBroadcast && keepSending && numBlocksToSend > 0) { - // Receive which block to send - var blockToSend = ois.readObject.asInstanceOf[Int] - - // If it is driver AND at least one copy of each block has not been - // sent out already, MODIFY blockToSend - if (MultiTracker.isDriver && sentBlocks.get < totalBlocks) { - blockToSend = sentBlocks.getAndIncrement - } - - // Send the block - sendBlock(blockToSend) - 
rxSourceInfo.hasBlocksBitVector.set(blockToSend) - - numBlocksToSend -= 1 - - // Receive latest SourceInfo from the receiver - rxSourceInfo = ois.readObject.asInstanceOf[SourceInfo] - logDebug("rxSourceInfo: " + rxSourceInfo + " with " + rxSourceInfo.hasBlocksBitVector) - addToListOfSources(rxSourceInfo) - - curTime = System.currentTimeMillis - // Revoke sending only if there is anyone waiting in the queue - if (curTime - startTime >= MultiTracker.MaxChatTime && - threadPool.getQueue.size > 0) { - keepSending = false - } - } - } catch { - case e: Exception => logError("ServeSingleRequest had a " + e) - } finally { - logInfo("ServeSingleRequest is closing streams and sockets") - ois.close() - oos.close() - clientSocket.close() - } - } - - private def sendBlock(blockToSend: Int) { - try { - oos.writeObject(arrayOfBlocks(blockToSend)) - oos.flush() - } catch { - case e: Exception => logError("sendBlock had a " + e) - } - logDebug("Sent block: " + blockToSend + " to " + clientSocket) - } - } - } -} - -private[spark] class BitTorrentBroadcastFactory -extends BroadcastFactory { - def initialize(isDriver: Boolean) { MultiTracker.initialize(isDriver) } - - def newBroadcast[T](value_ : T, isLocal: Boolean, id: Long) = - new BitTorrentBroadcast[T](value_, isLocal, id) - - def stop() { MultiTracker.stop() } -} diff --git a/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala index f22a66a70b35b..d113d4040594d 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala @@ -17,11 +17,39 @@ package org.apache.spark.broadcast -import java.io._ +import java.io.Serializable import java.util.concurrent.atomic.AtomicLong import org.apache.spark._ +/** + * A broadcast variable. Broadcast variables allow the programmer to keep a read-only variable + * cached on each machine rather than shipping a copy of it with tasks. They can be used, for + * example, to give every node a copy of a large input dataset in an efficient manner. Spark also + * attempts to distribute broadcast variables using efficient broadcast algorithms to reduce + * communication cost. + * + * Broadcast variables are created from a variable `v` by calling [[SparkContext#broadcast]]. + * The broadcast variable is a wrapper around `v`, and its value can be accessed by calling the + * `value` method. The interpreter session below shows this: + * + * {{{ + * scala> val broadcastVar = sc.broadcast(Array(1, 2, 3)) + * broadcastVar: spark.Broadcast[Array[Int]] = spark.Broadcast(b5c40191-a864-4c7d-b9bf-d87e1a4e787c) + * + * scala> broadcastVar.value + * res0: Array[Int] = Array(1, 2, 3) + * }}} + * + * After the broadcast variable is created, it should be used instead of the value `v` in any + * functions run on the cluster so that `v` is not shipped to the nodes more than once. + * In addition, the object `v` should not be modified after it is broadcast in order to ensure + * that all nodes get the same value of the broadcast variable (e.g. if the variable is shipped + * to a new node later). + * + * @param id A unique identifier for the broadcast variable. + * @tparam T Type of the data contained in the broadcast variable. 
+ */ abstract class Broadcast[T](val id: Long) extends Serializable { def value: T @@ -31,8 +59,8 @@ abstract class Broadcast[T](val id: Long) extends Serializable { override def toString = "Broadcast(" + id + ")" } -private[spark] -class BroadcastManager(val _isDriver: Boolean) extends Logging with Serializable { +private[spark] +class BroadcastManager(val _isDriver: Boolean, conf: SparkConf) extends Logging with Serializable { private var initialized = false private var broadcastFactory: BroadcastFactory = null @@ -43,14 +71,14 @@ class BroadcastManager(val _isDriver: Boolean) extends Logging with Serializable private def initialize() { synchronized { if (!initialized) { - val broadcastFactoryClass = System.getProperty( + val broadcastFactoryClass = conf.get( "spark.broadcast.factory", "org.apache.spark.broadcast.HttpBroadcastFactory") broadcastFactory = Class.forName(broadcastFactoryClass).newInstance.asInstanceOf[BroadcastFactory] // Initialize appropriate BroadcastFactory and BroadcastObject - broadcastFactory.initialize(isDriver) + broadcastFactory.initialize(isDriver, conf) initialized = true } diff --git a/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala index 68bff75b908c7..940e5ab805100 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala @@ -17,14 +17,16 @@ package org.apache.spark.broadcast +import org.apache.spark.SparkConf + /** * An interface for all the broadcast implementations in Spark (to allow * multiple broadcast implementations). SparkContext uses a user-specified * BroadcastFactory implementation to instantiate a particular broadcast for the * entire Spark job. 
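Taken together, the new Broadcast scaladoc and the SparkConf-aware BroadcastManager above imply a simple end-to-end pattern: choose a factory through the spark.broadcast.factory key and read the broadcast value through .value inside closures. The following is a minimal, self-contained sketch of that pattern; the local master, application name, sample data, and the choice of the torrent factory are illustrative assumptions rather than anything prescribed by this patch.

import org.apache.spark.{SparkConf, SparkContext}

object BroadcastUsageSketch {
  def main(args: Array[String]) {
    // Illustrative configuration: select the torrent-based factory through the
    // same "spark.broadcast.factory" key that BroadcastManager now reads.
    val conf = new SparkConf()
      .setMaster("local[2]")                 // assumed local master for the sketch
      .setAppName("broadcast-sketch")        // hypothetical application name
      .set("spark.broadcast.factory",
           "org.apache.spark.broadcast.TorrentBroadcastFactory")
    val sc = new SparkContext(conf)

    // Create the broadcast variable once on the driver...
    val lookup = sc.broadcast(Array(1, 2, 3))

    // ...and read it via .value inside closures, so the array is shipped to
    // each node at most once instead of with every task.
    val shifted = sc.parallelize(1 to 10).map(x => x + lookup.value.sum)
    println(shifted.collect().mkString(", "))

    sc.stop()
  }
}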
*/ -private[spark] trait BroadcastFactory { - def initialize(isDriver: Boolean): Unit +trait BroadcastFactory { + def initialize(isDriver: Boolean, conf: SparkConf): Unit def newBroadcast[T](value: T, isLocal: Boolean, id: Long): Broadcast[T] def stop(): Unit } diff --git a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala index 609464e38d5c6..39ee0dbb92841 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala @@ -19,18 +19,19 @@ package org.apache.spark.broadcast import java.io.{File, FileOutputStream, ObjectInputStream, OutputStream} import java.net.URL +import java.util.concurrent.TimeUnit import it.unimi.dsi.fastutil.io.FastBufferedInputStream import it.unimi.dsi.fastutil.io.FastBufferedOutputStream -import org.apache.spark.{HttpServer, Logging, SparkEnv} +import org.apache.spark.{SparkConf, HttpServer, Logging, SparkEnv} import org.apache.spark.io.CompressionCodec import org.apache.spark.storage.{BroadcastBlockId, StorageLevel} import org.apache.spark.util.{MetadataCleaner, MetadataCleanerType, TimeStampedHashSet, Utils} private[spark] class HttpBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long) extends Broadcast[T](id) with Logging with Serializable { - + def value = value_ def blockId = BroadcastBlockId(id) @@ -39,7 +40,7 @@ private[spark] class HttpBroadcast[T](@transient var value_ : T, isLocal: Boolea SparkEnv.get.blockManager.putSingle(blockId, value_, StorageLevel.MEMORY_AND_DISK, false) } - if (!isLocal) { + if (!isLocal) { HttpBroadcast.write(id, value_) } @@ -62,8 +63,11 @@ private[spark] class HttpBroadcast[T](@transient var value_ : T, isLocal: Boolea } } -private[spark] class HttpBroadcastFactory extends BroadcastFactory { - def initialize(isDriver: Boolean) { HttpBroadcast.initialize(isDriver) } +/** + * A [[BroadcastFactory]] implementation that uses a HTTP server as the broadcast medium. 
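Because the trait above loses its private[spark] modifier and its initialize method now takes a SparkConf, third-party broadcast implementations can in principle plug in. Below is a hedged skeleton of what such an implementation has to provide; the "Noop" names are invented for illustration and are not part of Spark or of this patch.

import org.apache.spark.SparkConf
import org.apache.spark.broadcast.{Broadcast, BroadcastFactory}

// Hypothetical skeleton only: it shows the shape the now-public trait expects
// after this patch (initialize gained a SparkConf parameter), not a useful
// broadcast mechanism.
class NoopBroadcast[T](value_ : T, id: Long) extends Broadcast[T](id) {
  def value: T = value_   // no distribution at all; the value stays driver-local
}

class NoopBroadcastFactory extends BroadcastFactory {
  def initialize(isDriver: Boolean, conf: SparkConf) {
    // e.g. read tuning knobs from conf here instead of System.getProperty
  }
  def newBroadcast[T](value_ : T, isLocal: Boolean, id: Long): Broadcast[T] =
    new NoopBroadcast[T](value_, id)
  def stop() { }
}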
+ */ +class HttpBroadcastFactory extends BroadcastFactory { + def initialize(isDriver: Boolean, conf: SparkConf) { HttpBroadcast.initialize(isDriver, conf) } def newBroadcast[T](value_ : T, isLocal: Boolean, id: Long) = new HttpBroadcast[T](value_, isLocal, id) @@ -80,42 +84,51 @@ private object HttpBroadcast extends Logging { private var serverUri: String = null private var server: HttpServer = null + // TODO: This shouldn't be a global variable so that multiple SparkContexts can coexist private val files = new TimeStampedHashSet[String] - private val cleaner = new MetadataCleaner(MetadataCleanerType.HTTP_BROADCAST, cleanup) + private var cleaner: MetadataCleaner = null + + private val httpReadTimeout = TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES).toInt - private lazy val compressionCodec = CompressionCodec.createCodec() + private var compressionCodec: CompressionCodec = null - def initialize(isDriver: Boolean) { + def initialize(isDriver: Boolean, conf: SparkConf) { synchronized { if (!initialized) { - bufferSize = System.getProperty("spark.buffer.size", "65536").toInt - compress = System.getProperty("spark.broadcast.compress", "true").toBoolean + bufferSize = conf.getInt("spark.buffer.size", 65536) + compress = conf.getBoolean("spark.broadcast.compress", true) if (isDriver) { - createServer() + createServer(conf) + conf.set("spark.httpBroadcast.uri", serverUri) } - serverUri = System.getProperty("spark.httpBroadcast.uri") + serverUri = conf.get("spark.httpBroadcast.uri") + cleaner = new MetadataCleaner(MetadataCleanerType.HTTP_BROADCAST, cleanup, conf) + compressionCodec = CompressionCodec.createCodec(conf) initialized = true } } } - + def stop() { synchronized { if (server != null) { server.stop() server = null } + if (cleaner != null) { + cleaner.cancel() + cleaner = null + } + compressionCodec = null initialized = false - cleaner.cancel() } } - private def createServer() { - broadcastDir = Utils.createTempDir(Utils.getLocalDir) + private def createServer(conf: SparkConf) { + broadcastDir = Utils.createTempDir(Utils.getLocalDir(conf)) server = new HttpServer(broadcastDir) server.start() serverUri = server.uri - System.setProperty("spark.httpBroadcast.uri", serverUri) logInfo("Broadcast server started at " + serverUri) } @@ -138,10 +151,13 @@ private object HttpBroadcast extends Logging { def read[T](id: Long): T = { val url = serverUri + "/" + BroadcastBlockId(id).name val in = { + val httpConnection = new URL(url).openConnection() + httpConnection.setReadTimeout(httpReadTimeout) + val inputStream = httpConnection.getInputStream if (compress) { - compressionCodec.compressedInputStream(new URL(url).openStream()) + compressionCodec.compressedInputStream(inputStream) } else { - new FastBufferedInputStream(new URL(url).openStream(), bufferSize) + new FastBufferedInputStream(inputStream, bufferSize) } } val ser = SparkEnv.get.serializer.newInstance() diff --git a/core/src/main/scala/org/apache/spark/broadcast/MultiTracker.scala b/core/src/main/scala/org/apache/spark/broadcast/MultiTracker.scala deleted file mode 100644 index 82ed64f190b4f..0000000000000 --- a/core/src/main/scala/org/apache/spark/broadcast/MultiTracker.scala +++ /dev/null @@ -1,410 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.broadcast - -import java.io._ -import java.net._ -import java.util.Random - -import scala.collection.mutable.Map - -import org.apache.spark._ -import org.apache.spark.util.Utils - -private object MultiTracker -extends Logging { - - // Tracker Messages - val REGISTER_BROADCAST_TRACKER = 0 - val UNREGISTER_BROADCAST_TRACKER = 1 - val FIND_BROADCAST_TRACKER = 2 - - // Map to keep track of guides of ongoing broadcasts - var valueToGuideMap = Map[Long, SourceInfo]() - - // Random number generator - var ranGen = new Random - - private var initialized = false - private var _isDriver = false - - private var stopBroadcast = false - - private var trackMV: TrackMultipleValues = null - - def initialize(__isDriver: Boolean) { - synchronized { - if (!initialized) { - _isDriver = __isDriver - - if (isDriver) { - trackMV = new TrackMultipleValues - trackMV.setDaemon(true) - trackMV.start() - - // Set DriverHostAddress to the driver's IP address for the slaves to read - System.setProperty("spark.MultiTracker.DriverHostAddress", Utils.localIpAddress) - } - - initialized = true - } - } - } - - def stop() { - stopBroadcast = true - } - - // Load common parameters - private var DriverHostAddress_ = System.getProperty( - "spark.MultiTracker.DriverHostAddress", "") - private var DriverTrackerPort_ = System.getProperty( - "spark.broadcast.driverTrackerPort", "11111").toInt - private var BlockSize_ = System.getProperty( - "spark.broadcast.blockSize", "4096").toInt * 1024 - private var MaxRetryCount_ = System.getProperty( - "spark.broadcast.maxRetryCount", "2").toInt - - private var TrackerSocketTimeout_ = System.getProperty( - "spark.broadcast.trackerSocketTimeout", "50000").toInt - private var ServerSocketTimeout_ = System.getProperty( - "spark.broadcast.serverSocketTimeout", "10000").toInt - - private var MinKnockInterval_ = System.getProperty( - "spark.broadcast.minKnockInterval", "500").toInt - private var MaxKnockInterval_ = System.getProperty( - "spark.broadcast.maxKnockInterval", "999").toInt - - // Load TreeBroadcast config params - private var MaxDegree_ = System.getProperty( - "spark.broadcast.maxDegree", "2").toInt - - // Load BitTorrentBroadcast config params - private var MaxPeersInGuideResponse_ = System.getProperty( - "spark.broadcast.maxPeersInGuideResponse", "4").toInt - - private var MaxChatSlots_ = System.getProperty( - "spark.broadcast.maxChatSlots", "4").toInt - private var MaxChatTime_ = System.getProperty( - "spark.broadcast.maxChatTime", "500").toInt - private var MaxChatBlocks_ = System.getProperty( - "spark.broadcast.maxChatBlocks", "1024").toInt - - private var EndGameFraction_ = System.getProperty( - "spark.broadcast.endGameFraction", "0.95").toDouble - - def isDriver = _isDriver - - // Common config params - def DriverHostAddress = DriverHostAddress_ - def DriverTrackerPort = DriverTrackerPort_ - def BlockSize = BlockSize_ - def MaxRetryCount = MaxRetryCount_ - - def 
TrackerSocketTimeout = TrackerSocketTimeout_ - def ServerSocketTimeout = ServerSocketTimeout_ - - def MinKnockInterval = MinKnockInterval_ - def MaxKnockInterval = MaxKnockInterval_ - - // TreeBroadcast configs - def MaxDegree = MaxDegree_ - - // BitTorrentBroadcast configs - def MaxPeersInGuideResponse = MaxPeersInGuideResponse_ - - def MaxChatSlots = MaxChatSlots_ - def MaxChatTime = MaxChatTime_ - def MaxChatBlocks = MaxChatBlocks_ - - def EndGameFraction = EndGameFraction_ - - class TrackMultipleValues - extends Thread with Logging { - override def run() { - var threadPool = Utils.newDaemonCachedThreadPool("Track multiple values") - var serverSocket: ServerSocket = null - - serverSocket = new ServerSocket(DriverTrackerPort) - logInfo("TrackMultipleValues started at " + serverSocket) - - try { - while (!stopBroadcast) { - var clientSocket: Socket = null - try { - serverSocket.setSoTimeout(TrackerSocketTimeout) - clientSocket = serverSocket.accept() - } catch { - case e: Exception => { - if (stopBroadcast) { - logInfo("Stopping TrackMultipleValues...") - } - } - } - - if (clientSocket != null) { - try { - threadPool.execute(new Thread { - override def run() { - val oos = new ObjectOutputStream(clientSocket.getOutputStream) - oos.flush() - val ois = new ObjectInputStream(clientSocket.getInputStream) - - try { - // First, read message type - val messageType = ois.readObject.asInstanceOf[Int] - - if (messageType == REGISTER_BROADCAST_TRACKER) { - // Receive Long - val id = ois.readObject.asInstanceOf[Long] - // Receive hostAddress and listenPort - val gInfo = ois.readObject.asInstanceOf[SourceInfo] - - // Add to the map - valueToGuideMap.synchronized { - valueToGuideMap += (id -> gInfo) - } - - logInfo ("New broadcast " + id + " registered with TrackMultipleValues. Ongoing ones: " + valueToGuideMap) - - // Send dummy ACK - oos.writeObject(-1) - oos.flush() - } else if (messageType == UNREGISTER_BROADCAST_TRACKER) { - // Receive Long - val id = ois.readObject.asInstanceOf[Long] - - // Remove from the map - valueToGuideMap.synchronized { - valueToGuideMap(id) = SourceInfo("", SourceInfo.TxOverGoToDefault) - } - - logInfo ("Broadcast " + id + " unregistered from TrackMultipleValues. 
Ongoing ones: " + valueToGuideMap) - - // Send dummy ACK - oos.writeObject(-1) - oos.flush() - } else if (messageType == FIND_BROADCAST_TRACKER) { - // Receive Long - val id = ois.readObject.asInstanceOf[Long] - - var gInfo = - if (valueToGuideMap.contains(id)) valueToGuideMap(id) - else SourceInfo("", SourceInfo.TxNotStartedRetry) - - logDebug("Got new request: " + clientSocket + " for " + id + " : " + gInfo.listenPort) - - // Send reply back - oos.writeObject(gInfo) - oos.flush() - } else { - throw new SparkException("Undefined messageType at TrackMultipleValues") - } - } catch { - case e: Exception => { - logError("TrackMultipleValues had a " + e) - } - } finally { - ois.close() - oos.close() - clientSocket.close() - } - } - }) - } catch { - // In failure, close socket here; else, client thread will close - case ioe: IOException => clientSocket.close() - } - } - } - } finally { - serverSocket.close() - } - // Shutdown the thread pool - threadPool.shutdown() - } - } - - def getGuideInfo(variableLong: Long): SourceInfo = { - var clientSocketToTracker: Socket = null - var oosTracker: ObjectOutputStream = null - var oisTracker: ObjectInputStream = null - - var gInfo: SourceInfo = SourceInfo("", SourceInfo.TxNotStartedRetry) - - var retriesLeft = MultiTracker.MaxRetryCount - do { - try { - // Connect to the tracker to find out GuideInfo - clientSocketToTracker = - new Socket(MultiTracker.DriverHostAddress, MultiTracker.DriverTrackerPort) - oosTracker = - new ObjectOutputStream(clientSocketToTracker.getOutputStream) - oosTracker.flush() - oisTracker = - new ObjectInputStream(clientSocketToTracker.getInputStream) - - // Send messageType/intention - oosTracker.writeObject(MultiTracker.FIND_BROADCAST_TRACKER) - oosTracker.flush() - - // Send Long and receive GuideInfo - oosTracker.writeObject(variableLong) - oosTracker.flush() - gInfo = oisTracker.readObject.asInstanceOf[SourceInfo] - } catch { - case e: Exception => logError("getGuideInfo had a " + e) - } finally { - if (oisTracker != null) { - oisTracker.close() - } - if (oosTracker != null) { - oosTracker.close() - } - if (clientSocketToTracker != null) { - clientSocketToTracker.close() - } - } - - Thread.sleep(MultiTracker.ranGen.nextInt( - MultiTracker.MaxKnockInterval - MultiTracker.MinKnockInterval) + - MultiTracker.MinKnockInterval) - - retriesLeft -= 1 - } while (retriesLeft > 0 && gInfo.listenPort == SourceInfo.TxNotStartedRetry) - - logDebug("Got this guidePort from Tracker: " + gInfo.listenPort) - return gInfo - } - - def registerBroadcast(id: Long, gInfo: SourceInfo) { - val socket = new Socket(MultiTracker.DriverHostAddress, DriverTrackerPort) - val oosST = new ObjectOutputStream(socket.getOutputStream) - oosST.flush() - val oisST = new ObjectInputStream(socket.getInputStream) - - // Send messageType/intention - oosST.writeObject(REGISTER_BROADCAST_TRACKER) - oosST.flush() - - // Send Long of this broadcast - oosST.writeObject(id) - oosST.flush() - - // Send this tracker's information - oosST.writeObject(gInfo) - oosST.flush() - - // Receive ACK and throw it away - oisST.readObject.asInstanceOf[Int] - - // Shut stuff down - oisST.close() - oosST.close() - socket.close() - } - - def unregisterBroadcast(id: Long) { - val socket = new Socket(MultiTracker.DriverHostAddress, DriverTrackerPort) - val oosST = new ObjectOutputStream(socket.getOutputStream) - oosST.flush() - val oisST = new ObjectInputStream(socket.getInputStream) - - // Send messageType/intention - oosST.writeObject(UNREGISTER_BROADCAST_TRACKER) - oosST.flush() - - // Send 
Long of this broadcast - oosST.writeObject(id) - oosST.flush() - - // Receive ACK and throw it away - oisST.readObject.asInstanceOf[Int] - - // Shut stuff down - oisST.close() - oosST.close() - socket.close() - } - - // Helper method to convert an object to Array[BroadcastBlock] - def blockifyObject[IN](obj: IN): VariableInfo = { - val baos = new ByteArrayOutputStream - val oos = new ObjectOutputStream(baos) - oos.writeObject(obj) - oos.close() - baos.close() - val byteArray = baos.toByteArray - val bais = new ByteArrayInputStream(byteArray) - - var blockNum = (byteArray.length / BlockSize) - if (byteArray.length % BlockSize != 0) - blockNum += 1 - - var retVal = new Array[BroadcastBlock](blockNum) - var blockID = 0 - - for (i <- 0 until (byteArray.length, BlockSize)) { - val thisBlockSize = math.min(BlockSize, byteArray.length - i) - var tempByteArray = new Array[Byte](thisBlockSize) - val hasRead = bais.read(tempByteArray, 0, thisBlockSize) - - retVal(blockID) = new BroadcastBlock(blockID, tempByteArray) - blockID += 1 - } - bais.close() - - var variableInfo = VariableInfo(retVal, blockNum, byteArray.length) - variableInfo.hasBlocks = blockNum - - return variableInfo - } - - // Helper method to convert Array[BroadcastBlock] to object - def unBlockifyObject[OUT](arrayOfBlocks: Array[BroadcastBlock], - totalBytes: Int, - totalBlocks: Int): OUT = { - - var retByteArray = new Array[Byte](totalBytes) - for (i <- 0 until totalBlocks) { - System.arraycopy(arrayOfBlocks(i).byteArray, 0, retByteArray, - i * BlockSize, arrayOfBlocks(i).byteArray.length) - } - byteArrayToObject(retByteArray) - } - - private def byteArrayToObject[OUT](bytes: Array[Byte]): OUT = { - val in = new ObjectInputStream (new ByteArrayInputStream (bytes)){ - override def resolveClass(desc: ObjectStreamClass) = - Class.forName(desc.getName, false, Thread.currentThread.getContextClassLoader) - } - val retVal = in.readObject.asInstanceOf[OUT] - in.close() - return retVal - } -} - -private[spark] case class BroadcastBlock(blockID: Int, byteArray: Array[Byte]) -extends Serializable - -private[spark] case class VariableInfo(@transient arrayOfBlocks : Array[BroadcastBlock], - totalBlocks: Int, - totalBytes: Int) -extends Serializable { - @transient var hasBlocks = 0 -} diff --git a/core/src/main/scala/org/apache/spark/broadcast/SourceInfo.scala b/core/src/main/scala/org/apache/spark/broadcast/SourceInfo.scala deleted file mode 100644 index baa1fd6da46e8..0000000000000 --- a/core/src/main/scala/org/apache/spark/broadcast/SourceInfo.scala +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.broadcast - -import java.util.BitSet - -import org.apache.spark._ - -/** - * Used to keep and pass around information of peers involved in a broadcast - */ -private[spark] case class SourceInfo (hostAddress: String, - listenPort: Int, - totalBlocks: Int = SourceInfo.UnusedParam, - totalBytes: Int = SourceInfo.UnusedParam) -extends Comparable[SourceInfo] with Logging { - - var currentLeechers = 0 - var receptionFailed = false - - var hasBlocks = 0 - var hasBlocksBitVector: BitSet = new BitSet (totalBlocks) - - // Ascending sort based on leecher count - def compareTo (o: SourceInfo): Int = (currentLeechers - o.currentLeechers) -} - -/** - * Helper Object of SourceInfo for its constants - */ -private[spark] object SourceInfo { - // Broadcast has not started yet! Should never happen. - val TxNotStartedRetry = -1 - // Broadcast has already finished. Try default mechanism. - val TxOverGoToDefault = -3 - // Other constants - val StopBroadcast = -2 - val UnusedParam = 0 -} diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala index 073a0a50297d0..d351dfc1f56a2 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala @@ -83,13 +83,13 @@ extends Broadcast[T](id) with Logging with Serializable { case None => val start = System.nanoTime logInfo("Started reading broadcast variable " + id) - + // Initialize @transient variables that will receive garbage values from the master. resetWorkerVariables() if (receiveBroadcast(id)) { value_ = TorrentBroadcast.unBlockifyObject[T](arrayOfBlocks, totalBytes, totalBlocks) - + // Store the merged copy in cache so that the next worker doesn't need to rebuild it. // This creates a tradeoff between memory usage and latency. // Storing copy doubles the memory footprint; not storing doubles deserialization cost. 
@@ -122,14 +122,14 @@ extends Broadcast[T](id) with Logging with Serializable { while (attemptId > 0 && totalBlocks == -1) { TorrentBroadcast.synchronized { SparkEnv.get.blockManager.getSingle(metaId) match { - case Some(x) => + case Some(x) => val tInfo = x.asInstanceOf[TorrentInfo] totalBlocks = tInfo.totalBlocks totalBytes = tInfo.totalBytes arrayOfBlocks = new Array[TorrentBlock](totalBlocks) hasBlocks = 0 - - case None => + + case None => Thread.sleep(500) } } @@ -145,13 +145,13 @@ extends Broadcast[T](id) with Logging with Serializable { val pieceId = BroadcastHelperBlockId(broadcastId, "piece" + pid) TorrentBroadcast.synchronized { SparkEnv.get.blockManager.getSingle(pieceId) match { - case Some(x) => + case Some(x) => arrayOfBlocks(pid) = x.asInstanceOf[TorrentBlock] hasBlocks += 1 SparkEnv.get.blockManager.putSingle( pieceId, arrayOfBlocks(pid), StorageLevel.MEMORY_AND_DISK, true) - - case None => + + case None => throw new SparkException("Failed to get " + pieceId + " of " + broadcastId) } } @@ -166,21 +166,22 @@ private object TorrentBroadcast extends Logging { private var initialized = false - - def initialize(_isDriver: Boolean) { + private var conf: SparkConf = null + def initialize(_isDriver: Boolean, conf: SparkConf) { + TorrentBroadcast.conf = conf //TODO: we might have to fix it in tests synchronized { if (!initialized) { initialized = true } } } - + def stop() { initialized = false } - val BLOCK_SIZE = System.getProperty("spark.broadcast.blockSize", "4096").toInt * 1024 - + lazy val BLOCK_SIZE = conf.getInt("spark.broadcast.blockSize", 4096) * 1024 + def blockifyObject[T](obj: T): TorrentInfo = { val byteArray = Utils.serialize[T](obj) val bais = new ByteArrayInputStream(byteArray) @@ -202,16 +203,16 @@ extends Logging { } bais.close() - var tInfo = TorrentInfo(retVal, blockNum, byteArray.length) + val tInfo = TorrentInfo(retVal, blockNum, byteArray.length) tInfo.hasBlocks = blockNum - return tInfo + tInfo } def unBlockifyObject[T](arrayOfBlocks: Array[TorrentBlock], - totalBytes: Int, + totalBytes: Int, totalBlocks: Int): T = { - var retByteArray = new Array[Byte](totalBytes) + val retByteArray = new Array[Byte](totalBytes) for (i <- 0 until totalBlocks) { System.arraycopy(arrayOfBlocks(i).byteArray, 0, retByteArray, i * BLOCK_SIZE, arrayOfBlocks(i).byteArray.length) @@ -222,23 +223,25 @@ extends Logging { } private[spark] case class TorrentBlock( - blockID: Int, - byteArray: Array[Byte]) + blockID: Int, + byteArray: Array[Byte]) extends Serializable private[spark] case class TorrentInfo( @transient arrayOfBlocks : Array[TorrentBlock], - totalBlocks: Int, - totalBytes: Int) + totalBlocks: Int, + totalBytes: Int) extends Serializable { - - @transient var hasBlocks = 0 + + @transient var hasBlocks = 0 } -private[spark] class TorrentBroadcastFactory - extends BroadcastFactory { - - def initialize(isDriver: Boolean) { TorrentBroadcast.initialize(isDriver) } +/** + * A [[BroadcastFactory]] that creates a torrent-based implementation of broadcast. 
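The blockifyObject/unBlockifyObject pair above reduces to fixed-size chunking of a serialized byte array, with the chunk size now coming from spark.broadcast.blockSize instead of a system property. The standalone sketch below mirrors that arithmetic under an assumed 4 KB block size; it is an illustration of the technique, not code from this patch.

// Standalone illustration of the chunking arithmetic used by blockifyObject;
// the 4 KB block size is an assumption standing in for
// conf.getInt("spark.broadcast.blockSize", 4096) * 1024.
object ChunkingSketch {
  val blockSize = 4 * 1024

  def blockify(bytes: Array[Byte]): Array[Array[Byte]] = {
    // One block per full blockSize slice, plus one more if there is a remainder.
    val blockNum = (bytes.length + blockSize - 1) / blockSize
    Array.tabulate(blockNum) { i =>
      val from = i * blockSize
      val until = math.min(from + blockSize, bytes.length)
      bytes.slice(from, until)
    }
  }

  def unBlockify(blocks: Array[Array[Byte]], totalBytes: Int): Array[Byte] = {
    val out = new Array[Byte](totalBytes)
    var offset = 0
    for (b <- blocks) {
      System.arraycopy(b, 0, out, offset, b.length)
      offset += b.length
    }
    out
  }

  def main(args: Array[String]) {
    val data = Array.fill[Byte](10000)(1)
    val blocks = blockify(data)
    assert(unBlockify(blocks, data.length).sameElements(data))
    println(blocks.length + " blocks")  // 3 blocks for 10000 bytes at 4 KB each
  }
}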
+ */ +class TorrentBroadcastFactory extends BroadcastFactory { + + def initialize(isDriver: Boolean, conf: SparkConf) { TorrentBroadcast.initialize(isDriver, conf) } def newBroadcast[T](value_ : T, isLocal: Boolean, id: Long) = new TorrentBroadcast[T](value_, isLocal, id) diff --git a/core/src/main/scala/org/apache/spark/broadcast/TreeBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TreeBroadcast.scala deleted file mode 100644 index 84ff6abf5d505..0000000000000 --- a/core/src/main/scala/org/apache/spark/broadcast/TreeBroadcast.scala +++ /dev/null @@ -1,604 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.broadcast - -import java.io._ -import java.net._ - -import scala.collection.mutable.{ListBuffer, Set} - -import org.apache.spark._ -import org.apache.spark.storage.{BroadcastBlockId, StorageLevel} -import org.apache.spark.util.Utils - -@deprecated("Use TorrentBroadcast", "0.8.1") -private[spark] class TreeBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long) -extends Broadcast[T](id) with Logging with Serializable { - - logWarning("BitTorrentBroadcast is deprecated. 
Use TorrentBroadcast.") - - def value = value_ - - def blockId = BroadcastBlockId(id) - - MultiTracker.synchronized { - SparkEnv.get.blockManager.putSingle(blockId, value_, StorageLevel.MEMORY_AND_DISK, false) - } - - @transient var arrayOfBlocks: Array[BroadcastBlock] = null - @transient var totalBytes = -1 - @transient var totalBlocks = -1 - @transient var hasBlocks = 0 - - @transient var listenPortLock = new Object - @transient var guidePortLock = new Object - @transient var totalBlocksLock = new Object - @transient var hasBlocksLock = new Object - - @transient var listOfSources = ListBuffer[SourceInfo]() - - @transient var serveMR: ServeMultipleRequests = null - @transient var guideMR: GuideMultipleRequests = null - - @transient var hostAddress = Utils.localIpAddress - @transient var listenPort = -1 - @transient var guidePort = -1 - - @transient var stopBroadcast = false - - // Must call this after all the variables have been created/initialized - if (!isLocal) { - sendBroadcast() - } - - def sendBroadcast() { - logInfo("Local host address: " + hostAddress) - - // Create a variableInfo object and store it in valueInfos - var variableInfo = MultiTracker.blockifyObject(value_) - - // Prepare the value being broadcasted - arrayOfBlocks = variableInfo.arrayOfBlocks - totalBytes = variableInfo.totalBytes - totalBlocks = variableInfo.totalBlocks - hasBlocks = variableInfo.totalBlocks - - guideMR = new GuideMultipleRequests - guideMR.setDaemon(true) - guideMR.start() - logInfo("GuideMultipleRequests started...") - - // Must always come AFTER guideMR is created - while (guidePort == -1) { - guidePortLock.synchronized { guidePortLock.wait() } - } - - serveMR = new ServeMultipleRequests - serveMR.setDaemon(true) - serveMR.start() - logInfo("ServeMultipleRequests started...") - - // Must always come AFTER serveMR is created - while (listenPort == -1) { - listenPortLock.synchronized { listenPortLock.wait() } - } - - // Must always come AFTER listenPort is created - val masterSource = - SourceInfo(hostAddress, listenPort, totalBlocks, totalBytes) - listOfSources += masterSource - - // Register with the Tracker - MultiTracker.registerBroadcast(id, - SourceInfo(hostAddress, guidePort, totalBlocks, totalBytes)) - } - - private def readObject(in: ObjectInputStream) { - in.defaultReadObject() - MultiTracker.synchronized { - SparkEnv.get.blockManager.getSingle(blockId) match { - case Some(x) => - value_ = x.asInstanceOf[T] - - case None => - logInfo("Started reading broadcast variable " + id) - // Initializing everything because Driver will only send null/0 values - // Only the 1st worker in a node can be here. 
Others will get from cache - initializeWorkerVariables() - - logInfo("Local host address: " + hostAddress) - - serveMR = new ServeMultipleRequests - serveMR.setDaemon(true) - serveMR.start() - logInfo("ServeMultipleRequests started...") - - val start = System.nanoTime - - val receptionSucceeded = receiveBroadcast(id) - if (receptionSucceeded) { - value_ = MultiTracker.unBlockifyObject[T](arrayOfBlocks, totalBytes, totalBlocks) - SparkEnv.get.blockManager.putSingle( - blockId, value_, StorageLevel.MEMORY_AND_DISK, false) - } else { - logError("Reading broadcast variable " + id + " failed") - } - - val time = (System.nanoTime - start) / 1e9 - logInfo("Reading broadcast variable " + id + " took " + time + " s") - } - } - } - - private def initializeWorkerVariables() { - arrayOfBlocks = null - totalBytes = -1 - totalBlocks = -1 - hasBlocks = 0 - - listenPortLock = new Object - totalBlocksLock = new Object - hasBlocksLock = new Object - - serveMR = null - - hostAddress = Utils.localIpAddress - listenPort = -1 - - stopBroadcast = false - } - - def receiveBroadcast(variableID: Long): Boolean = { - val gInfo = MultiTracker.getGuideInfo(variableID) - - if (gInfo.listenPort == SourceInfo.TxOverGoToDefault) { - return false - } - - // Wait until hostAddress and listenPort are created by the - // ServeMultipleRequests thread - while (listenPort == -1) { - listenPortLock.synchronized { listenPortLock.wait() } - } - - var clientSocketToDriver: Socket = null - var oosDriver: ObjectOutputStream = null - var oisDriver: ObjectInputStream = null - - // Connect and receive broadcast from the specified source, retrying the - // specified number of times in case of failures - var retriesLeft = MultiTracker.MaxRetryCount - do { - // Connect to Driver and send this worker's Information - clientSocketToDriver = new Socket(MultiTracker.DriverHostAddress, gInfo.listenPort) - oosDriver = new ObjectOutputStream(clientSocketToDriver.getOutputStream) - oosDriver.flush() - oisDriver = new ObjectInputStream(clientSocketToDriver.getInputStream) - - logDebug("Connected to Driver's guiding object") - - // Send local source information - oosDriver.writeObject(SourceInfo(hostAddress, listenPort)) - oosDriver.flush() - - // Receive source information from Driver - var sourceInfo = oisDriver.readObject.asInstanceOf[SourceInfo] - totalBlocks = sourceInfo.totalBlocks - arrayOfBlocks = new Array[BroadcastBlock](totalBlocks) - totalBlocksLock.synchronized { totalBlocksLock.notifyAll() } - totalBytes = sourceInfo.totalBytes - - logDebug("Received SourceInfo from Driver:" + sourceInfo + " My Port: " + listenPort) - - val start = System.nanoTime - val receptionSucceeded = receiveSingleTransmission(sourceInfo) - val time = (System.nanoTime - start) / 1e9 - - // Updating some statistics in sourceInfo. Driver will be using them later - if (!receptionSucceeded) { - sourceInfo.receptionFailed = true - } - - // Send back statistics to the Driver - oosDriver.writeObject(sourceInfo) - - if (oisDriver != null) { - oisDriver.close() - } - if (oosDriver != null) { - oosDriver.close() - } - if (clientSocketToDriver != null) { - clientSocketToDriver.close() - } - - retriesLeft -= 1 - } while (retriesLeft > 0 && hasBlocks < totalBlocks) - - return (hasBlocks == totalBlocks) - } - - /** - * Tries to receive broadcast from the source and returns Boolean status. - * This might be called multiple times to retry a defined number of times. 
- */ - private def receiveSingleTransmission(sourceInfo: SourceInfo): Boolean = { - var clientSocketToSource: Socket = null - var oosSource: ObjectOutputStream = null - var oisSource: ObjectInputStream = null - - var receptionSucceeded = false - try { - // Connect to the source to get the object itself - clientSocketToSource = new Socket(sourceInfo.hostAddress, sourceInfo.listenPort) - oosSource = new ObjectOutputStream(clientSocketToSource.getOutputStream) - oosSource.flush() - oisSource = new ObjectInputStream(clientSocketToSource.getInputStream) - - logDebug("Inside receiveSingleTransmission") - logDebug("totalBlocks: "+ totalBlocks + " " + "hasBlocks: " + hasBlocks) - - // Send the range - oosSource.writeObject((hasBlocks, totalBlocks)) - oosSource.flush() - - for (i <- hasBlocks until totalBlocks) { - val recvStartTime = System.currentTimeMillis - val bcBlock = oisSource.readObject.asInstanceOf[BroadcastBlock] - val receptionTime = (System.currentTimeMillis - recvStartTime) - - logDebug("Received block: " + bcBlock.blockID + " from " + sourceInfo + " in " + receptionTime + " millis.") - - arrayOfBlocks(hasBlocks) = bcBlock - hasBlocks += 1 - - // Set to true if at least one block is received - receptionSucceeded = true - hasBlocksLock.synchronized { hasBlocksLock.notifyAll() } - } - } catch { - case e: Exception => logError("receiveSingleTransmission had a " + e) - } finally { - if (oisSource != null) { - oisSource.close() - } - if (oosSource != null) { - oosSource.close() - } - if (clientSocketToSource != null) { - clientSocketToSource.close() - } - } - - return receptionSucceeded - } - - class GuideMultipleRequests - extends Thread with Logging { - // Keep track of sources that have completed reception - private var setOfCompletedSources = Set[SourceInfo]() - - override def run() { - var threadPool = Utils.newDaemonCachedThreadPool("Tree broadcast guide multiple requests") - var serverSocket: ServerSocket = null - - serverSocket = new ServerSocket(0) - guidePort = serverSocket.getLocalPort - logInfo("GuideMultipleRequests => " + serverSocket + " " + guidePort) - - guidePortLock.synchronized { guidePortLock.notifyAll() } - - try { - while (!stopBroadcast) { - var clientSocket: Socket = null - try { - serverSocket.setSoTimeout(MultiTracker.ServerSocketTimeout) - clientSocket = serverSocket.accept - } catch { - case e: Exception => { - // Stop broadcast if at least one worker has connected and - // everyone connected so far are done. Comparing with - // listOfSources.size - 1, because it includes the Guide itself - listOfSources.synchronized { - setOfCompletedSources.synchronized { - if (listOfSources.size > 1 && - setOfCompletedSources.size == listOfSources.size - 1) { - stopBroadcast = true - logInfo("GuideMultipleRequests Timeout. 
stopBroadcast == true.") - } - } - } - } - } - if (clientSocket != null) { - logDebug("Guide: Accepted new client connection: " + clientSocket) - try { - threadPool.execute(new GuideSingleRequest(clientSocket)) - } catch { - // In failure, close() the socket here; else, the thread will close() it - case ioe: IOException => clientSocket.close() - } - } - } - - logInfo("Sending stopBroadcast notifications...") - sendStopBroadcastNotifications - - MultiTracker.unregisterBroadcast(id) - } finally { - if (serverSocket != null) { - logInfo("GuideMultipleRequests now stopping...") - serverSocket.close() - } - } - // Shutdown the thread pool - threadPool.shutdown() - } - - private def sendStopBroadcastNotifications() { - listOfSources.synchronized { - var listIter = listOfSources.iterator - while (listIter.hasNext) { - var sourceInfo = listIter.next - - var guideSocketToSource: Socket = null - var gosSource: ObjectOutputStream = null - var gisSource: ObjectInputStream = null - - try { - // Connect to the source - guideSocketToSource = new Socket(sourceInfo.hostAddress, sourceInfo.listenPort) - gosSource = new ObjectOutputStream(guideSocketToSource.getOutputStream) - gosSource.flush() - gisSource = new ObjectInputStream(guideSocketToSource.getInputStream) - - // Send stopBroadcast signal - gosSource.writeObject((SourceInfo.StopBroadcast, SourceInfo.StopBroadcast)) - gosSource.flush() - } catch { - case e: Exception => { - logError("sendStopBroadcastNotifications had a " + e) - } - } finally { - if (gisSource != null) { - gisSource.close() - } - if (gosSource != null) { - gosSource.close() - } - if (guideSocketToSource != null) { - guideSocketToSource.close() - } - } - } - } - } - - class GuideSingleRequest(val clientSocket: Socket) - extends Thread with Logging { - private val oos = new ObjectOutputStream(clientSocket.getOutputStream) - oos.flush() - private val ois = new ObjectInputStream(clientSocket.getInputStream) - - private var selectedSourceInfo: SourceInfo = null - private var thisWorkerInfo:SourceInfo = null - - override def run() { - try { - logInfo("new GuideSingleRequest is running") - // Connecting worker is sending in its hostAddress and listenPort it will - // be listening to. Other fields are invalid (SourceInfo.UnusedParam) - var sourceInfo = ois.readObject.asInstanceOf[SourceInfo] - - listOfSources.synchronized { - // Select a suitable source and send it back to the worker - selectedSourceInfo = selectSuitableSource(sourceInfo) - logDebug("Sending selectedSourceInfo: " + selectedSourceInfo) - oos.writeObject(selectedSourceInfo) - oos.flush() - - // Add this new (if it can finish) source to the list of sources - thisWorkerInfo = SourceInfo(sourceInfo.hostAddress, - sourceInfo.listenPort, totalBlocks, totalBytes) - logDebug("Adding possible new source to listOfSources: " + thisWorkerInfo) - listOfSources += thisWorkerInfo - } - - // Wait till the whole transfer is done. Then receive and update source - // statistics in listOfSources - sourceInfo = ois.readObject.asInstanceOf[SourceInfo] - - listOfSources.synchronized { - // This should work since SourceInfo is a case class - assert(listOfSources.contains(selectedSourceInfo)) - - // Remove first - // (Currently removing a source based on just one failure notification!) 
- listOfSources = listOfSources - selectedSourceInfo - - // Update sourceInfo and put it back in, IF reception succeeded - if (!sourceInfo.receptionFailed) { - // Add thisWorkerInfo to sources that have completed reception - setOfCompletedSources.synchronized { - setOfCompletedSources += thisWorkerInfo - } - - // Update leecher count and put it back in - selectedSourceInfo.currentLeechers -= 1 - listOfSources += selectedSourceInfo - } - } - } catch { - case e: Exception => { - // Remove failed worker from listOfSources and update leecherCount of - // corresponding source worker - listOfSources.synchronized { - if (selectedSourceInfo != null) { - // Remove first - listOfSources = listOfSources - selectedSourceInfo - // Update leecher count and put it back in - selectedSourceInfo.currentLeechers -= 1 - listOfSources += selectedSourceInfo - } - - // Remove thisWorkerInfo - if (listOfSources != null) { - listOfSources = listOfSources - thisWorkerInfo - } - } - } - } finally { - logInfo("GuideSingleRequest is closing streams and sockets") - ois.close() - oos.close() - clientSocket.close() - } - } - - // Assuming the caller to have a synchronized block on listOfSources - // Select one with the most leechers. This will level-wise fill the tree - private def selectSuitableSource(skipSourceInfo: SourceInfo): SourceInfo = { - var maxLeechers = -1 - var selectedSource: SourceInfo = null - - listOfSources.foreach { source => - if ((source.hostAddress != skipSourceInfo.hostAddress || - source.listenPort != skipSourceInfo.listenPort) && - source.currentLeechers < MultiTracker.MaxDegree && - source.currentLeechers > maxLeechers) { - selectedSource = source - maxLeechers = source.currentLeechers - } - } - - // Update leecher count - selectedSource.currentLeechers += 1 - return selectedSource - } - } - } - - class ServeMultipleRequests - extends Thread with Logging { - - var threadPool = Utils.newDaemonCachedThreadPool("Tree broadcast serve multiple requests") - - override def run() { - var serverSocket = new ServerSocket(0) - listenPort = serverSocket.getLocalPort - - logInfo("ServeMultipleRequests started with " + serverSocket) - - listenPortLock.synchronized { listenPortLock.notifyAll() } - - try { - while (!stopBroadcast) { - var clientSocket: Socket = null - try { - serverSocket.setSoTimeout(MultiTracker.ServerSocketTimeout) - clientSocket = serverSocket.accept - } catch { - case e: Exception => { } - } - - if (clientSocket != null) { - logDebug("Serve: Accepted new client connection: " + clientSocket) - try { - threadPool.execute(new ServeSingleRequest(clientSocket)) - } catch { - // In failure, close socket here; else, the thread will close it - case ioe: IOException => clientSocket.close() - } - } - } - } finally { - if (serverSocket != null) { - logInfo("ServeMultipleRequests now stopping...") - serverSocket.close() - } - } - // Shutdown the thread pool - threadPool.shutdown() - } - - class ServeSingleRequest(val clientSocket: Socket) - extends Thread with Logging { - private val oos = new ObjectOutputStream(clientSocket.getOutputStream) - oos.flush() - private val ois = new ObjectInputStream(clientSocket.getInputStream) - - private var sendFrom = 0 - private var sendUntil = totalBlocks - - override def run() { - try { - logInfo("new ServeSingleRequest is running") - - // Receive range to send - var rangeToSend = ois.readObject.asInstanceOf[(Int, Int)] - sendFrom = rangeToSend._1 - sendUntil = rangeToSend._2 - - // If not a valid range, stop broadcast - if (sendFrom == SourceInfo.StopBroadcast && 
sendUntil == SourceInfo.StopBroadcast) { - stopBroadcast = true - } else { - sendObject - } - } catch { - case e: Exception => logError("ServeSingleRequest had a " + e) - } finally { - logInfo("ServeSingleRequest is closing streams and sockets") - ois.close() - oos.close() - clientSocket.close() - } - } - - private def sendObject() { - // Wait till receiving the SourceInfo from Driver - while (totalBlocks == -1) { - totalBlocksLock.synchronized { totalBlocksLock.wait() } - } - - for (i <- sendFrom until sendUntil) { - while (i == hasBlocks) { - hasBlocksLock.synchronized { hasBlocksLock.wait() } - } - try { - oos.writeObject(arrayOfBlocks(i)) - oos.flush() - } catch { - case e: Exception => logError("sendObject had a " + e) - } - logDebug("Sent block: " + i + " to " + clientSocket) - } - } - } - } -} - -private[spark] class TreeBroadcastFactory -extends BroadcastFactory { - def initialize(isDriver: Boolean) { MultiTracker.initialize(isDriver) } - - def newBroadcast[T](value_ : T, isLocal: Boolean, id: Long) = - new TreeBroadcast[T](value_, isLocal, id) - - def stop() { MultiTracker.stop() } -} diff --git a/core/src/main/scala/org/apache/spark/broadcast/package.scala b/core/src/main/scala/org/apache/spark/broadcast/package.scala new file mode 100644 index 0000000000000..01bf88629a7dd --- /dev/null +++ b/core/src/main/scala/org/apache/spark/broadcast/package.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +/** + * Package for broadcast variables. See [[broadcast.Broadcast]] for details. + */ +package object broadcast { + // For package docs only +} diff --git a/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala index 19d393a0dbce7..e38459b883b66 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy private[spark] class ApplicationDescription( val name: String, - val maxCores: Int, /* Integer.MAX_VALUE denotes an unlimited number of cores */ + val maxCores: Option[Int], val memoryPerSlave: Int, val command: Command, val sparkHome: String, diff --git a/core/src/main/scala/org/apache/spark/deploy/Client.scala b/core/src/main/scala/org/apache/spark/deploy/Client.scala new file mode 100644 index 0000000000000..9987e2300ceb7 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/Client.scala @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy + +import scala.collection.JavaConversions._ +import scala.collection.mutable.Map +import scala.concurrent._ + +import akka.actor._ +import akka.pattern.ask +import org.apache.log4j.{Level, Logger} + +import org.apache.spark.{Logging, SparkConf} +import org.apache.spark.deploy.DeployMessages._ +import org.apache.spark.deploy.master.{DriverState, Master} +import org.apache.spark.util.{AkkaUtils, Utils} +import akka.remote.{AssociationErrorEvent, DisassociatedEvent, RemotingLifecycleEvent} + +/** + * Proxy that relays messages to the driver. + */ +private class ClientActor(driverArgs: ClientArguments, conf: SparkConf) extends Actor with Logging { + var masterActor: ActorSelection = _ + val timeout = AkkaUtils.askTimeout(conf) + + override def preStart() = { + masterActor = context.actorSelection(Master.toAkkaUrl(driverArgs.master)) + + context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) + + println(s"Sending ${driverArgs.cmd} command to ${driverArgs.master}") + + driverArgs.cmd match { + case "launch" => + // TODO: We could add an env variable here and intercept it in `sc.addJar` that would + // truncate filesystem paths similar to what YARN does. For now, we just require + // people call `addJar` assuming the jar is in the same directory. + val env = Map[String, String]() + System.getenv().foreach{case (k, v) => env(k) = v} + + val mainClass = "org.apache.spark.deploy.worker.DriverWrapper" + val command = new Command(mainClass, Seq("{{WORKER_URL}}", driverArgs.mainClass) ++ + driverArgs.driverOptions, env) + + val driverDescription = new DriverDescription( + driverArgs.jarUrl, + driverArgs.memory, + driverArgs.cores, + driverArgs.supervise, + command) + + masterActor ! RequestSubmitDriver(driverDescription) + + case "kill" => + val driverId = driverArgs.driverId + val killFuture = masterActor ! RequestKillDriver(driverId) + } + } + + /* Find out driver status then exit the JVM */ + def pollAndReportStatus(driverId: String) { + println(s"... waiting before polling master for driver state") + Thread.sleep(5000) + println("... polling master for driver state") + val statusFuture = (masterActor ? 
RequestDriverStatus(driverId))(timeout) + .mapTo[DriverStatusResponse] + val statusResponse = Await.result(statusFuture, timeout) + + statusResponse.found match { + case false => + println(s"ERROR: Cluster master did not recognize $driverId") + System.exit(-1) + case true => + println(s"State of $driverId is ${statusResponse.state.get}") + // Worker node, if present + (statusResponse.workerId, statusResponse.workerHostPort, statusResponse.state) match { + case (Some(id), Some(hostPort), Some(DriverState.RUNNING)) => + println(s"Driver running on $hostPort ($id)") + case _ => + } + // Exception, if present + statusResponse.exception.map { e => + println(s"Exception from cluster was: $e") + System.exit(-1) + } + System.exit(0) + } + } + + override def receive = { + + case SubmitDriverResponse(success, driverId, message) => + println(message) + if (success) pollAndReportStatus(driverId.get) else System.exit(-1) + + case KillDriverResponse(driverId, success, message) => + println(message) + if (success) pollAndReportStatus(driverId) else System.exit(-1) + + case DisassociatedEvent(_, remoteAddress, _) => + println(s"Error connecting to master ${driverArgs.master} ($remoteAddress), exiting.") + System.exit(-1) + + case AssociationErrorEvent(cause, _, remoteAddress, _) => + println(s"Error connecting to master ${driverArgs.master} ($remoteAddress), exiting.") + println(s"Cause was: $cause") + System.exit(-1) + } +} + +/** + * Executable utility for starting and terminating drivers inside of a standalone cluster. + */ +object Client { + def main(args: Array[String]) { + val conf = new SparkConf() + val driverArgs = new ClientArguments(args) + + if (!driverArgs.logLevel.isGreaterOrEqual(Level.WARN)) { + conf.set("spark.akka.logLifecycleEvents", "true") + } + conf.set("spark.akka.askTimeout", "10") + conf.set("akka.loglevel", driverArgs.logLevel.toString.replace("WARN", "WARNING")) + Logger.getRootLogger.setLevel(driverArgs.logLevel) + + // TODO: See if we can initialize akka so return messages are sent back using the same TCP + // flow. Else, this (sadly) requires the DriverClient be routable from the Master. + val (actorSystem, _) = AkkaUtils.createActorSystem( + "driverClient", Utils.localHostName(), 0, false, conf) + + actorSystem.actorOf(Props(classOf[ClientActor], driverArgs, conf)) + + actorSystem.awaitTermination() + } +} diff --git a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala new file mode 100644 index 0000000000000..13a0f1fa9b767 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy + +import scala.collection.mutable.ListBuffer + +import org.apache.log4j.Level + +/** + * Command-line parser for the driver client. + */ +private[spark] class ClientArguments(args: Array[String]) { + val defaultCores = 1 + val defaultMemory = 512 + + var cmd: String = "" // 'launch' or 'kill' + var logLevel = Level.WARN + + // launch parameters + var master: String = "" + var jarUrl: String = "" + var mainClass: String = "" + var supervise: Boolean = false + var memory: Int = defaultMemory + var cores: Int = defaultCores + private var _driverOptions = ListBuffer[String]() + def driverOptions = _driverOptions.toSeq + + // kill parameters + var driverId: String = "" + + parse(args.toList) + + def parse(args: List[String]): Unit = args match { + case ("--cores" | "-c") :: value :: tail => + cores = value.toInt + parse(tail) + + case ("--memory" | "-m") :: value :: tail => + memory = value.toInt + parse(tail) + + case ("--supervise" | "-s") :: tail => + supervise = true + parse(tail) + + case ("--help" | "-h") :: tail => + printUsageAndExit(0) + + case ("--verbose" | "-v") :: tail => + logLevel = Level.INFO + parse(tail) + + case "launch" :: _master :: _jarUrl :: _mainClass :: tail => + cmd = "launch" + + if (!ClientArguments.isValidJarUrl(_jarUrl)) { + println(s"Jar url '${_jarUrl}' is not in valid format.") + println(s"Must be a jar file path in URL format (e.g. hdfs://XX.jar, file://XX.jar)") + printUsageAndExit(-1) + } + + jarUrl = _jarUrl + master = _master + mainClass = _mainClass + _driverOptions ++= tail + + case "kill" :: _master :: _driverId :: tail => + cmd = "kill" + master = _master + driverId = _driverId + + case _ => + printUsageAndExit(1) + } + + /** + * Print usage and exit JVM with the given exit code. + */ + def printUsageAndExit(exitCode: Int) { + // TODO: It wouldn't be too hard to allow users to submit their app and dependency jars + // separately similar to in the YARN client. 
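The parse method above peels options off the argument list with recursive pattern matching: each case consumes one flag (and its value, if any) and recurses on the remaining tail, falling through to printUsageAndExit for anything unrecognized. A small self-contained sketch of the same idiom follows; it reuses the flag names from ClientArguments, but MiniArgsSketch itself is hypothetical and not part of this patch:

    // Hypothetical sketch of the List-pattern-match CLI parsing style used by ClientArguments.
    object MiniArgsSketch {
      var cores = 1          // analogous to defaultCores
      var memory = 512       // analogous to defaultMemory (MB)
      var supervise = false

      def parse(args: List[String]): Unit = args match {
        case ("--cores" | "-c") :: value :: tail =>
          cores = value.toInt
          parse(tail)
        case ("--memory" | "-m") :: value :: tail =>
          memory = value.toInt
          parse(tail)
        case ("--supervise" | "-s") :: tail =>
          supervise = true
          parse(tail)
        case Nil =>
          // all arguments consumed
        case unknown :: _ =>
          sys.error(s"Unrecognized argument: $unknown")
      }

      def main(args: Array[String]): Unit = {
        parse(args.toList)
        println(s"cores=$cores memory=$memory supervise=$supervise")
      }
    }

Running the sketch with `--cores 4 -m 1024 --supervise` would print cores=4 memory=1024 supervise=true.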
+ val usage = + s""" + |Usage: DriverClient [options] launch <active-master> <jar-url> <main-class> [driver options] + |Usage: DriverClient kill <active-master> <driver-id> + | + |Options: + | -c CORES, --cores CORES Number of cores to request (default: $defaultCores) + | -m MEMORY, --memory MEMORY Megabytes of memory to request (default: $defaultMemory) + | -s, --supervise Whether to restart the driver on failure + | -v, --verbose Print more debugging output + """.stripMargin + System.err.println(usage) + System.exit(exitCode) + } +} + +object ClientArguments { + def isValidJarUrl(s: String) = s.matches("(.+):(.+)jar") +} diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index 275331724afba..5e824e1a678b6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -20,12 +20,12 @@ package org.apache.spark.deploy import scala.collection.immutable.List import org.apache.spark.deploy.ExecutorState.ExecutorState -import org.apache.spark.deploy.master.{WorkerInfo, ApplicationInfo} +import org.apache.spark.deploy.master.{ApplicationInfo, DriverInfo, WorkerInfo} +import org.apache.spark.deploy.master.DriverState.DriverState import org.apache.spark.deploy.master.RecoveryState.MasterState -import org.apache.spark.deploy.worker.ExecutorRunner +import org.apache.spark.deploy.worker.{DriverRunner, ExecutorRunner} import org.apache.spark.util.Utils - private[deploy] sealed trait DeployMessage extends Serializable /** Contains messages sent between Scheduler actor nodes. */ @@ -54,7 +54,14 @@ private[deploy] object DeployMessages { exitStatus: Option[Int]) extends DeployMessage - case class WorkerSchedulerStateResponse(id: String, executors: List[ExecutorDescription]) + case class DriverStateChanged( + driverId: String, + state: DriverState, + exception: Option[Exception]) + extends DeployMessage + + case class WorkerSchedulerStateResponse(id: String, executors: List[ExecutorDescription], + driverIds: Seq[String]) case class Heartbeat(workerId: String) extends DeployMessage @@ -76,14 +83,18 @@ private[deploy] object DeployMessages { sparkHome: String) extends DeployMessage - // Client to Master + case class LaunchDriver(driverId: String, driverDesc: DriverDescription) extends DeployMessage + + case class KillDriver(driverId: String) extends DeployMessage + + // AppClient to Master case class RegisterApplication(appDescription: ApplicationDescription) extends DeployMessage case class MasterChangeAcknowledged(appId: String) - // Master to Client + // Master to AppClient case class RegisteredApplication(appId: String, masterUrl: String) extends DeployMessage @@ -97,11 +108,28 @@ private[deploy] object DeployMessages { case class ApplicationRemoved(message: String) - // Internal message in Client + // DriverClient <-> Master + + case class RequestSubmitDriver(driverDescription: DriverDescription) extends DeployMessage + + case class SubmitDriverResponse(success: Boolean, driverId: Option[String], message: String) + extends DeployMessage + + case class RequestKillDriver(driverId: String) extends DeployMessage + + case class KillDriverResponse(driverId: String, success: Boolean, message: String) + extends DeployMessage + + case class RequestDriverStatus(driverId: String) extends DeployMessage + + case class DriverStatusResponse(found: Boolean, state: Option[DriverState], + workerId: Option[String], workerHostPort: Option[String], exception: Option[Exception]) + + // Internal message in
AppClient - case object StopClient + case object StopAppClient - // Master to Worker & Client + // Master to Worker & AppClient case class MasterChanged(masterUrl: String, masterWebUiUrl: String) @@ -113,6 +141,7 @@ private[deploy] object DeployMessages { case class MasterStateResponse(host: String, port: Int, workers: Array[WorkerInfo], activeApps: Array[ApplicationInfo], completedApps: Array[ApplicationInfo], + activeDrivers: Array[DriverInfo], completedDrivers: Array[DriverInfo], status: MasterState) { Utils.checkHost(host, "Required hostname") @@ -128,14 +157,15 @@ private[deploy] object DeployMessages { // Worker to WorkerWebUI case class WorkerStateResponse(host: String, port: Int, workerId: String, - executors: List[ExecutorRunner], finishedExecutors: List[ExecutorRunner], masterUrl: String, + executors: List[ExecutorRunner], finishedExecutors: List[ExecutorRunner], + drivers: List[DriverRunner], finishedDrivers: List[DriverRunner], masterUrl: String, cores: Int, memory: Int, coresUsed: Int, memoryUsed: Int, masterWebUiUrl: String) { Utils.checkHost(host, "Required hostname") assert (port > 0) } - // Actor System to Worker + // Liveness checks in various places case object SendHeartbeat } diff --git a/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala b/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala new file mode 100644 index 0000000000000..58c95dc4f9116 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/DriverDescription.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy + +private[spark] class DriverDescription( + val jarUrl: String, + val mem: Int, + val cores: Int, + val supervise: Boolean, + val command: Command) + extends Serializable { + + override def toString: String = s"DriverDescription (${command.mainClass})" +} diff --git a/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala b/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala index fcfea96ad60b8..37dfa7fec0831 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala @@ -17,8 +17,7 @@ package org.apache.spark.deploy -private[spark] object ExecutorState - extends Enumeration("LAUNCHING", "LOADING", "RUNNING", "KILLED", "FAILED", "LOST") { +private[spark] object ExecutorState extends Enumeration { val LAUNCHING, LOADING, RUNNING, KILLED, FAILED, LOST = Value diff --git a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala index 0aa8852649e05..4dfb19ed8adb6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala +++ b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala @@ -190,7 +190,7 @@ private[spark] object FaultToleranceTest extends App with Logging { /** Creates a SparkContext, which constructs a Client to interact with our cluster. */ def createClient() = { if (sc != null) { sc.stop() } - // Counter-hack: Because of a hack in SparkEnv#createFromSystemProperties() that changes this + // Counter-hack: Because of a hack in SparkEnv#create() that changes this // property, we need to reset it. System.setProperty("spark.driver.port", "0") sc = new SparkContext(getMasterUrls(masters), "fault-tolerance", containerSparkHome) @@ -417,4 +417,4 @@ private[spark] object Docker extends Logging { "docker ps -l -q".!(ProcessLogger(line => id = line)) new DockerId(id) } -} \ No newline at end of file +} diff --git a/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala index a724900943108..ffc0cb09032fb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala +++ b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala @@ -22,7 +22,7 @@ import akka.actor.ActorSystem import org.apache.spark.deploy.worker.Worker import org.apache.spark.deploy.master.Master import org.apache.spark.util.Utils -import org.apache.spark.Logging +import org.apache.spark.{SparkConf, Logging} import scala.collection.mutable.ArrayBuffer @@ -34,16 +34,17 @@ import scala.collection.mutable.ArrayBuffer */ private[spark] class LocalSparkCluster(numWorkers: Int, coresPerWorker: Int, memoryPerWorker: Int) extends Logging { - + private val localHostname = Utils.localHostName() private val masterActorSystems = ArrayBuffer[ActorSystem]() private val workerActorSystems = ArrayBuffer[ActorSystem]() - + def start(): Array[String] = { logInfo("Starting a local Spark cluster with " + numWorkers + " workers.") /* Start the Master */ - val (masterSystem, masterPort, _) = Master.startSystemAndActor(localHostname, 0, 0) + val conf = new SparkConf(false) + val (masterSystem, masterPort, _) = Master.startSystemAndActor(localHostname, 0, 0, conf) masterActorSystems += masterSystem val masterUrl = "spark://" + localHostname + ":" + masterPort val masters = Array(masterUrl) @@ -55,16 +56,19 @@ class LocalSparkCluster(numWorkers: Int, coresPerWorker: Int, 
memoryPerWorker: I workerActorSystems += workerSystem } - return masters + masters } def stop() { logInfo("Shutting down local Spark cluster.") // Stop the workers before the master so they don't get upset that it disconnected + // TODO: In Akka 2.1.x, ActorSystem.awaitTermination hangs when you have remote actors! + // This is unfortunate, but for now we just comment it out. workerActorSystems.foreach(_.shutdown()) - workerActorSystems.foreach(_.awaitTermination()) - + //workerActorSystems.foreach(_.awaitTermination()) masterActorSystems.foreach(_.shutdown()) - masterActorSystems.foreach(_.awaitTermination()) + //masterActorSystems.foreach(_.awaitTermination()) + masterActorSystems.clear() + workerActorSystems.clear() } } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index fc1537f7963c4..b479225b45ee9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -28,16 +28,15 @@ import org.apache.spark.{SparkContext, SparkException} /** * Contains util methods to interact with Hadoop from Spark. */ -private[spark] class SparkHadoopUtil { val conf = newConfiguration() UserGroupInformation.setConfiguration(conf) def runAsUser(user: String)(func: () => Unit) { - // if we are already running as the user intended there is no reason to do the doAs. It + // if we are already running as the user intended there is no reason to do the doAs. It // will actually break secure HDFS access as it doesn't fill in the credentials. Also if - // the user is UNKNOWN then we shouldn't be creating a remote unknown user - // (this is actually the path spark on yarn takes) since SPARK_USER is initialized only + // the user is UNKNOWN then we shouldn't be creating a remote unknown user + // (this is actually the path spark on yarn takes) since SPARK_USER is initialized only // in SparkContext. val currentUser = Option(System.getProperty("user.name")). 
getOrElse(SparkContext.SPARK_UNKNOWN_USER) @@ -67,11 +66,15 @@ class SparkHadoopUtil { } object SparkHadoopUtil { + private val hadoop = { - val yarnMode = java.lang.Boolean.valueOf(System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE"))) + val yarnMode = java.lang.Boolean.valueOf( + System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE"))) if (yarnMode) { try { - Class.forName("org.apache.spark.deploy.yarn.YarnSparkHadoopUtil").newInstance.asInstanceOf[SparkHadoopUtil] + Class.forName("org.apache.spark.deploy.yarn.YarnSparkHadoopUtil") + .newInstance() + .asInstanceOf[SparkHadoopUtil] } catch { case th: Throwable => throw new SparkException("Unable to load YARN support", th) } diff --git a/core/src/main/scala/org/apache/spark/deploy/client/Client.scala b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala similarity index 73% rename from core/src/main/scala/org/apache/spark/deploy/client/Client.scala rename to core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala index 77422f61ec901..1415e2f3d1886 100644 --- a/core/src/main/scala/org/apache/spark/deploy/client/Client.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala @@ -19,50 +19,50 @@ package org.apache.spark.deploy.client import java.util.concurrent.TimeoutException +import scala.concurrent.Await +import scala.concurrent.duration._ + import akka.actor._ -import akka.actor.Terminated import akka.pattern.ask -import akka.util.Duration -import akka.util.duration._ -import akka.remote.RemoteClientDisconnected -import akka.remote.RemoteClientLifeCycleEvent -import akka.remote.RemoteClientShutdown -import akka.dispatch.Await - -import org.apache.spark.Logging +import akka.remote.{AssociationErrorEvent, DisassociatedEvent, RemotingLifecycleEvent} + +import org.apache.spark.{Logging, SparkConf, SparkException} import org.apache.spark.deploy.{ApplicationDescription, ExecutorState} import org.apache.spark.deploy.DeployMessages._ import org.apache.spark.deploy.master.Master - +import org.apache.spark.util.AkkaUtils /** - * The main class used to talk to a Spark deploy cluster. Takes a master URL, an app description, - * and a listener for cluster events, and calls back the listener when various events occur. + * Interface allowing applications to speak with a Spark deploy cluster. Takes a master URL, + * an app description, and a listener for cluster events, and calls back the listener when various + * events occur. * * @param masterUrls Each url should look like spark://host:port. 
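AppClient above is constructed with an AppClientListener whose callbacks fire as cluster events occur. Only connected(appId) and the note on disconnected() are visible in this patch (the trait is renamed from ClientListener further below), so the remaining callback signatures in the sketch are assumptions carried over from the old ClientListener and may differ slightly:

    // Sketch of an AppClientListener implementation. Signatures other than connected()
    // are assumed from the pre-rename ClientListener and may not match exactly.
    package org.apache.spark.deploy.client

    import org.apache.spark.Logging

    class LoggingAppClientListener extends AppClientListener with Logging {
      def connected(appId: String) {
        logInfo("Connected to master, got app ID " + appId)
      }

      def disconnected() {
        // May be a temporary state while failing over to a standby master.
        logWarning("Disconnected from master, waiting to reconnect")
      }

      def dead() {
        logError("Application marked dead by the master")
      }

      def executorAdded(fullId: String, workerId: String, hostPort: String, cores: Int, memory: Int) {
        logInfo(s"Executor $fullId added on $hostPort ($cores cores, $memory MB)")
      }

      def executorRemoved(fullId: String, message: String, exitStatus: Option[Int]) {
        logInfo(s"Executor $fullId removed: $message (exit status $exitStatus)")
      }
    }

TestClient, a little further down in this patch, shows the full wiring of such a listener into an AppClient.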
*/ -private[spark] class Client( +private[spark] class AppClient( actorSystem: ActorSystem, masterUrls: Array[String], appDescription: ApplicationDescription, - listener: ClientListener) + listener: AppClientListener, + conf: SparkConf) extends Logging { val REGISTRATION_TIMEOUT = 20.seconds val REGISTRATION_RETRIES = 3 + var masterAddress: Address = null var actor: ActorRef = null var appId: String = null var registered = false var activeMasterUrl: String = null class ClientActor extends Actor with Logging { - var master: ActorRef = null - var masterAddress: Address = null + var master: ActorSelection = null var alreadyDisconnected = false // To avoid calling listener.disconnected() multiple times var alreadyDead = false // To avoid calling listener.dead() multiple times override def preStart() { + context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) try { registerWithMaster() } catch { @@ -76,7 +76,7 @@ private[spark] class Client( def tryRegisterAllMasters() { for (masterUrl <- masterUrls) { logInfo("Connecting to master " + masterUrl + "...") - val actor = context.actorFor(Master.toAkkaUrl(masterUrl)) + val actor = context.actorSelection(Master.toAkkaUrl(masterUrl)) actor ! RegisterApplication(appDescription) } } @@ -84,6 +84,7 @@ private[spark] class Client( def registerWithMaster() { tryRegisterAllMasters() + import context.dispatcher var retries = 0 lazy val retryTimer: Cancellable = context.system.scheduler.schedule(REGISTRATION_TIMEOUT, REGISTRATION_TIMEOUT) { @@ -102,10 +103,19 @@ private[spark] class Client( def changeMaster(url: String) { activeMasterUrl = url - master = context.actorFor(Master.toAkkaUrl(url)) - masterAddress = master.path.address - context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) - context.watch(master) // Doesn't work with remote actors, but useful for testing + master = context.actorSelection(Master.toAkkaUrl(activeMasterUrl)) + masterAddress = activeMasterUrl match { + case Master.sparkUrlRegex(host, port) => + Address("akka.tcp", Master.systemName, host, port.toInt) + case x => + throw new SparkException("Invalid spark URL: " + x) + } + } + + private def isPossibleMaster(remoteUrl: Address) = { + masterUrls.map(s => Master.toAkkaUrl(s)) + .map(u => AddressFromURIString(u).hostPort) + .contains(remoteUrl.hostPort) } override def receive = { @@ -135,24 +145,18 @@ private[spark] class Client( case MasterChanged(masterUrl, masterWebUiUrl) => logInfo("Master has changed, new master is at " + masterUrl) - context.unwatch(master) changeMaster(masterUrl) alreadyDisconnected = false sender ! MasterChangeAcknowledged(appId) - case Terminated(actor_) if actor_ == master => - logWarning("Connection to master failed; waiting for master to reconnect...") - markDisconnected() - - case RemoteClientDisconnected(transport, address) if address == masterAddress => - logWarning("Connection to master failed; waiting for master to reconnect...") + case DisassociatedEvent(_, address, _) if address == masterAddress => + logWarning(s"Connection to $address failed; waiting for master to reconnect...") markDisconnected() - case RemoteClientShutdown(transport, address) if address == masterAddress => - logWarning("Connection to master failed; waiting for master to reconnect...") - markDisconnected() + case AssociationErrorEvent(cause, _, address, _) if isPossibleMaster(address) => + logWarning(s"Could not connect to $address: $cause") - case StopClient => + case StopAppClient => markDead() sender ! 
true context.stop(self) @@ -184,8 +188,8 @@ private[spark] class Client( def stop() { if (actor != null) { try { - val timeout = Duration.create(System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds") - val future = actor.ask(StopClient)(timeout) + val timeout = AkkaUtils.askTimeout(conf) + val future = actor.ask(StopAppClient)(timeout) Await.result(future, timeout) } catch { case e: TimeoutException => diff --git a/core/src/main/scala/org/apache/spark/deploy/client/ClientListener.scala b/core/src/main/scala/org/apache/spark/deploy/client/AppClientListener.scala similarity index 97% rename from core/src/main/scala/org/apache/spark/deploy/client/ClientListener.scala rename to core/src/main/scala/org/apache/spark/deploy/client/AppClientListener.scala index be7a11bd15537..55d4ef1b31aaa 100644 --- a/core/src/main/scala/org/apache/spark/deploy/client/ClientListener.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/AppClientListener.scala @@ -24,7 +24,7 @@ package org.apache.spark.deploy.client * * Users of this API should *not* block inside the callback methods. */ -private[spark] trait ClientListener { +private[spark] trait AppClientListener { def connected(appId: String): Unit /** Disconnection may be a temporary state, as we fail over to a new Master. */ diff --git a/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala index 5b62d3ba6c0e0..ffa909c26b64a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala @@ -18,12 +18,12 @@ package org.apache.spark.deploy.client import org.apache.spark.util.{Utils, AkkaUtils} -import org.apache.spark.{Logging} +import org.apache.spark.{SparkConf, SparkContext, Logging} import org.apache.spark.deploy.{Command, ApplicationDescription} private[spark] object TestClient { - class TestListener extends ClientListener with Logging { + class TestListener extends AppClientListener with Logging { def connected(id: String) { logInfo("Connected to master, got app ID " + id) } @@ -45,11 +45,13 @@ private[spark] object TestClient { def main(args: Array[String]) { val url = args(0) - val (actorSystem, port) = AkkaUtils.createActorSystem("spark", Utils.localIpAddress, 0) + val (actorSystem, port) = AkkaUtils.createActorSystem("spark", Utils.localIpAddress, 0, + conf = new SparkConf) val desc = new ApplicationDescription( - "TestClient", 1, 512, Command("spark.deploy.client.TestExecutor", Seq(), Map()), "dummy-spark-home", "ignored") + "TestClient", Some(1), 512, Command("spark.deploy.client.TestExecutor", Seq(), Map()), + "dummy-spark-home", "ignored") val listener = new TestListener - val client = new Client(actorSystem, Array(url), desc, listener) + val client = new AppClient(actorSystem, Array(url), desc, listener, new SparkConf) client.start() actorSystem.awaitTermination() } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala index 5150b7c7dec6a..3e26379166885 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala @@ -28,7 +28,8 @@ private[spark] class ApplicationInfo( val desc: ApplicationDescription, val submitDate: Date, val driver: ActorRef, - val appUiUrl: String) + val appUiUrl: String, + defaultCores: Int) extends Serializable { 
@transient var state: ApplicationState.Value = _ @@ -81,7 +82,9 @@ private[spark] class ApplicationInfo( } } - def coresLeft: Int = desc.maxCores - coresGranted + private val myMaxCores = desc.maxCores.getOrElse(defaultCores) + + def coresLeft: Int = myMaxCores - coresGranted private var _retryCount = 0 diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala index 5a24042e14b98..c87b66f047dc8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala @@ -34,7 +34,7 @@ class ApplicationSource(val application: ApplicationInfo) extends Source { override def getValue: Long = application.duration }) - metricRegistry.register(MetricRegistry.name("cores", "number"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("cores"), new Gauge[Int] { override def getValue: Int = application.coresGranted }) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala index fedf879eff73b..67e6c5d66af0e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala @@ -17,8 +17,7 @@ package org.apache.spark.deploy.master -private[spark] object ApplicationState - extends Enumeration("WAITING", "RUNNING", "FINISHED", "FAILED", "UNKNOWN") { +private[spark] object ApplicationState extends Enumeration { type ApplicationState = Value diff --git a/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala new file mode 100644 index 0000000000000..33377931d6993 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/master/DriverInfo.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.master + +import java.util.Date + +import org.apache.spark.deploy.DriverDescription + +private[spark] class DriverInfo( + val startTime: Long, + val id: String, + val desc: DriverDescription, + val submitDate: Date) + extends Serializable { + + @transient var state: DriverState.Value = DriverState.SUBMITTED + /* If we fail when launching the driver, the exception is stored here. 
*/ + @transient var exception: Option[Exception] = None + /* Most recent worker assigned to this driver */ + @transient var worker: Option[WorkerInfo] = None +} diff --git a/core/src/main/scala/org/apache/spark/deploy/master/DriverState.scala b/core/src/main/scala/org/apache/spark/deploy/master/DriverState.scala new file mode 100644 index 0000000000000..26a68bade3c60 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/master/DriverState.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.master + +private[spark] object DriverState extends Enumeration { + + type DriverState = Value + + // SUBMITTED: Submitted but not yet scheduled on a worker + // RUNNING: Has been allocated to a worker to run + // FINISHED: Previously ran and exited cleanly + // RELAUNCHING: Exited non-zero or due to worker failure, but has not yet started running again + // UNKNOWN: The state of the driver is temporarily not known due to master failure recovery + // KILLED: A user manually killed this driver + // FAILED: The driver exited non-zero and was not supervised + // ERROR: Unable to run or restart due to an unrecoverable error (e.g. 
missing jar file) + val SUBMITTED, RUNNING, FINISHED, RELAUNCHING, UNKNOWN, KILLED, FAILED, ERROR = Value +} diff --git a/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala index c0849ef324273..74bb9ebf1db4a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala @@ -19,8 +19,6 @@ package org.apache.spark.deploy.master import java.io._ -import scala.Serializable - import akka.serialization.Serialization import org.apache.spark.Logging @@ -47,6 +45,15 @@ private[spark] class FileSystemPersistenceEngine( new File(dir + File.separator + "app_" + app.id).delete() } + override def addDriver(driver: DriverInfo) { + val driverFile = new File(dir + File.separator + "driver_" + driver.id) + serializeIntoFile(driverFile, driver) + } + + override def removeDriver(driver: DriverInfo) { + new File(dir + File.separator + "driver_" + driver.id).delete() + } + override def addWorker(worker: WorkerInfo) { val workerFile = new File(dir + File.separator + "worker_" + worker.id) serializeIntoFile(workerFile, worker) @@ -56,16 +63,18 @@ private[spark] class FileSystemPersistenceEngine( new File(dir + File.separator + "worker_" + worker.id).delete() } - override def readPersistedData(): (Seq[ApplicationInfo], Seq[WorkerInfo]) = { + override def readPersistedData(): (Seq[ApplicationInfo], Seq[DriverInfo], Seq[WorkerInfo]) = { val sortedFiles = new File(dir).listFiles().sortBy(_.getName) val appFiles = sortedFiles.filter(_.getName.startsWith("app_")) val apps = appFiles.map(deserializeFromFile[ApplicationInfo]) + val driverFiles = sortedFiles.filter(_.getName.startsWith("driver_")) + val drivers = driverFiles.map(deserializeFromFile[DriverInfo]) val workerFiles = sortedFiles.filter(_.getName.startsWith("worker_")) val workers = workerFiles.map(deserializeFromFile[WorkerInfo]) - (apps, workers) + (apps, drivers, workers) } - private def serializeIntoFile(file: File, value: Serializable) { + private def serializeIntoFile(file: File, value: AnyRef) { val created = file.createNewFile() if (!created) { throw new IllegalStateException("Could not create file: " + file) } @@ -77,13 +86,13 @@ private[spark] class FileSystemPersistenceEngine( out.close() } - def deserializeFromFile[T <: Serializable](file: File)(implicit m: Manifest[T]): T = { + def deserializeFromFile[T](file: File)(implicit m: Manifest[T]): T = { val fileData = new Array[Byte](file.length().asInstanceOf[Int]) val dis = new DataInputStream(new FileInputStream(file)) dis.readFully(fileData) dis.close() - val clazz = m.erasure.asInstanceOf[Class[T]] + val clazz = m.runtimeClass.asInstanceOf[Class[T]] val serializer = serialization.serializerFor(clazz) serializer.fromBinary(fileData).asInstanceOf[T] } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 2b537c91b4c84..389715662beee 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -17,37 +17,41 @@ package org.apache.spark.deploy.master -import java.util.Date import java.text.SimpleDateFormat +import java.util.Date import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} +import scala.concurrent.Await +import scala.concurrent.duration._ +import 
scala.util.Random import akka.actor._ -import akka.actor.Terminated -import akka.dispatch.Await import akka.pattern.ask -import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientDisconnected, RemoteClientShutdown} +import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent} import akka.serialization.SerializationExtension -import akka.util.duration._ -import akka.util.{Duration, Timeout} -import org.apache.spark.{Logging, SparkException} -import org.apache.spark.deploy.{ApplicationDescription, ExecutorState} + +import org.apache.spark.{SparkConf, Logging, SparkException} +import org.apache.spark.deploy.{ApplicationDescription, DriverDescription, ExecutorState} import org.apache.spark.deploy.DeployMessages._ import org.apache.spark.deploy.master.MasterMessages._ import org.apache.spark.deploy.master.ui.MasterWebUI import org.apache.spark.metrics.MetricsSystem import org.apache.spark.util.{AkkaUtils, Utils} +import org.apache.spark.deploy.master.DriverState.DriverState private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Actor with Logging { + import context.dispatcher // to use Akka's scheduler.schedule() + + val conf = new SparkConf + val DATE_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs - val WORKER_TIMEOUT = System.getProperty("spark.worker.timeout", "60").toLong * 1000 - val RETAINED_APPLICATIONS = System.getProperty("spark.deploy.retainedApplications", "200").toInt - val REAPER_ITERATIONS = System.getProperty("spark.dead.worker.persistence", "15").toInt - val RECOVERY_DIR = System.getProperty("spark.deploy.recoveryDirectory", "") - val RECOVERY_MODE = System.getProperty("spark.deploy.recoveryMode", "NONE") + val WORKER_TIMEOUT = conf.getLong("spark.worker.timeout", 60) * 1000 + val RETAINED_APPLICATIONS = conf.getInt("spark.deploy.retainedApplications", 200) + val REAPER_ITERATIONS = conf.getInt("spark.dead.worker.persistence", 15) + val RECOVERY_DIR = conf.get("spark.deploy.recoveryDirectory", "") + val RECOVERY_MODE = conf.get("spark.deploy.recoveryMode", "NONE") - var nextAppNumber = 0 val workers = new HashSet[WorkerInfo] val idToWorker = new HashMap[String, WorkerInfo] val actorToWorker = new HashMap[ActorRef, WorkerInfo] @@ -57,16 +61,19 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act val idToApp = new HashMap[String, ApplicationInfo] val actorToApp = new HashMap[ActorRef, ApplicationInfo] val addressToApp = new HashMap[Address, ApplicationInfo] - val waitingApps = new ArrayBuffer[ApplicationInfo] val completedApps = new ArrayBuffer[ApplicationInfo] + var nextAppNumber = 0 - var firstApp: Option[ApplicationInfo] = None + val drivers = new HashSet[DriverInfo] + val completedDrivers = new ArrayBuffer[DriverInfo] + val waitingDrivers = new ArrayBuffer[DriverInfo] // Drivers currently spooled for scheduling + var nextDriverNumber = 0 Utils.checkHost(host, "Expected hostname") - val masterMetricsSystem = MetricsSystem.createMetricsSystem("master") - val applicationMetricsSystem = MetricsSystem.createMetricsSystem("applications") + val masterMetricsSystem = MetricsSystem.createMetricsSystem("master", conf) + val applicationMetricsSystem = MetricsSystem.createMetricsSystem("applications", conf) val masterSource = new MasterSource(this) val webUi = new MasterWebUI(this, webUiPort) @@ -88,12 +95,18 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act // As a temporary workaround before better ways of configuring memory, we allow users to set // a flag that will 
perform round-robin scheduling across the nodes (spreading out each app // among all the nodes) instead of trying to consolidate each app onto a small # of nodes. - val spreadOutApps = System.getProperty("spark.deploy.spreadOut", "true").toBoolean + val spreadOutApps = conf.getBoolean("spark.deploy.spreadOut", true) + + // Default maxCores for applications that don't specify it (i.e. pass Int.MaxValue) + val defaultCores = conf.getInt("spark.deploy.defaultCores", Int.MaxValue) + if (defaultCores < 1) { + throw new SparkException("spark.deploy.defaultCores must be positive") + } override def preStart() { logInfo("Starting Spark master at " + masterUrl) // Listen for remote client disconnection events, since they don't go through Akka's watch() - context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) + context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) webUi.start() masterWebUiUrl = "http://" + masterPublicAddress + ":" + webUi.boundPort.get context.system.scheduler.schedule(0 millis, WORKER_TIMEOUT millis, self, CheckForWorkerTimeOut) @@ -105,7 +118,7 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act persistenceEngine = RECOVERY_MODE match { case "ZOOKEEPER" => logInfo("Persisting recovery state to ZooKeeper") - new ZooKeeperPersistenceEngine(SerializationExtension(context.system)) + new ZooKeeperPersistenceEngine(SerializationExtension(context.system), conf) case "FILESYSTEM" => logInfo("Persisting recovery state to directory: " + RECOVERY_DIR) new FileSystemPersistenceEngine(RECOVERY_DIR, SerializationExtension(context.system)) @@ -113,13 +126,12 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act new BlackHolePersistenceEngine() } - leaderElectionAgent = context.actorOf(Props( - RECOVERY_MODE match { + leaderElectionAgent = RECOVERY_MODE match { case "ZOOKEEPER" => - new ZooKeeperLeaderElectionAgent(self, masterUrl) + context.actorOf(Props(classOf[ZooKeeperLeaderElectionAgent], self, masterUrl, conf)) case _ => - new MonarchyLeaderAgent(self) - })) + context.actorOf(Props(classOf[MonarchyLeaderAgent], self)) + } } override def preRestart(reason: Throwable, message: Option[Any]) { @@ -137,16 +149,14 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act override def receive = { case ElectedLeader => { - val (storedApps, storedWorkers) = persistenceEngine.readPersistedData() - state = if (storedApps.isEmpty && storedWorkers.isEmpty) + val (storedApps, storedDrivers, storedWorkers) = persistenceEngine.readPersistedData() + state = if (storedApps.isEmpty && storedDrivers.isEmpty && storedWorkers.isEmpty) RecoveryState.ALIVE else RecoveryState.RECOVERING - logInfo("I have been elected leader! 
New state: " + state) - if (state == RecoveryState.RECOVERING) { - beginRecovery(storedApps, storedWorkers) + beginRecovery(storedApps, storedDrivers, storedWorkers) context.system.scheduler.scheduleOnce(WORKER_TIMEOUT millis) { completeRecovery() } } } @@ -156,23 +166,86 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act System.exit(0) } - case RegisterWorker(id, host, workerPort, cores, memory, webUiPort, publicAddress) => { + case RegisterWorker(id, workerHost, workerPort, cores, memory, workerWebUiPort, publicAddress) => { logInfo("Registering worker %s:%d with %d cores, %s RAM".format( - host, workerPort, cores, Utils.megabytesToString(memory))) + workerHost, workerPort, cores, Utils.megabytesToString(memory))) if (state == RecoveryState.STANDBY) { // ignore, don't send response } else if (idToWorker.contains(id)) { sender ! RegisterWorkerFailed("Duplicate worker ID") } else { - val worker = new WorkerInfo(id, host, port, cores, memory, sender, webUiPort, publicAddress) + val worker = new WorkerInfo(id, workerHost, workerPort, cores, memory, + sender, workerWebUiPort, publicAddress) registerWorker(worker) - context.watch(sender) // This doesn't work with remote actors but helps for testing persistenceEngine.addWorker(worker) sender ! RegisteredWorker(masterUrl, masterWebUiUrl) schedule() } } + case RequestSubmitDriver(description) => { + if (state != RecoveryState.ALIVE) { + val msg = s"Can only accept driver submissions in ALIVE state. Current state: $state." + sender ! SubmitDriverResponse(false, None, msg) + } else { + logInfo("Driver submitted " + description.command.mainClass) + val driver = createDriver(description) + persistenceEngine.addDriver(driver) + waitingDrivers += driver + drivers.add(driver) + schedule() + + // TODO: It might be good to instead have the submission client poll the master to determine + // the current status of the driver. For now it's simply "fire and forget". + + sender ! SubmitDriverResponse(true, Some(driver.id), + s"Driver successfully submitted as ${driver.id}") + } + } + + case RequestKillDriver(driverId) => { + if (state != RecoveryState.ALIVE) { + val msg = s"Can only kill drivers in ALIVE state. Current state: $state." + sender ! KillDriverResponse(driverId, success = false, msg) + } else { + logInfo("Asked to kill driver " + driverId) + val driver = drivers.find(_.id == driverId) + driver match { + case Some(d) => + if (waitingDrivers.contains(d)) { + waitingDrivers -= d + self ! DriverStateChanged(driverId, DriverState.KILLED, None) + } + else { + // We just notify the worker to kill the driver here. The final bookkeeping occurs + // on the return path when the worker submits a state change back to the master + // to notify it that the driver was successfully killed. + d.worker.foreach { w => + w.actor ! KillDriver(driverId) + } + } + // TODO: It would be nice for this to be a synchronous response + val msg = s"Kill request for $driverId submitted" + logInfo(msg) + sender ! KillDriverResponse(driverId, success = true, msg) + case None => + val msg = s"Driver $driverId has already finished or does not exist" + logWarning(msg) + sender ! KillDriverResponse(driverId, success = false, msg) + } + } + } + + case RequestDriverStatus(driverId) => { + (drivers ++ completedDrivers).find(_.id == driverId) match { + case Some(driver) => + sender ! DriverStatusResponse(found = true, Some(driver.state), + driver.worker.map(_.id), driver.worker.map(_.hostPort), driver.exception) + case None => + sender ! 
DriverStatusResponse(found = false, None, None, None, None) + } + } + case RegisterApplication(description) => { if (state == RecoveryState.STANDBY) { // ignore, don't send response @@ -181,7 +254,6 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act val app = createApplication(description, sender) registerApplication(app) logInfo("Registered app " + description.name + " with ID " + app.id) - context.watch(sender) // This doesn't work with remote actors but helps for testing persistenceEngine.addApplication(app) sender ! RegisteredApplication(app.id, masterUrl) schedule() @@ -216,6 +288,15 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act } } + case DriverStateChanged(driverId, state, exception) => { + state match { + case DriverState.ERROR | DriverState.FINISHED | DriverState.KILLED | DriverState.FAILED => + removeDriver(driverId, state, exception) + case _ => + throw new Exception(s"Received unexpected state update for driver $driverId: $state") + } + } + case Heartbeat(workerId) => { idToWorker.get(workerId) match { case Some(workerInfo) => @@ -237,7 +318,7 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act if (canCompleteRecovery) { completeRecovery() } } - case WorkerSchedulerStateResponse(workerId, executors) => { + case WorkerSchedulerStateResponse(workerId, executors, driverIds) => { idToWorker.get(workerId) match { case Some(worker) => logInfo("Worker has been re-registered: " + workerId) @@ -250,6 +331,14 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act worker.addExecutor(execInfo) execInfo.copyState(exec) } + + for (driverId <- driverIds) { + drivers.find(_.id == driverId).foreach { driver => + driver.worker = Some(worker) + driver.state = DriverState.RUNNING + worker.drivers(driverId) = driver + } + } case None => logWarning("Scheduler state from unknown worker: " + workerId) } @@ -257,23 +346,9 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act if (canCompleteRecovery) { completeRecovery() } } - case Terminated(actor) => { - // The disconnected actor could've been either a worker or an app; remove whichever of - // those we have an entry for in the corresponding actor hashmap - actorToWorker.get(actor).foreach(removeWorker) - actorToApp.get(actor).foreach(finishApplication) - if (state == RecoveryState.RECOVERING && canCompleteRecovery) { completeRecovery() } - } - - case RemoteClientDisconnected(transport, address) => { - // The disconnected client could've been either a worker or an app; remove whichever it was - addressToWorker.get(address).foreach(removeWorker) - addressToApp.get(address).foreach(finishApplication) - if (state == RecoveryState.RECOVERING && canCompleteRecovery) { completeRecovery() } - } - - case RemoteClientShutdown(transport, address) => { + case DisassociatedEvent(_, address, _) => { // The disconnected client could've been either a worker or an app; remove whichever it was + logInfo(s"$address got disassociated, removing it.") addressToWorker.get(address).foreach(removeWorker) addressToApp.get(address).foreach(finishApplication) if (state == RecoveryState.RECOVERING && canCompleteRecovery) { completeRecovery() } @@ -281,7 +356,7 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act case RequestMasterState => { sender ! 
MasterStateResponse(host, port, workers.toArray, apps.toArray, completedApps.toArray, - state) + drivers.toArray, completedDrivers.toArray, state) } case CheckForWorkerTimeOut => { @@ -297,7 +372,8 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act workers.count(_.state == WorkerState.UNKNOWN) == 0 && apps.count(_.state == ApplicationState.UNKNOWN) == 0 - def beginRecovery(storedApps: Seq[ApplicationInfo], storedWorkers: Seq[WorkerInfo]) { + def beginRecovery(storedApps: Seq[ApplicationInfo], storedDrivers: Seq[DriverInfo], + storedWorkers: Seq[WorkerInfo]) { for (app <- storedApps) { logInfo("Trying to recover app: " + app.id) try { @@ -309,6 +385,12 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act } } + for (driver <- storedDrivers) { + // Here we just read in the list of drivers. Any drivers associated with now-lost workers + // will be re-launched when we detect that the worker is missing. + drivers += driver + } + for (worker <- storedWorkers) { logInfo("Trying to recover worker: " + worker.id) try { @@ -332,6 +414,18 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act workers.filter(_.state == WorkerState.UNKNOWN).foreach(removeWorker) apps.filter(_.state == ApplicationState.UNKNOWN).foreach(finishApplication) + // Reschedule drivers which were not claimed by any workers + drivers.filter(_.worker.isEmpty).foreach { d => + logWarning(s"Driver ${d.id} was not found after master recovery") + if (d.desc.supervise) { + logWarning(s"Re-launching ${d.id}") + relaunchDriver(d) + } else { + removeDriver(d.id, DriverState.ERROR, None) + logWarning(s"Did not re-launch ${d.id} because it was not supervised") + } + } + state = RecoveryState.ALIVE schedule() logInfo("Recovery complete - resuming operations!") @@ -352,6 +446,18 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act */ def schedule() { if (state != RecoveryState.ALIVE) { return } + + // First schedule drivers, they take strict precedence over applications + val shuffledWorkers = Random.shuffle(workers) // Randomization helps balance drivers + for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) { + for (driver <- waitingDrivers) { + if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) { + launchDriver(worker, driver) + waitingDrivers -= driver + } + } + } + // Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app // in the queue, then the second app, etc. 
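Before handing out executor cores below, schedule() above walks a randomly shuffled list of ALIVE workers and launches each waiting driver on the first worker with enough free memory and cores. A tiny self-contained illustration of that driver-first, first-fit placement, using simplified stand-in classes rather than the real WorkerInfo and DriverDescription:

    // Simplified illustration of the driver-first, first-fit placement in Master.schedule().
    // MiniWorker and MiniDriver are stand-ins for the real types, for illustration only.
    import scala.util.Random

    case class MiniWorker(id: String, var memoryFree: Int, var coresFree: Int)
    case class MiniDriver(id: String, mem: Int, cores: Int)

    object DriverSchedulingSketch {
      def main(args: Array[String]): Unit = {
        val workers = Seq(MiniWorker("w1", 1024, 2), MiniWorker("w2", 4096, 8))
        var waiting = List(MiniDriver("driver-1", 2048, 4), MiniDriver("driver-2", 512, 1))

        // Shuffle the workers (randomization helps balance drivers), then place each
        // waiting driver on the first worker that still has enough memory and cores.
        for (worker <- Random.shuffle(workers)) {
          for (driver <- waiting) {
            if (worker.memoryFree >= driver.mem && worker.coresFree >= driver.cores) {
              println(s"Launching ${driver.id} on ${worker.id}")
              worker.memoryFree -= driver.mem
              worker.coresFree -= driver.cores
              waiting = waiting.filterNot(_ == driver)
            }
          }
        }
        waiting.foreach(d => println(s"${d.id} is still waiting for resources"))
      }
    }

Drivers that cannot be placed stay in the waiting list and are retried on the next schedule() call, which is what the real waitingDrivers buffer does.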
if (spreadOutApps) { @@ -438,13 +544,30 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act exec.id, ExecutorState.LOST, Some("worker lost"), None) exec.application.removeExecutor(exec) } + for (driver <- worker.drivers.values) { + if (driver.desc.supervise) { + logInfo(s"Re-launching ${driver.id}") + relaunchDriver(driver) + } else { + logInfo(s"Not re-launching ${driver.id} because it was not supervised") + removeDriver(driver.id, DriverState.ERROR, None) + } + } persistenceEngine.removeWorker(worker) } + def relaunchDriver(driver: DriverInfo) { + driver.worker = None + driver.state = DriverState.RELAUNCHING + waitingDrivers += driver + schedule() + } + def createApplication(desc: ApplicationDescription, driver: ActorRef): ApplicationInfo = { val now = System.currentTimeMillis() val date = new Date(now) - new ApplicationInfo(now, newApplicationId(date), desc, date, driver, desc.appUiUrl) + new ApplicationInfo( + now, newApplicationId(date), desc, date, driver, desc.appUiUrl, defaultCores) } def registerApplication(app: ApplicationInfo): Unit = { @@ -459,14 +582,6 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act idToApp(app.id) = app actorToApp(app.driver) = app addressToApp(appAddress) = app - if (firstApp == None) { - firstApp = Some(app) - } - // TODO: What is firstApp?? Can we remove it? - val workersAlive = workers.filter(_.state == WorkerState.ALIVE).toArray - if (workersAlive.size > 0 && !workersAlive.exists(_.memoryFree >= app.desc.memoryPerSlave)) { - logWarning("Could not find any workers with enough memory for " + firstApp.get.id) - } waitingApps += app } @@ -523,41 +638,77 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act removeWorker(worker) } else { if (worker.lastHeartbeat < currentTime - ((REAPER_ITERATIONS + 1) * WORKER_TIMEOUT)) - workers -= worker // we've seen this DEAD worker in the UI, etc. for long enough; cull it + workers -= worker // we've seen this DEAD worker in the UI, etc. for long enough; cull it } } } + + def newDriverId(submitDate: Date): String = { + val appId = "driver-%s-%04d".format(DATE_FORMAT.format(submitDate), nextDriverNumber) + nextDriverNumber += 1 + appId + } + + def createDriver(desc: DriverDescription): DriverInfo = { + val now = System.currentTimeMillis() + val date = new Date(now) + new DriverInfo(now, newDriverId(date), desc, date) + } + + def launchDriver(worker: WorkerInfo, driver: DriverInfo) { + logInfo("Launching driver " + driver.id + " on worker " + worker.id) + worker.addDriver(driver) + driver.worker = Some(worker) + worker.actor ! 
LaunchDriver(driver.id, driver.desc) + driver.state = DriverState.RUNNING + } + + def removeDriver(driverId: String, finalState: DriverState, exception: Option[Exception]) { + drivers.find(d => d.id == driverId) match { + case Some(driver) => + logInfo(s"Removing driver: $driverId") + drivers -= driver + completedDrivers += driver + persistenceEngine.removeDriver(driver) + driver.state = finalState + driver.exception = exception + driver.worker.foreach(w => w.removeDriver(driver)) + case None => + logWarning(s"Asked to remove unknown driver: $driverId") + } + } } private[spark] object Master { - private val systemName = "sparkMaster" + val systemName = "sparkMaster" private val actorName = "Master" - private val sparkUrlRegex = "spark://([^:]+):([0-9]+)".r + val sparkUrlRegex = "spark://([^:]+):([0-9]+)".r def main(argStrings: Array[String]) { - val args = new MasterArguments(argStrings) - val (actorSystem, _, _) = startSystemAndActor(args.host, args.port, args.webUiPort) + val conf = new SparkConf + val args = new MasterArguments(argStrings, conf) + val (actorSystem, _, _) = startSystemAndActor(args.host, args.port, args.webUiPort, conf) actorSystem.awaitTermination() } - /** Returns an `akka://...` URL for the Master actor given a sparkUrl `spark://host:ip`. */ + /** Returns an `akka.tcp://...` URL for the Master actor given a sparkUrl `spark://host:ip`. */ def toAkkaUrl(sparkUrl: String): String = { sparkUrl match { case sparkUrlRegex(host, port) => - "akka://%s@%s:%s/user/%s".format(systemName, host, port, actorName) + "akka.tcp://%s@%s:%s/user/%s".format(systemName, host, port, actorName) case _ => throw new SparkException("Invalid master URL: " + sparkUrl) } } - def startSystemAndActor(host: String, port: Int, webUiPort: Int): (ActorSystem, Int, Int) = { - val (actorSystem, boundPort) = AkkaUtils.createActorSystem(systemName, host, port) - val actor = actorSystem.actorOf(Props(new Master(host, boundPort, webUiPort)), name = actorName) - val timeoutDuration = Duration.create( - System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds") - implicit val timeout = Timeout(timeoutDuration) - val respFuture = actor ? RequestWebUIPort // ask pattern - val resp = Await.result(respFuture, timeoutDuration).asInstanceOf[WebUIPortResponse] + def startSystemAndActor(host: String, port: Int, webUiPort: Int, conf: SparkConf) + : (ActorSystem, Int, Int) = + { + val (actorSystem, boundPort) = AkkaUtils.createActorSystem(systemName, host, port, conf = conf) + val actor = actorSystem.actorOf(Props(classOf[Master], host, boundPort, webUiPort), actorName) + val timeout = AkkaUtils.askTimeout(conf) + val respFuture = actor.ask(RequestWebUIPort)(timeout) + val resp = Await.result(respFuture, timeout).asInstanceOf[WebUIPortResponse] (actorSystem, boundPort, resp.webUIBoundPort) } } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala index 9d89b455fb961..e7f3224091ca8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala @@ -18,16 +18,17 @@ package org.apache.spark.deploy.master import org.apache.spark.util.{Utils, IntParam} +import org.apache.spark.SparkConf /** * Command-line parser for the master. 
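Master.toAkkaUrl above is what both AppClient and the new driver Client use to turn a spark:// master URL into the akka.tcp:// path of the Master actor. A quick sketch of the same conversion, with a made-up host name for illustration:

    // Sketch reproducing the spark:// -> akka.tcp:// conversion done by Master.toAkkaUrl.
    // The host name below is made up for illustration.
    object ToAkkaUrlSketch {
      val sparkUrlRegex = "spark://([^:]+):([0-9]+)".r

      def toAkkaUrl(sparkUrl: String): String = sparkUrl match {
        case sparkUrlRegex(host, port) =>
          "akka.tcp://%s@%s:%s/user/%s".format("sparkMaster", host, port, "Master")
        case x =>
          throw new IllegalArgumentException("Invalid master URL: " + x)
      }

      def main(args: Array[String]): Unit = {
        // Prints: akka.tcp://sparkMaster@example-host:7077/user/Master
        println(toAkkaUrl("spark://example-host:7077"))
      }
    }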
*/ -private[spark] class MasterArguments(args: Array[String]) { +private[spark] class MasterArguments(args: Array[String], conf: SparkConf) { var host = Utils.localHostName() var port = 7077 var webUiPort = 8080 - - // Check for settings in environment variables + + // Check for settings in environment variables if (System.getenv("SPARK_MASTER_HOST") != null) { host = System.getenv("SPARK_MASTER_HOST") } @@ -37,8 +38,8 @@ private[spark] class MasterArguments(args: Array[String]) { if (System.getenv("SPARK_MASTER_WEBUI_PORT") != null) { webUiPort = System.getenv("SPARK_MASTER_WEBUI_PORT").toInt } - if (System.getProperty("master.ui.port") != null) { - webUiPort = System.getProperty("master.ui.port").toInt + if (conf.contains("master.ui.port")) { + webUiPort = conf.get("master.ui.port").toInt } parse(args.toList) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala index 23d1cb77da6f2..36c1b87b7f684 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala @@ -26,17 +26,17 @@ private[spark] class MasterSource(val master: Master) extends Source { val sourceName = "master" // Gauge for worker numbers in cluster - metricRegistry.register(MetricRegistry.name("workers","number"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("workers"), new Gauge[Int] { override def getValue: Int = master.workers.size }) // Gauge for application numbers in cluster - metricRegistry.register(MetricRegistry.name("apps", "number"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("apps"), new Gauge[Int] { override def getValue: Int = master.apps.size }) // Gauge for waiting application numbers in cluster - metricRegistry.register(MetricRegistry.name("waitingApps", "number"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("waitingApps"), new Gauge[Int] { override def getValue: Int = master.waitingApps.size }) } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala index 94b986caf2835..e3640ea4f7e64 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala @@ -35,11 +35,15 @@ private[spark] trait PersistenceEngine { def removeWorker(worker: WorkerInfo) + def addDriver(driver: DriverInfo) + + def removeDriver(driver: DriverInfo) + /** * Returns the persisted data sorted by their respective ids (which implies that they're * sorted by time of creation). 
*/ - def readPersistedData(): (Seq[ApplicationInfo], Seq[WorkerInfo]) + def readPersistedData(): (Seq[ApplicationInfo], Seq[DriverInfo], Seq[WorkerInfo]) def close() {} } @@ -49,5 +53,8 @@ private[spark] class BlackHolePersistenceEngine extends PersistenceEngine { override def removeApplication(app: ApplicationInfo) {} override def addWorker(worker: WorkerInfo) {} override def removeWorker(worker: WorkerInfo) {} - override def readPersistedData() = (Nil, Nil) + override def addDriver(driver: DriverInfo) {} + override def removeDriver(driver: DriverInfo) {} + + override def readPersistedData() = (Nil, Nil, Nil) } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/RecoveryState.scala b/core/src/main/scala/org/apache/spark/deploy/master/RecoveryState.scala index b91be821f016c..256a5a7c28e47 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/RecoveryState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/RecoveryState.scala @@ -17,9 +17,7 @@ package org.apache.spark.deploy.master -private[spark] object RecoveryState - extends Enumeration("STANDBY", "ALIVE", "RECOVERING", "COMPLETING_RECOVERY") { - +private[spark] object RecoveryState extends Enumeration { type MasterState = Value val STANDBY, ALIVE, RECOVERING, COMPLETING_RECOVERY = Value diff --git a/core/src/main/scala/org/apache/spark/deploy/master/SparkZooKeeperSession.scala b/core/src/main/scala/org/apache/spark/deploy/master/SparkZooKeeperSession.scala index 81e15c534fc31..999090ad7464d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/SparkZooKeeperSession.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/SparkZooKeeperSession.scala @@ -18,12 +18,12 @@ package org.apache.spark.deploy.master import scala.collection.JavaConversions._ -import scala.concurrent.ops._ -import org.apache.spark.Logging import org.apache.zookeeper._ -import org.apache.zookeeper.data.Stat import org.apache.zookeeper.Watcher.Event.KeeperState +import org.apache.zookeeper.data.Stat + +import org.apache.spark.{SparkConf, Logging} /** * Provides a Scala-side interface to the standard ZooKeeper client, with the addition of retry @@ -33,10 +33,11 @@ import org.apache.zookeeper.Watcher.Event.KeeperState * informed via zkDown(). * * Additionally, all commands sent to ZooKeeper will be retried until they either fail too many - * times or a semantic exception is thrown (e.g.., "node already exists"). + * times or a semantic exception is thrown (e.g., "node already exists"). 
*/ -private[spark] class SparkZooKeeperSession(zkWatcher: SparkZooKeeperWatcher) extends Logging { - val ZK_URL = System.getProperty("spark.deploy.zookeeper.url", "") +private[spark] class SparkZooKeeperSession(zkWatcher: SparkZooKeeperWatcher, + conf: SparkConf) extends Logging { + val ZK_URL = conf.get("spark.deploy.zookeeper.url", "") val ZK_ACL = ZooDefs.Ids.OPEN_ACL_UNSAFE val ZK_TIMEOUT_MILLIS = 30000 @@ -103,6 +104,7 @@ private[spark] class SparkZooKeeperSession(zkWatcher: SparkZooKeeperWatcher) ext connectToZooKeeper() case KeeperState.Disconnected => logWarning("ZooKeeper disconnected, will retry...") + case s => // Do nothing } } } @@ -179,7 +181,7 @@ private[spark] class SparkZooKeeperSession(zkWatcher: SparkZooKeeperWatcher) ext } catch { case e: KeeperException.NoNodeException => throw e case e: KeeperException.NodeExistsException => throw e - case e if n > 0 => + case e: Exception if n > 0 => logError("ZooKeeper exception, " + n + " more retries...", e) Thread.sleep(RETRY_WAIT_MILLIS) retry(fn, n-1) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala index e05f587b58c64..c5fa9cf7d7c2d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala @@ -17,8 +17,10 @@ package org.apache.spark.deploy.master -import akka.actor.ActorRef import scala.collection.mutable + +import akka.actor.ActorRef + import org.apache.spark.util.Utils private[spark] class WorkerInfo( @@ -35,7 +37,8 @@ private[spark] class WorkerInfo( Utils.checkHost(host, "Expected hostname") assert (port > 0) - @transient var executors: mutable.HashMap[String, ExecutorInfo] = _ // fullId => info + @transient var executors: mutable.HashMap[String, ExecutorInfo] = _ // executorId => info + @transient var drivers: mutable.HashMap[String, DriverInfo] = _ // driverId => info @transient var state: WorkerState.Value = _ @transient var coresUsed: Int = _ @transient var memoryUsed: Int = _ @@ -54,6 +57,7 @@ private[spark] class WorkerInfo( private def init() { executors = new mutable.HashMap + drivers = new mutable.HashMap state = WorkerState.ALIVE coresUsed = 0 memoryUsed = 0 @@ -83,6 +87,18 @@ private[spark] class WorkerInfo( executors.values.exists(_.application == app) } + def addDriver(driver: DriverInfo) { + drivers(driver.id) = driver + memoryUsed += driver.desc.mem + coresUsed += driver.desc.cores + } + + def removeDriver(driver: DriverInfo) { + drivers -= driver.id + memoryUsed -= driver.desc.mem + coresUsed -= driver.desc.cores + } + def webUiAddress : String = { "http://" + this.publicAddress + ":" + this.webUiPort } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala b/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala index c8d34f25e25e0..0b36ef60051fc 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala @@ -17,9 +17,7 @@ package org.apache.spark.deploy.master -private[spark] object WorkerState - extends Enumeration("ALIVE", "DEAD", "DECOMMISSIONED", "UNKNOWN") { - +private[spark] object WorkerState extends Enumeration { type WorkerState = Value val ALIVE, DEAD, DECOMMISSIONED, UNKNOWN = Value diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala 
b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala index 7809013e8383b..77c23fb9fbeea 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala @@ -21,16 +21,17 @@ import akka.actor.ActorRef import org.apache.zookeeper._ import org.apache.zookeeper.Watcher.Event.EventType +import org.apache.spark.{SparkConf, Logging} import org.apache.spark.deploy.master.MasterMessages._ -import org.apache.spark.Logging -private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: ActorRef, masterUrl: String) +private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: ActorRef, + masterUrl: String, conf: SparkConf) extends LeaderElectionAgent with SparkZooKeeperWatcher with Logging { - val WORKING_DIR = System.getProperty("spark.deploy.zookeeper.dir", "/spark") + "/leader_election" + val WORKING_DIR = conf.get("spark.deploy.zookeeper.dir", "/spark") + "/leader_election" private val watcher = new ZooKeeperWatcher() - private val zk = new SparkZooKeeperSession(this) + private val zk = new SparkZooKeeperSession(this, conf) private var status = LeadershipStatus.NOT_LEADER private var myLeaderFile: String = _ private var leaderUrl: String = _ @@ -105,7 +106,7 @@ private[spark] class ZooKeeperLeaderElectionAgent(val masterActor: ActorRef, mas // We found a different master file pointing to this process. // This can happen in the following two cases: // (1) The master process was restarted on the same node. - // (2) The ZK server died between creating the node and returning the name of the node. + // (2) The ZK server died between creating the file and returning the name of the file. // For this case, we will end up creating a second file, and MUST explicitly delete the // first one, since our ZK session is still open. 
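For context on the `case e: Exception if n > 0` narrowing in SparkZooKeeperSession above: the retry helper keeps the same overall shape as this standalone sketch, which assumes a 2000 ms wait in place of RETRY_WAIT_MILLIS and omits the ZooKeeper-specific cases (NoNodeException / NodeExistsException) that the real code rethrows immediately.

    object RetrySketch {
      // Assumed stand-in for SparkZooKeeperSession.RETRY_WAIT_MILLIS
      val retryWaitMillis = 2000

      def retry[T](fn: => T, n: Int = 3): T = {
        try {
          fn
        } catch {
          case e: Exception if n > 0 =>
            // Back off briefly, then try again with one fewer attempt remaining
            Thread.sleep(retryWaitMillis)
            retry(fn, n - 1)
        }
      }

      def main(args: Array[String]): Unit = {
        var attempts = 0
        val result = retry({ attempts += 1; if (attempts < 3) sys.error("flaky") else "ok" })
        println(s"$result after $attempts attempts") // ok after 3 attempts
      }
    }
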
// Note that this deletion will cause a NodeDeleted event to be fired so we check again for diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala index f1dca28281e07..10816a1f43ff6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala @@ -17,19 +17,19 @@ package org.apache.spark.deploy.master -import org.apache.spark.Logging +import org.apache.spark.{SparkConf, Logging} import org.apache.zookeeper._ import akka.serialization.Serialization -class ZooKeeperPersistenceEngine(serialization: Serialization) +class ZooKeeperPersistenceEngine(serialization: Serialization, conf: SparkConf) extends PersistenceEngine with SparkZooKeeperWatcher with Logging { - val WORKING_DIR = System.getProperty("spark.deploy.zookeeper.dir", "/spark") + "/master_status" + val WORKING_DIR = conf.get("spark.deploy.zookeeper.dir", "/spark") + "/master_status" - val zk = new SparkZooKeeperSession(this) + val zk = new SparkZooKeeperSession(this, conf) zk.connect() @@ -49,6 +49,14 @@ class ZooKeeperPersistenceEngine(serialization: Serialization) zk.delete(WORKING_DIR + "/app_" + app.id) } + override def addDriver(driver: DriverInfo) { + serializeIntoFile(WORKING_DIR + "/driver_" + driver.id, driver) + } + + override def removeDriver(driver: DriverInfo) { + zk.delete(WORKING_DIR + "/driver_" + driver.id) + } + override def addWorker(worker: WorkerInfo) { serializeIntoFile(WORKING_DIR + "/worker_" + worker.id, worker) } @@ -61,24 +69,26 @@ class ZooKeeperPersistenceEngine(serialization: Serialization) zk.close() } - override def readPersistedData(): (Seq[ApplicationInfo], Seq[WorkerInfo]) = { + override def readPersistedData(): (Seq[ApplicationInfo], Seq[DriverInfo], Seq[WorkerInfo]) = { val sortedFiles = zk.getChildren(WORKING_DIR).toList.sorted val appFiles = sortedFiles.filter(_.startsWith("app_")) val apps = appFiles.map(deserializeFromFile[ApplicationInfo]) + val driverFiles = sortedFiles.filter(_.startsWith("driver_")) + val drivers = driverFiles.map(deserializeFromFile[DriverInfo]) val workerFiles = sortedFiles.filter(_.startsWith("worker_")) val workers = workerFiles.map(deserializeFromFile[WorkerInfo]) - (apps, workers) + (apps, drivers, workers) } - private def serializeIntoFile(path: String, value: Serializable) { + private def serializeIntoFile(path: String, value: AnyRef) { val serializer = serialization.findSerializerFor(value) val serialized = serializer.toBinary(value) zk.create(path, serialized, CreateMode.PERSISTENT) } - def deserializeFromFile[T <: Serializable](filename: String)(implicit m: Manifest[T]): T = { + def deserializeFromFile[T](filename: String)(implicit m: Manifest[T]): T = { val fileData = zk.getData(WORKING_DIR + "/" + filename) - val clazz = m.erasure.asInstanceOf[Class[T]] + val clazz = m.runtimeClass.asInstanceOf[Class[T]] val serializer = serialization.serializerFor(clazz) serializer.fromBinary(fileData).asInstanceOf[T] } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index f4e574d15dbc3..f29a6ad2e7b92 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -17,31 +17,28 @@ package 
org.apache.spark.deploy.master.ui +import scala.concurrent.Await import scala.xml.Node -import akka.dispatch.Await import akka.pattern.ask -import akka.util.duration._ - import javax.servlet.http.HttpServletRequest - import net.liftweb.json.JsonAST.JValue -import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.JsonProtocol +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.master.ExecutorInfo import org.apache.spark.ui.UIUtils import org.apache.spark.util.Utils private[spark] class ApplicationPage(parent: MasterWebUI) { val master = parent.masterActorRef - implicit val timeout = parent.timeout + val timeout = parent.timeout /** Executor details for a particular application */ def renderJson(request: HttpServletRequest): JValue = { val appId = request.getParameter("appId") val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] - val state = Await.result(stateFuture, 30 seconds) + val state = Await.result(stateFuture, timeout) val app = state.activeApps.find(_.id == appId).getOrElse({ state.completedApps.find(_.id == appId).getOrElse(null) }) @@ -52,7 +49,7 @@ private[spark] class ApplicationPage(parent: MasterWebUI) { def render(request: HttpServletRequest): Seq[Node] = { val appId = request.getParameter("appId") val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] - val state = Await.result(stateFuture, 30 seconds) + val state = Await.result(stateFuture, timeout) val app = state.activeApps.find(_.id == appId).getOrElse({ state.completedApps.find(_.id == appId).getOrElse(null) }) @@ -70,11 +67,11 @@ private[spark] class ApplicationPage(parent: MasterWebUI) {
              User: {app.desc.user}
              Cores:
              {
-               if (app.desc.maxCores == Integer.MAX_VALUE) {
+               if (app.desc.maxCores.isEmpty) {
                  "Unlimited (%s granted)".format(app.coresGranted)
                } else {
                  "%s (%s granted, %s left)".format(
-                   app.desc.maxCores, app.coresGranted, app.coresLeft)
+                   app.desc.maxCores.get, app.coresGranted, app.coresLeft)
                }
              }
  • diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala index d7a57229b0063..a9af8df5525d6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala @@ -17,37 +17,34 @@ package org.apache.spark.deploy.master.ui -import javax.servlet.http.HttpServletRequest - +import scala.concurrent.Await +import scala.concurrent.duration._ import scala.xml.Node -import akka.dispatch.Await import akka.pattern.ask -import akka.util.duration._ - +import javax.servlet.http.HttpServletRequest import net.liftweb.json.JsonAST.JValue -import org.apache.spark.deploy.DeployWebUI +import org.apache.spark.deploy.{DeployWebUI, JsonProtocol} import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} -import org.apache.spark.deploy.JsonProtocol -import org.apache.spark.deploy.master.{ApplicationInfo, WorkerInfo} +import org.apache.spark.deploy.master.{ApplicationInfo, DriverInfo, WorkerInfo} import org.apache.spark.ui.UIUtils import org.apache.spark.util.Utils private[spark] class IndexPage(parent: MasterWebUI) { val master = parent.masterActorRef - implicit val timeout = parent.timeout + val timeout = parent.timeout def renderJson(request: HttpServletRequest): JValue = { val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] - val state = Await.result(stateFuture, 30 seconds) + val state = Await.result(stateFuture, timeout) JsonProtocol.writeMasterState(state) } /** Index view listing applications and executors */ def render(request: HttpServletRequest): Seq[Node] = { val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] - val state = Await.result(stateFuture, 30 seconds) + val state = Await.result(stateFuture, timeout) val workerHeaders = Seq("Id", "Address", "State", "Cores", "Memory") val workers = state.workers.sortBy(_.id) @@ -60,6 +57,16 @@ private[spark] class IndexPage(parent: MasterWebUI) { val completedApps = state.completedApps.sortBy(_.endTime).reverse val completedAppsTable = UIUtils.listingTable(appHeaders, appRow, completedApps) + val driverHeaders = Seq("ID", "Submitted Time", "Worker", "State", "Cores", "Memory", "Main Class") + val activeDrivers = state.activeDrivers.sortBy(_.startTime).reverse + val activeDriversTable = UIUtils.listingTable(driverHeaders, driverRow, activeDrivers) + val completedDrivers = state.completedDrivers.sortBy(_.startTime).reverse + val completedDriversTable = UIUtils.listingTable(driverHeaders, driverRow, completedDrivers) + + // For now we only show driver information if the user has submitted drivers to the cluster. + // This is until we integrate the notion of drivers and applications in the UI. + def hasDrivers = activeDrivers.length > 0 || completedDrivers.length > 0 + val content =
    @@ -74,6 +81,9 @@ private[spark] class IndexPage(parent: MasterWebUI) {
  • Applications: {state.activeApps.size} Running, {state.completedApps.size} Completed
  • +
  • Drivers: + {state.activeDrivers.size} Running, + {state.completedDrivers.size} Completed
  • @@ -88,17 +98,39 @@ private[spark] class IndexPage(parent: MasterWebUI) {

    Running Applications

    - {activeAppsTable}
    +
    + {if (hasDrivers) +
    +
    +

    Running Drivers

    + {activeDriversTable} +
    +
    + } +
    +

    Completed Applications

    {completedAppsTable}
    +
    + +
    + {if (hasDrivers) +
    +
    +

    Completed Drivers

    + {completedDriversTable} +
    +
    + }
    ; + UIUtils.basicSparkPage(content, "Spark Master at " + state.uri) } @@ -138,4 +170,20 @@ private[spark] class IndexPage(parent: MasterWebUI) { {DeployWebUI.formatDuration(app.duration)} } + + def driverRow(driver: DriverInfo): Seq[Node] = { + + {driver.id} + {driver.submitDate} + {driver.worker.map(w => {w.id.toString}).getOrElse("None")} + {driver.state} + + {driver.desc.cores} + + + {Utils.megabytesToString(driver.desc.mem.toLong)} + + {driver.desc.command.arguments(1)} + + } } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index f4df729e87137..ead35662fc75a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -17,25 +17,21 @@ package org.apache.spark.deploy.master.ui -import akka.util.Duration - import javax.servlet.http.HttpServletRequest - import org.eclipse.jetty.server.{Handler, Server} -import org.apache.spark.{Logging} +import org.apache.spark.Logging import org.apache.spark.deploy.master.Master import org.apache.spark.ui.JettyUtils import org.apache.spark.ui.JettyUtils._ -import org.apache.spark.util.Utils +import org.apache.spark.util.{AkkaUtils, Utils} /** * Web UI server for the standalone master. */ private[spark] class MasterWebUI(val master: Master, requestedPort: Int) extends Logging { - implicit val timeout = Duration.create( - System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds") + val timeout = AkkaUtils.askTimeout(master.conf) val host = Utils.localHostName() val port = requestedPort diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala new file mode 100644 index 0000000000000..460883ec7ae24 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.worker + +import java.io.{File, FileOutputStream, IOException, InputStream} +import java.lang.System._ + +import org.apache.spark.Logging +import org.apache.spark.deploy.Command +import org.apache.spark.util.Utils + +/** + ** Utilities for running commands with the spark classpath. 
+ */ +private[spark] +object CommandUtils extends Logging { + def buildCommandSeq(command: Command, memory: Int, sparkHome: String): Seq[String] = { + val runner = getEnv("JAVA_HOME", command).map(_ + "/bin/java").getOrElse("java") + + // SPARK-698: do not call the run.cmd script, as process.destroy() + // fails to kill a process tree on Windows + Seq(runner) ++ buildJavaOpts(command, memory, sparkHome) ++ Seq(command.mainClass) ++ + command.arguments + } + + private def getEnv(key: String, command: Command): Option[String] = + command.environment.get(key).orElse(Option(System.getenv(key))) + + /** + * Attention: this must always be aligned with the environment variables in the run scripts and + * the way the JAVA_OPTS are assembled there. + */ + def buildJavaOpts(command: Command, memory: Int, sparkHome: String): Seq[String] = { + val libraryOpts = getEnv("SPARK_LIBRARY_PATH", command) + .map(p => List("-Djava.library.path=" + p)) + .getOrElse(Nil) + val workerLocalOpts = Option(getenv("SPARK_JAVA_OPTS")).map(Utils.splitCommandString).getOrElse(Nil) + val userOpts = getEnv("SPARK_JAVA_OPTS", command).map(Utils.splitCommandString).getOrElse(Nil) + val memoryOpts = Seq(s"-Xms${memory}M", s"-Xmx${memory}M") + + // Figure out our classpath with the external compute-classpath script + val ext = if (System.getProperty("os.name").startsWith("Windows")) ".cmd" else ".sh" + val classPath = Utils.executeAndGetOutput( + Seq(sparkHome + "/bin/compute-classpath" + ext), + extraEnvironment=command.environment) + + Seq("-cp", classPath) ++ libraryOpts ++ workerLocalOpts ++ userOpts ++ memoryOpts + } + + /** Spawn a thread that will redirect a given stream to a file */ + def redirectStream(in: InputStream, file: File) { + val out = new FileOutputStream(file, true) + // TODO: It would be nice to add a shutdown hook here that explains why the output is + // terminating. Otherwise if the worker dies the executor logs will silently stop. + new Thread("redirect output to " + file) { + override def run() { + try { + Utils.copyStream(in, out, true) + } catch { + case e: IOException => + logInfo("Redirection to " + file + " closed: " + e.getMessage) + } + } + }.start() + } +} diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala new file mode 100644 index 0000000000000..b4df1a0dd4718 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
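To make the output of CommandUtils.buildCommandSeq/buildJavaOpts above concrete, here is a rough standalone sketch. It passes the classpath in directly instead of shelling out to the compute-classpath script, folds the library/worker/user options into a single userOpts list, and uses made-up class and option names purely for illustration.

    object JavaOptsSketch {
      // Simplified echo of CommandUtils.buildJavaOpts: classpath, extra options, then heap sizing
      def buildJavaOpts(memoryMb: Int, classPath: String, userOpts: Seq[String]): Seq[String] =
        Seq("-cp", classPath) ++ userOpts ++ Seq(s"-Xms${memoryMb}M", s"-Xmx${memoryMb}M")

      def buildCommandSeq(mainClass: String, arguments: Seq[String], memoryMb: Int,
          classPath: String, userOpts: Seq[String]): Seq[String] =
        Seq("java") ++ buildJavaOpts(memoryMb, classPath, userOpts) ++ Seq(mainClass) ++ arguments

      def main(args: Array[String]): Unit = {
        // e.g. List(java, -cp, /opt/spark/conf:/opt/spark/lib/*, -Dspark.foo=bar,
        //           -Xms512M, -Xmx512M, org.example.MyDriver, arg1)
        println(buildCommandSeq("org.example.MyDriver", Seq("arg1"), 512,
          "/opt/spark/conf:/opt/spark/lib/*", Seq("-Dspark.foo=bar")))
      }
    }
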
+ */ + +package org.apache.spark.deploy.worker + +import java.io._ + +import scala.collection.JavaConversions._ +import scala.collection.mutable.Map + +import akka.actor.ActorRef +import com.google.common.base.Charsets +import com.google.common.io.Files +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileUtil, Path} + +import org.apache.spark.Logging +import org.apache.spark.deploy.{Command, DriverDescription} +import org.apache.spark.deploy.DeployMessages.DriverStateChanged +import org.apache.spark.deploy.master.DriverState +import org.apache.spark.deploy.master.DriverState.DriverState + +/** + * Manages the execution of one driver, including automatically restarting the driver on failure. + */ +private[spark] class DriverRunner( + val driverId: String, + val workDir: File, + val sparkHome: File, + val driverDesc: DriverDescription, + val worker: ActorRef, + val workerUrl: String) + extends Logging { + + @volatile var process: Option[Process] = None + @volatile var killed = false + + // Populated once finished + var finalState: Option[DriverState] = None + var finalException: Option[Exception] = None + var finalExitCode: Option[Int] = None + + // Decoupled for testing + private[deploy] def setClock(_clock: Clock) = clock = _clock + private[deploy] def setSleeper(_sleeper: Sleeper) = sleeper = _sleeper + private var clock = new Clock { + def currentTimeMillis(): Long = System.currentTimeMillis() + } + private var sleeper = new Sleeper { + def sleep(seconds: Int): Unit = (0 until seconds).takeWhile(f => {Thread.sleep(1000); !killed}) + } + + /** Starts a thread to run and manage the driver. */ + def start() = { + new Thread("DriverRunner for " + driverId) { + override def run() { + try { + val driverDir = createWorkingDirectory() + val localJarFilename = downloadUserJar(driverDir) + + // Make sure user application jar is on the classpath + // TODO: If we add ability to submit multiple jars they should also be added here + val env = Map(driverDesc.command.environment.toSeq: _*) + env("SPARK_CLASSPATH") = env.getOrElse("SPARK_CLASSPATH", "") + s":$localJarFilename" + val newCommand = Command(driverDesc.command.mainClass, + driverDesc.command.arguments.map(substituteVariables), env) + val command = CommandUtils.buildCommandSeq(newCommand, driverDesc.mem, + sparkHome.getAbsolutePath) + launchDriver(command, env, driverDir, driverDesc.supervise) + } + catch { + case e: Exception => finalException = Some(e) + } + + val state = + if (killed) { DriverState.KILLED } + else if (finalException.isDefined) { DriverState.ERROR } + else { + finalExitCode match { + case Some(0) => DriverState.FINISHED + case _ => DriverState.FAILED + } + } + + finalState = Some(state) + + worker ! DriverStateChanged(driverId, state, finalException) + } + }.start() + } + + /** Terminate this driver (or prevent it from ever starting if not yet started) */ + def kill() { + synchronized { + process.foreach(p => p.destroy()) + killed = true + } + } + + /** Replace variables in a command argument passed to us */ + private def substituteVariables(argument: String): String = argument match { + case "{{WORKER_URL}}" => workerUrl + case other => other + } + + /** + * Creates the working directory for this driver. + * Will throw an exception if there are errors preparing the directory. 
+ */ + private def createWorkingDirectory(): File = { + val driverDir = new File(workDir, driverId) + if (!driverDir.exists() && !driverDir.mkdirs()) { + throw new IOException("Failed to create directory " + driverDir) + } + driverDir + } + + /** + * Download the user jar into the supplied directory and return its local path. + * Will throw an exception if there are errors downloading the jar. + */ + private def downloadUserJar(driverDir: File): String = { + + val jarPath = new Path(driverDesc.jarUrl) + + val emptyConf = new Configuration() + val jarFileSystem = jarPath.getFileSystem(emptyConf) + + val destPath = new File(driverDir.getAbsolutePath, jarPath.getName) + val jarFileName = jarPath.getName + val localJarFile = new File(driverDir, jarFileName) + val localJarFilename = localJarFile.getAbsolutePath + + if (!localJarFile.exists()) { // May already exist if running multiple workers on one node + logInfo(s"Copying user jar $jarPath to $destPath") + FileUtil.copy(jarFileSystem, jarPath, destPath, false, emptyConf) + } + + if (!localJarFile.exists()) { // Verify copy succeeded + throw new Exception(s"Did not see expected jar $jarFileName in $driverDir") + } + + localJarFilename + } + + private def launchDriver(command: Seq[String], envVars: Map[String, String], baseDir: File, + supervise: Boolean) { + val builder = new ProcessBuilder(command: _*).directory(baseDir) + envVars.map{ case(k,v) => builder.environment().put(k, v) } + + def initialize(process: Process) = { + // Redirect stdout and stderr to files + val stdout = new File(baseDir, "stdout") + CommandUtils.redirectStream(process.getInputStream, stdout) + + val stderr = new File(baseDir, "stderr") + val header = "Launch Command: %s\n%s\n\n".format( + command.mkString("\"", "\" \"", "\""), "=" * 40) + Files.append(header, stderr, Charsets.UTF_8) + CommandUtils.redirectStream(process.getErrorStream, stderr) + } + runCommandWithRetry(ProcessBuilderLike(builder), initialize, supervise) + } + + private[deploy] def runCommandWithRetry(command: ProcessBuilderLike, initialize: Process => Unit, + supervise: Boolean) { + // Time to wait between submission retries. + var waitSeconds = 1 + // A run of this many seconds resets the exponential back-off. 
+ val successfulRunDuration = 5 + + var keepTrying = !killed + + while (keepTrying) { + logInfo("Launch Command: " + command.command.mkString("\"", "\" \"", "\"")) + + synchronized { + if (killed) { return } + process = Some(command.start()) + initialize(process.get) + } + + val processStart = clock.currentTimeMillis() + val exitCode = process.get.waitFor() + if (clock.currentTimeMillis() - processStart > successfulRunDuration * 1000) { + waitSeconds = 1 + } + + if (supervise && exitCode != 0 && !killed) { + logInfo(s"Command exited with status $exitCode, re-launching after $waitSeconds s.") + sleeper.sleep(waitSeconds) + waitSeconds = waitSeconds * 2 // exponential back-off + } + + keepTrying = supervise && exitCode != 0 && !killed + finalExitCode = Some(exitCode) + } + } +} + +private[deploy] trait Clock { + def currentTimeMillis(): Long +} + +private[deploy] trait Sleeper { + def sleep(seconds: Int) +} + +// Needed because ProcessBuilder is a final class and cannot be mocked +private[deploy] trait ProcessBuilderLike { + def start(): Process + def command: Seq[String] +} + +private[deploy] object ProcessBuilderLike { + def apply(processBuilder: ProcessBuilder) = new ProcessBuilderLike { + def start() = processBuilder.start() + def command = processBuilder.command() + } +} diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala new file mode 100644 index 0000000000000..6f6c101547c3c --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.worker + +import akka.actor._ + +import org.apache.spark.SparkConf +import org.apache.spark.util.{AkkaUtils, Utils} + +/** + * Utility object for launching driver programs such that they share fate with the Worker process. 
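The supervise loop in DriverRunner.runCommandWithRetry above doubles waitSeconds after each failed run and resets it to 1 once a run lasts longer than successfulRunDuration. A stripped-down sketch of that control flow, with a hypothetical runOnce() standing in for launching the real driver process and waiting for its exit code:

    object BackoffSketch {
      def supervise(runOnce: () => Int, successfulRunDurationMs: Long = 5000): Int = {
        var waitSeconds = 1
        var exitCode = 0
        var keepTrying = true
        while (keepTrying) {
          val start = System.currentTimeMillis()
          exitCode = runOnce()
          if (System.currentTimeMillis() - start > successfulRunDurationMs) {
            waitSeconds = 1 // a long, healthy run resets the back-off
          }
          keepTrying = exitCode != 0
          if (keepTrying) {
            Thread.sleep(waitSeconds * 1000L)
            waitSeconds *= 2 // exponential back-off between re-launches
          }
        }
        exitCode
      }

      def main(args: Array[String]): Unit = {
        var runs = 0
        val code = supervise(() => { runs += 1; if (runs < 3) 1 else 0 })
        println(s"exited with $code after $runs runs") // exited with 0 after 3 runs
      }
    }
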
+ */ +object DriverWrapper { + def main(args: Array[String]) { + args.toList match { + case workerUrl :: mainClass :: extraArgs => + val (actorSystem, _) = AkkaUtils.createActorSystem("Driver", + Utils.localHostName(), 0, false, new SparkConf()) + actorSystem.actorOf(Props(classOf[WorkerWatcher], workerUrl), name = "workerWatcher") + + // Delegate to supplied main class + val clazz = Class.forName(args(1)) + val mainMethod = clazz.getMethod("main", classOf[Array[String]]) + mainMethod.invoke(null, extraArgs.toArray[String]) + + actorSystem.shutdown() + + case _ => + System.err.println("Usage: DriverWrapper [options]") + System.exit(-1) + } + } +} \ No newline at end of file diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index dd69043d26cd3..18885d7ca6daa 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -18,17 +18,15 @@ package org.apache.spark.deploy.worker import java.io._ -import java.lang.System.getenv import akka.actor.ActorRef import com.google.common.base.Charsets import com.google.common.io.Files -import org.apache.spark.{Logging} -import org.apache.spark.deploy.{ExecutorState, ApplicationDescription} +import org.apache.spark.Logging +import org.apache.spark.deploy.{ExecutorState, ApplicationDescription, Command} import org.apache.spark.deploy.DeployMessages.ExecutorStateChanged -import org.apache.spark.util.Utils /** * Manages the execution of one executor process. @@ -44,16 +42,17 @@ private[spark] class ExecutorRunner( val host: String, val sparkHome: File, val workDir: File, + val workerUrl: String, var state: ExecutorState.Value) extends Logging { val fullId = appId + "/" + execId var workerThread: Thread = null var process: Process = null - var shutdownHook: Thread = null - private def getAppEnv(key: String): Option[String] = - appDesc.command.environment.get(key).orElse(Option(getenv(key))) + // NOTE: This is now redundant with the automated shut-down enforced by the Executor. It might + // make sense to remove this in the future. + var shutdownHook: Thread = null def start() { workerThread = new Thread("ExecutorRunner for " + fullId) { @@ -92,79 +91,17 @@ private[spark] class ExecutorRunner( /** Replace variables such as {{EXECUTOR_ID}} and {{CORES}} in a command argument passed to us */ def substituteVariables(argument: String): String = argument match { + case "{{WORKER_URL}}" => workerUrl case "{{EXECUTOR_ID}}" => execId.toString case "{{HOSTNAME}}" => host case "{{CORES}}" => cores.toString case other => other } - def buildCommandSeq(): Seq[String] = { - val command = appDesc.command - val runner = getAppEnv("JAVA_HOME").map(_ + "/bin/java").getOrElse("java") - // SPARK-698: do not call the run.cmd script, as process.destroy() - // fails to kill a process tree on Windows - Seq(runner) ++ buildJavaOpts() ++ Seq(command.mainClass) ++ - (command.arguments ++ Seq(appId)).map(substituteVariables) - } - - /** - * Attention: this must always be aligned with the environment variables in the run scripts and - * the way the JAVA_OPTS are assembled there. 
- */ - def buildJavaOpts(): Seq[String] = { - def mergeSparkJavaOpts(wlo: Seq[String], uo: Seq[String]): Seq[String] = { - def mergeWithSplitChar(splitter: Char, os: Set[String]) = { - val keyGroups = os.groupBy(_.takeWhile(_ != splitter)) - val (solos, dupes) = keyGroups.partition { case (key, values) => values.size == 1 } - val (uoDupes, wloDupes) = dupes.partition { case (key, valSet) => valSet.toSeq.intersect(wlo).isEmpty } - val wloOverrides = wloDupes.values.toSeq.map(_.toSeq.intersect(wlo)) - - uoDupes.foreach { case (_, s) if s.size > 1 => - logError("Conflicting Application-level SPARK_JAVA_OPTS: " + s) } - wloOverrides.foreach { s => if (s.size > 1) - logError("Conflicting Worker Local SPARK_JAVA_OPTS: " + s) } - - solos.values.map(_.head) ++ - uoDupes.values.map(_.head) ++ - wloOverrides.map(_.head) - } - - val opts = wlo.toSet ++ uo - val (withEquals, withoutEquals) = opts.partition(_.contains("=")) - val (withColon, withoutColon) = withoutEquals.partition(_.contains(":")) - val resultSet = withoutColon ++ mergeWithSplitChar('=', withEquals) ++ mergeWithSplitChar(':', withColon) - resultSet.toSeq - } - - val libraryOpts = getAppEnv("SPARK_LIBRARY_PATH") - .map(p => List("-Djava.library.path=" + p)) - .getOrElse(Nil) - val workerLocalOpts = Option(getenv("SPARK_JAVA_OPTS")).map(Utils.splitCommandString).getOrElse(Nil) - val userOpts = getAppEnv("SPARK_JAVA_OPTS").map(Utils.splitCommandString).getOrElse(Nil) - val memoryOpts = Seq("-Xms" + memory + "M", "-Xmx" + memory + "M") - - // Figure out our classpath with the external compute-classpath script - val ext = if (System.getProperty("os.name").startsWith("Windows")) ".cmd" else ".sh" - val classPath = Utils.executeAndGetOutput( - Seq(sparkHome + "/bin/compute-classpath" + ext), - extraEnvironment=appDesc.command.environment) - - Seq("-cp", classPath) ++ libraryOpts ++ mergeSparkJavaOpts(workerLocalOpts, userOpts) ++ memoryOpts - } - - /** Spawn a thread that will redirect a given stream to a file */ - def redirectStream(in: InputStream, file: File) { - val out = new FileOutputStream(file, true) - new Thread("redirect output to " + file) { - override def run() { - try { - Utils.copyStream(in, out, true) - } catch { - case e: IOException => - logInfo("Redirection to " + file + " closed: " + e.getMessage) - } - } - }.start() + def getCommandSeq = { + val command = Command(appDesc.command.mainClass, + appDesc.command.arguments.map(substituteVariables) ++ Seq(appId), appDesc.command.environment) + CommandUtils.buildCommandSeq(command, memory, sparkHome.getAbsolutePath) } /** @@ -179,7 +116,7 @@ private[spark] class ExecutorRunner( } // Launch the process - val command = buildCommandSeq() + val command = getCommandSeq logInfo("Launch command: " + command.mkString("\"", "\" \"", "\"")) val builder = new ProcessBuilder(command: _*).directory(executorDir) val env = builder.environment() @@ -196,11 +133,11 @@ private[spark] class ExecutorRunner( // Redirect its stdout and stderr to files val stdout = new File(executorDir, "stdout") - redirectStream(process.getInputStream, stdout) + CommandUtils.redirectStream(process.getInputStream, stdout) val stderr = new File(executorDir, "stderr") Files.write(header, stderr, Charsets.UTF_8) - redirectStream(process.getErrorStream, stderr) + CommandUtils.redirectStream(process.getErrorStream, stderr) // Wait for it to exit; this is actually a bad thing if it happens, because we expect to run // long-lived processes only. 
However, in the future, we might restart the executor a few diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala old mode 100644 new mode 100755 index 216d9d44ac619..36bb28912fbda --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -17,23 +17,24 @@ package org.apache.spark.deploy.worker +import java.io.File import java.text.SimpleDateFormat import java.util.Date -import java.io.File import scala.collection.mutable.HashMap +import scala.concurrent.duration._ import akka.actor._ -import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected} -import akka.util.duration._ +import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent} -import org.apache.spark.Logging +import org.apache.spark.{Logging, SparkConf, SparkException} import org.apache.spark.deploy.{ExecutorDescription, ExecutorState} import org.apache.spark.deploy.DeployMessages._ -import org.apache.spark.deploy.master.Master +import org.apache.spark.deploy.master.{DriverState, Master} +import org.apache.spark.deploy.master.DriverState.DriverState import org.apache.spark.deploy.worker.ui.WorkerWebUI import org.apache.spark.metrics.MetricsSystem -import org.apache.spark.util.{Utils, AkkaUtils} +import org.apache.spark.util.{AkkaUtils, Utils} /** * @param masterUrls Each url should look like spark://host:port. @@ -45,8 +46,12 @@ private[spark] class Worker( cores: Int, memory: Int, masterUrls: Array[String], - workDirPath: String = null) + actorSystemName: String, + actorName: String, + workDirPath: String = null, + val conf: SparkConf) extends Actor with Logging { + import context.dispatcher Utils.checkHost(host, "Expected hostname") assert (port > 0) @@ -54,7 +59,7 @@ private[spark] class Worker( val DATE_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss") // For worker and executor IDs // Send a heartbeat every (heartbeat timeout) / 4 milliseconds - val HEARTBEAT_MILLIS = System.getProperty("spark.worker.timeout", "60").toLong * 1000 / 4 + val HEARTBEAT_MILLIS = conf.getLong("spark.worker.timeout", 60) * 1000 / 4 val REGISTRATION_TIMEOUT = 20.seconds val REGISTRATION_RETRIES = 3 @@ -63,9 +68,11 @@ private[spark] class Worker( var masterIndex = 0 val masterLock: Object = new Object() - var master: ActorRef = null + var master: ActorSelection = null + var masterAddress: Address = null var activeMasterUrl: String = "" var activeMasterWebUiUrl : String = "" + val akkaUrl = "akka.tcp://%s@%s:%s/user/%s".format(actorSystemName, host, port, actorName) @volatile var registered = false @volatile var connected = false val workerId = generateWorkerId() @@ -73,6 +80,9 @@ private[spark] class Worker( var workDir: File = null val executors = new HashMap[String, ExecutorRunner] val finishedExecutors = new HashMap[String, ExecutorRunner] + val drivers = new HashMap[String, DriverRunner] + val finishedDrivers = new HashMap[String, DriverRunner] + val publicAddress = { val envVar = System.getenv("SPARK_PUBLIC_DNS") if (envVar != null) envVar else host @@ -82,7 +92,7 @@ private[spark] class Worker( var coresUsed = 0 var memoryUsed = 0 - val metricsSystem = MetricsSystem.createMetricsSystem("worker") + val metricsSystem = MetricsSystem.createMetricsSystem("worker", conf) val workerSource = new WorkerSource(this) def coresFree: Int = cores - coresUsed @@ -114,7 +124,7 @@ private[spark] class Worker( logInfo("Spark home: " + sparkHome) 
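A recurring pattern in this patch is replacing System.getProperty lookups with a SparkConf that is passed down explicitly, as in the HEARTBEAT_MILLIS change above. A minimal usage sketch, assuming spark-core from this release is on the classpath and using an example value for spark.worker.timeout:

    import org.apache.spark.SparkConf

    object ConfSketch {
      def main(args: Array[String]): Unit = {
        // Settings can come from system properties (spark.*) or be set programmatically:
        val conf = new SparkConf().set("spark.worker.timeout", "30")

        // Old style (removed in this patch): System.getProperty("spark.worker.timeout", "60").toLong
        // New style: a typed accessor with a default, read from the SparkConf that is passed around.
        val heartbeatMillis = conf.getLong("spark.worker.timeout", 60) * 1000 / 4
        println(heartbeatMillis) // 7500 with the setting above; 15000 with the default
      }
    }
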
createWorkDir() webUi = new WorkerWebUI(this, workDir, Some(webUiPort)) - + context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) webUi.start() registerWithMaster() @@ -126,9 +136,13 @@ private[spark] class Worker( masterLock.synchronized { activeMasterUrl = url activeMasterWebUiUrl = uiUrl - master = context.actorFor(Master.toAkkaUrl(activeMasterUrl)) - context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) - context.watch(master) // Doesn't work with remote actors, but useful for testing + master = context.actorSelection(Master.toAkkaUrl(activeMasterUrl)) + masterAddress = activeMasterUrl match { + case Master.sparkUrlRegex(_host, _port) => + Address("akka.tcp", Master.systemName, _host, _port.toInt) + case x => + throw new SparkException("Invalid spark URL: " + x) + } connected = true } } @@ -136,7 +150,7 @@ private[spark] class Worker( def tryRegisterAllMasters() { for (masterUrl <- masterUrls) { logInfo("Connecting to master " + masterUrl + "...") - val actor = context.actorFor(Master.toAkkaUrl(masterUrl)) + val actor = context.actorSelection(Master.toAkkaUrl(masterUrl)) actor ! RegisterWorker(workerId, host, port, cores, memory, webUi.boundPort.get, publicAddress) } @@ -175,12 +189,14 @@ private[spark] class Worker( case MasterChanged(masterUrl, masterWebUiUrl) => logInfo("Master has changed, new master is at " + masterUrl) - context.unwatch(master) changeMaster(masterUrl, masterWebUiUrl) val execs = executors.values. map(e => new ExecutorDescription(e.appId, e.execId, e.cores, e.state)) - sender ! WorkerSchedulerStateResponse(workerId, execs.toList) + sender ! WorkerSchedulerStateResponse(workerId, execs.toList, drivers.keys.toSeq) + + case Heartbeat => + logInfo(s"Received heartbeat from driver ${sender.path}") case RegisterWorkerFailed(message) => if (!registered) { @@ -193,8 +209,11 @@ private[spark] class Worker( logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.") } else { logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name)) + // TODO (pwendell): We shuld make sparkHome an Option[String] in + // ApplicationDescription to be more explicit about this. 
+ val effectiveSparkHome = Option(execSparkHome_).getOrElse(sparkHome.getAbsolutePath) val manager = new ExecutorRunner(appId, execId, appDesc, cores_, memory_, - self, workerId, host, new File(execSparkHome_), workDir, ExecutorState.RUNNING) + self, workerId, host, new File(effectiveSparkHome), workDir, akkaUrl, ExecutorState.RUNNING) executors(appId + "/" + execId) = manager manager.start() coresUsed += cores_ @@ -214,8 +233,8 @@ private[spark] class Worker( logInfo("Executor " + fullId + " finished with state " + state + message.map(" message " + _).getOrElse("") + exitStatus.map(" exitStatus " + _).getOrElse("")) - finishedExecutors(fullId) = executor executors -= fullId + finishedExecutors(fullId) = executor coresUsed -= executor.cores memoryUsed -= executor.memory } @@ -234,18 +253,52 @@ private[spark] class Worker( } } - case Terminated(actor_) if actor_ == master => - masterDisconnected() + case LaunchDriver(driverId, driverDesc) => { + logInfo(s"Asked to launch driver $driverId") + val driver = new DriverRunner(driverId, workDir, sparkHome, driverDesc, self, akkaUrl) + drivers(driverId) = driver + driver.start() - case RemoteClientDisconnected(transport, address) if address == master.path.address => - masterDisconnected() + coresUsed += driverDesc.cores + memoryUsed += driverDesc.mem + } + + case KillDriver(driverId) => { + logInfo(s"Asked to kill driver $driverId") + drivers.get(driverId) match { + case Some(runner) => + runner.kill() + case None => + logError(s"Asked to kill unknown driver $driverId") + } + } + + case DriverStateChanged(driverId, state, exception) => { + state match { + case DriverState.ERROR => + logWarning(s"Driver $driverId failed with unrecoverable exception: ${exception.get}") + case DriverState.FINISHED => + logInfo(s"Driver $driverId exited successfully") + case DriverState.KILLED => + logInfo(s"Driver $driverId was killed by user") + } + masterLock.synchronized { + master ! DriverStateChanged(driverId, state, exception) + } + val driver = drivers.remove(driverId).get + finishedDrivers(driverId) = driver + memoryUsed -= driver.driverDesc.mem + coresUsed -= driver.driverDesc.cores + } - case RemoteClientShutdown(transport, address) if address == master.path.address => + case x: DisassociatedEvent if x.remoteAddress == masterAddress => + logInfo(s"$x Disassociated !") masterDisconnected() case RequestWorkerState => { sender ! 
WorkerStateResponse(host, port, workerId, executors.values.toList, - finishedExecutors.values.toList, activeMasterUrl, cores, memory, + finishedExecutors.values.toList, drivers.values.toList, + finishedDrivers.values.toList, activeMasterUrl, cores, memory, coresUsed, memoryUsed, activeMasterWebUiUrl) } } @@ -261,12 +314,14 @@ private[spark] class Worker( override def postStop() { executors.values.foreach(_.kill()) + drivers.values.foreach(_.kill()) webUi.stop() metricsSystem.stop() } } private[spark] object Worker { + def main(argStrings: Array[String]) { val args = new WorkerArguments(argStrings) val (actorSystem, _) = startSystemAndActor(args.host, args.port, args.webUiPort, args.cores, @@ -275,13 +330,17 @@ private[spark] object Worker { } def startSystemAndActor(host: String, port: Int, webUiPort: Int, cores: Int, memory: Int, - masterUrls: Array[String], workDir: String, workerNumber: Option[Int] = None) - : (ActorSystem, Int) = { + masterUrls: Array[String], workDir: String, workerNumber: Option[Int] = None) + : (ActorSystem, Int) = + { // The LocalSparkCluster runs multiple local sparkWorkerX actor systems + val conf = new SparkConf val systemName = "sparkWorker" + workerNumber.map(_.toString).getOrElse("") - val (actorSystem, boundPort) = AkkaUtils.createActorSystem(systemName, host, port) - val actor = actorSystem.actorOf(Props(new Worker(host, boundPort, webUiPort, cores, memory, - masterUrls, workDir)), name = "Worker") + val actorName = "Worker" + val (actorSystem, boundPort) = AkkaUtils.createActorSystem(systemName, host, port, + conf = conf) + actorSystem.actorOf(Props(classOf[Worker], host, boundPort, webUiPort, cores, memory, + masterUrls, systemName, actorName, workDir, conf), name = actorName) (actorSystem, boundPort) } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala index df269fd04777f..b7ddd8c816cbc 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala @@ -25,27 +25,27 @@ private[spark] class WorkerSource(val worker: Worker) extends Source { val sourceName = "worker" val metricRegistry = new MetricRegistry() - metricRegistry.register(MetricRegistry.name("executors", "number"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("executors"), new Gauge[Int] { override def getValue: Int = worker.executors.size }) // Gauge for cores used of this worker - metricRegistry.register(MetricRegistry.name("coresUsed", "number"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("coresUsed"), new Gauge[Int] { override def getValue: Int = worker.coresUsed }) // Gauge for memory used of this worker - metricRegistry.register(MetricRegistry.name("memUsed", "MBytes"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("memUsed_MB"), new Gauge[Int] { override def getValue: Int = worker.memoryUsed }) // Gauge for cores free of this worker - metricRegistry.register(MetricRegistry.name("coresFree", "number"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("coresFree"), new Gauge[Int] { override def getValue: Int = worker.coresFree }) // Gauge for memory free of this worker - metricRegistry.register(MetricRegistry.name("memFree", "MBytes"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("memFree_MB"), new Gauge[Int] { override def getValue: Int = worker.memoryFree }) } diff --git 
a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala new file mode 100644 index 0000000000000..1dc39c450ea16 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.worker + +import akka.actor.{Actor, Address, AddressFromURIString} +import akka.remote.{AssociatedEvent, AssociationErrorEvent, AssociationEvent, DisassociatedEvent, RemotingLifecycleEvent} + +import org.apache.spark.Logging +import org.apache.spark.deploy.DeployMessages.SendHeartbeat + +/** + * Actor which connects to a worker process and terminates the JVM if the connection is severed. + * Provides fate sharing between a worker and its associated child processes. + */ +private[spark] class WorkerWatcher(workerUrl: String) extends Actor + with Logging { + override def preStart() { + context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) + + logInfo(s"Connecting to worker $workerUrl") + val worker = context.actorSelection(workerUrl) + worker ! SendHeartbeat // need to send a message here to initiate connection + } + + // Used to avoid shutting down JVM during tests + private[deploy] var isShutDown = false + private[deploy] def setTesting(testing: Boolean) = isTesting = testing + private var isTesting = false + + // Lets us filter events only from the worker's actor system + private val expectedHostPort = AddressFromURIString(workerUrl).hostPort + private def isWorker(address: Address) = address.hostPort == expectedHostPort + + def exitNonZero() = if (isTesting) isShutDown = true else System.exit(-1) + + override def receive = { + case AssociatedEvent(localAddress, remoteAddress, inbound) if isWorker(remoteAddress) => + logInfo(s"Successfully connected to $workerUrl") + + case AssociationErrorEvent(cause, localAddress, remoteAddress, inbound) + if isWorker(remoteAddress) => + // These logs may not be seen if the worker (and associated pipe) has died + logError(s"Could not initialize connection to worker $workerUrl. Exiting.") + logError(s"Error was: $cause") + exitNonZero() + + case DisassociatedEvent(localAddress, remoteAddress, inbound) if isWorker(remoteAddress) => + // This log message will never be seen + logError(s"Lost connection to worker actor $workerUrl. 
Exiting.") + exitNonZero() + + case e: AssociationEvent => + // pass through association events relating to other remote actor systems + + case e => logWarning(s"Received unexpected actor system event: $e") + } +} \ No newline at end of file diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/IndexPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/IndexPage.scala index d2d3617498592..925c6fb1832d7 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/IndexPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/IndexPage.scala @@ -17,23 +17,20 @@ package org.apache.spark.deploy.worker.ui -import javax.servlet.http.HttpServletRequest - +import scala.concurrent.Await import scala.xml.Node -import akka.dispatch.Await import akka.pattern.ask -import akka.util.duration._ - +import javax.servlet.http.HttpServletRequest import net.liftweb.json.JsonAST.JValue import org.apache.spark.deploy.JsonProtocol import org.apache.spark.deploy.DeployMessages.{RequestWorkerState, WorkerStateResponse} -import org.apache.spark.deploy.worker.ExecutorRunner +import org.apache.spark.deploy.master.DriverState +import org.apache.spark.deploy.worker.{DriverRunner, ExecutorRunner} import org.apache.spark.ui.UIUtils import org.apache.spark.util.Utils - private[spark] class IndexPage(parent: WorkerWebUI) { val workerActor = parent.worker.self val worker = parent.worker @@ -41,13 +38,13 @@ private[spark] class IndexPage(parent: WorkerWebUI) { def renderJson(request: HttpServletRequest): JValue = { val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerStateResponse] - val workerState = Await.result(stateFuture, 30 seconds) + val workerState = Await.result(stateFuture, timeout) JsonProtocol.writeWorkerState(workerState) } def render(request: HttpServletRequest): Seq[Node] = { val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerStateResponse] - val workerState = Await.result(stateFuture, 30 seconds) + val workerState = Await.result(stateFuture, timeout) val executorHeaders = Seq("ExecutorID", "Cores", "Memory", "Job Details", "Logs") val runningExecutorTable = @@ -55,6 +52,16 @@ private[spark] class IndexPage(parent: WorkerWebUI) { val finishedExecutorTable = UIUtils.listingTable(executorHeaders, executorRow, workerState.finishedExecutors) + val driverHeaders = Seq("DriverID", "Main Class", "State", "Cores", "Memory", "Logs", "Notes") + val runningDrivers = workerState.drivers.sortBy(_.driverId).reverse + val runningDriverTable = UIUtils.listingTable(driverHeaders, driverRow, runningDrivers) + val finishedDrivers = workerState.finishedDrivers.sortBy(_.driverId).reverse + def finishedDriverTable = UIUtils.listingTable(driverHeaders, driverRow, finishedDrivers) + + // For now we only show driver information if the user has submitted drivers to the cluster. + // This is until we integrate the notion of drivers and applications in the UI. + def hasDrivers = runningDrivers.length > 0 || finishedDrivers.length > 0 + val content =
@@ -78,11 +85,33 @@ private[spark] class IndexPage(parent: WorkerWebUI) {
+
+          {if (hasDrivers)
+            <div>
+              <h4> Running Drivers {workerState.drivers.size} </h4>
+              {runningDriverTable}
+            </div>
+          }
+
           <h4> Finished Executors </h4>
           {finishedExecutorTable}
+
+          {if (hasDrivers)
+            <div>
+              <h4> Finished Drivers </h4>
+              {finishedDriverTable}
+            </div>
+          }
    ; UIUtils.basicSparkPage(content, "Spark Worker at %s:%s".format( @@ -110,6 +139,27 @@ private[spark] class IndexPage(parent: WorkerWebUI) { .format(executor.appId, executor.execId)}>stderr + } + def driverRow(driver: DriverRunner): Seq[Node] = { + + {driver.driverId} + {driver.driverDesc.command.arguments(1)} + {driver.finalState.getOrElse(DriverState.RUNNING)} + + {driver.driverDesc.cores.toString} + + + {Utils.megabytesToString(driver.driverDesc.mem)} + + + stdout + stderr + + + {driver.finalException.getOrElse("")} + + + } } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala index 800f1cafccaa5..8daa47b2b2435 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala @@ -17,20 +17,16 @@ package org.apache.spark.deploy.worker.ui -import akka.util.{Duration, Timeout} - -import java.io.{FileInputStream, File} +import java.io.File import javax.servlet.http.HttpServletRequest - import org.eclipse.jetty.server.{Handler, Server} +import org.apache.spark.{Logging, SparkConf} import org.apache.spark.deploy.worker.Worker -import org.apache.spark.{Logging} -import org.apache.spark.ui.JettyUtils +import org.apache.spark.ui.{JettyUtils, UIUtils} import org.apache.spark.ui.JettyUtils._ -import org.apache.spark.ui.UIUtils -import org.apache.spark.util.Utils +import org.apache.spark.util.{AkkaUtils, Utils} /** * Web UI server for the standalone worker. @@ -38,11 +34,10 @@ import org.apache.spark.util.Utils private[spark] class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[Int] = None) extends Logging { - implicit val timeout = Timeout( - Duration.create(System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds")) + val timeout = AkkaUtils.askTimeout(worker.conf) val host = Utils.localHostName() val port = requestedPort.getOrElse( - System.getProperty("worker.ui.port", WorkerWebUI.DEFAULT_PORT).toInt) + worker.conf.get("worker.ui.port", WorkerWebUI.DEFAULT_PORT).toInt) var server: Option[Server] = None var boundPort: Option[Int] = None @@ -74,30 +69,48 @@ class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[I def log(request: HttpServletRequest): String = { val defaultBytes = 100 * 1024 - val appId = request.getParameter("appId") - val executorId = request.getParameter("executorId") + + val appId = Option(request.getParameter("appId")) + val executorId = Option(request.getParameter("executorId")) + val driverId = Option(request.getParameter("driverId")) val logType = request.getParameter("logType") val offset = Option(request.getParameter("offset")).map(_.toLong) val byteLength = Option(request.getParameter("byteLength")).map(_.toInt).getOrElse(defaultBytes) - val path = "%s/%s/%s/%s".format(workDir.getPath, appId, executorId, logType) + + val path = (appId, executorId, driverId) match { + case (Some(a), Some(e), None) => + s"${workDir.getPath}/$appId/$executorId/$logType" + case (None, None, Some(d)) => + s"${workDir.getPath}/$driverId/$logType" + case _ => + throw new Exception("Request must specify either application or driver identifiers") + } val (startByte, endByte) = getByteRange(path, offset, byteLength) val file = new File(path) val logLength = file.length - val pre = "==== Bytes %s-%s of %s of %s/%s/%s ====\n" - .format(startByte, endByte, logLength, appId, executorId, logType) + val pre = s"==== Bytes 
$startByte-$endByte of $logLength of $path ====\n" pre + Utils.offsetBytes(path, startByte, endByte) } def logPage(request: HttpServletRequest): Seq[scala.xml.Node] = { val defaultBytes = 100 * 1024 - val appId = request.getParameter("appId") - val executorId = request.getParameter("executorId") + val appId = Option(request.getParameter("appId")) + val executorId = Option(request.getParameter("executorId")) + val driverId = Option(request.getParameter("driverId")) val logType = request.getParameter("logType") val offset = Option(request.getParameter("offset")).map(_.toLong) val byteLength = Option(request.getParameter("byteLength")).map(_.toInt).getOrElse(defaultBytes) - val path = "%s/%s/%s/%s".format(workDir.getPath, appId, executorId, logType) + + val (path, params) = (appId, executorId, driverId) match { + case (Some(a), Some(e), None) => + (s"${workDir.getPath}/$a/$e/$logType", s"appId=$a&executorId=$e") + case (None, None, Some(d)) => + (s"${workDir.getPath}/$d/$logType", s"driverId=$d") + case _ => + throw new Exception("Request must specify either application or driver identifiers") + } val (startByte, endByte) = getByteRange(path, offset, byteLength) val file = new File(path) @@ -111,9 +124,8 @@ class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[I val backButton = if (startByte > 0) { - + @@ -127,8 +139,8 @@ class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[I val nextButton = if (endByte < logLength) { - + @@ -145,12 +157,12 @@ class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[I {linkToMaster}
    -
    {backButton}
    +
    {backButton}
    {range}
    -
    {nextButton}
    +
    {nextButton}

    -
    +
    {logText}
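
For readers skimming the WorkerWebUI changes above: the worker UI now serves logs for both executors and drivers, and it picks the log directory based on which request parameters are present. Below is a minimal, self-contained sketch of that path-resolution rule only; it is not the actual Spark handler, and the object name, work directory, and sample IDs are made up for illustration.

// Sketch of the (appId, executorId, driverId) dispatch added in the diff above.
// Only the match structure mirrors the patch; everything else is illustrative.
import java.io.File

object WorkerLogPathSketch {
  def resolveLogPath(
      workDir: File,
      logType: String,
      appId: Option[String],
      executorId: Option[String],
      driverId: Option[String]): String = {
    (appId, executorId, driverId) match {
      case (Some(a), Some(e), None) =>
        // Executor logs: <workDir>/<appId>/<executorId>/<logType>
        s"${workDir.getPath}/$a/$e/$logType"
      case (None, None, Some(d)) =>
        // Driver logs: <workDir>/<driverId>/<logType>
        s"${workDir.getPath}/$d/$logType"
      case _ =>
        throw new Exception("Request must specify either application or driver identifiers")
    }
  }

  def main(args: Array[String]) {
    val workDir = new File("/tmp/spark-worker-work-dir")
    println(resolveLogPath(workDir, "stdout", Some("app-20140101000000-0000"), Some("0"), None))
    println(resolveLogPath(workDir, "stderr", None, None, Some("driver-20140101000000-0000")))
  }
}

One detail worth noting in the patch itself: in the `log()` method the executor branch interpolates `$appId/$executorId` and the driver branch `$driverId`, i.e. the `Option` values rather than the bound `a`/`e`/`d`, so the rendered path would contain `Some(...)`; the `logPage()` method binds the extracted values correctly.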
    diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index 8332631838d0a..45b43b403dd8c 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -19,14 +19,14 @@ package org.apache.spark.executor import java.nio.ByteBuffer -import akka.actor.{ActorRef, Actor, Props, Terminated} -import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected} +import akka.actor._ +import akka.remote._ -import org.apache.spark.Logging +import org.apache.spark.{SparkConf, SparkContext, Logging} import org.apache.spark.TaskState.TaskState +import org.apache.spark.deploy.worker.WorkerWatcher import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ -import org.apache.spark.util.{Utils, AkkaUtils} - +import org.apache.spark.util.{AkkaUtils, Utils} private[spark] class CoarseGrainedExecutorBackend( driverUrl: String, @@ -40,14 +40,13 @@ private[spark] class CoarseGrainedExecutorBackend( Utils.checkHostPort(hostPort, "Expected hostport") var executor: Executor = null - var driver: ActorRef = null + var driver: ActorSelection = null override def preStart() { logInfo("Connecting to driver: " + driverUrl) - driver = context.actorFor(driverUrl) + driver = context.actorSelection(driverUrl) driver ! RegisterExecutor(executorId, hostPort, cores) - context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) - context.watch(driver) // Doesn't work with remote actors, but useful for testing + context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) } override def receive = { @@ -77,8 +76,8 @@ private[spark] class CoarseGrainedExecutorBackend( executor.killTask(taskId) } - case Terminated(_) | RemoteClientDisconnected(_, _) | RemoteClientShutdown(_, _) => - logError("Driver terminated or disconnected! Shutting down.") + case x: DisassociatedEvent => + logError(s"Driver $x disassociated! 
Shutting down.") System.exit(1) case StopExecutor => @@ -93,30 +92,38 @@ private[spark] class CoarseGrainedExecutorBackend( } private[spark] object CoarseGrainedExecutorBackend { - def run(driverUrl: String, executorId: String, hostname: String, cores: Int) { + def run(driverUrl: String, executorId: String, hostname: String, cores: Int, + workerUrl: Option[String]) { // Debug code Utils.checkHost(hostname) // Create a new ActorSystem to run the backend, because we can't create a SparkEnv / Executor // before getting started with all our system properties, etc - val (actorSystem, boundPort) = AkkaUtils.createActorSystem("sparkExecutor", hostname, 0) + val (actorSystem, boundPort) = AkkaUtils.createActorSystem("sparkExecutor", hostname, 0, + indestructible = true, conf = new SparkConf) // set it val sparkHostPort = hostname + ":" + boundPort - System.setProperty("spark.hostPort", sparkHostPort) - val actor = actorSystem.actorOf( - Props(new CoarseGrainedExecutorBackend(driverUrl, executorId, sparkHostPort, cores)), + actorSystem.actorOf( + Props(classOf[CoarseGrainedExecutorBackend], driverUrl, executorId, sparkHostPort, cores), name = "Executor") + workerUrl.foreach{ url => + actorSystem.actorOf(Props(classOf[WorkerWatcher], url), name = "WorkerWatcher") + } actorSystem.awaitTermination() } def main(args: Array[String]) { - if (args.length < 4) { - //the reason we allow the last appid argument is to make it easy to kill rogue executors - System.err.println( - "Usage: CoarseGrainedExecutorBackend " + - "[]") - System.exit(1) + args.length match { + case x if x < 4 => + System.err.println( + // Worker url is used in spark standalone mode to enforce fate-sharing with worker + "Usage: CoarseGrainedExecutorBackend " + + " []") + System.exit(1) + case 4 => + run(args(0), args(1), args(2), args(3).toInt, None) + case x if x > 4 => + run(args(0), args(1), args(2), args(3).toInt, Some(args(4))) } - run(args(0), args(1), args(2), args(3).toInt) } } diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index e3a8d4a224839..c1b57f74d7e9a 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -48,8 +48,6 @@ private[spark] class Executor( private val EMPTY_BYTE_BUFFER = ByteBuffer.wrap(new Array[Byte](0)) - initLogging() - // No ip or host:port - just hostname Utils.checkHost(slaveHostname, "Expected executed slave to be a hostname") // must not have port specified. @@ -58,16 +56,17 @@ private[spark] class Executor( // Make sure the local hostname we report matches the cluster scheduler's name for this host Utils.setCustomHostname(slaveHostname) - // Set spark.* system properties from executor arg - for ((key, value) <- properties) { - System.setProperty(key, value) - } + // Set spark.* properties from executor arg + val conf = new SparkConf(true) + conf.setAll(properties) // If we are in yarn mode, systems can have different disk layouts so we must set it // to what Yarn on this system said was available. This will be used later when SparkEnv // created. 
- if (java.lang.Boolean.valueOf(System.getenv("SPARK_YARN_MODE"))) { - System.setProperty("spark.local.dir", getYarnLocalDirs()) + if (java.lang.Boolean.valueOf( + System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE")))) + { + conf.set("spark.local.dir", getYarnLocalDirs()) } // Create our ClassLoader and set it on this thread @@ -108,7 +107,7 @@ private[spark] class Executor( // Initialize Spark environment (using system properties read above) private val env = { if (!isLocal) { - val _env = SparkEnv.createFromSystemProperties(executorId, slaveHostname, 0, + val _env = SparkEnv.create(conf, executorId, slaveHostname, 0, isDriver = false, isLocal = false) SparkEnv.set(_env) _env.metricsSystem.registerSource(executorSource) @@ -121,7 +120,7 @@ private[spark] class Executor( // Akka's message frame size. If task result is bigger than this, we use the block manager // to send the result back. private val akkaFrameSize = { - env.actorSystem.settings.config.getBytes("akka.remote.netty.message-frame-size") + env.actorSystem.settings.config.getBytes("akka.remote.netty.tcp.maximum-frame-size") } // Start worker thread pool @@ -219,18 +218,22 @@ private[spark] class Executor( throw TaskKilledException } + val resultSer = SparkEnv.get.serializer.newInstance() + val beforeSerialization = System.currentTimeMillis() + val valueBytes = resultSer.serialize(value) + val afterSerialization = System.currentTimeMillis() + for (m <- task.metrics) { m.hostname = Utils.localHostName() m.executorDeserializeTime = (taskStart - startTime).toInt m.executorRunTime = (taskFinish - taskStart).toInt m.jvmGCTime = gcTime - startGCTime + m.resultSerializationTime = (afterSerialization - beforeSerialization).toInt } - // TODO I'd also like to track the time it takes to serialize the task results, but that is - // huge headache, b/c we need to serialize the task metrics first. 
If TaskMetrics had a - // custom serialized format, we could just change the relevants bytes in the byte buffer + val accumUpdates = Accumulators.values - val directResult = new DirectTaskResult(value, accumUpdates, task.metrics.getOrElse(null)) + val directResult = new DirectTaskResult(valueBytes, accumUpdates, task.metrics.getOrElse(null)) val serializedDirectResult = ser.serialize(directResult) logInfo("Serialized size of result for " + taskId + " is " + serializedDirectResult.limit) val serializedResult = { @@ -276,6 +279,11 @@ private[spark] class Executor( //System.exit(1) } } finally { + // TODO: Unregister shuffle memory only for ResultTask + val shuffleMemoryMap = env.shuffleMemoryMap + shuffleMemoryMap.synchronized { + shuffleMemoryMap.remove(Thread.currentThread().getId) + } runningTasks.remove(taskId) } } @@ -301,7 +309,7 @@ private[spark] class Executor( * new classes defined by the REPL as the user types code */ private def addReplClassLoaderIfNeeded(parent: ClassLoader): ClassLoader = { - val classUri = System.getProperty("spark.repl.class.uri") + val classUri = conf.get("spark.repl.class.uri", null) if (classUri != null) { logInfo("Using REPL class URI: " + classUri) try { @@ -329,12 +337,12 @@ private[spark] class Executor( // Fetch missing dependencies for ((name, timestamp) <- newFiles if currentFiles.getOrElse(name, -1L) < timestamp) { logInfo("Fetching " + name + " with timestamp " + timestamp) - Utils.fetchFile(name, new File(SparkFiles.getRootDirectory)) + Utils.fetchFile(name, new File(SparkFiles.getRootDirectory), conf) currentFiles(name) = timestamp } for ((name, timestamp) <- newJars if currentJars.getOrElse(name, -1L) < timestamp) { logInfo("Fetching " + name + " with timestamp " + timestamp) - Utils.fetchFile(name, new File(SparkFiles.getRootDirectory)) + Utils.fetchFile(name, new File(SparkFiles.getRootDirectory), conf) currentJars(name) = timestamp // Add it to our class loader val localName = name.split("/").last diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala index a537b2ceb1142..97176e4f5b727 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala @@ -41,31 +41,31 @@ class ExecutorSource(val executor: Executor, executorId: String) extends Source val sourceName = "executor.%s".format(executorId) // Gauge for executor thread pool's actively executing task counts - metricRegistry.register(MetricRegistry.name("threadpool", "activeTask", "count"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("threadpool", "activeTasks"), new Gauge[Int] { override def getValue: Int = executor.threadPool.getActiveCount() }) // Gauge for executor thread pool's approximate total number of tasks that have been completed - metricRegistry.register(MetricRegistry.name("threadpool", "completeTask", "count"), new Gauge[Long] { + metricRegistry.register(MetricRegistry.name("threadpool", "completeTasks"), new Gauge[Long] { override def getValue: Long = executor.threadPool.getCompletedTaskCount() }) // Gauge for executor thread pool's current number of threads - metricRegistry.register(MetricRegistry.name("threadpool", "currentPool", "size"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("threadpool", "currentPool_size"), new Gauge[Int] { override def getValue: Int = executor.threadPool.getPoolSize() }) // Gauge got executor thread pool's largest number of 
threads that have ever simultaneously been in th pool - metricRegistry.register(MetricRegistry.name("threadpool", "maxPool", "size"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("threadpool", "maxPool_size"), new Gauge[Int] { override def getValue: Int = executor.threadPool.getMaximumPoolSize() }) // Gauge for file system stats of this executor for (scheme <- Array("hdfs", "file")) { - registerFileSystemStat(scheme, "bytesRead", _.getBytesRead(), 0L) - registerFileSystemStat(scheme, "bytesWritten", _.getBytesWritten(), 0L) - registerFileSystemStat(scheme, "readOps", _.getReadOps(), 0) - registerFileSystemStat(scheme, "largeReadOps", _.getLargeReadOps(), 0) - registerFileSystemStat(scheme, "writeOps", _.getWriteOps(), 0) + registerFileSystemStat(scheme, "read_bytes", _.getBytesRead(), 0L) + registerFileSystemStat(scheme, "write_bytes", _.getBytesWritten(), 0L) + registerFileSystemStat(scheme, "read_ops", _.getReadOps(), 0) + registerFileSystemStat(scheme, "largeRead_ops", _.getLargeReadOps(), 0) + registerFileSystemStat(scheme, "write_ops", _.getWriteOps(), 0) } } diff --git a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala index 0b4892f98f039..0c8f4662a5f3a 100644 --- a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala +++ b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala @@ -43,6 +43,21 @@ class TaskMetrics extends Serializable { */ var jvmGCTime: Long = _ + /** + * Amount of time spent serializing the task result + */ + var resultSerializationTime: Long = _ + + /** + * The number of in-memory bytes spilled by this task + */ + var memoryBytesSpilled: Long = _ + + /** + * The number of on-disk bytes spilled by this task + */ + var diskBytesSpilled: Long = _ + /** * If this task reads from shuffle output, metrics on getting shuffle data will be collected here */ @@ -61,50 +76,53 @@ object TaskMetrics { class ShuffleReadMetrics extends Serializable { /** - * Time when shuffle finishs + * Absolute time when this task finished reading shuffle data */ var shuffleFinishTime: Long = _ /** - * Total number of blocks fetched in a shuffle (remote or local) + * Number of blocks fetched in this shuffle by this task (remote or local) */ var totalBlocksFetched: Int = _ /** - * Number of remote blocks fetched in a shuffle + * Number of remote blocks fetched in this shuffle by this task */ var remoteBlocksFetched: Int = _ /** - * Local blocks fetched in a shuffle + * Number of local blocks fetched in this shuffle by this task */ var localBlocksFetched: Int = _ /** - * Total time that is spent blocked waiting for shuffle to fetch data + * Time the task spent waiting for remote shuffle blocks. This only includes the time + * blocking on shuffle input data. For instance if block B is being fetched while the task is + * still not finished processing block A, it is not considered to be blocking on block B. */ var fetchWaitTime: Long = _ /** - * The total amount of time for all the shuffle fetches. This adds up time from overlapping - * shuffles, so can be longer than task time + * Total time spent fetching remote shuffle blocks. This aggregates the time spent fetching all + * input blocks. Since block fetches are both pipelined and parallelized, this can + * exceed fetchWaitTime and executorRunTime. 
*/ var remoteFetchTime: Long = _ /** - * Total number of remote bytes read from a shuffle + * Total number of remote bytes read from the shuffle by this task */ var remoteBytesRead: Long = _ } class ShuffleWriteMetrics extends Serializable { /** - * Number of bytes written for a shuffle + * Number of bytes written for the shuffle by this task */ var shuffleBytesWritten: Long = _ /** - * Time spent blocking on writes to disk or buffer cache, in nanoseconds. + * Time the task spent blocking on writes to disk or buffer cache, in nanoseconds */ var shuffleWriteTime: Long = _ } diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index 570a979b56879..59801773205bd 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -22,6 +22,7 @@ import java.io.{InputStream, OutputStream} import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} import org.xerial.snappy.{SnappyInputStream, SnappyOutputStream} +import org.apache.spark.{SparkEnv, SparkConf} /** @@ -37,15 +38,15 @@ trait CompressionCodec { private[spark] object CompressionCodec { - - def createCodec(): CompressionCodec = { - createCodec(System.getProperty( + def createCodec(conf: SparkConf): CompressionCodec = { + createCodec(conf, conf.get( "spark.io.compression.codec", classOf[LZFCompressionCodec].getName)) } - def createCodec(codecName: String): CompressionCodec = { - Class.forName(codecName, true, Thread.currentThread.getContextClassLoader) - .newInstance().asInstanceOf[CompressionCodec] + def createCodec(conf: SparkConf, codecName: String): CompressionCodec = { + val ctor = Class.forName(codecName, true, Thread.currentThread.getContextClassLoader) + .getConstructor(classOf[SparkConf]) + ctor.newInstance(conf).asInstanceOf[CompressionCodec] } } @@ -53,7 +54,7 @@ private[spark] object CompressionCodec { /** * LZF implementation of [[org.apache.spark.io.CompressionCodec]]. */ -class LZFCompressionCodec extends CompressionCodec { +class LZFCompressionCodec(conf: SparkConf) extends CompressionCodec { override def compressedOutputStream(s: OutputStream): OutputStream = { new LZFOutputStream(s).setFinishBlockOnFlush(true) @@ -67,10 +68,10 @@ class LZFCompressionCodec extends CompressionCodec { * Snappy implementation of [[org.apache.spark.io.CompressionCodec]]. * Block size can be configured by spark.io.compression.snappy.block.size. 
*/ -class SnappyCompressionCodec extends CompressionCodec { +class SnappyCompressionCodec(conf: SparkConf) extends CompressionCodec { override def compressedOutputStream(s: OutputStream): OutputStream = { - val blockSize = System.getProperty("spark.io.compression.snappy.block.size", "32768").toInt + val blockSize = conf.getInt("spark.io.compression.snappy.block.size", 32768) new SnappyOutputStream(s, blockSize) } diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala index caab748d602ee..e54ac0b332093 100644 --- a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala @@ -26,7 +26,6 @@ import scala.util.matching.Regex import org.apache.spark.Logging private[spark] class MetricsConfig(val configFile: Option[String]) extends Logging { - initLogging() val DEFAULT_PREFIX = "*" val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r @@ -81,7 +80,7 @@ private[spark] class MetricsConfig(val configFile: Option[String]) extends Loggi val subProperties = new mutable.HashMap[String, Properties] import scala.collection.JavaConversions._ prop.foreach { kv => - if (regex.findPrefixOf(kv._1) != None) { + if (regex.findPrefixOf(kv._1).isDefined) { val regex(prefix, suffix) = kv._1 subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2) } diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala index bec0c83be8bea..9930537b34db0 100644 --- a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala @@ -24,7 +24,7 @@ import java.util.concurrent.TimeUnit import scala.collection.mutable -import org.apache.spark.Logging +import org.apache.spark.{SparkConf, Logging} import org.apache.spark.metrics.sink.{MetricsServlet, Sink} import org.apache.spark.metrics.source.Source @@ -62,10 +62,10 @@ import org.apache.spark.metrics.source.Source * * [options] is the specific property of this source or sink. */ -private[spark] class MetricsSystem private (val instance: String) extends Logging { - initLogging() +private[spark] class MetricsSystem private (val instance: String, + conf: SparkConf) extends Logging { - val confFile = System.getProperty("spark.metrics.conf") + val confFile = conf.get("spark.metrics.conf", null) val metricsConfig = new MetricsConfig(Option(confFile)) val sinks = new mutable.ArrayBuffer[Sink] @@ -159,5 +159,6 @@ private[spark] object MetricsSystem { } } - def createMetricsSystem(instance: String): MetricsSystem = new MetricsSystem(instance) + def createMetricsSystem(instance: String, conf: SparkConf): MetricsSystem = + new MetricsSystem(instance, conf) } diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala new file mode 100644 index 0000000000000..cdcfec8ca785b --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics.sink + +import java.util.Properties +import java.util.concurrent.TimeUnit +import java.net.InetSocketAddress + +import com.codahale.metrics.MetricRegistry +import com.codahale.metrics.graphite.{GraphiteReporter, Graphite} + +import org.apache.spark.metrics.MetricsSystem + +class GraphiteSink(val property: Properties, val registry: MetricRegistry) extends Sink { + val GRAPHITE_DEFAULT_PERIOD = 10 + val GRAPHITE_DEFAULT_UNIT = "SECONDS" + val GRAPHITE_DEFAULT_PREFIX = "" + + val GRAPHITE_KEY_HOST = "host" + val GRAPHITE_KEY_PORT = "port" + val GRAPHITE_KEY_PERIOD = "period" + val GRAPHITE_KEY_UNIT = "unit" + val GRAPHITE_KEY_PREFIX = "prefix" + + def propertyToOption(prop: String) = Option(property.getProperty(prop)) + + if (!propertyToOption(GRAPHITE_KEY_HOST).isDefined) { + throw new Exception("Graphite sink requires 'host' property.") + } + + if (!propertyToOption(GRAPHITE_KEY_PORT).isDefined) { + throw new Exception("Graphite sink requires 'port' property.") + } + + val host = propertyToOption(GRAPHITE_KEY_HOST).get + val port = propertyToOption(GRAPHITE_KEY_PORT).get.toInt + + val pollPeriod = propertyToOption(GRAPHITE_KEY_PERIOD) match { + case Some(s) => s.toInt + case None => GRAPHITE_DEFAULT_PERIOD + } + + val pollUnit = propertyToOption(GRAPHITE_KEY_UNIT) match { + case Some(s) => TimeUnit.valueOf(s.toUpperCase()) + case None => TimeUnit.valueOf(GRAPHITE_DEFAULT_UNIT) + } + + val prefix = propertyToOption(GRAPHITE_KEY_PREFIX).getOrElse(GRAPHITE_DEFAULT_PREFIX) + + MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) + + val graphite: Graphite = new Graphite(new InetSocketAddress(host, port)) + + val reporter: GraphiteReporter = GraphiteReporter.forRegistry(registry) + .convertDurationsTo(TimeUnit.MILLISECONDS) + .convertRatesTo(TimeUnit.SECONDS) + .prefixedWith(prefix) + .build(graphite) + + override def start() { + reporter.start(pollPeriod, pollUnit) + } + + override def stop() { + reporter.stop() + } +} diff --git a/core/src/main/scala/org/apache/spark/network/BufferMessage.scala b/core/src/main/scala/org/apache/spark/network/BufferMessage.scala index f736bb3713061..fb4c65909a9e2 100644 --- a/core/src/main/scala/org/apache/spark/network/BufferMessage.scala +++ b/core/src/main/scala/org/apache/spark/network/BufferMessage.scala @@ -46,7 +46,7 @@ class BufferMessage(id_ : Int, val buffers: ArrayBuffer[ByteBuffer], var ackId: throw new Exception("Max chunk size is " + maxChunkSize) } - if (size == 0 && gotChunkForSendingOnce == false) { + if (size == 0 && !gotChunkForSendingOnce) { val newChunk = new MessageChunk( new MessageChunkHeader(typ, id, 0, 0, ackId, senderAddress), null) gotChunkForSendingOnce = true diff --git a/core/src/main/scala/org/apache/spark/network/Connection.scala b/core/src/main/scala/org/apache/spark/network/Connection.scala index 95cb0206acd62..cba8477ed5723 100644 --- a/core/src/main/scala/org/apache/spark/network/Connection.scala +++ b/core/src/main/scala/org/apache/spark/network/Connection.scala @@ -330,7 +330,7 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, // 
Is highly unlikely unless there was an unclean close of socket, etc registerInterest() logInfo("Connected to [" + address + "], " + outbox.messages.size + " messages pending") - return true + true } catch { case e: Exception => { logWarning("Error finishing connection to " + address, e) @@ -385,7 +385,7 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, } } // should not happen - to keep scala compiler happy - return true + true } // This is a hack to determine if remote socket was closed or not. @@ -559,7 +559,7 @@ private[spark] class ReceivingConnection(channel_ : SocketChannel, selector_ : S } } // should not happen - to keep scala compiler happy - return true + true } def onReceive(callback: (Connection, Message) => Unit) {onReceiveCallback = callback} diff --git a/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala index 9c2fee4023be6..e6e01783c8895 100644 --- a/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala +++ b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala @@ -31,13 +31,13 @@ import scala.collection.mutable.SynchronizedMap import scala.collection.mutable.SynchronizedQueue import scala.collection.mutable.ArrayBuffer -import akka.dispatch.{Await, Promise, ExecutionContext, Future} -import akka.util.Duration -import akka.util.duration._ -import org.apache.spark.util.Utils +import scala.concurrent.{Await, Promise, ExecutionContext, Future} +import scala.concurrent.duration.Duration +import scala.concurrent.duration._ +import org.apache.spark.util.Utils -private[spark] class ConnectionManager(port: Int) extends Logging { +private[spark] class ConnectionManager(port: Int, conf: SparkConf) extends Logging { class MessageStatus( val message: Message, @@ -54,22 +54,22 @@ private[spark] class ConnectionManager(port: Int) extends Logging { private val selector = SelectorProvider.provider.openSelector() private val handleMessageExecutor = new ThreadPoolExecutor( - System.getProperty("spark.core.connection.handler.threads.min","20").toInt, - System.getProperty("spark.core.connection.handler.threads.max","60").toInt, - System.getProperty("spark.core.connection.handler.threads.keepalive","60").toInt, TimeUnit.SECONDS, + conf.getInt("spark.core.connection.handler.threads.min", 20), + conf.getInt("spark.core.connection.handler.threads.max", 60), + conf.getInt("spark.core.connection.handler.threads.keepalive", 60), TimeUnit.SECONDS, new LinkedBlockingDeque[Runnable]()) private val handleReadWriteExecutor = new ThreadPoolExecutor( - System.getProperty("spark.core.connection.io.threads.min","4").toInt, - System.getProperty("spark.core.connection.io.threads.max","32").toInt, - System.getProperty("spark.core.connection.io.threads.keepalive","60").toInt, TimeUnit.SECONDS, + conf.getInt("spark.core.connection.io.threads.min", 4), + conf.getInt("spark.core.connection.io.threads.max", 32), + conf.getInt("spark.core.connection.io.threads.keepalive", 60), TimeUnit.SECONDS, new LinkedBlockingDeque[Runnable]()) // Use a different, yet smaller, thread pool - infrequently used with very short lived tasks : which should be executed asap private val handleConnectExecutor = new ThreadPoolExecutor( - System.getProperty("spark.core.connection.connect.threads.min","1").toInt, - System.getProperty("spark.core.connection.connect.threads.max","8").toInt, - System.getProperty("spark.core.connection.connect.threads.keepalive","60").toInt, TimeUnit.SECONDS, + 
conf.getInt("spark.core.connection.connect.threads.min", 1), + conf.getInt("spark.core.connection.connect.threads.max", 8), + conf.getInt("spark.core.connection.connect.threads.keepalive", 60), TimeUnit.SECONDS, new LinkedBlockingDeque[Runnable]()) private val serverChannel = ServerSocketChannel.open() @@ -594,7 +594,7 @@ private[spark] class ConnectionManager(port: Int) extends Logging { private[spark] object ConnectionManager { def main(args: Array[String]) { - val manager = new ConnectionManager(9999) + val manager = new ConnectionManager(9999, new SparkConf) manager.onReceiveMessage((msg: Message, id: ConnectionManagerId) => { println("Received [" + msg + "] from [" + id + "]") None diff --git a/core/src/main/scala/org/apache/spark/network/ConnectionManagerTest.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManagerTest.scala index 8d9ad9604d645..4f5742d29b367 100644 --- a/core/src/main/scala/org/apache/spark/network/ConnectionManagerTest.scala +++ b/core/src/main/scala/org/apache/spark/network/ConnectionManagerTest.scala @@ -25,8 +25,8 @@ import scala.io.Source import java.nio.ByteBuffer import java.net.InetAddress -import akka.dispatch.Await -import akka.util.duration._ +import scala.concurrent.Await +import scala.concurrent.duration._ private[spark] object ConnectionManagerTest extends Logging{ def main(args: Array[String]) { diff --git a/core/src/main/scala/org/apache/spark/network/Message.scala b/core/src/main/scala/org/apache/spark/network/Message.scala index f2ecc6d439aaa..2612884bdbe15 100644 --- a/core/src/main/scala/org/apache/spark/network/Message.scala +++ b/core/src/main/scala/org/apache/spark/network/Message.scala @@ -61,7 +61,7 @@ private[spark] object Message { if (dataBuffers.exists(_ == null)) { throw new Exception("Attempting to create buffer message with null buffer") } - return new BufferMessage(getNewId(), new ArrayBuffer[ByteBuffer] ++= dataBuffers, ackId) + new BufferMessage(getNewId(), new ArrayBuffer[ByteBuffer] ++= dataBuffers, ackId) } def createBufferMessage(dataBuffers: Seq[ByteBuffer]): BufferMessage = @@ -69,9 +69,9 @@ private[spark] object Message { def createBufferMessage(dataBuffer: ByteBuffer, ackId: Int): BufferMessage = { if (dataBuffer == null) { - return createBufferMessage(Array(ByteBuffer.allocate(0)), ackId) + createBufferMessage(Array(ByteBuffer.allocate(0)), ackId) } else { - return createBufferMessage(Array(dataBuffer), ackId) + createBufferMessage(Array(dataBuffer), ackId) } } diff --git a/core/src/main/scala/org/apache/spark/network/ReceiverTest.scala b/core/src/main/scala/org/apache/spark/network/ReceiverTest.scala index 781715108be63..1c9d6030d68d7 100644 --- a/core/src/main/scala/org/apache/spark/network/ReceiverTest.scala +++ b/core/src/main/scala/org/apache/spark/network/ReceiverTest.scala @@ -19,19 +19,19 @@ package org.apache.spark.network import java.nio.ByteBuffer import java.net.InetAddress +import org.apache.spark.SparkConf private[spark] object ReceiverTest { - def main(args: Array[String]) { - val manager = new ConnectionManager(9999) + val manager = new ConnectionManager(9999, new SparkConf) println("Started connection manager with id = " + manager.id) - - manager.onReceiveMessage((msg: Message, id: ConnectionManagerId) => { + + manager.onReceiveMessage((msg: Message, id: ConnectionManagerId) => { /*println("Received [" + msg + "] from [" + id + "] at " + System.currentTimeMillis)*/ - val buffer = ByteBuffer.wrap("response".getBytes()) + val buffer = ByteBuffer.wrap("response".getBytes) 
Some(Message.createBufferMessage(buffer, msg.id)) }) - Thread.currentThread.join() + Thread.currentThread.join() } } diff --git a/core/src/main/scala/org/apache/spark/network/SenderTest.scala b/core/src/main/scala/org/apache/spark/network/SenderTest.scala index 777574980fbc6..dcbd183c88d09 100644 --- a/core/src/main/scala/org/apache/spark/network/SenderTest.scala +++ b/core/src/main/scala/org/apache/spark/network/SenderTest.scala @@ -19,29 +19,29 @@ package org.apache.spark.network import java.nio.ByteBuffer import java.net.InetAddress +import org.apache.spark.SparkConf private[spark] object SenderTest { - def main(args: Array[String]) { - + if (args.length < 2) { println("Usage: SenderTest ") System.exit(1) } - + val targetHost = args(0) val targetPort = args(1).toInt val targetConnectionManagerId = new ConnectionManagerId(targetHost, targetPort) - val manager = new ConnectionManager(0) + val manager = new ConnectionManager(0, new SparkConf) println("Started connection manager with id = " + manager.id) - manager.onReceiveMessage((msg: Message, id: ConnectionManagerId) => { + manager.onReceiveMessage((msg: Message, id: ConnectionManagerId) => { println("Received [" + msg + "] from [" + id + "]") None }) - - val size = 100 * 1024 * 1024 + + val size = 100 * 1024 * 1024 val buffer = ByteBuffer.allocate(size).put(Array.tabulate[Byte](size)(x => x.toByte)) buffer.flip @@ -50,7 +50,7 @@ private[spark] object SenderTest { val count = 100 (0 until count).foreach(i => { val dataMessage = Message.createBufferMessage(buffer.duplicate) - val startTime = System.currentTimeMillis + val startTime = System.currentTimeMillis /*println("Started timer at " + startTime)*/ val responseStr = manager.sendMessageReliablySync(targetConnectionManagerId, dataMessage) match { case Some(response) => diff --git a/core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala b/core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala index b1e1576dadc1a..d87157e12c487 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala @@ -23,20 +23,20 @@ import io.netty.buffer.ByteBuf import io.netty.channel.ChannelHandlerContext import io.netty.util.CharsetUtil -import org.apache.spark.Logging +import org.apache.spark.{SparkContext, SparkConf, Logging} import org.apache.spark.network.ConnectionManagerId import scala.collection.JavaConverters._ import org.apache.spark.storage.BlockId -private[spark] class ShuffleCopier extends Logging { +private[spark] class ShuffleCopier(conf: SparkConf) extends Logging { def getBlock(host: String, port: Int, blockId: BlockId, resultCollectCallback: (BlockId, Long, ByteBuf) => Unit) { val handler = new ShuffleCopier.ShuffleClientHandler(resultCollectCallback) - val connectTimeout = System.getProperty("spark.shuffle.netty.connect.timeout", "60000").toInt + val connectTimeout = conf.getInt("spark.shuffle.netty.connect.timeout", 60000) val fc = new FileClient(handler, connectTimeout) try { @@ -104,10 +104,10 @@ private[spark] object ShuffleCopier extends Logging { val threads = if (args.length > 3) args(3).toInt else 10 val copiers = Executors.newFixedThreadPool(80) - val tasks = (for (i <- Range(0, threads)) yield { + val tasks = (for (i <- Range(0, threads)) yield { Executors.callable(new Runnable() { def run() { - val copier = new ShuffleCopier() + val copier = new ShuffleCopier(new SparkConf) copier.getBlock(host, port, blockId, echoResultCollectCallBack) } }) diff --git 
a/core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala b/core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala index 546d921067175..44204a8c46572 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala @@ -64,7 +64,7 @@ private[spark] object ShuffleSender { val subDirId = (hash / localDirs.length) % subDirsPerLocalDir val subDir = new File(localDirs(dirId), "%02x".format(subDirId)) val file = new File(subDir, blockId.name) - return new FileSegment(file, 0, file.length()) + new FileSegment(file, 0, file.length()) } } val sender = new ShuffleSender(port, pResovler) diff --git a/core/src/main/scala/org/apache/spark/package.scala b/core/src/main/scala/org/apache/spark/package.scala index 70a5a8caff839..2625a7f6a575a 100644 --- a/core/src/main/scala/org/apache/spark/package.scala +++ b/core/src/main/scala/org/apache/spark/package.scala @@ -29,6 +29,9 @@ package org.apache * be saved as SequenceFiles. These operations are automatically available on any RDD of the right * type (e.g. RDD[(Int, Int)] through implicit conversions when you * `import org.apache.spark.SparkContext._`. + * + * Java programmers should reference the [[spark.api.java]] package + * for Spark programming APIs in Java. */ package object spark { // For package docs only diff --git a/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala b/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala index d71069444a73f..423ff67a5fd43 100644 --- a/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala +++ b/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala @@ -71,7 +71,7 @@ private[spark] class ApproximateActionListener[T, U, R]( val finishTime = startTime + timeout while (true) { val time = System.currentTimeMillis() - if (failure != None) { + if (failure.isDefined) { throw failure.get } else if (finishedTasks == totalTasks) { return new PartialResult(evaluator.currentResult(), true) diff --git a/core/src/main/scala/org/apache/spark/partial/PartialResult.scala b/core/src/main/scala/org/apache/spark/partial/PartialResult.scala index 5ce49b8100ee6..812368e04ac0d 100644 --- a/core/src/main/scala/org/apache/spark/partial/PartialResult.scala +++ b/core/src/main/scala/org/apache/spark/partial/PartialResult.scala @@ -31,10 +31,10 @@ class PartialResult[R](initialVal: R, isFinal: Boolean) { * Blocking method to wait for and return the final value. */ def getFinalValue(): R = synchronized { - while (finalValue == None && failure == None) { + while (finalValue.isEmpty && failure.isEmpty) { this.wait() } - if (finalValue != None) { + if (finalValue.isDefined) { return finalValue.get } else { throw failure.get @@ -46,11 +46,11 @@ class PartialResult[R](initialVal: R, isFinal: Boolean) { * is supported per PartialResult. 
*/ def onComplete(handler: R => Unit): PartialResult[R] = synchronized { - if (completionHandler != None) { + if (completionHandler.isDefined) { throw new UnsupportedOperationException("onComplete cannot be called twice") } completionHandler = Some(handler) - if (finalValue != None) { + if (finalValue.isDefined) { // We already have a final value, so let's call the handler handler(finalValue.get) } @@ -63,11 +63,11 @@ class PartialResult[R](initialVal: R, isFinal: Boolean) { */ def onFail(handler: Exception => Unit) { synchronized { - if (failureHandler != None) { + if (failureHandler.isDefined) { throw new UnsupportedOperationException("onFail cannot be called twice") } failureHandler = Some(handler) - if (failure != None) { + if (failure.isDefined) { // We already have a failure, so let's call the handler handler(failure.get) } @@ -102,7 +102,7 @@ class PartialResult[R](initialVal: R, isFinal: Boolean) { private[spark] def setFinalValue(value: R) { synchronized { - if (finalValue != None) { + if (finalValue.isDefined) { throw new UnsupportedOperationException("setFinalValue called twice on a PartialResult") } finalValue = Some(value) @@ -117,7 +117,7 @@ class PartialResult[R](initialVal: R, isFinal: Boolean) { private[spark] def setFailure(exception: Exception) { synchronized { - if (failure != None) { + if (failure.isDefined) { throw new UnsupportedOperationException("setFailure called twice on a PartialResult") } failure = Some(exception) diff --git a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala index faaf837be0f5a..d1c74a5063510 100644 --- a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala @@ -21,6 +21,7 @@ import java.util.concurrent.atomic.AtomicLong import scala.collection.mutable.ArrayBuffer import scala.concurrent.ExecutionContext.Implicits.global +import scala.reflect.ClassTag import org.apache.spark.{ComplexFutureAction, FutureAction, Logging} @@ -28,7 +29,7 @@ import org.apache.spark.{ComplexFutureAction, FutureAction, Logging} * A set of asynchronous RDD actions available through an implicit conversion. * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. */ -class AsyncRDDActions[T: ClassManifest](self: RDD[T]) extends Serializable with Logging { +class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Logging { /** * Returns a future for counting the number of elements in the RDD. 
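
Stepping back from the diff for a moment: the `PartialResult` edits above are purely stylistic (`isDefined`/`isEmpty` instead of `!= None`), but the class's contract is easiest to see from the caller's side. The following is a small usage sketch, assuming a local master and the standard `countApprox` action; the object name, timeout, and data sizes are illustrative.

import org.apache.spark.{SparkConf, SparkContext}

object PartialResultSketch {
  def main(args: Array[String]) {
    val conf = new SparkConf().setMaster("local[2]").setAppName("partial-result-sketch")
    val sc = new SparkContext(conf)
    val rdd = sc.parallelize(1 to 1000000, 8)

    // countApprox returns a PartialResult: an initial estimate is available after
    // `timeout` milliseconds, refined to a final value when the job completes.
    val approxCount = rdd.countApprox(timeout = 200L, confidence = 0.95)

    // onComplete registers a handler; per the class above, it fires immediately
    // if the final value has already been set.
    approxCount.onComplete(bounded => println(s"final count estimate: $bounded"))

    // getFinalValue() blocks until the final value (or a failure) is set.
    println("blocking result: " + approxCount.getFinalValue())

    sc.stop()
  }
}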
diff --git a/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala index 44ea573a7c825..424354ae165a0 100644 --- a/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala @@ -17,6 +17,8 @@ package org.apache.spark.rdd +import scala.reflect.ClassTag + import org.apache.spark.{SparkContext, SparkEnv, Partition, TaskContext} import org.apache.spark.storage.{BlockId, BlockManager} @@ -25,7 +27,7 @@ private[spark] class BlockRDDPartition(val blockId: BlockId, idx: Int) extends P } private[spark] -class BlockRDD[T: ClassManifest](sc: SparkContext, @transient blockIds: Array[BlockId]) +class BlockRDD[T: ClassTag](sc: SparkContext, @transient blockIds: Array[BlockId]) extends RDD[T](sc, Nil) { @transient lazy val locations_ = BlockManager.blockIdsToHosts(blockIds, SparkEnv.get) diff --git a/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala index 0de22f0e06e49..87b950ba431f9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala @@ -18,6 +18,7 @@ package org.apache.spark.rdd import java.io.{ObjectOutputStream, IOException} +import scala.reflect.ClassTag import org.apache.spark._ @@ -43,7 +44,7 @@ class CartesianPartition( } private[spark] -class CartesianRDD[T: ClassManifest, U:ClassManifest]( +class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) diff --git a/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala index d3033ea4a627b..30e578dd93e8d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala @@ -17,15 +17,13 @@ package org.apache.spark.rdd +import java.io.IOException +import scala.reflect.ClassTag import org.apache.spark._ +import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.hadoop.mapred.{FileInputFormat, SequenceFileInputFormat, JobConf, Reporter} import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.io.{NullWritable, BytesWritable} -import org.apache.hadoop.util.ReflectionUtils import org.apache.hadoop.fs.Path -import java.io.{File, IOException, EOFException} -import java.text.NumberFormat private[spark] class CheckpointRDDPartition(val index: Int) extends Partition {} @@ -33,9 +31,11 @@ private[spark] class CheckpointRDDPartition(val index: Int) extends Partition {} * This RDD represents a RDD checkpoint file (similar to HadoopRDD). */ private[spark] -class CheckpointRDD[T: ClassManifest](sc: SparkContext, val checkpointPath: String) +class CheckpointRDD[T: ClassTag](sc: SparkContext, val checkpointPath: String) extends RDD[T](sc, Nil) { + val broadcastedConf = sc.broadcast(new SerializableWritable(sc.hadoopConfiguration)) + @transient val fs = new Path(checkpointPath).getFileSystem(sc.hadoopConfiguration) override def getPartitions: Array[Partition] = { @@ -43,8 +43,8 @@ class CheckpointRDD[T: ClassManifest](sc: SparkContext, val checkpointPath: Stri val numPartitions = // listStatus can throw exception if path does not exist. 
if (fs.exists(cpath)) { - val dirContents = fs.listStatus(cpath) - val partitionFiles = dirContents.map(_.getPath.toString).filter(_.contains("part-")).sorted + val dirContents = fs.listStatus(cpath).map(_.getPath) + val partitionFiles = dirContents.filter(_.getName.startsWith("part-")).map(_.toString).sorted val numPart = partitionFiles.size if (numPart > 0 && (! partitionFiles(0).endsWith(CheckpointRDD.splitIdToFile(0)) || ! partitionFiles(numPart-1).endsWith(CheckpointRDD.splitIdToFile(numPart-1)))) { @@ -67,7 +67,7 @@ class CheckpointRDD[T: ClassManifest](sc: SparkContext, val checkpointPath: Stri override def compute(split: Partition, context: TaskContext): Iterator[T] = { val file = new Path(checkpointPath, CheckpointRDD.splitIdToFile(split.index)) - CheckpointRDD.readFromFile(file, context) + CheckpointRDD.readFromFile(file, broadcastedConf, context) } override def checkpoint() { @@ -76,15 +76,18 @@ class CheckpointRDD[T: ClassManifest](sc: SparkContext, val checkpointPath: Stri } private[spark] object CheckpointRDD extends Logging { - def splitIdToFile(splitId: Int): String = { "part-%05d".format(splitId) } - def writeToFile[T](path: String, blockSize: Int = -1)(ctx: TaskContext, iterator: Iterator[T]) { + def writeToFile[T]( + path: String, + broadcastedConf: Broadcast[SerializableWritable[Configuration]], + blockSize: Int = -1 + )(ctx: TaskContext, iterator: Iterator[T]) { val env = SparkEnv.get val outputDir = new Path(path) - val fs = outputDir.getFileSystem(SparkHadoopUtil.get.newConfiguration()) + val fs = outputDir.getFileSystem(broadcastedConf.value.value) val finalOutputName = splitIdToFile(ctx.partitionId) val finalOutputPath = new Path(outputDir, finalOutputName) @@ -94,7 +97,7 @@ private[spark] object CheckpointRDD extends Logging { throw new IOException("Checkpoint failed: temporary path " + tempOutputPath + " already exists") } - val bufferSize = System.getProperty("spark.buffer.size", "65536").toInt + val bufferSize = env.conf.getInt("spark.buffer.size", 65536) val fileOutputStream = if (blockSize < 0) { fs.create(tempOutputPath, false, bufferSize) @@ -121,10 +124,14 @@ private[spark] object CheckpointRDD extends Logging { } } - def readFromFile[T](path: Path, context: TaskContext): Iterator[T] = { + def readFromFile[T]( + path: Path, + broadcastedConf: Broadcast[SerializableWritable[Configuration]], + context: TaskContext + ): Iterator[T] = { val env = SparkEnv.get - val fs = path.getFileSystem(SparkHadoopUtil.get.newConfiguration()) - val bufferSize = System.getProperty("spark.buffer.size", "65536").toInt + val fs = path.getFileSystem(broadcastedConf.value.value) + val bufferSize = env.conf.getInt("spark.buffer.size", 65536) val fileInputStream = fs.open(path, bufferSize) val serializer = env.serializer.newInstance() val deserializeStream = serializer.deserializeStream(fileInputStream) @@ -146,8 +153,10 @@ private[spark] object CheckpointRDD extends Logging { val sc = new SparkContext(cluster, "CheckpointRDD Test") val rdd = sc.makeRDD(1 to 10, 10).flatMap(x => 1 to 10000) val path = new Path(hdfsPath, "temp") - val fs = path.getFileSystem(SparkHadoopUtil.get.newConfiguration()) - sc.runJob(rdd, CheckpointRDD.writeToFile(path.toString, 1024) _) + val conf = SparkHadoopUtil.get.newConfiguration() + val fs = path.getFileSystem(conf) + val broadcastedConf = sc.broadcast(new SerializableWritable(conf)) + sc.runJob(rdd, CheckpointRDD.writeToFile(path.toString, broadcastedConf, 1024) _) val cpRDD = new CheckpointRDD[Int](sc, path.toString) assert(cpRDD.partitions.length 
== rdd.partitions.length, "Number of partitions is not the same") assert(cpRDD.collect.toList == rdd.collect.toList, "Data of partitions not the same") diff --git a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala index 911a002884d65..0e47f2e022610 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala @@ -23,8 +23,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{InterruptibleIterator, Partition, Partitioner, SparkEnv, TaskContext} import org.apache.spark.{Dependency, OneToOneDependency, ShuffleDependency} -import org.apache.spark.util.AppendOnlyMap - +import org.apache.spark.util.collection.{ExternalAppendOnlyMap, AppendOnlyMap} private[spark] sealed trait CoGroupSplitDep extends Serializable @@ -44,14 +43,12 @@ private[spark] case class NarrowCoGroupSplitDep( private[spark] case class ShuffleCoGroupSplitDep(shuffleId: Int) extends CoGroupSplitDep -private[spark] -class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep]) +private[spark] class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep]) extends Partition with Serializable { override val index: Int = idx override def hashCode(): Int = idx } - /** * A RDD that cogroups its parents. For each key k in parent RDDs, the resulting RDD contains a * tuple with the list of values for that key. @@ -62,6 +59,13 @@ class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep]) class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: Partitioner) extends RDD[(K, Seq[Seq[_]])](rdds.head.context, Nil) { + // For example, `(k, a) cogroup (k, b)` produces k -> Seq(ArrayBuffer as, ArrayBuffer bs). + // Each ArrayBuffer is represented as a CoGroup, and the resulting Seq as a CoGroupCombiner. + // CoGroupValue is the intermediate state of each value before being merged in compute. + private type CoGroup = ArrayBuffer[Any] + private type CoGroupValue = (Any, Int) // Int is dependency number + private type CoGroupCombiner = Seq[CoGroup] + private var serializerClass: String = null def setSerializer(cls: String): CoGroupedRDD[K] = { @@ -100,37 +104,77 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: override val partitioner = Some(part) - override def compute(s: Partition, context: TaskContext): Iterator[(K, Seq[Seq[_]])] = { + override def compute(s: Partition, context: TaskContext): Iterator[(K, CoGroupCombiner)] = { + val sparkConf = SparkEnv.get.conf + val externalSorting = sparkConf.getBoolean("spark.shuffle.spill", true) val split = s.asInstanceOf[CoGroupPartition] val numRdds = split.deps.size - // e.g. 
for `(k, a) cogroup (k, b)`, K -> Seq(ArrayBuffer as, ArrayBuffer bs) - val map = new AppendOnlyMap[K, Seq[ArrayBuffer[Any]]] - val update: (Boolean, Seq[ArrayBuffer[Any]]) => Seq[ArrayBuffer[Any]] = (hadVal, oldVal) => { - if (hadVal) oldVal else Array.fill(numRdds)(new ArrayBuffer[Any]) - } - - val getSeq = (k: K) => { - map.changeValue(k, update) - } - - val ser = SparkEnv.get.serializerManager.get(serializerClass) + // A list of (rdd iterator, dependency number) pairs + val rddIterators = new ArrayBuffer[(Iterator[Product2[K, Any]], Int)] for ((dep, depNum) <- split.deps.zipWithIndex) dep match { case NarrowCoGroupSplitDep(rdd, _, itsSplit) => { // Read them from the parent - rdd.iterator(itsSplit, context).asInstanceOf[Iterator[Product2[K, Any]]].foreach { kv => - getSeq(kv._1)(depNum) += kv._2 - } + val it = rdd.iterator(itsSplit, context).asInstanceOf[Iterator[Product2[K, Any]]] + rddIterators += ((it, depNum)) } case ShuffleCoGroupSplitDep(shuffleId) => { // Read map outputs of shuffle val fetcher = SparkEnv.get.shuffleFetcher - fetcher.fetch[Product2[K, Any]](shuffleId, split.index, context, ser).foreach { - kv => getSeq(kv._1)(depNum) += kv._2 + val ser = SparkEnv.get.serializerManager.get(serializerClass, sparkConf) + val it = fetcher.fetch[Product2[K, Any]](shuffleId, split.index, context, ser) + rddIterators += ((it, depNum)) + } + } + + if (!externalSorting) { + val map = new AppendOnlyMap[K, CoGroupCombiner] + val update: (Boolean, CoGroupCombiner) => CoGroupCombiner = (hadVal, oldVal) => { + if (hadVal) oldVal else Array.fill(numRdds)(new CoGroup) + } + val getCombiner: K => CoGroupCombiner = key => { + map.changeValue(key, update) + } + rddIterators.foreach { case (it, depNum) => + while (it.hasNext) { + val kv = it.next() + getCombiner(kv._1)(depNum) += kv._2 } } + new InterruptibleIterator(context, map.iterator) + } else { + val map = createExternalMap(numRdds) + rddIterators.foreach { case (it, depNum) => + while (it.hasNext) { + val kv = it.next() + map.insert(kv._1, new CoGroupValue(kv._2, depNum)) + } + } + context.taskMetrics.memoryBytesSpilled = map.memoryBytesSpilled + context.taskMetrics.diskBytesSpilled = map.diskBytesSpilled + new InterruptibleIterator(context, map.iterator) + } + } + + private def createExternalMap(numRdds: Int) + : ExternalAppendOnlyMap[K, CoGroupValue, CoGroupCombiner] = { + + val createCombiner: (CoGroupValue => CoGroupCombiner) = value => { + val newCombiner = Array.fill(numRdds)(new CoGroup) + value match { case (v, depNum) => newCombiner(depNum) += v } + newCombiner } - new InterruptibleIterator(context, map.iterator) + val mergeValue: (CoGroupCombiner, CoGroupValue) => CoGroupCombiner = + (combiner, value) => { + value match { case (v, depNum) => combiner(depNum) += v } + combiner + } + val mergeCombiners: (CoGroupCombiner, CoGroupCombiner) => CoGroupCombiner = + (combiner1, combiner2) => { + combiner1.zip(combiner2).map { case (v1, v2) => v1 ++ v2 } + } + new ExternalAppendOnlyMap[K, CoGroupValue, CoGroupCombiner]( + createCombiner, mergeValue, mergeCombiners) } override def clearDependencies() { diff --git a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala index c5de6362a9aa7..cefcc3d2d9420 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala @@ -22,6 +22,7 @@ import java.io.{ObjectOutputStream, IOException} import scala.collection.mutable import scala.Some import 
scala.collection.mutable.ArrayBuffer +import scala.reflect.ClassTag /** * Class that captures a coalesced RDD by essentially keeping track of parent partitions @@ -68,7 +69,7 @@ case class CoalescedRDDPartition( * @param maxPartitions number of desired partitions in the coalesced RDD * @param balanceSlack used to trade-off balance and locality. 1.0 is all locality, 0 is all balance */ -class CoalescedRDD[T: ClassManifest]( +class CoalescedRDD[T: ClassTag]( @transient var prev: RDD[T], maxPartitions: Int, balanceSlack: Double = 0.10) @@ -294,10 +295,10 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc val prefPartActual = prefPart.get - if (minPowerOfTwo.size + slack <= prefPartActual.size) // more imbalance than the slack allows - return minPowerOfTwo // prefer balance over locality - else { - return prefPartActual // prefer locality over balance + if (minPowerOfTwo.size + slack <= prefPartActual.size) { // more imbalance than the slack allows + minPowerOfTwo // prefer balance over locality + } else { + prefPartActual // prefer locality over balance } } @@ -330,7 +331,7 @@ private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanc */ def run(): Array[PartitionGroup] = { setupGroups(math.min(prev.partitions.length, maxPartitions)) // setup the groups (bins) - throwBalls() // assign partitions (balls) to each group (bins) + throwBalls() // assign partitions (balls) to each group (bins) getPartitions } } diff --git a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala index a4bec417529fc..688c310ee9caf 100644 --- a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala @@ -24,6 +24,8 @@ import org.apache.spark.partial.SumEvaluator import org.apache.spark.util.StatCounter import org.apache.spark.{TaskContext, Logging} +import scala.collection.immutable.NumericRange + /** * Extra functions available on RDDs of Doubles through an implicit conversion. * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. @@ -76,4 +78,129 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { val evaluator = new SumEvaluator(self.partitions.size, confidence) self.context.runApproximateJob(self, processPartition, evaluator, timeout) } + + /** + * Compute a histogram of the data using bucketCount number of buckets evenly + * spaced between the minimum and maximum of the RDD. For example if the min + * value is 0 and the max is 100 and there are two buckets the resulting + * buckets will be [0, 50) [50, 100]. bucketCount must be at least 1 + * If the RDD contains infinity, NaN throws an exception + * If the elements in RDD do not vary (max == min) always returns a single bucket. 
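
A minimal usage sketch of the histogram(bucketCount) method documented above and defined just below. It assumes a SparkContext named sc and the org.apache.spark.SparkContext._ implicits in scope; the numbers are made up to match the [min, mid) / [mid, max] semantics described in the scaladoc.

    val data = sc.parallelize(Seq(1.0, 2.5, 7.5, 9.0, 10.0))
    val (buckets, counts) = data.histogram(2)
    // buckets = Array(1.0, 5.5, 10.0)  -> two buckets: [1.0, 5.5) and [5.5, 10.0]
    // counts  = Array(2, 3)            -> {1.0, 2.5} in the first, {7.5, 9.0, 10.0} in the second
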
+ */ + def histogram(bucketCount: Int): Pair[Array[Double], Array[Long]] = { + // Compute the minimum and the maxium + val (max: Double, min: Double) = self.mapPartitions { items => + Iterator(items.foldRight(Double.NegativeInfinity, + Double.PositiveInfinity)((e: Double, x: Pair[Double, Double]) => + (x._1.max(e), x._2.min(e)))) + }.reduce { (maxmin1, maxmin2) => + (maxmin1._1.max(maxmin2._1), maxmin1._2.min(maxmin2._2)) + } + if (min.isNaN || max.isNaN || max.isInfinity || min.isInfinity ) { + throw new UnsupportedOperationException( + "Histogram on either an empty RDD or RDD containing +/-infinity or NaN") + } + val increment = (max-min)/bucketCount.toDouble + val range = if (increment != 0) { + Range.Double.inclusive(min, max, increment) + } else { + List(min, min) + } + val buckets = range.toArray + (buckets, histogram(buckets, true)) + } + + /** + * Compute a histogram using the provided buckets. The buckets are all open + * to the left except for the last which is closed + * e.g. for the array + * [1, 10, 20, 50] the buckets are [1, 10) [10, 20) [20, 50] + * e.g 1<=x<10 , 10<=x<20, 20<=x<50 + * And on the input of 1 and 50 we would have a histogram of 1, 0, 0 + * + * Note: if your histogram is evenly spaced (e.g. [0, 10, 20, 30]) this can be switched + * from an O(log n) inseration to O(1) per element. (where n = # buckets) if you set evenBuckets + * to true. + * buckets must be sorted and not contain any duplicates. + * buckets array must be at least two elements + * All NaN entries are treated the same. If you have a NaN bucket it must be + * the maximum value of the last position and all NaN entries will be counted + * in that bucket. + */ + def histogram(buckets: Array[Double], evenBuckets: Boolean = false): Array[Long] = { + if (buckets.length < 2) { + throw new IllegalArgumentException("buckets array must have at least two elements") + } + // The histogramPartition function computes the partail histogram for a given + // partition. The provided bucketFunction determines which bucket in the array + // to increment or returns None if there is no bucket. This is done so we can + // specialize for uniformly distributed buckets and save the O(log n) binary + // search cost. + def histogramPartition(bucketFunction: (Double) => Option[Int])(iter: Iterator[Double]): + Iterator[Array[Long]] = { + val counters = new Array[Long](buckets.length - 1) + while (iter.hasNext) { + bucketFunction(iter.next()) match { + case Some(x: Int) => {counters(x) += 1} + case _ => {} + } + } + Iterator(counters) + } + // Merge the counters. + def mergeCounters(a1: Array[Long], a2: Array[Long]): Array[Long] = { + a1.indices.foreach(i => a1(i) += a2(i)) + a1 + } + // Basic bucket function. This works using Java's built in Array + // binary search. Takes log(size(buckets)) + def basicBucketFunction(e: Double): Option[Int] = { + val location = java.util.Arrays.binarySearch(buckets, e) + if (location < 0) { + // If the location is less than 0 then the insertion point in the array + // to keep it sorted is -location-1 + val insertionPoint = -location-1 + // If we have to insert before the first element or after the last one + // its out of bounds. + // We do this rather than buckets.lengthCompare(insertionPoint) + // because Array[Double] fails to override it (for now). 
+ if (insertionPoint > 0 && insertionPoint < buckets.length) { + Some(insertionPoint-1) + } else { + None + } + } else if (location < buckets.length - 1) { + // Exact match, just insert here + Some(location) + } else { + // Exact match to the last element + Some(location - 1) + } + } + // Determine the bucket function in constant time. Requires that buckets are evenly spaced + def fastBucketFunction(min: Double, increment: Double, count: Int)(e: Double): Option[Int] = { + // If our input is not a number unless the increment is also NaN then we fail fast + if (e.isNaN()) { + return None + } + val bucketNumber = (e - min)/(increment) + // We do this rather than buckets.lengthCompare(bucketNumber) + // because Array[Double] fails to override it (for now). + if (bucketNumber > count || bucketNumber < 0) { + None + } else { + Some(bucketNumber.toInt.min(count - 1)) + } + } + // Decide which bucket function to pass to histogramPartition. We decide here + // rather than having a general function so that the decission need only be made + // once rather than once per shard + val bucketFunction = if (evenBuckets) { + fastBucketFunction(buckets(0), buckets(1)-buckets(0), buckets.length-1) _ + } else { + basicBucketFunction _ + } + self.mapPartitions(histogramPartition(bucketFunction)).reduce(mergeCounters) + } + } diff --git a/core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala b/core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala index c8900d1a9346d..a84e5f9fd8ef8 100644 --- a/core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala @@ -17,13 +17,14 @@ package org.apache.spark.rdd -import org.apache.spark.{SparkContext, SparkEnv, Partition, TaskContext} +import scala.reflect.ClassTag +import org.apache.spark.{Partition, SparkContext, TaskContext} /** * An RDD that is empty, i.e. has no element in it. 
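
The EmptyRDD change below is the first of many that follow the same pattern in this patch: the Scala 2.9 ClassManifest context bound is replaced by Scala 2.10's scala.reflect.ClassTag. A hedged sketch of what the migration looks like for user code that defines generic helpers over RDDs; the helper itself is hypothetical.

    import scala.reflect.ClassTag
    import org.apache.spark.rdd.RDD

    // Before (Scala 2.9 / Spark 0.8): def firstOrElse[T: ClassManifest](rdd: RDD[T], default: T): T
    // After (Scala 2.10 / Spark 0.9):
    def firstOrElse[T: ClassTag](rdd: RDD[T], default: T): T =
      rdd.take(1).headOption.getOrElse(default)
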
*/ -class EmptyRDD[T: ClassManifest](sc: SparkContext) extends RDD[T](sc, Nil) { +class EmptyRDD[T: ClassTag](sc: SparkContext) extends RDD[T](sc, Nil) { override def getPartitions: Array[Partition] = Array.empty diff --git a/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala index 5312dc0b59388..e74c83b90baa3 100644 --- a/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala @@ -18,8 +18,9 @@ package org.apache.spark.rdd import org.apache.spark.{OneToOneDependency, Partition, TaskContext} +import scala.reflect.ClassTag -private[spark] class FilteredRDD[T: ClassManifest]( +private[spark] class FilteredRDD[T: ClassTag]( prev: RDD[T], f: T => Boolean) extends RDD[T](prev) { diff --git a/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala index cbdf6d84c0706..4d1878fc142ac 100644 --- a/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala @@ -18,10 +18,11 @@ package org.apache.spark.rdd import org.apache.spark.{Partition, TaskContext} +import scala.reflect.ClassTag private[spark] -class FlatMappedRDD[U: ClassManifest, T: ClassManifest]( +class FlatMappedRDD[U: ClassTag, T: ClassTag]( prev: RDD[T], f: T => TraversableOnce[U]) extends RDD[U](prev) { diff --git a/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala index 829545d7b0aff..1a694475f6975 100644 --- a/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala @@ -18,8 +18,9 @@ package org.apache.spark.rdd import org.apache.spark.{Partition, TaskContext} +import scala.reflect.ClassTag -private[spark] class GlommedRDD[T: ClassManifest](prev: RDD[T]) +private[spark] class GlommedRDD[T: ClassTag](prev: RDD[T]) extends RDD[Array[T]](prev) { override def getPartitions: Array[Partition] = firstParent[T].partitions diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index 53f77a38f55f6..ad74d4636fb1b 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -19,7 +19,7 @@ package org.apache.spark.rdd import java.io.EOFException -import org.apache.hadoop.mapred.FileInputFormat +import org.apache.hadoop.conf.{Configuration, Configurable} import org.apache.hadoop.mapred.InputFormat import org.apache.hadoop.mapred.InputSplit import org.apache.hadoop.mapred.JobConf @@ -31,7 +31,6 @@ import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.util.NextIterator -import org.apache.hadoop.conf.{Configuration, Configurable} /** @@ -42,14 +41,14 @@ private[spark] class HadoopPartition(rddId: Int, idx: Int, @transient s: InputSp val inputSplit = new SerializableWritable[InputSplit](s) - override def hashCode(): Int = (41 * (41 + rddId) + idx).toInt + override def hashCode(): Int = 41 * (41 + rddId) + idx override val index: Int = idx } /** * An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS, - * sources in HBase, or S3). + * sources in HBase, or S3), using the older MapReduce API (`org.apache.hadoop.mapred`). * * @param sc The SparkContext to associate the RDD with. 
* @param broadcastedConf A general Hadoop Configuration, or a subclass of it. If the enclosed @@ -99,11 +98,11 @@ class HadoopRDD[K, V]( val conf: Configuration = broadcastedConf.value.value if (conf.isInstanceOf[JobConf]) { // A user-broadcasted JobConf was provided to the HadoopRDD, so always use it. - return conf.asInstanceOf[JobConf] + conf.asInstanceOf[JobConf] } else if (HadoopRDD.containsCachedMetadata(jobConfCacheKey)) { // getJobConf() has been called previously, so there is already a local cache of the JobConf // needed by this RDD. - return HadoopRDD.getCachedMetadata(jobConfCacheKey).asInstanceOf[JobConf] + HadoopRDD.getCachedMetadata(jobConfCacheKey).asInstanceOf[JobConf] } else { // Create a JobConf that will be cached and used across this RDD's getJobConf() calls in the // local process. The local cache is accessed through HadoopRDD.putCachedMetadata(). @@ -111,7 +110,7 @@ class HadoopRDD[K, V]( val newJobConf = new JobConf(broadcastedConf.value.value) initLocalJobConfFuncOpt.map(f => f(newJobConf)) HadoopRDD.putCachedMetadata(jobConfCacheKey, newJobConf) - return newJobConf + newJobConf } } @@ -127,7 +126,7 @@ class HadoopRDD[K, V]( newInputFormat.asInstanceOf[Configurable].setConf(conf) } HadoopRDD.putCachedMetadata(inputFormatCacheKey, newInputFormat) - return newInputFormat + newInputFormat } override def getPartitions: Array[Partition] = { @@ -158,10 +157,8 @@ class HadoopRDD[K, V]( // Register an on-task-completion callback to close the input stream. context.addOnCompleteCallback{ () => closeIfNeeded() } - val key: K = reader.createKey() val value: V = reader.createValue() - override def getNext() = { try { finished = !reader.next(key, value) diff --git a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala index aca01468842c4..8df8718f3b65b 100644 --- a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala @@ -19,6 +19,8 @@ package org.apache.spark.rdd import java.sql.{Connection, ResultSet} +import scala.reflect.ClassTag + import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} import org.apache.spark.util.NextIterator @@ -45,7 +47,7 @@ private[spark] class JdbcPartition(idx: Int, val lower: Long, val upper: Long) e * This should only call getInt, getString, etc; the RDD takes care of calling next. * The default maps a ResultSet to an array of Object. 
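
A hedged usage sketch for JdbcRDD as documented above. The JDBC URL, table, and column names are made up for illustration; the query must contain the two '?' placeholders that receive each partition's lower and upper bounds.

    import java.sql.{DriverManager, ResultSet}
    import org.apache.spark.rdd.JdbcRDD

    val users = new JdbcRDD(
      sc,
      () => DriverManager.getConnection("jdbc:h2:mem:testdb"),   // hypothetical connection
      "SELECT id, name FROM users WHERE id >= ? AND id <= ?",    // bounds bound to the two '?'
      1L, 1000L,                                                 // lowerBound, upperBound
      3,                                                         // numPartitions
      (rs: ResultSet) => (rs.getLong(1), rs.getString(2)))       // mapRow
    users.count()
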
*/ -class JdbcRDD[T: ClassManifest]( +class JdbcRDD[T: ClassTag]( sc: SparkContext, getConnection: () => Connection, sql: String, diff --git a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala index ae70d559511c9..db15baf503ad6 100644 --- a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala @@ -18,9 +18,9 @@ package org.apache.spark.rdd import org.apache.spark.{Partition, TaskContext} +import scala.reflect.ClassTag - -private[spark] class MapPartitionsRDD[U: ClassManifest, T: ClassManifest]( +private[spark] class MapPartitionsRDD[U: ClassTag, T: ClassTag]( prev: RDD[T], f: (TaskContext, Int, Iterator[T]) => Iterator[U], // (TaskContext, partition index, iterator) preservesPartitioning: Boolean = false) diff --git a/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala index e8be1c4816e45..8d7c288593665 100644 --- a/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala @@ -17,10 +17,12 @@ package org.apache.spark.rdd +import scala.reflect.ClassTag + import org.apache.spark.{Partition, TaskContext} private[spark] -class MappedRDD[U: ClassManifest, T: ClassManifest](prev: RDD[T], f: T => U) +class MappedRDD[U: ClassTag, T: ClassTag](prev: RDD[T], f: T => U) extends RDD[U](prev) { override def getPartitions: Array[Partition] = firstParent[T].partitions diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index 2662d48c84a2e..d1fff296878c3 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -26,16 +26,25 @@ import org.apache.hadoop.mapreduce._ import org.apache.spark.{InterruptibleIterator, Logging, Partition, SerializableWritable, SparkContext, TaskContext} - private[spark] class NewHadoopPartition(rddId: Int, val index: Int, @transient rawSplit: InputSplit with Writable) extends Partition { val serializableHadoopSplit = new SerializableWritable(rawSplit) - override def hashCode(): Int = (41 * (41 + rddId) + index) + override def hashCode(): Int = 41 * (41 + rddId) + index } +/** + * An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS, + * sources in HBase, or S3), using the new MapReduce API (`org.apache.hadoop.mapreduce`). + * + * @param sc The SparkContext to associate the RDD with. + * @param inputFormatClass Storage format of the data to be read. + * @param keyClass Class of the key associated with the inputFormatClass. + * @param valueClass Class of the value associated with the inputFormatClass. + * @param conf The Hadoop configuration. 
+ */ class NewHadoopRDD[K, V]( sc : SparkContext, inputFormatClass: Class[_ <: InputFormat[K, V]], @@ -76,7 +85,7 @@ class NewHadoopRDD[K, V]( val split = theSplit.asInstanceOf[NewHadoopPartition] logInfo("Input split: " + split.serializableHadoopSplit) val conf = confBroadcast.value.value - val attemptId = newTaskAttemptID(jobtrackerId, id, true, split.index, 0) + val attemptId = newTaskAttemptID(jobtrackerId, id, isMap = true, split.index, 0) val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) val format = inputFormatClass.newInstance if (format.isInstanceOf[Configurable]) { @@ -88,7 +97,6 @@ class NewHadoopRDD[K, V]( // Register an on-task-completion callback to close the input stream. context.addOnCompleteCallback(() => close()) - var havePair = false var finished = false diff --git a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala index 697be8b997bbd..d5691f2267bfa 100644 --- a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala @@ -17,7 +17,9 @@ package org.apache.spark.rdd -import org.apache.spark.{RangePartitioner, Logging} +import scala.reflect.ClassTag + +import org.apache.spark.{Logging, RangePartitioner} /** * Extra functions available on RDDs of (key, value) pairs where the key is sortable through @@ -25,9 +27,9 @@ import org.apache.spark.{RangePartitioner, Logging} * use these functions. They will work with any key type that has a `scala.math.Ordered` * implementation. */ -class OrderedRDDFunctions[K <% Ordered[K]: ClassManifest, - V: ClassManifest, - P <: Product2[K, V] : ClassManifest]( +class OrderedRDDFunctions[K <% Ordered[K]: ClassTag, + V: ClassTag, + P <: Product2[K, V] : ClassTag]( self: RDD[P]) extends Logging with Serializable { diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index 93b78e123267c..9bafe41f0da58 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -18,39 +18,42 @@ package org.apache.spark.rdd import java.nio.ByteBuffer -import java.util.Date import java.text.SimpleDateFormat +import java.util.Date import java.util.{HashMap => JHashMap} -import scala.collection.{mutable, Map} +import scala.collection.Map +import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.collection.JavaConversions._ +import scala.reflect.{ClassTag, classTag} -import org.apache.hadoop.mapred._ -import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io.SequenceFile.CompressionType -import org.apache.hadoop.mapred.FileOutputFormat -import org.apache.hadoop.mapred.OutputFormat +import org.apache.hadoop.io.compress.CompressionCodec +import org.apache.hadoop.mapred.{FileOutputCommitter, FileOutputFormat, JobConf, OutputFormat} import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} -import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat => NewFileOutputFormat} -import org.apache.hadoop.mapreduce.SparkHadoopMapReduceUtil import org.apache.hadoop.mapreduce.{Job => NewAPIHadoopJob} import org.apache.hadoop.mapreduce.{RecordWriter => NewRecordWriter} +import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat => NewFileOutputFormat} + +import 
com.clearspring.analytics.stream.cardinality.HyperLogLog +// SparkHadoopWriter and SparkHadoopMapReduceUtil are actually source files defined in Spark. +import org.apache.hadoop.mapred.SparkHadoopWriter +import org.apache.hadoop.mapreduce.SparkHadoopMapReduceUtil import org.apache.spark._ import org.apache.spark.SparkContext._ import org.apache.spark.partial.{BoundedDouble, PartialResult} -import org.apache.spark.Aggregator -import org.apache.spark.Partitioner import org.apache.spark.Partitioner.defaultPartitioner +import org.apache.spark.util.SerializableHyperLogLog /** * Extra functions available on RDDs of (key, value) pairs through an implicit conversion. * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. */ -class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) +class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Logging with SparkHadoopMapReduceUtil with Serializable { @@ -74,6 +77,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) partitioner: Partitioner, mapSideCombine: Boolean = true, serializerClass: String = null): RDD[(K, C)] = { + require(mergeCombiners != null, "mergeCombiners must be defined") // required as of Spark 0.9.0 if (getKeyClass().isArray) { if (mapSideCombine) { throw new SparkException("Cannot use map-side combining with array keys.") @@ -85,22 +89,22 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) val aggregator = new Aggregator[K, V, C](createCombiner, mergeValue, mergeCombiners) if (self.partitioner == Some(partitioner)) { self.mapPartitionsWithContext((context, iter) => { - new InterruptibleIterator(context, aggregator.combineValuesByKey(iter)) + new InterruptibleIterator(context, aggregator.combineValuesByKey(iter, context)) }, preservesPartitioning = true) } else if (mapSideCombine) { - val combined = self.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true) + val combined = self.mapPartitionsWithContext((context, iter) => { + aggregator.combineValuesByKey(iter, context) + }, preservesPartitioning = true) val partitioned = new ShuffledRDD[K, C, (K, C)](combined, partitioner) .setSerializer(serializerClass) partitioned.mapPartitionsWithContext((context, iter) => { - new InterruptibleIterator(context, aggregator.combineCombinersByKey(iter)) + new InterruptibleIterator(context, aggregator.combineCombinersByKey(iter, context)) }, preservesPartitioning = true) } else { // Don't apply map-side combiner. - // A sanity check to make sure mergeCombiners is not defined. - assert(mergeCombiners == null) val values = new ShuffledRDD[K, V, (K, V)](self, partitioner).setSerializer(serializerClass) values.mapPartitionsWithContext((context, iter) => { - new InterruptibleIterator(context, aggregator.combineValuesByKey(iter)) + new InterruptibleIterator(context, aggregator.combineValuesByKey(iter, context)) }, preservesPartitioning = true) } } @@ -116,9 +120,9 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) } /** - * Merge the values for each key using an associative function and a neutral "zero value" which may - * be added to the result an arbitrary number of times, and must not change the result (e.g., Nil for - * list concatenation, 0 for addition, or 1 for multiplication.). 
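
The new require(mergeCombiners != null, ...) above reflects that, with spillable map-side combining, combiners produced by different spills may need to be merged, so a real mergeCombiners function is now mandatory. A short sketch; the data is made up and assumes the SparkContext._ implicits are in scope.

    val pairs = sc.parallelize(Seq(("a", 1), ("a", 2), ("b", 5)))
    val grouped = pairs.combineByKey(
      (v: Int) => List(v),                          // createCombiner
      (buf: List[Int], v: Int) => v :: buf,         // mergeValue
      (b1: List[Int], b2: List[Int]) => b1 ::: b2)  // mergeCombiners -- may no longer be null
    // grouped.collect() => e.g. ("a", List(2, 1)), ("b", List(5))
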
+ * Merge the values for each key using an associative function and a neutral "zero value" which + * may be added to the result an arbitrary number of times, and must not change the result + * (e.g., Nil for list concatenation, 0 for addition, or 1 for multiplication.). */ def foldByKey(zeroValue: V, partitioner: Partitioner)(func: (V, V) => V): RDD[(K, V)] = { // Serialize the zero value to a byte array so that we can get a new clone of it on each key @@ -134,18 +138,18 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) } /** - * Merge the values for each key using an associative function and a neutral "zero value" which may - * be added to the result an arbitrary number of times, and must not change the result (e.g., Nil for - * list concatenation, 0 for addition, or 1 for multiplication.). + * Merge the values for each key using an associative function and a neutral "zero value" which + * may be added to the result an arbitrary number of times, and must not change the result + * (e.g., Nil for list concatenation, 0 for addition, or 1 for multiplication.). */ def foldByKey(zeroValue: V, numPartitions: Int)(func: (V, V) => V): RDD[(K, V)] = { foldByKey(zeroValue, new HashPartitioner(numPartitions))(func) } /** - * Merge the values for each key using an associative function and a neutral "zero value" which may - * be added to the result an arbitrary number of times, and must not change the result (e.g., Nil for - * list concatenation, 0 for addition, or 1 for multiplication.). + * Merge the values for each key using an associative function and a neutral "zero value" which + * may be added to the result an arbitrary number of times, and must not change the result + * (e.g., Nil for list concatenation, 0 for addition, or 1 for multiplication.). */ def foldByKey(zeroValue: V)(func: (V, V) => V): RDD[(K, V)] = { foldByKey(zeroValue, defaultPartitioner(self))(func) @@ -206,6 +210,45 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) self.map(_._1).countByValueApprox(timeout, confidence) } + /** + * Return approximate number of distinct values for each key in this RDD. + * The accuracy of approximation can be controlled through the relative standard deviation + * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in + * more accurate counts but increase the memory footprint and vise versa. Uses the provided + * Partitioner to partition the output RDD. + */ + def countApproxDistinctByKey(relativeSD: Double, partitioner: Partitioner): RDD[(K, Long)] = { + val createHLL = (v: V) => new SerializableHyperLogLog(new HyperLogLog(relativeSD)).add(v) + val mergeValueHLL = (hll: SerializableHyperLogLog, v: V) => hll.add(v) + val mergeHLL = (h1: SerializableHyperLogLog, h2: SerializableHyperLogLog) => h1.merge(h2) + + combineByKey(createHLL, mergeValueHLL, mergeHLL, partitioner).mapValues(_.value.cardinality()) + } + + /** + * Return approximate number of distinct values for each key in this RDD. + * The accuracy of approximation can be controlled through the relative standard deviation + * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in + * more accurate counts but increase the memory footprint and vise versa. HashPartitions the + * output RDD into numPartitions. 
+ * + */ + def countApproxDistinctByKey(relativeSD: Double, numPartitions: Int): RDD[(K, Long)] = { + countApproxDistinctByKey(relativeSD, new HashPartitioner(numPartitions)) + } + + /** + * Return approximate number of distinct values for each key this RDD. + * The accuracy of approximation can be controlled through the relative standard deviation + * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in + * more accurate counts but increase the memory footprint and vise versa. The default value of + * relativeSD is 0.05. Hash-partitions the output RDD using the existing partitioner/parallelism + * level. + */ + def countApproxDistinctByKey(relativeSD: Double = 0.05): RDD[(K, Long)] = { + countApproxDistinctByKey(relativeSD, defaultPartitioner(self)) + } + /** * Merge the values for each key using an associative reduce function. This will also perform * the merging locally on each mapper before sending results to a reducer, similarly to a @@ -225,8 +268,9 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) // into a hash table, leading to more objects in the old gen. def createCombiner(v: V) = ArrayBuffer(v) def mergeValue(buf: ArrayBuffer[V], v: V) = buf += v + def mergeCombiners(c1: ArrayBuffer[V], c2: ArrayBuffer[V]) = c1 ++ c2 val bufs = combineByKey[ArrayBuffer[V]]( - createCombiner _, mergeValue _, null, partitioner, mapSideCombine=false) + createCombiner _, mergeValue _, mergeCombiners _, partitioner, mapSideCombine=false) bufs.asInstanceOf[RDD[(K, Seq[V])]] } @@ -245,7 +289,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) if (getKeyClass().isArray && partitioner.isInstanceOf[HashPartitioner]) { throw new SparkException("Default partitioner cannot partition array keys.") } - new ShuffledRDD[K, V, (K, V)](self, partitioner) + if (self.partitioner == partitioner) self else new ShuffledRDD[K, V, (K, V)](self, partitioner) } /** @@ -297,7 +341,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) * existing partitioner/parallelism level. 
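
A usage sketch for the countApproxDistinctByKey variants added above; a smaller relativeSD spends more memory for a tighter estimate. The data is made up: each of the 10 keys sees about 100 distinct values.

    val events = sc.parallelize(1 to 100000).map(i => (i % 10, i % 1000))
    events.countApproxDistinctByKey(0.05).collect()
    // roughly (k, ~100) for each key k
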
*/ def combineByKey[C](createCombiner: V => C, mergeValue: (C, V) => C, mergeCombiners: (C, C) => C) - : RDD[(K, C)] = { + : RDD[(K, C)] = { combineByKey(createCombiner, mergeValue, mergeCombiners, defaultPartitioner(self)) } @@ -415,7 +459,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) throw new SparkException("Default partitioner cannot partition array keys.") } val cg = new CoGroupedRDD[K](Seq(self, other), partitioner) - val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classManifest[K], Manifests.seqSeqManifest) + val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classTag[K], ClassTags.seqSeqClassTag) prfs.mapValues { case Seq(vs, ws) => (vs.asInstanceOf[Seq[V]], ws.asInstanceOf[Seq[W]]) } @@ -431,7 +475,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) throw new SparkException("Default partitioner cannot partition array keys.") } val cg = new CoGroupedRDD[K](Seq(self, other1, other2), partitioner) - val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classManifest[K], Manifests.seqSeqManifest) + val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classTag[K], ClassTags.seqSeqClassTag) prfs.mapValues { case Seq(vs, w1s, w2s) => (vs.asInstanceOf[Seq[V]], w1s.asInstanceOf[Seq[W1]], w2s.asInstanceOf[Seq[W2]]) } @@ -488,15 +532,15 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) * Uses `this` partitioner/partition size, because even if `other` is huge, the resulting * RDD will be <= us. */ - def subtractByKey[W: ClassManifest](other: RDD[(K, W)]): RDD[(K, V)] = + def subtractByKey[W: ClassTag](other: RDD[(K, W)]): RDD[(K, V)] = subtractByKey(other, self.partitioner.getOrElse(new HashPartitioner(self.partitions.size))) /** Return an RDD with the pairs from `this` whose keys are not in `other`. */ - def subtractByKey[W: ClassManifest](other: RDD[(K, W)], numPartitions: Int): RDD[(K, V)] = + def subtractByKey[W: ClassTag](other: RDD[(K, W)], numPartitions: Int): RDD[(K, V)] = subtractByKey(other, new HashPartitioner(numPartitions)) /** Return an RDD with the pairs from `this` whose keys are not in `other`. */ - def subtractByKey[W: ClassManifest](other: RDD[(K, W)], p: Partitioner): RDD[(K, V)] = + def subtractByKey[W: ClassTag](other: RDD[(K, W)], p: Partitioner): RDD[(K, V)] = new SubtractedRDD[K, V, W](self, other, p) /** @@ -525,8 +569,8 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) * Output the RDD to any Hadoop-supported file system, using a Hadoop `OutputFormat` class * supporting the key and value types K and V in this RDD. */ - def saveAsHadoopFile[F <: OutputFormat[K, V]](path: String)(implicit fm: ClassManifest[F]) { - saveAsHadoopFile(path, getKeyClass, getValueClass, fm.erasure.asInstanceOf[Class[F]]) + def saveAsHadoopFile[F <: OutputFormat[K, V]](path: String)(implicit fm: ClassTag[F]) { + saveAsHadoopFile(path, getKeyClass, getValueClass, fm.runtimeClass.asInstanceOf[Class[F]]) } /** @@ -535,16 +579,17 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) * supplied codec. 
*/ def saveAsHadoopFile[F <: OutputFormat[K, V]]( - path: String, codec: Class[_ <: CompressionCodec]) (implicit fm: ClassManifest[F]) { - saveAsHadoopFile(path, getKeyClass, getValueClass, fm.erasure.asInstanceOf[Class[F]], codec) + path: String, codec: Class[_ <: CompressionCodec]) (implicit fm: ClassTag[F]) { + val runtimeClass = fm.runtimeClass + saveAsHadoopFile(path, getKeyClass, getValueClass, runtimeClass.asInstanceOf[Class[F]], codec) } /** * Output the RDD to any Hadoop-supported file system, using a new Hadoop API `OutputFormat` * (mapreduce.OutputFormat) object supporting the key and value types K and V in this RDD. */ - def saveAsNewAPIHadoopFile[F <: NewOutputFormat[K, V]](path: String)(implicit fm: ClassManifest[F]) { - saveAsNewAPIHadoopFile(path, getKeyClass, getValueClass, fm.erasure.asInstanceOf[Class[F]]) + def saveAsNewAPIHadoopFile[F <: NewOutputFormat[K, V]](path: String)(implicit fm: ClassTag[F]) { + saveAsNewAPIHadoopFile(path, getKeyClass, getValueClass, fm.runtimeClass.asInstanceOf[Class[F]]) } /** @@ -556,7 +601,8 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) keyClass: Class[_], valueClass: Class[_], outputFormatClass: Class[_ <: NewOutputFormat[_, _]], - conf: Configuration = self.context.hadoopConfiguration) { + conf: Configuration = self.context.hadoopConfiguration) + { val job = new NewAPIHadoopJob(conf) job.setOutputKeyClass(keyClass) job.setOutputValueClass(valueClass) @@ -570,7 +616,8 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) // around by taking a mod. We expect that no task will be attempted 2 billion times. val attemptNumber = (context.attemptId % Int.MaxValue).toInt /* "reduce task" */ - val attemptId = newTaskAttemptID(jobtrackerID, stageId, false, context.partitionId, attemptNumber) + val attemptId = newTaskAttemptID(jobtrackerID, stageId, isMap = false, context.partitionId, + attemptNumber) val hadoopContext = newTaskAttemptContext(wrappedConf.value, attemptId) val format = outputFormatClass.newInstance val committer = format.getOutputCommitter(hadoopContext) @@ -589,13 +636,12 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) * however we're only going to use this local OutputCommitter for * setupJob/commitJob, so we just use a dummy "map" task. */ - val jobAttemptId = newTaskAttemptID(jobtrackerID, stageId, true, 0, 0) + val jobAttemptId = newTaskAttemptID(jobtrackerID, stageId, isMap = true, 0, 0) val jobTaskContext = newTaskAttemptContext(wrappedConf.value, jobAttemptId) val jobCommitter = jobFormat.getOutputCommitter(jobTaskContext) jobCommitter.setupJob(jobTaskContext) val count = self.context.runJob(self, writeShard _).sum jobCommitter.commitJob(jobTaskContext) - jobCommitter.cleanupJob(jobTaskContext) } /** @@ -625,7 +671,9 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) codec: Option[Class[_ <: CompressionCodec]] = None) { conf.setOutputKeyClass(keyClass) conf.setOutputValueClass(valueClass) - // conf.setOutputFormat(outputFormatClass) // Doesn't work in Scala 2.9 due to what may be a generics bug + // Doesn't work in Scala 2.9 due to what may be a generics bug + // TODO: Should we uncomment this for Scala 2.10? 
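
For reference, a hedged sketch of saving a pair RDD through the old-API saveAsHadoopFile shown above; the output path is hypothetical, the ClassTag for the output format is supplied implicitly, and the SparkContext._ implicits are assumed to be in scope.

    import org.apache.hadoop.io.{IntWritable, Text}
    import org.apache.hadoop.mapred.TextOutputFormat

    pairs
      .map { case (k, v) => (new Text(k), new IntWritable(v)) }
      .saveAsHadoopFile[TextOutputFormat[Text, IntWritable]]("/tmp/pairs-out")  // hypothetical path
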
+ // conf.setOutputFormat(outputFormatClass) conf.set("mapred.output.format.class", outputFormatClass.getName) for (c <- codec) { conf.setCompressMapOutput(true) @@ -659,7 +707,8 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) throw new SparkException("Output value class not set") } - logInfo("Saving as hadoop file of type (" + keyClass.getSimpleName+ ", " + valueClass.getSimpleName+ ")") + logDebug("Saving as hadoop file of type (" + keyClass.getSimpleName + ", " + + valueClass.getSimpleName+ ")") val writer = new SparkHadoopWriter(conf) writer.preSetup() @@ -685,7 +734,6 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) self.context.runJob(self, writeToFile _) writer.commitJob() - writer.cleanup() } /** @@ -698,11 +746,11 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) */ def values: RDD[V] = self.map(_._2) - private[spark] def getKeyClass() = implicitly[ClassManifest[K]].erasure + private[spark] def getKeyClass() = implicitly[ClassTag[K]].runtimeClass - private[spark] def getValueClass() = implicitly[ClassManifest[V]].erasure + private[spark] def getValueClass() = implicitly[ClassTag[V]].runtimeClass } -private[spark] object Manifests { - val seqSeqManifest = classManifest[Seq[Seq[_]]] +private[spark] object ClassTags { + val seqSeqClassTag = classTag[Seq[Seq[_]]] } diff --git a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala index cd96250389862..09d0a8189d25c 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala @@ -20,13 +20,15 @@ package org.apache.spark.rdd import scala.collection.immutable.NumericRange import scala.collection.mutable.ArrayBuffer import scala.collection.Map +import scala.reflect.ClassTag + import org.apache.spark._ import java.io._ import scala.Serializable import org.apache.spark.serializer.JavaSerializer import org.apache.spark.util.Utils -private[spark] class ParallelCollectionPartition[T: ClassManifest]( +private[spark] class ParallelCollectionPartition[T: ClassTag]( var rddId: Long, var slice: Int, var values: Seq[T]) @@ -78,7 +80,7 @@ private[spark] class ParallelCollectionPartition[T: ClassManifest]( } } -private[spark] class ParallelCollectionRDD[T: ClassManifest]( +private[spark] class ParallelCollectionRDD[T: ClassTag]( @transient sc: SparkContext, @transient data: Seq[T], numSlices: Int, @@ -109,7 +111,7 @@ private object ParallelCollectionRDD { * collections specially, encoding the slices as other Ranges to minimize memory cost. This makes * it efficient to run Spark over RDDs representing large sets of numbers. 
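
A small sketch of the Range-aware slicing described above: parallelizing a Range keeps each slice encoded as a Range rather than materializing every number, so the example below stays cheap even for large inputs. Assumes a SparkContext named sc.

    val nums = sc.parallelize(1 to 1000000, 8)        // 8 slices, each kept as a Range
    nums.mapPartitions(it => Iterator(it.size)).collect()
    // Array(125000, 125000, ..., 125000)             // eight equally sized slices
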
*/ - def slice[T: ClassManifest](seq: Seq[T], numSlices: Int): Seq[Seq[T]] = { + def slice[T: ClassTag](seq: Seq[T], numSlices: Int): Seq[Seq[T]] = { if (numSlices < 1) { throw new IllegalArgumentException("Positive number of slices required") } diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala index 574dd4233fb27..ea8885b36e3ae 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala @@ -17,6 +17,8 @@ package org.apache.spark.rdd +import scala.reflect.ClassTag + import org.apache.spark.{NarrowDependency, SparkEnv, Partition, TaskContext} @@ -49,7 +51,7 @@ class PruneDependency[T](rdd: RDD[T], @transient partitionFilterFunc: Int => Boo * and the execution DAG has a filter on the key, we can avoid launching tasks * on partitions that don't have the range covering the key. */ -class PartitionPruningRDD[T: ClassManifest]( +class PartitionPruningRDD[T: ClassTag]( @transient prev: RDD[T], @transient partitionFilterFunc: Int => Boolean) extends RDD[T](prev.context, List(new PruneDependency(prev, partitionFilterFunc))) { @@ -69,6 +71,6 @@ object PartitionPruningRDD { * when its type T is not known at compile time. */ def create[T](rdd: RDD[T], partitionFilterFunc: Int => Boolean) = { - new PartitionPruningRDD[T](rdd, partitionFilterFunc)(rdd.elementClassManifest) + new PartitionPruningRDD[T](rdd, partitionFilterFunc)(rdd.elementClassTag) } } diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala new file mode 100644 index 0000000000000..4c625d062eb9b --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rdd + +import scala.reflect.ClassTag +import java.io.{ObjectOutputStream, IOException} +import org.apache.spark.{TaskContext, OneToOneDependency, SparkContext, Partition} + + +/** + * Class representing partitions of PartitionerAwareUnionRDD, which maintains the list of corresponding partitions + * of parent RDDs. 
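
The preferred-location rule described in the new PartitionerAwareUnionRDD below ("the most common preferred location of the corresponding parent partitions") boils down to a group-and-maxBy over host names. Restated here as a tiny standalone function for clarity; the host names are made up.

    def mostCommonLocation(locations: Seq[String]): Option[String] =
      if (locations.isEmpty) None
      else Some(locations.groupBy(identity).maxBy(_._2.length)._1)

    mostCommonLocation(Seq("hostA", "hostB", "hostA"))  // Some("hostA")
    mostCommonLocation(Nil)                             // None
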
+ */ +private[spark] +class PartitionerAwareUnionRDDPartition( + @transient val rdds: Seq[RDD[_]], + val idx: Int + ) extends Partition { + var parents = rdds.map(_.partitions(idx)).toArray + + override val index = idx + override def hashCode(): Int = idx + + @throws(classOf[IOException]) + private def writeObject(oos: ObjectOutputStream) { + // Update the reference to parent partition at the time of task serialization + parents = rdds.map(_.partitions(index)).toArray + oos.defaultWriteObject() + } +} + +/** + * Class representing an RDD that can take multiple RDDs partitioned by the same partitioner and + * unify them into a single RDD while preserving the partitioner. So m RDDs with p partitions each + * will be unified to a single RDD with p partitions and the same partitioner. The preferred + * location for each partition of the unified RDD will be the most common preferred location + * of the corresponding partitions of the parent RDDs. For example, location of partition 0 + * of the unified RDD will be where most of partition 0 of the parent RDDs are located. + */ +private[spark] +class PartitionerAwareUnionRDD[T: ClassTag]( + sc: SparkContext, + var rdds: Seq[RDD[T]] + ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { + require(rdds.length > 0) + require(rdds.flatMap(_.partitioner).toSet.size == 1, + "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) + + override val partitioner = rdds.head.partitioner + + override def getPartitions: Array[Partition] = { + val numPartitions = partitioner.get.numPartitions + (0 until numPartitions).map(index => { + new PartitionerAwareUnionRDDPartition(rdds, index) + }).toArray + } + + // Get the location where most of the partitions of parent RDDs are located + override def getPreferredLocations(s: Partition): Seq[String] = { + logDebug("Finding preferred location for " + this + ", partition " + s.index) + val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents + val locations = rdds.zip(parentPartitions).flatMap { + case (rdd, part) => { + val parentLocations = currPrefLocs(rdd, part) + logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) + parentLocations + } + } + val location = if (locations.isEmpty) { + None + } else { + // Find the location that maximum number of parent partitions prefer + Some(locations.groupBy(x => x).maxBy(_._2.length)._1) + } + logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) + location.toSeq + } + + override def compute(s: Partition, context: TaskContext): Iterator[T] = { + val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents + rdds.zip(parentPartitions).iterator.flatMap { + case (rdd, p) => rdd.iterator(p, context) + } + } + + override def clearDependencies() { + super.clearDependencies() + rdds = null + } + + // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) + private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { + rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) + } +} diff --git a/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala index d5304ab0aed67..8ef919c4b58cb 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala @@ -24,16 +24,16 @@ import scala.collection.Map import scala.collection.JavaConversions._ import 
scala.collection.mutable.ArrayBuffer import scala.io.Source +import scala.reflect.ClassTag import org.apache.spark.{SparkEnv, Partition, TaskContext} -import org.apache.spark.broadcast.Broadcast /** * An RDD that pipes the contents of each parent partition through an external command * (printing them one per line) and returns the output as a collection of strings. */ -class PipedRDD[T: ClassManifest]( +class PipedRDD[T: ClassTag]( prev: RDD[T], command: Seq[String], envVars: Map[String, String], @@ -95,7 +95,7 @@ class PipedRDD[T: ClassManifest]( // Return an iterator that read lines from the process's stdout val lines = Source.fromInputStream(proc.getInputStream).getLines - return new Iterator[String] { + new Iterator[String] { def next() = lines.next() def hasNext = { if (lines.hasNext) { @@ -112,7 +112,7 @@ class PipedRDD[T: ClassManifest]( } } -object PipedRDD { +private object PipedRDD { // Split a string into words using a standard StringTokenizer def tokenize(command: String): Seq[String] = { val buf = new ArrayBuffer[String] diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index db9889f185e50..1472c92b6031d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -23,6 +23,8 @@ import scala.collection.Map import scala.collection.JavaConversions.mapAsScalaMap import scala.collection.mutable.ArrayBuffer +import scala.reflect.{classTag, ClassTag} + import org.apache.hadoop.io.BytesWritable import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.io.NullWritable @@ -30,6 +32,7 @@ import org.apache.hadoop.io.Text import org.apache.hadoop.mapred.TextOutputFormat import it.unimi.dsi.fastutil.objects.{Object2LongOpenHashMap => OLMap} +import com.clearspring.analytics.stream.cardinality.HyperLogLog import org.apache.spark.Partitioner._ import org.apache.spark.api.java.JavaRDD @@ -38,23 +41,23 @@ import org.apache.spark.partial.CountEvaluator import org.apache.spark.partial.GroupedCountEvaluator import org.apache.spark.partial.PartialResult import org.apache.spark.storage.StorageLevel -import org.apache.spark.util.{Utils, BoundedPriorityQueue} +import org.apache.spark.util.{Utils, BoundedPriorityQueue, SerializableHyperLogLog} import org.apache.spark.SparkContext._ import org.apache.spark._ -import scala.concurrent.duration.Duration -import java.util.concurrent.TimeUnit /** * A Resilient Distributed Dataset (RDD), the basic abstraction in Spark. Represents an immutable, * partitioned collection of elements that can be operated on in parallel. This class contains the * basic operations available on all RDDs, such as `map`, `filter`, and `persist`. In addition, * [[org.apache.spark.rdd.PairRDDFunctions]] contains operations available only on RDDs of key-value - * pairs, such as `groupByKey` and `join`; [[org.apache.spark.rdd.DoubleRDDFunctions]] contains - * operations available only on RDDs of Doubles; and [[org.apache.spark.rdd.SequenceFileRDDFunctions]] - * contains operations available on RDDs that can be saved as SequenceFiles. These operations are - * automatically available on any RDD of the right type (e.g. RDD[(Int, Int)] through implicit - * conversions when you `import org.apache.spark.SparkContext._`. 
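
Relatedly, a usage sketch for PipedRDD via RDD.pipe (the changes above switch it to ClassTag and make its companion object private): each element is written to the external command's stdin, one per line, and stdout lines come back as the result. Assumes tr is available on the worker's PATH.

    val shouted = sc.parallelize(Seq("spark", "rdd")).pipe("tr a-z A-Z").collect()
    // Array("SPARK", "RDD")
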
+ * pairs, such as `groupByKey` and `join`; + * [[org.apache.spark.rdd.DoubleRDDFunctions]] contains operations available only on RDDs of + * Doubles; and + * [[org.apache.spark.rdd.SequenceFileRDDFunctions]] contains operations available on RDDs that + * can be saved as SequenceFiles. + * These operations are automatically available on any RDD of the right type (e.g. RDD[(Int, Int)] + * through implicit conversions when you `import org.apache.spark.SparkContext._`. * * Internally, each RDD is characterized by five main properties: * @@ -71,7 +74,7 @@ import java.util.concurrent.TimeUnit * [[http://www.cs.berkeley.edu/~matei/papers/2012/nsdi_spark.pdf Spark paper]] for more details * on RDD internals. */ -abstract class RDD[T: ClassManifest]( +abstract class RDD[T: ClassTag]( @transient private var sc: SparkContext, @transient private var deps: Seq[Dependency[_]] ) extends Serializable with Logging { @@ -80,6 +83,7 @@ abstract class RDD[T: ClassManifest]( def this(@transient oneParent: RDD[_]) = this(oneParent.context , List(new OneToOneDependency(oneParent))) + private[spark] def conf = sc.conf // ======================================================================= // Methods that should be implemented by subclasses of RDD // ======================================================================= @@ -232,12 +236,9 @@ abstract class RDD[T: ClassManifest]( /** * Compute an RDD partition or read it from a checkpoint if the RDD is checkpointing. */ - private[spark] def computeOrReadCheckpoint(split: Partition, context: TaskContext): Iterator[T] = { - if (isCheckpointed) { - firstParent[T].iterator(split, context) - } else { - compute(split, context) - } + private[spark] def computeOrReadCheckpoint(split: Partition, context: TaskContext): Iterator[T] = + { + if (isCheckpointed) firstParent[T].iterator(split, context) else compute(split, context) } // Transformations (return a new RDD) @@ -245,13 +246,13 @@ abstract class RDD[T: ClassManifest]( /** * Return a new RDD by applying a function to all elements of this RDD. */ - def map[U: ClassManifest](f: T => U): RDD[U] = new MappedRDD(this, sc.clean(f)) + def map[U: ClassTag](f: T => U): RDD[U] = new MappedRDD(this, sc.clean(f)) /** * Return a new RDD by first applying a function to all elements of this * RDD, and then flattening the results. */ - def flatMap[U: ClassManifest](f: T => TraversableOnce[U]): RDD[U] = + def flatMap[U: ClassTag](f: T => TraversableOnce[U]): RDD[U] = new FlatMappedRDD(this, sc.clean(f)) /** @@ -265,6 +266,9 @@ abstract class RDD[T: ClassManifest]( def distinct(numPartitions: Int): RDD[T] = map(x => (x, null)).reduceByKey((x, y) => x, numPartitions).map(_._1) + /** + * Return a new RDD containing the distinct elements in this RDD. + */ def distinct(): RDD[T] = distinct(partitions.size) /** @@ -277,7 +281,7 @@ abstract class RDD[T: ClassManifest]( * which can avoid performing a shuffle. */ def repartition(numPartitions: Int): RDD[T] = { - coalesce(numPartitions, true) + coalesce(numPartitions, shuffle = true) } /** @@ -376,25 +380,25 @@ abstract class RDD[T: ClassManifest]( * Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of * elements (a, b) where a is in `this` and b is in `other`. */ - def cartesian[U: ClassManifest](other: RDD[U]): RDD[(T, U)] = new CartesianRDD(sc, this, other) + def cartesian[U: ClassTag](other: RDD[U]): RDD[(T, U)] = new CartesianRDD(sc, this, other) /** * Return an RDD of grouped items. 
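
A brief sketch of the repartition/coalesce relationship made explicit above, where repartition is coalesce with shuffle = true:

    val wide = sc.parallelize(1 to 100, 10)
    wide.coalesce(2).partitions.length        // 2: merges partitions, no shuffle
    wide.repartition(20).partitions.length    // 20: equivalent to coalesce(20, shuffle = true)
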
*/ - def groupBy[K: ClassManifest](f: T => K): RDD[(K, Seq[T])] = + def groupBy[K: ClassTag](f: T => K): RDD[(K, Seq[T])] = groupBy[K](f, defaultPartitioner(this)) /** * Return an RDD of grouped elements. Each group consists of a key and a sequence of elements * mapping to that key. */ - def groupBy[K: ClassManifest](f: T => K, numPartitions: Int): RDD[(K, Seq[T])] = + def groupBy[K: ClassTag](f: T => K, numPartitions: Int): RDD[(K, Seq[T])] = groupBy(f, new HashPartitioner(numPartitions)) /** * Return an RDD of grouped items. */ - def groupBy[K: ClassManifest](f: T => K, p: Partitioner): RDD[(K, Seq[T])] = { + def groupBy[K: ClassTag](f: T => K, p: Partitioner): RDD[(K, Seq[T])] = { val cleanF = sc.clean(f) this.map(t => (cleanF(t), t)).groupByKey(p) } @@ -441,7 +445,7 @@ abstract class RDD[T: ClassManifest]( /** * Return a new RDD by applying a function to each partition of this RDD. */ - def mapPartitions[U: ClassManifest]( + def mapPartitions[U: ClassTag]( f: Iterator[T] => Iterator[U], preservesPartitioning: Boolean = false): RDD[U] = { val func = (context: TaskContext, index: Int, iter: Iterator[T]) => f(iter) new MapPartitionsRDD(this, sc.clean(func), preservesPartitioning) @@ -451,7 +455,7 @@ abstract class RDD[T: ClassManifest]( * Return a new RDD by applying a function to each partition of this RDD, while tracking the index * of the original partition. */ - def mapPartitionsWithIndex[U: ClassManifest]( + def mapPartitionsWithIndex[U: ClassTag]( f: (Int, Iterator[T]) => Iterator[U], preservesPartitioning: Boolean = false): RDD[U] = { val func = (context: TaskContext, index: Int, iter: Iterator[T]) => f(index, iter) new MapPartitionsRDD(this, sc.clean(func), preservesPartitioning) @@ -461,7 +465,7 @@ abstract class RDD[T: ClassManifest]( * Return a new RDD by applying a function to each partition of this RDD. This is a variant of * mapPartitions that also passes the TaskContext into the closure. */ - def mapPartitionsWithContext[U: ClassManifest]( + def mapPartitionsWithContext[U: ClassTag]( f: (TaskContext, Iterator[T]) => Iterator[U], preservesPartitioning: Boolean = false): RDD[U] = { val func = (context: TaskContext, index: Int, iter: Iterator[T]) => f(context, iter) @@ -473,7 +477,7 @@ abstract class RDD[T: ClassManifest]( * of the original partition. */ @deprecated("use mapPartitionsWithIndex", "0.7.0") - def mapPartitionsWithSplit[U: ClassManifest]( + def mapPartitionsWithSplit[U: ClassTag]( f: (Int, Iterator[T]) => Iterator[U], preservesPartitioning: Boolean = false): RDD[U] = { mapPartitionsWithIndex(f, preservesPartitioning) } @@ -483,7 +487,7 @@ abstract class RDD[T: ClassManifest]( * additional parameter is produced by constructA, which is called in each * partition with the index of that partition. */ - def mapWith[A: ClassManifest, U: ClassManifest] + def mapWith[A: ClassTag, U: ClassTag] (constructA: Int => A, preservesPartitioning: Boolean = false) (f: (T, A) => U): RDD[U] = { mapPartitionsWithIndex((index, iter) => { @@ -497,7 +501,7 @@ abstract class RDD[T: ClassManifest]( * additional parameter is produced by constructA, which is called in each * partition with the index of that partition. 
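
A usage sketch for mapPartitionsWithIndex, which the deprecation above names as the replacement for mapPartitionsWithSplit:

    val tagged = sc.parallelize(1 to 6, 3).mapPartitionsWithIndex { (idx, it) =>
      it.map(x => (idx, x))
    }
    // e.g. (0,1), (0,2), (1,3), (1,4), (2,5), (2,6)
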
*/ - def flatMapWith[A: ClassManifest, U: ClassManifest] + def flatMapWith[A: ClassTag, U: ClassTag] (constructA: Int => A, preservesPartitioning: Boolean = false) (f: (T, A) => Seq[U]): RDD[U] = { mapPartitionsWithIndex((index, iter) => { @@ -511,7 +515,7 @@ abstract class RDD[T: ClassManifest]( * This additional parameter is produced by constructA, which is called in each * partition with the index of that partition. */ - def foreachWith[A: ClassManifest](constructA: Int => A)(f: (T, A) => Unit) { + def foreachWith[A: ClassTag](constructA: Int => A)(f: (T, A) => Unit) { mapPartitionsWithIndex { (index, iter) => val a = constructA(index) iter.map(t => {f(t, a); t}) @@ -523,7 +527,7 @@ abstract class RDD[T: ClassManifest]( * additional parameter is produced by constructA, which is called in each * partition with the index of that partition. */ - def filterWith[A: ClassManifest](constructA: Int => A)(p: (T, A) => Boolean): RDD[T] = { + def filterWith[A: ClassTag](constructA: Int => A)(p: (T, A) => Boolean): RDD[T] = { mapPartitionsWithIndex((index, iter) => { val a = constructA(index) iter.filter(t => p(t, a)) @@ -536,7 +540,7 @@ abstract class RDD[T: ClassManifest]( * partitions* and the *same number of elements in each partition* (e.g. one was made through * a map on the other). */ - def zip[U: ClassManifest](other: RDD[U]): RDD[(T, U)] = new ZippedRDD(sc, this, other) + def zip[U: ClassTag](other: RDD[U]): RDD[(T, U)] = new ZippedRDD(sc, this, other) /** * Zip this RDD's partitions with one (or more) RDD(s) and return a new RDD by @@ -544,20 +548,35 @@ abstract class RDD[T: ClassManifest]( * *same number of partitions*, but does *not* require them to have the same number * of elements in each partition. */ - def zipPartitions[B: ClassManifest, V: ClassManifest] + def zipPartitions[B: ClassTag, V: ClassTag] + (rdd2: RDD[B], preservesPartitioning: Boolean) + (f: (Iterator[T], Iterator[B]) => Iterator[V]): RDD[V] = + new ZippedPartitionsRDD2(sc, sc.clean(f), this, rdd2, preservesPartitioning) + + def zipPartitions[B: ClassTag, V: ClassTag] (rdd2: RDD[B]) (f: (Iterator[T], Iterator[B]) => Iterator[V]): RDD[V] = - new ZippedPartitionsRDD2(sc, sc.clean(f), this, rdd2) + new ZippedPartitionsRDD2(sc, sc.clean(f), this, rdd2, false) + + def zipPartitions[B: ClassTag, C: ClassTag, V: ClassTag] + (rdd2: RDD[B], rdd3: RDD[C], preservesPartitioning: Boolean) + (f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V]): RDD[V] = + new ZippedPartitionsRDD3(sc, sc.clean(f), this, rdd2, rdd3, preservesPartitioning) - def zipPartitions[B: ClassManifest, C: ClassManifest, V: ClassManifest] + def zipPartitions[B: ClassTag, C: ClassTag, V: ClassTag] (rdd2: RDD[B], rdd3: RDD[C]) (f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V]): RDD[V] = - new ZippedPartitionsRDD3(sc, sc.clean(f), this, rdd2, rdd3) + new ZippedPartitionsRDD3(sc, sc.clean(f), this, rdd2, rdd3, false) - def zipPartitions[B: ClassManifest, C: ClassManifest, D: ClassManifest, V: ClassManifest] + def zipPartitions[B: ClassTag, C: ClassTag, D: ClassTag, V: ClassTag] + (rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D], preservesPartitioning: Boolean) + (f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V]): RDD[V] = + new ZippedPartitionsRDD4(sc, sc.clean(f), this, rdd2, rdd3, rdd4, preservesPartitioning) + + def zipPartitions[B: ClassTag, C: ClassTag, D: ClassTag, V: ClassTag] (rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D]) (f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V]): RDD[V] = - new 
ZippedPartitionsRDD4(sc, sc.clean(f), this, rdd2, rdd3, rdd4) + new ZippedPartitionsRDD4(sc, sc.clean(f), this, rdd2, rdd3, rdd4, false) // Actions (launch a job to return a value to the user program) @@ -592,23 +611,10 @@ abstract class RDD[T: ClassManifest]( /** * Return an RDD that contains all matching values by applying `f`. */ - def collect[U: ClassManifest](f: PartialFunction[T, U]): RDD[U] = { + def collect[U: ClassTag](f: PartialFunction[T, U]): RDD[U] = { filter(f.isDefinedAt).map(f) } - /** - * Return iterator that lazily fetches partitions - * @param prefetchPartitions How many partitions to prefetch. Larger value increases parallelism - * but also increases driver memory requirement. - * @param partitionBatchSize How many partitions fetch per job - * @param timeOut how long to wait for each partition fetch - * @return Iterable of every element in this RDD - */ - def toIterator(prefetchPartitions: Int = 1, partitionBatchSize: Int = 10, - timeOut: Duration = Duration(30, TimeUnit.SECONDS)):Iterator[T] = { - new RDDiterator[T](this, prefetchPartitions,partitionBatchSize, timeOut) - } - /** * Return an RDD with the elements from `this` that are not in `other`. * @@ -646,7 +652,8 @@ abstract class RDD[T: ClassManifest]( } /** - * Reduces the elements of this RDD using the specified commutative and associative binary operator. + * Reduces the elements of this RDD using the specified commutative and + * associative binary operator. */ def reduce(f: (T, T) => T): T = { val cleanF = sc.clean(f) @@ -659,7 +666,7 @@ abstract class RDD[T: ClassManifest]( } var jobResult: Option[T] = None val mergeResult = (index: Int, taskResult: Option[T]) => { - if (taskResult != None) { + if (taskResult.isDefined) { jobResult = jobResult match { case Some(value) => Some(f(value, taskResult.get)) case None => taskResult @@ -695,7 +702,7 @@ abstract class RDD[T: ClassManifest]( * allowed to modify and return their first argument instead of creating a new U to avoid memory * allocation. */ - def aggregate[U: ClassManifest](zeroValue: U)(seqOp: (U, T) => U, combOp: (U, U) => U): U = { + def aggregate[U: ClassTag](zeroValue: U)(seqOp: (U, T) => U, combOp: (U, U) => U): U = { // Clone the zero value since we will also be serializing it as part of tasks var jobResult = Utils.clone(zeroValue, sc.env.closureSerializer.newInstance()) val cleanSeqOp = sc.clean(seqOp) @@ -744,7 +751,7 @@ abstract class RDD[T: ClassManifest]( * combine step happens locally on the master, equivalent to running a single reduce task. */ def countByValue(): Map[T, Long] = { - if (elementClassManifest.erasure.isArray) { + if (elementClassTag.runtimeClass.isArray) { throw new SparkException("countByValue() does not support arrays") } // TODO: This should perhaps be distributed by default. 
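// Illustrative usage sketch, not part of the patch above, for the new zipPartitions
// overloads that take a preservesPartitioning flag. When the zip function leaves keys
// in place, passing true lets the result reuse this RDD's partitioner instead of
// discarding it. Object and value names here are assumptions; `sc` is a SparkContext.
import org.apache.spark.{HashPartitioner, SparkContext}
import org.apache.spark.SparkContext._

object ZipPartitionsSketch {
  def zipValues(sc: SparkContext): Unit = {
    val left  = sc.parallelize(Seq(("a", 1), ("b", 2))).partitionBy(new HashPartitioner(2))
    val right = sc.parallelize(Seq(("a", 10), ("b", 20))).partitionBy(new HashPartitioner(2))
    // Both inputs have two partitions, so their partitions can be zipped pairwise.
    val zipped = left.zipPartitions(right, preservesPartitioning = true) {
      (leftIter, rightIter) => leftIter.zip(rightIter)
    }
    zipped.collect().foreach(println)
  }
}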
@@ -762,7 +769,7 @@ abstract class RDD[T: ClassManifest]( val entry = iter.next() m1.put(entry.getKey, m1.getLong(entry.getKey) + entry.getLongValue) } - return m1 + m1 } val myResult = mapPartitions(countPartition).reduce(mergeMaps) myResult.asInstanceOf[java.util.Map[T, Long]] // Will be wrapped as a Scala mutable Map @@ -775,7 +782,7 @@ abstract class RDD[T: ClassManifest]( timeout: Long, confidence: Double = 0.95 ): PartialResult[Map[T, BoundedDouble]] = { - if (elementClassManifest.erasure.isArray) { + if (elementClassTag.runtimeClass.isArray) { throw new SparkException("countByValueApprox() does not support arrays") } val countPartition: (TaskContext, Iterator[T]) => OLMap[T] = { (ctx, iter) => @@ -790,6 +797,19 @@ abstract class RDD[T: ClassManifest]( sc.runApproximateJob(this, countPartition, evaluator, timeout) } + /** + * Return approximate number of distinct elements in the RDD. + * + * The accuracy of approximation can be controlled through the relative standard deviation + * (relativeSD) parameter, which also controls the amount of memory used. Lower values result in + * more accurate counts but increase the memory footprint and vise versa. The default value of + * relativeSD is 0.05. + */ + def countApproxDistinct(relativeSD: Double = 0.05): Long = { + val zeroCounter = new SerializableHyperLogLog(new HyperLogLog(relativeSD)) + aggregate(zeroCounter)(_.add(_), _.merge(_)).value.cardinality() + } + /** * Take the first num elements of the RDD. It works by first scanning one partition, and use the * results from that partition to estimate the number of additional partitions needed to satisfy @@ -827,7 +847,7 @@ abstract class RDD[T: ClassManifest]( partsScanned += numPartsToTry } - return buf.toArray + buf.toArray } /** @@ -940,14 +960,14 @@ abstract class RDD[T: ClassManifest]( private var storageLevel: StorageLevel = StorageLevel.NONE /** Record user function generating this RDD. */ - @transient private[spark] val origin = Utils.formatSparkCallSite + @transient private[spark] val origin = sc.getCallSite() - private[spark] def elementClassManifest: ClassManifest[T] = classManifest[T] + private[spark] def elementClassTag: ClassTag[T] = classTag[T] private[spark] var checkpointData: Option[RDDCheckpointData[T]] = None /** Returns the first parent RDD */ - protected[spark] def firstParent[U: ClassManifest] = { + protected[spark] def firstParent[U: ClassTag] = { dependencies.head.rdd.asInstanceOf[RDD[U]] } @@ -1009,7 +1029,7 @@ abstract class RDD[T: ClassManifest]( origin) def toJavaRDD() : JavaRDD[T] = { - new JavaRDD(this)(elementClassManifest) + new JavaRDD(this)(elementClassTag) } } diff --git a/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala b/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala index 6009a41570ebd..bc688110f4736 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala @@ -17,10 +17,12 @@ package org.apache.spark.rdd +import scala.reflect.ClassTag + import org.apache.hadoop.fs.Path import org.apache.hadoop.conf.Configuration -import org.apache.spark.{Partition, SparkException, Logging} +import org.apache.spark.{SerializableWritable, Partition, SparkException, Logging} import org.apache.spark.scheduler.{ResultTask, ShuffleMapTask} /** @@ -38,7 +40,7 @@ private[spark] object CheckpointState extends Enumeration { * manages the post-checkpoint state by providing the updated partitions, iterator and preferred locations * of the checkpointed RDD. 
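// Illustrative usage sketch, not part of the patch above, for the new
// countApproxDistinct(relativeSD) action added in this diff, which estimates the
// number of distinct elements with a HyperLogLog sketch. Smaller relativeSD values
// trade memory for accuracy. The names and `sc` are assumptions for the example.
import org.apache.spark.SparkContext

object CountApproxDistinctSketch {
  def estimate(sc: SparkContext): Long = {
    val ids = sc.parallelize(1 to 100000).map(_ % 1000)
    // Roughly 1000 distinct values; 0.01 asks for about 1% relative standard deviation.
    ids.countApproxDistinct(relativeSD = 0.01)
  }
}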
*/ -private[spark] class RDDCheckpointData[T: ClassManifest](rdd: RDD[T]) +private[spark] class RDDCheckpointData[T: ClassTag](@transient rdd: RDD[T]) extends Logging with Serializable { import CheckpointState._ @@ -83,14 +85,21 @@ private[spark] class RDDCheckpointData[T: ClassManifest](rdd: RDD[T]) // Create the output path for the checkpoint val path = new Path(rdd.context.checkpointDir.get, "rdd-" + rdd.id) - val fs = path.getFileSystem(new Configuration()) + val fs = path.getFileSystem(rdd.context.hadoopConfiguration) if (!fs.mkdirs(path)) { throw new SparkException("Failed to create checkpoint path " + path) } // Save to file, and reload it as an RDD - rdd.context.runJob(rdd, CheckpointRDD.writeToFile(path.toString) _) + val broadcastedConf = rdd.context.broadcast( + new SerializableWritable(rdd.context.hadoopConfiguration)) + rdd.context.runJob(rdd, CheckpointRDD.writeToFile(path.toString, broadcastedConf) _) val newRDD = new CheckpointRDD[T](rdd.context, path.toString) + if (newRDD.partitions.size != rdd.partitions.size) { + throw new SparkException( + "Checkpoint RDD " + newRDD + "("+ newRDD.partitions.size + ") has different " + + "number of partitions than original RDD " + rdd + "(" + rdd.partitions.size + ")") + } // Change the dependencies and partitions of the RDD RDDCheckpointData.synchronized { @@ -99,8 +108,8 @@ private[spark] class RDDCheckpointData[T: ClassManifest](rdd: RDD[T]) rdd.markCheckpointed(newRDD) // Update the RDD's dependencies and partitions cpState = Checkpointed RDDCheckpointData.clearTaskCaches() - logInfo("Done checkpointing RDD " + rdd.id + ", new parent is RDD " + newRDD.id) } + logInfo("Done checkpointing RDD " + rdd.id + " to " + path + ", new parent is RDD " + newRDD.id) } // Get preferred location of a split after checkpointing diff --git a/core/src/main/scala/org/apache/spark/rdd/RDDiterator.scala b/core/src/main/scala/org/apache/spark/rdd/RDDiterator.scala deleted file mode 100644 index 1b423a1d7e387..0000000000000 --- a/core/src/main/scala/org/apache/spark/rdd/RDDiterator.scala +++ /dev/null @@ -1,76 +0,0 @@ -package org.apache.spark.rdd - -import scala.concurrent.{Await, Future} -import scala.collection.mutable.ArrayBuffer -import scala.concurrent.duration.Duration -import scala.annotation.tailrec -import scala.collection.mutable -import org.apache.spark.rdd.RDDiterator._ -import org.apache.spark.FutureAction - -/** - * Iterable whose iterator iterates over all elements of an RDD without fetching all partitions - * to the driver process - * - * @param rdd RDD to iterate - * @param prefetchPartitions The number of partitions to prefetch. - * If <1 will not prefetch. - * partitions prefetched = min(prefetchPartitions, partitionBatchSize) - * @param partitionBatchSize How many partitions to fetch per job - * @param timeOut How long to wait for each partition before failing. 
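// Illustrative user-facing sketch, not part of the patch above, of the checkpointing
// path changed in RDDCheckpointData: the writer now reuses the SparkContext's Hadoop
// configuration (broadcast to executors) rather than a fresh Configuration(). The
// checkpoint directory below is a hypothetical path; `sc` is an existing SparkContext.
import org.apache.spark.SparkContext

object CheckpointSketch {
  def checkpointLineage(sc: SparkContext): Unit = {
    sc.setCheckpointDir("hdfs:///tmp/spark-checkpoints") // hypothetical path
    val rdd = sc.parallelize(1 to 1000).map(_ * 2)
    rdd.checkpoint() // marks the RDD; checkpoint files are written after the next job
    rdd.count()      // materializes the RDD and triggers the checkpoint
  }
}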
- */ -class RDDiterator[T: ClassManifest](rdd: RDD[T], prefetchPartitions: Int, partitionBatchSize: Int, - timeOut: Duration) - extends Iterator[T] { - - val batchSize = math.max(1,partitionBatchSize) - var partitionsBatches: Iterator[Seq[Int]] = Range(0, rdd.partitions.size).grouped(batchSize) - var pendingFetchesQueue = mutable.Queue.empty[Future[Seq[Seq[T]]]] - //add prefetchPartitions prefetch - 0.until(math.max(0, prefetchPartitions / batchSize)).foreach(x=>enqueueDataFetch()) - - var currentIterator: Iterator[T] = Iterator.empty - @tailrec - final def hasNext = { - if (currentIterator.hasNext) { - //Still values in the current partition - true - } else { - //Move on to the next partition - //Queue new prefetch of a partition - enqueueDataFetch() - if (pendingFetchesQueue.isEmpty) { - //No more partitions - currentIterator = Iterator.empty - false - } else { - val future = pendingFetchesQueue.dequeue() - currentIterator = Await.result(future, timeOut).flatMap(x => x).iterator - //Next partition might be empty so check again. - this.hasNext - } - } - } - def next() = { - hasNext - currentIterator.next() - } - - def enqueueDataFetch() ={ - if (partitionsBatches.hasNext) { - pendingFetchesQueue.enqueue(fetchData(partitionsBatches.next(), rdd)) - } - } -} - -object RDDiterator { - private def fetchData[T: ClassManifest](partitionIndexes: Seq[Int], - rdd: RDD[T]): FutureAction[Seq[Seq[T]]] = { - val results = new ArrayBuffer[Seq[T]]() - rdd.context.submitJob[T, Array[T], Seq[Seq[T]]](rdd, - x => x.toArray, - partitionIndexes, - (inx: Int, res: Array[T]) => results.append(res), - results.toSeq) - } -} diff --git a/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala index 2c5253ae301c3..d433670cc2b7f 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala @@ -17,6 +17,7 @@ package org.apache.spark.rdd +import scala.reflect.ClassTag import java.util.Random import cern.jet.random.Poisson @@ -29,9 +30,9 @@ class SampledRDDPartition(val prev: Partition, val seed: Int) extends Partition override val index: Int = prev.index } -class SampledRDD[T: ClassManifest]( +class SampledRDD[T: ClassTag]( prev: RDD[T], - withReplacement: Boolean, + withReplacement: Boolean, frac: Double, seed: Int) extends RDD[T](prev) { diff --git a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala index 5fe4676029d97..2d1bd5b4813c9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala @@ -14,9 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.spark.rdd +import scala.reflect.{ ClassTag, classTag} + import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapred.SequenceFileOutputFormat import org.apache.hadoop.io.compress.CompressionCodec @@ -32,15 +33,15 @@ import org.apache.spark.Logging * * Import `org.apache.spark.SparkContext._` at the top of their program to use these functions. 
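// Illustrative usage sketch, not part of the patch above, for SequenceFileRDDFunctions:
// with SparkContext._ imported, an RDD of Writable-convertible pairs picks up
// saveAsSequenceFile through an implicit conversion. The output path is hypothetical.
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

object SequenceFileSketch {
  def save(sc: SparkContext): Unit = {
    val pairs = sc.parallelize(Seq(1 -> "a", 2 -> "b", 3 -> "c"))
    // Int and String are viewable as IntWritable and Text, so no explicit
    // Writable wrappers are needed here.
    pairs.saveAsSequenceFile("hdfs:///tmp/sequence-file-demo")
  }
}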
*/ -class SequenceFileRDDFunctions[K <% Writable: ClassManifest, V <% Writable : ClassManifest]( +class SequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable : ClassTag]( self: RDD[(K, V)]) extends Logging with Serializable { - private def getWritableClass[T <% Writable: ClassManifest](): Class[_ <: Writable] = { + private def getWritableClass[T <% Writable: ClassTag](): Class[_ <: Writable] = { val c = { - if (classOf[Writable].isAssignableFrom(classManifest[T].erasure)) { - classManifest[T].erasure + if (classOf[Writable].isAssignableFrom(classTag[T].runtimeClass)) { + classTag[T].runtimeClass } else { // We get the type of the Writable class by looking at the apply method which converts // from T to Writable. Since we have two apply methods we filter out the one which diff --git a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala index a5d751a7bdce4..0ccb309d0d9d5 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala @@ -17,8 +17,10 @@ package org.apache.spark.rdd -import org.apache.spark.{Dependency, Partitioner, SparkEnv, ShuffleDependency, Partition, TaskContext} +import scala.reflect.ClassTag +import org.apache.spark.{Dependency, Partition, Partitioner, ShuffleDependency, + SparkEnv, TaskContext} private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition { override val index = idx @@ -32,7 +34,7 @@ private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition { * @tparam K the key class. * @tparam V the value class. */ -class ShuffledRDD[K, V, P <: Product2[K, V] : ClassManifest]( +class ShuffledRDD[K, V, P <: Product2[K, V] : ClassTag]( @transient var prev: RDD[P], part: Partitioner) extends RDD[P](prev.context, Nil) { @@ -57,7 +59,7 @@ class ShuffledRDD[K, V, P <: Product2[K, V] : ClassManifest]( override def compute(split: Partition, context: TaskContext): Iterator[P] = { val shuffledId = dependencies.head.asInstanceOf[ShuffleDependency[K, V]].shuffleId SparkEnv.get.shuffleFetcher.fetch[P](shuffledId, split.index, context, - SparkEnv.get.serializerManager.get(serializerClass)) + SparkEnv.get.serializerManager.get(serializerClass, SparkEnv.get.conf)) } override def clearDependencies() { diff --git a/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala index 7af4d803e76d7..4f90c7d3d68ab 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala @@ -18,8 +18,11 @@ package org.apache.spark.rdd import java.util.{HashMap => JHashMap} + import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer +import scala.reflect.ClassTag + import org.apache.spark.Partitioner import org.apache.spark.Dependency import org.apache.spark.TaskContext @@ -45,7 +48,7 @@ import org.apache.spark.OneToOneDependency * you can use `rdd1`'s partitioner/partition size and not worry about running * out of memory because of the size of `rdd2`. 
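// Illustrative usage sketch, not part of the patch above, for the subtract path backed
// by SubtractedRDD. Passing this RDD's own partitioner keeps the result co-partitioned
// with the larger side and only buffers the smaller side per partition. Names and the
// SparkContext `sc` are assumptions for the example.
import org.apache.spark.{HashPartitioner, SparkContext}
import org.apache.spark.SparkContext._

object SubtractSketch {
  def keepOnlyNewKeys(sc: SparkContext): Unit = {
    val bigRdd =
      sc.parallelize(1 to 100000).map(k => (k, "payload")).partitionBy(new HashPartitioner(8))
    val smallRdd = sc.parallelize(1 to 100).map(k => (k, ()))
    // Remove every key of smallRdd from bigRdd, reusing bigRdd's partitioner.
    val remaining = bigRdd.subtractByKey(smallRdd, bigRdd.partitioner.get)
    println(remaining.count())
  }
}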
*/ -private[spark] class SubtractedRDD[K: ClassManifest, V: ClassManifest, W: ClassManifest]( +private[spark] class SubtractedRDD[K: ClassTag, V: ClassTag, W: ClassTag]( @transient var rdd1: RDD[_ <: Product2[K, V]], @transient var rdd2: RDD[_ <: Product2[K, W]], part: Partitioner) @@ -90,7 +93,7 @@ private[spark] class SubtractedRDD[K: ClassManifest, V: ClassManifest, W: ClassM override def compute(p: Partition, context: TaskContext): Iterator[(K, V)] = { val partition = p.asInstanceOf[CoGroupPartition] - val serializer = SparkEnv.get.serializerManager.get(serializerClass) + val serializer = SparkEnv.get.serializerManager.get(serializerClass, SparkEnv.get.conf) val map = new JHashMap[K, ArrayBuffer[V]] def getSeq(k: K): ArrayBuffer[V] = { val seq = map.get(k) diff --git a/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala index ae8a9f36a6014..08a41ac5583d5 100644 --- a/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala @@ -18,10 +18,13 @@ package org.apache.spark.rdd import scala.collection.mutable.ArrayBuffer +import scala.reflect.ClassTag + import org.apache.spark.{Dependency, RangeDependency, SparkContext, Partition, TaskContext} + import java.io.{ObjectOutputStream, IOException} -private[spark] class UnionPartition[T: ClassManifest](idx: Int, rdd: RDD[T], splitIndex: Int) +private[spark] class UnionPartition[T: ClassTag](idx: Int, rdd: RDD[T], splitIndex: Int) extends Partition { var split: Partition = rdd.partitions(splitIndex) @@ -40,7 +43,7 @@ private[spark] class UnionPartition[T: ClassManifest](idx: Int, rdd: RDD[T], spl } } -class UnionRDD[T: ClassManifest]( +class UnionRDD[T: ClassTag]( sc: SparkContext, @transient var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies diff --git a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala index 31e6fd519d0dd..83be3c6eb40c4 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala @@ -19,10 +19,12 @@ package org.apache.spark.rdd import org.apache.spark.{OneToOneDependency, SparkContext, Partition, TaskContext} import java.io.{ObjectOutputStream, IOException} +import scala.reflect.ClassTag private[spark] class ZippedPartitionsPartition( idx: Int, - @transient rdds: Seq[RDD[_]]) + @transient rdds: Seq[RDD[_]], + @transient val preferredLocations: Seq[String]) extends Partition { override val index: Int = idx @@ -37,33 +39,31 @@ private[spark] class ZippedPartitionsPartition( } } -abstract class ZippedPartitionsBaseRDD[V: ClassManifest]( +abstract class ZippedPartitionsBaseRDD[V: ClassTag]( sc: SparkContext, - var rdds: Seq[RDD[_]]) + var rdds: Seq[RDD[_]], + preservesPartitioning: Boolean = false) extends RDD[V](sc, rdds.map(x => new OneToOneDependency(x))) { + override val partitioner = + if (preservesPartitioning) firstParent[Any].partitioner else None + override def getPartitions: Array[Partition] = { - val sizes = rdds.map(x => x.partitions.size) - if (!sizes.forall(x => x == sizes(0))) { + val numParts = rdds.head.partitions.size + if (!rdds.forall(rdd => rdd.partitions.size == numParts)) { throw new IllegalArgumentException("Can't zip RDDs with unequal numbers of partitions") } - val array = new Array[Partition](sizes(0)) - for (i <- 0 until sizes(0)) { - array(i) = new 
ZippedPartitionsPartition(i, rdds) + Array.tabulate[Partition](numParts) { i => + val prefs = rdds.map(rdd => rdd.preferredLocations(rdd.partitions(i))) + // Check whether there are any hosts that match all RDDs; otherwise return the union + val exactMatchLocations = prefs.reduce((x, y) => x.intersect(y)) + val locs = if (!exactMatchLocations.isEmpty) exactMatchLocations else prefs.flatten.distinct + new ZippedPartitionsPartition(i, rdds, locs) } - array } override def getPreferredLocations(s: Partition): Seq[String] = { - val parts = s.asInstanceOf[ZippedPartitionsPartition].partitions - val prefs = rdds.zip(parts).map { case (rdd, p) => rdd.preferredLocations(p) } - // Check whether there are any hosts that match all RDDs; otherwise return the union - val exactMatchLocations = prefs.reduce((x, y) => x.intersect(y)) - if (!exactMatchLocations.isEmpty) { - exactMatchLocations - } else { - prefs.flatten.distinct - } + s.asInstanceOf[ZippedPartitionsPartition].preferredLocations } override def clearDependencies() { @@ -72,12 +72,13 @@ abstract class ZippedPartitionsBaseRDD[V: ClassManifest]( } } -class ZippedPartitionsRDD2[A: ClassManifest, B: ClassManifest, V: ClassManifest]( +class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, f: (Iterator[A], Iterator[B]) => Iterator[V], var rdd1: RDD[A], - var rdd2: RDD[B]) - extends ZippedPartitionsBaseRDD[V](sc, List(rdd1, rdd2)) { + var rdd2: RDD[B], + preservesPartitioning: Boolean = false) + extends ZippedPartitionsBaseRDD[V](sc, List(rdd1, rdd2), preservesPartitioning) { override def compute(s: Partition, context: TaskContext): Iterator[V] = { val partitions = s.asInstanceOf[ZippedPartitionsPartition].partitions @@ -92,13 +93,14 @@ class ZippedPartitionsRDD2[A: ClassManifest, B: ClassManifest, V: ClassManifest] } class ZippedPartitionsRDD3 - [A: ClassManifest, B: ClassManifest, C: ClassManifest, V: ClassManifest]( + [A: ClassTag, B: ClassTag, C: ClassTag, V: ClassTag]( sc: SparkContext, f: (Iterator[A], Iterator[B], Iterator[C]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], - var rdd3: RDD[C]) - extends ZippedPartitionsBaseRDD[V](sc, List(rdd1, rdd2, rdd3)) { + var rdd3: RDD[C], + preservesPartitioning: Boolean = false) + extends ZippedPartitionsBaseRDD[V](sc, List(rdd1, rdd2, rdd3), preservesPartitioning) { override def compute(s: Partition, context: TaskContext): Iterator[V] = { val partitions = s.asInstanceOf[ZippedPartitionsPartition].partitions @@ -116,14 +118,15 @@ class ZippedPartitionsRDD3 } class ZippedPartitionsRDD4 - [A: ClassManifest, B: ClassManifest, C: ClassManifest, D:ClassManifest, V: ClassManifest]( + [A: ClassTag, B: ClassTag, C: ClassTag, D:ClassTag, V: ClassTag]( sc: SparkContext, f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], var rdd3: RDD[C], - var rdd4: RDD[D]) - extends ZippedPartitionsBaseRDD[V](sc, List(rdd1, rdd2, rdd3, rdd4)) { + var rdd4: RDD[D], + preservesPartitioning: Boolean = false) + extends ZippedPartitionsBaseRDD[V](sc, List(rdd1, rdd2, rdd3, rdd4), preservesPartitioning) { override def compute(s: Partition, context: TaskContext): Iterator[V] = { val partitions = s.asInstanceOf[ZippedPartitionsPartition].partitions diff --git a/core/src/main/scala/org/apache/spark/rdd/ZippedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedRDD.scala index 567b67dfee0bf..fb5b070c18d43 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ZippedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedRDD.scala @@ 
-18,10 +18,12 @@ package org.apache.spark.rdd import org.apache.spark.{OneToOneDependency, SparkContext, Partition, TaskContext} + import java.io.{ObjectOutputStream, IOException} +import scala.reflect.ClassTag -private[spark] class ZippedPartition[T: ClassManifest, U: ClassManifest]( +private[spark] class ZippedPartition[T: ClassTag, U: ClassTag]( idx: Int, @transient rdd1: RDD[T], @transient rdd2: RDD[U] @@ -42,7 +44,7 @@ private[spark] class ZippedPartition[T: ClassManifest, U: ClassManifest]( } } -class ZippedRDD[T: ClassManifest, U: ClassManifest]( +class ZippedRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1: RDD[T], var rdd2: RDD[U]) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index f34e98f86b86b..536d84f07e5ec 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -19,10 +19,13 @@ package org.apache.spark.scheduler import java.io.NotSerializableException import java.util.Properties -import java.util.concurrent.{LinkedBlockingQueue, TimeUnit} import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map} +import scala.concurrent.duration._ +import scala.reflect.ClassTag + +import akka.actor._ import org.apache.spark._ import org.apache.spark.rdd.RDD @@ -52,24 +55,25 @@ import org.apache.spark.util.{MetadataCleaner, MetadataCleanerType, TimeStampedH private[spark] class DAGScheduler( taskSched: TaskScheduler, - mapOutputTracker: MapOutputTracker, + mapOutputTracker: MapOutputTrackerMaster, blockManagerMaster: BlockManagerMaster, env: SparkEnv) extends Logging { def this(taskSched: TaskScheduler) { - this(taskSched, SparkEnv.get.mapOutputTracker, SparkEnv.get.blockManager.master, SparkEnv.get) + this(taskSched, SparkEnv.get.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster], + SparkEnv.get.blockManager.master, SparkEnv.get) } taskSched.setDAGScheduler(this) // Called by TaskScheduler to report task's starting. def taskStarted(task: Task[_], taskInfo: TaskInfo) { - eventQueue.put(BeginEvent(task, taskInfo)) + eventProcessActor ! BeginEvent(task, taskInfo) } // Called to report that a task has completed and results are being fetched remotely. def taskGettingResult(task: Task[_], taskInfo: TaskInfo) { - eventQueue.put(GettingResultEvent(task, taskInfo)) + eventProcessActor ! GettingResultEvent(task, taskInfo) } // Called by TaskScheduler to report task completions or failures. @@ -80,35 +84,38 @@ class DAGScheduler( accumUpdates: Map[Long, Any], taskInfo: TaskInfo, taskMetrics: TaskMetrics) { - eventQueue.put(CompletionEvent(task, reason, result, accumUpdates, taskInfo, taskMetrics)) + eventProcessActor ! CompletionEvent(task, reason, result, accumUpdates, taskInfo, taskMetrics) } // Called by TaskScheduler when an executor fails. def executorLost(execId: String) { - eventQueue.put(ExecutorLost(execId)) + eventProcessActor ! ExecutorLost(execId) } // Called by TaskScheduler when a host is added def executorGained(execId: String, host: String) { - eventQueue.put(ExecutorGained(execId, host)) + eventProcessActor ! ExecutorGained(execId, host) } // Called by TaskScheduler to cancel an entire TaskSet due to either repeated failures or // cancellation of the job itself. def taskSetFailed(taskSet: TaskSet, reason: String) { - eventQueue.put(TaskSetFailed(taskSet, reason)) + eventProcessActor ! 
TaskSetFailed(taskSet, reason) } // The time, in millis, to wait for fetch failure events to stop coming in after one is detected; // this is a simplistic way to avoid resubmitting tasks in the non-fetchable map stage one by one // as more failure events come in - val RESUBMIT_TIMEOUT = 50L + val RESUBMIT_TIMEOUT = 200.milliseconds // The time, in millis, to wake up between polls of the completion queue in order to potentially // resubmit failed stages val POLL_TIMEOUT = 10L - private val eventQueue = new LinkedBlockingQueue[DAGSchedulerEvent] + // Warns the user if a stage contains a task with size greater than this value (in KB) + val TASK_SIZE_TO_WARN = 100 + + private var eventProcessActor: ActorRef = _ private[scheduler] val nextJobId = new AtomicInteger(0) @@ -126,7 +133,8 @@ class DAGScheduler( private[spark] val stageToInfos = new TimeStampedHashMap[Stage, StageInfo] - private[spark] val listenerBus = new SparkListenerBus() + // An async scheduler event bus. The bus should be stopped when DAGSCheduler is stopped. + private[spark] val listenerBus = new SparkListenerBus // Contains the locations that each RDD's partitions are cached on private val cacheLocs = new HashMap[Int, Array[Seq[TaskLocation]]] @@ -145,22 +153,47 @@ class DAGScheduler( val waiting = new HashSet[Stage] // Stages we need to run whose parents aren't done val running = new HashSet[Stage] // Stages we are running right now val failed = new HashSet[Stage] // Stages that must be resubmitted due to fetch failures - val pendingTasks = new TimeStampedHashMap[Stage, HashSet[Task[_]]] // Missing tasks from each stage - var lastFetchFailureTime: Long = 0 // Used to wait a bit to avoid repeated resubmits + // Missing tasks from each stage + val pendingTasks = new TimeStampedHashMap[Stage, HashSet[Task[_]]] val activeJobs = new HashSet[ActiveJob] val resultStageToJob = new HashMap[Stage, ActiveJob] - val metadataCleaner = new MetadataCleaner(MetadataCleanerType.DAG_SCHEDULER, this.cleanup) + val metadataCleaner = new MetadataCleaner( + MetadataCleanerType.DAG_SCHEDULER, this.cleanup, env.conf) - // Start a thread to run the DAGScheduler event loop + /** + * Starts the event processing actor. The actor has two responsibilities: + * + * 1. Waits for events like job submission, task finished, task failure etc., and calls + * [[org.apache.spark.scheduler.DAGScheduler.processEvent()]] to process them. + * 2. Schedules a periodical task to resubmit failed stages. + * + * NOTE: the actor cannot be started in the constructor, because the periodical task references + * some internal states of the enclosing [[org.apache.spark.scheduler.DAGScheduler]] object, thus + * cannot be scheduled until the [[org.apache.spark.scheduler.DAGScheduler]] is fully constructed. + */ def start() { - new Thread("DAGScheduler") { - setDaemon(true) - override def run() { - DAGScheduler.this.run() + eventProcessActor = env.actorSystem.actorOf(Props(new Actor { + /** + * The main event loop of the DAG scheduler. + */ + def receive = { + case event: DAGSchedulerEvent => + logTrace("Got event of type " + event.getClass.getName) + + /** + * All events are forwarded to `processEvent()`, so that the event processing logic can + * easily tested without starting a dedicated actor. Please refer to `DAGSchedulerSuite` + * for details. 
+ */ + if (!processEvent(event)) { + submitWaitingStages() + } else { + context.stop(self) + } } - }.start() + })) } def addSparkListener(listener: SparkListener) { @@ -191,7 +224,8 @@ class DAGScheduler( shuffleToMapStage.get(shuffleDep.shuffleId) match { case Some(stage) => stage case None => - val stage = newOrUsedStage(shuffleDep.rdd, shuffleDep.rdd.partitions.size, shuffleDep, jobId) + val stage = + newOrUsedStage(shuffleDep.rdd, shuffleDep.rdd.partitions.size, shuffleDep, jobId) shuffleToMapStage(shuffleDep.shuffleId) = stage stage } @@ -200,7 +234,8 @@ class DAGScheduler( /** * Create a Stage -- either directly for use as a result stage, or as part of the (re)-creation * of a shuffle map stage in newOrUsedStage. The stage will be associated with the provided - * jobId. Production of shuffle map stages should always use newOrUsedStage, not newStage directly. + * jobId. Production of shuffle map stages should always use newOrUsedStage, not newStage + * directly. */ private def newStage( rdd: RDD[_], @@ -216,8 +251,6 @@ class DAGScheduler( stageIdToStage(id) = stage updateJobIdStageIdMaps(jobId, stage) stageToInfos(stage) = new StageInfo(stage) - val stageIdSet = jobIdToStageIds.getOrElseUpdate(jobId, new HashSet) - stageIdSet += id stage } @@ -237,8 +270,8 @@ class DAGScheduler( { val stage = newStage(rdd, numTasks, Some(shuffleDep), jobId, callSite) if (mapOutputTracker.has(shuffleDep.shuffleId)) { - val serLocs = mapOutputTracker.getSerializedLocations(shuffleDep.shuffleId) - val locs = mapOutputTracker.deserializeStatuses(serLocs) + val serLocs = mapOutputTracker.getSerializedMapOutputStatuses(shuffleDep.shuffleId) + val locs = MapOutputTracker.deserializeMapStatuses(serLocs) for (i <- 0 until locs.size) { stage.outputLocs(i) = Option(locs(i)).toList // locs(i) will be null if missing } @@ -314,7 +347,8 @@ class DAGScheduler( stageIdToJobIds.getOrElseUpdate(s.id, new HashSet[Int]()) += jobId jobIdToStageIds.getOrElseUpdate(jobId, new HashSet[Int]()) += s.id val parents = getParentStages(s.rdd, jobId) - val parentsWithoutThisJobId = parents.filter(p => !stageIdToJobIds.get(p.id).exists(_.contains(jobId))) + val parentsWithoutThisJobId = parents.filter(p => + !stageIdToJobIds.get(p.id).exists(_.contains(jobId))) updateJobIdStageIdMapsList(parentsWithoutThisJobId ++ stages.tail) } } @@ -322,8 +356,9 @@ class DAGScheduler( } /** - * Removes job and any stages that are not needed by any other job. Returns the set of ids for stages that - * were removed. The associated tasks for those stages need to be cancelled if we got here via job cancellation. + * Removes job and any stages that are not needed by any other job. Returns the set of ids for + * stages that were removed. The associated tasks for those stages need to be cancelled if we + * got here via job cancellation. 
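// Standalone sketch, not part of the patch above, of the event-loop pattern the
// DAGScheduler adopts here: a single actor serializes scheduler events, and failed-stage
// resubmission is delivered as a delayed message instead of being discovered by polling.
// All names (SchedulerEvent, Resubmit, EventProcessor) are illustrative, not Spark classes.
import akka.actor.{Actor, ActorSystem, Props}
import scala.concurrent.duration._

object EventActorSketch {
  sealed trait SchedulerEvent
  case class JobSubmitted(jobId: Int) extends SchedulerEvent
  case object Resubmit extends SchedulerEvent

  class EventProcessor extends Actor {
    def receive = {
      case Resubmit          => println("resubmitting failed stages")
      case e: SchedulerEvent => process(e)
    }
    private def process(e: SchedulerEvent): Unit = println("processing " + e)
  }

  def main(args: Array[String]): Unit = {
    val system = ActorSystem("sketch")
    import system.dispatcher // execution context for the scheduler below
    val eventActor = system.actorOf(Props(new EventProcessor))
    eventActor ! JobSubmitted(1)
    // Mirrors the RESUBMIT_TIMEOUT idea: deliver Resubmit after a short delay.
    system.scheduler.scheduleOnce(200.milliseconds, eventActor, Resubmit)
    Thread.sleep(1000)
    system.shutdown()
  }
}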
*/ private def removeJobAndIndependentStages(jobId: Int): Set[Int] = { val registeredStages = jobIdToStageIds(jobId) @@ -334,7 +369,8 @@ class DAGScheduler( stageIdToJobIds.filterKeys(stageId => registeredStages.contains(stageId)).foreach { case (stageId, jobSet) => if (!jobSet.contains(jobId)) { - logError("Job %d not registered for stage %d even though that stage was registered for the job" + logError( + "Job %d not registered for stage %d even though that stage was registered for the job" .format(jobId, stageId)) } else { def removeStage(stageId: Int) { @@ -365,7 +401,8 @@ class DAGScheduler( stageIdToStage -= stageId stageIdToJobIds -= stageId - logDebug("After removal of stage %d, remaining stages = %d".format(stageId, stageIdToStage.size)) + logDebug("After removal of stage %d, remaining stages = %d" + .format(stageId, stageIdToStage.size)) } jobSet -= jobId @@ -417,12 +454,12 @@ class DAGScheduler( assert(partitions.size > 0) val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] val waiter = new JobWaiter(this, jobId, partitions.size, resultHandler) - eventQueue.put(JobSubmitted(jobId, rdd, func2, partitions.toArray, allowLocal, callSite, - waiter, properties)) + eventProcessActor ! JobSubmitted( + jobId, rdd, func2, partitions.toArray, allowLocal, callSite, waiter, properties) waiter } - def runJob[T, U: ClassManifest]( + def runJob[T, U: ClassTag]( rdd: RDD[T], func: (TaskContext, Iterator[T]) => U, partitions: Seq[Int], @@ -453,8 +490,8 @@ class DAGScheduler( val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] val partitions = (0 until rdd.partitions.size).toArray val jobId = nextJobId.getAndIncrement() - eventQueue.put(JobSubmitted(jobId, rdd, func2, partitions, allowLocal = false, callSite, - listener, properties)) + eventProcessActor ! JobSubmitted( + jobId, rdd, func2, partitions, allowLocal = false, callSite, listener, properties) listener.awaitResult() // Will throw an exception if the job fails } @@ -463,32 +500,34 @@ class DAGScheduler( */ def cancelJob(jobId: Int) { logInfo("Asked to cancel job " + jobId) - eventQueue.put(JobCancelled(jobId)) + eventProcessActor ! JobCancelled(jobId) } def cancelJobGroup(groupId: String) { logInfo("Asked to cancel job group " + groupId) - eventQueue.put(JobGroupCancelled(groupId)) + eventProcessActor ! JobGroupCancelled(groupId) } /** * Cancel all jobs that are running or waiting in the queue. */ def cancelAllJobs() { - eventQueue.put(AllJobsCancelled) + eventProcessActor ! AllJobsCancelled } /** - * Process one event retrieved from the event queue. - * Returns true if we should stop the event loop. + * Process one event retrieved from the event processing actor. + * + * @param event The event to be processed. + * @return `true` if we should stop the event loop. */ private[scheduler] def processEvent(event: DAGSchedulerEvent): Boolean = { event match { case JobSubmitted(jobId, rdd, func, partitions, allowLocal, callSite, listener, properties) => var finalStage: Stage = null try { - // New stage creation at times and if its not protected, the scheduler thread is killed. - // e.g. it can fail when jobs are run on HadoopRDD whose underlying hdfs files have been deleted + // New stage creation may throw an exception if, for example, jobs are run on a HadoopRDD + // whose underlying HDFS files have been deleted. 
finalStage = newStage(rdd, partitions.size, None, jobId, Some(callSite)) } catch { case e: Exception => @@ -521,7 +560,8 @@ class DAGScheduler( case JobGroupCancelled(groupId) => // Cancel all jobs belonging to this job group. // First finds all active jobs with this group id, and then kill stages for them. - val activeInGroup = activeJobs.filter(groupId == _.properties.get(SparkContext.SPARK_JOB_GROUP_ID)) + val activeInGroup = activeJobs.filter(activeJob => + groupId == activeJob.properties.get(SparkContext.SPARK_JOB_GROUP_ID)) val jobIds = activeInGroup.map(_.jobId) jobIds.foreach { handleJobCancellation } @@ -538,6 +578,19 @@ class DAGScheduler( handleExecutorLost(execId) case BeginEvent(task, taskInfo) => + for ( + job <- idToActiveJob.get(task.stageId); + stage <- stageIdToStage.get(task.stageId); + stageInfo <- stageToInfos.get(stage) + ) { + if (taskInfo.serializedSize > TASK_SIZE_TO_WARN * 1024 && + !stageInfo.emittedTaskSizeWarning) { + stageInfo.emittedTaskSizeWarning = true + logWarning(("Stage %d (%s) contains a task of very large " + + "size (%d KB). The maximum recommended task size is %d KB.").format( + task.stageId, stageInfo.name, taskInfo.serializedSize / 1024, TASK_SIZE_TO_WARN)) + } + } listenerBus.post(SparkListenerTaskStart(task, taskInfo)) case GettingResultEvent(task, taskInfo) => @@ -550,6 +603,13 @@ class DAGScheduler( case TaskSetFailed(taskSet, reason) => stageIdToStage.get(taskSet.stageId).foreach { abortStage(_, reason) } + case ResubmitFailedStages => + if (failed.size > 0) { + // Failed stages may be removed by job cancellation, so failed might be empty even if + // the ResubmitFailedStages event has been scheduled. + resubmitFailedStages() + } + case StopDAGScheduler => // Cancel any active jobs for (job <- activeJobs) { @@ -594,42 +654,6 @@ class DAGScheduler( } } - - /** - * The main event loop of the DAG scheduler, which waits for new-job / task-finished / failure - * events and responds by launching tasks. This runs in a dedicated thread and receives events - * via the eventQueue. - */ - private def run() { - SparkEnv.set(env) - - while (true) { - val event = eventQueue.poll(POLL_TIMEOUT, TimeUnit.MILLISECONDS) - if (event != null) { - logDebug("Got event of type " + event.getClass.getName) - } - this.synchronized { // needed in case other threads makes calls into methods of this class - if (event != null) { - if (processEvent(event)) { - return - } - } - - val time = System.currentTimeMillis() // TODO: use a pluggable clock for testability - // Periodically resubmit failed stages if some map output fetches have failed and we have - // waited at least RESUBMIT_TIMEOUT. We wait for this short time because when a node fails, - // tasks on many other nodes are bound to get a fetch failure, and they won't all get it at - // the same time, so we want to make sure we've identified all the reduce tasks that depend - // on the failed node. - if (failed.size > 0 && time > lastFetchFailureTime + RESUBMIT_TIMEOUT) { - resubmitFailedStages() - } else { - submitWaitingStages() - } - } - } - } - /** * Run a job on an RDD locally, assuming it has only a single partition and no dependencies. 
* We run the operation in a separate thread just in case it takes a bunch of time, so that we @@ -778,10 +802,12 @@ class DAGScheduler( */ private def handleTaskCompletion(event: CompletionEvent) { val task = event.task - val stage = stageIdToStage.getOrElse(task.stageId, { - logInfo("Redundant task %s completed for stage %s.".format(task, task.stageId)) + + if (!stageIdToStage.contains(task.stageId)) { + // Skip all the actions if the stage has been cancelled. return - }) + } + val stage = stageIdToStage(task.stageId) def markStageAsFinished(stage: Stage) = { val serviceTime = stageToInfos(stage).submissionTime match { @@ -790,7 +816,7 @@ class DAGScheduler( } logInfo("%s (%s) finished in %s s".format(stage, stage.name, serviceTime)) stageToInfos(stage).completionTime = Some(System.currentTimeMillis()) - listenerBus.post(StageCompleted(stageToInfos(stage))) + listenerBus.post(SparkListenerStageCompleted(stageToInfos(stage))) running -= stage } event.reason match { @@ -838,7 +864,7 @@ class DAGScheduler( logInfo("running: " + running) logInfo("waiting: " + waiting) logInfo("failed: " + failed) - if (stage.shuffleDep != None) { + if (stage.shuffleDep.isDefined) { // We supply true to increment the epoch number here in case this is a // recomputation of the map outputs. In that case, some nodes may have cached // locations with holes (from when we detected the error) and will need the @@ -885,13 +911,11 @@ class DAGScheduler( case FetchFailed(bmAddress, shuffleId, mapId, reduceId) => // Mark the stage that the reducer was in as unrunnable - stageIdToStage.get(task.stageId).foreach { failedStage => - running -= failedStage - failed += failedStage - // TODO: Cancel running tasks in the stage - logInfo("Marking " + failedStage + " (" + failedStage.name + - ") for resubmision due to a fetch failure") - } + val failedStage = stageIdToStage(task.stageId) + running -= failedStage + // TODO: Cancel running tasks in the stage + logInfo("Marking " + failedStage + " (" + failedStage.name + + ") for resubmision due to a fetch failure") // Mark the map whose fetch failed as broken in the map stage val mapStage = shuffleToMapStage(shuffleId) if (mapId != -1) { @@ -900,10 +924,16 @@ class DAGScheduler( } logInfo("The failed fetch was from " + mapStage + " (" + mapStage.name + "); marking it for resubmission") + if (failed.isEmpty && eventProcessActor != null) { + // Don't schedule an event to resubmit failed stages if failed isn't empty, because + // in that case the event will already have been scheduled. eventProcessActor may be + // null during unit tests. + import env.actorSystem.dispatcher + env.actorSystem.scheduler.scheduleOnce( + RESUBMIT_TIMEOUT, eventProcessActor, ResubmitFailedStages) + } + failed += failedStage failed += mapStage - // Remember that a fetch failed now; this is used to resubmit the broken - // stages later, after a small wait (to give other tasks the chance to fail) - lastFetchFailureTime = System.currentTimeMillis() // TODO: Use pluggable clock // TODO: mark the executor as failed only if there were lots of fetch failures on it if (bmAddress != null) { handleExecutorLost(bmAddress.executorId, Some(task.epoch)) @@ -916,8 +946,8 @@ class DAGScheduler( // Do nothing here; the TaskScheduler handles these failures and resubmits the task. case other => - // Unrecognized failure - abort all jobs depending on this stage - abortStage(stageIdToStage(task.stageId), task + " failed: " + other) + // Unrecognized failure - also do nothing. 
If the task fails repeatedly, the TaskScheduler + // will abort the job. } } @@ -1051,7 +1081,7 @@ class DAGScheduler( // If the RDD has narrow dependencies, pick the first partition of the first narrow dep // that has any placement preferences. Ideally we would choose based on transfer sizes, // but this will do for now. - rdd.dependencies.foreach(_ match { + rdd.dependencies.foreach { case n: NarrowDependency[_] => for (inPart <- n.getParents(partition)) { val locs = getPreferredLocs(n.rdd, inPart) @@ -1059,7 +1089,7 @@ class DAGScheduler( return locs } case _ => - }) + } Nil } @@ -1078,22 +1108,12 @@ class DAGScheduler( }} } - def removeStages(job: ActiveJob) = { - jobIdToStageIds(job.jobId).foreach(stageId => { - stageIdToStage.get(stageId).map( stage => { - pendingTasks -= stage - waiting -= stage - running -= stage - failed -= stage - }) - stageIdToStage -= stageId - }) - jobIdToStageIds -= job.jobId - } - def stop() { - eventQueue.put(StopDAGScheduler) + if (eventProcessActor != null) { + eventProcessActor ! StopDAGScheduler + } metadataCleaner.cancel() taskSched.stop() + listenerBus.stop() } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerEvent.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerEvent.scala index 9713a889cda45..add11876130b1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerEvent.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerEvent.scala @@ -72,4 +72,6 @@ private[scheduler] case class ExecutorLost(execId: String) extends DAGSchedulerE private[scheduler] case class TaskSetFailed(taskSet: TaskSet, reason: String) extends DAGSchedulerEvent +private[scheduler] case object ResubmitFailedStages extends DAGSchedulerEvent + private[scheduler] case object StopDAGScheduler extends DAGSchedulerEvent diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala index 9fe70028c5b6b..7b5c0e29ad840 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala @@ -27,23 +27,23 @@ private[spark] class DAGSchedulerSource(val dagScheduler: DAGScheduler, sc: Spar val metricRegistry = new MetricRegistry() val sourceName = "%s.DAGScheduler".format(sc.appName) - metricRegistry.register(MetricRegistry.name("stage", "failedStages", "number"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("stage", "failedStages"), new Gauge[Int] { override def getValue: Int = dagScheduler.failed.size }) - metricRegistry.register(MetricRegistry.name("stage", "runningStages", "number"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("stage", "runningStages"), new Gauge[Int] { override def getValue: Int = dagScheduler.running.size }) - metricRegistry.register(MetricRegistry.name("stage", "waitingStages", "number"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("stage", "waitingStages"), new Gauge[Int] { override def getValue: Int = dagScheduler.waiting.size }) - metricRegistry.register(MetricRegistry.name("job", "allJobs", "number"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("job", "allJobs"), new Gauge[Int] { override def getValue: Int = dagScheduler.numTotalJobs }) - metricRegistry.register(MetricRegistry.name("job", "activeJobs", "number"), new Gauge[Int] { + metricRegistry.register(MetricRegistry.name("job", "activeJobs"), new Gauge[Int] { override def 
getValue: Int = dagScheduler.activeJobs.size }) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorLossReason.scala b/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala similarity index 96% rename from core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorLossReason.scala rename to core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala index 5077b2b48b574..2bc43a9186449 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorLossReason.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.scheduler.cluster +package org.apache.spark.scheduler import org.apache.spark.executor.ExecutorExitCode diff --git a/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala index 1791ee660db02..cc10cc0849bc7 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala @@ -32,7 +32,7 @@ import scala.collection.JavaConversions._ /** * Parses and holds information about inputFormat (and files) specified as a parameter. */ -class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Class[_], +class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Class[_], val path: String) extends Logging { var mapreduceInputFormat: Boolean = false @@ -40,7 +40,7 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl validate() - override def toString(): String = { + override def toString: String = { "InputFormatInfo " + super.toString + " .. inputFormatClazz " + inputFormatClazz + ", path : " + path } @@ -103,7 +103,7 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl retval ++= SplitInfo.toSplitInfo(inputFormatClazz, path, split) } - return retval.toSet + retval.toSet } // This method does not expect failures, since validate has already passed ... @@ -121,18 +121,18 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl elem => retval ++= SplitInfo.toSplitInfo(inputFormatClazz, path, elem) ) - return retval.toSet + retval.toSet } private def findPreferredLocations(): Set[SplitInfo] = { - logDebug("mapreduceInputFormat : " + mapreduceInputFormat + ", mapredInputFormat : " + mapredInputFormat + + logDebug("mapreduceInputFormat : " + mapreduceInputFormat + ", mapredInputFormat : " + mapredInputFormat + ", inputFormatClazz : " + inputFormatClazz) if (mapreduceInputFormat) { - return prefLocsFromMapreduceInputFormat() + prefLocsFromMapreduceInputFormat() } else { assert(mapredInputFormat) - return prefLocsFromMapredInputFormat() + prefLocsFromMapredInputFormat() } } } @@ -143,14 +143,14 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl object InputFormatInfo { /** Computes the preferred locations based on input(s) and returned a location to block map. - Typical use of this method for allocation would follow some algo like this - (which is what we currently do in YARN branch) : + Typical use of this method for allocation would follow some algo like this: + a) For each host, count number of splits hosted on that host. b) Decrement the currently allocated containers on that host. c) Compute rack info for each host and update rack -> count map based on (b). 
d) Allocate nodes based on (c) - e) On the allocation result, ensure that we dont allocate "too many" jobs on a single node - (even if data locality on that is very high) : this is to prevent fragility of job if a single + e) On the allocation result, ensure that we dont allocate "too many" jobs on a single node + (even if data locality on that is very high) : this is to prevent fragility of job if a single (or small set of) hosts go down. go to (a) until required nodes are allocated. diff --git a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala index 60927831a159a..f8fa5a9f7a590 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala @@ -297,7 +297,7 @@ class JobLogger(val user: String, val logDirName: String) * When stage is completed, record stage completion status * @param stageCompleted Stage completed event */ - override def onStageCompleted(stageCompleted: StageCompleted) { + override def onStageCompleted(stageCompleted: SparkListenerStageCompleted) { stageLogInfo(stageCompleted.stage.stageId, "STAGE_ID=%d STATUS=COMPLETED".format( stageCompleted.stage.stageId)) } @@ -328,10 +328,6 @@ class JobLogger(val user: String, val logDirName: String) task.stageId + " SHUFFLE_ID=" + shuffleId + " MAP_ID=" + mapId + " REDUCE_ID=" + reduceId stageLogInfo(task.stageId, taskStatus) - case OtherFailure(message) => - taskStatus += " STATUS=FAILURE TID=" + taskInfo.taskId + - " STAGE_ID=" + task.stageId + " INFO=" + message - stageLogInfo(task.stageId, taskStatus) case _ => } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala index 596f9adde9493..4bc13c23d980b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala @@ -75,12 +75,12 @@ private[spark] class Pool( return schedulableNameToSchedulable(schedulableName) } for (schedulable <- schedulableQueue) { - var sched = schedulable.getSchedulableByName(schedulableName) + val sched = schedulable.getSchedulableByName(schedulableName) if (sched != null) { return sched } } - return null + null } override def executorLost(executorId: String, host: String) { @@ -92,7 +92,7 @@ private[spark] class Pool( for (schedulable <- schedulableQueue) { shouldRevive |= schedulable.checkSpeculatableTasks() } - return shouldRevive + shouldRevive } override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = { @@ -101,7 +101,7 @@ private[spark] class Pool( for (schedulable <- sortedSchedulableQueue) { sortedTaskSetQueue ++= schedulable.getSortedTaskSetQueue() } - return sortedTaskSetQueue + sortedTaskSetQueue } def increaseRunningTasks(taskNum: Int) { @@ -117,8 +117,4 @@ private[spark] class Pool( parent.decreaseRunningTasks(taskNum) } } - - override def hasPendingTasks(): Boolean = { - schedulableQueue.exists(_.hasPendingTasks()) - } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala index 310ec62ca8afe..28f3ba53b8425 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala @@ -32,7 +32,9 @@ private[spark] object ResultTask { // expensive on the master node if it needs to launch thousands of tasks. 
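// Purely illustrative sketch, not part of the patch above, of the recurring style
// cleanup in these files: the explicit `return` on a method's final expression is
// dropped, since the last expression of a Scala method is already its result.
object ReturnStyleSketch {
  // Before: early-exit style with explicit returns.
  def pickBefore(xs: List[Int]): Int = {
    if (xs.isEmpty) {
      return 0
    }
    return xs.max
  }

  // After: the last expression of each branch is the result.
  def pickAfter(xs: List[Int]): Int = {
    if (xs.isEmpty) 0 else xs.max
  }
}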
val serializedInfoCache = new TimeStampedHashMap[Int, Array[Byte]] - val metadataCleaner = new MetadataCleaner(MetadataCleanerType.RESULT_TASK, serializedInfoCache.clearOldValues) + // TODO: This object shouldn't have global variables + val metadataCleaner = new MetadataCleaner( + MetadataCleanerType.RESULT_TASK, serializedInfoCache.clearOldValues, new SparkConf) def serializeInfo(stageId: Int, rdd: RDD[_], func: (TaskContext, Iterator[_]) => _): Array[Byte] = { synchronized { diff --git a/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala b/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala index 1c7ea2dccc7d9..d573e125a33d1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala @@ -42,5 +42,4 @@ private[spark] trait Schedulable { def executorLost(executorId: String, host: String): Unit def checkSpeculatableTasks(): Boolean def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] - def hasPendingTasks(): Boolean } diff --git a/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala b/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala index 356fe56bf32c7..3cf995ea74244 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala @@ -20,7 +20,7 @@ package org.apache.spark.scheduler import java.io.{FileInputStream, InputStream} import java.util.{NoSuchElementException, Properties} -import org.apache.spark.Logging +import org.apache.spark.{SparkConf, Logging} import scala.xml.XML @@ -49,10 +49,10 @@ private[spark] class FIFOSchedulableBuilder(val rootPool: Pool) } } -private[spark] class FairSchedulableBuilder(val rootPool: Pool) +private[spark] class FairSchedulableBuilder(val rootPool: Pool, conf: SparkConf) extends SchedulableBuilder with Logging { - val schedulerAllocFile = Option(System.getProperty("spark.scheduler.allocation.file")) + val schedulerAllocFile = conf.getOption("spark.scheduler.allocation.file") val DEFAULT_SCHEDULER_FILE = "fairscheduler.xml" val FAIR_SCHEDULER_PROPERTIES = "spark.scheduler.pool" val DEFAULT_POOL_NAME = "default" diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala similarity index 82% rename from core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulerBackend.scala rename to core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala index 5367218faa685..02bdbba825781 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package org.apache.spark.scheduler.cluster +package org.apache.spark.scheduler import org.apache.spark.SparkContext /** - * A backend interface for cluster scheduling systems that allows plugging in different ones under + * A backend interface for scheduling systems that allows plugging in different ones under * ClusterScheduler. We assume a Mesos-like model where the application gets resource offers as * machines become available and can launch tasks on them. 
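// Illustrative configuration sketch, not part of the patch above. FairSchedulableBuilder
// now reads spark.scheduler.allocation.file from SparkConf rather than a JVM system
// property, so the fair-scheduler XML can be wired up when the context is built.
// The file path and application name below are hypothetical.
import org.apache.spark.{SparkConf, SparkContext}

object FairSchedulerConfSketch {
  def build(): SparkContext = {
    val conf = new SparkConf()
      .setMaster("local[4]")
      .setAppName("fair-scheduler-demo")
      .set("spark.scheduler.mode", "FAIR")
      .set("spark.scheduler.allocation.file", "/etc/spark/fairscheduler.xml")
    new SparkContext(conf)
  }
}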
*/ @@ -31,7 +31,4 @@ private[spark] trait SchedulerBackend { def defaultParallelism(): Int def killTask(taskId: Long, executorId: String): Unit = throw new UnsupportedOperationException - - // Memory used by each executor (in megabytes) - protected val executorMemory: Int = SparkContext.executorMemoryRequested } diff --git a/core/src/main/scala/org/apache/spark/scheduler/SchedulingAlgorithm.scala b/core/src/main/scala/org/apache/spark/scheduler/SchedulingAlgorithm.scala index 3418640b8c59e..5e62c8468f007 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SchedulingAlgorithm.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SchedulingAlgorithm.scala @@ -37,9 +37,9 @@ private[spark] class FIFOSchedulingAlgorithm extends SchedulingAlgorithm { res = math.signum(stageId1 - stageId2) } if (res < 0) { - return true + true } else { - return false + false } } } @@ -56,7 +56,6 @@ private[spark] class FairSchedulingAlgorithm extends SchedulingAlgorithm { val minShareRatio2 = runningTasks2.toDouble / math.max(minShare2, 1.0).toDouble val taskToWeightRatio1 = runningTasks1.toDouble / s1.weight.toDouble val taskToWeightRatio2 = runningTasks2.toDouble / s2.weight.toDouble - var res:Boolean = true var compare:Int = 0 if (s1Needy && !s2Needy) { @@ -70,11 +69,11 @@ private[spark] class FairSchedulingAlgorithm extends SchedulingAlgorithm { } if (compare < 0) { - return true + true } else if (compare > 0) { - return false + false } else { - return s1.name < s2.name + s1.name < s2.name } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/SchedulingMode.scala b/core/src/main/scala/org/apache/spark/scheduler/SchedulingMode.scala index 0a786deb161ae..3832ee7ff6eef 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SchedulingMode.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SchedulingMode.scala @@ -22,7 +22,7 @@ package org.apache.spark.scheduler * to order tasks amongst a Schedulable's sub-queues * "NONE" is used when the a Schedulable has no sub-queues. */ -object SchedulingMode extends Enumeration("FAIR", "FIFO", "NONE") { +object SchedulingMode extends Enumeration { type SchedulingMode = Value val FAIR,FIFO,NONE = Value diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala index 0f2deb4bcbbb2..a37ead563271a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala @@ -37,7 +37,9 @@ private[spark] object ShuffleMapTask { // expensive on the master node if it needs to launch thousands of tasks. val serializedInfoCache = new TimeStampedHashMap[Int, Array[Byte]] - val metadataCleaner = new MetadataCleaner(MetadataCleanerType.SHUFFLE_MAP_TASK, serializedInfoCache.clearOldValues) + // TODO: This object shouldn't have global variables + val metadataCleaner = new MetadataCleaner( + MetadataCleanerType.SHUFFLE_MAP_TASK, serializedInfoCache.clearOldValues, new SparkConf) def serializeInfo(stageId: Int, rdd: RDD[_], dep: ShuffleDependency[_,_]): Array[Byte] = { synchronized { @@ -152,7 +154,7 @@ private[spark] class ShuffleMapTask( try { // Obtain all the block writers for shuffle blocks. 
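// Illustrative sketch, not part of the patch above, of the Enumeration change made to
// SchedulingMode: the Enumeration constructors that took value names as strings are no
// longer available in Scala 2.10, so the values are declared directly and take their
// names from the vals. The object name here is an assumption for the example.
object SchedulingModeSketch extends Enumeration {
  type SchedulingModeSketch = Value
  val FAIR, FIFO, NONE = Value
}
// e.g. SchedulingModeSketch.withName("FAIR") == SchedulingModeSketch.FAIR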
- val ser = SparkEnv.get.serializerManager.get(dep.serializerClass) + val ser = SparkEnv.get.serializerManager.get(dep.serializerClass, SparkEnv.get.conf) shuffle = shuffleBlockManager.forMapTask(dep.shuffleId, partitionId, numOutputSplits, ser) // Write the map output to its associated buckets. diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala index 3841b5616dca2..d8e97c3b7c7b0 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler import java.util.Properties import org.apache.spark.util.{Utils, Distribution} -import org.apache.spark.{Logging, SparkContext, TaskEndReason} +import org.apache.spark.{Logging, TaskEndReason} import org.apache.spark.executor.TaskMetrics sealed trait SparkListenerEvents @@ -27,7 +27,7 @@ sealed trait SparkListenerEvents case class SparkListenerStageSubmitted(stage: StageInfo, properties: Properties) extends SparkListenerEvents -case class StageCompleted(val stage: StageInfo) extends SparkListenerEvents +case class SparkListenerStageCompleted(stage: StageInfo) extends SparkListenerEvents case class SparkListenerTaskStart(task: Task[_], taskInfo: TaskInfo) extends SparkListenerEvents @@ -43,11 +43,17 @@ case class SparkListenerJobStart(job: ActiveJob, stageIds: Array[Int], propertie case class SparkListenerJobEnd(job: ActiveJob, jobResult: JobResult) extends SparkListenerEvents +/** An event used in the listener to shutdown the listener daemon thread. */ +private[scheduler] case object SparkListenerShutdown extends SparkListenerEvents + +/** + * Interface for listening to events from the Spark scheduler. + */ trait SparkListener { /** * Called when a stage is completed, with information on the completed stage */ - def onStageCompleted(stageCompleted: StageCompleted) { } + def onStageCompleted(stageCompleted: SparkListenerStageCompleted) { } /** * Called when a stage is submitted @@ -63,7 +69,7 @@ trait SparkListener { * Called when a task begins remotely fetching its result (will not be called for tasks that do * not need to fetch the result remotely). 
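// Usage sketch for the listener rename above (StageCompleted -> SparkListenerStageCompleted):
// a listener written against 0.9 overrides the new event type; the rest of the SparkListener
// trait is unchanged. Registration via SparkContext#addSparkListener is assumed here.
import org.apache.spark.scheduler.{SparkListener, SparkListenerStageCompleted}

class StageTimingListener extends SparkListener {
  override def onStageCompleted(stageCompleted: SparkListenerStageCompleted) {
    val info = stageCompleted.stage
    println("Finished stage " + info.name + " (" + info.numTasks + " tasks)")
  }
}

// sc.addSparkListener(new StageTimingListener())   // sc: an existing SparkContext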
*/ - def onTaskGettingResult(taskGettingResult: SparkListenerTaskGettingResult) { } + def onTaskGettingResult(taskGettingResult: SparkListenerTaskGettingResult) { } /** * Called when a task ends @@ -86,7 +92,7 @@ trait SparkListener { * Simple SparkListener that logs a few summary statistics when each stage completes */ class StatsReportListener extends SparkListener with Logging { - override def onStageCompleted(stageCompleted: StageCompleted) { + override def onStageCompleted(stageCompleted: SparkListenerStageCompleted) { import org.apache.spark.scheduler.StatsReportListener._ implicit val sc = stageCompleted this.logInfo("Finished stage: " + stageCompleted.stage) @@ -112,27 +118,31 @@ class StatsReportListener extends SparkListener with Logging { } -object StatsReportListener extends Logging { +private[spark] object StatsReportListener extends Logging { //for profiling, the extremes are more interesting val percentiles = Array[Int](0,5,10,25,50,75,90,95,100) val probabilities = percentiles.map{_ / 100.0} val percentilesHeader = "\t" + percentiles.mkString("%\t") + "%" - def extractDoubleDistribution(stage:StageCompleted, getMetric: (TaskInfo,TaskMetrics) => Option[Double]): Option[Distribution] = { + def extractDoubleDistribution(stage: SparkListenerStageCompleted, + getMetric: (TaskInfo,TaskMetrics) => Option[Double]) + : Option[Distribution] = { Distribution(stage.stage.taskInfos.flatMap { case ((info,metric)) => getMetric(info, metric)}) } //is there some way to setup the types that I can get rid of this completely? - def extractLongDistribution(stage:StageCompleted, getMetric: (TaskInfo,TaskMetrics) => Option[Long]): Option[Distribution] = { + def extractLongDistribution(stage: SparkListenerStageCompleted, + getMetric: (TaskInfo,TaskMetrics) => Option[Long]) + : Option[Distribution] = { extractDoubleDistribution(stage, (info, metric) => getMetric(info,metric).map{_.toDouble}) } def showDistribution(heading: String, d: Distribution, formatNumber: Double => String) { val stats = d.statCounter - logInfo(heading + stats) val quantiles = d.getQuantiles(probabilities).map{formatNumber} + logInfo(heading + stats) logInfo(percentilesHeader) logInfo("\t" + quantiles.mkString("\t")) } @@ -147,12 +157,12 @@ object StatsReportListener extends Logging { } def showDistribution(heading:String, format: String, getMetric: (TaskInfo,TaskMetrics) => Option[Double]) - (implicit stage: StageCompleted) { + (implicit stage: SparkListenerStageCompleted) { showDistribution(heading, extractDoubleDistribution(stage, getMetric), format) } def showBytesDistribution(heading:String, getMetric: (TaskInfo,TaskMetrics) => Option[Long]) - (implicit stage: StageCompleted) { + (implicit stage: SparkListenerStageCompleted) { showBytesDistribution(heading, extractLongDistribution(stage, getMetric)) } @@ -169,12 +179,10 @@ object StatsReportListener extends Logging { } def showMillisDistribution(heading: String, getMetric: (TaskInfo, TaskMetrics) => Option[Long]) - (implicit stage: StageCompleted) { + (implicit stage: SparkListenerStageCompleted) { showMillisDistribution(heading, extractLongDistribution(stage, getMetric)) } - - val seconds = 1000L val minutes = seconds * 60 val hours = minutes * 60 @@ -197,10 +205,9 @@ object StatsReportListener extends Logging { } } +private case class RuntimePercentage(executorPct: Double, fetchPct: Option[Double], other: Double) - -case class RuntimePercentage(executorPct: Double, fetchPct: Option[Double], other: Double) -object RuntimePercentage { +private object RuntimePercentage { 
def apply(totalTime: Long, metrics: TaskMetrics): RuntimePercentage = { val denom = totalTime.toDouble val fetchTime = metrics.shuffleReadMetrics.map{_.fetchWaitTime} diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala index d5824e7954797..17b1328b86788 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala @@ -24,15 +24,17 @@ import scala.collection.mutable.{ArrayBuffer, SynchronizedBuffer} import org.apache.spark.Logging /** Asynchronously passes SparkListenerEvents to registered SparkListeners. */ -private[spark] class SparkListenerBus() extends Logging { - private val sparkListeners = new ArrayBuffer[SparkListener]() with SynchronizedBuffer[SparkListener] +private[spark] class SparkListenerBus extends Logging { + private val sparkListeners = new ArrayBuffer[SparkListener] with SynchronizedBuffer[SparkListener] /* Cap the capacity of the SparkListenerEvent queue so we get an explicit error (rather than * an OOM exception) if it's perpetually being added to more quickly than it's being drained. */ - private val EVENT_QUEUE_CAPACITY = 10000 + private val EVENT_QUEUE_CAPACITY = 10000 private val eventQueue = new LinkedBlockingQueue[SparkListenerEvents](EVENT_QUEUE_CAPACITY) private var queueFullErrorMessageLogged = false + // Create a new daemon thread to listen for events. This thread is stopped when it receives + // a SparkListenerShutdown event, using the stop method. new Thread("SparkListenerBus") { setDaemon(true) override def run() { @@ -41,7 +43,7 @@ private[spark] class SparkListenerBus() extends Logging { event match { case stageSubmitted: SparkListenerStageSubmitted => sparkListeners.foreach(_.onStageSubmitted(stageSubmitted)) - case stageCompleted: StageCompleted => + case stageCompleted: SparkListenerStageCompleted => sparkListeners.foreach(_.onStageCompleted(stageCompleted)) case jobStart: SparkListenerJobStart => sparkListeners.foreach(_.onJobStart(jobStart)) @@ -53,6 +55,9 @@ private[spark] class SparkListenerBus() extends Logging { sparkListeners.foreach(_.onTaskGettingResult(taskGettingResult)) case taskEnd: SparkListenerTaskEnd => sparkListeners.foreach(_.onTaskEnd(taskEnd)) + case SparkListenerShutdown => + // Get out of the while loop and shutdown the daemon thread + return case _ => } } @@ -80,7 +85,7 @@ private[spark] class SparkListenerBus() extends Logging { */ def waitUntilEmpty(timeoutMillis: Int): Boolean = { val finishTime = System.currentTimeMillis + timeoutMillis - while (!eventQueue.isEmpty()) { + while (!eventQueue.isEmpty) { if (System.currentTimeMillis > finishTime) { return false } @@ -88,7 +93,8 @@ private[spark] class SparkListenerBus() extends Logging { * add overhead in the general case. 
*/ Thread.sleep(10) } - return true + true } -} + def stop(): Unit = post(SparkListenerShutdown) +} diff --git a/core/src/main/scala/org/apache/spark/scheduler/Stage.scala b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala index 7cb3fe46e5baf..520c0b29e3536 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Stage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala @@ -46,7 +46,7 @@ private[spark] class Stage( callSite: Option[String]) extends Logging { - val isShuffleMap = shuffleDep != None + val isShuffleMap = shuffleDep.isDefined val numPartitions = rdd.partitions.size val outputLocs = Array.fill[List[MapStatus]](numPartitions)(Nil) var numAvailableOutputs = 0 @@ -96,7 +96,7 @@ private[spark] class Stage( def newAttemptId(): Int = { val id = nextAttemptId nextAttemptId += 1 - return id + id } val name = callSite.getOrElse(rdd.origin) diff --git a/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala index 93599dfdc85ff..c4d1ad5733b4c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala @@ -21,6 +21,12 @@ import scala.collection._ import org.apache.spark.executor.TaskMetrics +/** + * Stores information about a stage to pass from the scheduler to SparkListeners. + * + * taskInfos stores the metrics for all tasks that have completed, including redundant, speculated + * tasks. + */ class StageInfo( stage: Stage, val taskInfos: mutable.Buffer[(TaskInfo, TaskMetrics)] = mutable.Buffer[(TaskInfo, TaskMetrics)]() @@ -33,4 +39,5 @@ class StageInfo( val name = stage.name val numPartitions = stage.numPartitions val numTasks = stage.numTasks + var emittedTaskSizeWarning = false } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala index 4bae26f3a6a88..3c22edd5248f4 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala @@ -46,6 +46,8 @@ class TaskInfo( var failed = false + var serializedSize: Int = 0 + def markGettingResult(time: Long = System.currentTimeMillis) { gettingResultTime = time } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskLocality.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskLocality.scala index 47b0f387aa0c5..35de13c385187 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskLocality.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskLocality.scala @@ -18,9 +18,7 @@ package org.apache.spark.scheduler -private[spark] object TaskLocality - extends Enumeration("PROCESS_LOCAL", "NODE_LOCAL", "RACK_LOCAL", "ANY") -{ +private[spark] object TaskLocality extends Enumeration { // process local is expected to be used ONLY within tasksetmanager for now. val PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY = Value diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala index 7e468d0d67808..9d3e6158266b8 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala @@ -35,18 +35,15 @@ case class IndirectTaskResult[T](blockId: BlockId) extends TaskResult[T] with Se /** A TaskResult that contains the task's return value and accumulator updates. 
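// Standalone sketch of the shutdown pattern SparkListenerBus gains above: a bounded queue is
// drained by a daemon thread that exits when it sees the sentinel posted by stop(). The names
// here (EventBusSketch, Shutdown) are illustrative, not Spark classes.
import java.util.concurrent.LinkedBlockingQueue

sealed trait BusEvent
case class Message(payload: String) extends BusEvent
case object Shutdown extends BusEvent

class EventBusSketch {
  private val queue = new LinkedBlockingQueue[BusEvent](10000)  // bounded, like EVENT_QUEUE_CAPACITY

  new Thread("event-bus-sketch") {
    setDaemon(true)
    override def run() {
      while (true) {
        queue.take() match {
          case Message(p) => println("handling " + p)
          case Shutdown   => return    // leave the loop; the daemon thread ends here
        }
      }
    }
  }.start()

  def post(event: BusEvent) { queue.put(event) }
  def stop() { post(Shutdown) }
}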
*/ private[spark] -class DirectTaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var metrics: TaskMetrics) +class DirectTaskResult[T](var valueBytes: ByteBuffer, var accumUpdates: Map[Long, Any], var metrics: TaskMetrics) extends TaskResult[T] with Externalizable { - def this() = this(null.asInstanceOf[T], null, null) + def this() = this(null.asInstanceOf[ByteBuffer], null, null) override def writeExternal(out: ObjectOutput) { - val objectSer = SparkEnv.get.serializer.newInstance() - val bb = objectSer.serialize(value) - - out.writeInt(bb.remaining()) - Utils.writeByteBuffer(bb, out) + out.writeInt(valueBytes.remaining); + Utils.writeByteBuffer(valueBytes, out) out.writeInt(accumUpdates.size) for ((key, value) <- accumUpdates) { @@ -58,12 +55,10 @@ class DirectTaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var me override def readExternal(in: ObjectInput) { - val objectSer = SparkEnv.get.serializer.newInstance() - val blen = in.readInt() val byteVal = new Array[Byte](blen) in.readFully(byteVal) - value = objectSer.deserialize(ByteBuffer.wrap(byteVal)) + valueBytes = ByteBuffer.wrap(byteVal) val numUpdates = in.readInt if (numUpdates == 0) { @@ -76,4 +71,9 @@ class DirectTaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var me } metrics = in.readObject().asInstanceOf[TaskMetrics] } + + def value(): T = { + val resultSer = SparkEnv.get.serializer.newInstance() + resultSer.deserialize(valueBytes) + } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskResultGetter.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala similarity index 83% rename from core/src/main/scala/org/apache/spark/scheduler/cluster/TaskResultGetter.scala rename to core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala index 2064d97b49cc0..bdec08e968a45 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskResultGetter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala @@ -15,34 +15,34 @@ * limitations under the License. */ -package org.apache.spark.scheduler.cluster +package org.apache.spark.scheduler import java.nio.ByteBuffer import java.util.concurrent.{LinkedBlockingDeque, ThreadFactory, ThreadPoolExecutor, TimeUnit} import org.apache.spark._ import org.apache.spark.TaskState.TaskState -import org.apache.spark.scheduler.{DirectTaskResult, IndirectTaskResult, TaskResult} import org.apache.spark.serializer.SerializerInstance import org.apache.spark.util.Utils /** * Runs a thread pool that deserializes and remotely fetches (if necessary) task results. 
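// Sketch (illustrative, not Spark's serializer API): the DirectTaskResult change above keeps
// the task's return value as raw bytes (valueBytes) and only deserializes it when value() is
// called, instead of re-serializing inside writeExternal/readExternal.
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer

object JavaSerializerSketch {
  def serialize[T](t: T): ByteBuffer = {
    val bos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(bos)
    oos.writeObject(t)
    oos.close()
    ByteBuffer.wrap(bos.toByteArray)
  }
  def deserialize[T](bb: ByteBuffer): T = {
    val bytes = new Array[Byte](bb.remaining())
    bb.duplicate().get(bytes)
    new ObjectInputStream(new ByteArrayInputStream(bytes)).readObject().asInstanceOf[T]
  }
}

class LazyTaskResult[T](val valueBytes: ByteBuffer) {
  def value(): T = JavaSerializerSketch.deserialize[T](valueBytes)  // deserialize on demand
}

// val r = new LazyTaskResult[String](JavaSerializerSketch.serialize("ok")); r.value() == "ok"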
*/ -private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: ClusterScheduler) +private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedulerImpl) extends Logging { - private val THREADS = System.getProperty("spark.resultGetter.threads", "4").toInt + + private val THREADS = sparkEnv.conf.getInt("spark.resultGetter.threads", 4) private val getTaskResultExecutor = Utils.newDaemonFixedThreadPool( THREADS, "Result resolver thread") protected val serializer = new ThreadLocal[SerializerInstance] { override def initialValue(): SerializerInstance = { - return sparkEnv.closureSerializer.newInstance() + sparkEnv.closureSerializer.newInstance() } } def enqueueSuccessfulTask( - taskSetManager: ClusterTaskSetManager, tid: Long, serializedData: ByteBuffer) { + taskSetManager: TaskSetManager, tid: Long, serializedData: ByteBuffer) { getTaskResultExecutor.execute(new Runnable { override def run() { try { @@ -57,7 +57,7 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: ClusterSche * between when the task ended and when we tried to fetch the result, or if the * block manager had to flush the result. */ scheduler.handleFailedTask( - taskSetManager, tid, TaskState.FINISHED, Some(TaskResultLost)) + taskSetManager, tid, TaskState.FINISHED, TaskResultLost) return } val deserializedResult = serializer.get().deserialize[DirectTaskResult[_]]( @@ -71,22 +71,22 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: ClusterSche case cnf: ClassNotFoundException => val loader = Thread.currentThread.getContextClassLoader taskSetManager.abort("ClassNotFound with classloader: " + loader) - case ex => + case ex: Throwable => taskSetManager.abort("Exception while deserializing and fetching task: %s".format(ex)) } } }) } - def enqueueFailedTask(taskSetManager: ClusterTaskSetManager, tid: Long, taskState: TaskState, + def enqueueFailedTask(taskSetManager: TaskSetManager, tid: Long, taskState: TaskState, serializedData: ByteBuffer) { - var reason: Option[TaskEndReason] = None + var reason : TaskEndReason = UnknownReason getTaskResultExecutor.execute(new Runnable { override def run() { try { if (serializedData != null && serializedData.limit() > 0) { - reason = Some(serializer.get().deserialize[TaskEndReason]( - serializedData, getClass.getClassLoader)) + reason = serializer.get().deserialize[TaskEndReason]( + serializedData, getClass.getClassLoader) } } catch { case cnd: ClassNotFoundException => @@ -95,7 +95,7 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: ClusterSche val loader = Thread.currentThread.getContextClassLoader logError( "Could not deserialize TaskEndReason: ClassNotFound with classloader " + loader) - case ex => {} + case ex: Throwable => {} } scheduler.handleFailedTask(taskSetManager, tid, taskState, reason) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala index 10e047810827c..17b6d97e90e0a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala @@ -20,11 +20,12 @@ package org.apache.spark.scheduler import org.apache.spark.scheduler.SchedulingMode.SchedulingMode /** - * Low-level task scheduler interface, implemented by both ClusterScheduler and LocalScheduler. - * Each TaskScheduler schedulers task for a single SparkContext. 
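// The ThreadLocal pattern TaskResultGetter uses above, in isolation: serializer instances are
// not thread-safe, so each thread in the result-resolver pool lazily creates its own.
// mkSerializer stands in for sparkEnv.closureSerializer.newInstance().
class PerThreadInstance[S](mkSerializer: () => S) {
  private val local = new ThreadLocal[S] {
    override def initialValue(): S = mkSerializer()
  }
  def get(): S = local.get()
}

// e.g. val serializers = new PerThreadInstance(() => env.closureSerializer.newInstance())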
- * These schedulers get sets of tasks submitted to them from the DAGScheduler for each stage, - * and are responsible for sending the tasks to the cluster, running them, retrying if there - * are failures, and mitigating stragglers. They return events to the DAGScheduler. + * Low-level task scheduler interface, currently implemented exclusively by the ClusterScheduler. + * This interface allows plugging in different task schedulers. Each TaskScheduler schedulers tasks + * for a single SparkContext. These schedulers get sets of tasks submitted to them from the + * DAGScheduler for each stage, and are responsible for sending the tasks to the cluster, running + * them, retrying if there are failures, and mitigating stragglers. They return events to the + * DAGScheduler. */ private[spark] trait TaskScheduler { diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala similarity index 77% rename from core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala rename to core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index e42146d021eae..5b525155e9f62 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.scheduler.cluster +package org.apache.spark.scheduler import java.nio.ByteBuffer import java.util.concurrent.atomic.AtomicLong @@ -24,44 +24,49 @@ import java.util.{TimerTask, Timer} import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet -import scala.util.Random +import scala.concurrent.duration._ import org.apache.spark._ import org.apache.spark.TaskState.TaskState -import org.apache.spark.scheduler._ import org.apache.spark.scheduler.SchedulingMode.SchedulingMode /** - * The main TaskScheduler implementation, for running tasks on a cluster. Clients should first call - * initialize() and start(), then submit task sets through the runTasks method. - * - * This class can work with multiple types of clusters by acting through a SchedulerBackend. + * Schedules tasks for multiple types of clusters by acting through a SchedulerBackend. + * It can also work with a local setup by using a LocalBackend and setting isLocal to true. * It handles common logic, like determining a scheduling order across jobs, waking up to launch * speculative tasks, etc. * + * Clients should first call initialize() and start(), then submit task sets through the + * runTasks method. + * * THREADING: SchedulerBackends and task-submitting clients can call this class from multiple * threads, so it needs locks in public API methods to maintain its state. In addition, some * SchedulerBackends sycnchronize on themselves when they want to send events here, and then * acquire a lock on us, so we need to make sure that we don't try to lock the backend while * we are holding a lock on ourselves. 
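// The "waking up to launch speculative tasks" mentioned above is reworked below to use the
// actor system's scheduler instead of a hand-rolled sleeping thread. A minimal standalone
// version of that pattern (assuming Akka 2.2.x, as bundled with this release):
import scala.concurrent.duration._
import akka.actor.ActorSystem

val system = ActorSystem("sketch")
import system.dispatcher                       // ExecutionContext for the scheduled block

val interval = 100.milliseconds                // cf. spark.speculation.interval
system.scheduler.schedule(interval, interval) {
  // checkSpeculatableTasks() in TaskSchedulerImpl; just a heartbeat here
  println("periodic speculation check")
}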
*/ -private[spark] class ClusterScheduler(val sc: SparkContext) - extends TaskScheduler - with Logging +private[spark] class TaskSchedulerImpl( + val sc: SparkContext, + val maxTaskFailures: Int, + isLocal: Boolean = false) + extends TaskScheduler with Logging { + def this(sc: SparkContext) = this(sc, sc.conf.getInt("spark.task.maxFailures", 4)) + + val conf = sc.conf + // How often to check for speculative tasks - val SPECULATION_INTERVAL = System.getProperty("spark.speculation.interval", "100").toLong + val SPECULATION_INTERVAL = conf.getLong("spark.speculation.interval", 100) // Threshold above which we warn user initial TaskSet may be starved - val STARVATION_TIMEOUT = System.getProperty("spark.starvation.timeout", "15000").toLong + val STARVATION_TIMEOUT = conf.getLong("spark.starvation.timeout", 15000) - // ClusterTaskSetManagers are not thread safe, so any access to one should be synchronized + // TaskSetManagers are not thread safe, so any access to one should be synchronized // on this class. - val activeTaskSets = new HashMap[String, ClusterTaskSetManager] + val activeTaskSets = new HashMap[String, TaskSetManager] val taskIdToTaskSetId = new HashMap[Long, String] val taskIdToExecutorId = new HashMap[Long, String] - val taskSetTaskIds = new HashMap[String, HashSet[Long]] @volatile private var hasReceivedTask = false @volatile private var hasLaunchedTask = false @@ -90,7 +95,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) var rootPool: Pool = null // default scheduler is FIFO val schedulingMode: SchedulingMode = SchedulingMode.withName( - System.getProperty("spark.scheduler.mode", "FIFO")) + conf.get("spark.scheduler.mode", "FIFO")) // This is a var so that we can reset it for testing purposes. private[spark] var taskResultGetter = new TaskResultGetter(sc.env, this) @@ -99,8 +104,8 @@ private[spark] class ClusterScheduler(val sc: SparkContext) this.dagScheduler = dagScheduler } - def initialize(context: SchedulerBackend) { - backend = context + def initialize(backend: SchedulerBackend) { + this.backend = backend // temporarily set rootPool name to empty rootPool = new Pool("", schedulingMode, 0, 0) schedulableBuilder = { @@ -108,7 +113,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) case SchedulingMode.FIFO => new FIFOSchedulableBuilder(rootPool) case SchedulingMode.FAIR => - new FairSchedulableBuilder(rootPool) + new FairSchedulableBuilder(rootPool, conf) } } schedulableBuilder.buildPools() @@ -119,22 +124,13 @@ private[spark] class ClusterScheduler(val sc: SparkContext) override def start() { backend.start() - if (System.getProperty("spark.speculation", "false").toBoolean) { - new Thread("ClusterScheduler speculation check") { - setDaemon(true) - - override def run() { - logInfo("Starting speculative execution thread") - while (true) { - try { - Thread.sleep(SPECULATION_INTERVAL) - } catch { - case e: InterruptedException => {} - } - checkSpeculatableTasks() - } - } - }.start() + if (!isLocal && conf.getBoolean("spark.speculation", false)) { + logInfo("Starting speculative execution thread") + import sc.env.actorSystem.dispatcher + sc.env.actorSystem.scheduler.schedule(SPECULATION_INTERVAL milliseconds, + SPECULATION_INTERVAL milliseconds) { + checkSpeculatableTasks() + } } } @@ -142,12 +138,11 @@ private[spark] class ClusterScheduler(val sc: SparkContext) val tasks = taskSet.tasks logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks") this.synchronized { - val manager = new ClusterTaskSetManager(this, taskSet) + val 
manager = new TaskSetManager(this, taskSet, maxTaskFailures) activeTaskSets(taskSet.id) = manager schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties) - taskSetTaskIds(taskSet.id) = new HashSet[Long]() - if (!hasReceivedTask) { + if (!isLocal && !hasReceivedTask) { starvationTimer.scheduleAtFixedRate(new TimerTask() { override def run() { if (!hasLaunchedTask) { @@ -174,31 +169,25 @@ private[spark] class ClusterScheduler(val sc: SparkContext) // the stage. // 2. The task set manager has been created but no tasks has been scheduled. In this case, // simply abort the stage. - val taskIds = taskSetTaskIds(tsm.taskSet.id) - if (taskIds.size > 0) { - taskIds.foreach { tid => - val execId = taskIdToExecutorId(tid) - backend.killTask(tid, execId) - } + tsm.runningTasksSet.foreach { tid => + val execId = taskIdToExecutorId(tid) + backend.killTask(tid, execId) } + tsm.abort("Stage %s cancelled".format(stageId)) logInfo("Stage %d was cancelled".format(stageId)) - tsm.removeAllRunningTasks() - taskSetFinished(tsm) } } + /** + * Called to indicate that all task attempts (including speculated tasks) associated with the + * given TaskSetManager have completed, so state associated with the TaskSetManager should be + * cleaned up. + */ def taskSetFinished(manager: TaskSetManager): Unit = synchronized { - // Check to see if the given task set has been removed. This is possible in the case of - // multiple unrecoverable task failures (e.g. if the entire task set is killed when it has - // more than one running tasks). - if (activeTaskSets.contains(manager.taskSet.id)) { - activeTaskSets -= manager.taskSet.id - manager.parent.removeSchedulable(manager) - logInfo("Remove TaskSet %s from pool %s".format(manager.taskSet.id, manager.parent.name)) - taskIdToTaskSetId --= taskSetTaskIds(manager.taskSet.id) - taskIdToExecutorId --= taskSetTaskIds(manager.taskSet.id) - taskSetTaskIds.remove(manager.taskSet.id) - } + activeTaskSets -= manager.taskSet.id + manager.parent.removeSchedulable(manager) + logInfo("Removed TaskSet %s, whose tasks have all completed, from pool %s" + .format(manager.taskSet.id, manager.parent.name)) } /** @@ -218,11 +207,9 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } } - // Randomly shuffle offers to avoid always placing tasks on the same set of workers. - val shuffledOffers = Random.shuffle(offers) - // Build a list of tasks to assign to each worker. 
- val tasks = shuffledOffers.map(o => new ArrayBuffer[TaskDescription](o.cores)) - val availableCpus = shuffledOffers.map(o => o.cores).toArray + // Build a list of tasks to assign to each worker + val tasks = offers.map(o => new ArrayBuffer[TaskDescription](o.cores)) + val availableCpus = offers.map(o => o.cores).toArray val sortedTaskSets = rootPool.getSortedTaskSetQueue() for (taskSet <- sortedTaskSets) { logDebug("parentName: %s, name: %s, runningTasks: %s".format( @@ -235,14 +222,13 @@ private[spark] class ClusterScheduler(val sc: SparkContext) for (taskSet <- sortedTaskSets; maxLocality <- TaskLocality.values) { do { launchedTask = false - for (i <- 0 until shuffledOffers.size) { - val execId = shuffledOffers(i).executorId - val host = shuffledOffers(i).host + for (i <- 0 until offers.size) { + val execId = offers(i).executorId + val host = offers(i).host for (task <- taskSet.resourceOffer(execId, host, availableCpus(i), maxLocality)) { tasks(i) += task val tid = task.taskId taskIdToTaskSetId(tid) = taskSet.taskSet.id - taskSetTaskIds(taskSet.taskSet.id) += tid taskIdToExecutorId(tid) = execId activeExecutorIds += execId executorsByHost(host) += execId @@ -275,9 +261,6 @@ private[spark] class ClusterScheduler(val sc: SparkContext) case Some(taskSetId) => if (TaskState.isFinished(state)) { taskIdToTaskSetId.remove(tid) - if (taskSetTaskIds.contains(taskSetId)) { - taskSetTaskIds(taskSetId) -= tid - } taskIdToExecutorId.remove(tid) } activeTaskSets.get(taskSetId).foreach { taskSet => @@ -290,7 +273,9 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } } case None => - logInfo("Ignoring update with state %s from TID %s because its task set is gone" + logError( + ("Ignoring update with state %s for TID %s because its task set is gone (this is " + + "likely the result of receiving duplicate task finished status updates)") .format(state, tid)) } } catch { @@ -298,30 +283,30 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } } // Update the DAGScheduler without holding a lock on this, since that can deadlock - if (failedExecutor != None) { + if (failedExecutor.isDefined) { dagScheduler.executorLost(failedExecutor.get) backend.reviveOffers() } } - def handleTaskGettingResult(taskSetManager: ClusterTaskSetManager, tid: Long) { + def handleTaskGettingResult(taskSetManager: TaskSetManager, tid: Long) { taskSetManager.handleTaskGettingResult(tid) } def handleSuccessfulTask( - taskSetManager: ClusterTaskSetManager, + taskSetManager: TaskSetManager, tid: Long, taskResult: DirectTaskResult[_]) = synchronized { taskSetManager.handleSuccessfulTask(tid, taskResult) } def handleFailedTask( - taskSetManager: ClusterTaskSetManager, + taskSetManager: TaskSetManager, tid: Long, taskState: TaskState, - reason: Option[TaskEndReason]) = synchronized { + reason: TaskEndReason) = synchronized { taskSetManager.handleFailedTask(tid, taskState, reason) - if (taskState != TaskState.KILLED) { + if (!taskSetManager.isZombie && taskState != TaskState.KILLED) { // Need to revive offers again now that the task set manager state has been updated to // reflect failed tasks that need to be re-run. 
backend.reviveOffers() @@ -334,7 +319,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) // Have each task set throw a SparkException with the error for ((taskSetId, manager) <- activeTaskSets) { try { - manager.error(message) + manager.abort(message) } catch { case e: Exception => logError("Exception in error callback", e) } @@ -357,14 +342,12 @@ private[spark] class ClusterScheduler(val sc: SparkContext) taskResultGetter.stop() } - // sleeping for an arbitrary 5 seconds : to ensure that messages are sent out. - // TODO: Do something better ! - Thread.sleep(5000L) + // sleeping for an arbitrary 1 seconds to ensure that messages are sent out. + Thread.sleep(1000L) } override def defaultParallelism() = backend.defaultParallelism() - // Check for speculatable tasks in all our active jobs. def checkSpeculatableTasks() { var shouldRevive = false @@ -376,13 +359,6 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } } - // Check for pending tasks in all our active jobs. - def hasPendingTasks: Boolean = { - synchronized { - rootPool.hasPendingTasks() - } - } - def executorLost(executorId: String, reason: ExecutorLossReason) { var failedExecutor: Option[String] = None @@ -401,7 +377,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } } // Call dagScheduler.executorLost without holding the lock on this to prevent deadlock - if (failedExecutor != None) { + if (failedExecutor.isDefined) { dagScheduler.executorLost(failedExecutor.get) backend.reviveOffers() } @@ -441,7 +417,7 @@ private[spark] class ClusterScheduler(val sc: SparkContext) } -object ClusterScheduler { +private[spark] object TaskSchedulerImpl { /** * Used to balance containers across hosts. * diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 90f6bcefac0bf..3f0ee7a6d48cb 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -17,32 +17,686 @@ package org.apache.spark.scheduler -import java.nio.ByteBuffer +import java.io.NotSerializableException +import java.util.Arrays +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.HashMap +import scala.collection.mutable.HashSet +import scala.math.max +import scala.math.min + +import org.apache.spark.{ExceptionFailure, ExecutorLostFailure, FetchFailed, Logging, Resubmitted, + SparkEnv, Success, TaskEndReason, TaskKilled, TaskResultLost, TaskState} import org.apache.spark.TaskState.TaskState +import org.apache.spark.executor.TaskMetrics +import org.apache.spark.util.{Clock, SystemClock} + /** - * Tracks and schedules the tasks within a single TaskSet. This class keeps track of the status of - * each task and is responsible for retries on failure and locality. The main interfaces to it - * are resourceOffer, which asks the TaskSet whether it wants to run a task on one node, and - * statusUpdate, which tells it that one of its tasks changed state (e.g. finished). + * Schedules the tasks within a single TaskSet in the ClusterScheduler. This class keeps track of + * each task, retries tasks if they fail (up to a limited number of times), and + * handles locality-aware scheduling for this TaskSet via delay scheduling. The main interfaces + * to it are resourceOffer, which asks the TaskSet whether it wants to run a task on one node, + * and statusUpdate, which tells it that one of its tasks changed state (e.g. finished). 
+ * + * THREADING: This class is designed to only be called from code with a lock on the + * TaskScheduler (e.g. its event handlers). It should not be called from other threads. * - * THREADING: This class is designed to only be called from code with a lock on the TaskScheduler - * (e.g. its event handlers). It should not be called from other threads. + * @param sched the ClusterScheduler associated with the TaskSetManager + * @param taskSet the TaskSet to manage scheduling for + * @param maxTaskFailures if any particular task fails more than this number of times, the entire + * task set will be aborted */ -private[spark] trait TaskSetManager extends Schedulable { - def schedulableQueue = null - - def schedulingMode = SchedulingMode.NONE - - def taskSet: TaskSet +private[spark] class TaskSetManager( + sched: TaskSchedulerImpl, + val taskSet: TaskSet, + val maxTaskFailures: Int, + clock: Clock = SystemClock) + extends Schedulable with Logging +{ + val conf = sched.sc.conf + + // CPUs to request per task + val CPUS_PER_TASK = conf.getInt("spark.task.cpus", 1) + + // Quantile of tasks at which to start speculation + val SPECULATION_QUANTILE = conf.getDouble("spark.speculation.quantile", 0.75) + val SPECULATION_MULTIPLIER = conf.getDouble("spark.speculation.multiplier", 1.5) + + // Serializer for closures and tasks. + val env = SparkEnv.get + val ser = env.closureSerializer.newInstance() + + val tasks = taskSet.tasks + val numTasks = tasks.length + val copiesRunning = new Array[Int](numTasks) + val successful = new Array[Boolean](numTasks) + val numFailures = new Array[Int](numTasks) + val taskAttempts = Array.fill[List[TaskInfo]](numTasks)(Nil) + var tasksSuccessful = 0 + + var weight = 1 + var minShare = 0 + var priority = taskSet.priority + var stageId = taskSet.stageId + var name = "TaskSet_"+taskSet.stageId.toString + var parent: Pool = null + + val runningTasksSet = new HashSet[Long] + override def runningTasks = runningTasksSet.size + + // True once no more tasks should be launched for this task set manager. TaskSetManagers enter + // the zombie state once at least one attempt of each task has completed successfully, or if the + // task set is aborted (for example, because it was killed). TaskSetManagers remain in the zombie + // state until all tasks have finished running; we keep TaskSetManagers that are in the zombie + // state in order to continue to track and account for the running tasks. + // TODO: We should kill any running task attempts when the task set manager becomes a zombie. + var isZombie = false + + // Set of pending tasks for each executor. These collections are actually + // treated as stacks, in which new tasks are added to the end of the + // ArrayBuffer and removed from the end. This makes it faster to detect + // tasks that repeatedly fail because whenever a task failed, it is put + // back at the head of the stack. They are also only cleaned up lazily; + // when a task is launched, it remains in all the pending lists except + // the one that it was launched from, but gets removed from them later. + private val pendingTasksForExecutor = new HashMap[String, ArrayBuffer[Int]] + + // Set of pending tasks for each host. Similar to pendingTasksForExecutor, + // but at host level. + private val pendingTasksForHost = new HashMap[String, ArrayBuffer[Int]] + + // Set of pending tasks for each rack -- similar to the above. + private val pendingTasksForRack = new HashMap[String, ArrayBuffer[Int]] + + // Set containing pending tasks with no locality preferences. 
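// Standalone sketch of the "pending list as a stack" behaviour described above: new and
// re-queued indices are appended to an ArrayBuffer, and the dequeue (findTaskFromList further
// down) walks from the end, lazily discarding entries that have since launched or finished.
import scala.collection.mutable.ArrayBuffer

def popPending(list: ArrayBuffer[Int], stillRunnable: Int => Boolean): Option[Int] = {
  while (list.nonEmpty) {
    val index = list.last
    list.trimEnd(1)
    if (stillRunnable(index)) {
      return Some(index)   // the most recently re-added index (often a just-failed task) wins
    }
  }
  None
}

// e.g. popPending(pending, i => copiesRunning(i) == 0 && !successful(i))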
+ val pendingTasksWithNoPrefs = new ArrayBuffer[Int] + + // Set containing all pending tasks (also used as a stack, as above). + val allPendingTasks = new ArrayBuffer[Int] + + // Tasks that can be speculated. Since these will be a small fraction of total + // tasks, we'll just hold them in a HashSet. + val speculatableTasks = new HashSet[Int] + + // Task index, start and finish time for each task attempt (indexed by task ID) + val taskInfos = new HashMap[Long, TaskInfo] + + // How frequently to reprint duplicate exceptions in full, in milliseconds + val EXCEPTION_PRINT_INTERVAL = + conf.getLong("spark.logging.exceptionPrintInterval", 10000) + + // Map of recent exceptions (identified by string representation and top stack frame) to + // duplicate count (how many times the same exception has appeared) and time the full exception + // was printed. This should ideally be an LRU map that can drop old exceptions automatically. + val recentExceptions = HashMap[String, (Int, Long)]() + + // Figure out the current map output tracker epoch and set it on all tasks + val epoch = sched.mapOutputTracker.getEpoch + logDebug("Epoch for " + taskSet + ": " + epoch) + for (t <- tasks) { + t.epoch = epoch + } + + // Add all our tasks to the pending lists. We do this in reverse order + // of task index so that tasks with low indices get launched first. + for (i <- (0 until numTasks).reverse) { + addPendingTask(i) + } + + // Figure out which locality levels we have in our TaskSet, so we can do delay scheduling + val myLocalityLevels = computeValidLocalityLevels() + val localityWaits = myLocalityLevels.map(getLocalityWait) // Time to wait at each level + + // Delay scheduling variables: we keep track of our current locality level and the time we + // last launched a task at that level, and move up a level when localityWaits[curLevel] expires. + // We then move down if we manage to launch a "more local" task. + var currentLocalityIndex = 0 // Index of our current locality level in validLocalityLevels + var lastLaunchTime = clock.getTime() // Time we last launched a task at this level + + override def schedulableQueue = null + + override def schedulingMode = SchedulingMode.NONE + + /** + * Add a task to all the pending-task lists that it should be on. If readding is set, we are + * re-adding the task so only include it in each list if it's not already there. + */ + private def addPendingTask(index: Int, readding: Boolean = false) { + // Utility method that adds `index` to a list only if readding=false or it's not already there + def addTo(list: ArrayBuffer[Int]) { + if (!readding || !list.contains(index)) { + list += index + } + } + + var hadAliveLocations = false + for (loc <- tasks(index).preferredLocations) { + for (execId <- loc.executorId) { + if (sched.isExecutorAlive(execId)) { + addTo(pendingTasksForExecutor.getOrElseUpdate(execId, new ArrayBuffer)) + hadAliveLocations = true + } + } + if (sched.hasExecutorsAliveOnHost(loc.host)) { + addTo(pendingTasksForHost.getOrElseUpdate(loc.host, new ArrayBuffer)) + for (rack <- sched.getRackForHost(loc.host)) { + addTo(pendingTasksForRack.getOrElseUpdate(rack, new ArrayBuffer)) + } + hadAliveLocations = true + } + } + + if (!hadAliveLocations) { + // Even though the task might've had preferred locations, all of those hosts or executors + // are dead; put it in the no-prefs list so we can schedule it elsewhere right away. 
+ addTo(pendingTasksWithNoPrefs) + } + + if (!readding) { + allPendingTasks += index // No point scanning this whole list to find the old task there + } + } + + /** + * Return the pending tasks list for a given executor ID, or an empty list if + * there is no map entry for that host + */ + private def getPendingTasksForExecutor(executorId: String): ArrayBuffer[Int] = { + pendingTasksForExecutor.getOrElse(executorId, ArrayBuffer()) + } + + /** + * Return the pending tasks list for a given host, or an empty list if + * there is no map entry for that host + */ + private def getPendingTasksForHost(host: String): ArrayBuffer[Int] = { + pendingTasksForHost.getOrElse(host, ArrayBuffer()) + } + + /** + * Return the pending rack-local task list for a given rack, or an empty list if + * there is no map entry for that rack + */ + private def getPendingTasksForRack(rack: String): ArrayBuffer[Int] = { + pendingTasksForRack.getOrElse(rack, ArrayBuffer()) + } + + /** + * Dequeue a pending task from the given list and return its index. + * Return None if the list is empty. + * This method also cleans up any tasks in the list that have already + * been launched, since we want that to happen lazily. + */ + private def findTaskFromList(list: ArrayBuffer[Int]): Option[Int] = { + while (!list.isEmpty) { + val index = list.last + list.trimEnd(1) + if (copiesRunning(index) == 0 && !successful(index)) { + return Some(index) + } + } + None + } + + /** Check whether a task is currently running an attempt on a given host */ + private def hasAttemptOnHost(taskIndex: Int, host: String): Boolean = { + taskAttempts(taskIndex).exists(_.host == host) + } + + /** + * Return a speculative task for a given executor if any are available. The task should not have + * an attempt running on this host, in case the host is slow. In addition, the task should meet + * the given locality constraint. 
+ */ + private def findSpeculativeTask(execId: String, host: String, locality: TaskLocality.Value) + : Option[(Int, TaskLocality.Value)] = + { + speculatableTasks.retain(index => !successful(index)) // Remove finished tasks from set + if (!speculatableTasks.isEmpty) { + // Check for process-local or preference-less tasks; note that tasks can be process-local + // on multiple nodes when we replicate cached blocks, as in Spark Streaming + for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) { + val prefs = tasks(index).preferredLocations + val executors = prefs.flatMap(_.executorId) + if (prefs.size == 0 || executors.contains(execId)) { + speculatableTasks -= index + return Some((index, TaskLocality.PROCESS_LOCAL)) + } + } + + // Check for node-local tasks + if (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) { + for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) { + val locations = tasks(index).preferredLocations.map(_.host) + if (locations.contains(host)) { + speculatableTasks -= index + return Some((index, TaskLocality.NODE_LOCAL)) + } + } + } + + // Check for rack-local tasks + if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) { + for (rack <- sched.getRackForHost(host)) { + for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) { + val racks = tasks(index).preferredLocations.map(_.host).map(sched.getRackForHost) + if (racks.contains(rack)) { + speculatableTasks -= index + return Some((index, TaskLocality.RACK_LOCAL)) + } + } + } + } + + // Check for non-local tasks + if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) { + for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) { + speculatableTasks -= index + return Some((index, TaskLocality.ANY)) + } + } + } + + None + } + + /** + * Dequeue a pending task for a given node and return its index and locality level. + * Only search for tasks matching the given locality constraint. + */ + private def findTask(execId: String, host: String, locality: TaskLocality.Value) + : Option[(Int, TaskLocality.Value)] = + { + for (index <- findTaskFromList(getPendingTasksForExecutor(execId))) { + return Some((index, TaskLocality.PROCESS_LOCAL)) + } + + if (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) { + for (index <- findTaskFromList(getPendingTasksForHost(host))) { + return Some((index, TaskLocality.NODE_LOCAL)) + } + } + + if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) { + for { + rack <- sched.getRackForHost(host) + index <- findTaskFromList(getPendingTasksForRack(rack)) + } { + return Some((index, TaskLocality.RACK_LOCAL)) + } + } + + // Look for no-pref tasks after rack-local tasks since they can run anywhere. 
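// The search order implied by findSpeculativeTask/findTask above, as a standalone sketch
// (Spark's TaskLocality is private[spark], so a local enumeration stands in for it).
// Enumeration values are ordered by declaration, so "is this level allowed under the current
// constraint?" reduces to a comparison, which is essentially what TaskLocality.isAllowed does.
object LocalitySketch extends Enumeration {
  val PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY = Value
}

def isAllowed(constraint: LocalitySketch.Value, level: LocalitySketch.Value): Boolean =
  level <= constraint

// Under a NODE_LOCAL constraint only the first two levels may be scheduled right away:
val schedulableNow = LocalitySketch.values.toSeq.filter(isAllowed(LocalitySketch.NODE_LOCAL, _))
// -> Seq(PROCESS_LOCAL, NODE_LOCAL)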
+ for (index <- findTaskFromList(pendingTasksWithNoPrefs)) { + return Some((index, TaskLocality.PROCESS_LOCAL)) + } + + if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) { + for (index <- findTaskFromList(allPendingTasks)) { + return Some((index, TaskLocality.ANY)) + } + } + + // Finally, if all else has failed, find a speculative task + findSpeculativeTask(execId, host, locality) + } + + /** + * Respond to an offer of a single executor from the scheduler by finding a task + */ def resourceOffer( execId: String, host: String, availableCpus: Int, maxLocality: TaskLocality.TaskLocality) - : Option[TaskDescription] + : Option[TaskDescription] = + { + if (!isZombie && availableCpus >= CPUS_PER_TASK) { + val curTime = clock.getTime() + + var allowedLocality = getAllowedLocalityLevel(curTime) + if (allowedLocality > maxLocality) { + allowedLocality = maxLocality // We're not allowed to search for farther-away tasks + } + + findTask(execId, host, allowedLocality) match { + case Some((index, taskLocality)) => { + // Found a task; do some bookkeeping and return a task description + val task = tasks(index) + val taskId = sched.newTaskId() + // Figure out whether this should count as a preferred launch + logInfo("Starting task %s:%d as TID %s on executor %s: %s (%s)".format( + taskSet.id, index, taskId, execId, host, taskLocality)) + // Do various bookkeeping + copiesRunning(index) += 1 + val info = new TaskInfo(taskId, index, curTime, execId, host, taskLocality) + taskInfos(taskId) = info + taskAttempts(index) = info :: taskAttempts(index) + // Update our locality level for delay scheduling + currentLocalityIndex = getLocalityIndex(taskLocality) + lastLaunchTime = curTime + // Serialize and return the task + val startTime = clock.getTime() + // We rely on the DAGScheduler to catch non-serializable closures and RDDs, so in here + // we assume the task can be serialized without exceptions. + val serializedTask = Task.serializeWithDependencies( + task, sched.sc.addedFiles, sched.sc.addedJars, ser) + val timeTaken = clock.getTime() - startTime + addRunningTask(taskId) + logInfo("Serialized task %s:%d as %d bytes in %d ms".format( + taskSet.id, index, serializedTask.limit, timeTaken)) + val taskName = "task %s:%d".format(taskSet.id, index) + sched.dagScheduler.taskStarted(task, info) + return Some(new TaskDescription(taskId, execId, taskName, index, serializedTask)) + } + case _ => + } + } + None + } + + private def maybeFinishTaskSet() { + if (isZombie && runningTasks == 0) { + sched.taskSetFinished(this) + } + } + + /** + * Get the level we can launch tasks according to delay scheduling, based on current wait time. + */ + private def getAllowedLocalityLevel(curTime: Long): TaskLocality.TaskLocality = { + while (curTime - lastLaunchTime >= localityWaits(currentLocalityIndex) && + currentLocalityIndex < myLocalityLevels.length - 1) + { + // Jump to the next locality level, and remove our waiting time for the current one since + // we don't want to count it again on the next one + lastLaunchTime += localityWaits(currentLocalityIndex) + currentLocalityIndex += 1 + } + myLocalityLevels(currentLocalityIndex) + } + + /** + * Find the index in myLocalityLevels for a given locality. This is also designed to work with + * localities that are not in myLocalityLevels (in case we somehow get those) by returning the + * next-biggest level we have. Uses the fact that the last value in myLocalityLevels is ANY. 
+ */ + def getLocalityIndex(locality: TaskLocality.TaskLocality): Int = { + var index = 0 + while (locality > myLocalityLevels(index)) { + index += 1 + } + index + } + + def handleTaskGettingResult(tid: Long) = { + val info = taskInfos(tid) + info.markGettingResult() + sched.dagScheduler.taskGettingResult(tasks(info.index), info) + } + + /** + * Marks the task as successful and notifies the DAGScheduler that a task has ended. + */ + def handleSuccessfulTask(tid: Long, result: DirectTaskResult[_]) = { + val info = taskInfos(tid) + val index = info.index + info.markSuccessful() + removeRunningTask(tid) + sched.dagScheduler.taskEnded( + tasks(index), Success, result.value, result.accumUpdates, info, result.metrics) + if (!successful(index)) { + tasksSuccessful += 1 + logInfo("Finished TID %s in %d ms on %s (progress: %d/%d)".format( + tid, info.duration, info.host, tasksSuccessful, numTasks)) + // Mark successful and stop if all the tasks have succeeded. + successful(index) = true + if (tasksSuccessful == numTasks) { + isZombie = true + } + } else { + logInfo("Ignorning task-finished event for TID " + tid + " because task " + + index + " has already completed successfully") + } + maybeFinishTaskSet() + } + + /** + * Marks the task as failed, re-adds it to the list of pending tasks, and notifies the + * DAG Scheduler. + */ + def handleFailedTask(tid: Long, state: TaskState, reason: TaskEndReason) { + val info = taskInfos(tid) + if (info.failed) { + return + } + removeRunningTask(tid) + info.markFailed() + val index = info.index + copiesRunning(index) -= 1 + if (!isZombie) { + logWarning("Lost TID %s (task %s:%d)".format(tid, taskSet.id, index)) + } + var taskMetrics : TaskMetrics = null + var failureReason = "unknown" + reason match { + case fetchFailed: FetchFailed => + logWarning("Loss was due to fetch failure from " + fetchFailed.bmAddress) + if (!successful(index)) { + successful(index) = true + tasksSuccessful += 1 + } + isZombie = true + + case TaskKilled => + logWarning("Task %d was killed.".format(tid)) + + case ef: ExceptionFailure => + taskMetrics = ef.metrics.getOrElse(null) + if (ef.className == classOf[NotSerializableException].getName()) { + // If the task result wasn't serializable, there's no point in trying to re-execute it. 
+ logError("Task %s:%s had a not serializable result: %s; not retrying".format( + taskSet.id, index, ef.description)) + abort("Task %s:%s had a not serializable result: %s".format( + taskSet.id, index, ef.description)) + return + } + val key = ef.description + failureReason = "Exception failure: %s".format(ef.description) + val now = clock.getTime() + val (printFull, dupCount) = { + if (recentExceptions.contains(key)) { + val (dupCount, printTime) = recentExceptions(key) + if (now - printTime > EXCEPTION_PRINT_INTERVAL) { + recentExceptions(key) = (0, now) + (true, 0) + } else { + recentExceptions(key) = (dupCount + 1, printTime) + (false, dupCount + 1) + } + } else { + recentExceptions(key) = (0, now) + (true, 0) + } + } + if (printFull) { + val locs = ef.stackTrace.map(loc => "\tat %s".format(loc.toString)) + logWarning("Loss was due to %s\n%s\n%s".format( + ef.className, ef.description, locs.mkString("\n"))) + } else { + logInfo("Loss was due to %s [duplicate %d]".format(ef.description, dupCount)) + } + + case TaskResultLost => + failureReason = "Lost result for TID %s on host %s".format(tid, info.host) + logWarning(failureReason) + + case _ => {} + } + sched.dagScheduler.taskEnded(tasks(index), reason, null, null, info, taskMetrics) + addPendingTask(index) + if (!isZombie && state != TaskState.KILLED) { + numFailures(index) += 1 + if (numFailures(index) >= maxTaskFailures) { + logError("Task %s:%d failed %d times; aborting job".format( + taskSet.id, index, maxTaskFailures)) + abort("Task %s:%d failed %d times (most recent failure: %s)".format( + taskSet.id, index, maxTaskFailures, failureReason)) + return + } + } + maybeFinishTaskSet() + } + + def abort(message: String) { + // TODO: Kill running tasks if we were not terminated due to a Mesos error + sched.dagScheduler.taskSetFailed(taskSet, message) + isZombie = true + maybeFinishTaskSet() + } + + /** If the given task ID is not in the set of running tasks, adds it. + * + * Used to keep track of the number of running tasks, for enforcing scheduling policies. + */ + def addRunningTask(tid: Long) { + if (runningTasksSet.add(tid) && parent != null) { + parent.increaseRunningTasks(1) + } + } + + /** If the given task ID is in the set of running tasks, removes it. */ + def removeRunningTask(tid: Long) { + if (runningTasksSet.remove(tid) && parent != null) { + parent.decreaseRunningTasks(1) + } + } + + override def getSchedulableByName(name: String): Schedulable = { + null + } + + override def addSchedulable(schedulable: Schedulable) {} + + override def removeSchedulable(schedulable: Schedulable) {} + + override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = { + var sortedTaskSetQueue = new ArrayBuffer[TaskSetManager]() + sortedTaskSetQueue += this + sortedTaskSetQueue + } + + /** Called by TaskScheduler when an executor is lost so we can re-enqueue our tasks */ + override def executorLost(execId: String, host: String) { + logInfo("Re-queueing tasks for " + execId + " from TaskSet " + taskSet.id) + + // Re-enqueue pending tasks for this host based on the status of the cluster -- for example, a + // task that used to have locations on only this host might now go to the no-prefs list. Note + // that it's okay if we add a task to the same queue twice (if it had multiple preferred + // locations), because findTaskFromList will skip already-running tasks. 
+ for (index <- getPendingTasksForExecutor(execId)) { + addPendingTask(index, readding=true) + } + for (index <- getPendingTasksForHost(host)) { + addPendingTask(index, readding=true) + } + + // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage + if (tasks(0).isInstanceOf[ShuffleMapTask]) { + for ((tid, info) <- taskInfos if info.executorId == execId) { + val index = taskInfos(tid).index + if (successful(index)) { + successful(index) = false + copiesRunning(index) -= 1 + tasksSuccessful -= 1 + addPendingTask(index) + // Tell the DAGScheduler that this task was resubmitted so that it doesn't think our + // stage finishes when a total of tasks.size tasks finish. + sched.dagScheduler.taskEnded(tasks(index), Resubmitted, null, null, info, null) + } + } + } + // Also re-enqueue any tasks that were running on the node + for ((tid, info) <- taskInfos if info.running && info.executorId == execId) { + handleFailedTask(tid, TaskState.FAILED, ExecutorLostFailure) + } + } + + /** + * Check for tasks to be speculated and return true if there are any. This is called periodically + * by the TaskScheduler. + * + * TODO: To make this scale to large jobs, we need to maintain a list of running tasks, so that + * we don't scan the whole task set. It might also help to make this sorted by launch time. + */ + override def checkSpeculatableTasks(): Boolean = { + // Can't speculate if we only have one task, and no need to speculate if the task set is a + // zombie. + if (isZombie || numTasks == 1) { + return false + } + var foundTasks = false + val minFinishedForSpeculation = (SPECULATION_QUANTILE * numTasks).floor.toInt + logDebug("Checking for speculative tasks: minFinished = " + minFinishedForSpeculation) + if (tasksSuccessful >= minFinishedForSpeculation && tasksSuccessful > 0) { + val time = clock.getTime() + val durations = taskInfos.values.filter(_.successful).map(_.duration).toArray + Arrays.sort(durations) + val medianDuration = durations(min((0.5 * tasksSuccessful).round.toInt, durations.size - 1)) + val threshold = max(SPECULATION_MULTIPLIER * medianDuration, 100) + // TODO: Threshold should also look at standard deviation of task durations and have a lower + // bound based on that. + logDebug("Task length threshold for speculation: " + threshold) + for ((tid, info) <- taskInfos) { + val index = info.index + if (!successful(index) && copiesRunning(index) == 1 && info.timeRunning(time) > threshold && + !speculatableTasks.contains(index)) { + logInfo( + "Marking task %s:%d (on %s) as speculatable because it ran more than %.0f ms".format( + taskSet.id, index, info.host, threshold)) + speculatableTasks += index + foundTasks = true + } + } + } + foundTasks + } + + private def getLocalityWait(level: TaskLocality.TaskLocality): Long = { + val defaultWait = conf.get("spark.locality.wait", "3000") + level match { + case TaskLocality.PROCESS_LOCAL => + conf.get("spark.locality.wait.process", defaultWait).toLong + case TaskLocality.NODE_LOCAL => + conf.get("spark.locality.wait.node", defaultWait).toLong + case TaskLocality.RACK_LOCAL => + conf.get("spark.locality.wait.rack", defaultWait).toLong + case TaskLocality.ANY => + 0L + } + } - def error(message: String) + /** + * Compute the locality levels used in this TaskSet. Assumes that all tasks have already been + * added to queues using addPendingTask. 
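// The delay-scheduling waits read by getLocalityWait above are ordinary configuration. A
// hedged example of tightening them for a small, homogeneous cluster (the values are
// illustrative only; ANY never waits, as the code above hard-codes 0L):
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.locality.wait", "1000")          // default wait per level, in milliseconds
  .set("spark.locality.wait.process", "500")   // override for PROCESS_LOCAL
  .set("spark.locality.wait.node", "1000")     // override for NODE_LOCAL
  .set("spark.locality.wait.rack", "2000")     // override for RACK_LOCAL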
+ */ + private def computeValidLocalityLevels(): Array[TaskLocality.TaskLocality] = { + import TaskLocality.{PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY} + val levels = new ArrayBuffer[TaskLocality.TaskLocality] + if (!pendingTasksForExecutor.isEmpty && getLocalityWait(PROCESS_LOCAL) != 0) { + levels += PROCESS_LOCAL + } + if (!pendingTasksForHost.isEmpty && getLocalityWait(NODE_LOCAL) != 0) { + levels += NODE_LOCAL + } + if (!pendingTasksForRack.isEmpty && getLocalityWait(RACK_LOCAL) != 0) { + levels += RACK_LOCAL + } + levels += ANY + logDebug("Valid locality levels for " + taskSet + ": " + levels.mkString(", ")) + levels.toArray + } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/WorkerOffer.scala b/core/src/main/scala/org/apache/spark/scheduler/WorkerOffer.scala similarity index 95% rename from core/src/main/scala/org/apache/spark/scheduler/cluster/WorkerOffer.scala rename to core/src/main/scala/org/apache/spark/scheduler/WorkerOffer.scala index 938f62883a104..ba6bab3f91a65 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/WorkerOffer.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/WorkerOffer.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.scheduler.cluster +package org.apache.spark.scheduler /** * Represents free resources available on an executor. diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala deleted file mode 100644 index 5afd3f3e0629d..0000000000000 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala +++ /dev/null @@ -1,712 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.scheduler.cluster - -import java.io.NotSerializableException -import java.util.Arrays - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap -import scala.collection.mutable.HashSet -import scala.math.max -import scala.math.min - -import org.apache.spark.{ExceptionFailure, FetchFailed, Logging, Resubmitted, SparkEnv, - Success, TaskEndReason, TaskKilled, TaskResultLost, TaskState} -import org.apache.spark.TaskState.TaskState -import org.apache.spark.scheduler._ -import org.apache.spark.util.{SystemClock, Clock} - - -/** - * Schedules the tasks within a single TaskSet in the ClusterScheduler. This class keeps track of - * the status of each task, retries tasks if they fail (up to a limited number of times), and - * handles locality-aware scheduling for this TaskSet via delay scheduling. 
The main interfaces - * to it are resourceOffer, which asks the TaskSet whether it wants to run a task on one node, - * and statusUpdate, which tells it that one of its tasks changed state (e.g. finished). - * - * THREADING: This class is designed to only be called from code with a lock on the - * ClusterScheduler (e.g. its event handlers). It should not be called from other threads. - */ -private[spark] class ClusterTaskSetManager( - sched: ClusterScheduler, - val taskSet: TaskSet, - clock: Clock = SystemClock) - extends TaskSetManager - with Logging -{ - // CPUs to request per task - val CPUS_PER_TASK = System.getProperty("spark.task.cpus", "1").toInt - - // Maximum times a task is allowed to fail before failing the job - val MAX_TASK_FAILURES = System.getProperty("spark.task.maxFailures", "4").toInt - - // Quantile of tasks at which to start speculation - val SPECULATION_QUANTILE = System.getProperty("spark.speculation.quantile", "0.75").toDouble - val SPECULATION_MULTIPLIER = System.getProperty("spark.speculation.multiplier", "1.5").toDouble - - // Serializer for closures and tasks. - val env = SparkEnv.get - val ser = env.closureSerializer.newInstance() - - val tasks = taskSet.tasks - val numTasks = tasks.length - val copiesRunning = new Array[Int](numTasks) - val successful = new Array[Boolean](numTasks) - val numFailures = new Array[Int](numTasks) - val taskAttempts = Array.fill[List[TaskInfo]](numTasks)(Nil) - var tasksSuccessful = 0 - - var weight = 1 - var minShare = 0 - var priority = taskSet.priority - var stageId = taskSet.stageId - var name = "TaskSet_"+taskSet.stageId.toString - var parent: Pool = null - - var runningTasks = 0 - private val runningTasksSet = new HashSet[Long] - - // Set of pending tasks for each executor. These collections are actually - // treated as stacks, in which new tasks are added to the end of the - // ArrayBuffer and removed from the end. This makes it faster to detect - // tasks that repeatedly fail because whenever a task failed, it is put - // back at the head of the stack. They are also only cleaned up lazily; - // when a task is launched, it remains in all the pending lists except - // the one that it was launched from, but gets removed from them later. - private val pendingTasksForExecutor = new HashMap[String, ArrayBuffer[Int]] - - // Set of pending tasks for each host. Similar to pendingTasksForExecutor, - // but at host level. - private val pendingTasksForHost = new HashMap[String, ArrayBuffer[Int]] - - // Set of pending tasks for each rack -- similar to the above. - private val pendingTasksForRack = new HashMap[String, ArrayBuffer[Int]] - - // Set containing pending tasks with no locality preferences. - val pendingTasksWithNoPrefs = new ArrayBuffer[Int] - - // Set containing all pending tasks (also used as a stack, as above). - val allPendingTasks = new ArrayBuffer[Int] - - // Tasks that can be speculated. Since these will be a small fraction of total - // tasks, we'll just hold them in a HashSet. - val speculatableTasks = new HashSet[Int] - - // Task index, start and finish time for each task attempt (indexed by task ID) - val taskInfos = new HashMap[Long, TaskInfo] - - // Did the TaskSet fail? 
- var failed = false - var causeOfFailure = "" - - // How frequently to reprint duplicate exceptions in full, in milliseconds - val EXCEPTION_PRINT_INTERVAL = - System.getProperty("spark.logging.exceptionPrintInterval", "10000").toLong - - // Map of recent exceptions (identified by string representation and top stack frame) to - // duplicate count (how many times the same exception has appeared) and time the full exception - // was printed. This should ideally be an LRU map that can drop old exceptions automatically. - val recentExceptions = HashMap[String, (Int, Long)]() - - // Figure out the current map output tracker epoch and set it on all tasks - val epoch = sched.mapOutputTracker.getEpoch - logDebug("Epoch for " + taskSet + ": " + epoch) - for (t <- tasks) { - t.epoch = epoch - } - - // Add all our tasks to the pending lists. We do this in reverse order - // of task index so that tasks with low indices get launched first. - for (i <- (0 until numTasks).reverse) { - addPendingTask(i) - } - - // Figure out which locality levels we have in our TaskSet, so we can do delay scheduling - val myLocalityLevels = computeValidLocalityLevels() - val localityWaits = myLocalityLevels.map(getLocalityWait) // Time to wait at each level - - // Delay scheduling variables: we keep track of our current locality level and the time we - // last launched a task at that level, and move up a level when localityWaits[curLevel] expires. - // We then move down if we manage to launch a "more local" task. - var currentLocalityIndex = 0 // Index of our current locality level in validLocalityLevels - var lastLaunchTime = clock.getTime() // Time we last launched a task at this level - - /** - * Add a task to all the pending-task lists that it should be on. If readding is set, we are - * re-adding the task so only include it in each list if it's not already there. - */ - private def addPendingTask(index: Int, readding: Boolean = false) { - // Utility method that adds `index` to a list only if readding=false or it's not already there - def addTo(list: ArrayBuffer[Int]) { - if (!readding || !list.contains(index)) { - list += index - } - } - - var hadAliveLocations = false - for (loc <- tasks(index).preferredLocations) { - for (execId <- loc.executorId) { - if (sched.isExecutorAlive(execId)) { - addTo(pendingTasksForExecutor.getOrElseUpdate(execId, new ArrayBuffer)) - hadAliveLocations = true - } - } - if (sched.hasExecutorsAliveOnHost(loc.host)) { - addTo(pendingTasksForHost.getOrElseUpdate(loc.host, new ArrayBuffer)) - for (rack <- sched.getRackForHost(loc.host)) { - addTo(pendingTasksForRack.getOrElseUpdate(rack, new ArrayBuffer)) - } - hadAliveLocations = true - } - } - - if (!hadAliveLocations) { - // Even though the task might've had preferred locations, all of those hosts or executors - // are dead; put it in the no-prefs list so we can schedule it elsewhere right away. 
- addTo(pendingTasksWithNoPrefs) - } - - if (!readding) { - allPendingTasks += index // No point scanning this whole list to find the old task there - } - } - - /** - * Return the pending tasks list for a given executor ID, or an empty list if - * there is no map entry for that host - */ - private def getPendingTasksForExecutor(executorId: String): ArrayBuffer[Int] = { - pendingTasksForExecutor.getOrElse(executorId, ArrayBuffer()) - } - - /** - * Return the pending tasks list for a given host, or an empty list if - * there is no map entry for that host - */ - private def getPendingTasksForHost(host: String): ArrayBuffer[Int] = { - pendingTasksForHost.getOrElse(host, ArrayBuffer()) - } - - /** - * Return the pending rack-local task list for a given rack, or an empty list if - * there is no map entry for that rack - */ - private def getPendingTasksForRack(rack: String): ArrayBuffer[Int] = { - pendingTasksForRack.getOrElse(rack, ArrayBuffer()) - } - - /** - * Dequeue a pending task from the given list and return its index. - * Return None if the list is empty. - * This method also cleans up any tasks in the list that have already - * been launched, since we want that to happen lazily. - */ - private def findTaskFromList(list: ArrayBuffer[Int]): Option[Int] = { - while (!list.isEmpty) { - val index = list.last - list.trimEnd(1) - if (copiesRunning(index) == 0 && !successful(index)) { - return Some(index) - } - } - return None - } - - /** Check whether a task is currently running an attempt on a given host */ - private def hasAttemptOnHost(taskIndex: Int, host: String): Boolean = { - !taskAttempts(taskIndex).exists(_.host == host) - } - - /** - * Return a speculative task for a given executor if any are available. The task should not have - * an attempt running on this host, in case the host is slow. In addition, the task should meet - * the given locality constraint. 
- */ - private def findSpeculativeTask(execId: String, host: String, locality: TaskLocality.Value) - : Option[(Int, TaskLocality.Value)] = - { - speculatableTasks.retain(index => !successful(index)) // Remove finished tasks from set - - if (!speculatableTasks.isEmpty) { - // Check for process-local or preference-less tasks; note that tasks can be process-local - // on multiple nodes when we replicate cached blocks, as in Spark Streaming - for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) { - val prefs = tasks(index).preferredLocations - val executors = prefs.flatMap(_.executorId) - if (prefs.size == 0 || executors.contains(execId)) { - speculatableTasks -= index - return Some((index, TaskLocality.PROCESS_LOCAL)) - } - } - - // Check for node-local tasks - if (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) { - for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) { - val locations = tasks(index).preferredLocations.map(_.host) - if (locations.contains(host)) { - speculatableTasks -= index - return Some((index, TaskLocality.NODE_LOCAL)) - } - } - } - - // Check for rack-local tasks - if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) { - for (rack <- sched.getRackForHost(host)) { - for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) { - val racks = tasks(index).preferredLocations.map(_.host).map(sched.getRackForHost) - if (racks.contains(rack)) { - speculatableTasks -= index - return Some((index, TaskLocality.RACK_LOCAL)) - } - } - } - } - - // Check for non-local tasks - if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) { - for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) { - speculatableTasks -= index - return Some((index, TaskLocality.ANY)) - } - } - } - - return None - } - - /** - * Dequeue a pending task for a given node and return its index and locality level. - * Only search for tasks matching the given locality constraint. - */ - private def findTask(execId: String, host: String, locality: TaskLocality.Value) - : Option[(Int, TaskLocality.Value)] = - { - for (index <- findTaskFromList(getPendingTasksForExecutor(execId))) { - return Some((index, TaskLocality.PROCESS_LOCAL)) - } - - if (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) { - for (index <- findTaskFromList(getPendingTasksForHost(host))) { - return Some((index, TaskLocality.NODE_LOCAL)) - } - } - - if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) { - for { - rack <- sched.getRackForHost(host) - index <- findTaskFromList(getPendingTasksForRack(rack)) - } { - return Some((index, TaskLocality.RACK_LOCAL)) - } - } - - // Look for no-pref tasks after rack-local tasks since they can run anywhere. 
- for (index <- findTaskFromList(pendingTasksWithNoPrefs)) { - return Some((index, TaskLocality.PROCESS_LOCAL)) - } - - if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) { - for (index <- findTaskFromList(allPendingTasks)) { - return Some((index, TaskLocality.ANY)) - } - } - - // Finally, if all else has failed, find a speculative task - return findSpeculativeTask(execId, host, locality) - } - - /** - * Respond to an offer of a single executor from the scheduler by finding a task - */ - override def resourceOffer( - execId: String, - host: String, - availableCpus: Int, - maxLocality: TaskLocality.TaskLocality) - : Option[TaskDescription] = - { - if (tasksSuccessful < numTasks && availableCpus >= CPUS_PER_TASK) { - val curTime = clock.getTime() - - var allowedLocality = getAllowedLocalityLevel(curTime) - if (allowedLocality > maxLocality) { - allowedLocality = maxLocality // We're not allowed to search for farther-away tasks - } - - findTask(execId, host, allowedLocality) match { - case Some((index, taskLocality)) => { - // Found a task; do some bookkeeping and return a task description - val task = tasks(index) - val taskId = sched.newTaskId() - // Figure out whether this should count as a preferred launch - logInfo("Starting task %s:%d as TID %s on executor %s: %s (%s)".format( - taskSet.id, index, taskId, execId, host, taskLocality)) - // Do various bookkeeping - copiesRunning(index) += 1 - val info = new TaskInfo(taskId, index, curTime, execId, host, taskLocality) - taskInfos(taskId) = info - taskAttempts(index) = info :: taskAttempts(index) - // Update our locality level for delay scheduling - currentLocalityIndex = getLocalityIndex(taskLocality) - lastLaunchTime = curTime - // Serialize and return the task - val startTime = clock.getTime() - // We rely on the DAGScheduler to catch non-serializable closures and RDDs, so in here - // we assume the task can be serialized without exceptions. - val serializedTask = Task.serializeWithDependencies( - task, sched.sc.addedFiles, sched.sc.addedJars, ser) - val timeTaken = clock.getTime() - startTime - addRunningTask(taskId) - logInfo("Serialized task %s:%d as %d bytes in %d ms".format( - taskSet.id, index, serializedTask.limit, timeTaken)) - val taskName = "task %s:%d".format(taskSet.id, index) - if (taskAttempts(index).size == 1) - taskStarted(task,info) - return Some(new TaskDescription(taskId, execId, taskName, index, serializedTask)) - } - case _ => - } - } - return None - } - - /** - * Get the level we can launch tasks according to delay scheduling, based on current wait time. - */ - private def getAllowedLocalityLevel(curTime: Long): TaskLocality.TaskLocality = { - while (curTime - lastLaunchTime >= localityWaits(currentLocalityIndex) && - currentLocalityIndex < myLocalityLevels.length - 1) - { - // Jump to the next locality level, and remove our waiting time for the current one since - // we don't want to count it again on the next one - lastLaunchTime += localityWaits(currentLocalityIndex) - currentLocalityIndex += 1 - } - myLocalityLevels(currentLocalityIndex) - } - - /** - * Find the index in myLocalityLevels for a given locality. This is also designed to work with - * localities that are not in myLocalityLevels (in case we somehow get those) by returning the - * next-biggest level we have. Uses the fact that the last value in myLocalityLevels is ANY. 
- */ - def getLocalityIndex(locality: TaskLocality.TaskLocality): Int = { - var index = 0 - while (locality > myLocalityLevels(index)) { - index += 1 - } - index - } - - private def taskStarted(task: Task[_], info: TaskInfo) { - sched.dagScheduler.taskStarted(task, info) - } - - def handleTaskGettingResult(tid: Long) = { - val info = taskInfos(tid) - info.markGettingResult() - sched.dagScheduler.taskGettingResult(tasks(info.index), info) - } - - /** - * Marks the task as successful and notifies the DAGScheduler that a task has ended. - */ - def handleSuccessfulTask(tid: Long, result: DirectTaskResult[_]) = { - val info = taskInfos(tid) - val index = info.index - info.markSuccessful() - removeRunningTask(tid) - if (!successful(index)) { - logInfo("Finished TID %s in %d ms on %s (progress: %d/%d)".format( - tid, info.duration, info.host, tasksSuccessful, numTasks)) - sched.dagScheduler.taskEnded( - tasks(index), Success, result.value, result.accumUpdates, info, result.metrics) - - // Mark successful and stop if all the tasks have succeeded. - tasksSuccessful += 1 - successful(index) = true - if (tasksSuccessful == numTasks) { - sched.taskSetFinished(this) - } - } else { - logInfo("Ignorning task-finished event for TID " + tid + " because task " + - index + " has already completed successfully") - } - } - - /** - * Marks the task as failed, re-adds it to the list of pending tasks, and notifies the - * DAG Scheduler. - */ - def handleFailedTask(tid: Long, state: TaskState, reason: Option[TaskEndReason]) { - val info = taskInfos(tid) - if (info.failed) { - return - } - removeRunningTask(tid) - val index = info.index - info.markFailed() - if (!successful(index)) { - logWarning("Lost TID %s (task %s:%d)".format(tid, taskSet.id, index)) - copiesRunning(index) -= 1 - // Check if the problem is a map output fetch failure. In that case, this - // task will never succeed on any node, so tell the scheduler about it. - reason.foreach { - case fetchFailed: FetchFailed => - logWarning("Loss was due to fetch failure from " + fetchFailed.bmAddress) - sched.dagScheduler.taskEnded(tasks(index), fetchFailed, null, null, info, null) - successful(index) = true - tasksSuccessful += 1 - sched.taskSetFinished(this) - removeAllRunningTasks() - return - - case TaskKilled => - logWarning("Task %d was killed.".format(tid)) - sched.dagScheduler.taskEnded(tasks(index), reason.get, null, null, info, null) - return - - case ef: ExceptionFailure => - sched.dagScheduler.taskEnded(tasks(index), ef, null, null, info, ef.metrics.getOrElse(null)) - if (ef.className == classOf[NotSerializableException].getName()) { - // If the task result wasn't serializable, there's no point in trying to re-execute it. 
- logError("Task %s:%s had a not serializable result: %s; not retrying".format( - taskSet.id, index, ef.description)) - abort("Task %s:%s had a not serializable result: %s".format( - taskSet.id, index, ef.description)) - return - } - val key = ef.description - val now = clock.getTime() - val (printFull, dupCount) = { - if (recentExceptions.contains(key)) { - val (dupCount, printTime) = recentExceptions(key) - if (now - printTime > EXCEPTION_PRINT_INTERVAL) { - recentExceptions(key) = (0, now) - (true, 0) - } else { - recentExceptions(key) = (dupCount + 1, printTime) - (false, dupCount + 1) - } - } else { - recentExceptions(key) = (0, now) - (true, 0) - } - } - if (printFull) { - val locs = ef.stackTrace.map(loc => "\tat %s".format(loc.toString)) - logWarning("Loss was due to %s\n%s\n%s".format( - ef.className, ef.description, locs.mkString("\n"))) - } else { - logInfo("Loss was due to %s [duplicate %d]".format(ef.description, dupCount)) - } - - case TaskResultLost => - logWarning("Lost result for TID %s on host %s".format(tid, info.host)) - sched.dagScheduler.taskEnded(tasks(index), TaskResultLost, null, null, info, null) - - case _ => {} - } - // On non-fetch failures, re-enqueue the task as pending for a max number of retries - addPendingTask(index) - if (state != TaskState.KILLED) { - numFailures(index) += 1 - if (numFailures(index) > MAX_TASK_FAILURES) { - logError("Task %s:%d failed more than %d times; aborting job".format( - taskSet.id, index, MAX_TASK_FAILURES)) - abort("Task %s:%d failed more than %d times".format(taskSet.id, index, MAX_TASK_FAILURES)) - } - } - } else { - logInfo("Ignoring task-lost event for TID " + tid + - " because task " + index + " is already finished") - } - } - - override def error(message: String) { - // Save the error message - abort("Error: " + message) - } - - def abort(message: String) { - failed = true - causeOfFailure = message - // TODO: Kill running tasks if we were not terminated due to a Mesos error - sched.dagScheduler.taskSetFailed(taskSet, message) - removeAllRunningTasks() - sched.taskSetFinished(this) - } - - /** If the given task ID is not in the set of running tasks, adds it. - * - * Used to keep track of the number of running tasks, for enforcing scheduling policies. - */ - def addRunningTask(tid: Long) { - if (runningTasksSet.add(tid) && parent != null) { - parent.increaseRunningTasks(1) - } - runningTasks = runningTasksSet.size - } - - /** If the given task ID is in the set of running tasks, removes it. 
*/ - def removeRunningTask(tid: Long) { - if (runningTasksSet.remove(tid) && parent != null) { - parent.decreaseRunningTasks(1) - } - runningTasks = runningTasksSet.size - } - - private[cluster] def removeAllRunningTasks() { - val numRunningTasks = runningTasksSet.size - runningTasksSet.clear() - if (parent != null) { - parent.decreaseRunningTasks(numRunningTasks) - } - runningTasks = 0 - } - - override def getSchedulableByName(name: String): Schedulable = { - return null - } - - override def addSchedulable(schedulable: Schedulable) {} - - override def removeSchedulable(schedulable: Schedulable) {} - - override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = { - var sortedTaskSetQueue = ArrayBuffer[TaskSetManager](this) - sortedTaskSetQueue += this - return sortedTaskSetQueue - } - - /** Called by cluster scheduler when an executor is lost so we can re-enqueue our tasks */ - override def executorLost(execId: String, host: String) { - logInfo("Re-queueing tasks for " + execId + " from TaskSet " + taskSet.id) - - // Re-enqueue pending tasks for this host based on the status of the cluster -- for example, a - // task that used to have locations on only this host might now go to the no-prefs list. Note - // that it's okay if we add a task to the same queue twice (if it had multiple preferred - // locations), because findTaskFromList will skip already-running tasks. - for (index <- getPendingTasksForExecutor(execId)) { - addPendingTask(index, readding=true) - } - for (index <- getPendingTasksForHost(host)) { - addPendingTask(index, readding=true) - } - - // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage - if (tasks(0).isInstanceOf[ShuffleMapTask]) { - for ((tid, info) <- taskInfos if info.executorId == execId) { - val index = taskInfos(tid).index - if (successful(index)) { - successful(index) = false - copiesRunning(index) -= 1 - tasksSuccessful -= 1 - addPendingTask(index) - // Tell the DAGScheduler that this task was resubmitted so that it doesn't think our - // stage finishes when a total of tasks.size tasks finish. - sched.dagScheduler.taskEnded(tasks(index), Resubmitted, null, null, info, null) - } - } - } - // Also re-enqueue any tasks that were running on the node - for ((tid, info) <- taskInfos if info.running && info.executorId == execId) { - handleFailedTask(tid, TaskState.KILLED, None) - } - } - - /** - * Check for tasks to be speculated and return true if there are any. This is called periodically - * by the ClusterScheduler. - * - * TODO: To make this scale to large jobs, we need to maintain a list of running tasks, so that - * we don't scan the whole task set. It might also help to make this sorted by launch time. - */ - override def checkSpeculatableTasks(): Boolean = { - // Can't speculate if we only have one task, or if all tasks have finished. 
- if (numTasks == 1 || tasksSuccessful == numTasks) { - return false - } - var foundTasks = false - val minFinishedForSpeculation = (SPECULATION_QUANTILE * numTasks).floor.toInt - logDebug("Checking for speculative tasks: minFinished = " + minFinishedForSpeculation) - if (tasksSuccessful >= minFinishedForSpeculation && tasksSuccessful > 0) { - val time = clock.getTime() - val durations = taskInfos.values.filter(_.successful).map(_.duration).toArray - Arrays.sort(durations) - val medianDuration = durations(min((0.5 * tasksSuccessful).round.toInt, durations.size - 1)) - val threshold = max(SPECULATION_MULTIPLIER * medianDuration, 100) - // TODO: Threshold should also look at standard deviation of task durations and have a lower - // bound based on that. - logDebug("Task length threshold for speculation: " + threshold) - for ((tid, info) <- taskInfos) { - val index = info.index - if (!successful(index) && copiesRunning(index) == 1 && info.timeRunning(time) > threshold && - !speculatableTasks.contains(index)) { - logInfo( - "Marking task %s:%d (on %s) as speculatable because it ran more than %.0f ms".format( - taskSet.id, index, info.host, threshold)) - speculatableTasks += index - foundTasks = true - } - } - } - return foundTasks - } - - override def hasPendingTasks(): Boolean = { - numTasks > 0 && tasksSuccessful < numTasks - } - - private def getLocalityWait(level: TaskLocality.TaskLocality): Long = { - val defaultWait = System.getProperty("spark.locality.wait", "3000") - level match { - case TaskLocality.PROCESS_LOCAL => - System.getProperty("spark.locality.wait.process", defaultWait).toLong - case TaskLocality.NODE_LOCAL => - System.getProperty("spark.locality.wait.node", defaultWait).toLong - case TaskLocality.RACK_LOCAL => - System.getProperty("spark.locality.wait.rack", defaultWait).toLong - case TaskLocality.ANY => - 0L - } - } - - /** - * Compute the locality levels used in this TaskSet. Assumes that all tasks have already been - * added to queues using addPendingTask. 
- */ - private def computeValidLocalityLevels(): Array[TaskLocality.TaskLocality] = { - import TaskLocality.{PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY} - val levels = new ArrayBuffer[TaskLocality.TaskLocality] - if (!pendingTasksForExecutor.isEmpty && getLocalityWait(PROCESS_LOCAL) != 0) { - levels += PROCESS_LOCAL - } - if (!pendingTasksForHost.isEmpty && getLocalityWait(NODE_LOCAL) != 0) { - levels += NODE_LOCAL - } - if (!pendingTasksForRack.isEmpty && getLocalityWait(RACK_LOCAL) != 0) { - levels += RACK_LOCAL - } - levels += ANY - logDebug("Valid locality levels for " + taskSet + ": " + levels.mkString(", ")) - levels.toArray - } -} diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index d0ba5bf55dcfd..0208388e86680 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -20,18 +20,19 @@ package org.apache.spark.scheduler.cluster import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} +import scala.concurrent.Await +import scala.concurrent.duration._ import akka.actor._ -import akka.dispatch.Await import akka.pattern.ask -import akka.remote.{RemoteClientShutdown, RemoteClientDisconnected, RemoteClientLifeCycleEvent} -import akka.util.Duration -import akka.util.duration._ +import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent} import org.apache.spark.{SparkException, Logging, TaskState} -import org.apache.spark.scheduler.TaskDescription +import org.apache.spark.{Logging, SparkException, TaskState} +import org.apache.spark.scheduler.{TaskSchedulerImpl, SchedulerBackend, SlaveLost, TaskDescription, + WorkerOffer} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ -import org.apache.spark.util.Utils +import org.apache.spark.util.{AkkaUtils, Utils} /** * A scheduler backend that waits for coarse grained executors to connect to it through Akka. @@ -42,26 +43,28 @@ import org.apache.spark.util.Utils * (spark.deploy.*). 
*/ private[spark] -class CoarseGrainedSchedulerBackend(scheduler: ClusterScheduler, actorSystem: ActorSystem) +class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, actorSystem: ActorSystem) extends SchedulerBackend with Logging { // Use an atomic variable to track total number of cores in the cluster for simplicity and speed var totalCoreCount = new AtomicInteger(0) + val conf = scheduler.sc.conf + private val timeout = AkkaUtils.askTimeout(conf) class DriverActor(sparkProperties: Seq[(String, String)]) extends Actor { private val executorActor = new HashMap[String, ActorRef] private val executorAddress = new HashMap[String, Address] private val executorHost = new HashMap[String, String] private val freeCores = new HashMap[String, Int] - private val actorToExecutorId = new HashMap[ActorRef, String] private val addressToExecutorId = new HashMap[Address, String] override def preStart() { // Listen for remote client disconnection events, since they don't go through Akka's watch() - context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) + context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) // Periodically revive offers to allow delay scheduling to work - val reviveInterval = System.getProperty("spark.scheduler.revive.interval", "1000").toLong + val reviveInterval = conf.getLong("spark.scheduler.revive.interval", 1000) + import context.dispatcher context.system.scheduler.schedule(0.millis, reviveInterval.millis, self, ReviveOffers) } @@ -73,12 +76,10 @@ class CoarseGrainedSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Ac } else { logInfo("Registered executor: " + sender + " with ID " + executorId) sender ! RegisteredExecutor(sparkProperties) - context.watch(sender) executorActor(executorId) = sender executorHost(executorId) = Utils.parseHostPort(hostPort)._1 freeCores(executorId) = cores executorAddress(executorId) = sender.path.address - actorToExecutorId(sender) = executorId addressToExecutorId(sender.path.address) = executorId totalCoreCount.addAndGet(cores) makeOffers() @@ -118,14 +119,9 @@ class CoarseGrainedSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Ac removeExecutor(executorId, reason) sender ! 
true - case Terminated(actor) => - actorToExecutorId.get(actor).foreach(removeExecutor(_, "Akka actor terminated")) + case DisassociatedEvent(_, address, _) => + addressToExecutorId.get(address).foreach(removeExecutor(_, "remote Akka client disassociated")) - case RemoteClientDisconnected(transport, address) => - addressToExecutorId.get(address).foreach(removeExecutor(_, "remote Akka client disconnected")) - - case RemoteClientShutdown(transport, address) => - addressToExecutorId.get(address).foreach(removeExecutor(_, "remote Akka client shutdown")) } // Make fake resource offers on all executors @@ -153,7 +149,6 @@ class CoarseGrainedSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Ac if (executorActor.contains(executorId)) { logInfo("Executor " + executorId + " disconnected, so removing it") val numCores = freeCores(executorId) - actorToExecutorId -= executorActor(executorId) addressToExecutorId -= executorAddress(executorId) executorActor -= executorId executorHost -= executorId @@ -169,22 +164,16 @@ class CoarseGrainedSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Ac override def start() { val properties = new ArrayBuffer[(String, String)] - val iterator = System.getProperties.entrySet.iterator - while (iterator.hasNext) { - val entry = iterator.next - val (key, value) = (entry.getKey.toString, entry.getValue.toString) - if (key.startsWith("spark.") && !key.equals("spark.hostPort")) { + for ((key, value) <- scheduler.sc.conf.getAll) { + if (key.startsWith("spark.")) { properties += ((key, value)) } } + //TODO (prashant) send conf instead of properties driverActor = actorSystem.actorOf( Props(new DriverActor(properties)), name = CoarseGrainedSchedulerBackend.ACTOR_NAME) } - private val timeout = { - Duration.create(System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds") - } - def stopExecutors() { try { if (driverActor != null) { @@ -219,8 +208,9 @@ class CoarseGrainedSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Ac driverActor ! 
KillTask(taskId, executorId) } - override def defaultParallelism() = Option(System.getProperty("spark.default.parallelism")) - .map(_.toInt).getOrElse(math.max(totalCoreCount.get(), 2)) + override def defaultParallelism(): Int = { + conf.getInt("spark.default.parallelism", math.max(totalCoreCount.get(), 2)) + } // Called by subclasses when notified of a lost worker def removeExecutor(executorId: String, reason: String) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala index e000531a26f7e..d99c76117c168 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala @@ -19,10 +19,12 @@ package org.apache.spark.scheduler.cluster import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{Path, FileSystem} + import org.apache.spark.{Logging, SparkContext} +import org.apache.spark.scheduler.TaskSchedulerImpl private[spark] class SimrSchedulerBackend( - scheduler: ClusterScheduler, + scheduler: TaskSchedulerImpl, sc: SparkContext, driverFilePath: String) extends CoarseGrainedSchedulerBackend(scheduler, sc.env.actorSystem) @@ -31,13 +33,13 @@ private[spark] class SimrSchedulerBackend( val tmpPath = new Path(driverFilePath + "_tmp") val filePath = new Path(driverFilePath) - val maxCores = System.getProperty("spark.simr.executor.cores", "1").toInt + val maxCores = conf.getInt("spark.simr.executor.cores", 1) override def start() { super.start() - val driverUrl = "akka://spark@%s:%s/user/%s".format( - System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"), + val driverUrl = "akka.tcp://spark@%s:%s/user/%s".format( + sc.conf.get("spark.driver.host"), sc.conf.get("spark.driver.port"), CoarseGrainedSchedulerBackend.ACTOR_NAME) val conf = new Configuration() diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index cefa970bb92f9..faa6e1ebe886f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -17,42 +17,44 @@ package org.apache.spark.scheduler.cluster +import scala.collection.mutable.HashMap + import org.apache.spark.{Logging, SparkContext} -import org.apache.spark.deploy.client.{Client, ClientListener} +import org.apache.spark.deploy.client.{AppClient, AppClientListener} import org.apache.spark.deploy.{Command, ApplicationDescription} -import scala.collection.mutable.HashMap +import org.apache.spark.scheduler.{ExecutorExited, ExecutorLossReason, SlaveLost, TaskSchedulerImpl} import org.apache.spark.util.Utils private[spark] class SparkDeploySchedulerBackend( - scheduler: ClusterScheduler, + scheduler: TaskSchedulerImpl, sc: SparkContext, masters: Array[String], appName: String) extends CoarseGrainedSchedulerBackend(scheduler, sc.env.actorSystem) - with ClientListener + with AppClientListener with Logging { - var client: Client = null + var client: AppClient = null var stopping = false var shutdownCallback : (SparkDeploySchedulerBackend) => Unit = _ - val maxCores = System.getProperty("spark.cores.max", Int.MaxValue.toString).toInt + val maxCores = conf.getOption("spark.cores.max").map(_.toInt) override def start() { super.start() // 
The endpoint for executors to talk to us - val driverUrl = "akka://spark@%s:%s/user/%s".format( - System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"), + val driverUrl = "akka.tcp://spark@%s:%s/user/%s".format( + conf.get("spark.driver.host"), conf.get("spark.driver.port"), CoarseGrainedSchedulerBackend.ACTOR_NAME) - val args = Seq(driverUrl, "{{EXECUTOR_ID}}", "{{HOSTNAME}}", "{{CORES}}") + val args = Seq(driverUrl, "{{EXECUTOR_ID}}", "{{HOSTNAME}}", "{{CORES}}", "{{WORKER_URL}}") val command = Command( "org.apache.spark.executor.CoarseGrainedExecutorBackend", args, sc.executorEnvs) val sparkHome = sc.getSparkHome().getOrElse(null) - val appDesc = new ApplicationDescription(appName, maxCores, executorMemory, command, sparkHome, + val appDesc = new ApplicationDescription(appName, maxCores, sc.executorMemory, command, sparkHome, "http://" + sc.ui.appUIAddress) - client = new Client(sc.env.actorSystem, masters, appDesc, this) + client = new AppClient(sc.env.actorSystem, masters, appDesc, this, conf) client.start() } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala index cd521e0f2be0a..c27049bdb5208 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala @@ -30,7 +30,8 @@ import org.apache.mesos._ import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, TaskState => MesosTaskState, _} import org.apache.spark.{SparkException, Logging, SparkContext, TaskState} -import org.apache.spark.scheduler.cluster.{ClusterScheduler, CoarseGrainedSchedulerBackend} +import org.apache.spark.scheduler.TaskSchedulerImpl +import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend /** * A SchedulerBackend that runs tasks on Mesos, but uses "coarse-grained" tasks, where it holds @@ -43,7 +44,7 @@ import org.apache.spark.scheduler.cluster.{ClusterScheduler, CoarseGrainedSchedu * remove this. 
*/ private[spark] class CoarseMesosSchedulerBackend( - scheduler: ClusterScheduler, + scheduler: TaskSchedulerImpl, sc: SparkContext, master: String, appName: String) @@ -61,7 +62,7 @@ private[spark] class CoarseMesosSchedulerBackend( var driver: SchedulerDriver = null // Maximum number of cores to acquire (TODO: we'll need more flexible controls here) - val maxCores = System.getProperty("spark.cores.max", Int.MaxValue.toString).toInt + val maxCores = conf.get("spark.cores.max", Int.MaxValue.toString).toInt // Cores we have acquired with each Mesos task ID val coresByTaskId = new HashMap[Int, Int] @@ -76,7 +77,7 @@ private[spark] class CoarseMesosSchedulerBackend( "Spark home is not set; set it through the spark.home system " + "property, the SPARK_HOME environment variable or the SparkContext constructor")) - val extraCoresPerSlave = System.getProperty("spark.mesos.extra.cores", "0").toInt + val extraCoresPerSlave = conf.getInt("spark.mesos.extra.cores", 0) var nextMesosTaskId = 0 @@ -120,13 +121,13 @@ private[spark] class CoarseMesosSchedulerBackend( } val command = CommandInfo.newBuilder() .setEnvironment(environment) - val driverUrl = "akka://spark@%s:%s/user/%s".format( - System.getProperty("spark.driver.host"), - System.getProperty("spark.driver.port"), + val driverUrl = "akka.tcp://spark@%s:%s/user/%s".format( + conf.get("spark.driver.host"), + conf.get("spark.driver.port"), CoarseGrainedSchedulerBackend.ACTOR_NAME) - val uri = System.getProperty("spark.executor.uri") + val uri = conf.get("spark.executor.uri", null) if (uri == null) { - val runScript = new File(sparkHome, "spark-class").getCanonicalPath + val runScript = new File(sparkHome, "./bin/spark-class").getCanonicalPath command.setValue( "\"%s\" org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %d".format( runScript, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores)) @@ -135,11 +136,11 @@ private[spark] class CoarseMesosSchedulerBackend( // glob the directory "correctly". 
val basename = uri.split('/').last.split('.').head command.setValue( - "cd %s*; ./spark-class org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %d" + "cd %s*; ./bin/spark-class org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %d" .format(basename, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores)) command.addUris(CommandInfo.URI.newBuilder().setValue(uri)) } - return command.build() + command.build() } override def offerRescinded(d: SchedulerDriver, o: OfferID) {} @@ -176,7 +177,7 @@ private[spark] class CoarseMesosSchedulerBackend( val slaveId = offer.getSlaveId.toString val mem = getResource(offer.getResourcesList, "mem") val cpus = getResource(offer.getResourcesList, "cpus").toInt - if (totalCoresAcquired < maxCores && mem >= executorMemory && cpus >= 1 && + if (totalCoresAcquired < maxCores && mem >= sc.executorMemory && cpus >= 1 && failuresBySlaveId.getOrElse(slaveId, 0) < MAX_SLAVE_FAILURES && !slaveIdsWithExecutors.contains(slaveId)) { // Launch an executor on the slave @@ -192,7 +193,7 @@ private[spark] class CoarseMesosSchedulerBackend( .setCommand(createCommand(offer, cpusToUse + extraCoresPerSlave)) .setName("Task " + taskId) .addResources(createResource("cpus", cpusToUse)) - .addResources(createResource("mem", executorMemory)) + .addResources(createResource("mem", sc.executorMemory)) .build() d.launchTasks(offer.getId, Collections.singletonList(task), filters) } else { diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala index 50cbc2ca92e7e..fef291eea0257 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala @@ -30,9 +30,8 @@ import org.apache.mesos._ import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, TaskState => MesosTaskState, _} import org.apache.spark.{Logging, SparkException, SparkContext, TaskState} -import org.apache.spark.scheduler.TaskDescription -import org.apache.spark.scheduler.cluster.{ClusterScheduler, ExecutorExited, ExecutorLossReason} -import org.apache.spark.scheduler.cluster.{SchedulerBackend, SlaveLost, WorkerOffer} +import org.apache.spark.scheduler.{ExecutorExited, ExecutorLossReason, SchedulerBackend, SlaveLost, + TaskDescription, TaskSchedulerImpl, WorkerOffer} import org.apache.spark.util.Utils /** @@ -41,7 +40,7 @@ import org.apache.spark.util.Utils * from multiple apps can run on different cores) and in time (a core can switch ownership). */ private[spark] class MesosSchedulerBackend( - scheduler: ClusterScheduler, + scheduler: TaskSchedulerImpl, sc: SparkContext, master: String, appName: String) @@ -101,20 +100,20 @@ private[spark] class MesosSchedulerBackend( } val command = CommandInfo.newBuilder() .setEnvironment(environment) - val uri = System.getProperty("spark.executor.uri") + val uri = sc.conf.get("spark.executor.uri", null) if (uri == null) { - command.setValue(new File(sparkHome, "spark-executor").getCanonicalPath) + command.setValue(new File(sparkHome, "/sbin/spark-executor").getCanonicalPath) } else { // Grab everything to the first '.'. We'll use that and '*' to // glob the directory "correctly". 
val basename = uri.split('/').last.split('.').head - command.setValue("cd %s*; ./spark-executor".format(basename)) + command.setValue("cd %s*; ./sbin/spark-executor".format(basename)) command.addUris(CommandInfo.URI.newBuilder().setValue(uri)) } val memory = Resource.newBuilder() .setName("mem") .setType(Value.Type.SCALAR) - .setScalar(Value.Scalar.newBuilder().setValue(executorMemory).build()) + .setScalar(Value.Scalar.newBuilder().setValue(sc.executorMemory).build()) .build() ExecutorInfo.newBuilder() .setExecutorId(ExecutorID.newBuilder().setValue(execId).build()) @@ -131,24 +130,19 @@ private[spark] class MesosSchedulerBackend( private def createExecArg(): Array[Byte] = { if (execArgs == null) { val props = new HashMap[String, String] - val iterator = System.getProperties.entrySet.iterator - while (iterator.hasNext) { - val entry = iterator.next - val (key, value) = (entry.getKey.toString, entry.getValue.toString) - if (key.startsWith("spark.")) { - props(key) = value - } + for ((key,value) <- sc.conf.getAll) { + props(key) = value } // Serialize the map as an array of (String, String) pairs execArgs = Utils.serialize(props.toArray) } - return execArgs + execArgs } private def setClassLoader(): ClassLoader = { val oldClassLoader = Thread.currentThread.getContextClassLoader Thread.currentThread.setContextClassLoader(classLoader) - return oldClassLoader + oldClassLoader } private def restoreClassLoader(oldClassLoader: ClassLoader) { @@ -199,7 +193,7 @@ private[spark] class MesosSchedulerBackend( def enoughMemory(o: Offer) = { val mem = getResource(o.getResourcesList, "mem") val slaveId = o.getSlaveId.getValue - mem >= executorMemory || slaveIdsWithExecutors.contains(slaveId) + mem >= sc.executorMemory || slaveIdsWithExecutors.contains(slaveId) } for ((offer, index) <- offers.zipWithIndex if enoughMemory(offer)) { @@ -256,7 +250,7 @@ private[spark] class MesosSchedulerBackend( .setType(Value.Type.SCALAR) .setScalar(Value.Scalar.newBuilder().setValue(1).build()) .build() - return MesosTaskInfo.newBuilder() + MesosTaskInfo.newBuilder() .setTaskId(taskId) .setSlaveId(SlaveID.newBuilder().setValue(slaveId).build()) .setExecutor(createExecutorInfo(slaveId)) @@ -341,5 +335,5 @@ private[spark] class MesosSchedulerBackend( } // TODO: query Mesos for number of cores - override def defaultParallelism() = System.getProperty("spark.default.parallelism", "8").toInt + override def defaultParallelism() = sc.conf.getInt("spark.default.parallelism", 8) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala new file mode 100644 index 0000000000000..897d47a9ad981 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler.local + +import java.nio.ByteBuffer + +import akka.actor.{Actor, ActorRef, Props} + +import org.apache.spark.{Logging, SparkContext, SparkEnv, TaskState} +import org.apache.spark.TaskState.TaskState +import org.apache.spark.executor.{Executor, ExecutorBackend} +import org.apache.spark.scheduler.{SchedulerBackend, TaskSchedulerImpl, WorkerOffer} + +private case class ReviveOffers() + +private case class StatusUpdate(taskId: Long, state: TaskState, serializedData: ByteBuffer) + +private case class KillTask(taskId: Long) + +/** + * Calls to LocalBackend are all serialized through LocalActor. Using an actor makes the calls on + * LocalBackend asynchronous, which is necessary to prevent deadlock between LocalBackend + * and the ClusterScheduler. + */ +private[spark] class LocalActor( + scheduler: TaskSchedulerImpl, + executorBackend: LocalBackend, + private val totalCores: Int) extends Actor with Logging { + + private var freeCores = totalCores + + private val localExecutorId = "localhost" + private val localExecutorHostname = "localhost" + + val executor = new Executor( + localExecutorId, localExecutorHostname, scheduler.conf.getAll, isLocal = true) + + def receive = { + case ReviveOffers => + reviveOffers() + + case StatusUpdate(taskId, state, serializedData) => + scheduler.statusUpdate(taskId, state, serializedData) + if (TaskState.isFinished(state)) { + freeCores += 1 + reviveOffers() + } + + case KillTask(taskId) => + executor.killTask(taskId) + } + + def reviveOffers() { + val offers = Seq(new WorkerOffer(localExecutorId, localExecutorHostname, freeCores)) + for (task <- scheduler.resourceOffers(offers).flatten) { + freeCores -= 1 + executor.launchTask(executorBackend, task.taskId, task.serializedTask) + } + } +} + +/** + * LocalBackend is used when running a local version of Spark where the executor, backend, and + * master all run in the same JVM. It sits behind a ClusterScheduler and handles launching tasks + * on a single Executor (created by the LocalBackend) running locally. + */ +private[spark] class LocalBackend(scheduler: TaskSchedulerImpl, val totalCores: Int) + extends SchedulerBackend with ExecutorBackend { + + var localActor: ActorRef = null + + override def start() { + localActor = SparkEnv.get.actorSystem.actorOf( + Props(new LocalActor(scheduler, this, totalCores)), + "LocalBackendActor") + } + + override def stop() { + } + + override def reviveOffers() { + localActor ! ReviveOffers + } + + override def defaultParallelism() = totalCores + + override def killTask(taskId: Long, executorId: String) { + localActor ! KillTask(taskId) + } + + override def statusUpdate(taskId: Long, state: TaskState, serializedData: ByteBuffer) { + localActor ! StatusUpdate(taskId, state, serializedData) + } +} diff --git a/core/src/main/scala/org/apache/spark/scheduler/local/LocalScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalScheduler.scala deleted file mode 100644 index 1c227fefe48d3..0000000000000 --- a/core/src/main/scala/org/apache/spark/scheduler/local/LocalScheduler.scala +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.scheduler.local - -import java.nio.ByteBuffer -import java.util.concurrent.atomic.AtomicInteger - -import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} - -import akka.actor._ - -import org.apache.spark._ -import org.apache.spark.TaskState.TaskState -import org.apache.spark.executor.{Executor, ExecutorBackend} -import org.apache.spark.scheduler._ -import org.apache.spark.scheduler.SchedulingMode.SchedulingMode - - -/** - * A FIFO or Fair TaskScheduler implementation that runs tasks locally in a thread pool. Optionally - * the scheduler also allows each task to fail up to maxFailures times, which is useful for - * testing fault recovery. - */ - -private[local] -case class LocalReviveOffers() - -private[local] -case class LocalStatusUpdate(taskId: Long, state: TaskState, serializedData: ByteBuffer) - -private[local] -case class KillTask(taskId: Long) - -private[spark] -class LocalActor(localScheduler: LocalScheduler, private var freeCores: Int) - extends Actor with Logging { - - val executor = new Executor("localhost", "localhost", Seq.empty, isLocal = true) - - def receive = { - case LocalReviveOffers => - launchTask(localScheduler.resourceOffer(freeCores)) - - case LocalStatusUpdate(taskId, state, serializeData) => - if (TaskState.isFinished(state)) { - freeCores += 1 - launchTask(localScheduler.resourceOffer(freeCores)) - } - - case KillTask(taskId) => - executor.killTask(taskId) - } - - private def launchTask(tasks: Seq[TaskDescription]) { - for (task <- tasks) { - freeCores -= 1 - executor.launchTask(localScheduler, task.taskId, task.serializedTask) - } - } -} - -private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: SparkContext) - extends TaskScheduler - with ExecutorBackend - with Logging { - - val env = SparkEnv.get - val attemptId = new AtomicInteger - var dagScheduler: DAGScheduler = null - - // Application dependencies (added through SparkContext) that we've fetched so far on this node. - // Each map holds the master's timestamp for the version of that file or JAR we got. 
- val currentFiles: HashMap[String, Long] = new HashMap[String, Long]() - val currentJars: HashMap[String, Long] = new HashMap[String, Long]() - - var schedulableBuilder: SchedulableBuilder = null - var rootPool: Pool = null - val schedulingMode: SchedulingMode = SchedulingMode.withName( - System.getProperty("spark.scheduler.mode", "FIFO")) - val activeTaskSets = new HashMap[String, LocalTaskSetManager] - val taskIdToTaskSetId = new HashMap[Long, String] - val taskSetTaskIds = new HashMap[String, HashSet[Long]] - - var localActor: ActorRef = null - - override def start() { - // temporarily set rootPool name to empty - rootPool = new Pool("", schedulingMode, 0, 0) - schedulableBuilder = { - schedulingMode match { - case SchedulingMode.FIFO => - new FIFOSchedulableBuilder(rootPool) - case SchedulingMode.FAIR => - new FairSchedulableBuilder(rootPool) - } - } - schedulableBuilder.buildPools() - - localActor = env.actorSystem.actorOf(Props(new LocalActor(this, threads)), "Test") - } - - override def setDAGScheduler(dagScheduler: DAGScheduler) { - this.dagScheduler = dagScheduler - } - - override def submitTasks(taskSet: TaskSet) { - synchronized { - val manager = new LocalTaskSetManager(this, taskSet) - schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties) - activeTaskSets(taskSet.id) = manager - taskSetTaskIds(taskSet.id) = new HashSet[Long]() - localActor ! LocalReviveOffers - } - } - - override def cancelTasks(stageId: Int): Unit = synchronized { - logInfo("Cancelling stage " + stageId) - logInfo("Cancelling stage " + activeTaskSets.map(_._2.stageId)) - activeTaskSets.find(_._2.stageId == stageId).foreach { case (_, tsm) => - // There are two possible cases here: - // 1. The task set manager has been created and some tasks have been scheduled. - // In this case, send a kill signal to the executors to kill the task and then abort - // the stage. - // 2. The task set manager has been created but no tasks has been scheduled. In this case, - // simply abort the stage. - val taskIds = taskSetTaskIds(tsm.taskSet.id) - if (taskIds.size > 0) { - taskIds.foreach { tid => - localActor ! 
KillTask(tid) - } - } - logInfo("Stage %d was cancelled".format(stageId)) - taskSetFinished(tsm) - } - } - - def resourceOffer(freeCores: Int): Seq[TaskDescription] = { - synchronized { - var freeCpuCores = freeCores - val tasks = new ArrayBuffer[TaskDescription](freeCores) - val sortedTaskSetQueue = rootPool.getSortedTaskSetQueue() - for (manager <- sortedTaskSetQueue) { - logDebug("parentName:%s,name:%s,runningTasks:%s".format( - manager.parent.name, manager.name, manager.runningTasks)) - } - - var launchTask = false - for (manager <- sortedTaskSetQueue) { - do { - launchTask = false - manager.resourceOffer(null, null, freeCpuCores, null) match { - case Some(task) => - tasks += task - taskIdToTaskSetId(task.taskId) = manager.taskSet.id - taskSetTaskIds(manager.taskSet.id) += task.taskId - freeCpuCores -= 1 - launchTask = true - case None => {} - } - } while(launchTask) - } - return tasks - } - } - - def taskSetFinished(manager: TaskSetManager) { - synchronized { - activeTaskSets -= manager.taskSet.id - manager.parent.removeSchedulable(manager) - logInfo("Remove TaskSet %s from pool %s".format(manager.taskSet.id, manager.parent.name)) - taskIdToTaskSetId --= taskSetTaskIds(manager.taskSet.id) - taskSetTaskIds -= manager.taskSet.id - } - } - - override def statusUpdate(taskId: Long, state: TaskState, serializedData: ByteBuffer) { - if (TaskState.isFinished(state)) { - synchronized { - taskIdToTaskSetId.get(taskId) match { - case Some(taskSetId) => - val taskSetManager = activeTaskSets.get(taskSetId) - taskSetManager.foreach { tsm => - taskSetTaskIds(taskSetId) -= taskId - - state match { - case TaskState.FINISHED => - tsm.taskEnded(taskId, state, serializedData) - case TaskState.FAILED => - tsm.taskFailed(taskId, state, serializedData) - case TaskState.KILLED => - tsm.error("Task %d was killed".format(taskId)) - case _ => {} - } - } - case None => - logInfo("Ignoring update from TID " + taskId + " because its task set is gone") - } - } - localActor ! LocalStatusUpdate(taskId, state, serializedData) - } - } - - override def stop() { - } - - override def defaultParallelism() = threads -} diff --git a/core/src/main/scala/org/apache/spark/scheduler/local/LocalTaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalTaskSetManager.scala deleted file mode 100644 index f92ad4a0c5616..0000000000000 --- a/core/src/main/scala/org/apache/spark/scheduler/local/LocalTaskSetManager.scala +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.scheduler.local - -import java.nio.ByteBuffer -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap - -import org.apache.spark.{ExceptionFailure, Logging, SparkEnv, SparkException, Success, - TaskEndReason, TaskResultLost, TaskState} -import org.apache.spark.TaskState.TaskState -import org.apache.spark.scheduler.{DirectTaskResult, IndirectTaskResult, Pool, Schedulable, Task, - TaskDescription, TaskInfo, TaskLocality, TaskResult, TaskSet, TaskSetManager} - - -private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: TaskSet) - extends TaskSetManager with Logging { - - var parent: Pool = null - var weight: Int = 1 - var minShare: Int = 0 - var runningTasks: Int = 0 - var priority: Int = taskSet.priority - var stageId: Int = taskSet.stageId - var name: String = "TaskSet_" + taskSet.stageId.toString - - var failCount = new Array[Int](taskSet.tasks.size) - val taskInfos = new HashMap[Long, TaskInfo] - val numTasks = taskSet.tasks.size - var numFinished = 0 - val env = SparkEnv.get - val ser = env.closureSerializer.newInstance() - val copiesRunning = new Array[Int](numTasks) - val finished = new Array[Boolean](numTasks) - val numFailures = new Array[Int](numTasks) - val MAX_TASK_FAILURES = sched.maxFailures - - def increaseRunningTasks(taskNum: Int): Unit = { - runningTasks += taskNum - if (parent != null) { - parent.increaseRunningTasks(taskNum) - } - } - - def decreaseRunningTasks(taskNum: Int): Unit = { - runningTasks -= taskNum - if (parent != null) { - parent.decreaseRunningTasks(taskNum) - } - } - - override def addSchedulable(schedulable: Schedulable): Unit = { - // nothing - } - - override def removeSchedulable(schedulable: Schedulable): Unit = { - // nothing - } - - override def getSchedulableByName(name: String): Schedulable = { - return null - } - - override def executorLost(executorId: String, host: String): Unit = { - // nothing - } - - override def checkSpeculatableTasks() = true - - override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = { - var sortedTaskSetQueue = new ArrayBuffer[TaskSetManager] - sortedTaskSetQueue += this - return sortedTaskSetQueue - } - - override def hasPendingTasks() = true - - def findTask(): Option[Int] = { - for (i <- 0 to numTasks-1) { - if (copiesRunning(i) == 0 && !finished(i)) { - return Some(i) - } - } - return None - } - - override def resourceOffer( - execId: String, - host: String, - availableCpus: Int, - maxLocality: TaskLocality.TaskLocality) - : Option[TaskDescription] = - { - SparkEnv.set(sched.env) - logDebug("availableCpus:%d, numFinished:%d, numTasks:%d".format( - availableCpus.toInt, numFinished, numTasks)) - if (availableCpus > 0 && numFinished < numTasks) { - findTask() match { - case Some(index) => - val taskId = sched.attemptId.getAndIncrement() - val task = taskSet.tasks(index) - val info = new TaskInfo(taskId, index, System.currentTimeMillis(), "local", "local:1", - TaskLocality.NODE_LOCAL) - taskInfos(taskId) = info - // We rely on the DAGScheduler to catch non-serializable closures and RDDs, so in here - // we assume the task can be serialized without exceptions. 
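// A minimal sketch (not from this patch) of the kind of up-front check the
// DAGScheduler performs so that schedulers downstream can assume serialization
// succeeds: round-trip the closure through Java serialization and surface
// NotSerializableException early. The object and helper names are illustrative.
import java.io.{ByteArrayOutputStream, NotSerializableException, ObjectOutputStream}

object SerializabilityCheck {
  def ensureSerializable(obj: AnyRef): Unit = {
    val oos = new ObjectOutputStream(new ByteArrayOutputStream())
    try {
      oos.writeObject(obj)   // throws NotSerializableException for bad closures/RDDs
    } catch {
      case e: NotSerializableException =>
        throw new IllegalArgumentException("Task not serializable: " + e.getMessage, e)
    } finally {
      oos.close()
    }
  }
}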
- val bytes = Task.serializeWithDependencies( - task, sched.sc.addedFiles, sched.sc.addedJars, ser) - logInfo("Size of task " + taskId + " is " + bytes.limit + " bytes") - val taskName = "task %s:%d".format(taskSet.id, index) - copiesRunning(index) += 1 - increaseRunningTasks(1) - taskStarted(task, info) - return Some(new TaskDescription(taskId, null, taskName, index, bytes)) - case None => {} - } - } - return None - } - - def taskStarted(task: Task[_], info: TaskInfo) { - sched.dagScheduler.taskStarted(task, info) - } - - def taskEnded(tid: Long, state: TaskState, serializedData: ByteBuffer) { - val info = taskInfos(tid) - val index = info.index - val task = taskSet.tasks(index) - info.markSuccessful() - val result = ser.deserialize[TaskResult[_]](serializedData, getClass.getClassLoader) match { - case directResult: DirectTaskResult[_] => directResult - case IndirectTaskResult(blockId) => { - logDebug("Fetching indirect task result for TID %s".format(tid)) - val serializedTaskResult = env.blockManager.getRemoteBytes(blockId) - if (!serializedTaskResult.isDefined) { - /* We won't be able to get the task result if the block manager had to flush the - * result. */ - taskFailed(tid, state, serializedData) - return - } - val deserializedResult = ser.deserialize[DirectTaskResult[_]]( - serializedTaskResult.get) - env.blockManager.master.removeBlock(blockId) - deserializedResult - } - } - result.metrics.resultSize = serializedData.limit() - sched.dagScheduler.taskEnded(task, Success, result.value, result.accumUpdates, info, - result.metrics) - numFinished += 1 - decreaseRunningTasks(1) - finished(index) = true - if (numFinished == numTasks) { - sched.taskSetFinished(this) - } - } - - def taskFailed(tid: Long, state: TaskState, serializedData: ByteBuffer) { - val info = taskInfos(tid) - val index = info.index - val task = taskSet.tasks(index) - info.markFailed() - decreaseRunningTasks(1) - var failureReason = "unknown" - ser.deserialize[TaskEndReason](serializedData, getClass.getClassLoader) match { - case ef: ExceptionFailure => - failureReason = "Exception failure: %s".format(ef.description) - val locs = ef.stackTrace.map(loc => "\tat %s".format(loc.toString)) - logInfo("Task loss due to %s\n%s\n%s".format( - ef.className, ef.description, locs.mkString("\n"))) - sched.dagScheduler.taskEnded(task, ef, null, null, info, ef.metrics.getOrElse(null)) - - case TaskResultLost => - failureReason = "Lost result for TID %s".format(tid) - logWarning(failureReason) - sched.dagScheduler.taskEnded(task, TaskResultLost, null, null, info, null) - - case _ => {} - } - if (!finished(index)) { - copiesRunning(index) -= 1 - numFailures(index) += 1 - if (numFailures(index) > MAX_TASK_FAILURES) { - val errorMessage = ("Task %s:%d failed more than %d times; aborting job" + - "(most recent failure: %s").format(taskSet.id, index, MAX_TASK_FAILURES, failureReason) - decreaseRunningTasks(runningTasks) - sched.dagScheduler.taskSetFailed(taskSet, errorMessage) - // need to delete failed Taskset from schedule queue - sched.taskSetFinished(this) - } - } - } - - override def error(message: String) { - sched.dagScheduler.taskSetFailed(taskSet, message) - sched.taskSetFinished(this) - } -} diff --git a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala index 4de81617b1dd8..5d3d43623d9d2 100644 --- a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala 
@@ -21,6 +21,7 @@ import java.io._ import java.nio.ByteBuffer import org.apache.spark.util.ByteBufferInputStream +import org.apache.spark.SparkConf private[spark] class JavaSerializationStream(out: OutputStream) extends SerializationStream { val objOut = new ObjectOutputStream(out) @@ -77,6 +78,6 @@ private[spark] class JavaSerializerInstance extends SerializerInstance { /** * A Spark serializer that uses Java's built-in serialization. */ -class JavaSerializer extends Serializer { +class JavaSerializer(conf: SparkConf) extends Serializer { def newInstance(): SerializerInstance = new JavaSerializerInstance } diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index e748c2275d589..c14cd47556987 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -25,18 +25,18 @@ import com.esotericsoftware.kryo.{KryoException, Kryo} import com.esotericsoftware.kryo.io.{Input => KryoInput, Output => KryoOutput} import com.twitter.chill.{EmptyScalaKryoInstantiator, AllScalaRegistrar} -import org.apache.spark.{SerializableWritable, Logging} +import org.apache.spark._ import org.apache.spark.broadcast.HttpBroadcast import org.apache.spark.scheduler.MapStatus import org.apache.spark.storage._ +import org.apache.spark.storage.{GetBlock, GotBlock, PutBlock} /** * A Spark serializer that uses the [[https://code.google.com/p/kryo/ Kryo serialization library]]. */ -class KryoSerializer extends org.apache.spark.serializer.Serializer with Logging { - +class KryoSerializer(conf: SparkConf) extends org.apache.spark.serializer.Serializer with Logging { private val bufferSize = { - System.getProperty("spark.kryoserializer.buffer.mb", "2").toInt * 1024 * 1024 + conf.getInt("spark.kryoserializer.buffer.mb", 2) * 1024 * 1024 } def newKryoOutput() = new KryoOutput(bufferSize) @@ -48,7 +48,7 @@ class KryoSerializer extends org.apache.spark.serializer.Serializer with Logging // Allow disabling Kryo reference tracking if user knows their object graphs don't have loops. // Do this before we invoke the user registrator so the user registrator can override this. 
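// A sketch (not from this patch) of the user-facing side of the conf-driven Kryo
// setup in this file: a custom KryoRegistrator plus the SparkConf keys that
// KryoSerializer now reads instead of system properties. The registrator class
// and the registered type are illustrative; values shown are the defaults.
import com.esotericsoftware.kryo.Kryo
import org.apache.spark.SparkConf
import org.apache.spark.serializer.KryoRegistrator

class MyRegistrator extends KryoRegistrator {
  def registerClasses(kryo: Kryo): Unit = {
    kryo.register(classOf[Array[Double]])   // register whatever types your job ships
  }
}

object KryoConfSketch {
  def conf(): SparkConf = new SparkConf()
    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .set("spark.kryo.registrator", "MyRegistrator")
    .set("spark.kryo.referenceTracking", "false")   // only safe if object graphs have no cycles
    .set("spark.kryoserializer.buffer.mb", "2")
}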
- kryo.setReferences(System.getProperty("spark.kryo.referenceTracking", "true").toBoolean) + kryo.setReferences(conf.getBoolean("spark.kryo.referenceTracking", true)) for (cls <- KryoSerializer.toRegister) kryo.register(cls) @@ -58,13 +58,13 @@ class KryoSerializer extends org.apache.spark.serializer.Serializer with Logging // Allow the user to register their own classes by setting spark.kryo.registrator try { - Option(System.getProperty("spark.kryo.registrator")).foreach { regCls => + for (regCls <- conf.getOption("spark.kryo.registrator")) { logDebug("Running user registrator: " + regCls) val reg = Class.forName(regCls, true, classLoader).newInstance().asInstanceOf[KryoRegistrator] reg.registerClasses(kryo) } } catch { - case _: Exception => println("Failed to register spark.kryo.registrator") + case e: Exception => logError("Failed to run spark.kryo.registrator", e) } // Register Chill's classes; we do this after our ranges and the user's own classes to let diff --git a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala index 160cca4d6c54a..9a5e3cb77e1d5 100644 --- a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala @@ -29,6 +29,9 @@ import org.apache.spark.util.{NextIterator, ByteBufferInputStream} * A serializer. Because some serialization libraries are not thread safe, this class is used to * create [[org.apache.spark.serializer.SerializerInstance]] objects that do the actual serialization and are * guaranteed to only be called from one thread at a time. + * + * Implementations of this trait should have a zero-arg constructor or a constructor that accepts a + * [[org.apache.spark.SparkConf]] as parameter. If both constructors are defined, the latter takes precedence. */ trait Serializer { def newInstance(): SerializerInstance diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala index 2955986feced5..36a37af4f821d 100644 --- a/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala +++ b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala @@ -18,6 +18,7 @@ package org.apache.spark.serializer import java.util.concurrent.ConcurrentHashMap +import org.apache.spark.SparkConf /** @@ -26,18 +27,19 @@ import java.util.concurrent.ConcurrentHashMap * creating a new one. */ private[spark] class SerializerManager { + // TODO: Consider moving this into SparkConf itself to remove the global singleton. private val serializers = new ConcurrentHashMap[String, Serializer] private var _default: Serializer = _ def default = _default - def setDefault(clsName: String): Serializer = { - _default = get(clsName) + def setDefault(clsName: String, conf: SparkConf): Serializer = { + _default = get(clsName, conf) _default } - def get(clsName: String): Serializer = { + def get(clsName: String, conf: SparkConf): Serializer = { if (clsName == null) { default } else { @@ -51,8 +53,19 @@ private[spark] class SerializerManager { serializer = serializers.get(clsName) if (serializer == null) { val clsLoader = Thread.currentThread.getContextClassLoader - serializer = - Class.forName(clsName, true, clsLoader).newInstance().asInstanceOf[Serializer] + val cls = Class.forName(clsName, true, clsLoader) + + // First try with the constructor that takes SparkConf. If we can't find one, + // use a no-arg constructor instead. 
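// A sketch (not from this patch) of a user-defined serializer that satisfies the
// constructor contract documented above: it exposes a (SparkConf) constructor,
// which the reflection below prefers, and simply delegates to JavaSerializer.
// The class name is illustrative; it would be selected with
// conf.set("spark.serializer", "MySerializer").
import org.apache.spark.SparkConf
import org.apache.spark.serializer.{JavaSerializer, Serializer, SerializerInstance}

class MySerializer(conf: SparkConf) extends Serializer {
  private val delegate = new JavaSerializer(conf)
  def newInstance(): SerializerInstance = delegate.newInstance()
}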
+ try { + val constructor = cls.getConstructor(classOf[SparkConf]) + serializer = constructor.newInstance(conf).asInstanceOf[Serializer] + } catch { + case _: NoSuchMethodException => + val constructor = cls.getConstructor() + serializer = constructor.newInstance().asInstanceOf[Serializer] + } + serializers.put(clsName, serializer) } serializer diff --git a/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala index e51c5b30a3dab..4fa2ab96d9725 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala @@ -312,7 +312,7 @@ object BlockFetcherIterator { logDebug("Sending request for %d blocks (%s) from %s".format( req.blocks.size, Utils.bytesToString(req.size), req.address.host)) val cmId = new ConnectionManagerId(req.address.host, req.address.nettyPort) - val cpier = new ShuffleCopier + val cpier = new ShuffleCopier(blockManager.conf) cpier.getBlocks(cmId, req.blocks, putResult) logDebug("Sent request for remote blocks " + req.blocks + " from " + req.address.host ) } @@ -327,7 +327,7 @@ object BlockFetcherIterator { fetchRequestsSync.put(request) } - copiers = startCopiers(System.getProperty("spark.shuffle.copier.threads", "6").toInt) + copiers = startCopiers(conf.getInt("spark.shuffle.copier.threads", 6)) logInfo("Started " + fetchRequestsSync.size + " remote gets in " + Utils.getUsedTimeMs(startTime)) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockId.scala b/core/src/main/scala/org/apache/spark/storage/BlockId.scala index 7156d855d873b..301d784b350a3 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockId.scala @@ -17,12 +17,14 @@ package org.apache.spark.storage +import java.util.UUID + /** * Identifies a particular Block of data, usually associated with a single file. * A Block can be uniquely identified by its filename, but each type of Block has a different * set of keys which produce its unique name. * - * If your BlockId should be serializable, be sure to add it to the BlockId.fromString() method. + * If your BlockId should be serializable, be sure to add it to the BlockId.apply() method. */ private[spark] sealed abstract class BlockId { /** A globally unique identifier for this Block. Can be used for ser/de. */ @@ -55,7 +57,8 @@ private[spark] case class BroadcastBlockId(broadcastId: Long) extends BlockId { def name = "broadcast_" + broadcastId } -private[spark] case class BroadcastHelperBlockId(broadcastId: BroadcastBlockId, hType: String) extends BlockId { +private[spark] +case class BroadcastHelperBlockId(broadcastId: BroadcastBlockId, hType: String) extends BlockId { def name = broadcastId.name + "_" + hType } @@ -67,6 +70,11 @@ private[spark] case class StreamBlockId(streamId: Int, uniqueId: Long) extends B def name = "input-" + streamId + "-" + uniqueId } +/** Id associated with temporary data managed as blocks. Not serializable. 
*/ +private[spark] case class TempBlockId(id: UUID) extends BlockId { + def name = "temp_" + id +} + // Intended only for testing purposes private[spark] case class TestBlockId(id: String) extends BlockId { def name = "test_" + id diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index a34c95b6f07b6..ed53558566edf 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -24,13 +24,13 @@ import scala.collection.mutable.{HashMap, ArrayBuffer} import scala.util.Random import akka.actor.{ActorSystem, Cancellable, Props} -import akka.dispatch.{Await, Future} -import akka.util.Duration -import akka.util.duration._ +import scala.concurrent.{Await, Future} +import scala.concurrent.duration.Duration +import scala.concurrent.duration._ import it.unimi.dsi.fastutil.io.{FastBufferedOutputStream, FastByteArrayOutputStream} -import org.apache.spark.{Logging, SparkEnv, SparkException} +import org.apache.spark.{SparkConf, Logging, SparkEnv, SparkException} import org.apache.spark.io.CompressionCodec import org.apache.spark.network._ import org.apache.spark.serializer.Serializer @@ -43,12 +43,13 @@ private[spark] class BlockManager( actorSystem: ActorSystem, val master: BlockManagerMaster, val defaultSerializer: Serializer, - maxMemory: Long) + maxMemory: Long, + val conf: SparkConf) extends Logging { val shuffleBlockManager = new ShuffleBlockManager(this) val diskBlockManager = new DiskBlockManager(shuffleBlockManager, - System.getProperty("spark.local.dir", System.getProperty("java.io.tmpdir"))) + conf.get("spark.local.dir", System.getProperty("java.io.tmpdir"))) private val blockInfo = new TimeStampedHashMap[BlockId, BlockInfo] @@ -57,12 +58,12 @@ private[spark] class BlockManager( // If we use Netty for shuffle, start a new Netty-based shuffle sender service. 
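// A sketch (not from this patch) of how the storage and shuffle settings read in
// this file are now supplied through SparkConf instead of JVM system properties.
// The values shown are just the defaults used in the surrounding code.
import org.apache.spark.SparkConf

object BlockManagerSettingsSketch {
  def defaults(): SparkConf = new SparkConf()
    .set("spark.shuffle.use.netty", "false")      // Netty shuffle sender disabled by default
    .set("spark.shuffle.compress", "true")        // compress shuffle outputs
    .set("spark.shuffle.spill.compress", "true")  // compress spilled shuffle data (new key)
    .set("spark.rdd.compress", "false")           // compress serialized RDD partitions
    .set("spark.reducer.maxMbInFlight", "48")     // MB of fetched blocks in flight per reducer
    .set("spark.storage.memoryFraction", "0.6")   // fraction of the heap for the memory store
}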
private val nettyPort: Int = { - val useNetty = System.getProperty("spark.shuffle.use.netty", "false").toBoolean - val nettyPortConfig = System.getProperty("spark.shuffle.sender.port", "0").toInt + val useNetty = conf.getBoolean("spark.shuffle.use.netty", false) + val nettyPortConfig = conf.getInt("spark.shuffle.sender.port", 0) if (useNetty) diskBlockManager.startShuffleBlockSender(nettyPortConfig) else 0 } - val connectionManager = new ConnectionManager(0) + val connectionManager = new ConnectionManager(0, conf) implicit val futureExecContext = connectionManager.futureExecContext val blockManagerId = BlockManagerId( @@ -71,18 +72,18 @@ private[spark] class BlockManager( // Max megabytes of data to keep in flight per reducer (to avoid over-allocating memory // for receiving shuffle outputs) val maxBytesInFlight = - System.getProperty("spark.reducer.maxMbInFlight", "48").toLong * 1024 * 1024 + conf.getLong("spark.reducer.maxMbInFlight", 48) * 1024 * 1024 // Whether to compress broadcast variables that are stored - val compressBroadcast = System.getProperty("spark.broadcast.compress", "true").toBoolean + val compressBroadcast = conf.getBoolean("spark.broadcast.compress", true) // Whether to compress shuffle output that are stored - val compressShuffle = System.getProperty("spark.shuffle.compress", "true").toBoolean + val compressShuffle = conf.getBoolean("spark.shuffle.compress", true) // Whether to compress RDD partitions that are stored serialized - val compressRdds = System.getProperty("spark.rdd.compress", "false").toBoolean + val compressRdds = conf.getBoolean("spark.rdd.compress", false) + // Whether to compress shuffle output temporarily spilled to disk + val compressShuffleSpill = conf.getBoolean("spark.shuffle.spill.compress", true) - val heartBeatFrequency = BlockManager.getHeartBeatFrequencyFromSystemProperties - - val hostPort = Utils.localHostPort() + val heartBeatFrequency = BlockManager.getHeartBeatFrequency(conf) val slaveActor = actorSystem.actorOf(Props(new BlockManagerSlaveActor(this)), name = "BlockManagerActor" + BlockManager.ID_GENERATOR.next) @@ -100,8 +101,11 @@ private[spark] class BlockManager( var heartBeatTask: Cancellable = null - private val metadataCleaner = new MetadataCleaner(MetadataCleanerType.BLOCK_MANAGER, this.dropOldNonBroadcastBlocks) - private val broadcastCleaner = new MetadataCleaner(MetadataCleanerType.BROADCAST_VARS, this.dropOldBroadcastBlocks) + private val metadataCleaner = new MetadataCleaner( + MetadataCleanerType.BLOCK_MANAGER, this.dropOldNonBroadcastBlocks, conf) + private val broadcastCleaner = new MetadataCleaner( + MetadataCleanerType.BROADCAST_VARS, this.dropOldBroadcastBlocks, conf) + initialize() // The compression codec to use. Note that the "lazy" val is necessary because we want to delay @@ -109,14 +113,14 @@ private[spark] class BlockManager( // program could be using a user-defined codec in a third party jar, which is loaded in // Executor.updateDependencies. When the BlockManager is initialized, user level jars hasn't been // loaded yet. - private lazy val compressionCodec: CompressionCodec = CompressionCodec.createCodec() + private lazy val compressionCodec: CompressionCodec = CompressionCodec.createCodec(conf) /** * Construct a BlockManager with a memory limit set based on system properties. 
*/ def this(execId: String, actorSystem: ActorSystem, master: BlockManagerMaster, - serializer: Serializer) = { - this(execId, actorSystem, master, serializer, BlockManager.getMaxMemoryFromSystemProperties) + serializer: Serializer, conf: SparkConf) = { + this(execId, actorSystem, master, serializer, BlockManager.getMaxMemory(conf), conf) } /** @@ -126,7 +130,7 @@ private[spark] class BlockManager( private def initialize() { master.registerBlockManager(blockManagerId, maxMemory, slaveActor) BlockManagerWorker.startBlockManagerWorker(this) - if (!BlockManager.getDisableHeartBeatsForTesting) { + if (!BlockManager.getDisableHeartBeatsForTesting(conf)) { heartBeatTask = actorSystem.scheduler.schedule(0.seconds, heartBeatFrequency.milliseconds) { heartBeat() } @@ -155,7 +159,7 @@ private[spark] class BlockManager( /** * Reregister with the master and report all blocks to it. This will be called by the heart beat - * thread if our heartbeat to the block amnager indicates that we were not registered. + * thread if our heartbeat to the block manager indicates that we were not registered. * * Note that this method must be called without any BlockInfo locks held. */ @@ -408,7 +412,7 @@ private[spark] class BlockManager( logDebug("The value of block " + blockId + " is null") } logDebug("Block " + blockId + " not found") - return None + None } /** @@ -439,7 +443,7 @@ private[spark] class BlockManager( : BlockFetcherIterator = { val iter = - if (System.getProperty("spark.shuffle.use.netty", "false").toBoolean) { + if (conf.getBoolean("spark.shuffle.use.netty", false)) { new BlockFetcherIterator.NettyBlockFetcherIterator(this, blocksByAddress, serializer) } else { new BlockFetcherIterator.BasicBlockFetcherIterator(this, blocksByAddress, serializer) @@ -465,7 +469,8 @@ private[spark] class BlockManager( def getDiskWriter(blockId: BlockId, file: File, serializer: Serializer, bufferSize: Int) : BlockObjectWriter = { val compressStream: OutputStream => OutputStream = wrapForCompression(blockId, _) - new DiskBlockObjectWriter(blockId, file, serializer, bufferSize, compressStream) + val syncWrites = conf.getBoolean("spark.shuffle.sync", false) + new DiskBlockObjectWriter(blockId, file, serializer, bufferSize, compressStream, syncWrites) } /** @@ -787,6 +792,7 @@ private[spark] class BlockManager( case ShuffleBlockId(_, _, _) => compressShuffle case BroadcastBlockId(_) => compressBroadcast case RDDBlockId(_, _) => compressRdds + case TempBlockId(_) => compressShuffleSpill case _ => false } @@ -856,19 +862,18 @@ private[spark] class BlockManager( private[spark] object BlockManager extends Logging { - val ID_GENERATOR = new IdGenerator - def getMaxMemoryFromSystemProperties: Long = { - val memoryFraction = System.getProperty("spark.storage.memoryFraction", "0.66").toDouble + def getMaxMemory(conf: SparkConf): Long = { + val memoryFraction = conf.getDouble("spark.storage.memoryFraction", 0.6) (Runtime.getRuntime.maxMemory * memoryFraction).toLong } - def getHeartBeatFrequencyFromSystemProperties: Long = - System.getProperty("spark.storage.blockManagerTimeoutIntervalMs", "60000").toLong / 4 + def getHeartBeatFrequency(conf: SparkConf): Long = + conf.getLong("spark.storage.blockManagerTimeoutIntervalMs", 60000) / 4 - def getDisableHeartBeatsForTesting: Boolean = - System.getProperty("spark.test.disableBlockManagerHeartBeat", "false").toBoolean + def getDisableHeartBeatsForTesting(conf: SparkConf): Boolean = + conf.getBoolean("spark.test.disableBlockManagerHeartBeat", false) /** * Attempt to clean up a ByteBuffer if 
it is memory-mapped. This uses an *unsafe* Sun API that @@ -891,9 +896,9 @@ private[spark] object BlockManager extends Logging { blockManagerMaster: BlockManagerMaster = null) : Map[BlockId, Seq[BlockManagerId]] = { - // env == null and blockManagerMaster != null is used in tests + // blockManagerMaster != null is used in tests assert (env != null || blockManagerMaster != null) - val blockLocations: Seq[Seq[BlockManagerId]] = if (env != null) { + val blockLocations: Seq[Seq[BlockManagerId]] = if (blockManagerMaster == null) { env.blockManager.getLocationBlockIds(blockIds) } else { blockManagerMaster.getLocations(blockIds) @@ -924,4 +929,3 @@ private[spark] object BlockManager extends Logging { blockIdsToBlockManagers(blockIds, env, blockManagerMaster).mapValues(s => s.map(_.host)) } } - diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala index 94038649b3024..c54e4f2664753 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala @@ -17,23 +17,25 @@ package org.apache.spark.storage -import akka.actor.ActorRef -import akka.dispatch.{Await, Future} +import scala.concurrent.{Await, Future} +import scala.concurrent.ExecutionContext.Implicits.global + +import akka.actor._ import akka.pattern.ask -import akka.util.Duration -import org.apache.spark.{Logging, SparkException} +import org.apache.spark.{SparkConf, Logging, SparkException} import org.apache.spark.storage.BlockManagerMessages._ +import org.apache.spark.util.AkkaUtils +private[spark] +class BlockManagerMaster(var driverActor : ActorRef, conf: SparkConf) extends Logging { -private[spark] class BlockManagerMaster(var driverActor: ActorRef) extends Logging { - - val AKKA_RETRY_ATTEMPTS: Int = System.getProperty("spark.akka.num.retries", "3").toInt - val AKKA_RETRY_INTERVAL_MS: Int = System.getProperty("spark.akka.retry.wait", "3000").toInt + val AKKA_RETRY_ATTEMPTS: Int = conf.getInt("spark.akka.num.retries", 3) + val AKKA_RETRY_INTERVAL_MS: Int = conf.getInt("spark.akka.retry.wait", 3000) val DRIVER_AKKA_ACTOR_NAME = "BlockManagerMaster" - val timeout = Duration.create(System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds") + val timeout = AkkaUtils.askTimeout(conf) /** Remove a dead executor from the driver actor. This is only called on the driver side. 
*/ def removeExecutor(execId: String) { diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala index f8cf14b503b08..2c1a4e2f5d3a1 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala @@ -21,24 +21,22 @@ import java.util.{HashMap => JHashMap} import scala.collection.mutable import scala.collection.JavaConversions._ +import scala.concurrent.Future +import scala.concurrent.duration._ import akka.actor.{Actor, ActorRef, Cancellable} -import akka.dispatch.Future import akka.pattern.ask -import akka.util.Duration -import akka.util.duration._ -import org.apache.spark.{Logging, SparkException} +import org.apache.spark.{SparkConf, Logging, SparkException} import org.apache.spark.storage.BlockManagerMessages._ -import org.apache.spark.util.Utils - +import org.apache.spark.util.{AkkaUtils, Utils} /** * BlockManagerMasterActor is an actor on the master node to track statuses of * all slaves' block managers. */ private[spark] -class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { +class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf) extends Actor with Logging { // Mapping from block manager id to the block manager's information. private val blockManagerInfo = @@ -50,21 +48,19 @@ class BlockManagerMasterActor(val isLocal: Boolean) extends Actor with Logging { // Mapping from block id to the set of block managers that have the block. private val blockLocations = new JHashMap[BlockId, mutable.HashSet[BlockManagerId]] - val akkaTimeout = Duration.create( - System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds") - - initLogging() + private val akkaTimeout = AkkaUtils.askTimeout(conf) - val slaveTimeout = System.getProperty("spark.storage.blockManagerSlaveTimeoutMs", - "" + (BlockManager.getHeartBeatFrequencyFromSystemProperties * 3)).toLong + val slaveTimeout = conf.get("spark.storage.blockManagerSlaveTimeoutMs", + "" + (BlockManager.getHeartBeatFrequency(conf) * 3)).toLong - val checkTimeoutInterval = System.getProperty("spark.storage.blockManagerTimeoutIntervalMs", + val checkTimeoutInterval = conf.get("spark.storage.blockManagerTimeoutIntervalMs", "60000").toLong var timeoutCheckingTask: Cancellable = null override def preStart() { - if (!BlockManager.getDisableHeartBeatsForTesting) { + if (!BlockManager.getDisableHeartBeatsForTesting(conf)) { + import context.dispatcher timeoutCheckingTask = context.system.scheduler.schedule( 0.seconds, checkTimeoutInterval.milliseconds, self, ExpireDeadHosts) } @@ -352,14 +348,19 @@ object BlockManagerMasterActor { if (storageLevel.isValid) { // isValid means it is either stored in-memory or on-disk. - _blocks.put(blockId, BlockStatus(storageLevel, memSize, diskSize)) + // But the memSize here indicates the data size in or dropped from memory, + // and the diskSize here indicates the data size in or dropped to disk. + // They can be both larger than 0, when a block is dropped from memory to disk. + // Therefore, a safe way to set BlockStatus is to set its info in accurate modes. 
if (storageLevel.useMemory) { + _blocks.put(blockId, BlockStatus(storageLevel, memSize, 0)) _remainingMem -= memSize logInfo("Added %s in memory on %s (size: %s, free: %s)".format( blockId, blockManagerId.hostPort, Utils.bytesToString(memSize), Utils.bytesToString(_remainingMem))) } if (storageLevel.useDisk) { + _blocks.put(blockId, BlockStatus(storageLevel, 0, diskSize)) logInfo("Added %s on disk on %s (size: %s)".format( blockId, blockManagerId.hostPort, Utils.bytesToString(diskSize))) } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala index acc3951088a8d..365866d1e3397 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala @@ -28,7 +28,7 @@ private[spark] class BlockManagerSource(val blockManager: BlockManager, sc: Spar val metricRegistry = new MetricRegistry() val sourceName = "%s.BlockManager".format(sc.appName) - metricRegistry.register(MetricRegistry.name("memory", "maxMem", "MBytes"), new Gauge[Long] { + metricRegistry.register(MetricRegistry.name("memory", "maxMem_MB"), new Gauge[Long] { override def getValue: Long = { val storageStatusList = blockManager.master.getStorageStatus val maxMem = storageStatusList.map(_.maxMem).reduce(_ + _) @@ -36,7 +36,7 @@ private[spark] class BlockManagerSource(val blockManager: BlockManager, sc: Spar } }) - metricRegistry.register(MetricRegistry.name("memory", "remainingMem", "MBytes"), new Gauge[Long] { + metricRegistry.register(MetricRegistry.name("memory", "remainingMem_MB"), new Gauge[Long] { override def getValue: Long = { val storageStatusList = blockManager.master.getStorageStatus val remainingMem = storageStatusList.map(_.memRemaining).reduce(_ + _) @@ -44,7 +44,7 @@ private[spark] class BlockManagerSource(val blockManager: BlockManager, sc: Spar } }) - metricRegistry.register(MetricRegistry.name("memory", "memUsed", "MBytes"), new Gauge[Long] { + metricRegistry.register(MetricRegistry.name("memory", "memUsed_MB"), new Gauge[Long] { override def getValue: Long = { val storageStatusList = blockManager.master.getStorageStatus val maxMem = storageStatusList.map(_.maxMem).reduce(_ + _) @@ -53,7 +53,7 @@ private[spark] class BlockManagerSource(val blockManager: BlockManager, sc: Spar } }) - metricRegistry.register(MetricRegistry.name("disk", "diskSpaceUsed", "MBytes"), new Gauge[Long] { + metricRegistry.register(MetricRegistry.name("disk", "diskSpaceUsed_MB"), new Gauge[Long] { override def getValue: Long = { val storageStatusList = blockManager.master.getStorageStatus val diskSpaceUsed = storageStatusList diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerWorker.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerWorker.scala index 0c66addf9def6..3efe738a08f66 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerWorker.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerWorker.scala @@ -30,7 +30,6 @@ import org.apache.spark.util.Utils * TODO: Use event model. 
*/ private[spark] class BlockManagerWorker(val blockManager: BlockManager) extends Logging { - initLogging() blockManager.connectionManager.onReceiveMessage(onBlockMessageReceive) @@ -43,15 +42,15 @@ private[spark] class BlockManagerWorker(val blockManager: BlockManager) extends val blockMessages = BlockMessageArray.fromBufferMessage(bufferMessage) logDebug("Parsed as a block message array") val responseMessages = blockMessages.map(processBlockMessage).filter(_ != None).map(_.get) - return Some(new BlockMessageArray(responseMessages).toBufferMessage) + Some(new BlockMessageArray(responseMessages).toBufferMessage) } catch { case e: Exception => logError("Exception handling buffer message", e) - return None + None } } case otherMessage: Any => { logError("Unknown type message received: " + otherMessage) - return None + None } } } @@ -62,7 +61,7 @@ private[spark] class BlockManagerWorker(val blockManager: BlockManager) extends val pB = PutBlock(blockMessage.getId, blockMessage.getData, blockMessage.getLevel) logDebug("Received [" + pB + "]") putBlock(pB.id, pB.data, pB.level) - return None + None } case BlockMessage.TYPE_GET_BLOCK => { val gB = new GetBlock(blockMessage.getId) @@ -71,9 +70,9 @@ private[spark] class BlockManagerWorker(val blockManager: BlockManager) extends if (buffer == null) { return None } - return Some(BlockMessage.fromGotBlock(GotBlock(gB.id, buffer))) + Some(BlockMessage.fromGotBlock(GotBlock(gB.id, buffer))) } - case _ => return None + case _ => None } } @@ -94,15 +93,13 @@ private[spark] class BlockManagerWorker(val blockManager: BlockManager) extends } logDebug("GetBlock " + id + " used " + Utils.getUsedTimeMs(startTimeMs) + " and got buffer " + buffer) - return buffer + buffer } } private[spark] object BlockManagerWorker extends Logging { private var blockManagerWorker: BlockManagerWorker = null - initLogging() - def startBlockManagerWorker(manager: BlockManager) { blockManagerWorker = new BlockManagerWorker(manager) } @@ -114,7 +111,7 @@ private[spark] object BlockManagerWorker extends Logging { val blockMessageArray = new BlockMessageArray(blockMessage) val resultMessage = connectionManager.sendMessageReliablySync( toConnManagerId, blockMessageArray.toBufferMessage) - return (resultMessage != None) + resultMessage.isDefined } def syncGetBlock(msg: GetBlock, toConnManagerId: ConnectionManagerId): ByteBuffer = { @@ -133,8 +130,8 @@ private[spark] object BlockManagerWorker extends Logging { return blockMessage.getData }) } - case None => logDebug("No response message received"); return null + case None => logDebug("No response message received") } - return null + null } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockMessage.scala b/core/src/main/scala/org/apache/spark/storage/BlockMessage.scala index 80dcb5a2074d0..fbafcf79d2833 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockMessage.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockMessage.scala @@ -154,7 +154,7 @@ private[spark] class BlockMessage() { println() */ val finishTime = System.currentTimeMillis - return Message.createBufferMessage(buffers) + Message.createBufferMessage(buffers) } override def toString: String = { diff --git a/core/src/main/scala/org/apache/spark/storage/BlockMessageArray.scala b/core/src/main/scala/org/apache/spark/storage/BlockMessageArray.scala index 6ce9127c74501..59329361f320b 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockMessageArray.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockMessageArray.scala @@ -37,8 
+37,6 @@ class BlockMessageArray(var blockMessages: Seq[BlockMessage]) extends Seq[BlockM def length = blockMessages.length - initLogging() - def set(bufferMessage: BufferMessage) { val startTime = System.currentTimeMillis val newBlockMessages = new ArrayBuffer[BlockMessage]() @@ -98,7 +96,7 @@ class BlockMessageArray(var blockMessages: Seq[BlockMessage]) extends Seq[BlockM println() println() */ - return Message.createBufferMessage(buffers) + Message.createBufferMessage(buffers) } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala index b4451fc7b8e56..696b930a26b9e 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala @@ -32,7 +32,7 @@ import org.apache.spark.serializer.{SerializationStream, Serializer} * * This interface does not support concurrent writes. */ -abstract class BlockObjectWriter(val blockId: BlockId) { +private[spark] abstract class BlockObjectWriter(val blockId: BlockId) { def open(): BlockObjectWriter @@ -66,15 +66,21 @@ abstract class BlockObjectWriter(val blockId: BlockId) { * Cumulative time spent performing blocking writes, in ns. */ def timeWriting(): Long + + /** + * Number of bytes written so far + */ + def bytesWritten: Long } /** BlockObjectWriter which writes directly to a file on disk. Appends to the given file. */ -class DiskBlockObjectWriter( +private[spark] class DiskBlockObjectWriter( blockId: BlockId, file: File, serializer: Serializer, bufferSize: Int, - compressStream: OutputStream => OutputStream) + compressStream: OutputStream => OutputStream, + syncWrites: Boolean) extends BlockObjectWriter(blockId) with Logging { @@ -97,8 +103,6 @@ class DiskBlockObjectWriter( override def flush() = out.flush() } - private val syncWrites = System.getProperty("spark.shuffle.sync", "false").toBoolean - /** The file channel, used for repositioning / truncating the file. */ private var channel: FileChannel = null private var bs: OutputStream = null @@ -139,6 +143,7 @@ class DiskBlockObjectWriter( fos = null ts = null objOut = null + initialized = false } } @@ -146,7 +151,8 @@ class DiskBlockObjectWriter( override def commit(): Long = { if (initialized) { - // NOTE: Flush the serializer first and then the compressed/buffered output stream + // NOTE: Because Kryo doesn't flush the underlying stream we explicitly flush both the + // serializer stream and the lower level stream. 
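// A sketch (not from this patch) of the write/commit/measure sequence against the
// updated writer API: commit() performs the double flush described above, and
// bytesWritten is only meaningful afterwards. BlockManager, BlockId and
// BlockObjectWriter are private[spark], so this assumes Spark-internal code; the
// object and helper are illustrative.
package org.apache.spark.storage

import java.io.File
import org.apache.spark.serializer.Serializer

object DiskWriterSketch {
  def writeAndCommit(bm: BlockManager, blockId: BlockId, file: File,
                     serializer: Serializer, records: Iterator[Any]): Long = {
    val writer = bm.getDiskWriter(blockId, file, serializer, bufferSize = 100 * 1024)
    writer.open()
    records.foreach(writer.write)     // write(value: Any), as used by ShuffleWriterGroup callers
    writer.commit()
    val written = writer.bytesWritten // size of the committed FileSegment
    writer.close()
    written
  }
}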
objOut.flush() bs.flush() val prevPos = lastValidPosition @@ -176,10 +182,14 @@ class DiskBlockObjectWriter( } override def fileSegment(): FileSegment = { - val bytesWritten = lastValidPosition - initialPosition new FileSegment(file, initialPosition, bytesWritten) } // Only valid if called after close() override def timeWriting() = _timeWriting + + // Only valid if called after commit() + override def bytesWritten: Long = { + lastValidPosition - initialPosition + } } diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala index fcd2e97982955..f3e1c38744d78 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala @@ -19,7 +19,7 @@ package org.apache.spark.storage import java.io.File import java.text.SimpleDateFormat -import java.util.{Date, Random} +import java.util.{Date, Random, UUID} import org.apache.spark.Logging import org.apache.spark.executor.ExecutorExitCode @@ -38,7 +38,7 @@ private[spark] class DiskBlockManager(shuffleManager: ShuffleBlockManager, rootD extends PathResolver with Logging { private val MAX_DIR_CREATION_ATTEMPTS: Int = 10 - private val subDirsPerLocalDir = System.getProperty("spark.diskStore.subDirectories", "64").toInt + private val subDirsPerLocalDir = shuffleManager.conf.getInt("spark.diskStore.subDirectories", 64) // Create one local directory for each path mentioned in spark.local.dir; then, inside this // directory, create multiple subdirectories that we will hash files into, in order to avoid @@ -50,7 +50,7 @@ private[spark] class DiskBlockManager(shuffleManager: ShuffleBlockManager, rootD addShutdownHook() /** - * Returns the phyiscal file segment in which the given BlockId is located. + * Returns the physical file segment in which the given BlockId is located. * If the BlockId has been mapped to a specific FileSegment, that will be returned. * Otherwise, we assume the Block is mapped to a whole file identified by the BlockId directly. */ @@ -90,6 +90,15 @@ private[spark] class DiskBlockManager(shuffleManager: ShuffleBlockManager, rootD def getFile(blockId: BlockId): File = getFile(blockId.name) + /** Produces a unique block id and File suitable for intermediate results. 
*/ + def createTempBlock(): (TempBlockId, File) = { + var blockId = new TempBlockId(UUID.randomUUID()) + while (getFile(blockId).exists()) { + blockId = new TempBlockId(UUID.randomUUID()) + } + (blockId, getFile(blockId)) + } + private def createLocalDirs(): Array[File] = { logDebug("Creating local directories at root dirs '" + rootDirs + "'") val dateFormat = new SimpleDateFormat("yyyyMMddHHmmss") diff --git a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala index 05f676c6e2249..eb5a18521683e 100644 --- a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala @@ -214,7 +214,7 @@ private class MemoryStore(blockManager: BlockManager, maxMemory: Long) while (maxMemory - (currentMemory - selectedMemory) < space && iterator.hasNext) { val pair = iterator.next() val blockId = pair.getKey - if (rddToAdd != None && rddToAdd == getRddId(blockId)) { + if (rddToAdd.isDefined && rddToAdd == getRddId(blockId)) { logInfo("Will not store " + blockIdToAdd + " as it would require dropping another " + "block from the same RDD") return false @@ -245,7 +245,7 @@ private class MemoryStore(blockManager: BlockManager, maxMemory: Long) return false } } - return true + true } override def contains(blockId: BlockId): Boolean = { diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala index e828e1d1c5e7b..bb07c8cb134cc 100644 --- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala @@ -23,10 +23,11 @@ import java.util.concurrent.atomic.AtomicInteger import scala.collection.JavaConversions._ +import org.apache.spark.Logging import org.apache.spark.serializer.Serializer -import org.apache.spark.util.{MetadataCleanerType, MetadataCleaner, TimeStampedHashMap} -import org.apache.spark.util.collection.{PrimitiveKeyOpenHashMap, PrimitiveVector} import org.apache.spark.storage.ShuffleBlockManager.ShuffleFileGroup +import org.apache.spark.util.{MetadataCleaner, MetadataCleanerType, TimeStampedHashMap} +import org.apache.spark.util.collection.{PrimitiveKeyOpenHashMap, PrimitiveVector} /** A group of writers for a ShuffleMapTask, one writer per reducer. */ private[spark] trait ShuffleWriterGroup { @@ -58,33 +59,41 @@ private[spark] trait ShuffleWriterGroup { * files within a ShuffleFileGroups associated with the block's reducer. */ private[spark] -class ShuffleBlockManager(blockManager: BlockManager) { +class ShuffleBlockManager(blockManager: BlockManager) extends Logging { + def conf = blockManager.conf + // Turning off shuffle file consolidation causes all shuffle Blocks to get their own file. // TODO: Remove this once the shuffle file consolidation feature is stable. val consolidateShuffleFiles = - System.getProperty("spark.shuffle.consolidateFiles", "false").toBoolean + conf.getBoolean("spark.shuffle.consolidateFiles", false) - private val bufferSize = System.getProperty("spark.shuffle.file.buffer.kb", "100").toInt * 1024 + private val bufferSize = conf.getInt("spark.shuffle.file.buffer.kb", 100) * 1024 /** * Contains all the state related to a particular shuffle. This includes a pool of unused * ShuffleFileGroups, as well as all ShuffleFileGroups that have been created for the shuffle. 
*/ - private class ShuffleState() { + private class ShuffleState(val numBuckets: Int) { val nextFileId = new AtomicInteger(0) val unusedFileGroups = new ConcurrentLinkedQueue[ShuffleFileGroup]() val allFileGroups = new ConcurrentLinkedQueue[ShuffleFileGroup]() + + /** + * The mapIds of all map tasks completed on this Executor for this shuffle. + * NB: This is only populated if consolidateShuffleFiles is FALSE. We don't need it otherwise. + */ + val completedMapTasks = new ConcurrentLinkedQueue[Int]() } type ShuffleId = Int private val shuffleStates = new TimeStampedHashMap[ShuffleId, ShuffleState] - private - val metadataCleaner = new MetadataCleaner(MetadataCleanerType.SHUFFLE_BLOCK_MANAGER, this.cleanup) + private val metadataCleaner = + new MetadataCleaner(MetadataCleanerType.SHUFFLE_BLOCK_MANAGER, this.cleanup, conf) def forMapTask(shuffleId: Int, mapId: Int, numBuckets: Int, serializer: Serializer) = { new ShuffleWriterGroup { - shuffleStates.putIfAbsent(shuffleId, new ShuffleState()) + shuffleStates.putIfAbsent(shuffleId, new ShuffleState(numBuckets)) private val shuffleState = shuffleStates(shuffleId) private var fileGroup: ShuffleFileGroup = null @@ -98,6 +107,15 @@ class ShuffleBlockManager(blockManager: BlockManager) { Array.tabulate[BlockObjectWriter](numBuckets) { bucketId => val blockId = ShuffleBlockId(shuffleId, mapId, bucketId) val blockFile = blockManager.diskBlockManager.getFile(blockId) + // Because of previous failures, the shuffle file may already exist on this machine. + // If so, remove it. + if (blockFile.exists) { + if (blockFile.delete()) { + logInfo(s"Removed existing shuffle file $blockFile") + } else { + logWarning(s"Failed to remove existing shuffle file $blockFile") + } + } blockManager.getDiskWriter(blockId, blockFile, serializer, bufferSize) } } @@ -109,6 +127,8 @@ class ShuffleBlockManager(blockManager: BlockManager) { fileGroup.recordMapOutput(mapId, offsets) } recycleFileGroup(fileGroup) + } else { + shuffleState.completedMapTasks.add(mapId) } } @@ -154,7 +174,18 @@ class ShuffleBlockManager(blockManager: BlockManager) { } private def cleanup(cleanupTime: Long) { - shuffleStates.clearOldValues(cleanupTime) + shuffleStates.clearOldValues(cleanupTime, (shuffleId, state) => { + if (consolidateShuffleFiles) { + for (fileGroup <- state.allFileGroups; file <- fileGroup.files) { + file.delete() + } + } else { + for (mapId <- state.completedMapTasks; reduceId <- 0 until state.numBuckets) { + val blockId = new ShuffleBlockId(shuffleId, mapId, reduceId) + blockManager.diskBlockManager.getFile(blockId).delete() + } + } + }) } } diff --git a/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala index 632ff047d1042..1b7934d59fa1d 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala @@ -74,7 +74,7 @@ class StorageLevel private( if (deserialized_) { ret |= 1 } - return ret + ret } override def writeExternal(out: ObjectOutput) { @@ -101,13 +101,17 @@ class StorageLevel private( var result = "" result += (if (useDisk) "Disk " else "") result += (if (useMemory) "Memory " else "") - result += (if (deserialized) "Deserialized " else "Serialized") + result += (if (deserialized) "Deserialized " else "Serialized ") result += "%sx Replicated".format(replication) result } } +/** + * Various [[org.apache.spark.storage.StorageLevel]] defined and utility functions for creating + * new storage levels. 
+ */ object StorageLevel { val NONE = new StorageLevel(false, false, false) val DISK_ONLY = new StorageLevel(true, false, false) diff --git a/core/src/main/scala/org/apache/spark/storage/StoragePerfTester.scala b/core/src/main/scala/org/apache/spark/storage/StoragePerfTester.scala index 369a2e536b068..40734aab49f93 100644 --- a/core/src/main/scala/org/apache/spark/storage/StoragePerfTester.scala +++ b/core/src/main/scala/org/apache/spark/storage/StoragePerfTester.scala @@ -24,9 +24,11 @@ import org.apache.spark.serializer.KryoSerializer import org.apache.spark.SparkContext import org.apache.spark.util.Utils -/** Utility for micro-benchmarking shuffle write performance. - * - * Writes simulated shuffle output from several threads and records the observed throughput*/ +/** + * Utility for micro-benchmarking shuffle write performance. + * + * Writes simulated shuffle output from several threads and records the observed throughput. + */ object StoragePerfTester { def main(args: Array[String]) = { /** Total amount of data to generate. Distributed evenly amongst maps and reduce splits. */ @@ -54,7 +56,7 @@ object StoragePerfTester { def writeOutputBytes(mapId: Int, total: AtomicLong) = { val shuffle = blockManager.shuffleBlockManager.forMapTask(1, mapId, numOutputSplits, - new KryoSerializer()) + new KryoSerializer(sc.conf)) val writers = shuffle.writers for (i <- 1 to recordsPerMap) { writers(i % numOutputSplits).write(writeData) diff --git a/core/src/main/scala/org/apache/spark/storage/ThreadingTest.scala b/core/src/main/scala/org/apache/spark/storage/ThreadingTest.scala index 860e6805761a8..729ba2c550a20 100644 --- a/core/src/main/scala/org/apache/spark/storage/ThreadingTest.scala +++ b/core/src/main/scala/org/apache/spark/storage/ThreadingTest.scala @@ -22,6 +22,7 @@ import akka.actor._ import java.util.concurrent.ArrayBlockingQueue import util.Random import org.apache.spark.serializer.KryoSerializer +import org.apache.spark.{SparkConf, SparkContext} /** * This class tests the BlockManager and MemoryStore for thread safety and @@ -91,11 +92,12 @@ private[spark] object ThreadingTest { def main(args: Array[String]) { System.setProperty("spark.kryoserializer.buffer.mb", "1") val actorSystem = ActorSystem("test") - val serializer = new KryoSerializer + val conf = new SparkConf() + val serializer = new KryoSerializer(conf) val blockManagerMaster = new BlockManagerMaster( - actorSystem.actorOf(Props(new BlockManagerMasterActor(true)))) + actorSystem.actorOf(Props(new BlockManagerMasterActor(true, conf))), conf) val blockManager = new BlockManager( - "", actorSystem, blockManagerMaster, serializer, 1024 * 1024) + "", actorSystem, blockManagerMaster, serializer, 1024 * 1024, conf) val producers = (1 to numProducers).map(i => new ProducerThread(blockManager, i)) val consumers = producers.map(p => new ConsumerThread(blockManager, p.queue)) producers.foreach(_.start) diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index f1d86c0221fea..50dfdbdf5ae9b 100644 --- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -1,4 +1,4 @@ -/* +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -32,7 +32,7 @@ import org.apache.spark.util.Utils /** Top level user interface for Spark */ private[spark] class SparkUI(sc: SparkContext) extends Logging { val host = Option(System.getenv("SPARK_PUBLIC_DNS")).getOrElse(Utils.localHostName()) - val port = Option(System.getProperty("spark.ui.port")).getOrElse(SparkUI.DEFAULT_PORT).toInt + val port = sc.conf.get("spark.ui.port", SparkUI.DEFAULT_PORT).toInt var boundPort: Option[Int] = None var server: Option[Server] = None diff --git a/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala index fcd1b518d099b..6ba15187d9f63 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala @@ -19,7 +19,7 @@ package org.apache.spark.ui import scala.util.Random -import org.apache.spark.SparkContext +import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.SparkContext._ import org.apache.spark.scheduler.SchedulingMode @@ -27,25 +27,26 @@ import org.apache.spark.scheduler.SchedulingMode /** * Continuously generates jobs that expose various features of the WebUI (internal testing tool). * - * Usage: ./run spark.ui.UIWorkloadGenerator [master] + * Usage: ./bin/spark-class org.apache.spark.ui.UIWorkloadGenerator [master] [FIFO|FAIR] */ private[spark] object UIWorkloadGenerator { + val NUM_PARTITIONS = 100 val INTER_JOB_WAIT_MS = 5000 def main(args: Array[String]) { if (args.length < 2) { - println("usage: ./spark-class org.apache.spark.ui.UIWorkloadGenerator [master] [FIFO|FAIR]") + println("usage: ./bin/spark-class org.apache.spark.ui.UIWorkloadGenerator [master] [FIFO|FAIR]") System.exit(1) } - val master = args(0) - val schedulingMode = SchedulingMode.withName(args(1)) - val appName = "Spark UI Tester" + val conf = new SparkConf().setMaster(args(0)).setAppName("Spark UI tester") + + val schedulingMode = SchedulingMode.withName(args(1)) if (schedulingMode == SchedulingMode.FAIR) { - System.setProperty("spark.scheduler.mode", "FAIR") + conf.set("spark.scheduler.mode", "FAIR") } - val sc = new SparkContext(master, appName) + val sc = new SparkContext(conf) def setProperties(s: String) = { if(schedulingMode == SchedulingMode.FAIR) { @@ -55,11 +56,11 @@ private[spark] object UIWorkloadGenerator { } val baseData = sc.makeRDD(1 to NUM_PARTITIONS * 10, NUM_PARTITIONS) - def nextFloat() = (new Random()).nextFloat() + def nextFloat() = new Random().nextFloat() val jobs = Seq[(String, () => Long)]( ("Count", baseData.count), - ("Cache and Count", baseData.map(x => x).cache.count), + ("Cache and Count", baseData.map(x => x).cache().count), ("Single Shuffle", baseData.map(x => (x % 10, x)).reduceByKey(_ + _).count), ("Entirely failed phase", baseData.map(x => throw new Exception).count), ("Partially failed phase", { diff --git a/core/src/main/scala/org/apache/spark/ui/env/EnvironmentUI.scala b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentUI.scala index c5bf2acc9ec95..88f41be8d3dd2 100644 --- a/core/src/main/scala/org/apache/spark/ui/env/EnvironmentUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentUI.scala @@ -48,12 +48,15 @@ private[spark] class EnvironmentUI(sc: SparkContext) { def jvmTable = UIUtils.listingTable(Seq("Name", "Value"), jvmRow, jvmInformation, fixedWidth = true) - val properties = System.getProperties.iterator.toSeq - val classPathProperty = properties.find { case (k, v) => - k.contains("java.class.path") + val sparkProperties = 
sc.conf.getAll.sorted + + val systemProperties = System.getProperties.iterator.toSeq + val classPathProperty = systemProperties.find { case (k, v) => + k == "java.class.path" }.getOrElse(("", "")) - val sparkProperties = properties.filter(_._1.startsWith("spark")).sorted - val otherProperties = properties.diff(sparkProperties :+ classPathProperty).sorted + val otherProperties = systemProperties.filter { case (k, v) => + k != "java.class.path" && !k.startsWith("spark.") + }.sorted val propertyHeaders = Seq("Name", "Value") def propertyRow(kv: (String, String)) = {kv._1}{kv._2} @@ -63,7 +66,7 @@ private[spark] class EnvironmentUI(sc: SparkContext) { UIUtils.listingTable(propertyHeaders, propertyRow, otherProperties, fixedWidth = true) val classPathEntries = classPathProperty._2 - .split(System.getProperty("path.separator", ":")) + .split(sc.conf.get("path.separator", ":")) .filterNot(e => e.isEmpty) .map(e => (e, "System Classpath")) val addedJars = sc.addedJars.iterator.toSeq.map{case (path, time) => (path, "Added By User")} diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala index e596690bc3df8..a31a7e1d58374 100644 --- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala @@ -56,7 +56,8 @@ private[spark] class ExecutorsUI(val sc: SparkContext) { val diskSpaceUsed = storageStatusList.flatMap(_.blocks.values.map(_.diskSize)).fold(0L)(_+_) val execHead = Seq("Executor ID", "Address", "RDD blocks", "Memory used", "Disk used", - "Active tasks", "Failed tasks", "Complete tasks", "Total tasks") + "Active tasks", "Failed tasks", "Complete tasks", "Total tasks", "Task Time", "Shuffle Read", + "Shuffle Write") def execRow(kv: Seq[String]) = { @@ -73,6 +74,9 @@ private[spark] class ExecutorsUI(val sc: SparkContext) { {kv(7)} {kv(8)} {kv(9)} + {Utils.msDurationToString(kv(10).toLong)} + {Utils.bytesToString(kv(11).toLong)} + {Utils.bytesToString(kv(12).toLong)} } @@ -111,6 +115,9 @@ private[spark] class ExecutorsUI(val sc: SparkContext) { val failedTasks = listener.executorToTasksFailed.getOrElse(execId, 0) val completedTasks = listener.executorToTasksComplete.getOrElse(execId, 0) val totalTasks = activeTasks + failedTasks + completedTasks + val totalDuration = listener.executorToDuration.getOrElse(execId, 0) + val totalShuffleRead = listener.executorToShuffleRead.getOrElse(execId, 0) + val totalShuffleWrite = listener.executorToShuffleWrite.getOrElse(execId, 0) Seq( execId, @@ -122,7 +129,10 @@ private[spark] class ExecutorsUI(val sc: SparkContext) { activeTasks.toString, failedTasks.toString, completedTasks.toString, - totalTasks.toString + totalTasks.toString, + totalDuration.toString, + totalShuffleRead.toString, + totalShuffleWrite.toString ) } @@ -130,6 +140,9 @@ private[spark] class ExecutorsUI(val sc: SparkContext) { val executorToTasksActive = HashMap[String, HashSet[TaskInfo]]() val executorToTasksComplete = HashMap[String, Int]() val executorToTasksFailed = HashMap[String, Int]() + val executorToDuration = HashMap[String, Long]() + val executorToShuffleRead = HashMap[String, Long]() + val executorToShuffleWrite = HashMap[String, Long]() override def onTaskStart(taskStart: SparkListenerTaskStart) { val eid = taskStart.taskInfo.executorId @@ -140,6 +153,9 @@ private[spark] class ExecutorsUI(val sc: SparkContext) { override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { val eid = taskEnd.taskInfo.executorId val activeTasks = 
executorToTasksActive.getOrElseUpdate(eid, new HashSet[TaskInfo]()) + val newDuration = executorToDuration.getOrElse(eid, 0L) + taskEnd.taskInfo.duration + executorToDuration.put(eid, newDuration) + activeTasks -= taskEnd.taskInfo val (failureInfo, metrics): (Option[ExceptionFailure], Option[TaskMetrics]) = taskEnd.reason match { @@ -150,6 +166,17 @@ private[spark] class ExecutorsUI(val sc: SparkContext) { executorToTasksComplete(eid) = executorToTasksComplete.getOrElse(eid, 0) + 1 (None, Option(taskEnd.taskMetrics)) } + + // update shuffle read/write + if (null != taskEnd.taskMetrics) { + taskEnd.taskMetrics.shuffleReadMetrics.foreach(shuffleRead => + executorToShuffleRead.put(eid, executorToShuffleRead.getOrElse(eid, 0L) + + shuffleRead.remoteBytesRead)) + + taskEnd.taskMetrics.shuffleWriteMetrics.foreach(shuffleWrite => + executorToShuffleWrite.put(eid, executorToShuffleWrite.getOrElse(eid, 0L) + + shuffleWrite.shuffleBytesWritten)) + } } } } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorSummary.scala b/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorSummary.scala new file mode 100644 index 0000000000000..64e22a30b48f9 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorSummary.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ui.jobs + +/** class for reporting aggregated metrics for each executors in stageUI */ +private[spark] class ExecutorSummary { + var taskTime : Long = 0 + var failedTasks : Int = 0 + var succeededTasks : Int = 0 + var shuffleRead : Long = 0 + var shuffleWrite : Long = 0 + var memoryBytesSpilled : Long = 0 + var diskBytesSpilled : Long = 0 +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala new file mode 100644 index 0000000000000..ab03eb5ce1ab4 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ui.jobs + +import scala.xml.Node + +import org.apache.spark.scheduler.SchedulingMode +import org.apache.spark.util.Utils +import scala.collection.mutable + +/** Page showing executor summary */ +private[spark] class ExecutorTable(val parent: JobProgressUI, val stageId: Int) { + + val listener = parent.listener + val dateFmt = parent.dateFmt + val isFairScheduler = listener.sc.getSchedulingMode == SchedulingMode.FAIR + + def toNodeSeq(): Seq[Node] = { + listener.synchronized { + executorTable() + } + } + + /** Special table which merges two header cells. */ + private def executorTable[T](): Seq[Node] = { + + + + + + + + + + + + + + + {createExecutorTable()} + +
    <th>Executor ID</th> <th>Address</th> <th>Task Time</th> <th>Total Tasks</th> <th>Failed Tasks</th> <th>Succeeded Tasks</th> <th>Shuffle Read</th> <th>Shuffle Write</th> <th>Shuffle Spill (Memory)</th> <th>Shuffle Spill (Disk)</th>
    + } + + private def createExecutorTable() : Seq[Node] = { + // make a executor-id -> address map + val executorIdToAddress = mutable.HashMap[String, String]() + val storageStatusList = parent.sc.getExecutorStorageStatus + for (statusId <- 0 until storageStatusList.size) { + val blockManagerId = parent.sc.getExecutorStorageStatus(statusId).blockManagerId + val address = blockManagerId.hostPort + val executorId = blockManagerId.executorId + executorIdToAddress.put(executorId, address) + } + + val executorIdToSummary = listener.stageIdToExecutorSummaries.get(stageId) + executorIdToSummary match { + case Some(x) => { + x.toSeq.sortBy(_._1).map{ + case (k,v) => { + + {k} + {executorIdToAddress.getOrElse(k, "CANNOT FIND ADDRESS")} + {parent.formatDuration(v.taskTime)} + {v.failedTasks + v.succeededTasks} + {v.failedTasks} + {v.succeededTasks} + {Utils.bytesToString(v.shuffleRead)} + {Utils.bytesToString(v.shuffleWrite)} + {Utils.bytesToString(v.memoryBytesSpilled)} + {Utils.bytesToString(v.diskBytesSpilled)} + + } + } + } + case _ => { Seq[Node]() } + } + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala index 6b854740d6a24..858a10ce750ff 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala @@ -33,7 +33,7 @@ import org.apache.spark.scheduler._ */ private[spark] class JobProgressListener(val sc: SparkContext) extends SparkListener { // How many stages to remember - val RETAINED_STAGES = System.getProperty("spark.ui.retained_stages", "1000").toInt + val RETAINED_STAGES = sc.conf.getInt("spark.ui.retainedStages", 1000) val DEFAULT_POOL_NAME = "default" val stageIdToPool = new HashMap[Int, String]() @@ -52,15 +52,18 @@ private[spark] class JobProgressListener(val sc: SparkContext) extends SparkList val stageIdToTime = HashMap[Int, Long]() val stageIdToShuffleRead = HashMap[Int, Long]() val stageIdToShuffleWrite = HashMap[Int, Long]() + val stageIdToMemoryBytesSpilled = HashMap[Int, Long]() + val stageIdToDiskBytesSpilled = HashMap[Int, Long]() val stageIdToTasksActive = HashMap[Int, HashSet[TaskInfo]]() val stageIdToTasksComplete = HashMap[Int, Int]() val stageIdToTasksFailed = HashMap[Int, Int]() val stageIdToTaskInfos = HashMap[Int, HashSet[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]]() + val stageIdToExecutorSummaries = HashMap[Int, HashMap[String, ExecutorSummary]]() override def onJobStart(jobStart: SparkListenerJobStart) {} - override def onStageCompleted(stageCompleted: StageCompleted) = synchronized { + override def onStageCompleted(stageCompleted: SparkListenerStageCompleted) = synchronized { val stage = stageCompleted.stage poolToActiveStages(stageIdToPool(stage.stageId)) -= stage activeStages -= stage @@ -77,6 +80,8 @@ private[spark] class JobProgressListener(val sc: SparkContext) extends SparkList stageIdToTime.remove(s.stageId) stageIdToShuffleRead.remove(s.stageId) stageIdToShuffleWrite.remove(s.stageId) + stageIdToMemoryBytesSpilled.remove(s.stageId) + stageIdToDiskBytesSpilled.remove(s.stageId) stageIdToTasksActive.remove(s.stageId) stageIdToTasksComplete.remove(s.stageId) stageIdToTasksFailed.remove(s.stageId) @@ -105,7 +110,7 @@ private[spark] class JobProgressListener(val sc: SparkContext) extends SparkList val stages = poolToActiveStages.getOrElseUpdate(poolName, new HashSet[StageInfo]()) stages += stage } - + override def onTaskStart(taskStart: 
SparkListenerTaskStart) = synchronized { val sid = taskStart.task.stageId val tasksActive = stageIdToTasksActive.getOrElseUpdate(sid, new HashSet[TaskInfo]()) @@ -124,8 +129,40 @@ private[spark] class JobProgressListener(val sc: SparkContext) extends SparkList override def onTaskEnd(taskEnd: SparkListenerTaskEnd) = synchronized { val sid = taskEnd.task.stageId + + // create executor summary map if necessary + val executorSummaryMap = stageIdToExecutorSummaries.getOrElseUpdate(key = sid, + op = new HashMap[String, ExecutorSummary]()) + executorSummaryMap.getOrElseUpdate(key = taskEnd.taskInfo.executorId, + op = new ExecutorSummary()) + + val executorSummary = executorSummaryMap.get(taskEnd.taskInfo.executorId) + executorSummary match { + case Some(y) => { + // first update failed-task, succeed-task + taskEnd.reason match { + case Success => + y.succeededTasks += 1 + case _ => + y.failedTasks += 1 + } + + // update duration + y.taskTime += taskEnd.taskInfo.duration + + Option(taskEnd.taskMetrics).foreach { taskMetrics => + taskMetrics.shuffleReadMetrics.foreach { y.shuffleRead += _.remoteBytesRead } + taskMetrics.shuffleWriteMetrics.foreach { y.shuffleWrite += _.shuffleBytesWritten } + y.memoryBytesSpilled += taskMetrics.memoryBytesSpilled + y.diskBytesSpilled += taskMetrics.diskBytesSpilled + } + } + case _ => {} + } + val tasksActive = stageIdToTasksActive.getOrElseUpdate(sid, new HashSet[TaskInfo]()) tasksActive -= taskEnd.taskInfo + val (failureInfo, metrics): (Option[ExceptionFailure], Option[TaskMetrics]) = taskEnd.reason match { case e: ExceptionFailure => @@ -153,6 +190,14 @@ private[spark] class JobProgressListener(val sc: SparkContext) extends SparkList stageIdToShuffleWrite(sid) += shuffleWrite totalShuffleWrite += shuffleWrite + stageIdToMemoryBytesSpilled.getOrElseUpdate(sid, 0L) + val memoryBytesSpilled = metrics.map(m => m.memoryBytesSpilled).getOrElse(0L) + stageIdToMemoryBytesSpilled(sid) += memoryBytesSpilled + + stageIdToDiskBytesSpilled.getOrElseUpdate(sid, 0L) + val diskBytesSpilled = metrics.map(m => m.diskBytesSpilled).getOrElse(0L) + stageIdToDiskBytesSpilled(sid) += diskBytesSpilled + val taskList = stageIdToTaskInfos.getOrElse( sid, HashSet[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]()) taskList -= ((taskEnd.taskInfo, None, None)) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala index e7eab374ad7e0..c1ee2f3d00d66 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala @@ -17,7 +17,7 @@ package org.apache.spark.ui.jobs -import akka.util.Duration +import scala.concurrent.duration._ import java.text.SimpleDateFormat diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index baccc4281a06c..cfaf121895ec2 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -56,15 +56,20 @@ private[spark] class StagePage(parent: JobProgressUI) { val hasShuffleRead = shuffleReadBytes > 0 val shuffleWriteBytes = listener.stageIdToShuffleWrite.getOrElse(stageId, 0L) val hasShuffleWrite = shuffleWriteBytes > 0 + val memoryBytesSpilled = listener.stageIdToMemoryBytesSpilled.getOrElse(stageId, 0L) + val diskBytesSpilled = listener.stageIdToDiskBytesSpilled.getOrElse(stageId, 0L) + val hasBytesSpilled = (memoryBytesSpilled 
> 0 && diskBytesSpilled > 0) var activeTime = 0L listener.stageIdToTasksActive(stageId).foreach(activeTime += _.timeRunning(now)) + val finishedTasks = listener.stageIdToTaskInfos(stageId).filter(_._1.finished) + val summary =
    • - CPU time: + Total task time across all tasks: {parent.formatDuration(listener.stageIdToTime.getOrElse(stageId, 0L) + activeTime)}
    • {if (hasShuffleRead) @@ -79,17 +84,28 @@ private[spark] class StagePage(parent: JobProgressUI) { {Utils.bytesToString(shuffleWriteBytes)} } + {if (hasBytesSpilled) +
    • + Shuffle spill (memory): + {Utils.bytesToString(memoryBytesSpilled)} +
    • +
    • + Shuffle spill (disk): + {Utils.bytesToString(diskBytesSpilled)} +
    • + }
    val taskHeaders: Seq[String] = Seq("Task Index", "Task ID", "Status", "Locality Level", "Executor", "Launch Time") ++ - Seq("Duration", "GC Time") ++ + Seq("Duration", "GC Time", "Result Ser Time") ++ {if (hasShuffleRead) Seq("Shuffle Read") else Nil} ++ {if (hasShuffleWrite) Seq("Write Time", "Shuffle Write") else Nil} ++ + {if (hasBytesSpilled) Seq("Shuffle Spill (Memory)", "Shuffle Spill (Disk)") else Nil} ++ Seq("Errors") - val taskTable = listingTable(taskHeaders, taskRow(hasShuffleRead, hasShuffleWrite), tasks) + val taskTable = listingTable(taskHeaders, taskRow(hasShuffleRead, hasShuffleWrite, hasBytesSpilled), tasks) // Excludes tasks which failed and have incomplete metrics val validTasks = tasks.filter(t => t._1.status == "SUCCESS" && (t._2.isDefined)) @@ -99,11 +115,43 @@ private[spark] class StagePage(parent: JobProgressUI) { None } else { + val serializationTimes = validTasks.map{case (info, metrics, exception) => + metrics.get.resultSerializationTime.toDouble} + val serializationQuantiles = "Result serialization time" +: Distribution(serializationTimes).get.getQuantiles().map( + ms => parent.formatDuration(ms.toLong)) + val serviceTimes = validTasks.map{case (info, metrics, exception) => metrics.get.executorRunTime.toDouble} val serviceQuantiles = "Duration" +: Distribution(serviceTimes).get.getQuantiles().map( ms => parent.formatDuration(ms.toLong)) + val gettingResultTimes = validTasks.map{case (info, metrics, exception) => + if (info.gettingResultTime > 0) { + (info.finishTime - info.gettingResultTime).toDouble + } else { + 0.0 + } + } + val gettingResultQuantiles = ("Time spent fetching task results" +: + Distribution(gettingResultTimes).get.getQuantiles().map( + millis => parent.formatDuration(millis.toLong))) + // The scheduler delay includes the network delay to send the task to the worker + // machine and to send back the result (but not the time to fetch the task result, + // if it needed to be fetched from the block manager on the worker). 
+ val schedulerDelays = validTasks.map{case (info, metrics, exception) => + val totalExecutionTime = { + if (info.gettingResultTime > 0) { + (info.gettingResultTime - info.launchTime).toDouble + } else { + (info.finishTime - info.launchTime).toDouble + } + } + totalExecutionTime - metrics.get.executorRunTime + } + val schedulerDelayQuantiles = ("Scheduler delay" +: + Distribution(schedulerDelays).get.getQuantiles().map( + millis => parent.formatDuration(millis.toLong))) + def getQuantileCols(data: Seq[Double]) = Distribution(data).get.getQuantiles().map(d => Utils.bytesToString(d.toLong)) @@ -119,28 +167,48 @@ private[spark] class StagePage(parent: JobProgressUI) { } val shuffleWriteQuantiles = "Shuffle Write" +: getQuantileCols(shuffleWriteSizes) - val listings: Seq[Seq[String]] = Seq(serviceQuantiles, + val memoryBytesSpilledSizes = validTasks.map { + case(info, metrics, exception) => + metrics.get.memoryBytesSpilled.toDouble + } + val memoryBytesSpilledQuantiles = "Shuffle spill (memory)" +: + getQuantileCols(memoryBytesSpilledSizes) + + val diskBytesSpilledSizes = validTasks.map { + case(info, metrics, exception) => + metrics.get.diskBytesSpilled.toDouble + } + val diskBytesSpilledQuantiles = "Shuffle spill (disk)" +: + getQuantileCols(diskBytesSpilledSizes) + + val listings: Seq[Seq[String]] = Seq( + serializationQuantiles, + serviceQuantiles, + gettingResultQuantiles, + schedulerDelayQuantiles, if (hasShuffleRead) shuffleReadQuantiles else Nil, - if (hasShuffleWrite) shuffleWriteQuantiles else Nil) + if (hasShuffleWrite) shuffleWriteQuantiles else Nil, + if (hasBytesSpilled) memoryBytesSpilledQuantiles else Nil, + if (hasBytesSpilled) diskBytesSpilledQuantiles else Nil) val quantileHeaders = Seq("Metric", "Min", "25th percentile", "Median", "75th percentile", "Max") def quantileRow(data: Seq[String]): Seq[Node] = {data.map(d => {d})} Some(listingTable(quantileHeaders, quantileRow, listings, fixedWidth = true)) } - + val executorTable = new ExecutorTable(parent, stageId) val content = summary ++

    Summary Metrics for {numCompleted} Completed Tasks ++
    {summaryTable.getOrElse("No tasks have reported metrics yet.")} ++
    Aggregated Metrics by Executor ++ executorTable.toNodeSeq() ++
    Tasks
    ++ taskTable headerSparkPage(content, parent.sc, "Details for Stage %d".format(stageId), Stages) } } - - def taskRow(shuffleRead: Boolean, shuffleWrite: Boolean) + def taskRow(shuffleRead: Boolean, shuffleWrite: Boolean, bytesSpilled: Boolean) (taskData: (TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])): Seq[Node] = { def fmtStackTrace(trace: Seq[StackTraceElement]): Seq[Node] = trace.map(e => {e.toString}) @@ -151,6 +219,7 @@ private[spark] class StagePage(parent: JobProgressUI) { val formatDuration = if (info.status == "RUNNING") parent.formatDuration(duration) else metrics.map(m => parent.formatDuration(m.executorRunTime)).getOrElse("") val gcTime = metrics.map(m => m.jvmGCTime).getOrElse(0L) + val serializationTime = metrics.map(m => m.resultSerializationTime).getOrElse(0L) val maybeShuffleRead = metrics.flatMap{m => m.shuffleReadMetrics}.map{s => s.remoteBytesRead} val shuffleReadSortable = maybeShuffleRead.map(_.toString).getOrElse("") @@ -165,6 +234,14 @@ private[spark] class StagePage(parent: JobProgressUI) { val writeTimeReadable = maybeWriteTime.map{ t => t / (1000 * 1000)}.map{ ms => if (ms == 0) "" else parent.formatDuration(ms)}.getOrElse("") + val maybeMemoryBytesSpilled = metrics.map{m => m.memoryBytesSpilled} + val memoryBytesSpilledSortable = maybeMemoryBytesSpilled.map(_.toString).getOrElse("") + val memoryBytesSpilledReadable = maybeMemoryBytesSpilled.map{Utils.bytesToString(_)}.getOrElse("") + + val maybeDiskBytesSpilled = metrics.map{m => m.diskBytesSpilled} + val diskBytesSpilledSortable = maybeDiskBytesSpilled.map(_.toString).getOrElse("") + val diskBytesSpilledReadable = maybeDiskBytesSpilled.map{Utils.bytesToString(_)}.getOrElse("") + {info.index} {info.taskId} @@ -178,6 +255,9 @@ private[spark] class StagePage(parent: JobProgressUI) { {if (gcTime > 0) parent.formatDuration(gcTime) else ""} + + {if (serializationTime > 0) parent.formatDuration(serializationTime) else ""} + {if (shuffleRead) { {shuffleReadReadable} @@ -191,6 +271,14 @@ private[spark] class StagePage(parent: JobProgressUI) { {shuffleWriteReadable} }} + {if (bytesSpilled) { + + {memoryBytesSpilledReadable} + + + {diskBytesSpilledReadable} + + }} {exception.map(e => {e.className} ({e.description})
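Note on the StagePage change above: the new "Scheduler delay" quantile is not a metric reported by the executor; it is derived from the task's timing fields, stopping the clock at gettingResultTime when the result had to be fetched separately. A minimal standalone sketch of that calculation, using a hypothetical TaskTiming case class in place of Spark's TaskInfo/TaskMetrics (names here are illustrative, not Spark API):

    // Illustrative stand-in for the timing fields StagePage reads off TaskInfo/TaskMetrics.
    case class TaskTiming(launchTime: Long, gettingResultTime: Long, finishTime: Long, executorRunTime: Long)

    // Scheduler delay as computed above: the task's wall-clock span (cut off at gettingResultTime
    // when the result was fetched from the block manager) minus the time spent running on the executor.
    def schedulerDelay(t: TaskTiming): Double = {
      val totalExecutionTime =
        if (t.gettingResultTime > 0) (t.gettingResultTime - t.launchTime).toDouble
        else (t.finishTime - t.launchTime).toDouble
      totalExecutionTime - t.executorRunTime
    }

This mirrors the schedulerDelays computation in the diff: network and queueing overhead shows up as the gap between total elapsed time and executor run time.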
    diff --git a/core/src/main/scala/org/apache/spark/ui/storage/BlockManagerUI.scala b/core/src/main/scala/org/apache/spark/ui/storage/BlockManagerUI.scala index 1d633d374a0a0..39f422dd6b90f 100644 --- a/core/src/main/scala/org/apache/spark/ui/storage/BlockManagerUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/BlockManagerUI.scala @@ -17,7 +17,7 @@ package org.apache.spark.ui.storage -import akka.util.Duration +import scala.concurrent.duration._ import javax.servlet.http.HttpServletRequest @@ -28,9 +28,6 @@ import org.apache.spark.ui.JettyUtils._ /** Web UI showing storage status of all RDD's in the given SparkContext. */ private[spark] class BlockManagerUI(val sc: SparkContext) extends Logging { - implicit val timeout = Duration.create( - System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds") - val indexPage = new IndexPage(this) val rddPage = new RDDPage(this) diff --git a/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala index d4c5065c3f5cf..761d378c7fd8b 100644 --- a/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala @@ -17,11 +17,14 @@ package org.apache.spark.util -import akka.actor.{ActorSystem, ExtendedActorSystem} +import scala.collection.JavaConversions.mapAsJavaMap +import scala.concurrent.duration.{Duration, FiniteDuration} + +import akka.actor.{ActorSystem, ExtendedActorSystem, IndestructibleActorSystem} import com.typesafe.config.ConfigFactory -import akka.util.duration._ -import akka.remote.RemoteActorRefProvider +import org.apache.log4j.{Level, Logger} +import org.apache.spark.SparkConf /** * Various utility classes for working with Akka. @@ -34,39 +37,77 @@ private[spark] object AkkaUtils { * * Note: the `name` parameter is important, as even if a client sends a message to right * host + port, if the system name is incorrect, Akka will drop the message. + * + * If indestructible is set to true, the Actor System will continue running in the event + * of a fatal exception. This is used by [[org.apache.spark.executor.Executor]]. */ - def createActorSystem(name: String, host: String, port: Int): (ActorSystem, Int) = { - val akkaThreads = System.getProperty("spark.akka.threads", "4").toInt - val akkaBatchSize = System.getProperty("spark.akka.batchSize", "15").toInt - val akkaTimeout = System.getProperty("spark.akka.timeout", "60").toInt - val akkaFrameSize = System.getProperty("spark.akka.frameSize", "10").toInt - val lifecycleEvents = if (System.getProperty("spark.akka.logLifecycleEvents", "false").toBoolean) "on" else "off" - // 10 seconds is the default akka timeout, but in a cluster, we need higher by default. 
- val akkaWriteTimeout = System.getProperty("spark.akka.writeTimeout", "30").toInt - - val akkaConf = ConfigFactory.parseString(""" - akka.daemonic = on - akka.event-handlers = ["akka.event.slf4j.Slf4jEventHandler"] - akka.stdout-loglevel = "ERROR" - akka.actor.provider = "akka.remote.RemoteActorRefProvider" - akka.remote.transport = "akka.remote.netty.NettyRemoteTransport" - akka.remote.netty.hostname = "%s" - akka.remote.netty.port = %d - akka.remote.netty.connection-timeout = %ds - akka.remote.netty.message-frame-size = %d MiB - akka.remote.netty.execution-pool-size = %d - akka.actor.default-dispatcher.throughput = %d - akka.remote.log-remote-lifecycle-events = %s - akka.remote.netty.write-timeout = %ds - """.format(host, port, akkaTimeout, akkaFrameSize, akkaThreads, akkaBatchSize, - lifecycleEvents, akkaWriteTimeout)) - - val actorSystem = ActorSystem(name, akkaConf) - - // Figure out the port number we bound to, in case port was passed as 0. This is a bit of a - // hack because Akka doesn't let you figure out the port through the public API yet. + def createActorSystem(name: String, host: String, port: Int, indestructible: Boolean = false, + conf: SparkConf): (ActorSystem, Int) = { + + val akkaThreads = conf.getInt("spark.akka.threads", 4) + val akkaBatchSize = conf.getInt("spark.akka.batchSize", 15) + + val akkaTimeout = conf.getInt("spark.akka.timeout", 100) + + val akkaFrameSize = conf.getInt("spark.akka.frameSize", 10) + val akkaLogLifecycleEvents = conf.getBoolean("spark.akka.logLifecycleEvents", false) + val lifecycleEvents = if (akkaLogLifecycleEvents) "on" else "off" + if (!akkaLogLifecycleEvents) { + // As a workaround for Akka issue #3787, we coerce the "EndpointWriter" log to be silent. + // See: https://www.assembla.com/spaces/akka/tickets/3787#/ + Option(Logger.getLogger("akka.remote.EndpointWriter")).map(l => l.setLevel(Level.FATAL)) + } + + val logAkkaConfig = if (conf.getBoolean("spark.akka.logAkkaConfig", false)) "on" else "off" + + val akkaHeartBeatPauses = conf.getInt("spark.akka.heartbeat.pauses", 600) + val akkaFailureDetector = + conf.getDouble("spark.akka.failure-detector.threshold", 300.0) + val akkaHeartBeatInterval = conf.getInt("spark.akka.heartbeat.interval", 1000) + + val akkaConf = ConfigFactory.parseMap(conf.getAkkaConf.toMap[String, String]).withFallback( + ConfigFactory.parseString( + s""" + |akka.daemonic = on + |akka.loggers = [""akka.event.slf4j.Slf4jLogger""] + |akka.stdout-loglevel = "ERROR" + |akka.jvm-exit-on-fatal-error = off + |akka.remote.transport-failure-detector.heartbeat-interval = $akkaHeartBeatInterval s + |akka.remote.transport-failure-detector.acceptable-heartbeat-pause = $akkaHeartBeatPauses s + |akka.remote.transport-failure-detector.threshold = $akkaFailureDetector + |akka.actor.provider = "akka.remote.RemoteActorRefProvider" + |akka.remote.netty.tcp.transport-class = "akka.remote.transport.netty.NettyTransport" + |akka.remote.netty.tcp.hostname = "$host" + |akka.remote.netty.tcp.port = $port + |akka.remote.netty.tcp.tcp-nodelay = on + |akka.remote.netty.tcp.connection-timeout = $akkaTimeout s + |akka.remote.netty.tcp.maximum-frame-size = ${akkaFrameSize}MiB + |akka.remote.netty.tcp.execution-pool-size = $akkaThreads + |akka.actor.default-dispatcher.throughput = $akkaBatchSize + |akka.log-config-on-start = $logAkkaConfig + |akka.remote.log-remote-lifecycle-events = $lifecycleEvents + |akka.log-dead-letters = $lifecycleEvents + |akka.log-dead-letters-during-shutdown = $lifecycleEvents + """.stripMargin)) + + val actorSystem = if 
(indestructible) { + IndestructibleActorSystem(name, akkaConf) + } else { + ActorSystem(name, akkaConf) + } + val provider = actorSystem.asInstanceOf[ExtendedActorSystem].provider - val boundPort = provider.asInstanceOf[RemoteActorRefProvider].transport.address.port.get - return (actorSystem, boundPort) + val boundPort = provider.getDefaultAddress.port.get + (actorSystem, boundPort) + } + + /** Returns the default Spark timeout to use for Akka ask operations. */ + def askTimeout(conf: SparkConf): FiniteDuration = { + Duration.create(conf.getLong("spark.akka.askTimeout", 30), "seconds") + } + + /** Returns the default Spark timeout to use for Akka remote actor lookup. */ + def lookupTimeout(conf: SparkConf): FiniteDuration = { + Duration.create(conf.get("spark.akka.lookupTimeout", "30").toLong, "seconds") } } diff --git a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala index 7108595e3e445..1df6b87fb0730 100644 --- a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala +++ b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala @@ -61,7 +61,7 @@ private[spark] object ClosureCleaner extends Logging { return f.getType :: Nil // Stop at the first $outer that is not a closure } } - return Nil + Nil } // Get a list of the outer objects for a given closure object. @@ -74,7 +74,7 @@ private[spark] object ClosureCleaner extends Logging { return f.get(obj) :: Nil // Stop at the first $outer that is not a closure } } - return Nil + Nil } private def getInnerClasses(obj: AnyRef): List[Class[_]] = { @@ -174,7 +174,7 @@ private[spark] object ClosureCleaner extends Logging { field.setAccessible(true) field.set(obj, outer) } - return obj + obj } } } @@ -182,7 +182,7 @@ private[spark] object ClosureCleaner extends Logging { private[spark] class FieldAccessFinder(output: Map[Class[_], Set[String]]) extends ClassVisitor(ASM4) { override def visitMethod(access: Int, name: String, desc: String, sig: String, exceptions: Array[String]): MethodVisitor = { - return new MethodVisitor(ASM4) { + new MethodVisitor(ASM4) { override def visitFieldInsn(op: Int, owner: String, name: String, desc: String) { if (op == GETFIELD) { for (cl <- output.keys if cl.getName == owner.replace('/', '.')) { @@ -215,7 +215,7 @@ private[spark] class InnerClosureFinder(output: Set[Class[_]]) extends ClassVisi override def visitMethod(access: Int, name: String, desc: String, sig: String, exceptions: Array[String]): MethodVisitor = { - return new MethodVisitor(ASM4) { + new MethodVisitor(ASM4) { override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) { val argTypes = Type.getArgumentTypes(desc) diff --git a/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala index dc15a38b29d70..fcc1ca9502aa1 100644 --- a/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala +++ b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala @@ -18,14 +18,15 @@ package org.apache.spark.util /** - * Wrapper around an iterator which calls a completion method after it successfully iterates through all the elements + * Wrapper around an iterator which calls a completion method after it successfully iterates + * through all the elements. 
*/ -abstract class CompletionIterator[+A, +I <: Iterator[A]](sub: I) extends Iterator[A]{ - def next = sub.next +private[spark] abstract class CompletionIterator[+A, +I <: Iterator[A]](sub: I) extends Iterator[A]{ + def next() = sub.next() def hasNext = { val r = sub.hasNext if (!r) { - completion + completion() } r } @@ -33,7 +34,7 @@ abstract class CompletionIterator[+A, +I <: Iterator[A]](sub: I) extends Iterato def completion() } -object CompletionIterator { +private[spark] object CompletionIterator { def apply[A, I <: Iterator[A]](sub: I, completionFunction: => Unit) : CompletionIterator[A,I] = { new CompletionIterator[A,I](sub) { def completion() = completionFunction diff --git a/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala b/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala new file mode 100644 index 0000000000000..bf71882ef770a --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Must be in akka.actor package as ActorSystemImpl is protected[akka]. +package akka.actor + +import scala.util.control.{ControlThrowable, NonFatal} + +import com.typesafe.config.Config + +/** + * An [[akka.actor.ActorSystem]] which refuses to shut down in the event of a fatal exception. + * This is necessary as Spark Executors are allowed to recover from fatal exceptions + * (see [[org.apache.spark.executor.Executor]]). + */ +object IndestructibleActorSystem { + def apply(name: String, config: Config): ActorSystem = + apply(name, config, ActorSystem.findClassLoader()) + + def apply(name: String, config: Config, classLoader: ClassLoader): ActorSystem = + new IndestructibleActorSystemImpl(name, config, classLoader).start() +} + +private[akka] class IndestructibleActorSystemImpl( + override val name: String, + applicationConfig: Config, + classLoader: ClassLoader) + extends ActorSystemImpl(name, applicationConfig, classLoader) { + + protected override def uncaughtExceptionHandler: Thread.UncaughtExceptionHandler = { + val fallbackHandler = super.uncaughtExceptionHandler + + new Thread.UncaughtExceptionHandler() { + def uncaughtException(thread: Thread, cause: Throwable): Unit = { + if (isFatalError(cause) && !settings.JvmExitOnFatalError) { + log.error(cause, "Uncaught fatal error from thread [{}] not shutting down " + + "ActorSystem [{}] tolerating and continuing.... 
", thread.getName, name) + //shutdown() //TODO make it configurable + } else { + fallbackHandler.uncaughtException(thread, cause) + } + } + } + } + + def isFatalError(e: Throwable): Boolean = { + e match { + case NonFatal(_) | _: InterruptedException | _: NotImplementedError | _: ControlThrowable => + false + case _ => + true + } + } +} diff --git a/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala b/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala index 061221d1d4c81..b0febe906ade3 100644 --- a/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala +++ b/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala @@ -18,16 +18,21 @@ package org.apache.spark.util import java.util.{TimerTask, Timer} -import org.apache.spark.Logging +import org.apache.spark.{SparkConf, Logging} /** * Runs a timer task to periodically clean up metadata (e.g. old files or hashtable entries) */ -class MetadataCleaner(cleanerType: MetadataCleanerType.MetadataCleanerType, cleanupFunc: (Long) => Unit) extends Logging { +private[spark] class MetadataCleaner( + cleanerType: MetadataCleanerType.MetadataCleanerType, + cleanupFunc: (Long) => Unit, + conf: SparkConf) + extends Logging +{ val name = cleanerType.toString - private val delaySeconds = MetadataCleaner.getDelaySeconds(cleanerType) + private val delaySeconds = MetadataCleaner.getDelaySeconds(conf, cleanerType) private val periodSeconds = math.max(10, delaySeconds / 10) private val timer = new Timer(name + " cleanup timer", true) @@ -55,8 +60,7 @@ class MetadataCleaner(cleanerType: MetadataCleanerType.MetadataCleanerType, clea } } -object MetadataCleanerType extends Enumeration("MapOutputTracker", "SparkContext", "HttpBroadcast", "DagScheduler", "ResultTask", - "ShuffleMapTask", "BlockManager", "DiskBlockManager", "BroadcastVars") { +private[spark] object MetadataCleanerType extends Enumeration { val MAP_OUTPUT_TRACKER, SPARK_CONTEXT, HTTP_BROADCAST, DAG_SCHEDULER, RESULT_TASK, SHUFFLE_MAP_TASK, BLOCK_MANAGER, SHUFFLE_BLOCK_MANAGER, BROADCAST_VARS = Value @@ -66,22 +70,28 @@ object MetadataCleanerType extends Enumeration("MapOutputTracker", "SparkContext def systemProperty(which: MetadataCleanerType.MetadataCleanerType) = "spark.cleaner.ttl." + which.toString } -object MetadataCleaner { - - // using only sys props for now : so that workers can also get to it while preserving earlier behavior. - def getDelaySeconds = System.getProperty("spark.cleaner.ttl", "-1").toInt +// TODO: This mutates a Conf to set properties right now, which is kind of ugly when used in the +// initialization of StreamingContext. It's okay for users trying to configure stuff themselves. 
+private[spark] object MetadataCleaner { + def getDelaySeconds(conf: SparkConf) = { + conf.getInt("spark.cleaner.ttl", -1) + } - def getDelaySeconds(cleanerType: MetadataCleanerType.MetadataCleanerType): Int = { - System.getProperty(MetadataCleanerType.systemProperty(cleanerType), getDelaySeconds.toString).toInt + def getDelaySeconds(conf: SparkConf, cleanerType: MetadataCleanerType.MetadataCleanerType): Int = + { + conf.get(MetadataCleanerType.systemProperty(cleanerType), getDelaySeconds(conf).toString) + .toInt } - def setDelaySeconds(cleanerType: MetadataCleanerType.MetadataCleanerType, delay: Int) { - System.setProperty(MetadataCleanerType.systemProperty(cleanerType), delay.toString) + def setDelaySeconds(conf: SparkConf, cleanerType: MetadataCleanerType.MetadataCleanerType, + delay: Int) + { + conf.set(MetadataCleanerType.systemProperty(cleanerType), delay.toString) } - def setDelaySeconds(delay: Int, resetAll: Boolean = true) { + def setDelaySeconds(conf: SparkConf, delay: Int, resetAll: Boolean = true) { // override for all ? - System.setProperty("spark.cleaner.ttl", delay.toString) + conf.set("spark.cleaner.ttl", delay.toString) if (resetAll) { for (cleanerType <- MetadataCleanerType.values) { System.clearProperty(MetadataCleanerType.systemProperty(cleanerType)) diff --git a/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala b/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala new file mode 100644 index 0000000000000..8b4e7c104cb19 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/SerializableHyperLogLog.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.io.{Externalizable, ObjectOutput, ObjectInput} +import com.clearspring.analytics.stream.cardinality.{ICardinality, HyperLogLog} + +/** + * A wrapper around [[com.clearspring.analytics.stream.cardinality.HyperLogLog]] that is serializable. 
+ */ +private[spark] +class SerializableHyperLogLog(var value: ICardinality) extends Externalizable { + + def this() = this(null) // For deserialization + + def merge(other: SerializableHyperLogLog) = new SerializableHyperLogLog(value.merge(other.value)) + + def add[T](elem: T) = { + this.value.offer(elem) + this + } + + def readExternal(in: ObjectInput) { + val byteLength = in.readInt() + val bytes = new Array[Byte](byteLength) + in.readFully(bytes) + value = HyperLogLog.Builder.build(bytes) + } + + def writeExternal(out: ObjectOutput) { + val bytes = value.getBytes() + out.writeInt(bytes.length) + out.write(bytes) + } +} diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index a25b37a2a96a9..3cf94892e9680 100644 --- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -30,10 +30,10 @@ import java.lang.management.ManagementFactory import scala.collection.mutable.ArrayBuffer import it.unimi.dsi.fastutil.ints.IntOpenHashSet -import org.apache.spark.Logging +import org.apache.spark.{SparkEnv, SparkConf, SparkContext, Logging} /** - * Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in + * Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in * memory-aware caches. * * Based on the following JavaWorld article: @@ -89,9 +89,11 @@ private[spark] object SizeEstimator extends Logging { classInfos.put(classOf[Object], new ClassInfo(objectSize, Nil)) } - private def getIsCompressedOops : Boolean = { + private def getIsCompressedOops: Boolean = { + // This is only used by tests to override the detection of compressed oops. The test + // actually uses a system property instead of a SparkConf, so we'll stick with that. if (System.getProperty("spark.test.useCompressedOops") != null) { - return System.getProperty("spark.test.useCompressedOops").toBoolean + return System.getProperty("spark.test.useCompressedOops").toBoolean } try { @@ -103,10 +105,10 @@ private[spark] object SizeEstimator extends Logging { val getVMMethod = hotSpotMBeanClass.getDeclaredMethod("getVMOption", Class.forName("java.lang.String")) - val bean = ManagementFactory.newPlatformMXBeanProxy(server, + val bean = ManagementFactory.newPlatformMXBeanProxy(server, hotSpotMBeanName, hotSpotMBeanClass) // TODO: We could use reflection on the VMOption returned ? 
- return getVMMethod.invoke(bean, "UseCompressedOops").toString.contains("true") + getVMMethod.invoke(bean, "UseCompressedOops").toString.contains("true") } catch { case e: Exception => { // Guess whether they've enabled UseCompressedOops based on whether maxMemory < 32 GB @@ -139,7 +141,7 @@ private[spark] object SizeEstimator extends Logging { def dequeue(): AnyRef = { val elem = stack.last stack.trimEnd(1) - return elem + elem } } @@ -160,7 +162,7 @@ private[spark] object SizeEstimator extends Logging { while (!state.isFinished) { visitSingleObject(state.dequeue(), state) } - return state.size + state.size } private def visitSingleObject(obj: AnyRef, state: SearchState) { @@ -251,7 +253,7 @@ private[spark] object SizeEstimator extends Logging { if (info != null) { return info } - + val parent = getClassInfo(cls.getSuperclass) var shellSize = parent.shellSize var pointerFields = parent.pointerFields @@ -274,11 +276,11 @@ private[spark] object SizeEstimator extends Logging { // Create and cache a new ClassInfo val newInfo = new ClassInfo(shellSize, pointerFields) classInfos.put(cls, newInfo) - return newInfo + newInfo } private def alignSize(size: Long): Long = { val rem = size % ALIGN_SIZE - return if (rem == 0) size else (size + ALIGN_SIZE - rem) + if (rem == 0) size else (size + ALIGN_SIZE - rem) } } diff --git a/core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala b/core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala index 277de2f8a6caa..8e07a0f29addf 100644 --- a/core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala +++ b/core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala @@ -26,16 +26,23 @@ import org.apache.spark.Logging /** * This is a custom implementation of scala.collection.mutable.Map which stores the insertion - * time stamp along with each key-value pair. Key-value pairs that are older than a particular - * threshold time can them be removed using the clearOldValues method. This is intended to be a drop-in - * replacement of scala.collection.mutable.HashMap. + * timestamp along with each key-value pair. If specified, the timestamp of each pair can be + * updated every time it is accessed. Key-value pairs whose timestamp are older than a particular + * threshold time can then be removed using the clearOldValues method. This is intended to + * be a drop-in replacement of scala.collection.mutable.HashMap. 
+ * @param updateTimeStampOnGet When enabled, the timestamp of a pair will be + * updated when it is accessed */ -class TimeStampedHashMap[A, B] extends Map[A, B]() with Logging { +class TimeStampedHashMap[A, B](updateTimeStampOnGet: Boolean = false) + extends Map[A, B]() with Logging { val internalMap = new ConcurrentHashMap[A, (B, Long)]() def get(key: A): Option[B] = { val value = internalMap.get(key) - if (value != null) Some(value._1) else None + if (value != null && updateTimeStampOnGet) { + internalMap.replace(key, value, (value._1, currentTime)) + } + Option(value).map(_._1) } def iterator: Iterator[(A, B)] = { @@ -85,7 +92,7 @@ class TimeStampedHashMap[A, B] extends Map[A, B]() with Logging { } override def filter(p: ((A, B)) => Boolean): Map[A, B] = { - JavaConversions.asScalaConcurrentMap(internalMap).map(kv => (kv._1, kv._2._1)).filter(p) + JavaConversions.mapAsScalaConcurrentMap(internalMap).map(kv => (kv._1, kv._2._1)).filter(p) } override def empty: Map[A, B] = new TimeStampedHashMap[A, B]() @@ -104,19 +111,28 @@ class TimeStampedHashMap[A, B] extends Map[A, B]() with Logging { def toMap: immutable.Map[A, B] = iterator.toMap /** - * Removes old key-value pairs that have timestamp earlier than `threshTime` + * Removes old key-value pairs that have timestamp earlier than `threshTime`, + * calling the supplied function on each such entry before removing. */ - def clearOldValues(threshTime: Long) { + def clearOldValues(threshTime: Long, f: (A, B) => Unit) { val iterator = internalMap.entrySet().iterator() - while(iterator.hasNext) { + while (iterator.hasNext) { val entry = iterator.next() if (entry.getValue._2 < threshTime) { + f(entry.getKey, entry.getValue._1) logDebug("Removing key " + entry.getKey) iterator.remove() } } } + /** + * Removes old key-value pairs that have timestamp earlier than `threshTime` + */ + def clearOldValues(threshTime: Long) { + clearOldValues(threshTime, (_, _) => ()) + } + private def currentTime: Long = System.currentTimeMillis() } diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index fe932d8ede2f3..8447773343d25 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -22,20 +22,23 @@ import java.net.{InetAddress, URL, URI, NetworkInterface, Inet4Address} import java.util.{Locale, Random, UUID} import java.util.concurrent.{ConcurrentHashMap, Executors, ThreadPoolExecutor} +import scala.collection.JavaConversions._ import scala.collection.Map import scala.collection.mutable.ArrayBuffer -import scala.collection.JavaConversions._ import scala.io.Source +import scala.reflect.ClassTag import com.google.common.io.Files import com.google.common.util.concurrent.ThreadFactoryBuilder +import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{Path, FileSystem, FileUtil} +import org.apache.hadoop.io._ import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance} import org.apache.spark.deploy.SparkHadoopUtil import java.nio.ByteBuffer -import org.apache.spark.{SparkException, Logging} +import org.apache.spark.{SparkConf, SparkException, Logging} /** @@ -49,14 +52,14 @@ private[spark] object Utils extends Logging { val oos = new ObjectOutputStream(bos) oos.writeObject(o) oos.close() - return bos.toByteArray + bos.toByteArray } /** Deserialize an object using Java serialization */ def deserialize[T](bytes: Array[Byte]): T = { val bis = new 
ByteArrayInputStream(bytes) val ois = new ObjectInputStream(bis) - return ois.readObject.asInstanceOf[T] + ois.readObject.asInstanceOf[T] } /** Deserialize an object using Java serialization and the given ClassLoader */ @@ -66,7 +69,7 @@ private[spark] object Utils extends Logging { override def resolveClass(desc: ObjectStreamClass) = Class.forName(desc.getName, false, loader) } - return ois.readObject.asInstanceOf[T] + ois.readObject.asInstanceOf[T] } /** Deserialize a Long value (used for {@link org.apache.spark.api.python.PythonPartitioner}) */ @@ -144,7 +147,7 @@ private[spark] object Utils extends Logging { i += 1 } } - return buf + buf } private val shutdownDeletePaths = new scala.collection.mutable.HashSet[String]() @@ -238,9 +241,9 @@ private[spark] object Utils extends Logging { * Throws SparkException if the target file already exists and has different contents than * the requested file. */ - def fetchFile(url: String, targetDir: File) { + def fetchFile(url: String, targetDir: File, conf: SparkConf) { val filename = url.split("/").last - val tempDir = getLocalDir + val tempDir = getLocalDir(conf) val tempFile = File.createTempFile("fetchFileTemp", null, new File(tempDir)) val targetFile = new File(targetDir, filename) val uri = new URI(url) @@ -310,8 +313,8 @@ private[spark] object Utils extends Logging { * return a single directory, even though the spark.local.dir property might be a list of * multiple paths. */ - def getLocalDir: String = { - System.getProperty("spark.local.dir", System.getProperty("java.io.tmpdir")).split(',')(0) + def getLocalDir(conf: SparkConf): String = { + conf.get("spark.local.dir", System.getProperty("java.io.tmpdir")).split(',')(0) } /** @@ -319,7 +322,7 @@ private[spark] object Utils extends Logging { * result in a new collection. Unlike scala.util.Random.shuffle, this method * uses a local random number generator, avoiding inter-thread contention. */ - def randomize[T: ClassManifest](seq: TraversableOnce[T]): Seq[T] = { + def randomize[T: ClassTag](seq: TraversableOnce[T]): Seq[T] = { randomizeInPlace(seq.toArray) } @@ -396,16 +399,6 @@ private[spark] object Utils extends Logging { InetAddress.getByName(address).getHostName } - def localHostPort(): String = { - val retval = System.getProperty("spark.hostPort", null) - if (retval == null) { - logErrorWithStack("spark.hostPort not set but invoking localHostPort") - return localHostName() - } - - retval - } - def checkHost(host: String, message: String = "") { assert(host.indexOf(':') == -1, message) } @@ -414,11 +407,6 @@ private[spark] object Utils extends Logging { assert(hostPort.indexOf(':') != -1, message) } - // Used by DEBUG code : remove when all testing done - def logErrorWithStack(msg: String) { - try { throw new Exception } catch { case ex: Exception => { logError(msg, ex) } } - } - // Typically, this will be of order of number of nodes in cluster // If not, we should change it to LRUCache or something. private val hostPortParseResults = new ConcurrentHashMap[String, (String, Int)]() @@ -426,7 +414,7 @@ private[spark] object Utils extends Logging { def parseHostPort(hostPort: String): (String, Int) = { { // Check cache first. 
- var cached = hostPortParseResults.get(hostPort) + val cached = hostPortParseResults.get(hostPort) if (cached != null) return cached } @@ -664,7 +652,7 @@ private[spark] object Utils extends Logging { for (el <- trace) { if (!finished) { - if (SPARK_CLASS_REGEX.findFirstIn(el.getClassName) != None) { + if (SPARK_CLASS_REGEX.findFirstIn(el.getClassName).isDefined) { lastSparkMethod = if (el.getMethodName == "") { // Spark method is a constructor; get its class name el.getClassName.substring(el.getClassName.lastIndexOf('.') + 1) @@ -729,7 +717,7 @@ private[spark] object Utils extends Logging { } catch { case ise: IllegalStateException => return true } - return false + false } def isSpace(c: Char): Boolean = { @@ -746,7 +734,7 @@ private[spark] object Utils extends Logging { var inWord = false var inSingleQuote = false var inDoubleQuote = false - var curWord = new StringBuilder + val curWord = new StringBuilder def endWord() { buf += curWord.toString curWord.clear() @@ -792,7 +780,7 @@ private[spark] object Utils extends Logging { if (inWord || inDoubleQuote || inSingleQuote) { endWord() } - return buf + buf } /* Calculates 'x' modulo 'mod', takes to consideration sign of x, @@ -820,7 +808,30 @@ private[spark] object Utils extends Logging { /** Returns a copy of the system properties that is thread-safe to iterator over. */ def getSystemProperties(): Map[String, String] = { - return System.getProperties().clone() - .asInstanceOf[java.util.Properties].toMap[String, String] + System.getProperties.clone().asInstanceOf[java.util.Properties].toMap[String, String] } + + /** + * Method executed for repeating a task for side effects. + * Unlike a for comprehension, it permits JVM JIT optimization + */ + def times(numIters: Int)(f: => Unit): Unit = { + var i = 0 + while (i < numIters) { + f + i += 1 + } + } + + /** + * Timing method based on iterations that permit JVM JIT optimization. 
+ * @param numIters number of iterations + * @param f function to be executed + */ + def timeIt(numIters: Int)(f: => Unit): Long = { + val start = System.currentTimeMillis + times(numIters)(f) + System.currentTimeMillis - start + } + } diff --git a/core/src/main/scala/org/apache/spark/util/Vector.scala b/core/src/main/scala/org/apache/spark/util/Vector.scala index fe710c58acc44..fcdf8486371a4 100644 --- a/core/src/main/scala/org/apache/spark/util/Vector.scala +++ b/core/src/main/scala/org/apache/spark/util/Vector.scala @@ -17,6 +17,8 @@ package org.apache.spark.util +import scala.util.Random + class Vector(val elements: Array[Double]) extends Serializable { def length = elements.length @@ -25,7 +27,7 @@ class Vector(val elements: Array[Double]) extends Serializable { def + (other: Vector): Vector = { if (length != other.length) throw new IllegalArgumentException("Vectors of different length") - return Vector(length, i => this(i) + other(i)) + Vector(length, i => this(i) + other(i)) } def add(other: Vector) = this + other @@ -33,7 +35,7 @@ class Vector(val elements: Array[Double]) extends Serializable { def - (other: Vector): Vector = { if (length != other.length) throw new IllegalArgumentException("Vectors of different length") - return Vector(length, i => this(i) - other(i)) + Vector(length, i => this(i) - other(i)) } def subtract(other: Vector) = this - other @@ -47,7 +49,7 @@ class Vector(val elements: Array[Double]) extends Serializable { ans += this(i) * other(i) i += 1 } - return ans + ans } /** @@ -67,7 +69,7 @@ class Vector(val elements: Array[Double]) extends Serializable { ans += (this(i) + plus(i)) * other(i) i += 1 } - return ans + ans } def += (other: Vector): Vector = { @@ -102,7 +104,7 @@ class Vector(val elements: Array[Double]) extends Serializable { ans += (this(i) - other(i)) * (this(i) - other(i)) i += 1 } - return ans + ans } def dist(other: Vector): Double = math.sqrt(squaredDist(other)) @@ -117,13 +119,19 @@ object Vector { def apply(length: Int, initializer: Int => Double): Vector = { val elements: Array[Double] = Array.tabulate(length)(initializer) - return new Vector(elements) + new Vector(elements) } def zeros(length: Int) = new Vector(new Array[Double](length)) def ones(length: Int) = Vector(length, _ => 1) + /** + * Creates this [[org.apache.spark.util.Vector]] of given length containing random numbers + * between 0.0 and 1.0. Optional [[scala.util.Random]] number generator can be provided. + */ + def random(length: Int, random: Random = new XORShiftRandom()) = Vector(length, _ => random.nextDouble()) + class Multiplier(num: Double) { def * (vec: Vector) = vec * num } diff --git a/core/src/main/scala/org/apache/spark/util/XORShiftRandom.scala b/core/src/main/scala/org/apache/spark/util/XORShiftRandom.scala new file mode 100644 index 0000000000000..08b31ac64f290 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/XORShiftRandom.scala @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.util.{Random => JavaRandom} +import org.apache.spark.util.Utils.timeIt + +/** + * This class implements a XORShift random number generator algorithm + * Source: + * Marsaglia, G. (2003). Xorshift RNGs. Journal of Statistical Software, Vol. 8, Issue 14. + * @see
    Paper + * This implementation is approximately 3.5 times faster than + * {@link java.util.Random java.util.Random}, partly because of the algorithm, but also due + * to renouncing thread safety. JDK's implementation uses an AtomicLong seed, this class + * uses a regular Long. We can forgo thread safety since we use a new instance of the RNG + * for each thread. + */ +private[spark] class XORShiftRandom(init: Long) extends JavaRandom(init) { + + def this() = this(System.nanoTime) + + private var seed = init + + // we need to just override next - this will be called by nextInt, nextDouble, + // nextGaussian, nextLong, etc. + override protected def next(bits: Int): Int = { + var nextSeed = seed ^ (seed << 21) + nextSeed ^= (nextSeed >>> 35) + nextSeed ^= (nextSeed << 4) + seed = nextSeed + (nextSeed & ((1L << bits) -1)).asInstanceOf[Int] + } +} + +/** Contains benchmark method and main method to run benchmark of the RNG */ +private[spark] object XORShiftRandom { + + /** + * Main method for running benchmark + * @param args takes one argument - the number of random numbers to generate + */ + def main(args: Array[String]): Unit = { + if (args.length != 1) { + println("Benchmark of XORShiftRandom vis-a-vis java.util.Random") + println("Usage: XORShiftRandom number_of_random_numbers_to_generate") + System.exit(1) + } + println(benchmark(args(0).toInt)) + } + + /** + * @param numIters Number of random numbers to generate while running the benchmark + * @return Map of execution times for {@link java.util.Random java.util.Random} + * and XORShift + */ + def benchmark(numIters: Int) = { + + val seed = 1L + val million = 1e6.toInt + val javaRand = new JavaRandom(seed) + val xorRand = new XORShiftRandom(seed) + + // this is just to warm up the JIT - we're not timing anything + timeIt(1e6.toInt) { + javaRand.nextInt() + xorRand.nextInt() + } + + val iters = timeIt(numIters)(_) + + /* Return results as a map instead of just printing to screen + in case the user wants to do something with them */ + Map("javaTime" -> iters {javaRand.nextInt()}, + "xorTime" -> iters {xorRand.nextInt()}) + + } + +} diff --git a/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala similarity index 68% rename from core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala rename to core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala index 8bb4ee3bfa22e..b8c852b4ff5c7 100644 --- a/core/src/main/scala/org/apache/spark/util/AppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala @@ -15,7 +15,9 @@ * limitations under the License. */ -package org.apache.spark.util +package org.apache.spark.util.collection + +import java.util.{Arrays, Comparator} /** * A simple open hash table optimized for the append-only use case, where keys @@ -28,14 +30,15 @@ package org.apache.spark.util * TODO: Cache the hash values of each key? java.util.HashMap does that. 
*/ private[spark] -class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] with Serializable { +class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, + V)] with Serializable { require(initialCapacity <= (1 << 29), "Can't make capacity bigger than 2^29 elements") require(initialCapacity >= 1, "Invalid initial capacity") private var capacity = nextPowerOf2(initialCapacity) private var mask = capacity - 1 private var curSize = 0 - private var growThreshold = LOAD_FACTOR * capacity + private var growThreshold = (LOAD_FACTOR * capacity).toInt // Holds keys and values in the same array for memory locality; specifically, the order of // elements is key0, value0, key1, value1, key2, value2, etc. @@ -45,10 +48,15 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi private var haveNullValue = false private var nullValue: V = null.asInstanceOf[V] + // Triggered by destructiveSortedIterator; the underlying data array may no longer be used + private var destroyed = false + private val destructionMessage = "Map state is invalid from destructive sorting!" + private val LOAD_FACTOR = 0.7 /** Get the value for a given key */ def apply(key: K): V = { + assert(!destroyed, destructionMessage) val k = key.asInstanceOf[AnyRef] if (k.eq(null)) { return nullValue @@ -67,11 +75,12 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi i += 1 } } - return null.asInstanceOf[V] + null.asInstanceOf[V] } /** Set the value for a key */ def update(key: K, value: V): Unit = { + assert(!destroyed, destructionMessage) val k = key.asInstanceOf[AnyRef] if (k.eq(null)) { if (!haveNullValue) { @@ -106,6 +115,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi * for key, if any, or null otherwise. Returns the newly updated value. 
*/ def changeValue(key: K, updateFunc: (Boolean, V) => V): V = { + assert(!destroyed, destructionMessage) val k = key.asInstanceOf[AnyRef] if (k.eq(null)) { if (!haveNullValue) { @@ -139,35 +149,38 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi } /** Iterator method from Iterable */ - override def iterator: Iterator[(K, V)] = new Iterator[(K, V)] { - var pos = -1 - - /** Get the next value we should return from next(), or null if we're finished iterating */ - def nextValue(): (K, V) = { - if (pos == -1) { // Treat position -1 as looking at the null value - if (haveNullValue) { - return (null.asInstanceOf[K], nullValue) + override def iterator: Iterator[(K, V)] = { + assert(!destroyed, destructionMessage) + new Iterator[(K, V)] { + var pos = -1 + + /** Get the next value we should return from next(), or null if we're finished iterating */ + def nextValue(): (K, V) = { + if (pos == -1) { // Treat position -1 as looking at the null value + if (haveNullValue) { + return (null.asInstanceOf[K], nullValue) + } + pos += 1 } - pos += 1 - } - while (pos < capacity) { - if (!data(2 * pos).eq(null)) { - return (data(2 * pos).asInstanceOf[K], data(2 * pos + 1).asInstanceOf[V]) + while (pos < capacity) { + if (!data(2 * pos).eq(null)) { + return (data(2 * pos).asInstanceOf[K], data(2 * pos + 1).asInstanceOf[V]) + } + pos += 1 } - pos += 1 + null } - null - } - override def hasNext: Boolean = nextValue() != null + override def hasNext: Boolean = nextValue() != null - override def next(): (K, V) = { - val value = nextValue() - if (value == null) { - throw new NoSuchElementException("End of iterator") + override def next(): (K, V) = { + val value = nextValue() + if (value == null) { + throw new NoSuchElementException("End of iterator") + } + pos += 1 + value } - pos += 1 - value } } @@ -190,7 +203,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi } /** Double the table's size and re-hash everything */ - private def growTable() { + protected def growTable() { val newCapacity = capacity * 2 if (newCapacity >= (1 << 30)) { // We can't make the table this big because we want an array of 2x @@ -227,11 +240,58 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) extends Iterable[(K, V)] wi data = newData capacity = newCapacity mask = newMask - growThreshold = LOAD_FACTOR * newCapacity + growThreshold = (LOAD_FACTOR * newCapacity).toInt } private def nextPowerOf2(n: Int): Int = { val highBit = Integer.highestOneBit(n) if (highBit == n) n else highBit << 1 } + + /** + * Return an iterator of the map in sorted order. This provides a way to sort the map without + * using additional memory, at the expense of destroying the validity of the map. 
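The (hadValue, oldValue) => newValue contract of changeValue is easiest to see with a small stand-in; AppendOnlyMap is private[spark], so the sketch below uses a plain mutable.HashMap to show the same calling pattern:

    import scala.collection.mutable

    // Stand-in for changeValue: the update function either creates an initial
    // value (hadValue == false) or folds into the existing one.
    def changeValue[K, V](m: mutable.HashMap[K, V], key: K, updateFunc: (Boolean, V) => V): V = {
      val newValue = updateFunc(m.contains(key), m.getOrElse(key, null.asInstanceOf[V]))
      m(key) = newValue
      newValue
    }

    val counts = mutable.HashMap[String, Int]()
    Seq("a", "b", "a").foreach { w =>
      changeValue(counts, w, (had, old) => if (had) old + 1 else 1)
    }
    // counts: Map(a -> 2, b -> 1)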
+ */ + def destructiveSortedIterator(cmp: Comparator[(K, V)]): Iterator[(K, V)] = { + destroyed = true + // Pack KV pairs into the front of the underlying array + var keyIndex, newIndex = 0 + while (keyIndex < capacity) { + if (data(2 * keyIndex) != null) { + data(newIndex) = (data(2 * keyIndex), data(2 * keyIndex + 1)) + newIndex += 1 + } + keyIndex += 1 + } + assert(curSize == newIndex + (if (haveNullValue) 1 else 0)) + + // Sort by the given ordering + val rawOrdering = new Comparator[AnyRef] { + def compare(x: AnyRef, y: AnyRef): Int = { + cmp.compare(x.asInstanceOf[(K, V)], y.asInstanceOf[(K, V)]) + } + } + Arrays.sort(data, 0, newIndex, rawOrdering) + + new Iterator[(K, V)] { + var i = 0 + var nullValueReady = haveNullValue + def hasNext: Boolean = (i < newIndex || nullValueReady) + def next(): (K, V) = { + if (nullValueReady) { + nullValueReady = false + (null.asInstanceOf[K], nullValue) + } else { + val item = data(i).asInstanceOf[(K, V)] + i += 1 + item + } + } + } + } + + /** + * Return whether the next insert will cause the map to grow + */ + def atGrowThreshold: Boolean = curSize == growThreshold } diff --git a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala index a1a452315d143..856eb772a1084 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala @@ -22,10 +22,72 @@ package org.apache.spark.util.collection * A simple, fixed-size bit set implementation. This implementation is fast because it avoids * safety/bound checking. */ -class BitSet(numBits: Int) { +class BitSet(numBits: Int) extends Serializable { - private[this] val words = new Array[Long](bit2words(numBits)) - private[this] val numWords = words.length + private val words = new Array[Long](bit2words(numBits)) + private val numWords = words.length + + /** + * Compute the capacity (number of bits) that can be represented + * by this bitset. + */ + def capacity: Int = numWords * 64 + + /** + * Set all the bits up to a given index + */ + def setUntil(bitIndex: Int) { + val wordIndex = bitIndex >> 6 // divide by 64 + var i = 0 + while(i < wordIndex) { words(i) = -1; i += 1 } + if(wordIndex < words.size) { + // Set the remaining bits (note that the mask could still be zero) + val mask = ~(-1L << (bitIndex & 0x3f)) + words(wordIndex) |= mask + } + } + + /** + * Compute the bit-wise AND of the two sets returning the + * result. + */ + def &(other: BitSet): BitSet = { + val newBS = new BitSet(math.max(capacity, other.capacity)) + val smaller = math.min(numWords, other.numWords) + assert(newBS.numWords >= numWords) + assert(newBS.numWords >= other.numWords) + var ind = 0 + while( ind < smaller ) { + newBS.words(ind) = words(ind) & other.words(ind) + ind += 1 + } + newBS + } + + /** + * Compute the bit-wise OR of the two sets returning the + * result. + */ + def |(other: BitSet): BitSet = { + val newBS = new BitSet(math.max(capacity, other.capacity)) + assert(newBS.numWords >= numWords) + assert(newBS.numWords >= other.numWords) + val smaller = math.min(numWords, other.numWords) + var ind = 0 + while( ind < smaller ) { + newBS.words(ind) = words(ind) | other.words(ind) + ind += 1 + } + while( ind < numWords ) { + newBS.words(ind) = words(ind) + ind += 1 + } + while( ind < other.numWords ) { + newBS.words(ind) = other.words(ind) + ind += 1 + } + newBS + } /** * Sets the bit at the specified index to true. 
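destructiveSortedIterator is what the external spill path later in this patch relies on; it is handed a comparator over (key, value) pairs that orders by key hash, along the lines of this sketch (the real comparator, KCComparator, appears further down):

    import java.util.Comparator

    // Order (key, value) pairs by the key's hash code, as the spill path does.
    def hashComparator[K, V]: Comparator[(K, V)] = new Comparator[(K, V)] {
      def compare(a: (K, V), b: (K, V)): Int =
        a._1.hashCode().compareTo(b._1.hashCode())
    }

    // Illustrative call site only (AppendOnlyMap is private[spark]):
    //   val it = map.destructiveSortedIterator(hashComparator[String, Int])
    //   // after this call the map must not be read from or written to again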
@@ -36,6 +98,11 @@ class BitSet(numBits: Int) { words(index >> 6) |= bitmask // div by 64 and mask } + def unset(index: Int) { + val bitmask = 1L << (index & 0x3f) // mod 64 and shift + words(index >> 6) &= ~bitmask // div by 64 and mask + } + /** * Return the value of the bit with the specified index. The value is true if the bit with * the index is currently set in this BitSet; otherwise, the result is false. @@ -48,6 +115,20 @@ class BitSet(numBits: Int) { (words(index >> 6) & bitmask) != 0 // div by 64 and mask } + /** + * Get an iterator over the set bits. + */ + def iterator = new Iterator[Int] { + var ind = nextSetBit(0) + override def hasNext: Boolean = ind >= 0 + override def next() = { + val tmp = ind + ind = nextSetBit(ind+1) + tmp + } + } + + /** Return the number of bits set to true in this BitSet. */ def cardinality(): Int = { var sum = 0 diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala new file mode 100644 index 0000000000000..6f368179bbfbd --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala @@ -0,0 +1,427 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util.collection + +import java.io._ +import java.util.Comparator + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + +import it.unimi.dsi.fastutil.io.FastBufferedInputStream +import com.google.common.io.ByteStreams + +import org.apache.spark.{Logging, SparkEnv} +import org.apache.spark.serializer.Serializer +import org.apache.spark.storage.{BlockId, BlockManager} + +/** + * An append-only map that spills sorted content to disk when there is insufficient space for it + * to grow. + * + * This map takes two passes over the data: + * + * (1) Values are merged into combiners, which are sorted and spilled to disk as necessary + * (2) Combiners are read from disk and merged together + * + * The setting of the spill threshold faces the following trade-off: If the spill threshold is + * too high, the in-memory map may occupy more memory than is available, resulting in OOM. + * However, if the spill threshold is too low, we spill frequently and incur unnecessary disk + * writes. This may lead to a performance regression compared to the normal case of using the + * non-spilling AppendOnlyMap. + * + * Two parameters control the memory threshold: + * + * `spark.shuffle.memoryFraction` specifies the collective amount of memory used for storing + * these maps as a fraction of the executor's total memory. Since each concurrently running + * task maintains one map, the actual threshold for each map is this quantity divided by the + * number of running tasks. 
+ * + * `spark.shuffle.safetyFraction` specifies an additional margin of safety as a fraction of + * this threshold, in case map size estimation is not sufficiently accurate. + */ + +private[spark] class ExternalAppendOnlyMap[K, V, C]( + createCombiner: V => C, + mergeValue: (C, V) => C, + mergeCombiners: (C, C) => C, + serializer: Serializer = SparkEnv.get.serializerManager.default, + blockManager: BlockManager = SparkEnv.get.blockManager) + extends Iterable[(K, C)] with Serializable with Logging { + + import ExternalAppendOnlyMap._ + + private var currentMap = new SizeTrackingAppendOnlyMap[K, C] + private val spilledMaps = new ArrayBuffer[DiskMapIterator] + private val sparkConf = SparkEnv.get.conf + private val diskBlockManager = blockManager.diskBlockManager + + // Collective memory threshold shared across all running tasks + private val maxMemoryThreshold = { + val memoryFraction = sparkConf.getDouble("spark.shuffle.memoryFraction", 0.3) + val safetyFraction = sparkConf.getDouble("spark.shuffle.safetyFraction", 0.8) + (Runtime.getRuntime.maxMemory * memoryFraction * safetyFraction).toLong + } + + // Number of pairs in the in-memory map + private var numPairsInMemory = 0L + + // Number of in-memory pairs inserted before tracking the map's shuffle memory usage + private val trackMemoryThreshold = 1000 + + /** + * Size of object batches when reading/writing from serializers. + * + * Objects are written in batches, with each batch using its own serialization stream. This + * cuts down on the size of reference-tracking maps constructed when deserializing a stream. + * + * NOTE: Setting this too low can cause excessive copying when serializing, since some serializers + * grow internal data structures by growing + copying every time the number of objects doubles. + */ + private val serializerBatchSize = sparkConf.getLong("spark.shuffle.spill.batchSize", 10000) + + // How many times we have spilled so far + private var spillCount = 0 + + // Number of bytes spilled in total + private var _memoryBytesSpilled = 0L + private var _diskBytesSpilled = 0L + + private val fileBufferSize = sparkConf.getInt("spark.shuffle.file.buffer.kb", 100) * 1024 + private val comparator = new KCComparator[K, C] + private val ser = serializer.newInstance() + + /** + * Insert the given key and value into the map. + * + * If the underlying map is about to grow, check if the global pool of shuffle memory has + * enough room for this to happen. If so, allocate the memory required to grow the map; + * otherwise, spill the in-memory map to disk. + * + * The shuffle memory usage of the first trackMemoryThreshold entries is not tracked. 
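To make the two settings concrete, a rough worked example of the collective threshold with the defaults used in this patch (0.3 and 0.8) and an assumed 1 GiB executor heap:

    // Hedged sketch of the collective shuffle-memory threshold with this patch's defaults.
    val maxHeap        = 1024L * 1024 * 1024       // stand-in for Runtime.getRuntime.maxMemory
    val memoryFraction = 0.3                       // spark.shuffle.memoryFraction
    val safetyFraction = 0.8                       // spark.shuffle.safetyFraction
    val maxMemoryThreshold = (maxHeap * memoryFraction * safetyFraction).toLong
    // About 246 MiB shared across all running tasks; with 4 concurrent tasks each
    // in-memory map effectively gets roughly a quarter of that before it must spill.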
+ */ + def insert(key: K, value: V) { + val update: (Boolean, C) => C = (hadVal, oldVal) => { + if (hadVal) mergeValue(oldVal, value) else createCombiner(value) + } + if (numPairsInMemory > trackMemoryThreshold && currentMap.atGrowThreshold) { + val mapSize = currentMap.estimateSize() + var shouldSpill = false + val shuffleMemoryMap = SparkEnv.get.shuffleMemoryMap + + // Atomically check whether there is sufficient memory in the global pool for + // this map to grow and, if possible, allocate the required amount + shuffleMemoryMap.synchronized { + val threadId = Thread.currentThread().getId + val previouslyOccupiedMemory = shuffleMemoryMap.get(threadId) + val availableMemory = maxMemoryThreshold - + (shuffleMemoryMap.values.sum - previouslyOccupiedMemory.getOrElse(0L)) + + // Assume map growth factor is 2x + shouldSpill = availableMemory < mapSize * 2 + if (!shouldSpill) { + shuffleMemoryMap(threadId) = mapSize * 2 + } + } + // Do not synchronize spills + if (shouldSpill) { + spill(mapSize) + } + } + currentMap.changeValue(key, update) + numPairsInMemory += 1 + } + + /** + * Sort the existing contents of the in-memory map and spill them to a temporary file on disk. + */ + private def spill(mapSize: Long) { + spillCount += 1 + logWarning("Spilling in-memory map of %d MB to disk (%d time%s so far)" + .format(mapSize / (1024 * 1024), spillCount, if (spillCount > 1) "s" else "")) + val (blockId, file) = diskBlockManager.createTempBlock() + var writer = blockManager.getDiskWriter(blockId, file, serializer, fileBufferSize) + var objectsWritten = 0 + + // List of batch sizes (bytes) in the order they are written to disk + val batchSizes = new ArrayBuffer[Long] + + // Flush the disk writer's contents to disk, and update relevant variables + def flush() = { + writer.commit() + val bytesWritten = writer.bytesWritten + batchSizes.append(bytesWritten) + _diskBytesSpilled += bytesWritten + objectsWritten = 0 + } + + try { + val it = currentMap.destructiveSortedIterator(comparator) + while (it.hasNext) { + val kv = it.next() + writer.write(kv) + objectsWritten += 1 + + if (objectsWritten == serializerBatchSize) { + flush() + writer.close() + writer = blockManager.getDiskWriter(blockId, file, serializer, fileBufferSize) + } + } + if (objectsWritten > 0) { + flush() + } + } finally { + // Partial failures cannot be tolerated; do not revert partial writes + writer.close() + } + + currentMap = new SizeTrackingAppendOnlyMap[K, C] + spilledMaps.append(new DiskMapIterator(file, blockId, batchSizes)) + + // Reset the amount of shuffle memory used by this map in the global pool + val shuffleMemoryMap = SparkEnv.get.shuffleMemoryMap + shuffleMemoryMap.synchronized { + shuffleMemoryMap(Thread.currentThread().getId) = 0 + } + numPairsInMemory = 0 + _memoryBytesSpilled += mapSize + } + + def memoryBytesSpilled: Long = _memoryBytesSpilled + def diskBytesSpilled: Long = _diskBytesSpilled + + /** + * Return an iterator that merges the in-memory map with the spilled maps. + * If no spill has occurred, simply return the in-memory map's iterator. 
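A simplified sketch of the grow-or-spill decision in insert() above, with the shared bookkeeping reduced to a local map; the names and the 256 MB budget are assumptions, not values from the patch:

    import scala.collection.mutable

    // threadId -> bytes currently reserved for that thread's in-memory map
    val shuffleMemoryMap = mutable.HashMap[Long, Long]()
    val maxMemoryThreshold = 256L * 1024 * 1024    // assumed collective budget

    // True if a map of `mapSize` bytes may double in place, false if it should spill.
    def tryToGrow(threadId: Long, mapSize: Long): Boolean = shuffleMemoryMap.synchronized {
      val mine      = shuffleMemoryMap.getOrElse(threadId, 0L)
      val others    = shuffleMemoryMap.values.sum - mine
      val available = maxMemoryThreshold - others
      val canGrow   = available >= mapSize * 2     // assume 2x growth, as the patch does
      if (canGrow) shuffleMemoryMap(threadId) = mapSize * 2
      canGrow
    }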
+ */ + override def iterator: Iterator[(K, C)] = { + if (spilledMaps.isEmpty) { + currentMap.iterator + } else { + new ExternalIterator() + } + } + + /** + * An iterator that sort-merges (K, C) pairs from the in-memory map and the spilled maps + */ + private class ExternalIterator extends Iterator[(K, C)] { + + // A queue that maintains a buffer for each stream we are currently merging + // This queue maintains the invariant that it only contains non-empty buffers + private val mergeHeap = new mutable.PriorityQueue[StreamBuffer] + + // Input streams are derived both from the in-memory map and spilled maps on disk + // The in-memory map is sorted in place, while the spilled maps are already in sorted order + private val sortedMap = currentMap.destructiveSortedIterator(comparator) + private val inputStreams = Seq(sortedMap) ++ spilledMaps + + inputStreams.foreach { it => + val kcPairs = getMorePairs(it) + if (kcPairs.length > 0) { + mergeHeap.enqueue(new StreamBuffer(it, kcPairs)) + } + } + + /** + * Fetch from the given iterator until a key of different hash is retrieved. + * + * In the event of key hash collisions, this ensures no pairs are hidden from being merged. + * Assume the given iterator is in sorted order. + */ + private def getMorePairs(it: Iterator[(K, C)]): ArrayBuffer[(K, C)] = { + val kcPairs = new ArrayBuffer[(K, C)] + if (it.hasNext) { + var kc = it.next() + kcPairs += kc + val minHash = kc._1.hashCode() + while (it.hasNext && kc._1.hashCode() == minHash) { + kc = it.next() + kcPairs += kc + } + } + kcPairs + } + + /** + * If the given buffer contains a value for the given key, merge that value into + * baseCombiner and remove the corresponding (K, C) pair from the buffer. + */ + private def mergeIfKeyExists(key: K, baseCombiner: C, buffer: StreamBuffer): C = { + var i = 0 + while (i < buffer.pairs.length) { + val (k, c) = buffer.pairs(i) + if (k == key) { + buffer.pairs.remove(i) + return mergeCombiners(baseCombiner, c) + } + i += 1 + } + baseCombiner + } + + /** + * Return true if there exists an input stream that still has unvisited pairs. + */ + override def hasNext: Boolean = mergeHeap.length > 0 + + /** + * Select a key with the minimum hash, then combine all values with the same key from all + * input streams + */ + override def next(): (K, C) = { + if (mergeHeap.length == 0) { + throw new NoSuchElementException + } + // Select a key from the StreamBuffer that holds the lowest key hash + val minBuffer = mergeHeap.dequeue() + val (minPairs, minHash) = (minBuffer.pairs, minBuffer.minKeyHash) + var (minKey, minCombiner) = minPairs.remove(0) + assert(minKey.hashCode() == minHash) + + // For all other streams that may have this key (i.e. have the same minimum key hash), + // merge in the corresponding value (if any) from that stream + val mergedBuffers = ArrayBuffer[StreamBuffer](minBuffer) + while (mergeHeap.length > 0 && mergeHeap.head.minKeyHash == minHash) { + val newBuffer = mergeHeap.dequeue() + minCombiner = mergeIfKeyExists(minKey, minCombiner, newBuffer) + mergedBuffers += newBuffer + } + + // Repopulate each visited stream buffer and add it back to the queue if it is non-empty + mergedBuffers.foreach { buffer => + if (buffer.isEmpty) { + buffer.pairs ++= getMorePairs(buffer.iterator) + } + if (!buffer.isEmpty) { + mergeHeap.enqueue(buffer) + } + } + + (minKey, minCombiner) + } + + /** + * A buffer for streaming from a map iterator (in-memory or on-disk) sorted by key hash. + * Each buffer maintains the lowest-ordered keys in the corresponding iterator. 
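The merge performed by next() is a k-way merge over hash-sorted streams. The ordering trick, a max-priority queue with the comparison negated so the lowest key hash surfaces first, can be sketched in isolation like this (hypothetical names):

    import scala.collection.mutable

    // Each buffered stream is ranked by the smallest key hash it currently holds
    // (the buffer is assumed non-empty, as in the real StreamBuffer).
    case class Buffered(pairs: mutable.ArrayBuffer[(String, Int)]) extends Ordered[Buffered] {
      def minKeyHash: Int = pairs.head._1.hashCode()
      // PriorityQueue dequeues the maximum, so invert the comparison to get the minimum hash first.
      def compare(that: Buffered): Int = -minKeyHash.compareTo(that.minKeyHash)
    }

    val heap = new mutable.PriorityQueue[Buffered]()
    heap.enqueue(Buffered(mutable.ArrayBuffer("apple" -> 1)), Buffered(mutable.ArrayBuffer("pear" -> 2)))
    val lowest = heap.dequeue()   // the buffer whose head has the lowest key hash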
Due to + * hash collisions, it is possible for multiple keys to be "tied" for being the lowest. + * + * StreamBuffers are ordered by the minimum key hash found across all of their own pairs. + */ + private case class StreamBuffer(iterator: Iterator[(K, C)], pairs: ArrayBuffer[(K, C)]) + extends Comparable[StreamBuffer] { + + def isEmpty = pairs.length == 0 + + // Invalid if there are no more pairs in this stream + def minKeyHash = { + assert(pairs.length > 0) + pairs.head._1.hashCode() + } + + override def compareTo(other: StreamBuffer): Int = { + // minus sign because mutable.PriorityQueue dequeues the max, not the min + -minKeyHash.compareTo(other.minKeyHash) + } + } + } + + /** + * An iterator that returns (K, C) pairs in sorted order from an on-disk map + */ + private class DiskMapIterator(file: File, blockId: BlockId, batchSizes: ArrayBuffer[Long]) + extends Iterator[(K, C)] { + private val fileStream = new FileInputStream(file) + private val bufferedStream = new FastBufferedInputStream(fileStream, fileBufferSize) + + // An intermediate stream that reads from exactly one batch + // This guards against pre-fetching and other arbitrary behavior of higher level streams + private var batchStream = nextBatchStream() + private var compressedStream = blockManager.wrapForCompression(blockId, batchStream) + private var deserializeStream = ser.deserializeStream(compressedStream) + private var nextItem: (K, C) = null + private var objectsRead = 0 + + /** + * Construct a stream that reads only from the next batch. + */ + private def nextBatchStream(): InputStream = { + if (batchSizes.length > 0) { + ByteStreams.limit(bufferedStream, batchSizes.remove(0)) + } else { + // No more batches left + bufferedStream + } + } + + /** + * Return the next (K, C) pair from the deserialization stream. + * + * If the current batch is drained, construct a stream for the next batch and read from it. + * If no more pairs are left, return null. + */ + private def readNextItem(): (K, C) = { + try { + val item = deserializeStream.readObject().asInstanceOf[(K, C)] + objectsRead += 1 + if (objectsRead == serializerBatchSize) { + batchStream = nextBatchStream() + compressedStream = blockManager.wrapForCompression(blockId, batchStream) + deserializeStream = ser.deserializeStream(compressedStream) + objectsRead = 0 + } + item + } catch { + case e: EOFException => + cleanup() + null + } + } + + override def hasNext: Boolean = { + if (nextItem == null) { + nextItem = readNextItem() + } + nextItem != null + } + + override def next(): (K, C) = { + val item = if (nextItem == null) readNextItem() else nextItem + if (item == null) { + throw new NoSuchElementException + } + nextItem = null + item + } + + // TODO: Ensure this gets called even if the iterator isn't drained. 
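A small sketch of why each batch gets its own length-limited stream: anything buffering on top of the file (compression, deserialization) could otherwise read past a batch boundary, while ByteStreams.limit caps what the inner stream will ever hand out. The names and sizes below are illustrative, not from the patch:

    import java.io.{ByteArrayInputStream, InputStream}
    import scala.collection.mutable.ArrayBuffer
    import com.google.common.io.ByteStreams

    val raw: InputStream = new ByteArrayInputStream(Array.fill[Byte](100)(0))
    val batchSizes = ArrayBuffer(60L, 40L)   // recorded at write time, one entry per batch

    // Hand out a view of the underlying stream capped at the next batch's length, so the
    // layers stacked on top cannot overrun the batch they are deserializing.
    def nextBatchStream(): InputStream =
      if (batchSizes.nonEmpty) ByteStreams.limit(raw, batchSizes.remove(0)) else raw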
+ private def cleanup() { + deserializeStream.close() + file.delete() + } + } +} + +private[spark] object ExternalAppendOnlyMap { + private class KCComparator[K, C] extends Comparator[(K, C)] { + def compare(kc1: (K, C), kc2: (K, C)): Int = { + kc1._1.hashCode().compareTo(kc2._1.hashCode()) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/util/collection/OpenHashMap.scala b/core/src/main/scala/org/apache/spark/util/collection/OpenHashMap.scala index 80545c9688aa6..c26f23d50024a 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/OpenHashMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/OpenHashMap.scala @@ -17,6 +17,7 @@ package org.apache.spark.util.collection +import scala.reflect.ClassTag /** * A fast hash map implementation for nullable keys. This hash map supports insertions and updates, @@ -26,7 +27,7 @@ package org.apache.spark.util.collection * Under the hood, it uses our OpenHashSet implementation. */ private[spark] -class OpenHashMap[K >: Null : ClassManifest, @specialized(Long, Int, Double) V: ClassManifest]( +class OpenHashMap[K >: Null : ClassTag, @specialized(Long, Int, Double) V: ClassTag]( initialCapacity: Int) extends Iterable[(K, V)] with Serializable { diff --git a/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala b/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala index 4592e4f939e5c..5ded5d0b6da84 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala @@ -17,6 +17,7 @@ package org.apache.spark.util.collection +import scala.reflect._ /** * A simple, fast hash set optimized for non-null insertion-only use case, where keys are never @@ -36,7 +37,7 @@ package org.apache.spark.util.collection * to explore all spaces for each key (see http://en.wikipedia.org/wiki/Quadratic_probing). */ private[spark] -class OpenHashSet[@specialized(Long, Int) T: ClassManifest]( +class OpenHashSet[@specialized(Long, Int) T: ClassTag]( initialCapacity: Int, loadFactor: Double) extends Serializable { @@ -62,14 +63,14 @@ class OpenHashSet[@specialized(Long, Int) T: ClassManifest]( // throws: // scala.tools.nsc.symtab.Types$TypeError: type mismatch; // found : scala.reflect.AnyValManifest[Long] - // required: scala.reflect.ClassManifest[Int] + // required: scala.reflect.ClassTag[Int] // at scala.tools.nsc.typechecker.Contexts$Context.error(Contexts.scala:298) // at scala.tools.nsc.typechecker.Infer$Inferencer.error(Infer.scala:207) // ... - val mt = classManifest[T] - if (mt == ClassManifest.Long) { + val mt = classTag[T] + if (mt == ClassTag.Long) { (new LongHasher).asInstanceOf[Hasher[T]] - } else if (mt == ClassManifest.Int) { + } else if (mt == ClassTag.Int) { (new IntHasher).asInstanceOf[Hasher[T]] } else { new Hasher[T] @@ -79,9 +80,12 @@ class OpenHashSet[@specialized(Long, Int) T: ClassManifest]( protected var _capacity = nextPowerOf2(initialCapacity) protected var _mask = _capacity - 1 protected var _size = 0 + protected var _growThreshold = (loadFactor * _capacity).toInt protected var _bitset = new BitSet(_capacity) + def getBitSet = _bitset + // Init of the array in constructor (instead of in declaration) to work around a Scala compiler // specialization bug that would generate two arrays (one for Object and one for specialized T). 
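On the ClassManifest-to-ClassTag changes in this hunk: under Scala 2.10 a ClassTag context bound is what lets these specialized collections allocate unboxed primitive arrays. A minimal standalone sketch of the pattern (not Spark code):

    import scala.reflect.{classTag, ClassTag}

    // A ClassTag context bound supplies the runtime class needed to build Array[T] without boxing.
    class PrimitiveBuffer[@specialized(Long, Int) T: ClassTag](capacity: Int) {
      private val data = new Array[T](capacity)
      def apply(i: Int): T = data(i)
      def update(i: Int, v: T): Unit = { data(i) = v }
      def elementClass = classTag[T].runtimeClass
    }

    val buf = new PrimitiveBuffer[Long](8)
    buf(0) = 42L
    // buf.elementClass == java.lang.Long.TYPE, i.e. a primitive long[] underneath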
protected var _data: Array[T] = _ @@ -115,7 +119,29 @@ class OpenHashSet[@specialized(Long, Int) T: ClassManifest]( * @return The position where the key is placed, plus the highest order bit is set if the key * exists previously. */ - def addWithoutResize(k: T): Int = putInto(_bitset, _data, k) + def addWithoutResize(k: T): Int = { + var pos = hashcode(hasher.hash(k)) & _mask + var i = 1 + while (true) { + if (!_bitset.get(pos)) { + // This is a new key. + _data(pos) = k + _bitset.set(pos) + _size += 1 + return pos | NONEXISTENCE_MASK + } else if (_data(pos) == k) { + // Found an existing key. + return pos + } else { + val delta = i + pos = (pos + delta) & _mask + i += 1 + } + } + // Never reached here + assert(INVALID_POS != INVALID_POS) + INVALID_POS + } /** * Rehash the set if it is overloaded. @@ -126,7 +152,7 @@ class OpenHashSet[@specialized(Long, Int) T: ClassManifest]( * to a new position (in the new data array). */ def rehashIfNeeded(k: T, allocateFunc: (Int) => Unit, moveFunc: (Int, Int) => Unit) { - if (_size > loadFactor * _capacity) { + if (_size > _growThreshold) { rehash(k, allocateFunc, moveFunc) } } @@ -137,7 +163,8 @@ class OpenHashSet[@specialized(Long, Int) T: ClassManifest]( def getPos(k: T): Int = { var pos = hashcode(hasher.hash(k)) & _mask var i = 1 - while (true) { + val maxProbe = _data.size + while (i < maxProbe) { if (!_bitset.get(pos)) { return INVALID_POS } else if (k == _data(pos)) { @@ -155,42 +182,27 @@ class OpenHashSet[@specialized(Long, Int) T: ClassManifest]( /** Return the value at the specified position. */ def getValue(pos: Int): T = _data(pos) + def iterator = new Iterator[T] { + var pos = nextPos(0) + override def hasNext: Boolean = pos != INVALID_POS + override def next(): T = { + val tmp = getValue(pos) + pos = nextPos(pos+1) + tmp + } + } + + /** Return the value at the specified position. */ + def getValueSafe(pos: Int): T = { + assert(_bitset.get(pos)) + _data(pos) + } + /** * Return the next position with an element stored, starting from the given position inclusively. */ def nextPos(fromPos: Int): Int = _bitset.nextSetBit(fromPos) - /** - * Put an entry into the set. Return the position where the key is placed. In addition, the - * highest bit in the returned position is set if the key exists prior to this put. - * - * This function assumes the data array has at least one empty slot. - */ - private def putInto(bitset: BitSet, data: Array[T], k: T): Int = { - val mask = data.length - 1 - var pos = hashcode(hasher.hash(k)) & mask - var i = 1 - while (true) { - if (!bitset.get(pos)) { - // This is a new key. - data(pos) = k - bitset.set(pos) - _size += 1 - return pos | NONEXISTENCE_MASK - } else if (data(pos) == k) { - // Found an existing key. - return pos - } else { - val delta = i - pos = (pos + delta) & mask - i += 1 - } - } - // Never reached here - assert(INVALID_POS != INVALID_POS) - INVALID_POS - } - /** * Double the table's size and re-hash everything. 
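The probing in addWithoutResize and getPos advances by a growing delta (1, then 2, then 3, ...), so the slots visited are offset from the start by triangular numbers; for a power-of-two table this sequence eventually reaches every slot. A tiny sketch with hypothetical helper names:

    // First `steps` slots probed for a hash landing on `start` in a table of size mask + 1.
    def probeSequence(start: Int, mask: Int, steps: Int): Seq[Int] = {
      var pos = start & mask
      var i = 1
      Seq.fill(steps) {
        val current = pos
        pos = (pos + i) & mask   // same update as the patch: the delta grows by one each iteration
        i += 1
        current
      }
    }
    // probeSequence(5, 15, 6) == Seq(5, 6, 8, 11, 15, 4)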
We are not really using k, but it is declared * so Scala compiler can specialize this method (which leads to calling the specialized version @@ -204,34 +216,49 @@ class OpenHashSet[@specialized(Long, Int) T: ClassManifest]( */ private def rehash(k: T, allocateFunc: (Int) => Unit, moveFunc: (Int, Int) => Unit) { val newCapacity = _capacity * 2 - require(newCapacity <= (1 << 29), "Can't make capacity bigger than 2^29 elements") - allocateFunc(newCapacity) - val newData = new Array[T](newCapacity) val newBitset = new BitSet(newCapacity) - var pos = 0 - _size = 0 - while (pos < _capacity) { - if (_bitset.get(pos)) { - val newPos = putInto(newBitset, newData, _data(pos)) - moveFunc(pos, newPos & POSITION_MASK) + val newData = new Array[T](newCapacity) + val newMask = newCapacity - 1 + + var oldPos = 0 + while (oldPos < capacity) { + if (_bitset.get(oldPos)) { + val key = _data(oldPos) + var newPos = hashcode(hasher.hash(key)) & newMask + var i = 1 + var keepGoing = true + // No need to check for equality here when we insert so this has one less if branch than + // the similar code path in addWithoutResize. + while (keepGoing) { + if (!newBitset.get(newPos)) { + // Inserting the key at newPos + newData(newPos) = key + newBitset.set(newPos) + moveFunc(oldPos, newPos) + keepGoing = false + } else { + val delta = i + newPos = (newPos + delta) & newMask + i += 1 + } + } } - pos += 1 + oldPos += 1 } + _bitset = newBitset _data = newData _capacity = newCapacity - _mask = newCapacity - 1 + _mask = newMask + _growThreshold = (loadFactor * newCapacity).toInt } /** - * Re-hash a value to deal better with hash functions that don't differ - * in the lower bits, similar to java.util.HashMap + * Re-hash a value to deal better with hash functions that don't differ in the lower bits. + * We use the Murmur Hash 3 finalization step that's also used in fastutil. */ - private def hashcode(h: Int): Int = { - val r = h ^ (h >>> 20) ^ (h >>> 12) - r ^ (r >>> 7) ^ (r >>> 4) - } + private def hashcode(h: Int): Int = it.unimi.dsi.fastutil.HashCommon.murmurHash3(h) private def nextPowerOf2(n: Int): Int = { val highBit = Integer.highestOneBit(n) @@ -251,7 +278,7 @@ object OpenHashSet { * A set of specialized hash function implementation to avoid boxing hash code computation * in the specialized implementation of OpenHashSet. */ - sealed class Hasher[@specialized(Long, Int) T] { + sealed class Hasher[@specialized(Long, Int) T] extends Serializable { def hash(o: T): Int = o.hashCode() } diff --git a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala index d76143e45aa58..2e1ef06cbc4e1 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala @@ -17,6 +17,7 @@ package org.apache.spark.util.collection +import scala.reflect._ /** * A fast hash map implementation for primitive, non-null keys. This hash map supports @@ -26,15 +27,15 @@ package org.apache.spark.util.collection * Under the hood, it uses our OpenHashSet implementation. 
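For reference, the 32-bit MurmurHash3 finalization step that fastutil's HashCommon.murmurHash3 is understood to apply looks roughly like the following (standard fmix32 constants, written out only to show what replaced the old shift-xor spreader):

    // MurmurHash3 finalizer sketch: mixes high bits into low bits so hash codes that
    // differ only in their upper bits still land in different slots after masking.
    def murmur3Finalize(input: Int): Int = {
      var h = input
      h ^= h >>> 16
      h *= 0x85ebca6b
      h ^= h >>> 13
      h *= 0xc2b2ae35
      h ^= h >>> 16
      h
    }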
*/ private[spark] -class PrimitiveKeyOpenHashMap[@specialized(Long, Int) K: ClassManifest, - @specialized(Long, Int, Double) V: ClassManifest]( +class PrimitiveKeyOpenHashMap[@specialized(Long, Int) K: ClassTag, + @specialized(Long, Int, Double) V: ClassTag]( initialCapacity: Int) extends Iterable[(K, V)] with Serializable { def this() = this(64) - require(classManifest[K] == classManifest[Long] || classManifest[K] == classManifest[Int]) + require(classTag[K] == classTag[Long] || classTag[K] == classTag[Int]) // Init in constructor (instead of in declaration) to work around a Scala compiler specialization // bug that would generate two arrays (one for Object and one for specialized T). diff --git a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala index 20554f0aaba70..b84eb65c62bc7 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveVector.scala @@ -17,11 +17,13 @@ package org.apache.spark.util.collection +import scala.reflect.ClassTag + /** * An append-only, non-threadsafe, array-backed vector that is optimized for primitive types. */ private[spark] -class PrimitiveVector[@specialized(Long, Int, Double) V: ClassManifest](initialSize: Int = 64) { +class PrimitiveVector[@specialized(Long, Int, Double) V: ClassTag](initialSize: Int = 64) { private var _numElements = 0 private var _array: Array[V] = _ diff --git a/core/src/main/scala/org/apache/spark/util/collection/SizeTrackingAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/SizeTrackingAppendOnlyMap.scala new file mode 100644 index 0000000000000..204330dad48b9 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/collection/SizeTrackingAppendOnlyMap.scala @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util.collection + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.util.SizeEstimator +import org.apache.spark.util.collection.SizeTrackingAppendOnlyMap.Sample + +/** + * Append-only map that keeps track of its estimated size in bytes. + * We sample with a slow exponential back-off using the SizeEstimator to amortize the time, + * as each call to SizeEstimator can take a sizable amount of time (order of a few milliseconds). + */ +private[spark] class SizeTrackingAppendOnlyMap[K, V] extends AppendOnlyMap[K, V] { + + /** + * Controls the base of the exponential which governs the rate of sampling. + * E.g., a value of 2 would mean we sample at 1, 2, 4, 8, ... elements. + */ + private val SAMPLE_GROWTH_RATE = 1.1 + + /** All samples taken since last resetSamples(). 
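A hedged sketch of the sampling idea described above: take an exact (and expensive) SizeEstimator measurement only at exponentially spaced update counts, and extrapolate linearly from the last two samples in between:

    // Standalone sketch; Sample mirrors the private case class defined at the bottom of this file.
    case class Sample(size: Long, numUpdates: Long)

    // Given samples ordered newest-first, estimate the size after `numUpdates` updates.
    def estimateSize(samples: List[Sample], numUpdates: Long): Long = samples match {
      case latest :: previous :: _ =>
        val bytesPerUpdate = math.max(0.0,
          (latest.size - previous.size).toDouble / (latest.numUpdates - previous.numUpdates))
        (latest.size + bytesPerUpdate * (numUpdates - latest.numUpdates)).toLong
      case latest :: Nil => latest.size
      case Nil           => 0L
    }

    // With SAMPLE_GROWTH_RATE = 1.1, samples land at updates 1, 2, 3, ... and then only
    // every ~10% more updates, so the cost of SizeEstimator.estimate is amortized away.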
Only the last two are used for extrapolation. */ + private val samples = new ArrayBuffer[Sample]() + + /** Total number of insertions and updates into the map since the last resetSamples(). */ + private var numUpdates: Long = _ + + /** The value of 'numUpdates' at which we will take our next sample. */ + private var nextSampleNum: Long = _ + + /** The average number of bytes per update between our last two samples. */ + private var bytesPerUpdate: Double = _ + + resetSamples() + + /** Called after the map grows in size, as this can be a dramatic change for small objects. */ + def resetSamples() { + numUpdates = 1 + nextSampleNum = 1 + samples.clear() + takeSample() + } + + override def update(key: K, value: V): Unit = { + super.update(key, value) + numUpdates += 1 + if (nextSampleNum == numUpdates) { takeSample() } + } + + override def changeValue(key: K, updateFunc: (Boolean, V) => V): V = { + val newValue = super.changeValue(key, updateFunc) + numUpdates += 1 + if (nextSampleNum == numUpdates) { takeSample() } + newValue + } + + /** Takes a new sample of the current map's size. */ + def takeSample() { + samples += Sample(SizeEstimator.estimate(this), numUpdates) + // Only use the last two samples to extrapolate. If fewer than 2 samples, assume no change. + bytesPerUpdate = math.max(0, samples.toSeq.reverse match { + case latest :: previous :: tail => + (latest.size - previous.size).toDouble / (latest.numUpdates - previous.numUpdates) + case _ => + 0 + }) + nextSampleNum = math.ceil(numUpdates * SAMPLE_GROWTH_RATE).toLong + } + + override protected def growTable() { + super.growTable() + resetSamples() + } + + /** Estimates the current size of the map in bytes. O(1) time. */ + def estimateSize(): Long = { + assert(samples.nonEmpty) + val extrapolatedDelta = bytesPerUpdate * (numUpdates - samples.last.numUpdates) + (samples.last.size + extrapolatedDelta).toLong + } +} + +private object SizeTrackingAppendOnlyMap { + case class Sample(size: Long, numUpdates: Long) +} diff --git a/core/src/test/resources/uncommons-maths-1.2.2.jar b/core/src/test/resources/uncommons-maths-1.2.2.jar deleted file mode 100644 index e126001c1c270..0000000000000 Binary files a/core/src/test/resources/uncommons-maths-1.2.2.jar and /dev/null differ diff --git a/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala index 4434f3b87c920..c443c5266e42b 100644 --- a/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala @@ -27,6 +27,21 @@ import org.apache.spark.SparkContext._ class AccumulatorSuite extends FunSuite with ShouldMatchers with LocalSparkContext { + + implicit def setAccum[A] = new AccumulableParam[mutable.Set[A], A] { + def addInPlace(t1: mutable.Set[A], t2: mutable.Set[A]) : mutable.Set[A] = { + t1 ++= t2 + t1 + } + def addAccumulator(t1: mutable.Set[A], t2: A) : mutable.Set[A] = { + t1 += t2 + t1 + } + def zero(t: mutable.Set[A]) : mutable.Set[A] = { + new mutable.HashSet[A]() + } + } + test ("basic accumulation"){ sc = new SparkContext("local", "test") val acc : Accumulator[Int] = sc.accumulator(0) @@ -51,7 +66,6 @@ class AccumulatorSuite extends FunSuite with ShouldMatchers with LocalSparkConte } test ("add value to collection accumulators") { - import SetAccum._ val maxI = 1000 for (nThreads <- List(1, 10)) { //test single & multi-threaded sc = new SparkContext("local[" + nThreads + "]", "test") @@ -68,22 +82,7 @@ class AccumulatorSuite extends FunSuite with 
ShouldMatchers with LocalSparkConte } } - implicit object SetAccum extends AccumulableParam[mutable.Set[Any], Any] { - def addInPlace(t1: mutable.Set[Any], t2: mutable.Set[Any]) : mutable.Set[Any] = { - t1 ++= t2 - t1 - } - def addAccumulator(t1: mutable.Set[Any], t2: Any) : mutable.Set[Any] = { - t1 += t2 - t1 - } - def zero(t: mutable.Set[Any]) : mutable.Set[Any] = { - new mutable.HashSet[Any]() - } - } - test ("value not readable in tasks") { - import SetAccum._ val maxI = 1000 for (nThreads <- List(1, 10)) { //test single & multi-threaded sc = new SparkContext("local[" + nThreads + "]", "test") @@ -125,7 +124,6 @@ class AccumulatorSuite extends FunSuite with ShouldMatchers with LocalSparkConte } test ("localValue readable in tasks") { - import SetAccum._ val maxI = 1000 for (nThreads <- List(1, 10)) { //test single & multi-threaded sc = new SparkContext("local[" + nThreads + "]", "test") diff --git a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index d2226aa5a5663..ec13b329b25a8 100644 --- a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark +import scala.reflect.ClassTag import org.scalatest.FunSuite import java.io.File import org.apache.spark.rdd._ @@ -25,8 +26,6 @@ import org.apache.spark.storage.{BlockId, StorageLevel, TestBlockId} import org.apache.spark.util.Utils class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { - initLogging() - var checkpointDir: File = _ val partitioner = new HashPartitioner(2) @@ -56,15 +55,15 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { } test("RDDs with one-to-one dependencies") { - testCheckpointing(_.map(x => x.toString)) - testCheckpointing(_.flatMap(x => 1 to x)) - testCheckpointing(_.filter(_ % 2 == 0)) - testCheckpointing(_.sample(false, 0.5, 0)) - testCheckpointing(_.glom()) - testCheckpointing(_.mapPartitions(_.map(_.toString))) - testCheckpointing(_.map(x => (x % 2, 1)).reduceByKey(_ + _).mapValues(_.toString)) - testCheckpointing(_.map(x => (x % 2, 1)).reduceByKey(_ + _).flatMapValues(x => 1 to x)) - testCheckpointing(_.pipe(Seq("cat"))) + testRDD(_.map(x => x.toString)) + testRDD(_.flatMap(x => 1 to x)) + testRDD(_.filter(_ % 2 == 0)) + testRDD(_.sample(false, 0.5, 0)) + testRDD(_.glom()) + testRDD(_.mapPartitions(_.map(_.toString))) + testRDD(_.map(x => (x % 2, 1)).reduceByKey(_ + _).mapValues(_.toString)) + testRDD(_.map(x => (x % 2, 1)).reduceByKey(_ + _).flatMapValues(x => 1 to x)) + testRDD(_.pipe(Seq("cat"))) } test("ParallelCollection") { @@ -96,7 +95,7 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { } test("ShuffledRDD") { - testCheckpointing(rdd => { + testRDD(rdd => { // Creating ShuffledRDD directly as PairRDDFunctions.combineByKey produces a MapPartitionedRDD new ShuffledRDD[Int, Int, (Int, Int)](rdd.map(x => (x % 2, 1)), partitioner) }) @@ -104,25 +103,17 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { test("UnionRDD") { def otherRDD = sc.makeRDD(1 to 10, 1) - - // Test whether the size of UnionRDDPartitions reduce in size after parent RDD is checkpointed. - // Current implementation of UnionRDD has transient reference to parent RDDs, - // so only the partitions will reduce in serialized size, not the RDD. 
- testCheckpointing(_.union(otherRDD), false, true) - testParentCheckpointing(_.union(otherRDD), false, true) + testRDD(_.union(otherRDD)) + testRDDPartitions(_.union(otherRDD)) } test("CartesianRDD") { def otherRDD = sc.makeRDD(1 to 10, 1) - testCheckpointing(new CartesianRDD(sc, _, otherRDD)) - - // Test whether size of CoalescedRDD reduce in size after parent RDD is checkpointed - // Current implementation of CoalescedRDDPartition has transient reference to parent RDD, - // so only the RDD will reduce in serialized size, not the partitions. - testParentCheckpointing(new CartesianRDD(sc, _, otherRDD), true, false) + testRDD(new CartesianRDD(sc, _, otherRDD)) + testRDDPartitions(new CartesianRDD(sc, _, otherRDD)) // Test that the CartesianRDD updates parent partitions (CartesianRDD.s1/s2) after - // the parent RDD has been checkpointed and parent partitions have been changed to HadoopPartitions. + // the parent RDD has been checkpointed and parent partitions have been changed. // Note that this test is very specific to the current implementation of CartesianRDD. val ones = sc.makeRDD(1 to 100, 10).map(x => x) ones.checkpoint() // checkpoint that MappedRDD @@ -133,23 +124,20 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { val splitAfterCheckpoint = serializeDeserialize(cartesian.partitions.head.asInstanceOf[CartesianPartition]) assert( - (splitAfterCheckpoint.s1 != splitBeforeCheckpoint.s1) && - (splitAfterCheckpoint.s2 != splitBeforeCheckpoint.s2), - "CartesianRDD.parents not updated after parent RDD checkpointed" + (splitAfterCheckpoint.s1.getClass != splitBeforeCheckpoint.s1.getClass) && + (splitAfterCheckpoint.s2.getClass != splitBeforeCheckpoint.s2.getClass), + "CartesianRDD.s1 and CartesianRDD.s2 not updated after parent RDD is checkpointed" ) } test("CoalescedRDD") { - testCheckpointing(_.coalesce(2)) - - // Test whether size of CoalescedRDD reduce in size after parent RDD is checkpointed - // Current implementation of CoalescedRDDPartition has transient reference to parent RDD, - // so only the RDD will reduce in serialized size, not the partitions. - testParentCheckpointing(_.coalesce(2), true, false) + testRDD(_.coalesce(2)) + testRDDPartitions(_.coalesce(2)) - // Test that the CoalescedRDDPartition updates parent partitions (CoalescedRDDPartition.parents) after - // the parent RDD has been checkpointed and parent partitions have been changed to HadoopPartitions. - // Note that this test is very specific to the current implementation of CoalescedRDDPartitions + // Test that the CoalescedRDDPartition updates parent partitions (CoalescedRDDPartition.parents) + // after the parent RDD has been checkpointed and parent partitions have been changed. + // Note that this test is very specific to the current implementation of + // CoalescedRDDPartitions. 
val ones = sc.makeRDD(1 to 100, 10).map(x => x) ones.checkpoint() // checkpoint that MappedRDD val coalesced = new CoalescedRDD(ones, 2) @@ -159,33 +147,78 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { val splitAfterCheckpoint = serializeDeserialize(coalesced.partitions.head.asInstanceOf[CoalescedRDDPartition]) assert( - splitAfterCheckpoint.parents.head != splitBeforeCheckpoint.parents.head, - "CoalescedRDDPartition.parents not updated after parent RDD checkpointed" + splitAfterCheckpoint.parents.head.getClass != splitBeforeCheckpoint.parents.head.getClass, + "CoalescedRDDPartition.parents not updated after parent RDD is checkpointed" ) } test("CoGroupedRDD") { - val longLineageRDD1 = generateLongLineageRDDForCoGroupedRDD() - testCheckpointing(rdd => { + val longLineageRDD1 = generateFatPairRDD() + testRDD(rdd => { CheckpointSuite.cogroup(longLineageRDD1, rdd.map(x => (x % 2, 1)), partitioner) - }, false, true) + }) - val longLineageRDD2 = generateLongLineageRDDForCoGroupedRDD() - testParentCheckpointing(rdd => { + val longLineageRDD2 = generateFatPairRDD() + testRDDPartitions(rdd => { CheckpointSuite.cogroup( longLineageRDD2, sc.makeRDD(1 to 2, 2).map(x => (x % 2, 1)), partitioner) - }, false, true) + }) } test("ZippedRDD") { - testCheckpointing( - rdd => new ZippedRDD(sc, rdd, rdd.map(x => x)), true, false) - - // Test whether size of ZippedRDD reduce in size after parent RDD is checkpointed - // Current implementation of ZippedRDDPartitions has transient references to parent RDDs, - // so only the RDD will reduce in serialized size, not the partitions. - testParentCheckpointing( - rdd => new ZippedRDD(sc, rdd, rdd.map(x => x)), true, false) + testRDD(rdd => new ZippedRDD(sc, rdd, rdd.map(x => x))) + testRDDPartitions(rdd => new ZippedRDD(sc, rdd, rdd.map(x => x))) + + // Test that the ZippedPartition updates parent partitions + // after the parent RDD has been checkpointed and parent partitions have been changed. + // Note that this test is very specific to the current implementation of ZippedRDD. + val rdd = generateFatRDD() + val zippedRDD = new ZippedRDD(sc, rdd, rdd.map(x => x)) + zippedRDD.rdd1.checkpoint() + zippedRDD.rdd2.checkpoint() + val partitionBeforeCheckpoint = + serializeDeserialize(zippedRDD.partitions.head.asInstanceOf[ZippedPartition[_, _]]) + zippedRDD.count() + val partitionAfterCheckpoint = + serializeDeserialize(zippedRDD.partitions.head.asInstanceOf[ZippedPartition[_, _]]) + assert( + partitionAfterCheckpoint.partition1.getClass != partitionBeforeCheckpoint.partition1.getClass && + partitionAfterCheckpoint.partition2.getClass != partitionBeforeCheckpoint.partition2.getClass, + "ZippedRDD.partition1 and ZippedRDD.partition2 not updated after parent RDD is checkpointed" + ) + } + + test("PartitionerAwareUnionRDD") { + testRDD(rdd => { + new PartitionerAwareUnionRDD[(Int, Int)](sc, Array( + generateFatPairRDD(), + rdd.map(x => (x % 2, 1)).reduceByKey(partitioner, _ + _) + )) + }) + + testRDDPartitions(rdd => { + new PartitionerAwareUnionRDD[(Int, Int)](sc, Array( + generateFatPairRDD(), + rdd.map(x => (x % 2, 1)).reduceByKey(partitioner, _ + _) + )) + }) + + // Test that the PartitionerAwareUnionRDD updates parent partitions + // (PartitionerAwareUnionRDD.parents) after the parent RDD has been checkpointed and parent + // partitions have been changed. Note that this test is very specific to the current + // implementation of PartitionerAwareUnionRDD. 
+ val pairRDD = generateFatPairRDD() + pairRDD.checkpoint() + val unionRDD = new PartitionerAwareUnionRDD(sc, Array(pairRDD)) + val partitionBeforeCheckpoint = serializeDeserialize( + unionRDD.partitions.head.asInstanceOf[PartitionerAwareUnionRDDPartition]) + pairRDD.count() + val partitionAfterCheckpoint = serializeDeserialize( + unionRDD.partitions.head.asInstanceOf[PartitionerAwareUnionRDDPartition]) + assert( + partitionBeforeCheckpoint.parents.head.getClass != partitionAfterCheckpoint.parents.head.getClass, + "PartitionerAwareUnionRDDPartition.parents not updated after parent RDD is checkpointed" + ) } test("CheckpointRDD with zero partitions") { @@ -199,29 +232,32 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { } /** - * Test checkpointing of the final RDD generated by the given operation. By default, - * this method tests whether the size of serialized RDD has reduced after checkpointing or not. - * It can also test whether the size of serialized RDD partitions has reduced after checkpointing or - * not, but this is not done by default as usually the partitions do not refer to any RDD and - * therefore never store the lineage. + * Test checkpointing of the RDD generated by the given operation. It tests whether the + * serialized size of the RDD is reduce after checkpointing or not. This function should be called + * on all RDDs that have a parent RDD (i.e., do not call on ParallelCollection, BlockRDD, etc.). */ - def testCheckpointing[U: ClassManifest]( - op: (RDD[Int]) => RDD[U], - testRDDSize: Boolean = true, - testRDDPartitionSize: Boolean = false - ) { + def testRDD[U: ClassTag](op: (RDD[Int]) => RDD[U]) { // Generate the final RDD using given RDD operation - val baseRDD = generateLongLineageRDD() + val baseRDD = generateFatRDD() val operatedRDD = op(baseRDD) val parentRDD = operatedRDD.dependencies.headOption.orNull val rddType = operatedRDD.getClass.getSimpleName val numPartitions = operatedRDD.partitions.length + // Force initialization of all the data structures in RDDs + // Without this, serializing the RDD will give a wrong estimate of the size of the RDD + initializeRdd(operatedRDD) + + val partitionsBeforeCheckpoint = operatedRDD.partitions + // Find serialized sizes before and after the checkpoint - val (rddSizeBeforeCheckpoint, splitSizeBeforeCheckpoint) = getSerializedSizes(operatedRDD) + logInfo("RDD after checkpoint: " + operatedRDD + "\n" + operatedRDD.toDebugString) + val (rddSizeBeforeCheckpoint, partitionSizeBeforeCheckpoint) = getSerializedSizes(operatedRDD) operatedRDD.checkpoint() val result = operatedRDD.collect() - val (rddSizeAfterCheckpoint, splitSizeAfterCheckpoint) = getSerializedSizes(operatedRDD) + operatedRDD.collect() // force re-initialization of post-checkpoint lazy variables + val (rddSizeAfterCheckpoint, partitionSizeAfterCheckpoint) = getSerializedSizes(operatedRDD) + logInfo("RDD after checkpoint: " + operatedRDD + "\n" + operatedRDD.toDebugString) // Test whether the checkpoint file has been created assert(sc.checkpointFile[U](operatedRDD.getCheckpointFile.get).collect() === result) @@ -229,6 +265,9 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { // Test whether dependencies have been changed from its earlier parent RDD assert(operatedRDD.dependencies.head.rdd != parentRDD) + // Test whether the partitions have been changed from its earlier partitions + assert(operatedRDD.partitions.toList != partitionsBeforeCheckpoint.toList) + // Test whether the partitions have been changed to 
the new Hadoop partitions assert(operatedRDD.partitions.toList === operatedRDD.checkpointData.get.getPartitions.toList) @@ -238,122 +277,72 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { // Test whether the data in the checkpointed RDD is same as original assert(operatedRDD.collect() === result) - // Test whether serialized size of the RDD has reduced. If the RDD - // does not have any dependency to another RDD (e.g., ParallelCollection, - // ShuffleRDD with ShuffleDependency), it may not reduce in size after checkpointing. - if (testRDDSize) { - logInfo("Size of " + rddType + - "[" + rddSizeBeforeCheckpoint + " --> " + rddSizeAfterCheckpoint + "]") - assert( - rddSizeAfterCheckpoint < rddSizeBeforeCheckpoint, - "Size of " + rddType + " did not reduce after checkpointing " + - "[" + rddSizeBeforeCheckpoint + " --> " + rddSizeAfterCheckpoint + "]" - ) - } + // Test whether serialized size of the RDD has reduced. + logInfo("Size of " + rddType + + " [" + rddSizeBeforeCheckpoint + " --> " + rddSizeAfterCheckpoint + "]") + assert( + rddSizeAfterCheckpoint < rddSizeBeforeCheckpoint, + "Size of " + rddType + " did not reduce after checkpointing " + + " [" + rddSizeBeforeCheckpoint + " --> " + rddSizeAfterCheckpoint + "]" + ) - // Test whether serialized size of the partitions has reduced. If the partitions - // do not have any non-transient reference to another RDD or another RDD's partitions, it - // does not refer to a lineage and therefore may not reduce in size after checkpointing. - // However, if the original partitions before checkpointing do refer to a parent RDD, the partitions - // must be forgotten after checkpointing (to remove all reference to parent RDDs) and - // replaced with the HadooPartitions of the checkpointed RDD. - if (testRDDPartitionSize) { - logInfo("Size of " + rddType + " partitions " - + "[" + splitSizeBeforeCheckpoint + " --> " + splitSizeAfterCheckpoint + "]") - assert( - splitSizeAfterCheckpoint < splitSizeBeforeCheckpoint, - "Size of " + rddType + " partitions did not reduce after checkpointing " + - "[" + splitSizeBeforeCheckpoint + " --> " + splitSizeAfterCheckpoint + "]" - ) - } } /** * Test whether checkpointing of the parent of the generated RDD also * truncates the lineage or not. Some RDDs like CoGroupedRDD hold on to its parent * RDDs partitions. So even if the parent RDD is checkpointed and its partitions changed, - * this RDD will remember the partitions and therefore potentially the whole lineage. + * the generated RDD will remember the partitions and therefore potentially the whole lineage. + * This function should be called only those RDD whose partitions refer to parent RDD's + * partitions (i.e., do not call it on simple RDD like MappedRDD). 
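For readers unfamiliar with the feature these tests exercise, a minimal hypothetical sketch of RDD checkpointing; the directory path and app name are made up, and the assertions mirror in spirit what testRDD checks:

    import org.apache.spark.SparkContext

    val sc = new SparkContext("local", "checkpoint-sketch")
    sc.setCheckpointDir("/tmp/checkpoint-sketch")    // assumed scratch directory

    val rdd = sc.makeRDD(1 to 100, 4).map(_ + 1).map(_.toString)
    rdd.checkpoint()                                 // mark for checkpointing
    val result = rdd.collect()                       // first action writes the checkpoint

    assert(rdd.isCheckpointed)
    assert(rdd.getCheckpointFile.isDefined)
    assert(rdd.collect().toSeq == result.toSeq)      // data survives lineage truncation
    sc.stop()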
+ * */ - def testParentCheckpointing[U: ClassManifest]( - op: (RDD[Int]) => RDD[U], - testRDDSize: Boolean, - testRDDPartitionSize: Boolean - ) { + def testRDDPartitions[U: ClassTag](op: (RDD[Int]) => RDD[U]) { // Generate the final RDD using given RDD operation - val baseRDD = generateLongLineageRDD() + val baseRDD = generateFatRDD() val operatedRDD = op(baseRDD) - val parentRDD = operatedRDD.dependencies.head.rdd + val parentRDDs = operatedRDD.dependencies.map(_.rdd) val rddType = operatedRDD.getClass.getSimpleName - val parentRDDType = parentRDD.getClass.getSimpleName - // Get the partitions and dependencies of the parent in case they're lazily computed - parentRDD.dependencies - parentRDD.partitions + // Force initialization of all the data structures in RDDs + // Without this, serializing the RDD will give a wrong estimate of the size of the RDD + initializeRdd(operatedRDD) // Find serialized sizes before and after the checkpoint - val (rddSizeBeforeCheckpoint, splitSizeBeforeCheckpoint) = getSerializedSizes(operatedRDD) - parentRDD.checkpoint() // checkpoint the parent RDD, not the generated one - val result = operatedRDD.collect() - val (rddSizeAfterCheckpoint, splitSizeAfterCheckpoint) = getSerializedSizes(operatedRDD) + logInfo("RDD after checkpoint: " + operatedRDD + "\n" + operatedRDD.toDebugString) + val (rddSizeBeforeCheckpoint, partitionSizeBeforeCheckpoint) = getSerializedSizes(operatedRDD) + parentRDDs.foreach(_.checkpoint()) // checkpoint the parent RDD, not the generated one + val result = operatedRDD.collect() // force checkpointing + operatedRDD.collect() // force re-initialization of post-checkpoint lazy variables + val (rddSizeAfterCheckpoint, partitionSizeAfterCheckpoint) = getSerializedSizes(operatedRDD) + logInfo("RDD after checkpoint: " + operatedRDD + "\n" + operatedRDD.toDebugString) // Test whether the data in the checkpointed RDD is same as original assert(operatedRDD.collect() === result) - // Test whether serialized size of the RDD has reduced because of its parent being - // checkpointed. If this RDD or its parent RDD do not have any dependency - // to another RDD (e.g., ParallelCollection, ShuffleRDD with ShuffleDependency), it may - // not reduce in size after checkpointing. - if (testRDDSize) { - assert( - rddSizeAfterCheckpoint < rddSizeBeforeCheckpoint, - "Size of " + rddType + " did not reduce after checkpointing parent " + parentRDDType + - "[" + rddSizeBeforeCheckpoint + " --> " + rddSizeAfterCheckpoint + "]" - ) - } - - // Test whether serialized size of the partitions has reduced because of its parent being - // checkpointed. If the partitions do not have any non-transient reference to another RDD - // or another RDD's partitions, it does not refer to a lineage and therefore may not reduce - // in size after checkpointing. However, if the partitions do refer to the *partitions* of a parent - // RDD, then these partitions must update reference to the parent RDD partitions as the parent RDD's - // partitions must have changed after checkpointing. 
- if (testRDDPartitionSize) { - assert( - splitSizeAfterCheckpoint < splitSizeBeforeCheckpoint, - "Size of " + rddType + " partitions did not reduce after checkpointing parent " + parentRDDType + - "[" + splitSizeBeforeCheckpoint + " --> " + splitSizeAfterCheckpoint + "]" - ) - } - + // Test whether serialized size of the partitions has reduced + logInfo("Size of partitions of " + rddType + + " [" + partitionSizeBeforeCheckpoint + " --> " + partitionSizeAfterCheckpoint + "]") + assert( + partitionSizeAfterCheckpoint < partitionSizeBeforeCheckpoint, + "Size of " + rddType + " partitions did not reduce after checkpointing parent RDDs" + + " [" + partitionSizeBeforeCheckpoint + " --> " + partitionSizeAfterCheckpoint + "]" + ) } /** - * Generate an RDD with a long lineage of one-to-one dependencies. + * Generate an RDD such that both the RDD and its partitions have large size. */ - def generateLongLineageRDD(): RDD[Int] = { - var rdd = sc.makeRDD(1 to 100, 4) - for (i <- 1 to 50) { - rdd = rdd.map(x => x + 1) - } - rdd + def generateFatRDD(): RDD[Int] = { + new FatRDD(sc.makeRDD(1 to 100, 4)).map(x => x) } /** - * Generate an RDD with a long lineage specifically for CoGroupedRDD. - * A CoGroupedRDD can have a long lineage only one of its parents have a long lineage - * and narrow dependency with this RDD. This method generate such an RDD by a sequence - * of cogroups and mapValues which creates a long lineage of narrow dependencies. + * Generate an pair RDD (with partitioner) such that both the RDD and its partitions + * have large size. */ - def generateLongLineageRDDForCoGroupedRDD() = { - val add = (x: (Seq[Int], Seq[Int])) => (x._1 ++ x._2).reduce(_ + _) - - def ones: RDD[(Int, Int)] = sc.makeRDD(1 to 2, 2).map(x => (x % 2, 1)).reduceByKey(partitioner, _ + _) - - var cogrouped: RDD[(Int, (Seq[Int], Seq[Int]))] = ones.cogroup(ones) - for(i <- 1 to 10) { - cogrouped = cogrouped.mapValues(add).cogroup(ones) - } - cogrouped.mapValues(add) + def generateFatPairRDD() = { + new FatPairRDD(sc.makeRDD(1 to 100, 4), partitioner).mapValues(x => x) } /** @@ -361,8 +350,26 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { * upon checkpointing. Ignores the checkpointData field, which may grow when we checkpoint. */ def getSerializedSizes(rdd: RDD[_]): (Int, Int) = { - (Utils.serialize(rdd).length - Utils.serialize(rdd.checkpointData).length, - Utils.serialize(rdd.partitions).length) + val rddSize = Utils.serialize(rdd).size + val rddCpDataSize = Utils.serialize(rdd.checkpointData).size + val rddPartitionSize = Utils.serialize(rdd.partitions).size + val rddDependenciesSize = Utils.serialize(rdd.dependencies).size + + // Print detailed size, helps in debugging + logInfo("Serialized sizes of " + rdd + + ": RDD = " + rddSize + + ", RDD checkpoint data = " + rddCpDataSize + + ", RDD partitions = " + rddPartitionSize + + ", RDD dependencies = " + rddDependenciesSize + ) + // this makes sure that serializing the RDD's checkpoint data does not + // serialize the whole RDD as well + assert( + rddSize > rddCpDataSize, + "RDD's checkpoint data (" + rddCpDataSize + ") is equal or larger than the " + + "whole RDD with checkpoint data (" + rddSize + ")" + ) + (rddSize - rddCpDataSize, rddPartitionSize) } /** @@ -374,8 +381,49 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { val bytes = Utils.serialize(obj) Utils.deserialize[T](bytes) } + + /** + * Recursively force the initialization of the all members of an RDD and it parents. 
+ */ + def initializeRdd(rdd: RDD[_]) { + rdd.partitions // forces the + rdd.dependencies.map(_.rdd).foreach(initializeRdd(_)) + } } +/** RDD partition that has large serialized size. */ +class FatPartition(val partition: Partition) extends Partition { + val bigData = new Array[Byte](10000) + def index: Int = partition.index +} + +/** RDD that has large serialized size. */ +class FatRDD(parent: RDD[Int]) extends RDD[Int](parent) { + val bigData = new Array[Byte](100000) + + protected def getPartitions: Array[Partition] = { + parent.partitions.map(p => new FatPartition(p)) + } + + def compute(split: Partition, context: TaskContext): Iterator[Int] = { + parent.compute(split.asInstanceOf[FatPartition].partition, context) + } +} + +/** Pair RDD that has large serialized size. */ +class FatPairRDD(parent: RDD[Int], _partitioner: Partitioner) extends RDD[(Int, Int)](parent) { + val bigData = new Array[Byte](100000) + + protected def getPartitions: Array[Partition] = { + parent.partitions.map(p => new FatPartition(p)) + } + + @transient override val partitioner = Some(_partitioner) + + def compute(split: Partition, context: TaskContext): Iterator[(Int, Int)] = { + parent.compute(split.asInstanceOf[FatPartition].partition, context).map(x => (x, x)) + } +} object CheckpointSuite { // This is a custom cogroup function that does not use mapValues like diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala index 480bac84f3649..8de7a328d1cf5 100644 --- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala @@ -122,7 +122,24 @@ class DistributedSuite extends FunSuite with ShouldMatchers with BeforeAndAfter sc.parallelize(1 to 10, 10).foreach(x => println(x / 0)) } assert(thrown.getClass === classOf[SparkException]) - assert(thrown.getMessage.contains("more than 4 times")) + assert(thrown.getMessage.contains("failed 4 times")) + } + + test("repeatedly failing task that crashes JVM") { + // Ensures that if a task fails in a way that crashes the JVM, the job eventually fails rather + // than hanging due to retrying the failed task infinitely many times (eventually the + // standalone scheduler will remove the application, causing the job to hang waiting to + // reconnect to the master). + sc = new SparkContext(clusterUrl, "test") + failAfter(Span(100000, Millis)) { + val thrown = intercept[SparkException] { + // One of the tasks always fails. + sc.parallelize(1 to 10, 2).foreach { x => if (x == 1) System.exit(42) } + } + assert(thrown.getClass === classOf[SparkException]) + System.out.println(thrown.getMessage) + assert(thrown.getMessage.contains("failed 4 times")) + } } test("caching") { @@ -303,12 +320,13 @@ class DistributedSuite extends FunSuite with ShouldMatchers with BeforeAndAfter Thread.sleep(200) } } catch { - case _ => { Thread.sleep(10) } + case _: Throwable => { Thread.sleep(10) } // Do nothing. We might see exceptions because block manager // is racing this thread to remove entries from the driver. 
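The two "repeatedly failing task" tests above lean on Spark aborting a job once a task has exhausted its retry budget instead of retrying forever. A rough sketch of that behaviour from application code, assuming a master that retries tasks (e.g. a local-cluster or standalone URL) and the default limit of four attempts:

// Illustrative sketch, not part of the patch.
import org.apache.spark.{SparkContext, SparkException}

def jobFailsAfterBoundedRetries(sc: SparkContext): Unit = {
  try {
    // Every attempt of this task throws, so the stage can never succeed.
    sc.parallelize(1 to 10, 2).foreach { _ => throw new RuntimeException("boom") }
  } catch {
    case e: SparkException =>
      // The message reports how many attempts were made, e.g. "failed 4 times".
      println(e.getMessage)
  }
}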
} } } + } object DistributedSuite { diff --git a/core/src/test/scala/org/apache/spark/DriverSuite.scala b/core/src/test/scala/org/apache/spark/DriverSuite.scala index 01a72d8401636..fb89537258542 100644 --- a/core/src/test/scala/org/apache/spark/DriverSuite.scala +++ b/core/src/test/scala/org/apache/spark/DriverSuite.scala @@ -30,13 +30,15 @@ import org.apache.spark.util.Utils class DriverSuite extends FunSuite with Timeouts { test("driver should exit after finishing") { - assert(System.getenv("SPARK_HOME") != null) + val sparkHome = sys.env.get("SPARK_HOME").orElse(sys.props.get("spark.home")).get // Regression test for SPARK-530: "Spark driver process doesn't exit after finishing" val masters = Table(("master"), ("local"), ("local-cluster[2,1,512]")) forAll(masters) { (master: String) => - failAfter(30 seconds) { - Utils.execute(Seq("./spark-class", "org.apache.spark.DriverWithoutCleanup", master), - new File(System.getenv("SPARK_HOME"))) + failAfter(60 seconds) { + Utils.executeAndGetOutput( + Seq("./bin/spark-class", "org.apache.spark.DriverWithoutCleanup", master), + new File(sparkHome), + Map("SPARK_TESTING" -> "1", "SPARK_HOME" -> sparkHome)) } } } diff --git a/core/src/test/scala/org/apache/spark/FailureSuite.scala b/core/src/test/scala/org/apache/spark/FailureSuite.scala index b7e95b639032c..6465a80e4c8ba 100644 --- a/core/src/test/scala/org/apache/spark/FailureSuite.scala +++ b/core/src/test/scala/org/apache/spark/FailureSuite.scala @@ -42,7 +42,7 @@ class FailureSuite extends FunSuite with LocalSparkContext { // Run a 3-task map job in which task 1 deterministically fails once, and check // whether the job completes successfully and we ran 4 tasks in total. test("failure in a single-stage job") { - sc = new SparkContext("local[1,1]", "test") + sc = new SparkContext("local[1,2]", "test") val results = sc.makeRDD(1 to 3, 3).map { x => FailureSuiteState.synchronized { FailureSuiteState.tasksRun += 1 @@ -62,7 +62,7 @@ class FailureSuite extends FunSuite with LocalSparkContext { // Run a map-reduce job in which a reduce task deterministically fails once. 
test("failure in a two-stage job") { - sc = new SparkContext("local[1,1]", "test") + sc = new SparkContext("local[1,2]", "test") val results = sc.makeRDD(1 to 3).map(x => (x, x)).groupByKey(3).map { case (k, v) => FailureSuiteState.synchronized { diff --git a/core/src/test/scala/org/apache/spark/FileServerSuite.scala b/core/src/test/scala/org/apache/spark/FileServerSuite.scala index c210dd5c3b4e2..a2eb9a4e84696 100644 --- a/core/src/test/scala/org/apache/spark/FileServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileServerSuite.scala @@ -17,33 +17,49 @@ package org.apache.spark +import java.io._ +import java.util.jar.{JarEntry, JarOutputStream} + +import SparkContext._ import com.google.common.io.Files import org.scalatest.FunSuite -import java.io.{File, PrintWriter, FileReader, BufferedReader} -import SparkContext._ class FileServerSuite extends FunSuite with LocalSparkContext { @transient var tmpFile: File = _ - @transient var testJarFile: File = _ - - override def beforeEach() { - super.beforeEach() - // Create a sample text file - val tmpdir = new File(Files.createTempDir(), "test") - tmpdir.mkdir() - tmpFile = new File(tmpdir, "FileServerSuite.txt") - val pw = new PrintWriter(tmpFile) + @transient var tmpJarUrl: String = _ + + override def beforeAll() { + super.beforeAll() + val tmpDir = new File(Files.createTempDir(), "test") + tmpDir.mkdir() + + val textFile = new File(tmpDir, "FileServerSuite.txt") + val pw = new PrintWriter(textFile) pw.println("100") pw.close() - } + + val jarFile = new File(tmpDir, "test.jar") + val jarStream = new FileOutputStream(jarFile) + val jar = new JarOutputStream(jarStream, new java.util.jar.Manifest()) - override def afterEach() { - super.afterEach() - // Clean up downloaded file - if (tmpFile.exists) { - tmpFile.delete() + val jarEntry = new JarEntry(textFile.getName) + jar.putNextEntry(jarEntry) + + val in = new FileInputStream(textFile) + val buffer = new Array[Byte](10240) + var nRead = 0 + while (nRead <= 0) { + nRead = in.read(buffer, 0, buffer.length) + jar.write(buffer, 0, nRead) } + + in.close() + jar.close() + jarStream.close() + + tmpFile = textFile + tmpJarUrl = jarFile.toURI.toURL.toString } test("Distributing files locally") { @@ -77,18 +93,13 @@ class FileServerSuite extends FunSuite with LocalSparkContext { test ("Dynamically adding JARS locally") { sc = new SparkContext("local[4]", "test") - val sampleJarFile = getClass.getClassLoader.getResource("uncommons-maths-1.2.2.jar").getFile() - sc.addJar(sampleJarFile) - val testData = Array((1,1), (1,1), (2,1), (3,5), (2,3), (3,0)) - val result = sc.parallelize(testData).reduceByKey { (x,y) => - val fac = Thread.currentThread.getContextClassLoader() - .loadClass("org.uncommons.maths.Maths") - .getDeclaredMethod("factorial", classOf[Int]) - val a = fac.invoke(null, x.asInstanceOf[java.lang.Integer]).asInstanceOf[Long].toInt - val b = fac.invoke(null, y.asInstanceOf[java.lang.Integer]).asInstanceOf[Long].toInt - a + b - }.collect() - assert(result.toSet === Set((1,2), (2,7), (3,121))) + sc.addJar(tmpJarUrl) + val testData = Array((1, 1)) + sc.parallelize(testData).foreach { x => + if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) { + throw new SparkException("jar not added") + } + } } test("Distributing files on a standalone cluster") { @@ -107,33 +118,24 @@ class FileServerSuite extends FunSuite with LocalSparkContext { test ("Dynamically adding JARS on a standalone cluster") { sc = new SparkContext("local-cluster[1,1,512]", "test") - val 
sampleJarFile = getClass.getClassLoader.getResource("uncommons-maths-1.2.2.jar").getFile() - sc.addJar(sampleJarFile) - val testData = Array((1,1), (1,1), (2,1), (3,5), (2,3), (3,0)) - val result = sc.parallelize(testData).reduceByKey { (x,y) => - val fac = Thread.currentThread.getContextClassLoader() - .loadClass("org.uncommons.maths.Maths") - .getDeclaredMethod("factorial", classOf[Int]) - val a = fac.invoke(null, x.asInstanceOf[java.lang.Integer]).asInstanceOf[Long].toInt - val b = fac.invoke(null, y.asInstanceOf[java.lang.Integer]).asInstanceOf[Long].toInt - a + b - }.collect() - assert(result.toSet === Set((1,2), (2,7), (3,121))) + sc.addJar(tmpJarUrl) + val testData = Array((1,1)) + sc.parallelize(testData).foreach { x => + if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) { + throw new SparkException("jar not added") + } + } } test ("Dynamically adding JARS on a standalone cluster using local: URL") { sc = new SparkContext("local-cluster[1,1,512]", "test") - val sampleJarFile = getClass.getClassLoader.getResource("uncommons-maths-1.2.2.jar").getFile() - sc.addJar(sampleJarFile.replace("file", "local")) - val testData = Array((1,1), (1,1), (2,1), (3,5), (2,3), (3,0)) - val result = sc.parallelize(testData).reduceByKey { (x,y) => - val fac = Thread.currentThread.getContextClassLoader() - .loadClass("org.uncommons.maths.Maths") - .getDeclaredMethod("factorial", classOf[Int]) - val a = fac.invoke(null, x.asInstanceOf[java.lang.Integer]).asInstanceOf[Long].toInt - val b = fac.invoke(null, y.asInstanceOf[java.lang.Integer]).asInstanceOf[Long].toInt - a + b - }.collect() - assert(result.toSet === Set((1,2), (2,7), (3,121))) + sc.addJar(tmpJarUrl.replace("file", "local")) + val testData = Array((1,1)) + sc.parallelize(testData).foreach { x => + if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) { + throw new SparkException("jar not added") + } + } } + } diff --git a/core/src/test/scala/org/apache/spark/JavaAPISuite.java b/core/src/test/scala/org/apache/spark/JavaAPISuite.java index 07f312f0a0f7e..8c573ac0d65e0 100644 --- a/core/src/test/scala/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/scala/org/apache/spark/JavaAPISuite.java @@ -364,6 +364,20 @@ public Boolean call(Double x) { List take = rdd.take(5); } + @Test + public void javaDoubleRDDHistoGram() { + JavaDoubleRDD rdd = sc.parallelizeDoubles(Arrays.asList(1.0, 2.0, 3.0, 4.0)); + // Test using generated buckets + Tuple2 results = rdd.histogram(2); + double[] expected_buckets = {1.0, 2.5, 4.0}; + long[] expected_counts = {2, 2}; + Assert.assertArrayEquals(expected_buckets, results._1, 0.1); + Assert.assertArrayEquals(expected_counts, results._2); + // Test with provided buckets + long[] histogram = rdd.histogram(expected_buckets); + Assert.assertArrayEquals(expected_counts, histogram); + } + @Test public void map() { JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); @@ -373,18 +387,21 @@ public Double call(Integer x) { return 1.0 * x; } }).cache(); + doubles.collect(); JavaPairRDD pairs = rdd.map(new PairFunction() { @Override public Tuple2 call(Integer x) { return new Tuple2(x, x); } }).cache(); + pairs.collect(); JavaRDD strings = rdd.map(new Function() { @Override public String call(Integer x) { return x.toString(); } }).cache(); + strings.collect(); } @Test @@ -837,7 +854,7 @@ public String call(Integer t) throws Exception { public void checkpointAndComputation() { File tempDir = Files.createTempDir(); JavaRDD rdd = 
sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); - sc.setCheckpointDir(tempDir.getAbsolutePath(), true); + sc.setCheckpointDir(tempDir.getAbsolutePath()); Assert.assertEquals(false, rdd.isCheckpointed()); rdd.checkpoint(); rdd.count(); // Forces the DAG to cause a checkpoint @@ -849,7 +866,7 @@ public void checkpointAndComputation() { public void checkpointAndRestore() { File tempDir = Files.createTempDir(); JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); - sc.setCheckpointDir(tempDir.getAbsolutePath(), true); + sc.setCheckpointDir(tempDir.getAbsolutePath()); Assert.assertEquals(false, rdd.isCheckpointed()); rdd.checkpoint(); rdd.count(); // Forces the DAG to cause a checkpoint @@ -916,4 +933,50 @@ public Tuple2 call(Integer i) throws Exception { parts[1]); } + @Test + public void countApproxDistinct() { + List arrayData = new ArrayList(); + int size = 100; + for (int i = 0; i < 100000; i++) { + arrayData.add(i % size); + } + JavaRDD simpleRdd = sc.parallelize(arrayData, 10); + Assert.assertTrue(Math.abs((simpleRdd.countApproxDistinct(0.2) - size) / (size * 1.0)) < 0.2); + Assert.assertTrue(Math.abs((simpleRdd.countApproxDistinct(0.05) - size) / (size * 1.0)) <= 0.05); + Assert.assertTrue(Math.abs((simpleRdd.countApproxDistinct(0.01) - size) / (size * 1.0)) <= 0.01); + } + + @Test + public void countApproxDistinctByKey() { + double relativeSD = 0.001; + + List> arrayData = new ArrayList>(); + for (int i = 10; i < 100; i++) + for (int j = 0; j < i; j++) + arrayData.add(new Tuple2(i, j)); + + JavaPairRDD pairRdd = sc.parallelizePairs(arrayData); + List> res = pairRdd.countApproxDistinctByKey(relativeSD).collect(); + for (Tuple2 resItem : res) { + double count = (double)resItem._1(); + Long resCount = (Long)resItem._2(); + Double error = Math.abs((resCount - count) / count); + Assert.assertTrue(error < relativeSD); + } + + } + + @Test + public void collectAsMapWithIntArrayValues() { + // Regression test for SPARK-1040 + JavaRDD rdd = sc.parallelize(Arrays.asList(new Integer[] { 1 })); + JavaPairRDD pairRDD = rdd.map(new PairFunction() { + @Override + public Tuple2 call(Integer x) throws Exception { + return new Tuple2(x, new int[] { x }); + } + }); + pairRDD.collect(); // Works fine + Map map = pairRDD.collectAsMap(); // Used to crash with ClassCastException + } } diff --git a/core/src/test/scala/org/apache/spark/LocalSparkContext.scala b/core/src/test/scala/org/apache/spark/LocalSparkContext.scala index 03f7c0b43a71e..3ac706110e287 100644 --- a/core/src/test/scala/org/apache/spark/LocalSparkContext.scala +++ b/core/src/test/scala/org/apache/spark/LocalSparkContext.scala @@ -40,20 +40,19 @@ trait LocalSparkContext extends BeforeAndAfterEach with BeforeAndAfterAll { self } def resetSparkContext() = { - if (sc != null) { - LocalSparkContext.stop(sc) - sc = null - } + LocalSparkContext.stop(sc) + sc = null } } object LocalSparkContext { def stop(sc: SparkContext) { - sc.stop() + if (sc != null) { + sc.stop() + } // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown System.clearProperty("spark.driver.port") - System.clearProperty("spark.hostPort") } /** Runs `f` by passing in `sc` and ensures that `sc` is stopped. 
*/ diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala index 6013320eaab73..930c2523caf8c 100644 --- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala @@ -23,9 +23,10 @@ import akka.actor._ import org.apache.spark.scheduler.MapStatus import org.apache.spark.storage.BlockManagerId import org.apache.spark.util.AkkaUtils +import scala.concurrent.Await class MapOutputTrackerSuite extends FunSuite with LocalSparkContext { - + private val conf = new SparkConf test("compressSize") { assert(MapOutputTracker.compressSize(0L) === 0) assert(MapOutputTracker.compressSize(1L) === 1) @@ -48,15 +49,15 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext { test("master start and stop") { val actorSystem = ActorSystem("test") - val tracker = new MapOutputTracker() - tracker.trackerActor = actorSystem.actorOf(Props(new MapOutputTrackerActor(tracker))) + val tracker = new MapOutputTrackerMaster(conf) + tracker.trackerActor = actorSystem.actorOf(Props(new MapOutputTrackerMasterActor(tracker))) tracker.stop() } test("master register and fetch") { val actorSystem = ActorSystem("test") - val tracker = new MapOutputTracker() - tracker.trackerActor = actorSystem.actorOf(Props(new MapOutputTrackerActor(tracker))) + val tracker = new MapOutputTrackerMaster(conf) + tracker.trackerActor = actorSystem.actorOf(Props(new MapOutputTrackerMasterActor(tracker))) tracker.registerShuffle(10, 2) val compressedSize1000 = MapOutputTracker.compressSize(1000L) val compressedSize10000 = MapOutputTracker.compressSize(10000L) @@ -74,19 +75,17 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext { test("master register and unregister and fetch") { val actorSystem = ActorSystem("test") - val tracker = new MapOutputTracker() - tracker.trackerActor = actorSystem.actorOf(Props(new MapOutputTrackerActor(tracker))) + val tracker = new MapOutputTrackerMaster(conf) + tracker.trackerActor = actorSystem.actorOf(Props(new MapOutputTrackerMasterActor(tracker))) tracker.registerShuffle(10, 2) val compressedSize1000 = MapOutputTracker.compressSize(1000L) val compressedSize10000 = MapOutputTracker.compressSize(10000L) - val size1000 = MapOutputTracker.decompressSize(compressedSize1000) - val size10000 = MapOutputTracker.decompressSize(compressedSize10000) tracker.registerMapOutput(10, 0, new MapStatus(BlockManagerId("a", "hostA", 1000, 0), Array(compressedSize1000, compressedSize1000, compressedSize1000))) tracker.registerMapOutput(10, 1, new MapStatus(BlockManagerId("b", "hostB", 1000, 0), Array(compressedSize10000, compressedSize1000, compressedSize1000))) - // As if we had two simulatenous fetch failures + // As if we had two simultaneous fetch failures tracker.unregisterMapOutput(10, 0, BlockManagerId("a", "hostA", 1000, 0)) tracker.unregisterMapOutput(10, 0, BlockManagerId("a", "hostA", 1000, 0)) @@ -98,18 +97,19 @@ class MapOutputTrackerSuite extends FunSuite with LocalSparkContext { test("remote fetch") { val hostname = "localhost" - val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, 0) + val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, 0, conf = conf) System.setProperty("spark.driver.port", boundPort.toString) // Will be cleared by LocalSparkContext - System.setProperty("spark.hostPort", hostname + ":" + boundPort) - val masterTracker = new MapOutputTracker() + val 
masterTracker = new MapOutputTrackerMaster(conf) masterTracker.trackerActor = actorSystem.actorOf( - Props(new MapOutputTrackerActor(masterTracker)), "MapOutputTracker") - - val (slaveSystem, _) = AkkaUtils.createActorSystem("spark-slave", hostname, 0) - val slaveTracker = new MapOutputTracker() - slaveTracker.trackerActor = slaveSystem.actorFor( - "akka://spark@localhost:" + boundPort + "/user/MapOutputTracker") + Props(new MapOutputTrackerMasterActor(masterTracker)), "MapOutputTracker") + + val (slaveSystem, _) = AkkaUtils.createActorSystem("spark-slave", hostname, 0, conf = conf) + val slaveTracker = new MapOutputTracker(conf) + val selection = slaveSystem.actorSelection( + s"akka.tcp://spark@localhost:$boundPort/user/MapOutputTracker") + val timeout = AkkaUtils.lookupTimeout(conf) + slaveTracker.trackerActor = Await.result(selection.resolveOne(timeout), timeout) masterTracker.registerShuffle(10, 1) masterTracker.incrementEpoch() diff --git a/core/src/test/scala/org/apache/spark/SharedSparkContext.scala b/core/src/test/scala/org/apache/spark/SharedSparkContext.scala index 97cbca09bfa26..c650ef4ed58cb 100644 --- a/core/src/test/scala/org/apache/spark/SharedSparkContext.scala +++ b/core/src/test/scala/org/apache/spark/SharedSparkContext.scala @@ -27,16 +27,16 @@ trait SharedSparkContext extends BeforeAndAfterAll { self: Suite => def sc: SparkContext = _sc + var conf = new SparkConf(false) + override def beforeAll() { - _sc = new SparkContext("local", "test") + _sc = new SparkContext("local", "test", conf) super.beforeAll() } override def afterAll() { - if (_sc != null) { - LocalSparkContext.stop(_sc) - _sc = null - } + LocalSparkContext.stop(_sc) + _sc = null super.afterAll() } } diff --git a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala new file mode 100644 index 0000000000000..87e9012622456 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark + +import org.scalatest.FunSuite + +class SparkConfSuite extends FunSuite with LocalSparkContext { + test("loading from system properties") { + try { + System.setProperty("spark.test.testProperty", "2") + val conf = new SparkConf() + assert(conf.get("spark.test.testProperty") === "2") + } finally { + System.clearProperty("spark.test.testProperty") + } + } + + test("initializing without loading defaults") { + try { + System.setProperty("spark.test.testProperty", "2") + val conf = new SparkConf(false) + assert(!conf.contains("spark.test.testProperty")) + } finally { + System.clearProperty("spark.test.testProperty") + } + } + + test("named set methods") { + val conf = new SparkConf(false) + + conf.setMaster("local[3]") + conf.setAppName("My app") + conf.setSparkHome("/path") + conf.setJars(Seq("a.jar", "b.jar")) + conf.setExecutorEnv("VAR1", "value1") + conf.setExecutorEnv(Seq(("VAR2", "value2"), ("VAR3", "value3"))) + + assert(conf.get("spark.master") === "local[3]") + assert(conf.get("spark.app.name") === "My app") + assert(conf.get("spark.home") === "/path") + assert(conf.get("spark.jars") === "a.jar,b.jar") + assert(conf.get("spark.executorEnv.VAR1") === "value1") + assert(conf.get("spark.executorEnv.VAR2") === "value2") + assert(conf.get("spark.executorEnv.VAR3") === "value3") + + // Test the Java-friendly versions of these too + conf.setJars(Array("c.jar", "d.jar")) + conf.setExecutorEnv(Array(("VAR4", "value4"), ("VAR5", "value5"))) + assert(conf.get("spark.jars") === "c.jar,d.jar") + assert(conf.get("spark.executorEnv.VAR4") === "value4") + assert(conf.get("spark.executorEnv.VAR5") === "value5") + } + + test("basic get and set") { + val conf = new SparkConf(false) + assert(conf.getAll.toSet === Set()) + conf.set("k1", "v1") + conf.setAll(Seq(("k2", "v2"), ("k3", "v3"))) + assert(conf.getAll.toSet === Set(("k1", "v1"), ("k2", "v2"), ("k3", "v3"))) + conf.set("k1", "v4") + conf.setAll(Seq(("k2", "v5"), ("k3", "v6"))) + assert(conf.getAll.toSet === Set(("k1", "v4"), ("k2", "v5"), ("k3", "v6"))) + assert(conf.contains("k1"), "conf did not contain k1") + assert(!conf.contains("k4"), "conf contained k4") + assert(conf.get("k1") === "v4") + intercept[Exception] { conf.get("k4") } + assert(conf.get("k4", "not found") === "not found") + assert(conf.getOption("k1") === Some("v4")) + assert(conf.getOption("k4") === None) + } + + test("creating SparkContext without master and app name") { + val conf = new SparkConf(false) + intercept[SparkException] { sc = new SparkContext(conf) } + } + + test("creating SparkContext without master") { + val conf = new SparkConf(false).setAppName("My app") + intercept[SparkException] { sc = new SparkContext(conf) } + } + + test("creating SparkContext without app name") { + val conf = new SparkConf(false).setMaster("local") + intercept[SparkException] { sc = new SparkContext(conf) } + } + + test("creating SparkContext with both master and app name") { + val conf = new SparkConf(false).setMaster("local").setAppName("My app") + sc = new SparkContext(conf) + assert(sc.master === "local") + assert(sc.appName === "My app") + } + + test("SparkContext property overriding") { + val conf = new SparkConf(false).setMaster("local").setAppName("My app") + sc = new SparkContext("local[2]", "My other app", conf) + assert(sc.master === "local[2]") + assert(sc.appName === "My other app") + } + + test("nested property names") { + // This wasn't supported by some external conf parsing libraries + try { + System.setProperty("spark.test.a", "a") + 
System.setProperty("spark.test.a.b", "a.b") + System.setProperty("spark.test.a.b.c", "a.b.c") + val conf = new SparkConf() + assert(conf.get("spark.test.a") === "a") + assert(conf.get("spark.test.a.b") === "a.b") + assert(conf.get("spark.test.a.b.c") === "a.b.c") + conf.set("spark.test.a.b", "A.B") + assert(conf.get("spark.test.a") === "a") + assert(conf.get("spark.test.a.b") === "A.B") + assert(conf.get("spark.test.a.b.c") === "a.b.c") + } finally { + System.clearProperty("spark.test.a") + System.clearProperty("spark.test.a.b") + System.clearProperty("spark.test.a.b.c") + } + } +} diff --git a/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala new file mode 100644 index 0000000000000..f28d5c7b133b3 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import org.scalatest.{FunSuite, PrivateMethodTester} + +import org.apache.spark.scheduler.{TaskSchedulerImpl, TaskScheduler} +import org.apache.spark.scheduler.cluster.{SimrSchedulerBackend, SparkDeploySchedulerBackend} +import org.apache.spark.scheduler.cluster.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend} +import org.apache.spark.scheduler.local.LocalBackend + +class SparkContextSchedulerCreationSuite + extends FunSuite with PrivateMethodTester with LocalSparkContext with Logging { + + def createTaskScheduler(master: String): TaskSchedulerImpl = { + // Create local SparkContext to setup a SparkEnv. We don't actually want to start() the + // real schedulers, so we don't want to create a full SparkContext with the desired scheduler. 
+ sc = new SparkContext("local", "test") + val createTaskSchedulerMethod = PrivateMethod[TaskScheduler]('createTaskScheduler) + val sched = SparkContext invokePrivate createTaskSchedulerMethod(sc, master, "test") + sched.asInstanceOf[TaskSchedulerImpl] + } + + test("bad-master") { + val e = intercept[SparkException] { + createTaskScheduler("localhost:1234") + } + assert(e.getMessage.contains("Could not parse Master URL")) + } + + test("local") { + val sched = createTaskScheduler("local") + sched.backend match { + case s: LocalBackend => assert(s.totalCores === 1) + case _ => fail() + } + } + + test("local-n") { + val sched = createTaskScheduler("local[5]") + assert(sched.maxTaskFailures === 1) + sched.backend match { + case s: LocalBackend => assert(s.totalCores === 5) + case _ => fail() + } + } + + test("local-n-failures") { + val sched = createTaskScheduler("local[4, 2]") + assert(sched.maxTaskFailures === 2) + sched.backend match { + case s: LocalBackend => assert(s.totalCores === 4) + case _ => fail() + } + } + + test("simr") { + createTaskScheduler("simr://uri").backend match { + case s: SimrSchedulerBackend => // OK + case _ => fail() + } + } + + test("local-cluster") { + createTaskScheduler("local-cluster[3, 14, 512]").backend match { + case s: SparkDeploySchedulerBackend => // OK + case _ => fail() + } + } + + def testYarn(master: String, expectedClassName: String) { + try { + val sched = createTaskScheduler(master) + assert(sched.getClass === Class.forName(expectedClassName)) + } catch { + case e: SparkException => + assert(e.getMessage.contains("YARN mode not available")) + logWarning("YARN not available, could not test actual YARN scheduler creation") + case e: Throwable => fail(e) + } + } + + test("yarn-standalone") { + testYarn("yarn-standalone", "org.apache.spark.scheduler.cluster.YarnClusterScheduler") + } + + test("yarn-client") { + testYarn("yarn-client", "org.apache.spark.scheduler.cluster.YarnClientClusterScheduler") + } + + def testMesos(master: String, expectedClass: Class[_]) { + try { + val sched = createTaskScheduler(master) + assert(sched.backend.getClass === expectedClass) + } catch { + case e: UnsatisfiedLinkError => + assert(e.getMessage.contains("no mesos in")) + logWarning("Mesos not available, could not test actual Mesos scheduler creation") + case e: Throwable => fail(e) + } + } + + test("mesos fine-grained") { + System.setProperty("spark.mesos.coarse", "false") + testMesos("mesos://localhost:1234", classOf[MesosSchedulerBackend]) + } + + test("mesos coarse-grained") { + System.setProperty("spark.mesos.coarse", "true") + testMesos("mesos://localhost:1234", classOf[CoarseMesosSchedulerBackend]) + } + + test("mesos with zookeeper") { + System.setProperty("spark.mesos.coarse", "false") + testMesos("zk://localhost:1234,localhost:2345", classOf[MesosSchedulerBackend]) + } +} diff --git a/core/src/test/scala/org/apache/spark/UnpersistSuite.scala b/core/src/test/scala/org/apache/spark/UnpersistSuite.scala index 46a2da172407f..768ca3850e7e7 100644 --- a/core/src/test/scala/org/apache/spark/UnpersistSuite.scala +++ b/core/src/test/scala/org/apache/spark/UnpersistSuite.scala @@ -37,7 +37,7 @@ class UnpersistSuite extends FunSuite with LocalSparkContext { Thread.sleep(200) } } catch { - case _ => { Thread.sleep(10) } + case _: Throwable => { Thread.sleep(10) } // Do nothing. We might see exceptions because block manager // is racing this thread to remove entries from the driver. 
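The master strings exercised by this new scheduler-creation suite are the same ones an application passes when it creates a context; a brief sketch of what they select, with the behaviour taken from the tests above and everything else assumed:

// Illustrative sketch, not part of the patch.
import org.apache.spark.{SparkConf, SparkContext}

// "local"                     -> LocalBackend, 1 core, a single attempt per task
// "local[4]"                  -> LocalBackend, 4 cores
// "local[4, 2]"               -> LocalBackend, 4 cores, up to 2 attempts per task
// "local-cluster[3, 14, 512]" -> a test-only standalone cluster with 3 workers,
//                                14 cores and 512 MB of memory each
val conf = new SparkConf().setMaster("local[4, 2]").setAppName("scheduler-demo")
val sc   = new SparkContext(conf)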
} diff --git a/core/src/test/scala/org/apache/spark/api/python/PythonRDDSuite.scala b/core/src/test/scala/org/apache/spark/api/python/PythonRDDSuite.scala new file mode 100644 index 0000000000000..1bebfe5ec84ec --- /dev/null +++ b/core/src/test/scala/org/apache/spark/api/python/PythonRDDSuite.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.python + +import org.scalatest.FunSuite +import org.scalatest.matchers.ShouldMatchers +import org.apache.spark.api.python.PythonRDD + +import java.io.{ByteArrayOutputStream, DataOutputStream} + +class PythonRDDSuite extends FunSuite { + + test("Writing large strings to the worker") { + val input: List[String] = List("a"*100000) + val buffer = new DataOutputStream(new ByteArrayOutputStream) + PythonRDD.writeIteratorToStream(input.iterator, buffer) + } + +} + diff --git a/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala new file mode 100644 index 0000000000000..d6b93f5fedd3b --- /dev/null +++ b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy + +import org.scalatest.FunSuite +import org.scalatest.matchers.ShouldMatchers + +class ClientSuite extends FunSuite with ShouldMatchers { + test("correctly validates driver jar URL's") { + ClientArguments.isValidJarUrl("http://someHost:8080/foo.jar") should be (true) + ClientArguments.isValidJarUrl("file://some/path/to/a/jarFile.jar") should be (true) + ClientArguments.isValidJarUrl("hdfs://someHost:1234/foo.jar") should be (true) + + ClientArguments.isValidJarUrl("hdfs://someHost:1234/foo") should be (false) + ClientArguments.isValidJarUrl("/missing/a/protocol/jarfile.jar") should be (false) + ClientArguments.isValidJarUrl("not-even-a-path.jar") should be (false) + } + +} diff --git a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala index 0b38e239f9b02..d05bbd6ff7e6f 100644 --- a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala @@ -25,8 +25,8 @@ import net.liftweb.json.JsonAST.JValue import org.scalatest.FunSuite import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} -import org.apache.spark.deploy.master.{ApplicationInfo, RecoveryState, WorkerInfo} -import org.apache.spark.deploy.worker.ExecutorRunner +import org.apache.spark.deploy.master.{ApplicationInfo, DriverInfo, RecoveryState, WorkerInfo} +import org.apache.spark.deploy.worker.{ExecutorRunner, DriverRunner} class JsonProtocolSuite extends FunSuite { test("writeApplicationInfo") { @@ -50,11 +50,13 @@ class JsonProtocolSuite extends FunSuite { } test("writeMasterState") { - val workers = Array[WorkerInfo](createWorkerInfo(), createWorkerInfo()) - val activeApps = Array[ApplicationInfo](createAppInfo()) + val workers = Array(createWorkerInfo(), createWorkerInfo()) + val activeApps = Array(createAppInfo()) val completedApps = Array[ApplicationInfo]() + val activeDrivers = Array(createDriverInfo()) + val completedDrivers = Array(createDriverInfo()) val stateResponse = new MasterStateResponse("host", 8080, workers, activeApps, completedApps, - RecoveryState.ALIVE) + activeDrivers, completedDrivers, RecoveryState.ALIVE) val output = JsonProtocol.writeMasterState(stateResponse) assertValidJson(output) } @@ -62,25 +64,44 @@ class JsonProtocolSuite extends FunSuite { test("writeWorkerState") { val executors = List[ExecutorRunner]() val finishedExecutors = List[ExecutorRunner](createExecutorRunner(), createExecutorRunner()) + val drivers = List(createDriverRunner()) + val finishedDrivers = List(createDriverRunner(), createDriverRunner()) val stateResponse = new WorkerStateResponse("host", 8080, "workerId", executors, - finishedExecutors, "masterUrl", 4, 1234, 4, 1234, "masterWebUiUrl") + finishedExecutors, drivers, finishedDrivers, "masterUrl", 4, 1234, 4, 1234, "masterWebUiUrl") val output = JsonProtocol.writeWorkerState(stateResponse) assertValidJson(output) } - def createAppDesc() : ApplicationDescription = { + def createAppDesc(): ApplicationDescription = { val cmd = new Command("mainClass", List("arg1", "arg2"), Map()) - new ApplicationDescription("name", 4, 1234, cmd, "sparkHome", "appUiUrl") + new ApplicationDescription("name", Some(4), 1234, cmd, "sparkHome", "appUiUrl") } + def createAppInfo() : ApplicationInfo = { - new ApplicationInfo(3, "id", createAppDesc(), new Date(123456789), null, "appUriStr") + new ApplicationInfo( + 3, "id", createAppDesc(), new Date(123456789), null, 
"appUriStr", Int.MaxValue) } - def createWorkerInfo() : WorkerInfo = { + + def createDriverCommand() = new Command( + "org.apache.spark.FakeClass", Seq("some arg --and-some options -g foo"), + Map(("K1", "V1"), ("K2", "V2")) + ) + + def createDriverDesc() = new DriverDescription("hdfs://some-dir/some.jar", 100, 3, + false, createDriverCommand()) + + def createDriverInfo(): DriverInfo = new DriverInfo(3, "driver-3", createDriverDesc(), new Date()) + + def createWorkerInfo(): WorkerInfo = { new WorkerInfo("id", "host", 8080, 4, 1234, null, 80, "publicAddress") } - def createExecutorRunner() : ExecutorRunner = { + def createExecutorRunner(): ExecutorRunner = { new ExecutorRunner("appId", 123, createAppDesc(), 4, 1234, null, "workerId", "host", - new File("sparkHome"), new File("workDir"), ExecutorState.RUNNING) + new File("sparkHome"), new File("workDir"), "akka://worker", ExecutorState.RUNNING) + } + def createDriverRunner(): DriverRunner = { + new DriverRunner("driverId", new File("workDir"), new File("sparkHome"), createDriverDesc(), + null, "akka://worker") } def assertValidJson(json: JValue) { diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/DriverRunnerTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/DriverRunnerTest.scala new file mode 100644 index 0000000000000..0c502612647a2 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/deploy/worker/DriverRunnerTest.scala @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.worker + +import java.io.File + +import scala.collection.JavaConversions._ + +import org.mockito.Mockito._ +import org.mockito.Matchers._ +import org.scalatest.FunSuite + +import org.apache.spark.deploy.{Command, DriverDescription} +import org.mockito.stubbing.Answer +import org.mockito.invocation.InvocationOnMock + +class DriverRunnerTest extends FunSuite { + private def createDriverRunner() = { + val command = new Command("mainClass", Seq(), Map()) + val driverDescription = new DriverDescription("jarUrl", 512, 1, true, command) + new DriverRunner("driverId", new File("workDir"), new File("sparkHome"), driverDescription, + null, "akka://1.2.3.4/worker/") + } + + private def createProcessBuilderAndProcess(): (ProcessBuilderLike, Process) = { + val processBuilder = mock(classOf[ProcessBuilderLike]) + when(processBuilder.command).thenReturn(Seq("mocked", "command")) + val process = mock(classOf[Process]) + when(processBuilder.start()).thenReturn(process) + (processBuilder, process) + } + + test("Process succeeds instantly") { + val runner = createDriverRunner() + + val sleeper = mock(classOf[Sleeper]) + runner.setSleeper(sleeper) + + val (processBuilder, process) = createProcessBuilderAndProcess() + // One failure then a successful run + when(process.waitFor()).thenReturn(0) + runner.runCommandWithRetry(processBuilder, p => (), supervise = true) + + verify(process, times(1)).waitFor() + verify(sleeper, times(0)).sleep(anyInt()) + } + + test("Process failing several times and then succeeding") { + val runner = createDriverRunner() + + val sleeper = mock(classOf[Sleeper]) + runner.setSleeper(sleeper) + + val (processBuilder, process) = createProcessBuilderAndProcess() + // fail, fail, fail, success + when(process.waitFor()).thenReturn(-1).thenReturn(-1).thenReturn(-1).thenReturn(0) + runner.runCommandWithRetry(processBuilder, p => (), supervise = true) + + verify(process, times(4)).waitFor() + verify(sleeper, times(3)).sleep(anyInt()) + verify(sleeper, times(1)).sleep(1) + verify(sleeper, times(1)).sleep(2) + verify(sleeper, times(1)).sleep(4) + } + + test("Process doesn't restart if not supervised") { + val runner = createDriverRunner() + + val sleeper = mock(classOf[Sleeper]) + runner.setSleeper(sleeper) + + val (processBuilder, process) = createProcessBuilderAndProcess() + when(process.waitFor()).thenReturn(-1) + + runner.runCommandWithRetry(processBuilder, p => (), supervise = false) + + verify(process, times(1)).waitFor() + verify(sleeper, times(0)).sleep(anyInt()) + } + + test("Process doesn't restart if killed") { + val runner = createDriverRunner() + + val sleeper = mock(classOf[Sleeper]) + runner.setSleeper(sleeper) + + val (processBuilder, process) = createProcessBuilderAndProcess() + when(process.waitFor()).thenAnswer(new Answer[Int] { + def answer(invocation: InvocationOnMock): Int = { + runner.kill() + -1 + } + }) + + runner.runCommandWithRetry(processBuilder, p => (), supervise = true) + + verify(process, times(1)).waitFor() + verify(sleeper, times(0)).sleep(anyInt()) + } + + test("Reset of backoff counter") { + val runner = createDriverRunner() + + val sleeper = mock(classOf[Sleeper]) + runner.setSleeper(sleeper) + + val clock = mock(classOf[Clock]) + runner.setClock(clock) + + val (processBuilder, process) = createProcessBuilderAndProcess() + + when(process.waitFor()) + .thenReturn(-1) // fail 1 + .thenReturn(-1) // fail 2 + .thenReturn(-1) // fail 3 + .thenReturn(-1) // fail 4 + .thenReturn(0) // success + when(clock.currentTimeMillis()) 
+ .thenReturn(0).thenReturn(1000) // fail 1 (short) + .thenReturn(1000).thenReturn(2000) // fail 2 (short) + .thenReturn(2000).thenReturn(10000) // fail 3 (long) + .thenReturn(10000).thenReturn(11000) // fail 4 (short) + .thenReturn(11000).thenReturn(21000) // success (long) + + runner.runCommandWithRetry(processBuilder, p => (), supervise = true) + + verify(sleeper, times(4)).sleep(anyInt()) + // Expected sequence of sleeps is 1,2,1,2 + verify(sleeper, times(2)).sleep(1) + verify(sleeper, times(2)).sleep(2) + } + +} diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala index 4cb4ddc9cd66a..a79ee690d39ff 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/ExecutorRunnerTest.scala @@ -18,19 +18,21 @@ package org.apache.spark.deploy.worker import java.io.File + import org.scalatest.FunSuite + import org.apache.spark.deploy.{ExecutorState, Command, ApplicationDescription} class ExecutorRunnerTest extends FunSuite { test("command includes appId") { def f(s:String) = new File(s) - val sparkHome = sys.env("SPARK_HOME") - val appDesc = new ApplicationDescription("app name", 8, 500, Command("foo", Seq(),Map()), + val sparkHome = sys.env.get("SPARK_HOME").orElse(sys.props.get("spark.home")).get + val appDesc = new ApplicationDescription("app name", Some(8), 500, Command("foo", Seq(),Map()), sparkHome, "appUiUrl") val appId = "12345-worker321-9876" val er = new ExecutorRunner(appId, 1, appDesc, 8, 500, null, "blah", "worker321", f(sparkHome), - f("ooga"), ExecutorState.RUNNING) + f("ooga"), "blah", ExecutorState.RUNNING) - assert(er.buildCommandSeq().last === appId) + assert(er.getCommandSeq.last === appId) } } diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerWatcherSuite.scala b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerWatcherSuite.scala new file mode 100644 index 0000000000000..1f1d8d138005b --- /dev/null +++ b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerWatcherSuite.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.worker + + +import akka.testkit.TestActorRef +import org.scalatest.FunSuite +import akka.remote.DisassociatedEvent +import akka.actor.{ActorSystem, AddressFromURIString, Props} + +class WorkerWatcherSuite extends FunSuite { + test("WorkerWatcher shuts down on valid disassociation") { + val actorSystem = ActorSystem("test") + val targetWorkerUrl = "akka://1.2.3.4/user/Worker" + val targetWorkerAddress = AddressFromURIString(targetWorkerUrl) + val actorRef = TestActorRef[WorkerWatcher](Props(classOf[WorkerWatcher], targetWorkerUrl))(actorSystem) + val workerWatcher = actorRef.underlyingActor + workerWatcher.setTesting(testing = true) + actorRef.underlyingActor.receive(new DisassociatedEvent(null, targetWorkerAddress, false)) + assert(actorRef.underlyingActor.isShutDown) + } + + test("WorkerWatcher stays alive on invalid disassociation") { + val actorSystem = ActorSystem("test") + val targetWorkerUrl = "akka://1.2.3.4/user/Worker" + val otherAkkaURL = "akka://4.3.2.1/user/OtherActor" + val otherAkkaAddress = AddressFromURIString(otherAkkaURL) + val actorRef = TestActorRef[WorkerWatcher](Props(classOf[WorkerWatcher], targetWorkerUrl))(actorSystem) + val workerWatcher = actorRef.underlyingActor + workerWatcher.setTesting(testing = true) + actorRef.underlyingActor.receive(new DisassociatedEvent(null, otherAkkaAddress, false)) + assert(!actorRef.underlyingActor.isShutDown) + } +} \ No newline at end of file diff --git a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala index ab81bfbe55947..8d7546085f489 100644 --- a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala +++ b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala @@ -20,9 +20,11 @@ package org.apache.spark.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import org.scalatest.FunSuite +import org.apache.spark.SparkConf class CompressionCodecSuite extends FunSuite { + val conf = new SparkConf(false) def testCodec(codec: CompressionCodec) { // Write 1000 integers to the output stream, compressed. 
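The hunk below moves codec selection onto SparkConf; as a usage sketch (not part of the patch, with the codec class name taken from the tests):

// Illustrative sketch, not part of the patch.
import org.apache.spark.SparkConf
import org.apache.spark.io.CompressionCodec

val conf = new SparkConf(false)
// The default codec in this release is LZF unless the configuration
// (spark.io.compression.codec) says otherwise.
val default = CompressionCodec.createCodec(conf)
// A specific codec can also be requested by class name:
val snappy = CompressionCodec.createCodec(
  conf, "org.apache.spark.io.SnappyCompressionCodec")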
@@ -43,19 +45,19 @@ class CompressionCodecSuite extends FunSuite { } test("default compression codec") { - val codec = CompressionCodec.createCodec() + val codec = CompressionCodec.createCodec(conf) assert(codec.getClass === classOf[LZFCompressionCodec]) testCodec(codec) } test("lzf compression codec") { - val codec = CompressionCodec.createCodec(classOf[LZFCompressionCodec].getName) + val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName) assert(codec.getClass === classOf[LZFCompressionCodec]) testCodec(codec) } test("snappy compression codec") { - val codec = CompressionCodec.createCodec(classOf[SnappyCompressionCodec].getName) + val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName) assert(codec.getClass === classOf[SnappyCompressionCodec]) testCodec(codec) } diff --git a/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala b/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala index 7181333adfe68..71a2c6c498eef 100644 --- a/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala @@ -19,17 +19,19 @@ package org.apache.spark.metrics import org.scalatest.{BeforeAndAfter, FunSuite} import org.apache.spark.deploy.master.MasterSource +import org.apache.spark.SparkConf class MetricsSystemSuite extends FunSuite with BeforeAndAfter { var filePath: String = _ + var conf: SparkConf = null before { filePath = getClass.getClassLoader.getResource("test_metrics_system.properties").getFile() - System.setProperty("spark.metrics.conf", filePath) + conf = new SparkConf(false).set("spark.metrics.conf", filePath) } test("MetricsSystem with default config") { - val metricsSystem = MetricsSystem.createMetricsSystem("default") + val metricsSystem = MetricsSystem.createMetricsSystem("default", conf) val sources = metricsSystem.sources val sinks = metricsSystem.sinks @@ -39,7 +41,7 @@ class MetricsSystemSuite extends FunSuite with BeforeAndAfter { } test("MetricsSystem with sources add") { - val metricsSystem = MetricsSystem.createMetricsSystem("test") + val metricsSystem = MetricsSystem.createMetricsSystem("test", conf) val sources = metricsSystem.sources val sinks = metricsSystem.sinks diff --git a/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala new file mode 100644 index 0000000000000..7f50a5a47c2ff --- /dev/null +++ b/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.rdd + +import scala.math.abs +import scala.collection.mutable.ArrayBuffer + +import org.scalatest.FunSuite + +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd._ +import org.apache.spark._ + +class DoubleRDDSuite extends FunSuite with SharedSparkContext { + // Verify tests on the histogram functionality. We test with both evenly + // and non-evenly spaced buckets as the bucket lookup function changes. + test("WorksOnEmpty") { + // Make sure that it works on an empty input + val rdd: RDD[Double] = sc.parallelize(Seq()) + val buckets = Array(0.0, 10.0) + val histogramResults = rdd.histogram(buckets) + val histogramResults2 = rdd.histogram(buckets, true) + val expectedHistogramResults = Array(0) + assert(histogramResults === expectedHistogramResults) + assert(histogramResults2 === expectedHistogramResults) + } + + test("WorksWithOutOfRangeWithOneBucket") { + // Verify that if all of the elements are out of range the counts are zero + val rdd = sc.parallelize(Seq(10.01, -0.01)) + val buckets = Array(0.0, 10.0) + val histogramResults = rdd.histogram(buckets) + val histogramResults2 = rdd.histogram(buckets, true) + val expectedHistogramResults = Array(0) + assert(histogramResults === expectedHistogramResults) + assert(histogramResults2 === expectedHistogramResults) + } + + test("WorksInRangeWithOneBucket") { + // Verify the basic case of one bucket and all elements in that bucket works + val rdd = sc.parallelize(Seq(1, 2, 3, 4)) + val buckets = Array(0.0, 10.0) + val histogramResults = rdd.histogram(buckets) + val histogramResults2 = rdd.histogram(buckets, true) + val expectedHistogramResults = Array(4) + assert(histogramResults === expectedHistogramResults) + assert(histogramResults2 === expectedHistogramResults) + } + + test("WorksInRangeWithOneBucketExactMatch") { + // Verify the basic case of one bucket and all elements in that bucket works + val rdd = sc.parallelize(Seq(1, 2, 3, 4)) + val buckets = Array(1.0, 4.0) + val histogramResults = rdd.histogram(buckets) + val histogramResults2 = rdd.histogram(buckets, true) + val expectedHistogramResults = Array(4) + assert(histogramResults === expectedHistogramResults) + assert(histogramResults2 === expectedHistogramResults) + } + + test("WorksWithOutOfRangeWithTwoBuckets") { + // Verify that out of range works with two buckets + val rdd = sc.parallelize(Seq(10.01, -0.01)) + val buckets = Array(0.0, 5.0, 10.0) + val histogramResults = rdd.histogram(buckets) + val histogramResults2 = rdd.histogram(buckets, true) + val expectedHistogramResults = Array(0, 0) + assert(histogramResults === expectedHistogramResults) + assert(histogramResults2 === expectedHistogramResults) + } + + test("WorksWithOutOfRangeWithTwoUnEvenBuckets") { + // Verify that out of range works with two un even buckets + val rdd = sc.parallelize(Seq(10.01, -0.01)) + val buckets = Array(0.0, 4.0, 10.0) + val histogramResults = rdd.histogram(buckets) + val expectedHistogramResults = Array(0, 0) + assert(histogramResults === expectedHistogramResults) + } + + test("WorksInRangeWithTwoBuckets") { + // Make sure that it works with two equally spaced buckets and elements in each + val rdd = sc.parallelize(Seq(1, 2, 3, 5, 6)) + val buckets = Array(0.0, 5.0, 10.0) + val histogramResults = rdd.histogram(buckets) + val histogramResults2 = rdd.histogram(buckets, true) + val expectedHistogramResults = Array(3, 2) + assert(histogramResults === expectedHistogramResults) + assert(histogramResults2 === expectedHistogramResults) + } + + 
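As a usage sketch of the two histogram flavours these tests exercise (illustrative only, assuming a live SparkContext `sc`):

// Illustrative sketch, not part of the patch.
import org.apache.spark.SparkContext._   // brings RDD[Double] histogram methods into scope

val data = sc.parallelize(Seq(1.0, 2.0, 3.0, 5.0, 6.0))

// Caller-supplied bucket boundaries: returns just the per-bucket counts.
val counts: Array[Long] = data.histogram(Array(0.0, 5.0, 10.0))   // Array(3, 2)

// A requested number of evenly spaced buckets: returns (boundaries, counts).
val (buckets, autoCounts) = data.histogram(2)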
test("WorksInRangeWithTwoBucketsAndNaN") { + // Make sure that it works with two equally spaced buckets and elements in each + val rdd = sc.parallelize(Seq(1, 2, 3, 5, 6, Double.NaN)) + val buckets = Array(0.0, 5.0, 10.0) + val histogramResults = rdd.histogram(buckets) + val histogramResults2 = rdd.histogram(buckets, true) + val expectedHistogramResults = Array(3, 2) + assert(histogramResults === expectedHistogramResults) + assert(histogramResults2 === expectedHistogramResults) + } + + test("WorksInRangeWithTwoUnevenBuckets") { + // Make sure that it works with two unequally spaced buckets and elements in each + val rdd = sc.parallelize(Seq(1, 2, 3, 5, 6)) + val buckets = Array(0.0, 5.0, 11.0) + val histogramResults = rdd.histogram(buckets) + val expectedHistogramResults = Array(3, 2) + assert(histogramResults === expectedHistogramResults) + } + + test("WorksMixedRangeWithTwoUnevenBuckets") { + // Make sure that it works with two unequally spaced buckets and elements in each + val rdd = sc.parallelize(Seq(-0.01, 0.0, 1, 2, 3, 5, 6, 11.0, 11.01)) + val buckets = Array(0.0, 5.0, 11.0) + val histogramResults = rdd.histogram(buckets) + val expectedHistogramResults = Array(4, 3) + assert(histogramResults === expectedHistogramResults) + } + + test("WorksMixedRangeWithFourUnevenBuckets") { + // Make sure that it works with two unequally spaced buckets and elements in each + val rdd = sc.parallelize(Seq(-0.01, 0.0, 1, 2, 3, 5, 6, 11.01, 12.0, 199.0, + 200.0, 200.1)) + val buckets = Array(0.0, 5.0, 11.0, 12.0, 200.0) + val histogramResults = rdd.histogram(buckets) + val expectedHistogramResults = Array(4, 2, 1, 3) + assert(histogramResults === expectedHistogramResults) + } + + test("WorksMixedRangeWithUnevenBucketsAndNaN") { + // Make sure that it works with two unequally spaced buckets and elements in each + val rdd = sc.parallelize(Seq(-0.01, 0.0, 1, 2, 3, 5, 6, 11.01, 12.0, 199.0, + 200.0, 200.1, Double.NaN)) + val buckets = Array(0.0, 5.0, 11.0, 12.0, 200.0) + val histogramResults = rdd.histogram(buckets) + val expectedHistogramResults = Array(4, 2, 1, 3) + assert(histogramResults === expectedHistogramResults) + } + // Make sure this works with a NaN end bucket + test("WorksMixedRangeWithUnevenBucketsAndNaNAndNaNRange") { + // Make sure that it works with two unequally spaced buckets and elements in each + val rdd = sc.parallelize(Seq(-0.01, 0.0, 1, 2, 3, 5, 6, 11.01, 12.0, 199.0, + 200.0, 200.1, Double.NaN)) + val buckets = Array(0.0, 5.0, 11.0, 12.0, 200.0, Double.NaN) + val histogramResults = rdd.histogram(buckets) + val expectedHistogramResults = Array(4, 2, 1, 2, 3) + assert(histogramResults === expectedHistogramResults) + } + // Make sure this works with a NaN end bucket and an inifity + test("WorksMixedRangeWithUnevenBucketsAndNaNAndNaNRangeAndInfity") { + // Make sure that it works with two unequally spaced buckets and elements in each + val rdd = sc.parallelize(Seq(-0.01, 0.0, 1, 2, 3, 5, 6, 11.01, 12.0, 199.0, + 200.0, 200.1, 1.0/0.0, -1.0/0.0, Double.NaN)) + val buckets = Array(0.0, 5.0, 11.0, 12.0, 200.0, Double.NaN) + val histogramResults = rdd.histogram(buckets) + val expectedHistogramResults = Array(4, 2, 1, 2, 4) + assert(histogramResults === expectedHistogramResults) + } + + test("WorksWithOutOfRangeWithInfiniteBuckets") { + // Verify that out of range works with two buckets + val rdd = sc.parallelize(Seq(10.01, -0.01, Double.NaN)) + val buckets = Array(-1.0/0.0 , 0.0, 1.0/0.0) + val histogramResults = rdd.histogram(buckets) + val expectedHistogramResults = Array(1, 1) + 
assert(histogramResults === expectedHistogramResults) + } + // Test the failure mode with an invalid bucket array + test("ThrowsExceptionOnInvalidBucketArray") { + val rdd = sc.parallelize(Seq(1.0)) + // Empty array + intercept[IllegalArgumentException] { + val buckets = Array.empty[Double] + val result = rdd.histogram(buckets) + } + // Single element array + intercept[IllegalArgumentException] { + val buckets = Array(1.0) + val result = rdd.histogram(buckets) + } + } + + // Test automatic histogram function + test("WorksWithoutBucketsBasic") { + // Verify the basic case of one bucket and all elements in that bucket works + val rdd = sc.parallelize(Seq(1, 2, 3, 4)) + val (histogramBuckets, histogramResults) = rdd.histogram(1) + val expectedHistogramResults = Array(4) + val expectedHistogramBuckets = Array(1.0, 4.0) + assert(histogramResults === expectedHistogramResults) + assert(histogramBuckets === expectedHistogramBuckets) + } + // Test automatic histogram function with a single element + test("WorksWithoutBucketsBasicSingleElement") { + // Verify the basic case of one bucket and all elements in that bucket works + val rdd = sc.parallelize(Seq(1)) + val (histogramBuckets, histogramResults) = rdd.histogram(1) + val expectedHistogramResults = Array(1) + val expectedHistogramBuckets = Array(1.0, 1.0) + assert(histogramResults === expectedHistogramResults) + assert(histogramBuckets === expectedHistogramBuckets) + } + // Test automatic histogram function with a single element + test("WorksWithoutBucketsBasicNoRange") { + // Verify the basic case of one bucket and all elements in that bucket works + val rdd = sc.parallelize(Seq(1, 1, 1, 1)) + val (histogramBuckets, histogramResults) = rdd.histogram(1) + val expectedHistogramResults = Array(4) + val expectedHistogramBuckets = Array(1.0, 1.0) + assert(histogramResults === expectedHistogramResults) + assert(histogramBuckets === expectedHistogramBuckets) + } + + test("WorksWithoutBucketsBasicTwo") { + // Verify the basic case of one bucket and all elements in that bucket works + val rdd = sc.parallelize(Seq(1, 2, 3, 4)) + val (histogramBuckets, histogramResults) = rdd.histogram(2) + val expectedHistogramResults = Array(2, 2) + val expectedHistogramBuckets = Array(1.0, 2.5, 4.0) + assert(histogramResults === expectedHistogramResults) + assert(histogramBuckets === expectedHistogramBuckets) + } + + test("WorksWithoutBucketsWithMoreRequestedThanElements") { + // Verify the basic case of one bucket and all elements in that bucket works + val rdd = sc.parallelize(Seq(1, 2)) + val (histogramBuckets, histogramResults) = rdd.histogram(10) + val expectedHistogramResults = + Array(1, 0, 0, 0, 0, 0, 0, 0, 0, 1) + val expectedHistogramBuckets = + Array(1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0) + assert(histogramResults === expectedHistogramResults) + assert(histogramBuckets === expectedHistogramBuckets) + } + + // Test the failure mode with an invalid RDD + test("ThrowsExceptionOnInvalidRDDs") { + // infinity + intercept[UnsupportedOperationException] { + val rdd = sc.parallelize(Seq(1, 1.0/0.0)) + val result = rdd.histogram(1) + } + // NaN + intercept[UnsupportedOperationException] { + val rdd = sc.parallelize(Seq(1, Double.NaN)) + val result = rdd.histogram(1) + } + // Empty + intercept[UnsupportedOperationException] { + val rdd: RDD[Double] = sc.parallelize(Seq()) + val result = rdd.histogram(1) + } + } + +} diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala 
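// Illustrative aside (not part of the patch): a minimal sketch of the bucket-count form of
// histogram, exercised by the automatic-histogram tests above; assumes `sc` is already defined.
import org.apache.spark.SparkContext._

val data = sc.parallelize(Seq(1.0, 2.0, 3.0, 4.0))
// Computes the requested number of evenly spaced buckets between the RDD's min and max and
// returns both the bucket boundaries and the per-bucket counts.
val (edges, counts) = data.histogram(2)   // edges = Array(1.0, 2.5, 4.0), counts = Array(2, 2)
// As the failure-mode tests show, this form throws UnsupportedOperationException for an empty
// RDD or one containing NaN or infinity, since no finite range can be derived.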
b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index 57d3382ed0b3f..5da538a1ddfd5 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.rdd import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashSet +import scala.util.Random import org.scalatest.FunSuite @@ -109,6 +110,39 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext { assert(deps.size === 2) // ShuffledRDD, ParallelCollection. } + test("countApproxDistinctByKey") { + def error(est: Long, size: Long) = math.abs(est - size) / size.toDouble + + /* Since HyperLogLog unique counting is approximate, and the relative standard deviation is + * only a statistical bound, the tests can fail for large values of relativeSD. We will be using + * relatively tight error bounds to check correctness of functionality rather than checking + * whether the approximation conforms with the requested bound. + */ + val relativeSD = 0.001 + + // For each value i, there are i tuples with first element equal to i. + // Therefore, the expected count for key i would be i. + val stacked = (1 to 100).flatMap(i => (1 to i).map(j => (i, j))) + val rdd1 = sc.parallelize(stacked) + val counted1 = rdd1.countApproxDistinctByKey(relativeSD).collect() + counted1.foreach{ + case(k, count) => assert(error(count, k) < relativeSD) + } + + val rnd = new Random() + + // The expected count for key num would be num + val randStacked = (1 to 100).flatMap { i => + val num = rnd.nextInt % 500 + (1 to num).map(j => (num, j)) + } + val rdd2 = sc.parallelize(randStacked) + val counted2 = rdd2.countApproxDistinctByKey(relativeSD, 4).collect() + counted2.foreach{ + case(k, count) => assert(error(count, k) < relativeSD) + } + } + test("join") { val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) val rdd2 = sc.parallelize(Array((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index 673be9101f568..559ea051d3533 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -63,6 +63,19 @@ class RDDSuite extends FunSuite with SharedSparkContext { } } + test("countApproxDistinct") { + + def error(est: Long, size: Long) = math.abs(est - size) / size.toDouble + + val size = 100 + val uniformDistro = for (i <- 1 to 100000) yield i % size + val simpleRdd = sc.makeRDD(uniformDistro) + assert(error(simpleRdd.countApproxDistinct(0.2), size) < 0.2) + assert(error(simpleRdd.countApproxDistinct(0.05), size) < 0.05) + assert(error(simpleRdd.countApproxDistinct(0.01), size) < 0.01) + assert(error(simpleRdd.countApproxDistinct(0.001), size) < 0.001) + } + test("SparkContext.union") { val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) assert(sc.union(nums).collect().toList === List(1, 2, 3, 4)) @@ -71,6 +84,33 @@ class RDDSuite extends FunSuite with SharedSparkContext { assert(sc.union(Seq(nums, nums)).collect().toList === List(1, 2, 3, 4, 1, 2, 3, 4)) } + test("partitioner aware union") { + import SparkContext._ + def makeRDDWithPartitioner(seq: Seq[Int]) = { + sc.makeRDD(seq, 1) + .map(x => (x, null)) + .partitionBy(new HashPartitioner(2)) + .mapPartitions(_.map(_._1), true) + } + + val nums1 = makeRDDWithPartitioner(1 to 4) + val nums2 = makeRDDWithPartitioner(5 to 8) + assert(nums1.partitioner == 
nums2.partitioner) + assert(new PartitionerAwareUnionRDD(sc, Seq(nums1)).collect().toSet === Set(1, 2, 3, 4)) + + val union = new PartitionerAwareUnionRDD(sc, Seq(nums1, nums2)) + assert(union.collect().toSet === Set(1, 2, 3, 4, 5, 6, 7, 8)) + val nums1Parts = nums1.collectPartitions() + val nums2Parts = nums2.collectPartitions() + val unionParts = union.collectPartitions() + assert(nums1Parts.length === 2) + assert(nums2Parts.length === 2) + assert(unionParts.length === 2) + assert((nums1Parts(0) ++ nums2Parts(0)).toList === unionParts(0).toList) + assert((nums1Parts(1) ++ nums2Parts(1)).toList === unionParts(1).toList) + assert(union.partitioner === nums1.partitioner) + } + test("aggregate") { val pairs = sc.makeRDD(Array(("a", 1), ("b", 2), ("a", 2), ("c", 5), ("a", 3))) type StringMap = HashMap[String, Int] @@ -244,8 +284,8 @@ class RDDSuite extends FunSuite with SharedSparkContext { // test that you get over 90% locality in each group val minLocality = coalesced2.partitions .map(part => part.asInstanceOf[CoalescedRDDPartition].localFraction) - .foldLeft(1.)((perc, loc) => math.min(perc,loc)) - assert(minLocality >= 0.90, "Expected 90% locality but got " + (minLocality*100.).toInt + "%") + .foldLeft(1.0)((perc, loc) => math.min(perc,loc)) + assert(minLocality >= 0.90, "Expected 90% locality but got " + (minLocality*100.0).toInt + "%") // test that the groups are load balanced with 100 +/- 20 elements in each val maxImbalance = coalesced2.partitions @@ -257,9 +297,9 @@ class RDDSuite extends FunSuite with SharedSparkContext { val coalesced3 = data3.coalesce(numMachines*2) val minLocality2 = coalesced3.partitions .map(part => part.asInstanceOf[CoalescedRDDPartition].localFraction) - .foldLeft(1.)((perc, loc) => math.min(perc,loc)) + .foldLeft(1.0)((perc, loc) => math.min(perc,loc)) assert(minLocality2 >= 0.90, "Expected 90% locality for derived RDD but got " + - (minLocality2*100.).toInt + "%") + (minLocality2*100.0).toInt + "%") } test("zipped RDDs") { @@ -340,36 +380,6 @@ class RDDSuite extends FunSuite with SharedSparkContext { for (i <- 0 until sample.size) assert(sample(i) === checkSample(i)) } - test("toIterable") { - var nums = sc.makeRDD(Range(1, 1000), 100) - assert(nums.toIterator(prefetchPartitions = 10).size === 999) - assert(nums.toIterator().toArray === (1 to 999).toArray) - - nums = sc.makeRDD(Range(1000, 1, -1), 100) - assert(nums.toIterator(prefetchPartitions = 10).size === 999) - assert(nums.toIterator(prefetchPartitions = 10).toArray === Range(1000, 1, -1).toArray) - - nums = sc.makeRDD(Range(1, 100), 1000) - assert(nums.toIterator(prefetchPartitions = 10).size === 99) - assert(nums.toIterator(prefetchPartitions = 10).toArray === Range(1, 100).toArray) - - nums = sc.makeRDD(Range(1, 1000), 100) - assert(nums.toIterator(prefetchPartitions = -1).size === 999) - assert(nums.toIterator().toArray === (1 to 999).toArray) - - nums = sc.makeRDD(Range(1, 1000), 100) - assert(nums.toIterator(prefetchPartitions = 3,partitionBatchSize = 10).size === 999) - assert(nums.toIterator().toArray === (1 to 999).toArray) - - nums = sc.makeRDD(Range(1, 1000), 100) - assert(nums.toIterator(prefetchPartitions = -1,partitionBatchSize = 0).size === 999) - assert(nums.toIterator().toArray === (1 to 999).toArray) - - nums = sc.makeRDD(Range(1, 1000), 100) - assert(nums.toIterator(prefetchPartitions = -1).size === 999) - assert(nums.toIterator().toArray === (1 to 999).toArray) - } - test("take") { var nums = sc.makeRDD(Range(1, 1000), 1) assert(nums.take(0).size === 0) diff --git 
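// Illustrative aside (not part of the patch): a minimal sketch of the HyperLogLog-based
// approximate counting added in this release and exercised above; assumes `sc` is already
// defined. The relativeSD argument trades accuracy for memory, so results are approximate.
import org.apache.spark.SparkContext._

val ids = sc.parallelize((1 to 100000).map(_ % 100))
ids.countApproxDistinct(0.01)                   // approximately 100 distinct values

// The pair-RDD variant estimates the number of distinct values per key.
val pairs = sc.parallelize((1 to 100).flatMap(i => (1 to i).map(j => (i, j))))
pairs.countApproxDistinctByKey(0.01).collect()  // approximately (i, i) for each key i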
a/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/ClusterSchedulerSuite.scala similarity index 76% rename from core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterSchedulerSuite.scala rename to core/src/test/scala/org/apache/spark/scheduler/ClusterSchedulerSuite.scala index 674f84c1f177e..98ea4cb5612ec 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/ClusterSchedulerSuite.scala @@ -15,52 +15,45 @@ * limitations under the License. */ -package org.apache.spark.scheduler.cluster +package org.apache.spark.scheduler import org.scalatest.FunSuite import org.scalatest.BeforeAndAfter import org.apache.spark._ -import org.apache.spark.scheduler._ -import org.apache.spark.scheduler.cluster._ import scala.collection.mutable.ArrayBuffer import java.util.Properties -class FakeSchedulerBackend extends SchedulerBackend { - def start() {} - def stop() {} - def reviveOffers() {} - def defaultParallelism() = 1 -} - class FakeTaskSetManager( initPriority: Int, initStageId: Int, initNumTasks: Int, - clusterScheduler: ClusterScheduler, + clusterScheduler: TaskSchedulerImpl, taskSet: TaskSet) - extends ClusterTaskSetManager(clusterScheduler, taskSet) { + extends TaskSetManager(clusterScheduler, taskSet, 0) { parent = null weight = 1 minShare = 2 - runningTasks = 0 priority = initPriority stageId = initStageId name = "TaskSet_"+stageId override val numTasks = initNumTasks tasksSuccessful = 0 + var numRunningTasks = 0 + override def runningTasks = numRunningTasks + def increaseRunningTasks(taskNum: Int) { - runningTasks += taskNum + numRunningTasks += taskNum if (parent != null) { parent.increaseRunningTasks(taskNum) } } def decreaseRunningTasks(taskNum: Int) { - runningTasks -= taskNum + numRunningTasks -= taskNum if (parent != null) { parent.decreaseRunningTasks(taskNum) } @@ -73,7 +66,7 @@ class FakeTaskSetManager( } override def getSchedulableByName(name: String): Schedulable = { - return null + null } override def executorLost(executorId: String, host: String): Unit = { @@ -86,15 +79,16 @@ class FakeTaskSetManager( maxLocality: TaskLocality.TaskLocality) : Option[TaskDescription] = { - if (tasksSuccessful + runningTasks < numTasks) { + if (tasksSuccessful + numRunningTasks < numTasks) { increaseRunningTasks(1) - return Some(new TaskDescription(0, execId, "task 0:0", 0, null)) + Some(new TaskDescription(0, execId, "task 0:0", 0, null)) + } else { + None } - return None } override def checkSpeculatableTasks(): Boolean = { - return true + true } def taskFinished() { @@ -106,15 +100,14 @@ class FakeTaskSetManager( } def abort() { - decreaseRunningTasks(runningTasks) + decreaseRunningTasks(numRunningTasks) parent.removeSchedulable(this) } } class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging { - def createDummyTaskSetManager(priority: Int, stage: Int, numTasks: Int, cs: ClusterScheduler, - taskSet: TaskSet): FakeTaskSetManager = { + def createDummyTaskSetManager(priority: Int, stage: Int, numTasks: Int, cs: TaskSchedulerImpl, taskSet: TaskSet): FakeTaskSetManager = { new FakeTaskSetManager(priority, stage, numTasks, cs , taskSet) } @@ -141,8 +134,11 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging test("FIFO Scheduler Test") { sc = new SparkContext("local", "ClusterSchedulerSuite") - val clusterScheduler = new ClusterScheduler(sc) - val taskSet = 
FakeTask.createTaskSet(1) + val clusterScheduler = new TaskSchedulerImpl(sc) + var tasks = ArrayBuffer[Task[_]]() + val task = new FakeTask(0) + tasks += task + val taskSet = new TaskSet(tasks.toArray,0,0,0,null) val rootPool = new Pool("", SchedulingMode.FIFO, 0, 0) val schedulableBuilder = new FIFOSchedulableBuilder(rootPool) @@ -165,13 +161,16 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging test("Fair Scheduler Test") { sc = new SparkContext("local", "ClusterSchedulerSuite") - val clusterScheduler = new ClusterScheduler(sc) - val taskSet = FakeTask.createTaskSet(1) + val clusterScheduler = new TaskSchedulerImpl(sc) + var tasks = ArrayBuffer[Task[_]]() + val task = new FakeTask(0) + tasks += task + val taskSet = new TaskSet(tasks.toArray,0,0,0,null) val xmlPath = getClass.getClassLoader.getResource("fairscheduler.xml").getFile() System.setProperty("spark.scheduler.allocation.file", xmlPath) val rootPool = new Pool("", SchedulingMode.FAIR, 0, 0) - val schedulableBuilder = new FairSchedulableBuilder(rootPool) + val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf) schedulableBuilder.buildPools() assert(rootPool.getSchedulableByName("default") != null) @@ -219,8 +218,11 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging test("Nested Pool Test") { sc = new SparkContext("local", "ClusterSchedulerSuite") - val clusterScheduler = new ClusterScheduler(sc) - val taskSet = FakeTask.createTaskSet(1) + val clusterScheduler = new TaskSchedulerImpl(sc) + var tasks = ArrayBuffer[Task[_]]() + val task = new FakeTask(0) + tasks += task + val taskSet = new TaskSet(tasks.toArray,0,0,0,null) val rootPool = new Pool("", SchedulingMode.FAIR, 0, 0) val pool0 = new Pool("0", SchedulingMode.FAIR, 3, 1) @@ -263,35 +265,4 @@ class ClusterSchedulerSuite extends FunSuite with LocalSparkContext with Logging checkTaskSetId(rootPool, 6) checkTaskSetId(rootPool, 2) } - - test("Scheduler does not always schedule tasks on the same workers") { - sc = new SparkContext("local", "ClusterSchedulerSuite") - val taskScheduler = new ClusterScheduler(sc) - taskScheduler.initialize(new FakeSchedulerBackend) - // Need to initialize a DAGScheduler for the taskScheduler to use for callbacks. - var dagScheduler = new DAGScheduler(taskScheduler) { - override def taskStarted(task: Task[_], taskInfo: TaskInfo) {} - override def executorGained(execId: String, host: String) {} - } - - val numFreeCores = 1 - val workerOffers = Seq(new WorkerOffer("executor0", "host0", numFreeCores), - new WorkerOffer("executor1", "host1", numFreeCores)) - // Repeatedly try to schedule a 1-task job, and make sure that it doesn't always - // get scheduled on the same executor. While there is a chance this test will fail - // because the task randomly gets placed on the first executor all 1000 times, the - // probability of that happening is 2^-1000 (so sufficiently small to be considered - // negligible). 
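// Illustrative aside (not part of the patch): outside these suites, the FIFO/FAIR pools built
// above are normally driven by configuration rather than by constructing Pool objects directly.
// A minimal sketch; the XML path and pool name are placeholders and must match your own
// fairscheduler.xml.
import org.apache.spark.SparkContext

System.setProperty("spark.scheduler.mode", "FAIR")
System.setProperty("spark.scheduler.allocation.file", "/path/to/fairscheduler.xml")
val sc = new SparkContext("local[4]", "fair-pools-sketch")
// Jobs submitted from this thread are placed in the named pool.
sc.setLocalProperty("spark.scheduler.pool", "1")
sc.parallelize(1 to 1000).count()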
- val numTrials = 1000 - val selectedExecutorIds = 1.to(numTrials).map { _ => - val taskSet = FakeTask.createTaskSet(1) - taskScheduler.submitTasks(taskSet) - val taskDescriptions = taskScheduler.resourceOffers(workerOffers).flatten - assert(1 === taskDescriptions.length) - taskDescriptions(0).executorId - } - var count = selectedExecutorIds.count(_ == workerOffers(0).executorId) - assert(count > 0) - assert(count < numTrials) - } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 03fc8c020e005..f0236ef1e975b 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -17,21 +17,14 @@ package org.apache.spark.scheduler -import scala.collection.mutable.{Map, HashMap} - -import org.scalatest.FunSuite -import org.scalatest.BeforeAndAfter - -import org.apache.spark.LocalSparkContext -import org.apache.spark.MapOutputTracker -import org.apache.spark.SparkContext -import org.apache.spark.Partition -import org.apache.spark.TaskContext -import org.apache.spark.{Dependency, ShuffleDependency, OneToOneDependency} -import org.apache.spark.{FetchFailed, Success, TaskEndReason} +import scala.Tuple2 +import scala.collection.mutable.{HashMap, Map} + +import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.scheduler.SchedulingMode.SchedulingMode import org.apache.spark.storage.{BlockId, BlockManagerId, BlockManagerMaster} +import org.scalatest.{BeforeAndAfter, FunSuite} /** * Tests for DAGScheduler. These tests directly call the event processing functions in DAGScheduler @@ -46,7 +39,7 @@ import org.apache.spark.storage.{BlockId, BlockManagerId, BlockManagerMaster} * and capturing the resulting TaskSets from the mock TaskScheduler. */ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkContext { - + val conf = new SparkConf /** Set of TaskSets the DAGScheduler has requested executed. */ val taskSets = scala.collection.mutable.Buffer[TaskSet]() val taskScheduler = new TaskScheduler() { @@ -64,7 +57,7 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont override def defaultParallelism() = 2 } - var mapOutputTracker: MapOutputTracker = null + var mapOutputTracker: MapOutputTrackerMaster = null var scheduler: DAGScheduler = null /** @@ -74,7 +67,7 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont */ val cacheLocations = new HashMap[(Int, Int), Seq[BlockManagerId]] // stub out BlockManagerMaster.getLocations to use our cacheLocations - val blockManagerMaster = new BlockManagerMaster(null) { + val blockManagerMaster = new BlockManagerMaster(null, conf) { override def getLocations(blockIds: Array[BlockId]): Seq[Seq[BlockManagerId]] = { blockIds.map { _.asRDDId.map(id => (id.rddId -> id.splitIndex)).flatMap(key => cacheLocations.get(key)). 
@@ -99,8 +92,8 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont taskSets.clear() cacheLocations.clear() results.clear() - mapOutputTracker = new MapOutputTracker() - scheduler = new DAGScheduler(taskScheduler, mapOutputTracker, blockManagerMaster, null) { + mapOutputTracker = new MapOutputTrackerMaster(conf) + scheduler = new DAGScheduler(taskScheduler, mapOutputTracker, blockManagerMaster, sc.env) { override def runLocally(job: ActiveJob) { // don't bother with the thread while unit testing runLocallyWithinThread(job) @@ -129,7 +122,7 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont locations: Seq[Seq[String]] = Nil ): MyRDD = { val maxPartition = numPartitions - 1 - return new MyRDD(sc, dependencies) { + val newRDD = new MyRDD(sc, dependencies) { override def compute(split: Partition, context: TaskContext): Iterator[(Int, Int)] = throw new RuntimeException("should not be reached") override def getPartitions = (0 to maxPartition).map(i => new Partition { @@ -142,6 +135,7 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont Nil override def toString: String = "DAGSchedulerSuiteRDD " + id } + newRDD } /** @@ -407,40 +401,6 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont assertDataStructuresEmpty } - // test("oneGoodJob") { - // try { - // val rdd = new ParallelCollectionRDD(sc, 1.to(100).toSeq, 5, Map.empty) - // val func = (tc: TaskContext, iter: Iterator[Int]) => 1 - // val callSite = Utils.getSparkCallSite - - // scheduler.runJob(rdd, func, 0 until rdd.partitions.size, callSite, false, {(_: Int, _: Int) => {}}) - // assertDagSchedulerEmpty(scheduler) - // } finally { - // scheduler.stop() - // sc.stop() - // // pause to let scheduler stop (separate thread) - // Thread.sleep(10) - // } - // } - - // test("manyGoodJobs") { - // try { - // val rdd = new ParallelCollectionRDD(sc, 1.to(100).toSeq, 5, Map.empty) - // val func = (tc: TaskContext, iter: Iterator[Int]) => 1 - // val callSite = Utils.getSparkCallSite - - // 1.to(100).foreach( v => { - // scheduler.runJob(rdd, func, 0 until rdd.partitions.size, callSite, false, {(_: Int, _: Int) => {}}) - // }) - // assertDagSchedulerEmpty(scheduler) - // } finally { - // scheduler.stop() - // sc.stop() - // // pause to let scheduler stop (separate thread) - // Thread.sleep(10) - // } - // } - /** * Assert that the supplied TaskSet has exactly the given hosts as its preferred locations. * Note that this checks only the host and not the executor ID. @@ -473,4 +433,3 @@ class DAGSchedulerSuite extends FunSuite with BeforeAndAfter with LocalSparkCont assert(scheduler.waiting.isEmpty) } } - diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/FakeTask.scala b/core/src/test/scala/org/apache/spark/scheduler/FakeTask.scala similarity index 59% rename from core/src/test/scala/org/apache/spark/scheduler/cluster/FakeTask.scala rename to core/src/test/scala/org/apache/spark/scheduler/FakeTask.scala index 52546add0af66..0b90c4e74c8a4 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/cluster/FakeTask.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/FakeTask.scala @@ -15,29 +15,12 @@ * limitations under the License. 
*/ -package org.apache.spark.scheduler.cluster +package org.apache.spark.scheduler import org.apache.spark.TaskContext -import org.apache.spark.scheduler.{TaskLocation, Task, TaskSet} class FakeTask(stageId: Int, prefLocs: Seq[TaskLocation] = Nil) extends Task[Int](stageId, 0) { override def runTask(context: TaskContext): Int = 0 override def preferredLocations: Seq[TaskLocation] = prefLocs } - -object FakeTask { - /** - * Utility method to create a TaskSet, potentially setting a particular sequence of preferred - * locations for each task (given as varargs) if this sequence is not empty. - */ - def createTaskSet(numTasks: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { - if (prefLocs.size != 0 && prefLocs.size != numTasks) { - throw new IllegalArgumentException("Wrong number of task locations") - } - val tasks = Array.tabulate[Task[_]](numTasks) { i => - new FakeTask(i, if (prefLocs.size != 0) prefLocs(i) else Nil) - } - new TaskSet(tasks, 0, 0, 0, null) - } -} diff --git a/core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala index 22babe8d2ef78..29102913c719c 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/JobLoggerSuite.scala @@ -31,7 +31,6 @@ import org.apache.spark.rdd.RDD class JobLoggerSuite extends FunSuite with LocalSparkContext with ShouldMatchers { - /** Length of time to wait while draining listener events. */ val WAIT_TIMEOUT_MILLIS = 10000 test("inner method") { @@ -43,12 +42,9 @@ class JobLoggerSuite extends FunSuite with LocalSparkContext with ShouldMatchers def buildJobDepTest(jobID: Int, stage: Stage) = buildJobDep(jobID, stage) } type MyRDD = RDD[(Int, Int)] - def makeRdd( - numPartitions: Int, - dependencies: List[Dependency[_]] - ): MyRDD = { + def makeRdd(numPartitions: Int, dependencies: List[Dependency[_]]): MyRDD = { val maxPartition = numPartitions - 1 - return new MyRDD(sc, dependencies) { + new MyRDD(sc, dependencies) { override def compute(split: Partition, context: TaskContext): Iterator[(Int, Int)] = throw new RuntimeException("should not be reached") override def getPartitions = (0 to maxPartition).map(i => new Partition { @@ -93,9 +89,10 @@ class JobLoggerSuite extends FunSuite with LocalSparkContext with ShouldMatchers sc.addSparkListener(joblogger) val rdd = sc.parallelize(1 to 1e2.toInt, 4).map{ i => (i % 12, 2 * i) } rdd.reduceByKey(_+_).collect() + assert(sc.dagScheduler.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)) - val user = System.getProperty("user.name", SparkContext.SPARK_UNKNOWN_USER) + val user = System.getProperty("user.name", SparkContext.SPARK_UNKNOWN_USER) joblogger.getLogDir should be ("/tmp/spark-%s".format(user)) joblogger.getJobIDtoPrintWriter.size should be (1) @@ -117,12 +114,13 @@ class JobLoggerSuite extends FunSuite with LocalSparkContext with ShouldMatchers override def onTaskEnd(taskEnd: SparkListenerTaskEnd) = onTaskEndCount += 1 override def onJobEnd(jobEnd: SparkListenerJobEnd) = onJobEndCount += 1 override def onJobStart(jobStart: SparkListenerJobStart) = onJobStartCount += 1 - override def onStageCompleted(stageCompleted: StageCompleted) = onStageCompletedCount += 1 + override def onStageCompleted(stageCompleted: SparkListenerStageCompleted) = onStageCompletedCount += 1 override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) = onStageSubmittedCount += 1 } sc.addSparkListener(joblogger) val rdd = sc.parallelize(1 to 1e2.toInt, 4).map{ 
i => (i % 12, 2 * i) } - rdd.reduceByKey(_+_).collect() + rdd.reduceByKey(_+_).collect() + assert(sc.dagScheduler.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)) joblogger.onJobStartCount should be (1) diff --git a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala index 1fd76420eaa24..368c5154ea3b9 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala @@ -19,23 +19,26 @@ package org.apache.spark.scheduler import scala.collection.mutable.{Buffer, HashSet} -import org.scalatest.{BeforeAndAfterAll, FunSuite} +import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite} import org.scalatest.matchers.ShouldMatchers import org.apache.spark.{LocalSparkContext, SparkContext} import org.apache.spark.SparkContext._ class SparkListenerSuite extends FunSuite with LocalSparkContext with ShouldMatchers - with BeforeAndAfterAll { + with BeforeAndAfter with BeforeAndAfterAll { /** Length of time to wait while draining listener events. */ val WAIT_TIMEOUT_MILLIS = 10000 + before { + sc = new SparkContext("local", "SparkListenerSuite") + } + override def afterAll { System.clearProperty("spark.akka.frameSize") } test("basic creation of StageInfo") { - sc = new SparkContext("local", "DAGSchedulerSuite") val listener = new SaveStageInfo sc.addSparkListener(listener) val rdd1 = sc.parallelize(1 to 100, 4) @@ -56,7 +59,6 @@ class SparkListenerSuite extends FunSuite with LocalSparkContext with ShouldMatc } test("StageInfo with fewer tasks than partitions") { - sc = new SparkContext("local", "DAGSchedulerSuite") val listener = new SaveStageInfo sc.addSparkListener(listener) val rdd1 = sc.parallelize(1 to 100, 4) @@ -72,7 +74,6 @@ class SparkListenerSuite extends FunSuite with LocalSparkContext with ShouldMatc } test("local metrics") { - sc = new SparkContext("local", "DAGSchedulerSuite") val listener = new SaveStageInfo sc.addSparkListener(listener) sc.addSparkListener(new StatsReportListener) @@ -135,17 +136,13 @@ class SparkListenerSuite extends FunSuite with LocalSparkContext with ShouldMatc } test("onTaskGettingResult() called when result fetched remotely") { - // Need to use local cluster mode here, because results are not ever returned through the - // block manager when using the LocalScheduler. - sc = new SparkContext("local-cluster[1,1,512]", "test") - val listener = new SaveTaskEvents sc.addSparkListener(listener) // Make a task whose result is larger than the akka frame size System.setProperty("spark.akka.frameSize", "1") val akkaFrameSize = - sc.env.actorSystem.settings.config.getBytes("akka.remote.netty.message-frame-size").toInt + sc.env.actorSystem.settings.config.getBytes("akka.remote.netty.tcp.maximum-frame-size").toInt val result = sc.parallelize(Seq(1), 1).map(x => 1.to(akkaFrameSize).toArray).reduce((x,y) => x) assert(result === 1.to(akkaFrameSize).toArray) @@ -157,10 +154,6 @@ class SparkListenerSuite extends FunSuite with LocalSparkContext with ShouldMatc } test("onTaskGettingResult() not called when result sent directly") { - // Need to use local cluster mode here, because results are not ever returned through the - // block manager when using the LocalScheduler. 
- sc = new SparkContext("local-cluster[1,1,512]", "test") - val listener = new SaveTaskEvents sc.addSparkListener(listener) @@ -175,13 +168,46 @@ class SparkListenerSuite extends FunSuite with LocalSparkContext with ShouldMatc assert(listener.endedTasks.contains(TASK_INDEX)) } + test("onTaskEnd() should be called for all started tasks, even after job has been killed") { + val WAIT_TIMEOUT_MILLIS = 10000 + val listener = new SaveTaskEvents + sc.addSparkListener(listener) + + val numTasks = 10 + val f = sc.parallelize(1 to 10000, numTasks).map { i => Thread.sleep(10); i }.countAsync() + // Wait until one task has started (because we want to make sure that any tasks that are started + // have corresponding end events sent to the listener). + var finishTime = System.currentTimeMillis + WAIT_TIMEOUT_MILLIS + listener.synchronized { + var remainingWait = finishTime - System.currentTimeMillis + while (listener.startedTasks.isEmpty && remainingWait > 0) { + listener.wait(remainingWait) + remainingWait = finishTime - System.currentTimeMillis + } + assert(!listener.startedTasks.isEmpty) + } + + f.cancel() + + // Ensure that onTaskEnd is called for all started tasks. + finishTime = System.currentTimeMillis + WAIT_TIMEOUT_MILLIS + listener.synchronized { + var remainingWait = finishTime - System.currentTimeMillis + while (listener.endedTasks.size < listener.startedTasks.size && remainingWait > 0) { + listener.wait(finishTime - System.currentTimeMillis) + remainingWait = finishTime - System.currentTimeMillis + } + assert(listener.endedTasks.size === listener.startedTasks.size) + } + } + def checkNonZeroAvg(m: Traversable[Long], msg: String) { assert(m.sum / m.size.toDouble > 0.0, msg) } class SaveStageInfo extends SparkListener { val stageInfos = Buffer[StageInfo]() - override def onStageCompleted(stage: StageCompleted) { + override def onStageCompleted(stage: SparkListenerStageCompleted) { stageInfos += stage.stage } } @@ -191,12 +217,14 @@ class SparkListenerSuite extends FunSuite with LocalSparkContext with ShouldMatc val startedGettingResultTasks = new HashSet[Int]() val endedTasks = new HashSet[Int]() - override def onTaskStart(taskStart: SparkListenerTaskStart) { + override def onTaskStart(taskStart: SparkListenerTaskStart) = synchronized { startedTasks += taskStart.taskInfo.index + notify() } - override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { - endedTasks += taskEnd.taskInfo.index + override def onTaskEnd(taskEnd: SparkListenerTaskEnd) = synchronized { + endedTasks += taskEnd.taskInfo.index + notify() } override def onTaskGettingResult(taskGettingResult: SparkListenerTaskGettingResult) { diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/TaskResultGetterSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala similarity index 83% rename from core/src/test/scala/org/apache/spark/scheduler/cluster/TaskResultGetterSuite.scala rename to core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala index ee150a31076cb..4b52d9651ebe8 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/cluster/TaskResultGetterSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala @@ -15,14 +15,13 @@ * limitations under the License. 
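// Illustrative aside (not part of the patch): the wait/notify pattern that the killed-job test
// above relies on. The listener updates its state and calls notify() under its own lock, so a
// test thread can block in listener.synchronized { listener.wait(timeout) } until the expected
// task events arrive. Minimal sketch against the 0.9-era listener API; the class name here is
// made up for illustration.
import scala.collection.mutable.HashSet
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskStart, SparkListenerTaskEnd}

class WaitableTaskListener extends SparkListener {
  val startedTasks = new HashSet[Int]()
  val endedTasks = new HashSet[Int]()

  override def onTaskStart(taskStart: SparkListenerTaskStart) = synchronized {
    startedTasks += taskStart.taskInfo.index
    notify()   // wake any thread waiting on this listener
  }

  override def onTaskEnd(taskEnd: SparkListenerTaskEnd) = synchronized {
    endedTasks += taskEnd.taskInfo.index
    notify()
  }
}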
*/ -package org.apache.spark.scheduler.cluster +package org.apache.spark.scheduler import java.nio.ByteBuffer import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite} -import org.apache.spark.{LocalSparkContext, SparkContext, SparkEnv} -import org.apache.spark.scheduler.{DirectTaskResult, IndirectTaskResult, TaskResult} +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkEnv} import org.apache.spark.storage.TaskResultBlockId /** @@ -31,12 +30,12 @@ import org.apache.spark.storage.TaskResultBlockId * Used to test the case where a BlockManager evicts the task result (or dies) before the * TaskResult is retrieved. */ -class ResultDeletingTaskResultGetter(sparkEnv: SparkEnv, scheduler: ClusterScheduler) +class ResultDeletingTaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedulerImpl) extends TaskResultGetter(sparkEnv, scheduler) { var removedResult = false override def enqueueSuccessfulTask( - taskSetManager: ClusterTaskSetManager, tid: Long, serializedData: ByteBuffer) { + taskSetManager: TaskSetManager, tid: Long, serializedData: ByteBuffer) { if (!removedResult) { // Only remove the result once, since we'd like to test the case where the task eventually // succeeds. @@ -44,13 +43,13 @@ class ResultDeletingTaskResultGetter(sparkEnv: SparkEnv, scheduler: ClusterSched case IndirectTaskResult(blockId) => sparkEnv.blockManager.master.removeBlock(blockId) case directResult: DirectTaskResult[_] => - taskSetManager.abort("Internal error: expect only indirect results") + taskSetManager.abort("Internal error: expect only indirect results") } serializedData.rewind() removedResult = true } super.enqueueSuccessfulTask(taskSetManager, tid, serializedData) - } + } } /** @@ -65,24 +64,20 @@ class TaskResultGetterSuite extends FunSuite with BeforeAndAfter with BeforeAndA System.setProperty("spark.akka.frameSize", "1") } - before { - // Use local-cluster mode because results are returned differently when running with the - // LocalScheduler. - sc = new SparkContext("local-cluster[1,1,512]", "test") - } - override def afterAll { System.clearProperty("spark.akka.frameSize") } test("handling results smaller than Akka frame size") { + sc = new SparkContext("local", "test") val result = sc.parallelize(Seq(1), 1).map(x => 2 * x).reduce((x, y) => x) assert(result === 2) } - test("handling results larger than Akka frame size") { + test("handling results larger than Akka frame size") { + sc = new SparkContext("local", "test") val akkaFrameSize = - sc.env.actorSystem.settings.config.getBytes("akka.remote.netty.message-frame-size").toInt + sc.env.actorSystem.settings.config.getBytes("akka.remote.netty.tcp.maximum-frame-size").toInt val result = sc.parallelize(Seq(1), 1).map(x => 1.to(akkaFrameSize).toArray).reduce((x, y) => x) assert(result === 1.to(akkaFrameSize).toArray) @@ -92,10 +87,13 @@ class TaskResultGetterSuite extends FunSuite with BeforeAndAfter with BeforeAndA } test("task retried if result missing from block manager") { + // Set the maximum number of task failures to > 0, so that the task set isn't aborted + // after the result is missing. + sc = new SparkContext("local[1,2]", "test") // If this test hangs, it's probably because no resource offers were made after the task // failed. 
- val scheduler: ClusterScheduler = sc.taskScheduler match { - case clusterScheduler: ClusterScheduler => + val scheduler: TaskSchedulerImpl = sc.taskScheduler match { + case clusterScheduler: TaskSchedulerImpl => clusterScheduler case _ => assert(false, "Expect local cluster to use ClusterScheduler") @@ -103,7 +101,7 @@ class TaskResultGetterSuite extends FunSuite with BeforeAndAfter with BeforeAndA } scheduler.taskResultGetter = new ResultDeletingTaskResultGetter(sc.env, scheduler) val akkaFrameSize = - sc.env.actorSystem.settings.config.getBytes("akka.remote.netty.message-frame-size").toInt + sc.env.actorSystem.settings.config.getBytes("akka.remote.netty.tcp.maximum-frame-size").toInt val result = sc.parallelize(Seq(1), 1).map(x => 1.to(akkaFrameSize).toArray).reduce((x, y) => x) assert(result === 1.to(akkaFrameSize).toArray) diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala similarity index 83% rename from core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala rename to core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index 3fb205964c867..de321c45b547c 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.scheduler.cluster +package org.apache.spark.scheduler import scala.collection.mutable.ArrayBuffer import scala.collection.mutable @@ -23,7 +23,6 @@ import scala.collection.mutable import org.scalatest.FunSuite import org.apache.spark._ -import org.apache.spark.scheduler._ import org.apache.spark.executor.TaskMetrics import java.nio.ByteBuffer import org.apache.spark.util.{Utils, FakeClock} @@ -56,10 +55,10 @@ class FakeDAGScheduler(taskScheduler: FakeClusterScheduler) extends DAGScheduler * A mock ClusterScheduler implementation that just remembers information about tasks started and * feedback received from the TaskSetManagers. Note that it's important to initialize this with * a list of "live" executors and their hostnames for isExecutorAlive and hasExecutorsAliveOnHost - * to work, and these are required for locality in ClusterTaskSetManager. + * to work, and these are required for locality in TaskSetManager. 
*/ class FakeClusterScheduler(sc: SparkContext, liveExecutors: (String, String)* /* execId, host */) - extends ClusterScheduler(sc) + extends TaskSchedulerImpl(sc) { val startedTasks = new ArrayBuffer[Long] val endedTasks = new mutable.HashMap[Long, TaskEndReason] @@ -79,16 +78,19 @@ class FakeClusterScheduler(sc: SparkContext, liveExecutors: (String, String)* /* override def hasExecutorsAliveOnHost(host: String): Boolean = executors.values.exists(_ == host) } -class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging { +class TaskSetManagerSuite extends FunSuite with LocalSparkContext with Logging { import TaskLocality.{ANY, PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL} - val LOCALITY_WAIT = System.getProperty("spark.locality.wait", "3000").toLong + private val conf = new SparkConf + + val LOCALITY_WAIT = conf.getLong("spark.locality.wait", 3000) + val MAX_TASK_FAILURES = 4 test("TaskSet with no preferences") { sc = new SparkContext("local", "test") val sched = new FakeClusterScheduler(sc, ("exec1", "host1")) - val taskSet = FakeTask.createTaskSet(1) - val manager = new ClusterTaskSetManager(sched, taskSet) + val taskSet = createTaskSet(1) + val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES) // Offer a host with no CPUs assert(manager.resourceOffer("exec1", "host1", 0, ANY) === None) @@ -113,8 +115,8 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Lo test("multiple offers with no preferences") { sc = new SparkContext("local", "test") val sched = new FakeClusterScheduler(sc, ("exec1", "host1")) - val taskSet = FakeTask.createTaskSet(3) - val manager = new ClusterTaskSetManager(sched, taskSet) + val taskSet = createTaskSet(3) + val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES) // First three offers should all find tasks for (i <- 0 until 3) { @@ -144,14 +146,14 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Lo test("basic delay scheduling") { sc = new SparkContext("local", "test") val sched = new FakeClusterScheduler(sc, ("exec1", "host1"), ("exec2", "host2")) - val taskSet = FakeTask.createTaskSet(4, + val taskSet = createTaskSet(4, Seq(TaskLocation("host1", "exec1")), Seq(TaskLocation("host2", "exec2")), Seq(TaskLocation("host1"), TaskLocation("host2", "exec2")), Seq() // Last task has no locality prefs ) val clock = new FakeClock - val manager = new ClusterTaskSetManager(sched, taskSet, clock) + val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES, clock) // First offer host1, exec1: first task should be chosen assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 0) @@ -189,7 +191,7 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Lo sc = new SparkContext("local", "test") val sched = new FakeClusterScheduler(sc, ("exec1", "host1"), ("exec2", "host2"), ("exec3", "host3")) - val taskSet = FakeTask.createTaskSet(5, + val taskSet = createTaskSet(5, Seq(TaskLocation("host1")), Seq(TaskLocation("host2")), Seq(TaskLocation("host2")), @@ -197,7 +199,7 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Lo Seq(TaskLocation("host2")) ) val clock = new FakeClock - val manager = new ClusterTaskSetManager(sched, taskSet, clock) + val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES, clock) // First offer host1: first task should be chosen assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 0) @@ -228,13 +230,13 @@ class ClusterTaskSetManagerSuite extends 
FunSuite with LocalSparkContext with Lo test("delay scheduling with failed hosts") { sc = new SparkContext("local", "test") val sched = new FakeClusterScheduler(sc, ("exec1", "host1"), ("exec2", "host2")) - val taskSet = FakeTask.createTaskSet(3, + val taskSet = createTaskSet(3, Seq(TaskLocation("host1")), Seq(TaskLocation("host2")), Seq(TaskLocation("host3")) ) val clock = new FakeClock - val manager = new ClusterTaskSetManager(sched, taskSet, clock) + val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES, clock) // First offer host1: first task should be chosen assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 0) @@ -260,14 +262,14 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Lo test("task result lost") { sc = new SparkContext("local", "test") val sched = new FakeClusterScheduler(sc, ("exec1", "host1")) - val taskSet = FakeTask.createTaskSet(1) + val taskSet = createTaskSet(1) val clock = new FakeClock - val manager = new ClusterTaskSetManager(sched, taskSet, clock) + val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES, clock) assert(manager.resourceOffer("exec1", "host1", 1, ANY).get.index === 0) // Tell it the task has finished but the result was lost. - manager.handleFailedTask(0, TaskState.FINISHED, Some(TaskResultLost)) + manager.handleFailedTask(0, TaskState.FINISHED, TaskResultLost) assert(sched.endedTasks(0) === TaskResultLost) // Re-offer the host -- now we should get task 0 again. @@ -277,19 +279,19 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Lo test("repeated failures lead to task set abortion") { sc = new SparkContext("local", "test") val sched = new FakeClusterScheduler(sc, ("exec1", "host1")) - val taskSet = FakeTask.createTaskSet(1) + val taskSet = createTaskSet(1) val clock = new FakeClock - val manager = new ClusterTaskSetManager(sched, taskSet, clock) + val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES, clock) // Fail the task MAX_TASK_FAILURES times, and check that the task set is aborted // after the last failure. - (0 until manager.MAX_TASK_FAILURES).foreach { index => + (1 to manager.maxTaskFailures).foreach { index => val offerResult = manager.resourceOffer("exec1", "host1", 1, ANY) - assert(offerResult != None, + assert(offerResult.isDefined, "Expect resource offer on iteration %s to return a task".format(index)) assert(offerResult.get.index === 0) - manager.handleFailedTask(offerResult.get.taskId, TaskState.FINISHED, Some(TaskResultLost)) - if (index < manager.MAX_TASK_FAILURES) { + manager.handleFailedTask(offerResult.get.taskId, TaskState.FINISHED, TaskResultLost) + if (index < MAX_TASK_FAILURES) { assert(!sched.taskSetsFailed.contains(taskSet.id)) } else { assert(sched.taskSetsFailed.contains(taskSet.id)) @@ -297,7 +299,23 @@ class ClusterTaskSetManagerSuite extends FunSuite with LocalSparkContext with Lo } } + + /** + * Utility method to create a TaskSet, potentially setting a particular sequence of preferred + * locations for each task (given as varargs) if this sequence is not empty. 
+ */ + def createTaskSet(numTasks: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { + if (prefLocs.size != 0 && prefLocs.size != numTasks) { + throw new IllegalArgumentException("Wrong number of task locations") + } + val tasks = Array.tabulate[Task[_]](numTasks) { i => + new FakeTask(i, if (prefLocs.size != 0) prefLocs(i) else Nil) + } + new TaskSet(tasks, 0, 0, 0, null) + } + def createTaskResult(id: Int): DirectTaskResult[Int] = { - new DirectTaskResult[Int](id, mutable.Map.empty, new TaskMetrics) + val valueSer = SparkEnv.get.serializer.newInstance() + new DirectTaskResult[Int](valueSer.serialize(id), mutable.Map.empty, new TaskMetrics) } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/local/LocalSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/local/LocalSchedulerSuite.scala deleted file mode 100644 index 1e676c1719337..0000000000000 --- a/core/src/test/scala/org/apache/spark/scheduler/local/LocalSchedulerSuite.scala +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.scheduler.local - -import java.util.concurrent.Semaphore -import java.util.concurrent.CountDownLatch - -import scala.collection.mutable.HashMap - -import org.scalatest.{BeforeAndAfterEach, FunSuite} - -import org.apache.spark._ - - -class Lock() { - var finished = false - def jobWait() = { - synchronized { - while(!finished) { - this.wait() - } - } - } - - def jobFinished() = { - synchronized { - finished = true - this.notifyAll() - } - } -} - -object TaskThreadInfo { - val threadToLock = HashMap[Int, Lock]() - val threadToRunning = HashMap[Int, Boolean]() - val threadToStarted = HashMap[Int, CountDownLatch]() -} - -/* - * 1. each thread contains one job. - * 2. each job contains one stage. - * 3. each stage only contains one task. - * 4. each task(launched) must be lanched orderly(using threadToStarted) to make sure - * it will get cpu core resource, and will wait to finished after user manually - * release "Lock" and then cluster will contain another free cpu cores. - * 5. each task(pending) must use "sleep" to make sure it has been added to taskSetManager queue, - * thus it will be scheduled later when cluster has free cpu cores. 
- */ -class LocalSchedulerSuite extends FunSuite with LocalSparkContext with BeforeAndAfterEach { - - override def afterEach() { - super.afterEach() - System.clearProperty("spark.scheduler.mode") - } - - def createThread(threadIndex: Int, poolName: String, sc: SparkContext, sem: Semaphore) { - - TaskThreadInfo.threadToRunning(threadIndex) = false - val nums = sc.parallelize(threadIndex to threadIndex, 1) - TaskThreadInfo.threadToLock(threadIndex) = new Lock() - TaskThreadInfo.threadToStarted(threadIndex) = new CountDownLatch(1) - new Thread { - if (poolName != null) { - sc.setLocalProperty("spark.scheduler.pool", poolName) - } - override def run() { - val ans = nums.map(number => { - TaskThreadInfo.threadToRunning(number) = true - TaskThreadInfo.threadToStarted(number).countDown() - TaskThreadInfo.threadToLock(number).jobWait() - TaskThreadInfo.threadToRunning(number) = false - number - }).collect() - assert(ans.toList === List(threadIndex)) - sem.release() - } - }.start() - } - - test("Local FIFO scheduler end-to-end test") { - System.setProperty("spark.scheduler.mode", "FIFO") - sc = new SparkContext("local[4]", "test") - val sem = new Semaphore(0) - - createThread(1,null,sc,sem) - TaskThreadInfo.threadToStarted(1).await() - createThread(2,null,sc,sem) - TaskThreadInfo.threadToStarted(2).await() - createThread(3,null,sc,sem) - TaskThreadInfo.threadToStarted(3).await() - createThread(4,null,sc,sem) - TaskThreadInfo.threadToStarted(4).await() - // thread 5 and 6 (stage pending)must meet following two points - // 1. stages (taskSetManager) of jobs in thread 5 and 6 should be add to taskSetManager - // queue before executing TaskThreadInfo.threadToLock(1).jobFinished() - // 2. priority of stage in thread 5 should be prior to priority of stage in thread 6 - // So I just use "sleep" 1s here for each thread. - // TODO: any better solution? 
- createThread(5,null,sc,sem) - Thread.sleep(1000) - createThread(6,null,sc,sem) - Thread.sleep(1000) - - assert(TaskThreadInfo.threadToRunning(1) === true) - assert(TaskThreadInfo.threadToRunning(2) === true) - assert(TaskThreadInfo.threadToRunning(3) === true) - assert(TaskThreadInfo.threadToRunning(4) === true) - assert(TaskThreadInfo.threadToRunning(5) === false) - assert(TaskThreadInfo.threadToRunning(6) === false) - - TaskThreadInfo.threadToLock(1).jobFinished() - TaskThreadInfo.threadToStarted(5).await() - - assert(TaskThreadInfo.threadToRunning(1) === false) - assert(TaskThreadInfo.threadToRunning(2) === true) - assert(TaskThreadInfo.threadToRunning(3) === true) - assert(TaskThreadInfo.threadToRunning(4) === true) - assert(TaskThreadInfo.threadToRunning(5) === true) - assert(TaskThreadInfo.threadToRunning(6) === false) - - TaskThreadInfo.threadToLock(3).jobFinished() - TaskThreadInfo.threadToStarted(6).await() - - assert(TaskThreadInfo.threadToRunning(1) === false) - assert(TaskThreadInfo.threadToRunning(2) === true) - assert(TaskThreadInfo.threadToRunning(3) === false) - assert(TaskThreadInfo.threadToRunning(4) === true) - assert(TaskThreadInfo.threadToRunning(5) === true) - assert(TaskThreadInfo.threadToRunning(6) === true) - - TaskThreadInfo.threadToLock(2).jobFinished() - TaskThreadInfo.threadToLock(4).jobFinished() - TaskThreadInfo.threadToLock(5).jobFinished() - TaskThreadInfo.threadToLock(6).jobFinished() - sem.acquire(6) - } - - test("Local fair scheduler end-to-end test") { - System.setProperty("spark.scheduler.mode", "FAIR") - val xmlPath = getClass.getClassLoader.getResource("fairscheduler.xml").getFile() - System.setProperty("spark.scheduler.allocation.file", xmlPath) - - sc = new SparkContext("local[8]", "LocalSchedulerSuite") - val sem = new Semaphore(0) - - createThread(10,"1",sc,sem) - TaskThreadInfo.threadToStarted(10).await() - createThread(20,"2",sc,sem) - TaskThreadInfo.threadToStarted(20).await() - createThread(30,"3",sc,sem) - TaskThreadInfo.threadToStarted(30).await() - - assert(TaskThreadInfo.threadToRunning(10) === true) - assert(TaskThreadInfo.threadToRunning(20) === true) - assert(TaskThreadInfo.threadToRunning(30) === true) - - createThread(11,"1",sc,sem) - TaskThreadInfo.threadToStarted(11).await() - createThread(21,"2",sc,sem) - TaskThreadInfo.threadToStarted(21).await() - createThread(31,"3",sc,sem) - TaskThreadInfo.threadToStarted(31).await() - - assert(TaskThreadInfo.threadToRunning(11) === true) - assert(TaskThreadInfo.threadToRunning(21) === true) - assert(TaskThreadInfo.threadToRunning(31) === true) - - createThread(12,"1",sc,sem) - TaskThreadInfo.threadToStarted(12).await() - createThread(22,"2",sc,sem) - TaskThreadInfo.threadToStarted(22).await() - createThread(32,"3",sc,sem) - - assert(TaskThreadInfo.threadToRunning(12) === true) - assert(TaskThreadInfo.threadToRunning(22) === true) - assert(TaskThreadInfo.threadToRunning(32) === false) - - TaskThreadInfo.threadToLock(10).jobFinished() - TaskThreadInfo.threadToStarted(32).await() - - assert(TaskThreadInfo.threadToRunning(32) === true) - - //1. Similar with above scenario, sleep 1s for stage of 23 and 33 to be added to taskSetManager - // queue so that cluster will assign free cpu core to stage 23 after stage 11 finished. - //2. priority of 23 and 33 will be meaningless as using fair scheduler here. 
- createThread(23,"2",sc,sem) - createThread(33,"3",sc,sem) - Thread.sleep(1000) - - TaskThreadInfo.threadToLock(11).jobFinished() - TaskThreadInfo.threadToStarted(23).await() - - assert(TaskThreadInfo.threadToRunning(23) === true) - assert(TaskThreadInfo.threadToRunning(33) === false) - - TaskThreadInfo.threadToLock(12).jobFinished() - TaskThreadInfo.threadToStarted(33).await() - - assert(TaskThreadInfo.threadToRunning(33) === true) - - TaskThreadInfo.threadToLock(20).jobFinished() - TaskThreadInfo.threadToLock(21).jobFinished() - TaskThreadInfo.threadToLock(22).jobFinished() - TaskThreadInfo.threadToLock(23).jobFinished() - TaskThreadInfo.threadToLock(30).jobFinished() - TaskThreadInfo.threadToLock(31).jobFinished() - TaskThreadInfo.threadToLock(32).jobFinished() - TaskThreadInfo.threadToLock(33).jobFinished() - - sem.acquire(11) - } -} diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala index c016c5117149f..3898583275557 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala @@ -22,12 +22,15 @@ import scala.collection.mutable import com.esotericsoftware.kryo.Kryo import org.scalatest.FunSuite -import org.apache.spark.SharedSparkContext +import org.apache.spark.{SparkConf, SharedSparkContext} import org.apache.spark.serializer.KryoTest._ class KryoSerializerSuite extends FunSuite with SharedSparkContext { + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + conf.set("spark.kryo.registrator", classOf[MyRegistrator].getName) + test("basic types") { - val ser = (new KryoSerializer).newInstance() + val ser = new KryoSerializer(conf).newInstance() def check[T](t: T) { assert(ser.deserialize[T](ser.serialize(t)) === t) } @@ -57,7 +60,7 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext { } test("pairs") { - val ser = (new KryoSerializer).newInstance() + val ser = new KryoSerializer(conf).newInstance() def check[T](t: T) { assert(ser.deserialize[T](ser.serialize(t)) === t) } @@ -81,7 +84,7 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext { } test("Scala data structures") { - val ser = (new KryoSerializer).newInstance() + val ser = new KryoSerializer(conf).newInstance() def check[T](t: T) { assert(ser.deserialize[T](ser.serialize(t)) === t) } @@ -104,7 +107,7 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext { } test("ranges") { - val ser = (new KryoSerializer).newInstance() + val ser = new KryoSerializer(conf).newInstance() def check[T](t: T) { assert(ser.deserialize[T](ser.serialize(t)) === t) // Check that very long ranges don't get written one element at a time @@ -125,9 +128,7 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext { } test("custom registrator") { - System.setProperty("spark.kryo.registrator", classOf[MyRegistrator].getName) - - val ser = (new KryoSerializer).newInstance() + val ser = new KryoSerializer(conf).newInstance() def check[T](t: T) { assert(ser.deserialize[T](ser.serialize(t)) === t) } @@ -172,6 +173,10 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext { assert (sc.parallelize( Array((1, 11), (2, 22), (3, 33)) ).collect().head === (1, 11)) } + test("kryo with SerializableHyperLogLog") { + assert(sc.parallelize( Array(1, 2, 3, 2, 3, 3, 2, 3, 1) ).countApproxDistinct(0.01) === 3) + } + test("kryo with reduce") { val control = 1 
:: 2 :: Nil val result = sc.parallelize(control, 2).map(new ClassWithoutNoArgConstructor(_)) @@ -186,18 +191,6 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext { .fold(new ClassWithoutNoArgConstructor(10))((t1, t2) => new ClassWithoutNoArgConstructor(t1.x + t2.x)).x assert(10 + control.sum === result) } - - override def beforeAll() { - System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer") - System.setProperty("spark.kryo.registrator", classOf[MyRegistrator].getName) - super.beforeAll() - } - - override def afterAll() { - super.afterAll() - System.clearProperty("spark.kryo.registrator") - System.clearProperty("spark.serializer") - } } object KryoTest { diff --git a/core/src/test/scala/org/apache/spark/storage/BlockIdSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockIdSuite.scala index cb76275e39309..b647e8a6728ec 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockIdSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockIdSuite.scala @@ -39,7 +39,7 @@ class BlockIdSuite extends FunSuite { fail() } catch { case e: IllegalStateException => // OK - case _ => fail() + case _: Throwable => fail() } } diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 484a654108da9..85011c6451777 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -31,46 +31,43 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark.util.{SizeEstimator, Utils, AkkaUtils, ByteBufferInputStream} import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} +import org.apache.spark.{SparkConf, SparkContext} class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodTester { + private val conf = new SparkConf(false) var store: BlockManager = null var store2: BlockManager = null var actorSystem: ActorSystem = null var master: BlockManagerMaster = null var oldArch: String = null - var oldOops: String = null - var oldHeartBeat: String = null // Reuse a serializer across tests to avoid creating a new thread-local buffer on each test - System.setProperty("spark.kryoserializer.buffer.mb", "1") - val serializer = new KryoSerializer + conf.set("spark.kryoserializer.buffer.mb", "1") + val serializer = new KryoSerializer(conf) // Implicitly convert strings to BlockIds for test clarity. 
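The suites above now build their serializers from an explicit SparkConf instead of mutating global System properties. A minimal sketch of that conf-driven pattern, reusing only calls that appear in this patch (SparkConf(false), conf.set, new KryoSerializer(conf)); the tuple being round-tripped is an arbitrary illustration, not taken from the patch:

    import org.apache.spark.SparkConf
    import org.apache.spark.serializer.KryoSerializer

    val conf = new SparkConf(false)                  // false: do not pick up spark.* system properties
    conf.set("spark.kryoserializer.buffer.mb", "1")  // same key the suite sets above
    val ser = new KryoSerializer(conf).newInstance()
    val bytes = ser.serialize((1, "one"))            // serialize returns a ByteBuffer
    assert(ser.deserialize[(Int, String)](bytes) == ((1, "one")))

Because the configuration travels with the conf object, tests no longer need the afterAll/clearProperty teardown that this patch removes.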
implicit def StringToBlockId(value: String): BlockId = new TestBlockId(value) def rdd(rddId: Int, splitId: Int) = RDDBlockId(rddId, splitId) before { - val (actorSystem, boundPort) = AkkaUtils.createActorSystem("test", "localhost", 0) + val (actorSystem, boundPort) = AkkaUtils.createActorSystem("test", "localhost", 0, conf = conf) this.actorSystem = actorSystem - System.setProperty("spark.driver.port", boundPort.toString) - System.setProperty("spark.hostPort", "localhost:" + boundPort) + conf.set("spark.driver.port", boundPort.toString) master = new BlockManagerMaster( - actorSystem.actorOf(Props(new BlockManagerMasterActor(true)))) + actorSystem.actorOf(Props(new BlockManagerMasterActor(true, conf))), conf) // Set the arch to 64-bit and compressedOops to true to get a deterministic test-case oldArch = System.setProperty("os.arch", "amd64") - oldOops = System.setProperty("spark.test.useCompressedOops", "true") - oldHeartBeat = System.setProperty("spark.storage.disableBlockManagerHeartBeat", "true") + conf.set("os.arch", "amd64") + conf.set("spark.test.useCompressedOops", "true") + conf.set("spark.storage.disableBlockManagerHeartBeat", "true") val initialize = PrivateMethod[Unit]('initialize) SizeEstimator invokePrivate initialize() - // Set some value ... - System.setProperty("spark.hostPort", Utils.localHostName() + ":" + 1111) } after { System.clearProperty("spark.driver.port") - System.clearProperty("spark.hostPort") if (store != null) { store.stop() @@ -86,16 +83,12 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT master = null if (oldArch != null) { - System.setProperty("os.arch", oldArch) + conf.set("os.arch", oldArch) } else { System.clearProperty("os.arch") } - if (oldOops != null) { - System.setProperty("spark.test.useCompressedOops", oldOops) - } else { - System.clearProperty("spark.test.useCompressedOops") - } + System.clearProperty("spark.test.useCompressedOops") } test("StorageLevel object caching") { @@ -133,7 +126,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("master + 1 manager interaction") { - store = new BlockManager("", actorSystem, master, serializer, 2000) + store = new BlockManager("", actorSystem, master, serializer, 2000, conf) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -144,9 +137,9 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT store.putSingle("a3", a3, StorageLevel.MEMORY_ONLY, tellMaster = false) // Checking whether blocks are in memory - assert(store.getSingle("a1") != None, "a1 was not in store") - assert(store.getSingle("a2") != None, "a2 was not in store") - assert(store.getSingle("a3") != None, "a3 was not in store") + assert(store.getSingle("a1").isDefined, "a1 was not in store") + assert(store.getSingle("a2").isDefined, "a2 was not in store") + assert(store.getSingle("a3").isDefined, "a3 was not in store") // Checking whether master knows about the blocks or not assert(master.getLocations("a1").size > 0, "master was not told about a1") @@ -163,8 +156,8 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("master + 2 managers interaction") { - store = new BlockManager("exec1", actorSystem, master, serializer, 2000) - store2 = new BlockManager("exec2", actorSystem, master, new KryoSerializer, 2000) + store = new BlockManager("exec1", actorSystem, master, serializer, 2000, conf) + store2 = new BlockManager("exec2", actorSystem, master, new 
KryoSerializer(conf), 2000, conf) val peers = master.getPeers(store.blockManagerId, 1) assert(peers.size === 1, "master did not return the other manager as a peer") @@ -179,7 +172,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("removing block") { - store = new BlockManager("", actorSystem, master, serializer, 2000) + store = new BlockManager("", actorSystem, master, serializer, 2000, conf) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -193,9 +186,9 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT val memStatus = master.getMemoryStatus.head._2 assert(memStatus._1 == 2000L, "total memory " + memStatus._1 + " should equal 2000") assert(memStatus._2 <= 1200L, "remaining memory " + memStatus._2 + " should <= 1200") - assert(store.getSingle("a1-to-remove") != None, "a1 was not in store") - assert(store.getSingle("a2-to-remove") != None, "a2 was not in store") - assert(store.getSingle("a3-to-remove") != None, "a3 was not in store") + assert(store.getSingle("a1-to-remove").isDefined, "a1 was not in store") + assert(store.getSingle("a2-to-remove").isDefined, "a2 was not in store") + assert(store.getSingle("a3-to-remove").isDefined, "a3 was not in store") // Checking whether master knows about the blocks or not assert(master.getLocations("a1-to-remove").size > 0, "master was not told about a1") @@ -227,7 +220,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("removing rdd") { - store = new BlockManager("", actorSystem, master, serializer, 2000) + store = new BlockManager("", actorSystem, master, serializer, 2000, conf) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -261,12 +254,12 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT test("reregistration on heart beat") { val heartBeat = PrivateMethod[Unit]('heartBeat) - store = new BlockManager("", actorSystem, master, serializer, 2000) + store = new BlockManager("", actorSystem, master, serializer, 2000, conf) val a1 = new Array[Byte](400) store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY) - assert(store.getSingle("a1") != None, "a1 was not in store") + assert(store.getSingle("a1").isDefined, "a1 was not in store") assert(master.getLocations("a1").size > 0, "master was not told about a1") master.removeExecutor(store.blockManagerId.executorId) @@ -277,7 +270,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("reregistration on block update") { - store = new BlockManager("", actorSystem, master, serializer, 2000) + store = new BlockManager("", actorSystem, master, serializer, 2000, conf) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) @@ -296,7 +289,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT test("reregistration doesn't dead lock") { val heartBeat = PrivateMethod[Unit]('heartBeat) - store = new BlockManager("", actorSystem, master, serializer, 2000) + store = new BlockManager("", actorSystem, master, serializer, 2000, conf) val a1 = new Array[Byte](400) val a2 = List(new Array[Byte](400)) @@ -333,45 +326,45 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("in-memory LRU storage") { - store = new BlockManager("", actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200, conf) val a1 = new Array[Byte](400) val a2 = new 
Array[Byte](400) val a3 = new Array[Byte](400) store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY) store.putSingle("a2", a2, StorageLevel.MEMORY_ONLY) store.putSingle("a3", a3, StorageLevel.MEMORY_ONLY) - assert(store.getSingle("a2") != None, "a2 was not in store") - assert(store.getSingle("a3") != None, "a3 was not in store") + assert(store.getSingle("a2").isDefined, "a2 was not in store") + assert(store.getSingle("a3").isDefined, "a3 was not in store") assert(store.getSingle("a1") === None, "a1 was in store") - assert(store.getSingle("a2") != None, "a2 was not in store") + assert(store.getSingle("a2").isDefined, "a2 was not in store") // At this point a2 was gotten last, so LRU will getSingle rid of a3 store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY) - assert(store.getSingle("a1") != None, "a1 was not in store") - assert(store.getSingle("a2") != None, "a2 was not in store") + assert(store.getSingle("a1").isDefined, "a1 was not in store") + assert(store.getSingle("a2").isDefined, "a2 was not in store") assert(store.getSingle("a3") === None, "a3 was in store") } test("in-memory LRU storage with serialization") { - store = new BlockManager("", actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200, conf) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY_SER) store.putSingle("a2", a2, StorageLevel.MEMORY_ONLY_SER) store.putSingle("a3", a3, StorageLevel.MEMORY_ONLY_SER) - assert(store.getSingle("a2") != None, "a2 was not in store") - assert(store.getSingle("a3") != None, "a3 was not in store") + assert(store.getSingle("a2").isDefined, "a2 was not in store") + assert(store.getSingle("a3").isDefined, "a3 was not in store") assert(store.getSingle("a1") === None, "a1 was in store") - assert(store.getSingle("a2") != None, "a2 was not in store") + assert(store.getSingle("a2").isDefined, "a2 was not in store") // At this point a2 was gotten last, so LRU will getSingle rid of a3 store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY_SER) - assert(store.getSingle("a1") != None, "a1 was not in store") - assert(store.getSingle("a2") != None, "a2 was not in store") + assert(store.getSingle("a1").isDefined, "a1 was not in store") + assert(store.getSingle("a2").isDefined, "a2 was not in store") assert(store.getSingle("a3") === None, "a3 was in store") } test("in-memory LRU for partitions of same RDD") { - store = new BlockManager("", actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200, conf) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -381,8 +374,8 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT // Even though we accessed rdd_0_3 last, it should not have replaced partitions 1 and 2 // from the same RDD assert(store.getSingle(rdd(0, 3)) === None, "rdd_0_3 was in store") - assert(store.getSingle(rdd(0, 2)) != None, "rdd_0_2 was not in store") - assert(store.getSingle(rdd(0, 1)) != None, "rdd_0_1 was not in store") + assert(store.getSingle(rdd(0, 2)).isDefined, "rdd_0_2 was not in store") + assert(store.getSingle(rdd(0, 1)).isDefined, "rdd_0_1 was not in store") // Check that rdd_0_3 doesn't replace them even after further accesses assert(store.getSingle(rdd(0, 3)) === None, "rdd_0_3 was in store") assert(store.getSingle(rdd(0, 3)) === None, "rdd_0_3 was in store") @@ -390,7 +383,7 @@ class BlockManagerSuite extends FunSuite with 
BeforeAndAfter with PrivateMethodT } test("in-memory LRU for partitions of multiple RDDs") { - store = new BlockManager("", actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200, conf) store.putSingle(rdd(0, 1), new Array[Byte](400), StorageLevel.MEMORY_ONLY) store.putSingle(rdd(0, 2), new Array[Byte](400), StorageLevel.MEMORY_ONLY) store.putSingle(rdd(1, 1), new Array[Byte](400), StorageLevel.MEMORY_ONLY) @@ -399,7 +392,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT assert(!store.memoryStore.contains(rdd(0, 1)), "rdd_0_1 was in store") assert(store.memoryStore.contains(rdd(0, 2)), "rdd_0_2 was not in store") // Do a get() on rdd_0_2 so that it is the most recently used item - assert(store.getSingle(rdd(0, 2)) != None, "rdd_0_2 was not in store") + assert(store.getSingle(rdd(0, 2)).isDefined, "rdd_0_2 was not in store") // Put in more partitions from RDD 0; they should replace rdd_1_1 store.putSingle(rdd(0, 3), new Array[Byte](400), StorageLevel.MEMORY_ONLY) store.putSingle(rdd(0, 4), new Array[Byte](400), StorageLevel.MEMORY_ONLY) @@ -413,80 +406,80 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("on-disk storage") { - store = new BlockManager("", actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200, conf) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) store.putSingle("a1", a1, StorageLevel.DISK_ONLY) store.putSingle("a2", a2, StorageLevel.DISK_ONLY) store.putSingle("a3", a3, StorageLevel.DISK_ONLY) - assert(store.getSingle("a2") != None, "a2 was in store") - assert(store.getSingle("a3") != None, "a3 was in store") - assert(store.getSingle("a1") != None, "a1 was in store") + assert(store.getSingle("a2").isDefined, "a2 was in store") + assert(store.getSingle("a3").isDefined, "a3 was in store") + assert(store.getSingle("a1").isDefined, "a1 was in store") } test("disk and memory storage") { - store = new BlockManager("", actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200, conf) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) store.putSingle("a1", a1, StorageLevel.MEMORY_AND_DISK) store.putSingle("a2", a2, StorageLevel.MEMORY_AND_DISK) store.putSingle("a3", a3, StorageLevel.MEMORY_AND_DISK) - assert(store.getSingle("a2") != None, "a2 was not in store") - assert(store.getSingle("a3") != None, "a3 was not in store") + assert(store.getSingle("a2").isDefined, "a2 was not in store") + assert(store.getSingle("a3").isDefined, "a3 was not in store") assert(store.memoryStore.getValues("a1") == None, "a1 was in memory store") - assert(store.getSingle("a1") != None, "a1 was not in store") - assert(store.memoryStore.getValues("a1") != None, "a1 was not in memory store") + assert(store.getSingle("a1").isDefined, "a1 was not in store") + assert(store.memoryStore.getValues("a1").isDefined, "a1 was not in memory store") } test("disk and memory storage with getLocalBytes") { - store = new BlockManager("", actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200, conf) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) store.putSingle("a1", a1, StorageLevel.MEMORY_AND_DISK) store.putSingle("a2", a2, StorageLevel.MEMORY_AND_DISK) store.putSingle("a3", a3, StorageLevel.MEMORY_AND_DISK) - 
assert(store.getLocalBytes("a2") != None, "a2 was not in store") - assert(store.getLocalBytes("a3") != None, "a3 was not in store") + assert(store.getLocalBytes("a2").isDefined, "a2 was not in store") + assert(store.getLocalBytes("a3").isDefined, "a3 was not in store") assert(store.memoryStore.getValues("a1") == None, "a1 was in memory store") - assert(store.getLocalBytes("a1") != None, "a1 was not in store") - assert(store.memoryStore.getValues("a1") != None, "a1 was not in memory store") + assert(store.getLocalBytes("a1").isDefined, "a1 was not in store") + assert(store.memoryStore.getValues("a1").isDefined, "a1 was not in memory store") } test("disk and memory storage with serialization") { - store = new BlockManager("", actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200, conf) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) store.putSingle("a1", a1, StorageLevel.MEMORY_AND_DISK_SER) store.putSingle("a2", a2, StorageLevel.MEMORY_AND_DISK_SER) store.putSingle("a3", a3, StorageLevel.MEMORY_AND_DISK_SER) - assert(store.getSingle("a2") != None, "a2 was not in store") - assert(store.getSingle("a3") != None, "a3 was not in store") + assert(store.getSingle("a2").isDefined, "a2 was not in store") + assert(store.getSingle("a3").isDefined, "a3 was not in store") assert(store.memoryStore.getValues("a1") == None, "a1 was in memory store") - assert(store.getSingle("a1") != None, "a1 was not in store") - assert(store.memoryStore.getValues("a1") != None, "a1 was not in memory store") + assert(store.getSingle("a1").isDefined, "a1 was not in store") + assert(store.memoryStore.getValues("a1").isDefined, "a1 was not in memory store") } test("disk and memory storage with serialization and getLocalBytes") { - store = new BlockManager("", actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200, conf) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) store.putSingle("a1", a1, StorageLevel.MEMORY_AND_DISK_SER) store.putSingle("a2", a2, StorageLevel.MEMORY_AND_DISK_SER) store.putSingle("a3", a3, StorageLevel.MEMORY_AND_DISK_SER) - assert(store.getLocalBytes("a2") != None, "a2 was not in store") - assert(store.getLocalBytes("a3") != None, "a3 was not in store") + assert(store.getLocalBytes("a2").isDefined, "a2 was not in store") + assert(store.getLocalBytes("a3").isDefined, "a3 was not in store") assert(store.memoryStore.getValues("a1") == None, "a1 was in memory store") - assert(store.getLocalBytes("a1") != None, "a1 was not in store") - assert(store.memoryStore.getValues("a1") != None, "a1 was not in memory store") + assert(store.getLocalBytes("a1").isDefined, "a1 was not in store") + assert(store.memoryStore.getValues("a1").isDefined, "a1 was not in memory store") } test("LRU with mixed storage levels") { - store = new BlockManager("", actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200, conf) val a1 = new Array[Byte](400) val a2 = new Array[Byte](400) val a3 = new Array[Byte](400) @@ -496,46 +489,46 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT store.putSingle("a2", a2, StorageLevel.MEMORY_ONLY_SER) store.putSingle("a3", a3, StorageLevel.DISK_ONLY) // At this point LRU should not kick in because a3 is only on disk - assert(store.getSingle("a1") != None, "a2 was not in store") - assert(store.getSingle("a2") != None, "a3 was not 
in store") - assert(store.getSingle("a3") != None, "a1 was not in store") - assert(store.getSingle("a1") != None, "a2 was not in store") - assert(store.getSingle("a2") != None, "a3 was not in store") - assert(store.getSingle("a3") != None, "a1 was not in store") + assert(store.getSingle("a1").isDefined, "a2 was not in store") + assert(store.getSingle("a2").isDefined, "a3 was not in store") + assert(store.getSingle("a3").isDefined, "a1 was not in store") + assert(store.getSingle("a1").isDefined, "a2 was not in store") + assert(store.getSingle("a2").isDefined, "a3 was not in store") + assert(store.getSingle("a3").isDefined, "a1 was not in store") // Now let's add in a4, which uses both disk and memory; a1 should drop out store.putSingle("a4", a4, StorageLevel.MEMORY_AND_DISK_SER) assert(store.getSingle("a1") == None, "a1 was in store") - assert(store.getSingle("a2") != None, "a2 was not in store") - assert(store.getSingle("a3") != None, "a3 was not in store") - assert(store.getSingle("a4") != None, "a4 was not in store") + assert(store.getSingle("a2").isDefined, "a2 was not in store") + assert(store.getSingle("a3").isDefined, "a3 was not in store") + assert(store.getSingle("a4").isDefined, "a4 was not in store") } test("in-memory LRU with streams") { - store = new BlockManager("", actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200, conf) val list1 = List(new Array[Byte](200), new Array[Byte](200)) val list2 = List(new Array[Byte](200), new Array[Byte](200)) val list3 = List(new Array[Byte](200), new Array[Byte](200)) store.put("list1", list1.iterator, StorageLevel.MEMORY_ONLY, tellMaster = true) store.put("list2", list2.iterator, StorageLevel.MEMORY_ONLY, tellMaster = true) store.put("list3", list3.iterator, StorageLevel.MEMORY_ONLY, tellMaster = true) - assert(store.get("list2") != None, "list2 was not in store") + assert(store.get("list2").isDefined, "list2 was not in store") assert(store.get("list2").get.size == 2) - assert(store.get("list3") != None, "list3 was not in store") + assert(store.get("list3").isDefined, "list3 was not in store") assert(store.get("list3").get.size == 2) assert(store.get("list1") === None, "list1 was in store") - assert(store.get("list2") != None, "list2 was not in store") + assert(store.get("list2").isDefined, "list2 was not in store") assert(store.get("list2").get.size == 2) // At this point list2 was gotten last, so LRU will getSingle rid of list3 store.put("list1", list1.iterator, StorageLevel.MEMORY_ONLY, tellMaster = true) - assert(store.get("list1") != None, "list1 was not in store") + assert(store.get("list1").isDefined, "list1 was not in store") assert(store.get("list1").get.size == 2) - assert(store.get("list2") != None, "list2 was not in store") + assert(store.get("list2").isDefined, "list2 was not in store") assert(store.get("list2").get.size == 2) assert(store.get("list3") === None, "list1 was in store") } test("LRU with mixed storage levels and streams") { - store = new BlockManager("", actorSystem, master, serializer, 1200) + store = new BlockManager("", actorSystem, master, serializer, 1200, conf) val list1 = List(new Array[Byte](200), new Array[Byte](200)) val list2 = List(new Array[Byte](200), new Array[Byte](200)) val list3 = List(new Array[Byte](200), new Array[Byte](200)) @@ -545,26 +538,26 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT store.put("list2", list2.iterator, StorageLevel.MEMORY_ONLY_SER, tellMaster = true) store.put("list3", 
list3.iterator, StorageLevel.DISK_ONLY, tellMaster = true) // At this point LRU should not kick in because list3 is only on disk - assert(store.get("list1") != None, "list2 was not in store") + assert(store.get("list1").isDefined, "list2 was not in store") assert(store.get("list1").get.size === 2) - assert(store.get("list2") != None, "list3 was not in store") + assert(store.get("list2").isDefined, "list3 was not in store") assert(store.get("list2").get.size === 2) - assert(store.get("list3") != None, "list1 was not in store") + assert(store.get("list3").isDefined, "list1 was not in store") assert(store.get("list3").get.size === 2) - assert(store.get("list1") != None, "list2 was not in store") + assert(store.get("list1").isDefined, "list2 was not in store") assert(store.get("list1").get.size === 2) - assert(store.get("list2") != None, "list3 was not in store") + assert(store.get("list2").isDefined, "list3 was not in store") assert(store.get("list2").get.size === 2) - assert(store.get("list3") != None, "list1 was not in store") + assert(store.get("list3").isDefined, "list1 was not in store") assert(store.get("list3").get.size === 2) // Now let's add in list4, which uses both disk and memory; list1 should drop out store.put("list4", list4.iterator, StorageLevel.MEMORY_AND_DISK_SER, tellMaster = true) assert(store.get("list1") === None, "list1 was in store") - assert(store.get("list2") != None, "list3 was not in store") + assert(store.get("list2").isDefined, "list3 was not in store") assert(store.get("list2").get.size === 2) - assert(store.get("list3") != None, "list1 was not in store") + assert(store.get("list3").isDefined, "list1 was not in store") assert(store.get("list3").get.size === 2) - assert(store.get("list4") != None, "list4 was not in store") + assert(store.get("list4").isDefined, "list4 was not in store") assert(store.get("list4").get.size === 2) } @@ -581,63 +574,63 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT } test("overly large block") { - store = new BlockManager("", actorSystem, master, serializer, 500) + store = new BlockManager("", actorSystem, master, serializer, 500, conf) store.putSingle("a1", new Array[Byte](1000), StorageLevel.MEMORY_ONLY) assert(store.getSingle("a1") === None, "a1 was in store") store.putSingle("a2", new Array[Byte](1000), StorageLevel.MEMORY_AND_DISK) assert(store.memoryStore.getValues("a2") === None, "a2 was in memory store") - assert(store.getSingle("a2") != None, "a2 was not in store") + assert(store.getSingle("a2").isDefined, "a2 was not in store") } test("block compression") { try { - System.setProperty("spark.shuffle.compress", "true") - store = new BlockManager("exec1", actorSystem, master, serializer, 2000) + conf.set("spark.shuffle.compress", "true") + store = new BlockManager("exec1", actorSystem, master, serializer, 2000, conf) store.putSingle(ShuffleBlockId(0, 0, 0), new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER) assert(store.memoryStore.getSize(ShuffleBlockId(0, 0, 0)) <= 100, "shuffle_0_0_0 was not compressed") store.stop() store = null - System.setProperty("spark.shuffle.compress", "false") - store = new BlockManager("exec2", actorSystem, master, serializer, 2000) + conf.set("spark.shuffle.compress", "false") + store = new BlockManager("exec2", actorSystem, master, serializer, 2000, conf) store.putSingle(ShuffleBlockId(0, 0, 0), new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER) assert(store.memoryStore.getSize(ShuffleBlockId(0, 0, 0)) >= 1000, "shuffle_0_0_0 was compressed") store.stop() 
store = null - System.setProperty("spark.broadcast.compress", "true") - store = new BlockManager("exec3", actorSystem, master, serializer, 2000) + conf.set("spark.broadcast.compress", "true") + store = new BlockManager("exec3", actorSystem, master, serializer, 2000, conf) store.putSingle(BroadcastBlockId(0), new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER) assert(store.memoryStore.getSize(BroadcastBlockId(0)) <= 100, "broadcast_0 was not compressed") store.stop() store = null - System.setProperty("spark.broadcast.compress", "false") - store = new BlockManager("exec4", actorSystem, master, serializer, 2000) + conf.set("spark.broadcast.compress", "false") + store = new BlockManager("exec4", actorSystem, master, serializer, 2000, conf) store.putSingle(BroadcastBlockId(0), new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER) assert(store.memoryStore.getSize(BroadcastBlockId(0)) >= 1000, "broadcast_0 was compressed") store.stop() store = null - System.setProperty("spark.rdd.compress", "true") - store = new BlockManager("exec5", actorSystem, master, serializer, 2000) + conf.set("spark.rdd.compress", "true") + store = new BlockManager("exec5", actorSystem, master, serializer, 2000, conf) store.putSingle(rdd(0, 0), new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER) assert(store.memoryStore.getSize(rdd(0, 0)) <= 100, "rdd_0_0 was not compressed") store.stop() store = null - System.setProperty("spark.rdd.compress", "false") - store = new BlockManager("exec6", actorSystem, master, serializer, 2000) + conf.set("spark.rdd.compress", "false") + store = new BlockManager("exec6", actorSystem, master, serializer, 2000, conf) store.putSingle(rdd(0, 0), new Array[Byte](1000), StorageLevel.MEMORY_ONLY_SER) assert(store.memoryStore.getSize(rdd(0, 0)) >= 1000, "rdd_0_0 was compressed") store.stop() store = null // Check that any other block types are also kept uncompressed - store = new BlockManager("exec7", actorSystem, master, serializer, 2000) + store = new BlockManager("exec7", actorSystem, master, serializer, 2000, conf) store.putSingle("other_block", new Array[Byte](1000), StorageLevel.MEMORY_ONLY) assert(store.memoryStore.getSize("other_block") >= 1000, "other_block was compressed") store.stop() @@ -651,7 +644,7 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT test("block store put failure") { // Use Java serializer so we can create an unserializable error. - store = new BlockManager("", actorSystem, master, new JavaSerializer, 1200) + store = new BlockManager("", actorSystem, master, new JavaSerializer(conf), 1200, conf) // The put should fail since a1 is not serializable. 
class UnserializableClass diff --git a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala index 070982e798f34..829f389460f3b 100644 --- a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala @@ -17,15 +17,16 @@ package org.apache.spark.storage -import java.io.{FileWriter, File} +import java.io.{File, FileWriter} import scala.collection.mutable import com.google.common.io.Files -import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} - -class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll { +import org.apache.spark.SparkConf +import org.scalatest.{BeforeAndAfterEach, FunSuite} +class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach { + private val testConf = new SparkConf(false) val rootDir0 = Files.createTempDir() rootDir0.deleteOnExit() val rootDir1 = Files.createTempDir() @@ -35,21 +36,16 @@ class DiskBlockManagerSuite extends FunSuite with BeforeAndAfterEach with Before // This suite focuses primarily on consolidation features, // so we coerce consolidation if not already enabled. - val consolidateProp = "spark.shuffle.consolidateFiles" - val oldConsolidate = Option(System.getProperty(consolidateProp)) - System.setProperty(consolidateProp, "true") + testConf.set("spark.shuffle.consolidateFiles", "true") val shuffleBlockManager = new ShuffleBlockManager(null) { + override def conf = testConf.clone var idToSegmentMap = mutable.Map[ShuffleBlockId, FileSegment]() override def getBlockLocation(id: ShuffleBlockId) = idToSegmentMap(id) } var diskBlockManager: DiskBlockManager = _ - override def afterAll() { - oldConsolidate.map(c => System.setProperty(consolidateProp, c)) - } - override def beforeEach() { diskBlockManager = new DiskBlockManager(shuffleBlockManager, rootDirs) shuffleBlockManager.idToSegmentMap.clear() diff --git a/core/src/test/scala/org/apache/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala index 8f0ec6683b809..3764f4d1a0c9b 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala @@ -34,7 +34,6 @@ class UISuite extends FunSuite { } val (jettyServer1, boundPort1) = JettyUtils.startJettyServer("localhost", startPort, Seq()) val (jettyServer2, boundPort2) = JettyUtils.startJettyServer("localhost", startPort, Seq()) - // Allow some wiggle room in case ports on the machine are under contention assert(boundPort1 > startPort && boundPort1 < startPort + 10) assert(boundPort2 > boundPort1 && boundPort2 < boundPort1 + 10) diff --git a/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala b/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala new file mode 100644 index 0000000000000..67a57a0e7f9d0 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ui.jobs + +import org.scalatest.FunSuite +import org.apache.spark.scheduler._ +import org.apache.spark.{LocalSparkContext, SparkContext, Success} +import org.apache.spark.scheduler.SparkListenerTaskStart +import org.apache.spark.executor.{ShuffleReadMetrics, TaskMetrics} + +class JobProgressListenerSuite extends FunSuite with LocalSparkContext { + test("test executor id to summary") { + val sc = new SparkContext("local", "test") + val listener = new JobProgressListener(sc) + val taskMetrics = new TaskMetrics() + val shuffleReadMetrics = new ShuffleReadMetrics() + + // nothing in it + assert(listener.stageIdToExecutorSummaries.size == 0) + + // finish this task, should get updated shuffleRead + shuffleReadMetrics.remoteBytesRead = 1000 + taskMetrics.shuffleReadMetrics = Some(shuffleReadMetrics) + var taskInfo = new TaskInfo(1234L, 0, 0L, "exe-1", "host1", TaskLocality.NODE_LOCAL) + taskInfo.finishTime = 1 + listener.onTaskEnd(new SparkListenerTaskEnd( + new ShuffleMapTask(0, null, null, 0, null), Success, taskInfo, taskMetrics)) + assert(listener.stageIdToExecutorSummaries.getOrElse(0, fail()).getOrElse("exe-1", fail()) + .shuffleRead == 1000) + + // finish a task with unknown executor-id, nothing should happen + taskInfo = new TaskInfo(1234L, 0, 1000L, "exe-unknown", "host1", TaskLocality.NODE_LOCAL) + taskInfo.finishTime = 1 + listener.onTaskEnd(new SparkListenerTaskEnd( + new ShuffleMapTask(0, null, null, 0, null), Success, taskInfo, taskMetrics)) + assert(listener.stageIdToExecutorSummaries.size == 1) + + // finish this task, should get updated duration + shuffleReadMetrics.remoteBytesRead = 1000 + taskMetrics.shuffleReadMetrics = Some(shuffleReadMetrics) + taskInfo = new TaskInfo(1235L, 0, 0L, "exe-1", "host1", TaskLocality.NODE_LOCAL) + taskInfo.finishTime = 1 + listener.onTaskEnd(new SparkListenerTaskEnd( + new ShuffleMapTask(0, null, null, 0, null), Success, taskInfo, taskMetrics)) + assert(listener.stageIdToExecutorSummaries.getOrElse(0, fail()).getOrElse("exe-1", fail()) + .shuffleRead == 2000) + + // finish this task, should get updated duration + shuffleReadMetrics.remoteBytesRead = 1000 + taskMetrics.shuffleReadMetrics = Some(shuffleReadMetrics) + taskInfo = new TaskInfo(1236L, 0, 0L, "exe-2", "host1", TaskLocality.NODE_LOCAL) + taskInfo.finishTime = 1 + listener.onTaskEnd(new SparkListenerTaskEnd( + new ShuffleMapTask(0, null, null, 0, null), Success, taskInfo, taskMetrics)) + assert(listener.stageIdToExecutorSummaries.getOrElse(0, fail()).getOrElse("exe-2", fail()) + .shuffleRead == 1000) + } +} diff --git a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala index 0ed366fb707b7..de4871d0433ef 100644 --- a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala @@ -61,8 +61,8 @@ class NonSerializable {} object TestObject { def run(): Int = { var nonSer = new NonSerializable - var x = 5 - return withSpark(new SparkContext("local", "test")) { sc => + val x = 5 + 
withSpark(new SparkContext("local", "test")) { sc => val nums = sc.parallelize(Array(1, 2, 3, 4)) nums.map(_ + x).reduce(_ + _) } @@ -76,7 +76,7 @@ class TestClass extends Serializable { def run(): Int = { var nonSer = new NonSerializable - return withSpark(new SparkContext("local", "test")) { sc => + withSpark(new SparkContext("local", "test")) { sc => val nums = sc.parallelize(Array(1, 2, 3, 4)) nums.map(_ + getX).reduce(_ + _) } @@ -88,7 +88,7 @@ class TestClassWithoutDefaultConstructor(x: Int) extends Serializable { def run(): Int = { var nonSer = new NonSerializable - return withSpark(new SparkContext("local", "test")) { sc => + withSpark(new SparkContext("local", "test")) { sc => val nums = sc.parallelize(Array(1, 2, 3, 4)) nums.map(_ + getX).reduce(_ + _) } @@ -103,7 +103,7 @@ class TestClassWithoutFieldAccess { def run(): Int = { var nonSer2 = new NonSerializable var x = 5 - return withSpark(new SparkContext("local", "test")) { sc => + withSpark(new SparkContext("local", "test")) { sc => val nums = sc.parallelize(Array(1, 2, 3, 4)) nums.map(_ + x).reduce(_ + _) } @@ -115,7 +115,7 @@ object TestObjectWithNesting { def run(): Int = { var nonSer = new NonSerializable var answer = 0 - return withSpark(new SparkContext("local", "test")) { sc => + withSpark(new SparkContext("local", "test")) { sc => val nums = sc.parallelize(Array(1, 2, 3, 4)) var y = 1 for (i <- 1 to 4) { @@ -134,7 +134,7 @@ class TestClassWithNesting(val y: Int) extends Serializable { def run(): Int = { var nonSer = new NonSerializable var answer = 0 - return withSpark(new SparkContext("local", "test")) { sc => + withSpark(new SparkContext("local", "test")) { sc => val nums = sc.parallelize(Array(1, 2, 3, 4)) for (i <- 1 to 4) { var nonSer2 = new NonSerializable diff --git a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala index 4e40dcbdeebe8..11ebdc352bd74 100644 --- a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.util import org.scalatest.FunSuite import org.scalatest.BeforeAndAfterAll import org.scalatest.PrivateMethodTester +import org.apache.spark.SparkContext class DummyClass1 {} @@ -63,54 +64,53 @@ class SizeEstimatorSuite } test("simple classes") { - assert(SizeEstimator.estimate(new DummyClass1) === 16) - assert(SizeEstimator.estimate(new DummyClass2) === 16) - assert(SizeEstimator.estimate(new DummyClass3) === 24) - assert(SizeEstimator.estimate(new DummyClass4(null)) === 24) - assert(SizeEstimator.estimate(new DummyClass4(new DummyClass3)) === 48) + expectResult(16)(SizeEstimator.estimate(new DummyClass1)) + expectResult(16)(SizeEstimator.estimate(new DummyClass2)) + expectResult(24)(SizeEstimator.estimate(new DummyClass3)) + expectResult(24)(SizeEstimator.estimate(new DummyClass4(null))) + expectResult(48)(SizeEstimator.estimate(new DummyClass4(new DummyClass3))) } // NOTE: The String class definition varies across JDK versions (1.6 vs. 1.7) and vendors // (Sun vs IBM). Use a DummyString class to make tests deterministic. 
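The DummyString helper referenced in the note above is defined elsewhere in this file, outside this hunk. A minimal sketch of such a deterministic stand-in, assuming one char-array field plus two int fields so the byte counts below work out; the exact field names are an assumption, not part of this patch:

    // Wraps the characters directly so the estimate does not depend on the JDK's String layout.
    class DummyString(val arr: Array[Char]) {
      override val hashCode: Int = 0    // assumed field standing in for String's cached hash
      @transient val hash32: Int = 0    // assumed field standing in for the hash32 field JDK 7 added
    }
    object DummyString {
      def apply(str: String): DummyString = new DummyString(str.toCharArray)
    }

Under 64-bit compressed oops this shape costs roughly 24 bytes for the wrapper plus 16 for an empty char array, giving the 40 expected below; each character adds 2 bytes before 8-byte alignment, which matches 48 for "a" and "ab" and 56 for "abcdefgh".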
test("strings") { - assert(SizeEstimator.estimate(DummyString("")) === 40) - assert(SizeEstimator.estimate(DummyString("a")) === 48) - assert(SizeEstimator.estimate(DummyString("ab")) === 48) - assert(SizeEstimator.estimate(DummyString("abcdefgh")) === 56) + expectResult(40)(SizeEstimator.estimate(DummyString(""))) + expectResult(48)(SizeEstimator.estimate(DummyString("a"))) + expectResult(48)(SizeEstimator.estimate(DummyString("ab"))) + expectResult(56)(SizeEstimator.estimate(DummyString("abcdefgh"))) } test("primitive arrays") { - assert(SizeEstimator.estimate(new Array[Byte](10)) === 32) - assert(SizeEstimator.estimate(new Array[Char](10)) === 40) - assert(SizeEstimator.estimate(new Array[Short](10)) === 40) - assert(SizeEstimator.estimate(new Array[Int](10)) === 56) - assert(SizeEstimator.estimate(new Array[Long](10)) === 96) - assert(SizeEstimator.estimate(new Array[Float](10)) === 56) - assert(SizeEstimator.estimate(new Array[Double](10)) === 96) - assert(SizeEstimator.estimate(new Array[Int](1000)) === 4016) - assert(SizeEstimator.estimate(new Array[Long](1000)) === 8016) + expectResult(32)(SizeEstimator.estimate(new Array[Byte](10))) + expectResult(40)(SizeEstimator.estimate(new Array[Char](10))) + expectResult(40)(SizeEstimator.estimate(new Array[Short](10))) + expectResult(56)(SizeEstimator.estimate(new Array[Int](10))) + expectResult(96)(SizeEstimator.estimate(new Array[Long](10))) + expectResult(56)(SizeEstimator.estimate(new Array[Float](10))) + expectResult(96)(SizeEstimator.estimate(new Array[Double](10))) + expectResult(4016)(SizeEstimator.estimate(new Array[Int](1000))) + expectResult(8016)(SizeEstimator.estimate(new Array[Long](1000))) } test("object arrays") { // Arrays containing nulls should just have one pointer per element - assert(SizeEstimator.estimate(new Array[String](10)) === 56) - assert(SizeEstimator.estimate(new Array[AnyRef](10)) === 56) - + expectResult(56)(SizeEstimator.estimate(new Array[String](10))) + expectResult(56)(SizeEstimator.estimate(new Array[AnyRef](10))) // For object arrays with non-null elements, each object should take one pointer plus // however many bytes that class takes. (Note that Array.fill calls the code in its // second parameter separately for each object, so we get distinct objects.) - assert(SizeEstimator.estimate(Array.fill(10)(new DummyClass1)) === 216) - assert(SizeEstimator.estimate(Array.fill(10)(new DummyClass2)) === 216) - assert(SizeEstimator.estimate(Array.fill(10)(new DummyClass3)) === 296) - assert(SizeEstimator.estimate(Array(new DummyClass1, new DummyClass2)) === 56) + expectResult(216)(SizeEstimator.estimate(Array.fill(10)(new DummyClass1))) + expectResult(216)(SizeEstimator.estimate(Array.fill(10)(new DummyClass2))) + expectResult(296)(SizeEstimator.estimate(Array.fill(10)(new DummyClass3))) + expectResult(56)(SizeEstimator.estimate(Array(new DummyClass1, new DummyClass2))) // Past size 100, our samples 100 elements, but we should still get the right size. - assert(SizeEstimator.estimate(Array.fill(1000)(new DummyClass3)) === 28016) + expectResult(28016)(SizeEstimator.estimate(Array.fill(1000)(new DummyClass3))) // If an array contains the *same* element many times, we should only count it once. 
val d1 = new DummyClass1 - assert(SizeEstimator.estimate(Array.fill(10)(d1)) === 72) // 10 pointers plus 8-byte object - assert(SizeEstimator.estimate(Array.fill(100)(d1)) === 432) // 100 pointers plus 8-byte object + expectResult(72)(SizeEstimator.estimate(Array.fill(10)(d1))) // 10 pointers plus 8-byte object + expectResult(432)(SizeEstimator.estimate(Array.fill(100)(d1))) // 100 pointers plus 8-byte object // Same thing with huge array containing the same element many times. Note that this won't // return exactly 4032 because it can't tell that *all* the elements will equal the first @@ -128,11 +128,10 @@ class SizeEstimatorSuite val initialize = PrivateMethod[Unit]('initialize) SizeEstimator invokePrivate initialize() - assert(SizeEstimator.estimate(DummyString("")) === 40) - assert(SizeEstimator.estimate(DummyString("a")) === 48) - assert(SizeEstimator.estimate(DummyString("ab")) === 48) - assert(SizeEstimator.estimate(DummyString("abcdefgh")) === 56) - + expectResult(40)(SizeEstimator.estimate(DummyString(""))) + expectResult(48)(SizeEstimator.estimate(DummyString("a"))) + expectResult(48)(SizeEstimator.estimate(DummyString("ab"))) + expectResult(56)(SizeEstimator.estimate(DummyString("abcdefgh"))) resetOrClear("os.arch", arch) } @@ -141,14 +140,13 @@ class SizeEstimatorSuite test("64-bit arch with no compressed oops") { val arch = System.setProperty("os.arch", "amd64") val oops = System.setProperty("spark.test.useCompressedOops", "false") - val initialize = PrivateMethod[Unit]('initialize) SizeEstimator invokePrivate initialize() - assert(SizeEstimator.estimate(DummyString("")) === 56) - assert(SizeEstimator.estimate(DummyString("a")) === 64) - assert(SizeEstimator.estimate(DummyString("ab")) === 64) - assert(SizeEstimator.estimate(DummyString("abcdefgh")) === 72) + expectResult(56)(SizeEstimator.estimate(DummyString(""))) + expectResult(64)(SizeEstimator.estimate(DummyString("a"))) + expectResult(64)(SizeEstimator.estimate(DummyString("ab"))) + expectResult(72)(SizeEstimator.estimate(DummyString("abcdefgh"))) resetOrClear("os.arch", arch) resetOrClear("spark.test.useCompressedOops", oops) diff --git a/core/src/test/scala/org/apache/spark/util/SizeTrackingAppendOnlyMapSuite.scala b/core/src/test/scala/org/apache/spark/util/SizeTrackingAppendOnlyMapSuite.scala new file mode 100644 index 0000000000000..93f0c6a8e6408 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/SizeTrackingAppendOnlyMapSuite.scala @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.util + +import scala.util.Random + +import org.scalatest.{BeforeAndAfterAll, FunSuite} + +import org.apache.spark.util.SizeTrackingAppendOnlyMapSuite.LargeDummyClass +import org.apache.spark.util.collection.{AppendOnlyMap, SizeTrackingAppendOnlyMap} + +class SizeTrackingAppendOnlyMapSuite extends FunSuite with BeforeAndAfterAll { + val NORMAL_ERROR = 0.20 + val HIGH_ERROR = 0.30 + + test("fixed size insertions") { + testWith[Int, Long](10000, i => (i, i.toLong)) + testWith[Int, (Long, Long)](10000, i => (i, (i.toLong, i.toLong))) + testWith[Int, LargeDummyClass](10000, i => (i, new LargeDummyClass())) + } + + test("variable size insertions") { + val rand = new Random(123456789) + def randString(minLen: Int, maxLen: Int): String = { + "a" * (rand.nextInt(maxLen - minLen) + minLen) + } + testWith[Int, String](10000, i => (i, randString(0, 10))) + testWith[Int, String](10000, i => (i, randString(0, 100))) + testWith[Int, String](10000, i => (i, randString(90, 100))) + } + + test("updates") { + val rand = new Random(123456789) + def randString(minLen: Int, maxLen: Int): String = { + "a" * (rand.nextInt(maxLen - minLen) + minLen) + } + testWith[String, Int](10000, i => (randString(0, 10000), i)) + } + + def testWith[K, V](numElements: Int, makeElement: (Int) => (K, V)) { + val map = new SizeTrackingAppendOnlyMap[K, V]() + for (i <- 0 until numElements) { + val (k, v) = makeElement(i) + map(k) = v + expectWithinError(map, map.estimateSize(), if (i < 32) HIGH_ERROR else NORMAL_ERROR) + } + } + + def expectWithinError(obj: AnyRef, estimatedSize: Long, error: Double) { + val betterEstimatedSize = SizeEstimator.estimate(obj) + assert(betterEstimatedSize * (1 - error) < estimatedSize, + s"Estimated size $estimatedSize was less than expected size $betterEstimatedSize") + assert(betterEstimatedSize * (1 + 2 * error) > estimatedSize, + s"Estimated size $estimatedSize was greater than expected size $betterEstimatedSize") + } +} + +object SizeTrackingAppendOnlyMapSuite { + // Speed test, for reproducibility of results. + // These could be highly non-deterministic in general, however. 
+ // Results: + // AppendOnlyMap: 31 ms + // SizeTracker: 54 ms + // SizeEstimator: 1500 ms + def main(args: Array[String]) { + val numElements = 100000 + + val baseTimes = for (i <- 0 until 10) yield time { + val map = new AppendOnlyMap[Int, LargeDummyClass]() + for (i <- 0 until numElements) { + map(i) = new LargeDummyClass() + } + } + + val sampledTimes = for (i <- 0 until 10) yield time { + val map = new SizeTrackingAppendOnlyMap[Int, LargeDummyClass]() + for (i <- 0 until numElements) { + map(i) = new LargeDummyClass() + map.estimateSize() + } + } + + val unsampledTimes = for (i <- 0 until 3) yield time { + val map = new AppendOnlyMap[Int, LargeDummyClass]() + for (i <- 0 until numElements) { + map(i) = new LargeDummyClass() + SizeEstimator.estimate(map) + } + } + + println("Base: " + baseTimes) + println("SizeTracker (sampled): " + sampledTimes) + println("SizeEstimator (unsampled): " + unsampledTimes) + } + + def time(f: => Unit): Long = { + val start = System.currentTimeMillis() + f + System.currentTimeMillis() - start + } + + private class LargeDummyClass { + val arr = new Array[Int](100) + } +} diff --git a/core/src/test/scala/org/apache/spark/util/VectorSuite.scala b/core/src/test/scala/org/apache/spark/util/VectorSuite.scala new file mode 100644 index 0000000000000..7006571ef0ef6 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/VectorSuite.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import scala.util.Random + +import org.scalatest.FunSuite + +/** + * Tests org.apache.spark.util.Vector functionality + */ +class VectorSuite extends FunSuite { + + def verifyVector(vector: Vector, expectedLength: Int) = { + assert(vector.length == expectedLength) + assert(vector.elements.min > 0.0) + assert(vector.elements.max < 1.0) + } + + test("random with default random number generator") { + val vector100 = Vector.random(100) + verifyVector(vector100, 100) + } + + test("random with given random number generator") { + val vector100 = Vector.random(100, new Random(100)) + verifyVector(vector100, 100) + } +} diff --git a/core/src/test/scala/org/apache/spark/util/XORShiftRandomSuite.scala b/core/src/test/scala/org/apache/spark/util/XORShiftRandomSuite.scala new file mode 100644 index 0000000000000..f1d7b61b31e63 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/XORShiftRandomSuite.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.util.Random +import org.scalatest.FlatSpec +import org.scalatest.FunSuite +import org.scalatest.matchers.ShouldMatchers +import org.apache.spark.util.Utils.times + +class XORShiftRandomSuite extends FunSuite with ShouldMatchers { + + def fixture = new { + val seed = 1L + val xorRand = new XORShiftRandom(seed) + val hundMil = 1e8.toInt + } + + /* + * This test is based on a chi-squared test for randomness. The values are hard-coded + * so as not to create Spark's dependency on apache.commons.math3 just to call one + * method for calculating the exact p-value for a given number of random numbers + * and bins. In case one would want to move to a full-fledged test based on + * apache.commons.math3, the relevant class is here: + * org.apache.commons.math3.stat.inference.ChiSquareTest + */ + test ("XORShift generates valid random numbers") { + + val f = fixture + + val numBins = 10 + // create 10 bins + val bins = Array.fill(numBins)(0) + + // populate bins based on modulus of the random number + times(f.hundMil) {bins(math.abs(f.xorRand.nextInt) % 10) += 1} + + /* since the seed is deterministic, until the algorithm is changed, we know the result will be + * exactly this: Array(10004908, 9993136, 9994600, 10000744, 10000091, 10002474, 10002272, + * 10000790, 10002286, 9998699), so the test will never fail at the prespecified (5%) + * significance level. However, should the RNG implementation change, the test should still + * pass at the same significance level. The chi-squared test done in R gave the following + * results: + * > chisq.test(c(10004908, 9993136, 9994600, 10000744, 10000091, 10002474, 10002272, + * 10000790, 10002286, 9998699)) + * Chi-squared test for given probabilities + * data: c(10004908, 9993136, 9994600, 10000744, 10000091, 10002474, 10002272, 10000790, + * 10002286, 9998699) + * X-squared = 11.975, df = 9, p-value = 0.2147 + * Note that the p-value was ~0.22. The test will fail if alpha < 0.05, which for 100 million + * random numbers + * and 10 bins will happen at X-squared of ~16.9196. So, the test will fail if X-squared + * is greater than or equal to that number. + */ + val binSize = f.hundMil/numBins + val xSquared = bins.map(x => math.pow((binSize - x), 2)/binSize).sum + xSquared should be < (16.9196) + + } + +} diff --git a/core/src/test/scala/org/apache/spark/util/AppendOnlyMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/AppendOnlyMapSuite.scala similarity index 75% rename from core/src/test/scala/org/apache/spark/util/AppendOnlyMapSuite.scala rename to core/src/test/scala/org/apache/spark/util/collection/AppendOnlyMapSuite.scala index 7177919a58157..f44442f1a5328 100644 --- a/core/src/test/scala/org/apache/spark/util/AppendOnlyMapSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/AppendOnlyMapSuite.scala @@ -15,11 +15,12 @@ * limitations under the License. 
diff --git a/core/src/test/scala/org/apache/spark/util/AppendOnlyMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/AppendOnlyMapSuite.scala similarity index 75% rename from core/src/test/scala/org/apache/spark/util/AppendOnlyMapSuite.scala rename to core/src/test/scala/org/apache/spark/util/collection/AppendOnlyMapSuite.scala index 7177919a58157..f44442f1a5328 100644 --- a/core/src/test/scala/org/apache/spark/util/AppendOnlyMapSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/AppendOnlyMapSuite.scala @@ -15,11 +15,12 @@ * limitations under the License. */ -package org.apache.spark.util +package org.apache.spark.util.collection import scala.collection.mutable.HashSet import org.scalatest.FunSuite +import java.util.Comparator class AppendOnlyMapSuite extends FunSuite { test("initialization") { @@ -151,4 +152,47 @@ class AppendOnlyMapSuite extends FunSuite { assert(map("" + i) === "" + i) } } + + test("destructive sort") { + val map = new AppendOnlyMap[String, String]() + for (i <- 1 to 100) { + map("" + i) = "" + i + } + map.update(null, "happy new year!") + + try { + map.apply("1") + map.update("1", "2013") + map.changeValue("1", (hadValue, oldValue) => "2014") + map.iterator + } catch { + case e: IllegalStateException => fail() + } + + val it = map.destructiveSortedIterator(new Comparator[(String, String)] { + def compare(kv1: (String, String), kv2: (String, String)): Int = { + val x = if (kv1 != null && kv1._1 != null) kv1._1.toInt else Int.MinValue + val y = if (kv2 != null && kv2._1 != null) kv2._1.toInt else Int.MinValue + x.compareTo(y) + } + }) + + // Should be sorted by key + assert(it.hasNext) + var previous = it.next() + assert(previous == (null, "happy new year!")) + previous = it.next() + assert(previous == ("1", "2014")) + while (it.hasNext) { + val kv = it.next() + assert(kv._1.toInt > previous._1.toInt) + previous = kv + } + + // All subsequent calls to apply, update, changeValue and iterator should throw an exception + intercept[AssertionError] { map.apply("1") } + intercept[AssertionError] { map.update("1", "2013") } + intercept[AssertionError] { map.changeValue("1", (hadValue, oldValue) => "2014") } + intercept[AssertionError] { map.iterator } + } } 
diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala new file mode 100644 index 0000000000000..fce1184d46364 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.spark.util.collection + +import scala.collection.mutable.ArrayBuffer + +import org.scalatest.FunSuite + +import org.apache.spark._ +import org.apache.spark.SparkContext._ + +class ExternalAppendOnlyMapSuite extends FunSuite with LocalSparkContext { + + private def createCombiner(i: Int) = ArrayBuffer[Int](i) + private def mergeValue(buffer: ArrayBuffer[Int], i: Int) = buffer += i + private def mergeCombiners(buf1: ArrayBuffer[Int], buf2: ArrayBuffer[Int]) = buf1 ++= buf2 + + test("simple insert") { + val conf = new SparkConf(false) + sc = new SparkContext("local", "test", conf) + + val map = new ExternalAppendOnlyMap[Int, Int, ArrayBuffer[Int]](createCombiner, + mergeValue, mergeCombiners) + + // Single insert + map.insert(1, 10) + var it = map.iterator + assert(it.hasNext) + val kv = it.next() + assert(kv._1 == 1 && kv._2 == ArrayBuffer[Int](10)) + assert(!it.hasNext) + + // Multiple insert + map.insert(2, 20) + map.insert(3, 30) + it = map.iterator + assert(it.hasNext) + assert(it.toSet == Set[(Int, ArrayBuffer[Int])]( + (1, ArrayBuffer[Int](10)), + (2, ArrayBuffer[Int](20)), + (3, ArrayBuffer[Int](30)))) + } + + test("insert with collision") { + val conf = new SparkConf(false) + sc = new SparkContext("local", "test", conf) + + val map = new ExternalAppendOnlyMap[Int, Int, ArrayBuffer[Int]](createCombiner, + mergeValue, mergeCombiners) + + map.insert(1, 10) + map.insert(2, 20) + map.insert(3, 30) + map.insert(1, 100) + map.insert(2, 200) + map.insert(1, 1000) + val it = map.iterator + assert(it.hasNext) + val result = it.toSet[(Int, ArrayBuffer[Int])].map(kv => (kv._1, kv._2.toSet)) + assert(result == Set[(Int, Set[Int])]( + (1, Set[Int](10, 100, 1000)), + (2, Set[Int](20, 200)), + (3, Set[Int](30)))) + } + + test("ordering") { + val conf = new SparkConf(false) + sc = new SparkContext("local", "test", conf) + + val map1 = new ExternalAppendOnlyMap[Int, Int, ArrayBuffer[Int]](createCombiner, + mergeValue, mergeCombiners) + map1.insert(1, 10) + map1.insert(2, 20) + map1.insert(3, 30) + + val map2 = new ExternalAppendOnlyMap[Int, Int, ArrayBuffer[Int]](createCombiner, + mergeValue, mergeCombiners) + map2.insert(2, 20) + map2.insert(3, 30) + map2.insert(1, 10) + + val map3 = new ExternalAppendOnlyMap[Int, Int, ArrayBuffer[Int]](createCombiner, + mergeValue, mergeCombiners) + map3.insert(3, 30) + map3.insert(1, 10) + map3.insert(2, 20) + + val it1 = map1.iterator + val it2 = map2.iterator + val it3 = map3.iterator + + var kv1 = it1.next() + var kv2 = it2.next() + var kv3 = it3.next() + assert(kv1._1 == kv2._1 && kv2._1 == kv3._1) + assert(kv1._2 == kv2._2 && kv2._2 == kv3._2) + + kv1 = it1.next() + kv2 = it2.next() + kv3 = it3.next() + assert(kv1._1 == kv2._1 && kv2._1 == kv3._1) + assert(kv1._2 == kv2._2 && kv2._2 == kv3._2) + + kv1 = it1.next() + kv2 = it2.next() + kv3 = it3.next() + assert(kv1._1 == kv2._1 && kv2._1 == kv3._1) + assert(kv1._2 == kv2._2 && kv2._2 == kv3._2) + } + + test("null keys and values") { + val conf = new SparkConf(false) + sc = new SparkContext("local", "test", conf) + + val map = new ExternalAppendOnlyMap[Int, Int, ArrayBuffer[Int]](createCombiner, + mergeValue, mergeCombiners) + map.insert(1, 5) + map.insert(2, 6) + map.insert(3, 7) + assert(map.size === 3) + assert(map.iterator.toSet == Set[(Int, Seq[Int])]( + (1, Seq[Int](5)), + (2, Seq[Int](6)), + (3, Seq[Int](7)) + )) + + // Null keys + val nullInt = null.asInstanceOf[Int] + map.insert(nullInt, 8) + assert(map.size === 4) + assert(map.iterator.toSet == Set[(Int, Seq[Int])]( + (1, 
Seq[Int](5)), + (2, Seq[Int](6)), + (3, Seq[Int](7)), + (nullInt, Seq[Int](8)) + )) + + // Null values + map.insert(4, nullInt) + map.insert(nullInt, nullInt) + assert(map.size === 5) + val result = map.iterator.toSet[(Int, ArrayBuffer[Int])].map(kv => (kv._1, kv._2.toSet)) + assert(result == Set[(Int, Set[Int])]( + (1, Set[Int](5)), + (2, Set[Int](6)), + (3, Set[Int](7)), + (4, Set[Int](nullInt)), + (nullInt, Set[Int](nullInt, 8)) + )) + } + + test("simple aggregator") { + val conf = new SparkConf(false) + sc = new SparkContext("local", "test", conf) + + // reduceByKey + val rdd = sc.parallelize(1 to 10).map(i => (i%2, 1)) + val result1 = rdd.reduceByKey(_+_).collect() + assert(result1.toSet == Set[(Int, Int)]((0, 5), (1, 5))) + + // groupByKey + val result2 = rdd.groupByKey().collect() + assert(result2.toSet == Set[(Int, Seq[Int])] + ((0, ArrayBuffer[Int](1, 1, 1, 1, 1)), (1, ArrayBuffer[Int](1, 1, 1, 1, 1)))) + } + + test("simple cogroup") { + val conf = new SparkConf(false) + sc = new SparkContext("local", "test", conf) + val rdd1 = sc.parallelize(1 to 4).map(i => (i, i)) + val rdd2 = sc.parallelize(1 to 4).map(i => (i%2, i)) + val result = rdd1.cogroup(rdd2).collect() + + result.foreach { case (i, (seq1, seq2)) => + i match { + case 0 => assert(seq1.toSet == Set[Int]() && seq2.toSet == Set[Int](2, 4)) + case 1 => assert(seq1.toSet == Set[Int](1) && seq2.toSet == Set[Int](1, 3)) + case 2 => assert(seq1.toSet == Set[Int](2) && seq2.toSet == Set[Int]()) + case 3 => assert(seq1.toSet == Set[Int](3) && seq2.toSet == Set[Int]()) + case 4 => assert(seq1.toSet == Set[Int](4) && seq2.toSet == Set[Int]()) + } + } + } + + test("spilling") { + val conf = new SparkConf(true) // Load defaults, otherwise SPARK_HOME is not found + conf.set("spark.shuffle.memoryFraction", "0.001") + sc = new SparkContext("local-cluster[1,1,512]", "test", conf) + + // reduceByKey - should spill ~8 times + val rddA = sc.parallelize(0 until 100000).map(i => (i/2, i)) + val resultA = rddA.reduceByKey(math.max).collect() + assert(resultA.length == 50000) + resultA.foreach { case(k, v) => + k match { + case 0 => assert(v == 1) + case 25000 => assert(v == 50001) + case 49999 => assert(v == 99999) + case _ => + } + } + + // groupByKey - should spill ~17 times + val rddB = sc.parallelize(0 until 100000).map(i => (i/4, i)) + val resultB = rddB.groupByKey().collect() + assert(resultB.length == 25000) + resultB.foreach { case(i, seq) => + i match { + case 0 => assert(seq.toSet == Set[Int](0, 1, 2, 3)) + case 12500 => assert(seq.toSet == Set[Int](50000, 50001, 50002, 50003)) + case 24999 => assert(seq.toSet == Set[Int](99996, 99997, 99998, 99999)) + case _ => + } + } + + // cogroup - should spill ~7 times + val rddC1 = sc.parallelize(0 until 10000).map(i => (i, i)) + val rddC2 = sc.parallelize(0 until 10000).map(i => (i%1000, i)) + val resultC = rddC1.cogroup(rddC2).collect() + assert(resultC.length == 10000) + resultC.foreach { case(i, (seq1, seq2)) => + i match { + case 0 => + assert(seq1.toSet == Set[Int](0)) + assert(seq2.toSet == Set[Int](0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000)) + case 5000 => + assert(seq1.toSet == Set[Int](5000)) + assert(seq2.toSet == Set[Int]()) + case 9999 => + assert(seq1.toSet == Set[Int](9999)) + assert(seq2.toSet == Set[Int]()) + case _ => + } + } + } + + test("spilling with hash collisions") { + val conf = new SparkConf(true) + conf.set("spark.shuffle.memoryFraction", "0.001") + sc = new SparkContext("local-cluster[1,1,512]", "test", conf) + + def createCombiner(i: String) = 
ArrayBuffer[String](i) + def mergeValue(buffer: ArrayBuffer[String], i: String) = buffer += i + def mergeCombiners(buffer1: ArrayBuffer[String], buffer2: ArrayBuffer[String]) = + buffer1 ++= buffer2 + + val map = new ExternalAppendOnlyMap[String, String, ArrayBuffer[String]]( + createCombiner, mergeValue, mergeCombiners) + + val collisionPairs = Seq( + ("Aa", "BB"), // 2112 + ("to", "v1"), // 3707 + ("variants", "gelato"), // -1249574770 + ("Teheran", "Siblings"), // 231609873 + ("misused", "horsemints"), // 1069518484 + ("isohel", "epistolaries"), // -1179291542 + ("righto", "buzzards"), // -931102253 + ("hierarch", "crinolines"), // -1732884796 + ("inwork", "hypercatalexes"), // -1183663690 + ("wainages", "presentencing"), // 240183619 + ("trichothecenes", "locular"), // 339006536 + ("pomatoes", "eructation") // 568647356 + ) + + (1 to 100000).map(_.toString).foreach { i => map.insert(i, i) } + collisionPairs.foreach { case (w1, w2) => + map.insert(w1, w2) + map.insert(w2, w1) + } + + // A map of collision pairs in both directions + val collisionPairsMap = (collisionPairs ++ collisionPairs.map(_.swap)).toMap + + // Avoid map.size or map.iterator.length because this destructively sorts the underlying map + var count = 0 + + val it = map.iterator + while (it.hasNext) { + val kv = it.next() + val expectedValue = ArrayBuffer[String](collisionPairsMap.getOrElse(kv._1, kv._1)) + assert(kv._2.equals(expectedValue)) + count += 1 + } + assert(count == 100000 + collisionPairs.size * 2) + } + + test("spilling with hash collisions using the Int.MaxValue key") { + val conf = new SparkConf(true) + conf.set("spark.shuffle.memoryFraction", "0.001") + sc = new SparkContext("local-cluster[1,1,512]", "test", conf) + + val map = new ExternalAppendOnlyMap[Int, Int, ArrayBuffer[Int]](createCombiner, + mergeValue, mergeCombiners) + + (1 to 100000).foreach { i => map.insert(i, i) } + map.insert(Int.MaxValue, Int.MaxValue) + + val it = map.iterator + while (it.hasNext) { + // Should not throw NoSuchElementException + it.next() + } + } +}
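The pairs in the "spilling with hash collisions" test above are chosen because the two words in each pair share the Java String hash code shown in the trailing comment, which forces ExternalAppendOnlyMap through its collision-handling path. A standalone check of that property, illustrative only and not part of the patch:

object HashCollisionCheck {
  def main(args: Array[String]): Unit = {
    // A couple of the pairs used in "spilling with hash collisions"
    val pairs = Seq(("Aa", "BB"), ("to", "v1"))
    pairs.foreach { case (w1, w2) =>
      // Both strings hash to the same value, so they land in the same bucket
      assert(w1.hashCode == w2.hashCode)
      println(s"$w1 / $w2 -> ${w1.hashCode}")  // e.g. Aa / BB -> 2112
    }
  }
}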
diff --git a/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala index 80ee59f26f49e..e9b62ea70db22 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala @@ -19,8 +19,20 @@ package org.apache.spark.util.collection import scala.collection.mutable.HashSet import org.scalatest.FunSuite - -class OpenHashMapSuite extends FunSuite { +import org.scalatest.matchers.ShouldMatchers +import org.apache.spark.util.SizeEstimator + +class OpenHashMapSuite extends FunSuite with ShouldMatchers { + + test("size for specialized, primitive value (int)") { + val capacity = 1024 + val map = new OpenHashMap[String, Int](capacity) + val actualSize = SizeEstimator.estimate(map) + // 64 bits for pointers, 32 bits for ints, and 1 bit for the bitset. + val expectedSize = capacity * (64 + 32 + 1) / 8 + // Make sure we are not allocating a significant amount of memory beyond what we expect. + actualSize should be <= (expectedSize * 1.1).toLong + } test("initialization") { val goodMap1 = new OpenHashMap[String, Int](1) 
diff --git a/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala index 5ad7ea74f186f..1b24f8f287cfd 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala @@ -18,9 +18,27 @@ package org.apache.spark.util.collection import org.scalatest.FunSuite +import org.scalatest.matchers.ShouldMatchers +import org.apache.spark.util.SizeEstimator -class OpenHashSetSuite extends FunSuite { + +class OpenHashSetSuite extends FunSuite with ShouldMatchers { + + test("size for specialized, primitive int") { + val loadFactor = 0.7 + val set = new OpenHashSet[Int](64, loadFactor) + for (i <- 0 until 1024) { + set.add(i) + } + assert(set.size === 1024) + assert(set.capacity > 1024) + val actualSize = SizeEstimator.estimate(set) + // 32 bits for the ints + 1 bit for the bitset + val expectedSize = set.capacity * (32 + 1) / 8 + // Make sure we are not allocating a significant amount of memory beyond what we expect. + actualSize should be <= (expectedSize * 1.1).toLong + } test("primitive int") { val set = new OpenHashSet[Int]
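The size assertions above boil down to per-slot arithmetic: in the OpenHashMap[String, Int] case, 1024 slots at 64 bits of pointer, 32 bits of int and 1 bitset bit give 12416 bytes, and the 10% slack allows up to 13657 bytes. A sketch of that arithmetic, illustrative only and not part of the patch:

object SizeBoundArithmetic {
  def main(args: Array[String]): Unit = {
    val capacity = 1024
    // Same formula as the OpenHashMapSuite test: bits per slot, divided by 8 for bytes
    val expectedSize = capacity * (64 + 32 + 1) / 8  // 12416 bytes
    val upperBound = (expectedSize * 1.1).toLong     // 13657 bytes allowed by the assertion
    println(s"expected = $expectedSize bytes, bound = $upperBound bytes")
  }
}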
diff --git a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashSetSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala similarity index 83% rename from core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashSetSuite.scala rename to core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala index 0342195161dc0..3b60decee9acb 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashSetSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala @@ -19,8 +19,20 @@ package org.apache.spark.util.collection import scala.collection.mutable.HashSet import org.scalatest.FunSuite +import org.scalatest.matchers.ShouldMatchers +import org.apache.spark.util.SizeEstimator -class PrimitiveKeyOpenHashSetSuite extends FunSuite { +class PrimitiveKeyOpenHashMapSuite extends FunSuite with ShouldMatchers { + + test("size for specialized, primitive key, value (int, int)") { + val capacity = 1024 + val map = new PrimitiveKeyOpenHashMap[Int, Int](capacity) + val actualSize = SizeEstimator.estimate(map) + // 32 bits for keys, 32 bits for values, and 1 bit for the bitset. + val expectedSize = capacity * (32 + 32 + 1) / 8 + // Make sure we are not allocating a significant amount of memory beyond what we expect. + actualSize should be <= (expectedSize * 1.1).toLong + } test("initialization") { val goodMap1 = new PrimitiveKeyOpenHashMap[Int, Int](1) 
diff --git a/kmeans_data.txt b/data/kmeans_data.txt similarity index 100% rename from kmeans_data.txt rename to data/kmeans_data.txt 
diff --git a/lr_data.txt b/data/lr_data.txt similarity index 100% rename from lr_data.txt rename to data/lr_data.txt 
diff --git a/pagerank_data.txt b/data/pagerank_data.txt similarity index 100% rename from pagerank_data.txt rename to data/pagerank_data.txt 
diff --git a/dev/README.md b/dev/README.md new file mode 100644 index 0000000000000..2b0f3d8ee8924 --- /dev/null +++ b/dev/README.md @@ -0,0 +1,5 @@ +# Spark Developer Scripts +This directory contains scripts useful to developers when packaging, +testing, or committing to Spark. + +Many of these scripts require Apache credentials to work correctly. 
diff --git a/dev/run-tests b/dev/run-tests new file mode 100644 index 0000000000000..a5dcacb4fd0c1 --- /dev/null +++ b/dev/run-tests @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Go to the Spark project root directory +FWDIR="$(cd `dirname $0`/..; pwd)" +cd $FWDIR + +# Remove work directory +rm -rf ./work + +# Fail fast +set -e + +echo "=========================================================================" +echo "Running Spark unit tests" +echo "=========================================================================" +sbt/sbt assembly test + +echo "=========================================================================" +echo "Running PySpark tests" +echo "=========================================================================" +if [ -z "$PYSPARK_PYTHON" ]; then + export PYSPARK_PYTHON=/usr/local/bin/python2.7 +fi +./python/run-tests 
diff --git a/docker/spark-test/base/Dockerfile b/docker/spark-test/base/Dockerfile index 60962776dda57..e543db6143e4d 100644 --- a/docker/spark-test/base/Dockerfile +++ b/docker/spark-test/base/Dockerfile @@ -25,8 +25,7 @@ RUN apt-get update # install a few other useful packages plus Open Jdk 7 RUN apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server -ENV SCALA_VERSION 2.9.3 -ENV SPARK_VERSION 0.8.1 +ENV SCALA_VERSION 2.10.3 ENV CDH_VERSION cdh4 ENV SCALA_HOME /opt/scala-$SCALA_VERSION ENV SPARK_HOME /opt/spark 
diff --git a/docs/_config.yml b/docs/_config.yml index 558e719a13555..868fa2410fde2 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -3,8 +3,10 @@ markdown: kramdown # These allow the documentation to be updated with new releases # of Spark, Scala, and Mesos.
-SPARK_VERSION: 0.8.2-incubating-SNAPSHOT -SPARK_VERSION_SHORT: 0.8.2-SNAPSHOT -SCALA_VERSION: 2.9.3 +SPARK_VERSION: 0.9.0-incubating +SPARK_VERSION_SHORT: 0.9.0 +SCALA_BINARY_VERSION: "2.10" +SCALA_VERSION: "2.10.3" MESOS_VERSION: 0.13.0 SPARK_ISSUE_TRACKER_URL: https://spark-project.atlassian.net +SPARK_GITHUB_URL: https://github.com/apache/incubator-spark 
diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index ad7969d012283..33525953ac4f6 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html
[The three hunks for this file (@@ -21,7 +21,7 @@, @@ -68,9 +68,10 @@ and @@ -161,7 +174,7 @@) lost their HTML markup in extraction. The recoverable change is in the second hunk: the documentation navigation list that contains "Spark Streaming", "MLlib (Machine Learning)" and "Bagel (Pregel on Spark)" gains a new "GraphX (Graph Processing)" entry. The single-line edits in the other two hunks, including the "Heading" section near line 161, could not be reconstructed.]