ThalesGroup
diff --git a/‎README.md
Lines changed: 2 additions & 2 deletions b/‎README.md
Lines changed: 2 additions & 2 deletions
diff --git a/‎bin/pyspark
Lines changed: 2 additions & 0 deletions b/‎bin/pyspark
Lines changed: 2 additions & 0 deletions
diff --git a/‎core/src/main/scala/org/apache/spark/rdd/RDD.scala
Lines changed: 3 additions & 4 deletions b/‎core/src/main/scala/org/apache/spark/rdd/RDD.scala
Lines changed: 3 additions & 4 deletions
diff --git a/‎ec2/spark_ec2.py
Lines changed: 25 additions & 8 deletions b/‎ec2/spark_ec2.py
Lines changed: 25 additions & 8 deletions
diff --git a/‎python/pyspark/rdd.py
Lines changed: 4 additions & 4 deletions b/‎python/pyspark/rdd.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
Lines changed: 1 addition & 1 deletion b/‎sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
Lines changed: 1 addition & 1 deletion
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
Lines changed: 12 additions & 4 deletions b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
Lines changed: 12 additions & 4 deletions
diff --git a/‎sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
Lines changed: 5 additions & 3 deletions b/‎sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
Lines changed: 5 additions & 3 deletions
diff --git a/‎sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
Lines changed: 3 additions & 2 deletions b/‎sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
Lines changed: 3 additions & 2 deletions
diff --git a/‎sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
Lines changed: 16 additions & 0 deletions b/‎sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
Lines changed: 16 additions & 0 deletions
@@ -4,8 +4,8 @@ Spark is a fast and general cluster computing system for Big Data. It provides
 high-level APIs in Scala, Java, and Python, and an optimized engine that
 supports general computation graphs for data analysis. It also supports a
 rich set of higher-level tools including Spark SQL for SQL and structured
-data processing, MLLib for machine learning, GraphX for graph processing,
-and Spark Streaming.
+data processing, MLlib for machine learning, GraphX for graph processing,
+and Spark Streaming for stream processing.
 
 <http://spark.apache.org/>
 
 
@@ -85,6 +85,8 @@ export PYSPARK_SUBMIT_ARGS
 
 # For pyspark tests
 if [[ -n "$SPARK_TESTING" ]]; then
+  unset YARN_CONF_DIR
+  unset HADOOP_CONF_DIR
   if [[ -n "$PYSPARK_DOC_TEST" ]]; then
     exec "$PYSPARK_PYTHON" -m doctest $1
   else
 
@@ -1064,11 +1064,10 @@ abstract class RDD[T: ClassTag](
       // greater than totalParts because we actually cap it at totalParts in runJob.
       var numPartsToTry = 1
       if (partsScanned > 0) {
-        // If we didn't find any rows after the first iteration, just try all partitions next.
-        // Otherwise, interpolate the number of partitions we need to try, but overestimate it
-        // by 50%.
+        // If we didn't find any rows after the previous iteration, quadruple and retry.  Otherwise,
+        // interpolate the number of partitions we need to try, but overestimate it by 50%.
         if (buf.size == 0) {
-          numPartsToTry = totalParts - 1
+          numPartsToTry = partsScanned * 4
         } else {
           numPartsToTry = (1.5 * num * partsScanned / buf.size).toInt
         }
 
@@ -102,9 +102,17 @@ def parse_args():
              "(for debugging)")
     parser.add_option(
         "--ebs-vol-size", metavar="SIZE", type="int", default=0,
-        help="Attach a new EBS volume of size SIZE (in GB) to each node as " +
-             "/vol. The volumes will be deleted when the instances terminate. " +
-             "Only possible on EBS-backed AMIs.")
+        help="Size (in GB) of each EBS volume.")
+    parser.add_option(
+        "--ebs-vol-type", default="standard",
+        help="EBS volume type (e.g. 'gp2', 'standard').")
+    parser.add_option(
+        "--ebs-vol-num", type="int", default=1,
+        help="Number of EBS volumes to attach to each node as /vol[x]. " +
+             "The volumes will be deleted when the instances terminate. " +
+             "Only possible on EBS-backed AMIs. " +
+             "EBS volumes are only attached if --ebs-vol-size > 0." +
+             "Only support up to 8 EBS volumes.")
     parser.add_option(
         "--swap", metavar="SWAP", type="int", default=1024,
         help="Swap space to set up per node, in MB (default: 1024)")
@@ -348,13 +356,16 @@ def launch_cluster(conn, opts, cluster_name):
         print >> stderr, "Could not find AMI " + opts.ami
         sys.exit(1)
 
-    # Create block device mapping so that we can add an EBS volume if asked to
+    # Create block device mapping so that we can add EBS volumes if asked to.
+    # The first drive is attached as /dev/sds, 2nd as /dev/sdt, ... /dev/sdz
     block_map = BlockDeviceMapping()
     if opts.ebs_vol_size > 0:
-        device = EBSBlockDeviceType()
-        device.size = opts.ebs_vol_size
-        device.delete_on_termination = True
-        block_map["/dev/sdv"] = device
+        for i in range(opts.ebs_vol_num):
+            device = EBSBlockDeviceType()
+            device.size = opts.ebs_vol_size
+            device.volume_type=opts.ebs_vol_type
+            device.delete_on_termination = True
+            block_map["/dev/sd" + chr(ord('s') + i)] = device
 
     # AWS ignores the AMI-specified block device mapping for M3 (see SPARK-3342).
     if opts.instance_type.startswith('m3.'):
@@ -828,6 +839,12 @@ def get_partition(total, num_partitions, current_partitions):
 
 def real_main():
     (opts, action, cluster_name) = parse_args()
+
+    # Input parameter validation
+    if opts.ebs_vol_num > 8:
+        print >> stderr, "ebs-vol-num cannot be greater than 8"
+        sys.exit(1)
+
     try:
         conn = ec2.connect_to_region(opts.region)
     except Exception as e:
 
@@ -1089,11 +1089,11 @@ def take(self, num):
             # we actually cap it at totalParts in runJob.
             numPartsToTry = 1
             if partsScanned > 0:
-                # If we didn't find any rows after the first iteration, just
-                # try all partitions next. Otherwise, interpolate the number
-                # of partitions we need to try, but overestimate it by 50%.
+                # If we didn't find any rows after the previous iteration,
+                # quadruple and retry.  Otherwise, interpolate the number of
+                # partitions we need to try, but overestimate it by 50%.
                 if len(items) == 0:
-                    numPartsToTry = totalParts - 1
+                    numPartsToTry = partsScanned * 4
                 else:
                     numPartsToTry = int(1.5 * num * partsScanned / len(items))
 
 
@@ -272,7 +272,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
     val currentTable = table(tableName).queryExecution.analyzed
     val asInMemoryRelation = currentTable match {
       case _: InMemoryRelation =>
-        currentTable.logicalPlan
+        currentTable
 
       case _ =>
         InMemoryRelation(useCompression, columnBatchSize, executePlan(currentTable).executedPlan)
 
@@ -60,10 +60,10 @@ case class SetCommand(
         logWarning(s"Property ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} is deprecated, " +
           s"automatically converted to ${SQLConf.SHUFFLE_PARTITIONS} instead.")
         context.setConf(SQLConf.SHUFFLE_PARTITIONS, v)
-        Array(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=$v"))
+        Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=$v"))
       } else {
         context.setConf(k, v)
-        Array(Row(s"$k=$v"))
+        Seq(Row(s"$k=$v"))
       }
 
     // Query the value bound to key k.
@@ -78,11 +78,19 @@ case class SetCommand(
           "hive-hwi-0.12.0.jar",
           "hive-0.12.0.jar").mkString(":")
 
-        Array(
+        context.getAllConfs.map { case (k, v) =>
+          Row(s"$k=$v")
+        }.toSeq ++ Seq(
           Row("system:java.class.path=" + hiveJars),
           Row("system:sun.java.command=shark.SharkServer2"))
       } else {
-        Array(Row(s"$k=${context.getConf(k, "<undefined>")}"))
+        if (k == SQLConf.Deprecated.MAPRED_REDUCE_TASKS) {
+          logWarning(s"Property ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} is deprecated, " +
+            s"showing ${SQLConf.SHUFFLE_PARTITIONS} instead.")
+          Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=${context.numShufflePartitions}"))
+        } else {
+          Seq(Row(s"$k=${context.getConf(k, "<undefined>")}"))
+        }
       }
 
     // Query all key-value pairs that are set in the SQLConf of the context.
 
@@ -44,6 +44,8 @@ private[hive] case class SourceCommand(filePath: String) extends Command
 
 private[hive] case class AddFile(filePath: String) extends Command
 
+private[hive] case class AddJar(path: String) extends Command
+
 private[hive] case class DropTable(tableName: String, ifExists: Boolean) extends Command
 
 private[hive] case class AnalyzeTable(tableName: String) extends Command
@@ -231,7 +233,7 @@ private[hive] object HiveQl {
       } else if (sql.trim.toLowerCase.startsWith("uncache table")) {
         CacheCommand(sql.trim.drop(14).trim, false)
       } else if (sql.trim.toLowerCase.startsWith("add jar")) {
-        NativeCommand(sql)
+        AddJar(sql.trim.drop(8).trim)
       } else if (sql.trim.toLowerCase.startsWith("add file")) {
         AddFile(sql.trim.drop(9))
       } else if (sql.trim.toLowerCase.startsWith("dfs")) {
@@ -1018,9 +1020,9 @@ private[hive] object HiveQl {
 
     /* Other functions */
     case Token("TOK_FUNCTION", Token(RAND(), Nil) :: Nil) => Rand
-    case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: Nil) => 
+    case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: Nil) =>
       Substring(nodeToExpr(string), nodeToExpr(pos), Literal(Integer.MAX_VALUE, IntegerType))
-    case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: length :: Nil) => 
+    case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: length :: Nil) =>
       Substring(nodeToExpr(string), nodeToExpr(pos), nodeToExpr(length))
 
     /* UDFs - Must be last otherwise will preempt built in functions */
 
@@ -195,11 +195,12 @@ private[hive] trait HiveStrategies {
 
   case class HiveCommandStrategy(context: HiveContext) extends Strategy {
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
-      case logical.NativeCommand(sql) =>
-        NativeCommand(sql, plan.output)(context) :: Nil
+      case logical.NativeCommand(sql) => NativeCommand(sql, plan.output)(context) :: Nil
 
       case hive.DropTable(tableName, ifExists) => execution.DropTable(tableName, ifExists) :: Nil
 
+      case hive.AddJar(path) => execution.AddJar(path) :: Nil
+
       case hive.AnalyzeTable(tableName) => execution.AnalyzeTable(tableName) :: Nil
 
       case describe: logical.DescribeCommand =>
 
@@ -60,3 +60,19 @@ case class DropTable(tableName: String, ifExists: Boolean) extends LeafNode with
     Seq.empty[Row]
   }
 }
+
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
+case class AddJar(path: String) extends LeafNode with Command {
+  def hiveContext = sqlContext.asInstanceOf[HiveContext]
+
+  override def output = Seq.empty
+
+  override protected[sql] lazy val sideEffectResult: Seq[Row] = {
+    hiveContext.runSqlHive(s"ADD JAR $path")
+    hiveContext.sparkContext.addJar(path)
+    Seq.empty[Row]
+  }
+}