From 231af3aee0994ade9b64ad5a2b2615ac04b154d5 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Tue, 25 Mar 2014 19:20:44 -0700 Subject: [PATCH 1/5] Limit/TakeOrdered: 1. Renamed StopAfter to Limit to be more consistent with naming in other relational databases. 2. Renamed TopK to TakeOrdered to be more consistent with Spark RDD API. 3. Avoid breaking lineage in Limit. 4. Added a bunch of override's to execution/basicOperators.scala. --- .../apache/spark/sql/catalyst/SqlParser.scala | 2 +- .../plans/logical/basicOperators.scala | 2 +- .../org/apache/spark/sql/SQLContext.scala | 2 +- .../spark/sql/execution/SparkStrategies.scala | 10 ++-- .../spark/sql/execution/basicOperators.scala | 53 ++++++++++++------- .../apache/spark/sql/hive/HiveContext.scala | 2 +- .../org/apache/spark/sql/hive/HiveQl.scala | 4 +- 7 files changed, 44 insertions(+), 31 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 9dec4e3d9e4c2..461e3a510a4bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -178,7 +178,7 @@ class SqlParser extends StandardTokenParsers { val withDistinct = d.map(_ => Distinct(withProjection)).getOrElse(withProjection) val withHaving = h.map(h => Filter(h, withDistinct)).getOrElse(withDistinct) val withOrder = o.map(o => Sort(o, withHaving)).getOrElse(withHaving) - val withLimit = l.map { l => StopAfter(l, withOrder) }.getOrElse(withOrder) + val withLimit = l.map { l => Limit(l, withOrder) }.getOrElse(withOrder) withLimit } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala index 61481de65e76e..e48b7ff306491 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala @@ -132,7 +132,7 @@ case class Aggregate( def references = child.references } -case class StopAfter(limit: Expression, child: LogicalPlan) extends UnaryNode { +case class Limit(limit: Expression, child: LogicalPlan) extends UnaryNode { def output = child.output def references = limit.references } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index cf3c06acce5b0..19d96fa487608 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -115,7 +115,7 @@ class SQLContext(@transient val sparkContext: SparkContext) val sparkContext = self.sparkContext val strategies: Seq[Strategy] = - TopK :: + TakeOrdered :: PartialAggregation :: SparkEquiInnerJoin :: ParquetOperations :: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 8a39ded0a9ec4..c4db73ca5cdc6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -158,10 +158,10 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case other => other } - object TopK extends Strategy { + object TakeOrdered extends Strategy { def 
apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { - case logical.StopAfter(IntegerLiteral(limit), logical.Sort(order, child)) => - execution.TopK(limit, order, planLater(child))(sparkContext) :: Nil + case logical.Limit(IntegerLiteral(limit), logical.Sort(order, child)) => + execution.TakeOrdered(limit, order, planLater(child))(sparkContext) :: Nil case _ => Nil } } @@ -213,8 +213,8 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { sparkContext.parallelize(data.map(r => new GenericRow(r.productIterator.map(convertToCatalyst).toArray): Row)) execution.ExistingRdd(output, dataAsRdd) :: Nil - case logical.StopAfter(IntegerLiteral(limit), child) => - execution.StopAfter(limit, planLater(child))(sparkContext) :: Nil + case logical.Limit(IntegerLiteral(limit), child) => + execution.Limit(limit, planLater(child))(sparkContext) :: Nil case Unions(unionChildren) => execution.Union(unionChildren.map(planLater))(sparkContext) :: Nil case logical.Generate(generator, join, outer, _, child) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala index e4f918b6787fe..5e2f3d91bd410 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala @@ -29,18 +29,18 @@ import org.apache.spark.sql.catalyst.plans.physical.{OrderedDistribution, Unspec import org.apache.spark.sql.catalyst.ScalaReflection case class Project(projectList: Seq[NamedExpression], child: SparkPlan) extends UnaryNode { - def output = projectList.map(_.toAttribute) + override def output = projectList.map(_.toAttribute) - def execute() = child.execute().mapPartitions { iter => + override def execute() = child.execute().mapPartitions { iter => @transient val reusableProjection = new MutableProjection(projectList) iter.map(reusableProjection) } } case class Filter(condition: Expression, child: SparkPlan) extends UnaryNode { - def output = child.output + override def output = child.output - def execute() = child.execute().mapPartitions { iter => + override def execute() = child.execute().mapPartitions { iter => iter.filter(condition.apply(_).asInstanceOf[Boolean]) } } @@ -48,37 +48,50 @@ case class Filter(condition: Expression, child: SparkPlan) extends UnaryNode { case class Sample(fraction: Double, withReplacement: Boolean, seed: Int, child: SparkPlan) extends UnaryNode { - def output = child.output + override def output = child.output // TODO: How to pick seed? - def execute() = child.execute().sample(withReplacement, fraction, seed) + override def execute() = child.execute().sample(withReplacement, fraction, seed) } case class Union(children: Seq[SparkPlan])(@transient sc: SparkContext) extends SparkPlan { // TODO: attributes output by union should be distinct for nullability purposes - def output = children.head.output - def execute() = sc.union(children.map(_.execute())) + override def output = children.head.output + override def execute() = sc.union(children.map(_.execute())) override def otherCopyArgs = sc :: Nil } -case class StopAfter(limit: Int, child: SparkPlan)(@transient sc: SparkContext) extends UnaryNode { +/** + * Take the first limit elements. 
+ */ +case class Limit(limit: Int, child: SparkPlan)(@transient sc: SparkContext) extends UnaryNode { override def otherCopyArgs = sc :: Nil + // Note that the implementation is different depending on + // whether this is a terminal operator or not. - def output = child.output + override def output = child.output override def executeCollect() = child.execute().map(_.copy()).take(limit) - // TODO: Terminal split should be implemented differently from non-terminal split. - // TODO: Pick num splits based on |limit|. - def execute() = sc.makeRDD(executeCollect(), 1) + override def execute() = { + child.execute() + .mapPartitions(_.take(limit)) + .coalesce(1, shuffle = true) + .mapPartitions(_.take(limit)) + } } -case class TopK(limit: Int, sortOrder: Seq[SortOrder], child: SparkPlan) - (@transient sc: SparkContext) extends UnaryNode { +/** + * Take the first limit elements as defined by the sortOrder. This is logically equivalent to + * having a [[Limit]] operator after a [[Sort]] operator. This could have been named TopK, but + * Spark's top operator does the opposite in ordering so we name it TakeOrdered to avoid confusion. + */ +case class TakeOrdered(limit: Int, sortOrder: Seq[SortOrder], child: SparkPlan) + (@transient sc: SparkContext) extends UnaryNode { override def otherCopyArgs = sc :: Nil - def output = child.output + override def output = child.output @transient lazy val ordering = new RowOrdering(sortOrder) @@ -87,7 +100,7 @@ case class TopK(limit: Int, sortOrder: Seq[SortOrder], child: SparkPlan) // TODO: Terminal split should be implemented differently from non-terminal split. // TODO: Pick num splits based on |limit|. - def execute() = sc.makeRDD(executeCollect(), 1) + override def execute() = sc.makeRDD(executeCollect(), 1) } @@ -102,7 +115,7 @@ case class Sort( @transient lazy val ordering = new RowOrdering(sortOrder) - def execute() = attachTree(this, "sort") { + override def execute() = attachTree(this, "sort") { // TODO: Optimize sorting operation? child.execute() .mapPartitions( @@ -110,7 +123,7 @@ case class Sort( preservesPartitioning = true) } - def output = child.output + override def output = child.output } object ExistingRdd { @@ -131,6 +144,6 @@ object ExistingRdd { } case class ExistingRdd(output: Seq[Attribute], rdd: RDD[Row]) extends LeafNode { - def execute() = rdd + override def execute() = rdd } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index 3bcf586662f2c..d806442f87537 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -188,7 +188,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { val hiveContext = self override val strategies: Seq[Strategy] = Seq( - TopK, + TakeOrdered, ParquetOperations, HiveTableScans, DataSinks, diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 8e76a7348e957..003f2de389f31 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -530,7 +530,7 @@ object HiveQl { val withLimit = limitClause.map(l => nodeToExpr(l.getChildren.head)) - .map(StopAfter(_, withSort)) + .map(Limit(_, withSort)) .getOrElse(withSort) // TOK_INSERT_INTO means to add files to the table. 
@@ -603,7 +603,7 @@ object HiveQl { case Token("TOK_TABLESPLITSAMPLE", Token("TOK_ROWCOUNT", Nil) :: Token(count, Nil) :: Nil) => - StopAfter(Literal(count.toInt), relation) + Limit(Literal(count.toInt), relation) case Token("TOK_TABLESPLITSAMPLE", Token("TOK_PERCENT", Nil) :: Token(fraction, Nil) :: Nil) => From 47d33278af0a253f166e7a98aad8e0573f743dc4 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Tue, 25 Mar 2014 22:16:59 -0700 Subject: [PATCH 2/5] Copy tuples in Limit before shuffle. --- .../scala/org/apache/spark/sql/execution/basicOperators.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala index 5e2f3d91bd410..b815e1f2cd2fd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala @@ -76,7 +76,7 @@ case class Limit(limit: Int, child: SparkPlan)(@transient sc: SparkContext) exte override def execute() = { child.execute() - .mapPartitions(_.take(limit)) + .mapPartitions(_.take(limit).map(_.copy())) .coalesce(1, shuffle = true) .mapPartitions(_.take(limit)) } From 9b79246226faf962b489570f864e49f953cbb63c Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 26 Mar 2014 00:01:39 -0700 Subject: [PATCH 3/5] Updated doc for Limit. --- .../org/apache/spark/sql/execution/basicOperators.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala index b815e1f2cd2fd..4b767e35ea32a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala @@ -63,12 +63,14 @@ case class Union(children: Seq[SparkPlan])(@transient sc: SparkContext) extends } /** - * Take the first limit elements. + * Take the first limit elements. Note that the implementation is different depending on whether + * this is a terminal operator or not. If it is terminal and is invoked using executeCollect, + * this operator uses Spark's take method on the Spark driver. If it is not terminal or is + * invoked using execute, we first take the limit on each partition, and then repartition all the + * data to a single partition to compute the global limit. */ case class Limit(limit: Int, child: SparkPlan)(@transient sc: SparkContext) extends UnaryNode { override def otherCopyArgs = sc :: Nil - // Note that the implementation is different depending on - // whether this is a terminal operator or not. override def output = child.output From 87b7d375958a58c3af002678bcd2aa2a22dcd2c5 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 26 Mar 2014 00:18:51 -0700 Subject: [PATCH 4/5] Use the proper serializer in limit. 
--- .../spark/sql/execution/basicOperators.scala | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala index 4b767e35ea32a..b4c4812bae9d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala @@ -20,13 +20,14 @@ package execution import scala.reflect.runtime.universe.TypeTag -import org.apache.spark.rdd.RDD -import org.apache.spark.SparkContext - +import org.apache.spark.{HashPartitioner, SparkConf, SparkContext} +import org.apache.spark.rdd.{RDD, ShuffledRDD} +import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.errors._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.{OrderedDistribution, UnspecifiedDistribution} -import org.apache.spark.sql.catalyst.ScalaReflection +import org.apache.spark.util.MutablePair + case class Project(projectList: Seq[NamedExpression], child: SparkPlan) extends UnaryNode { override def output = projectList.map(_.toAttribute) @@ -70,6 +71,9 @@ case class Union(children: Seq[SparkPlan])(@transient sc: SparkContext) extends * data to a single partition to compute the global limit. */ case class Limit(limit: Int, child: SparkPlan)(@transient sc: SparkContext) extends UnaryNode { + // TODO: Implement a partition local limit, and use a strategy to generate the proper limit plan: + // partition local limit -> exchange into one partition -> partition local limit again + override def otherCopyArgs = sc :: Nil override def output = child.output @@ -77,10 +81,14 @@ case class Limit(limit: Int, child: SparkPlan)(@transient sc: SparkContext) exte override def executeCollect() = child.execute().map(_.copy()).take(limit) override def execute() = { - child.execute() - .mapPartitions(_.take(limit).map(_.copy())) - .coalesce(1, shuffle = true) - .mapPartitions(_.take(limit)) + val rdd = child.execute().mapPartitions { iter => + val mutablePair = new MutablePair[Boolean, Row]() + iter.take(limit).map(row => mutablePair.update(false, row)) + } + val part = new HashPartitioner(1) + val shuffled = new ShuffledRDD[Boolean, Row, MutablePair[Boolean, Row]](rdd, part) + shuffled.setSerializer(new SparkSqlSerializer(new SparkConf(false))) + shuffled.mapPartitions(_.take(limit).map(_._2)) } } From 92b97275aa76af8b1004397abc40bb1d4aaa71ce Mon Sep 17 00:00:00 2001 From: Michael Armbrust Date: Tue, 1 Apr 2014 16:00:01 -0700 Subject: [PATCH 5/5] More hacks to make Maps serialize with Kryo. --- .../org/apache/spark/sql/execution/SparkSqlSerializer.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala index 915f551fb2f01..d8e1b970c1d88 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer.scala @@ -32,7 +32,13 @@ class SparkSqlSerializer(conf: SparkConf) extends KryoSerializer(conf) { kryo.setRegistrationRequired(false) kryo.register(classOf[MutablePair[_, _]]) kryo.register(classOf[Array[Any]]) + // This is kinda hacky... 
kryo.register(classOf[scala.collection.immutable.Map$Map1], new MapSerializer) + kryo.register(classOf[scala.collection.immutable.Map$Map2], new MapSerializer) + kryo.register(classOf[scala.collection.immutable.Map$Map3], new MapSerializer) + kryo.register(classOf[scala.collection.immutable.Map$Map4], new MapSerializer) + kryo.register(classOf[scala.collection.immutable.Map[_,_]], new MapSerializer) + kryo.register(classOf[scala.collection.Map[_,_]], new MapSerializer) kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericRow]) kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericMutableRow]) kryo.register(classOf[scala.collection.mutable.ArrayBuffer[_]])
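
The TakeOrdered rename in PATCH 1 follows the RDD API: the operator returns the first `limit` rows under `sortOrder`, which is the semantics of RDD.takeOrdered, whereas RDD.top returns the largest elements under the ordering (hence the doc comment's note that the name TopK would be confusing). A minimal illustration on a plain RDD, assuming an existing SparkContext named sc and purely illustrative values:

  val nums = sc.parallelize(Seq(5, 1, 4, 2, 3))

  // takeOrdered(k) returns the k smallest elements under the implicit ordering,
  // in ascending order; this matches what execution.TakeOrdered does via its RowOrdering.
  nums.takeOrdered(2)   // Array(1, 2)

  // top(k) returns the k largest elements, i.e. the reverse ordering.
  nums.top(2)           // Array(5, 4)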
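
The non-terminal code path of Limit, introduced in PATCH 1 and refined in PATCH 2 and PATCH 4, is a two-phase limit: take at most `limit` rows in each partition, shuffle the survivors into a single partition, then take `limit` again to obtain the global result. PATCH 2 additionally copies each row before the shuffle, since operators may reuse mutable row objects within a partition iterator, and PATCH 4 replaces the coalesce-based shuffle with an explicit ShuffledRDD so that SparkSqlSerializer is used for the exchange. A rough sketch of the same two-phase pattern on a generic RDD, in the coalesce-based form that PATCH 1 and PATCH 2 used (the function name and signature are illustrative, not part of the patch):

  import scala.reflect.ClassTag
  import org.apache.spark.rdd.RDD

  def twoPhaseLimit[T: ClassTag](rows: RDD[T], limit: Int): RDD[T] = {
    rows
      .mapPartitions(_.take(limit))   // phase 1: at most `limit` elements per partition
      .coalesce(1, shuffle = true)    // move the survivors into a single partition
      .mapPartitions(_.take(limit))   // phase 2: apply the global limit
  }

Each partition contributes at most `limit` elements, so the shuffle moves on the order of numPartitions * limit rows rather than the child's entire output.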
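
PATCH 5 registers Scala's specialized small map classes (Map$Map1 through Map$Map4) plus the generic Map traits with a custom MapSerializer. The usual reason such a hack is needed is that Scala's immutable maps lack a zero-argument constructor and have internal structure that Kryo's default field-based serialization does not rebuild reliably, so the entries have to be written out explicitly. The MapSerializer itself is defined elsewhere in SparkSqlSerializer.scala and is not shown in this diff; a hypothetical serializer along the same lines, using only the standard Kryo 2.x Serializer API, might look like this:

  import com.esotericsoftware.kryo.{Kryo, Serializer}
  import com.esotericsoftware.kryo.io.{Input, Output}

  // Illustrative only: writes the entry count followed by each key/value pair.
  // The actual MapSerializer registered above may be implemented differently.
  class ExampleMapSerializer extends Serializer[Map[Any, Any]] {
    override def write(kryo: Kryo, output: Output, map: Map[Any, Any]) {
      output.writeInt(map.size)
      map.foreach { case (k, v) =>
        kryo.writeClassAndObject(output, k)
        kryo.writeClassAndObject(output, v)
      }
    }

    override def read(kryo: Kryo, input: Input, cls: Class[Map[Any, Any]]): Map[Any, Any] = {
      val entries = Map.newBuilder[Any, Any]
      var remaining = input.readInt()
      while (remaining > 0) {
        val key: Any = kryo.readClassAndObject(input)
        val value: Any = kryo.readClassAndObject(input)
        entries += key -> value
        remaining -= 1
      }
      entries.result()
    }
  }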