Skip to content

Commit 171001f

Browse files
committed
change default outputOrdering
1 parent 47455c9 commit 171001f

File tree

5 files changed

+23
-9
lines changed

5 files changed

+23
-9
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ case class Aggregate(
6060

6161
override def output: Seq[Attribute] = aggregateExpressions.map(_.toAttribute)
6262

63+
override def outputOrdering: Seq[SortOrder] = Nil
64+
6365
/**
6466
* An aggregate that needs to be computed for each row in a group.
6567
*

sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,8 @@ private[sql] case class AddExchange(sqlContext: SQLContext) extends Rule[SparkPl
161161
def addExchangeIfNecessary(partitioning: Partitioning, child: SparkPlan): SparkPlan =
162162
if (child.outputPartitioning != partitioning) Exchange(partitioning, child) else child
163163

164-
// Check if the partitioning we want to ensure is the same as the child's output
165-
// partitioning. If so, we do not need to add the Exchange operator.
164+
// Check if the ordering we want to ensure is the same as the child's output
165+
// ordering. If so, we do not need to add the Sort operator.
166166
def addSortIfNecessary(ordering: Seq[SortOrder], child: SparkPlan): SparkPlan =
167167
if (child.outputOrdering != ordering) Sort(ordering, global = false, child) else child
168168

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ private[sql] trait LeafNode extends SparkPlan with trees.LeafNode[SparkPlan] {
183183
private[sql] trait UnaryNode extends SparkPlan with trees.UnaryNode[SparkPlan] {
184184
self: Product =>
185185
override def outputPartitioning: Partitioning = child.outputPartitioning
186+
override def outputOrdering: Seq[SortOrder] = child.outputOrdering
186187
}
187188

188189
private[sql] trait BinaryNode extends SparkPlan with trees.BinaryNode[SparkPlan] {

sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ case class Sample(fraction: Double, withReplacement: Boolean, seed: Long, child:
7070
override def execute(): RDD[Row] = {
7171
child.execute().map(_.copy()).sample(withReplacement, fraction, seed)
7272
}
73+
74+
override def outputOrdering: Seq[SortOrder] = Nil
7375
}
7476

7577
/**
@@ -146,6 +148,8 @@ case class TakeOrdered(limit: Int, sortOrder: Seq[SortOrder], child: SparkPlan)
146148
// TODO: Terminal split should be implemented differently from non-terminal split.
147149
// TODO: Pick num splits based on |limit|.
148150
override def execute(): RDD[Row] = sparkContext.makeRDD(collectData(), 1)
151+
152+
override def outputOrdering: Seq[SortOrder] = sortOrder
149153
}
150154

151155
/**
@@ -171,6 +175,8 @@ case class Sort(
171175
}
172176

173177
override def output: Seq[Attribute] = child.output
178+
179+
override def outputOrdering: Seq[SortOrder] = sortOrder
174180
}
175181

176182
/**
@@ -201,6 +207,8 @@ case class ExternalSort(
201207
}
202208

203209
override def output: Seq[Attribute] = child.output
210+
211+
override def outputOrdering: Seq[SortOrder] = sortOrder
204212
}
205213

206214
/**

sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -95,13 +95,16 @@ class JoinSuite extends QueryTest with BeforeAndAfterEach {
9595
("SELECT * FROM testData full JOIN testData2 ON (key * a != key + a)",
9696
classOf[BroadcastNestedLoopJoin])
9797
).foreach { case (query, joinClass) => assertJoin(query, joinClass) }
98-
conf.setConf("spark.sql.autoSortMergeJoin", "true")
99-
Seq(
100-
("SELECT * FROM testData JOIN testData2 ON key = a", classOf[SortMergeJoin]),
101-
("SELECT * FROM testData JOIN testData2 ON key = a and key = 2", classOf[SortMergeJoin]),
102-
("SELECT * FROM testData JOIN testData2 ON key = a where key = 2", classOf[SortMergeJoin])
103-
).foreach { case (query, joinClass) => assertJoin(query, joinClass) }
104-
conf.setConf("spark.sql.autoSortMergeJoin", AUTO_SORTMERGEJOIN.toString)
98+
try {
99+
conf.setConf("spark.sql.autoSortMergeJoin", "true")
100+
Seq(
101+
("SELECT * FROM testData JOIN testData2 ON key = a", classOf[SortMergeJoin]),
102+
("SELECT * FROM testData JOIN testData2 ON key = a and key = 2", classOf[SortMergeJoin]),
103+
("SELECT * FROM testData JOIN testData2 ON key = a where key = 2", classOf[SortMergeJoin])
104+
).foreach { case (query, joinClass) => assertJoin(query, joinClass) }
105+
} finally {
106+
conf.setConf("spark.sql.autoSortMergeJoin", AUTO_SORTMERGEJOIN.toString)
107+
}
105108
}
106109

107110
test("broadcasted hash join operator selection") {

0 commit comments

Comments (0)