@@ -633,25 +633,35 @@ class Analyzer(
633
633
* it into the plan tree.
634
634
*/
635
635
object ExtractWindowExpressions extends Rule [LogicalPlan ] {
636
- def hasWindowFunction (projectList : Seq [NamedExpression ]): Boolean =
636
+ private def hasWindowFunction (projectList : Seq [NamedExpression ]): Boolean =
637
637
projectList.exists(hasWindowFunction)
638
638
639
- def hasWindowFunction (expr : NamedExpression ): Boolean = {
639
+ private def hasWindowFunction (expr : NamedExpression ): Boolean = {
640
640
expr.find {
641
641
case window : WindowExpression => true
642
642
case _ => false
643
643
}.isDefined
644
644
}
645
645
646
646
/**
647
- * From a Seq of [[NamedExpression ]]s, extract window expressions and
648
- * other regular expressions.
647
+ * From a Seq of [[NamedExpression ]]s, extract expressions containing window expressions and
648
+ * other regular expressions that do not contain any window expression. For example, for
649
+ * `col1, Sum(col2 + col3) OVER (PARTITION BY col4 ORDER BY col5)`, we will extract
650
+ * `col1`, `col2 + col3`, `col4`, and `col5` out and replace their appearances in
651
+ * the window expression as attribute references. So, the first returned value will be
652
+ * `[Sum(_w0) OVER (PARTITION BY _w1 ORDER BY _w2)]` and the second returned value will be
653
+ * `[col1, col2 + col3 as _w0, col4 as _w1, col5 as _w2]`.
654
+ *
655
+ * @return (seq of expressions containing at least one window expression,
656
+ * seq of non-window expressions)
649
657
*/
650
- def extract (
658
+ private def extract (
651
659
expressions : Seq [NamedExpression ]): (Seq [NamedExpression ], Seq [NamedExpression ]) = {
652
- // First, we simple partition the input expressions to two part, one having
653
- // WindowExpressions and another one without WindowExpressions.
654
- val (windowExpressions, regularExpressions) = expressions.partition(hasWindowFunction)
660
+ // First, we partition the input expressions into two parts. For the first part,
661
+ // every expression in it contains at least one WindowExpression.
662
+ // Expressions in the second part do not have any WindowExpression.
663
+ val (expressionsWithWindowFunctions, regularExpressions) =
664
+ expressions.partition(hasWindowFunction)
655
665
656
666
// Then, we need to extract those regular expressions used in the WindowExpression.
657
667
// For example, when we have col1 - Sum(col2 + col3) OVER (PARTITION BY col4 ORDER BY col5),
@@ -660,8 +670,8 @@ class Analyzer(
660
670
val extractedExprBuffer = new ArrayBuffer [NamedExpression ]()
661
671
def extractExpr (expr : Expression ): Expression = expr match {
662
672
case ne : NamedExpression =>
663
- // If a named expression is not in regularExpressions, add extract it and replace it
664
- // with an AttributeReference.
673
+ // If a named expression is not in regularExpressions, add it to
674
+ // extractedExprBuffer and replace it with an AttributeReference.
665
675
val missingExpr =
666
676
AttributeSet (Seq (expr)) -- (regularExpressions ++ extractedExprBuffer)
667
677
if (missingExpr.nonEmpty) {
@@ -678,8 +688,9 @@ class Analyzer(
678
688
withName.toAttribute
679
689
}
680
690
681
- // Now, we extract expressions from windowExpressions by using extractExpr.
682
- val newWindowExpressions = windowExpressions.map {
691
+ // Now, we extract regular expressions from expressionsWithWindowFunctions
692
+ // by using extractExpr.
693
+ val newExpressionsWithWindowFunctions = expressionsWithWindowFunctions.map {
683
694
_.transform {
684
695
// Extracts children expressions of a WindowFunction (input parameters of
685
696
// a WindowFunction).
@@ -705,37 +716,80 @@ class Analyzer(
705
716
}.asInstanceOf [NamedExpression ]
706
717
}
707
718
708
- (newWindowExpressions , regularExpressions ++ extractedExprBuffer)
709
- }
719
+ (newExpressionsWithWindowFunctions , regularExpressions ++ extractedExprBuffer)
720
+ } // end of extract
710
721
711
722
/**
712
723
* Adds operators for Window Expressions. Every Window operator handles a single Window Spec.
713
724
*/
714
- def addWindow (windowExpressions : Seq [NamedExpression ], child : LogicalPlan ): LogicalPlan = {
715
- // First, we group window expressions based on their Window Spec.
716
- val groupedWindowExpression = windowExpressions.groupBy { expr =>
717
- val windowSpec = expr.collectFirst {
725
+ private def addWindow (
726
+ expressionsWithWindowFunctions : Seq [NamedExpression ],
727
+ child : LogicalPlan ): LogicalPlan = {
728
+ // First, we need to extract all WindowExpressions from expressionsWithWindowFunctions
729
+ // and put those extracted WindowExpressions to extractedWindowExprBuffer.
730
+ // This step is needed because it is possible that an expression contains multiple
731
+ // WindowExpressions with different Window Specs.
732
+ // After extracting WindowExpressions, we need to construct a project list to generate
733
+ // expressionsWithWindowFunctions based on extractedWindowExprBuffer.
734
+ // For example, for "sum(a) over (...) / sum(b) over (...)", we will first extract
735
+ // "sum(a) over (...)" and "sum(b) over (...)" out, and assign "_we0" as the alias to
736
+ // "sum(a) over (...)" and "_we1" as the alias to "sum(b) over (...)".
737
+ // Then, the projectList will be [_we0/_we1].
738
+ val extractedWindowExprBuffer = new ArrayBuffer [NamedExpression ]()
739
+ val newExpressionsWithWindowFunctions = expressionsWithWindowFunctions.map {
740
+ // We need to use transformDown because we want to trigger
741
+ // "case alias @ Alias(window: WindowExpression, _)" first.
742
+ _.transformDown {
743
+ case alias @ Alias (window : WindowExpression , _) =>
744
+ // If a WindowExpression has an assigned alias, just use it.
745
+ extractedWindowExprBuffer += alias
746
+ alias.toAttribute
747
+ case window : WindowExpression =>
748
+ // If there is no alias assigned to the WindowExpression, we create an
749
+ // internal column.
750
+ val withName = Alias (window, s " _we ${extractedWindowExprBuffer.length}" )()
751
+ extractedWindowExprBuffer += withName
752
+ withName.toAttribute
753
+ }.asInstanceOf [NamedExpression ]
754
+ }
755
+
756
+ // Second, we group extractedWindowExprBuffer based on their Window Spec.
757
+ val groupedWindowExpressions = extractedWindowExprBuffer.groupBy { expr =>
758
+ val distinctWindowSpec = expr.collect {
718
759
case window : WindowExpression => window.windowSpec
760
+ }.distinct
761
+
762
+ // We do a final check and see if we only have a single Window Spec defined in an
763
+ // expression.
764
+ if (distinctWindowSpec.length == 0 ) {
765
+ failAnalysis(s " $expr does not have any WindowExpression. " )
766
+ } else if (distinctWindowSpec.length > 1 ) {
767
+ // Every expression in extractedWindowExprBuffer should contain a single
768
+ // WindowExpression. If we reach here, we have a bug.
769
+ failAnalysis(s " $expr has multiple Window Specifications ( $distinctWindowSpec). " +
770
+ s " Please file a bug report with this error message, stack trace, and the query. " )
771
+ } else {
772
+ distinctWindowSpec.head
719
773
}
720
- windowSpec.getOrElse(
721
- failAnalysis(s " $windowExpressions does not have any WindowExpression. " ))
722
774
}.toSeq
723
775
724
- // For every Window Spec, we add a Window operator and set currentChild as the child of it.
776
+ // Third, for every Window Spec, we add a Window operator and set currentChild as the
777
+ // child of it.
725
778
var currentChild = child
726
779
var i = 0
727
- while (i < groupedWindowExpression .size) {
728
- val (windowSpec, windowExpressions) = groupedWindowExpression (i)
780
+ while (i < groupedWindowExpressions .size) {
781
+ val (windowSpec, windowExpressions) = groupedWindowExpressions (i)
729
782
// Set currentChild to the newly created Window operator.
730
783
currentChild = Window (currentChild.output, windowExpressions, windowSpec, currentChild)
731
784
732
- // Move to next WindowExpression .
785
+ // Move to next Window Spec .
733
786
i += 1
734
787
}
735
788
736
- // We return the top operator.
737
- currentChild
738
- }
789
+ // Finally, we create a Project to output currentChild's output
790
+ // and newExpressionsWithWindowFunctions.
791
+ Project (currentChild.output ++ newExpressionsWithWindowFunctions, currentChild)
792
+ } // end of addWindow
739
793
740
794
// We have to use transformDown here to make sure the rule of
741
795
// "Aggregate with Having clause" will be triggered.
0 commit comments