Remove the CubedData & RollupedData

chenghao-intel · chenghao-intel · commit 84c956427316 · 2015-05-19T17:34:52.000-07:00
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -685,7 +685,7 @@ class DataFrame private[sql](
    * @since 1.3.0
    */
   @scala.annotation.varargs
-  def groupBy(cols: Column*): GroupedData = new GroupedData(this, cols.map(_.expr))
+  def groupBy(cols: Column*): GroupedData = new GroupedData(this, cols.map(_.expr), GroupByType)
 
   /**
    * Rollup the [[DataFrame]] using the specified columns, so we can run aggregation on them.
@@ -705,7 +705,7 @@ class DataFrame private[sql](
    * @since 1.4.0
    */
   @scala.annotation.varargs
-  def rollup(cols: Column*): GroupedData = new RollupedData(this, cols.map(_.expr))
+  def rollup(cols: Column*): GroupedData = new GroupedData(this, cols.map(_.expr), RollupType)
 
   /**
    * Cube the [[DataFrame]] using the specified columns, so we can run aggregation on them.
@@ -725,7 +725,7 @@ class DataFrame private[sql](
    * @since 1.4.0
    */
   @scala.annotation.varargs
-  def cube(cols: Column*): GroupedData = new CubedData(this, cols.map(_.expr))
+  def cube(cols: Column*): GroupedData = new GroupedData(this, cols.map(_.expr), CubeType)
 
   /**
    * Groups the [[DataFrame]] using the specified columns, so we can run aggregation on them.
@@ -750,7 +750,7 @@ class DataFrame private[sql](
   @scala.annotation.varargs
   def groupBy(col1: String, cols: String*): GroupedData = {
     val colNames: Seq[String] = col1 +: cols
-    new GroupedData(this, colNames.map(colName => resolve(colName)))
+    new GroupedData(this, colNames.map(colName => resolve(colName)), GroupByType)
   }
 
   /**
@@ -776,7 +776,7 @@ class DataFrame private[sql](
   @scala.annotation.varargs
   def rollup(col1: String, cols: String*): GroupedData = {
     val colNames: Seq[String] = col1 +: cols
-    new RollupedData(this, colNames.map(colName => resolve(colName)))
+    new GroupedData(this, colNames.map(colName => resolve(colName)), RollupType)
   }
 
   /**
@@ -802,7 +802,7 @@ class DataFrame private[sql](
   @scala.annotation.varargs
   def cube(col1: String, cols: String*): GroupedData = {
     val colNames: Seq[String] = col1 +: cols
-    new CubedData(this, colNames.map(colName => resolve(colName)))
+    new GroupedData(this, colNames.map(colName => resolve(colName)), CubeType)
   }
 
   /**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala
@@ -26,6 +26,25 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical.{Rollup, Cube, Aggregate}
 import org.apache.spark.sql.types.NumericType
 
+/**
+ * The kind of grouping (GROUP BY, CUBE or ROLLUP) that a [[GroupedData]] performs.
+ */
+sealed private[sql] trait GroupType
+
+/**
+ * Marks a plain GROUP BY; `GroupedData.toDF` plans it as an `Aggregate`.
+ */
+private[sql] object GroupByType extends GroupType
+
+/**
+ * Marks a CUBE; `GroupedData.toDF` plans it as a `Cube`.
+ */
+private[sql] object CubeType extends GroupType
+
+/**
+ * Marks a ROLLUP; `GroupedData.toDF` plans it as a `Rollup`.
+ */
+private[sql] object RollupType extends GroupType
 
 /**
  * :: Experimental ::
@@ -34,10 +53,13 @@ import org.apache.spark.sql.types.NumericType
  * @since 1.3.0
  */
 @Experimental
-class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) {
+class GroupedData protected[sql](
+    df: DataFrame,
+    groupingExprs: Seq[Expression],
+    groupType: GroupType) {
 
   protected def aggregateExpressions(aggrExprs: Seq[NamedExpression])
-  : Seq[NamedExpression] = {
+    : Seq[NamedExpression] = {
     if (df.sqlContext.conf.dataFrameRetainGroupColumns) {
       val retainedExprs = groupingExprs.map {
         case expr: NamedExpression => expr
@@ -50,8 +72,17 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
   }
 
   protected[sql] implicit def toDF(aggExprs: Seq[NamedExpression]): DataFrame = {
-    DataFrame(
-      df.sqlContext, Aggregate(groupingExprs, aggregateExpressions(aggExprs), df.logicalPlan))
+    // Pick the logical operator for the requested grouping; it is wrapped in a DataFrame below.
+    // Note: df.logicalPlan is Aggregate's last argument but the second one for Rollup and Cube.
+    val groupedPlan = groupType match {
+      case GroupByType =>
+        Aggregate(groupingExprs, aggregateExpressions(aggExprs), df.logicalPlan)
+      case RollupType =>
+        Rollup(groupingExprs, df.logicalPlan, aggregateExpressions(aggExprs))
+      case CubeType =>
+        Cube(groupingExprs, df.logicalPlan, aggregateExpressions(aggExprs))
+    }
+    DataFrame(df.sqlContext, groupedPlan)
   }
 
   private[this] def aggregateNumericColumns(colNames: String*)(f: Expression => Expression)
@@ -259,27 +290,3 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
   }
 
 }
-
-/**
- * A set of methods for aggregations on a [[DataFrame]] cube, created by [[DataFrame.cube]].
- */
-private[sql] class CubedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
-  extends GroupedData(df, groupingExprs) {
-
-  protected[sql] implicit override def toDF(aggExprs: Seq[NamedExpression]): DataFrame = {
-    DataFrame(
-      df.sqlContext, Cube(groupingExprs, df.logicalPlan, aggregateExpressions(aggExprs)))
-  }
-}
-
-/**
- * A set of methods for aggregations on a [[DataFrame]] rollup, created by [[DataFrame.rollup]].
- */
-private[sql] class RollupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
-  extends GroupedData(df, groupingExprs) {
-
-  protected[sql] implicit override def toDF(aggExprs: Seq[NamedExpression]): DataFrame = {
-    DataFrame(
-      df.sqlContext, Rollup(groupingExprs, df.logicalPlan, aggregateExpressions(aggExprs)))
-  }
-}