Commit d910141

Updated rest of the files

1 parent 1e6e666 commit d910141

File tree

4 files changed: +7 additions, -6 deletions

python/pyspark/sql/dataframe.py

Lines changed: 1 addition & 1 deletion

@@ -1069,7 +1069,7 @@ def agg(self, *exprs):
 
         >>> from pyspark.sql import functions as F
         >>> gdf.agg(F.min(df.age)).collect()
-        [Row(MIN(age)=2), Row(MIN(age)=5)]
+        [Row(name=u'Alice', MIN(age)=2), Row(name=u'Bob', MIN(age)=5)]
         """
         assert exprs, "exprs should not be empty"
         if len(exprs) == 1 and isinstance(exprs[0], dict):
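The updated doctest captures the behavioral change in this commit: groupBy(...).agg(...) now keeps the grouping column (name) in its output. For reference, a minimal Scala sketch of the same default (the data and column names mirror the doctest and are illustrative; it assumes a Spark 1.4-era sqlContext with its implicits imported, e.g. in spark-shell):

    import org.apache.spark.sql.functions.min
    import sqlContext.implicits._  // assumes a SQLContext named sqlContext

    val df = Seq(("Alice", 2), ("Bob", 5)).toDF("name", "age")

    // The grouping column is now retained alongside the aggregate:
    // [Alice, 2] and [Bob, 5] rather than [2] and [5].
    df.groupBy("name").agg(min($"age")).show()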

sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala

Lines changed: 3 additions & 2 deletions

@@ -135,8 +135,9 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression])
   }
 
   /**
-   * Compute aggregates by specifying a series of aggregate columns. Unlike other methods in this
-   * class, the resulting [[DataFrame]] won't automatically include the grouping columns.
+   * Compute aggregates by specifying a series of aggregate columns. Note that this function by
+   * default retains the grouping columns in its output. To not retain grouping columns, set
+   * `spark.sql.retainGroupColumns` to false.
    *
    * The available aggregate methods are defined in [[org.apache.spark.sql.functions]].
    *
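The rewritten Scaladoc names the escape hatch for the old behavior. A sketch of toggling it, under the same sqlContext assumption as above; spark.sql.retainGroupColumns is the conf key documented in the comment:

    import org.apache.spark.sql.functions.min
    import sqlContext.implicits._

    val df = Seq(("Alice", 2), ("Bob", 5)).toDF("name", "age")

    // Opt out of the new default to get the pre-1.4 output back:
    sqlContext.setConf("spark.sql.retainGroupColumns", "false")
    df.groupBy("name").agg(min($"age")).show()  // only MIN(age), no name column

    sqlContext.setConf("spark.sql.retainGroupColumns", "true")  // restore default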

sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala

Lines changed: 1 addition & 1 deletion

@@ -102,7 +102,7 @@ private[sql] object StatFunctions extends Logging {
   /** Generate a table of frequencies for the elements of two columns. */
   private[sql] def crossTabulate(df: DataFrame, col1: String, col2: String): DataFrame = {
     val tableName = s"${col1}_$col2"
-    val counts = df.groupBy(col1, col2).agg(col(col1), col(col2), count("*")).take(1e6.toInt)
+    val counts = df.groupBy(col1, col2).agg(count("*")).take(1e6.toInt)
     if (counts.length == 1e6.toInt) {
       logWarning("The maximum limit of 1e6 pairs have been collected, which may not be all of " +
         "the pairs. Please try reducing the amount of distinct items in your columns.")

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 2 additions & 2 deletions

@@ -62,7 +62,7 @@ class DataFrameSuite extends QueryTest {
     val df = Seq((1,(1,1))).toDF()
 
     checkAnswer(
-      df.groupBy("_1").agg(col("_1"), sum("_2._1")).toDF("key", "total"),
+      df.groupBy("_1").agg(sum("_2._1")).toDF("key", "total"),
       Row(1, 1) :: Nil)
   }
 
@@ -127,7 +127,7 @@ class DataFrameSuite extends QueryTest {
       df2
         .select('_1 as 'letter, 'number)
         .groupBy('letter)
-        .agg('letter, countDistinct('number)),
+        .agg(countDistinct('number)),
       Row("a", 3) :: Row("b", 2) :: Row("c", 1) :: Nil
     )
   }
