update comments

carsonwang · carsonwang · commit 4a2311cd1003 · 2019-01-15T17:47:38.000+08:00
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanQueryStage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanQueryStage.scala
@@ -30,7 +30,9 @@ import org.apache.spark.sql.types.StructType
  * Divide the spark plan into multiple QueryStages. For each Exchange in the plan, it adds a
  * QueryStage and a QueryStageInput. If reusing Exchange is enabled, it finds duplicated exchanges
  * and uses the same QueryStage for all the references. Note this rule must be run after
- * EnsureRequirements rule.
+ * EnsureRequirements rule. The rule divides the plan into multiple sub-trees because
+ * QueryStageInput is a leaf node. Transforming the plan after applying this rule will only
+ * transform the nodes within a single sub-tree.
  */
 case class PlanQueryStage(conf: SQLConf) extends Rule[SparkPlan] {
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageInput.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageInput.scala
@@ -25,9 +25,13 @@ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partition
 import org.apache.spark.sql.execution._
 
 /**
- * QueryStageInput is the leaf node of a QueryStage and is used to hide its child stage. It gets
- * the result of its child stage and serves it as the input of the QueryStage. A QueryStage knows
- * its child stages by collecting all the QueryStageInputs.
+ * QueryStageInput is the leaf node of a QueryStage and serves as its input. It is responsible for
+ * changing the output partitioning based on the needs of its QueryStage. It gets the
+ * ShuffledRowRDD from its child stage and creates a new ShuffledRowRDD with different partitions
+ * by specifying an optional array of partition start indices. For example, a ShuffledQueryStage
+ * can be reused by two different QueryStages. One QueryStageInput can let the first task read
+ * partitions 0 to 3, while in another stage, the QueryStageInput can let the first task read
+ * partitions 0 to 1. A QueryStage knows its child stages by collecting all the QueryStageInputs.
  */
 abstract class QueryStageInput extends LeafExecNode {