[SPARK-23034][SQL] Override nodeName for all *ScanExec operators (#20226)
Changes from all commits: 49da476, 22f4e48, edd44fd, ec85a02, 9c29252, 0c0aa94, bf90ac7, 1facc05
```diff
@@ -103,6 +103,8 @@ case class ExternalRDDScanExec[T](
   override lazy val metrics = Map(
     "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))

+  override val nodeName: String = s"Scan ExternalRDD ${output.map(_.name).mkString("[", ",", "]")}"
+
   protected override def doExecute(): RDD[InternalRow] = {
     val numOutputRows = longMetric("numOutputRows")
     val outputDataType = outputObjAttr.dataType
```

Review comment (on the added `nodeName` line): I don't think including the output in the node name is a good idea.

Reply: My intention here was to be able to distinguish between …
```diff
@@ -116,7 +118,7 @@ case class ExternalRDDScanExec[T](
   }

   override def simpleString: String = {
-    s"Scan $nodeName${output.mkString("[", ",", "]")}"
+    s"Scan ${super.nodeName}${output.mkString("[", ",", "]")}"
   }
 }
```
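To make the pattern concrete, here is a minimal, self-contained Scala sketch of what overriding `nodeName` buys in the rendered plan. The classes are hypothetical stand-ins, not Spark's actual `TreeNode` hierarchy, and the default-name derivation is only assumed to resemble Spark's.

```scala
// Hypothetical stand-ins for the plan-node hierarchy; not Spark's classes.
abstract class PlanNode {
  // Assumed default: derive the node name from the class name, dropping a
  // trailing "Exec" (similar in spirit to Spark's TreeNode.nodeName).
  def nodeName: String = getClass.getSimpleName.replaceAll("Exec$", "")
  def simpleString: String = nodeName
}

// Overriding nodeName lets the operator surface what it scans.
case class ExampleScanExec(columns: Seq[String]) extends PlanNode {
  override val nodeName: String =
    s"Scan ExampleRDD ${columns.mkString("[", ",", "]")}"
}

object NodeNameDemo extends App {
  val scan = ExampleScanExec(Seq("a", "b"))
  println(scan.nodeName)  // Scan ExampleRDD [a,b]
  // Without the override, the fallback rule would have produced "ExampleScan".
}
```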
```diff
@@ -169,10 +171,12 @@ case class LogicalRDD(
 case class RDDScanExec(
     output: Seq[Attribute],
     rdd: RDD[InternalRow],
-    override val nodeName: String,
+    name: String,
     override val outputPartitioning: Partitioning = UnknownPartitioning(0),
     override val outputOrdering: Seq[SortOrder] = Nil) extends LeafExecNode {

+  override val nodeName: String = s"Scan RDD $name ${output.map(_.name).mkString("[", ",", "]")}"
+
   override lazy val metrics = Map(
     "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"))
```

Review comment (on the added `nodeName` line): ditto

Reply: removed
```diff
@@ -189,6 +193,6 @@ case class RDDScanExec(
   }

   override def simpleString: String = {
-    s"Scan $nodeName${Utils.truncatedString(output, "[", ",", "]")}"
+    s"$nodeName${Utils.truncatedString(output, "[", ",", "]")}"
   }
 }
```
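Since `RDDScanExec` callers now pass a short `name` instead of a fully formatted `nodeName`, the formatting lives in one place. A quick sketch of the derivation, with a made-up attribute type and made-up values:

```scala
// Made-up minimal attribute type, just to mirror output.map(_.name).
case class Attr(name: String)

// Same string shape as the RDDScanExec override above.
def deriveNodeName(name: String, output: Seq[Attr]): String =
  s"Scan RDD $name ${output.map(_.name).mkString("[", ",", "]")}"

// Callers now pass only the short name:
println(deriveNodeName("ExistingRDD", Seq(Attr("id"), Attr("value"))))
// prints: Scan RDD ExistingRDD [id,value]
```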
```diff
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.plans.physical.Partitioning
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.execution.aggregate.HashAggregateExec
+import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec
 import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec}
 import org.apache.spark.sql.execution.metric.SQLMetrics
 import org.apache.spark.sql.internal.SQLConf
```
```diff
@@ -45,7 +46,12 @@ trait CodegenSupport extends SparkPlan {
     case _: SortMergeJoinExec => "smj"
     case _: RDDScanExec => "rdd"
     case _: DataSourceScanExec => "scan"
-    case _ => nodeName.toLowerCase(Locale.ROOT)
+    case _: LocalTableScanExec => "local_scan"
+    case _: InMemoryTableScanExec => "in_mem_scan"
+    case _ =>
+      // Java variable names can only have alphanumeric characters, underscores and `$`
+      // (the use of the latter two is discouraged)
+      nodeName.toLowerCase(Locale.ROOT).replaceAll("\\P{Alnum}", "")
   }

   /**
```

Review comment (on the old default case): This caused one of the tests to fail as the …
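The `\P{Alnum}` pattern matches every character that is not alphanumeric, so the fallback strips spaces, brackets, and punctuation from arbitrary node names before they are embedded in generated Java variable names. A stand-alone check, with made-up node names:

```scala
import java.util.Locale

// Mirrors the fallback branch above: lower-case, then drop every
// non-alphanumeric character (\P{Alnum} is the negated POSIX class).
def variablePrefix(nodeName: String): String =
  nodeName.toLowerCase(Locale.ROOT).replaceAll("\\P{Alnum}", "")

// Made-up node names to exercise the sanitizer:
println(variablePrefix("Scan ExternalRDD [a,b]"))  // scanexternalrddab
println(variablePrefix("SerializeFromObject"))     // serializefromobject
```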
Review comment: `DataSourceScanExec.nodeName` is defined as `s"Scan $relation ${tableIdentifier.map(_.unquotedString).getOrElse("")}"`, do we really need to overwrite it here?

Reply: My intent was to be able to distinguish between RowDataSourceScan and FileSourceScan. Removing those overrides.
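For readers tracing the quoted default: `tableIdentifier` is an `Option`, so `map(...).getOrElse("")` appends the unquoted name only when a table identifier exists. A toy illustration, with an invented `Ident` type standing in for Spark's `TableIdentifier`:

```scala
// Invented stand-in for TableIdentifier, only to show the Option idiom.
case class Ident(table: String, database: Option[String]) {
  def unquotedString: String = (database.toSeq :+ table).mkString(".")
}

def defaultNodeName(relation: String, tableIdentifier: Option[Ident]): String =
  s"Scan $relation ${tableIdentifier.map(_.unquotedString).getOrElse("")}"

println(defaultNodeName("JDBCRelation", Some(Ident("t1", Some("db")))))
// Scan JDBCRelation db.t1
println(defaultNodeName("HadoopFsRelation", None))
// "Scan HadoopFsRelation " (trailing space when the identifier is absent)
```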