Skip to content

Commit c3462c6

Browse files
GregOwenpwendell
authored andcommitted
[SPARK-2086] Improve output of toDebugString to make shuffle boundaries more clear
Changes RDD.toDebugString() to show hierarchy and shuffle transformations more clearly New output: ``` (3) FlatMappedValuesRDD[325] at apply at Transformer.scala:22 | MappedValuesRDD[324] at apply at Transformer.scala:22 | CoGroupedRDD[323] at apply at Transformer.scala:22 +-(5) MappedRDD[320] at apply at Transformer.scala:22 | | MappedRDD[319] at apply at Transformer.scala:22 | | MappedValuesRDD[318] at apply at Transformer.scala:22 | | MapPartitionsRDD[317] at apply at Transformer.scala:22 | | ShuffledRDD[316] at apply at Transformer.scala:22 | +-(10) MappedRDD[315] at apply at Transformer.scala:22 | | ParallelCollectionRDD[314] at apply at Transformer.scala:22 +-(100) MappedRDD[322] at apply at Transformer.scala:22 | ParallelCollectionRDD[321] at apply at Transformer.scala:22 ``` Author: Gregory Owen <[email protected]> Closes #1364 from GregOwen/to-debug-string and squashes the following commits: 08f5c78 [Gregory Owen] toDebugString: prettier debug printing to show shuffles and joins more clearly 1603f7b [Gregory Owen] toDebugString: prettier debug printing to show shuffles and joins more clearly
1 parent 511a731 commit c3462c6

File tree

2 files changed

+56
-4
lines changed

2 files changed

+56
-4
lines changed

core/src/main/scala/org/apache/spark/rdd/RDD.scala

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,11 +1269,55 @@ abstract class RDD[T: ClassTag](
12691269

12701270
/** A description of this RDD and its recursive dependencies for debugging. */
12711271
def toDebugString: String = {
1272-
def debugString(rdd: RDD[_], prefix: String = ""): Seq[String] = {
1273-
Seq(prefix + rdd + " (" + rdd.partitions.size + " partitions)") ++
1274-
rdd.dependencies.flatMap(d => debugString(d.rdd, prefix + " "))
1272+
// Apply a different rule to the last child
1273+
def debugChildren(rdd: RDD[_], prefix: String): Seq[String] = {
1274+
val len = rdd.dependencies.length
1275+
len match {
1276+
case 0 => Seq.empty
1277+
case 1 =>
1278+
val d = rdd.dependencies.head
1279+
debugString(d.rdd, prefix, d.isInstanceOf[ShuffleDependency[_,_,_]], true)
1280+
case _ =>
1281+
val frontDeps = rdd.dependencies.take(len - 1)
1282+
val frontDepStrings = frontDeps.flatMap(
1283+
d => debugString(d.rdd, prefix, d.isInstanceOf[ShuffleDependency[_,_,_]]))
1284+
1285+
val lastDep = rdd.dependencies.last
1286+
val lastDepStrings =
1287+
debugString(lastDep.rdd, prefix, lastDep.isInstanceOf[ShuffleDependency[_,_,_]], true)
1288+
1289+
(frontDepStrings ++ lastDepStrings)
1290+
}
1291+
}
1292+
// The first RDD in the dependency stack has no parents, so no need for a +-
1293+
def firstDebugString(rdd: RDD[_]): Seq[String] = {
1294+
val partitionStr = "(" + rdd.partitions.size + ")"
1295+
val leftOffset = (partitionStr.length - 1) / 2
1296+
val nextPrefix = (" " * leftOffset) + "|" + (" " * (partitionStr.length - leftOffset))
1297+
Seq(partitionStr + " " + rdd) ++ debugChildren(rdd, nextPrefix)
1298+
}
1299+
def shuffleDebugString(rdd: RDD[_], prefix: String = "", isLastChild: Boolean): Seq[String] = {
1300+
val partitionStr = "(" + rdd.partitions.size + ")"
1301+
val leftOffset = (partitionStr.length - 1) / 2
1302+
val thisPrefix = prefix.replaceAll("\\|\\s+$", "")
1303+
val nextPrefix = (
1304+
thisPrefix
1305+
+ (if (isLastChild) " " else "| ")
1306+
+ (" " * leftOffset) + "|" + (" " * (partitionStr.length - leftOffset)))
1307+
Seq(thisPrefix + "+-" + partitionStr + " " + rdd) ++ debugChildren(rdd, nextPrefix)
1308+
}
1309+
def debugString(rdd: RDD[_],
1310+
prefix: String = "",
1311+
isShuffle: Boolean = true,
1312+
isLastChild: Boolean = false): Seq[String] = {
1313+
if (isShuffle) {
1314+
shuffleDebugString(rdd, prefix, isLastChild)
1315+
}
1316+
else {
1317+
Seq(prefix + rdd) ++ debugChildren(rdd, prefix)
1318+
}
12751319
}
1276-
debugString(this).mkString("\n")
1320+
firstDebugString(this).mkString("\n")
12771321
}
12781322

12791323
override def toString: String = "%s%s[%d] at %s".format(

project/MimaExcludes.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,14 @@ object MimaExcludes {
6161
"org.apache.spark.api.java.JavaDoubleRDD.countApproxDistinct$default$1"),
6262
ProblemFilters.exclude[MissingMethodProblem](
6363
"org.apache.spark.storage.MemoryStore.Entry"),
64+
ProblemFilters.exclude[MissingMethodProblem](
65+
"org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$debugChildren$1"),
66+
ProblemFilters.exclude[MissingMethodProblem](
67+
"org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$firstDebugString$1"),
68+
ProblemFilters.exclude[MissingMethodProblem](
69+
"org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$shuffleDebugString$1"),
70+
ProblemFilters.exclude[MissingMethodProblem](
71+
"org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$debugString$1"),
6472
ProblemFilters.exclude[MissingMethodProblem](
6573
"org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$"
6674
+ "createZero$1")

0 commit comments

Comments
 (0)