
Commit 9bf7256

7mming7 authored and chenzhx committed

performance issue when gc blocks (apache#189)

Co-authored-by: 7mming7 <[email protected]>
1 parent 8396226 commit 9bf7256

File tree: 10 files changed, +52 -6 lines changed

core/src/main/scala/org/apache/spark/MapOutputTracker.scala

Lines changed: 1 addition & 1 deletion

```diff
@@ -1381,7 +1381,7 @@ private[spark] object MapOutputTracker extends Logging {
     // Important arr(0) is the tag == DIRECT, ignore that while deserializing !
     // arr is a nested Array so that it can handle over 2GB serialized data
     val arr = chunkedByteBuf.getChunks().map(_.array())
-    val bcast = broadcastManager.newBroadcast(arr, isLocal)
+    val bcast = broadcastManager.newBroadcast(arr, isLocal, null)
     // Using `org.apache.commons.io.output.ByteArrayOutputStream` instead of the standard one
     // This implementation doesn't reallocate the whole memory block but allocates
     // additional buffers. This way no buffers need to be garbage collected and
```

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 5 additions & 2 deletions

```diff
@@ -1512,10 +1512,13 @@ class SparkContext(config: SparkConf) extends Logging {
     assertNotStopped()
     require(!classOf[RDD[_]].isAssignableFrom(classTag[T].runtimeClass),
       "Can not directly broadcast RDDs; instead, call collect() and broadcast the result.")
-    val bc = env.broadcastManager.newBroadcast[T](value, isLocal)
+    val executionId = getLocalProperty("spark.sql.execution.id")
+    val bc = env.broadcastManager.newBroadcast[T](value, isLocal, executionId)
     val callSite = getCallSite
     logInfo("Created broadcast " + bc.id + " from " + callSite.shortForm)
-    cleaner.foreach(_.registerBroadcastForCleanup(bc))
+    if (executionId == null) {
+      cleaner.foreach(_.registerBroadcastForCleanup(bc))
+    }
     bc
   }
```
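With this change, a broadcast created while a SQL execution is in flight is tagged with that execution's id and skips ContextCleaner registration, since BroadcastManager.cleanBroadCast will release it when the query ends; internal callers such as MapOutputTracker above pass `null` and stay on the ContextCleaner path. A minimal sketch of the gate, assuming an active SparkContext `sc` (illustrative, not part of the patch):

```scala
// Outside any SQL execution the local property is unset, so this broadcast is
// registered with the ContextCleaner and reclaimed only when it becomes unreachable.
assert(sc.getLocalProperty("spark.sql.execution.id") == null)
val b = sc.broadcast(Array(1, 2, 3))

// Inside SQLExecution.withNewExecutionId the property holds the execution id, so
// with this patch the broadcast is recorded under that id instead and released
// eagerly by BroadcastManager.cleanBroadCast once the query finishes.
```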

core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala

Lines changed: 25 additions & 1 deletion

```diff
@@ -18,8 +18,10 @@
 package org.apache.spark.broadcast

 import java.util.Collections
+import java.util.concurrent.ConcurrentHashMap
 import java.util.concurrent.atomic.AtomicLong

+import scala.collection.mutable.ListBuffer
 import scala.reflect.ClassTag

 import org.apache.commons.collections.map.{AbstractReferenceMap, ReferenceMap}
@@ -31,8 +33,11 @@ import org.apache.spark.internal.Logging
 private[spark] class BroadcastManager(
     val isDriver: Boolean, conf: SparkConf) extends Logging {

+  val cleanQueryBroadcast = conf.getBoolean("spark.broadcast.autoClean.enabled", false)
+
   private var initialized = false
   private var broadcastFactory: BroadcastFactory = null
+  var cachedBroadcast = new ConcurrentHashMap[String, ListBuffer[Long]]()

   initialize()

@@ -53,14 +58,33 @@ private[spark] class BroadcastManager(

   private val nextBroadcastId = new AtomicLong(0)

+  private[spark] def currentBroadcastId: Long = nextBroadcastId.get()
+
   private[broadcast] val cachedValues =
     Collections.synchronizedMap(
       new ReferenceMap(AbstractReferenceMap.HARD, AbstractReferenceMap.WEAK)
         .asInstanceOf[java.util.Map[Any, Any]]
     )

-  def newBroadcast[T: ClassTag](value_ : T, isLocal: Boolean): Broadcast[T] = {
+  def cleanBroadCast(executionId: String): Unit = {
+    if (cachedBroadcast.containsKey(executionId)) {
+      cachedBroadcast.get(executionId)
+        .foreach(broadcastId => unbroadcast(broadcastId, true, false))
+      cachedBroadcast.remove(executionId)
+    }
+  }
+
+  def newBroadcast[T: ClassTag](value_ : T, isLocal: Boolean, executionId: String): Broadcast[T] = {
     val bid = nextBroadcastId.getAndIncrement()
+    if (executionId != null && cleanQueryBroadcast) {
+      if (cachedBroadcast.containsKey(executionId)) {
+        cachedBroadcast.get(executionId) += bid
+      } else {
+        val list = new scala.collection.mutable.ListBuffer[Long]
+        list += bid
+        cachedBroadcast.put(executionId, list)
+      }
+    }
     value_ match {
       case pb: PythonBroadcast =>
         // SPARK-28486: attach this new broadcast variable's id to the PythonBroadcast,
```
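This registry is the heart of the patch: each SQL execution accumulates the ids of the broadcasts it creates, and `cleanBroadCast` drains them in one pass. A standalone sketch of the same pattern follows; the class name and `release` callback are illustrative stand-ins (release corresponds to `unbroadcast(id, removeFromDriver = true, blocking = false)`), and `computeIfAbsent` is used to close the small containsKey/put race that the diff's two-step registration leaves open when jobs of one execution broadcast concurrently:

```scala
import java.util.concurrent.ConcurrentHashMap
import scala.collection.mutable.ListBuffer

// Illustrative stand-in for the cachedBroadcast bookkeeping above.
class ExecutionBroadcastRegistry(release: Long => Unit) {
  private val byExecution = new ConcurrentHashMap[String, ListBuffer[Long]]()

  def register(executionId: String, broadcastId: Long): Unit = {
    // computeIfAbsent makes lookup-or-create atomic, unlike containsKey + put.
    val ids = byExecution.computeIfAbsent(executionId, _ => ListBuffer.empty[Long])
    ids.synchronized { ids += broadcastId }
  }

  def cleanUp(executionId: String): Unit = {
    // remove() returns the previous list (or null), so each id is released once.
    Option(byExecution.remove(executionId)).foreach(_.foreach(release))
  }
}
```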

core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala

Lines changed: 3 additions & 0 deletions

```diff
@@ -854,6 +854,8 @@ private[spark] class DAGScheduler(
       return new JobWaiter[U](this, jobId, 0, resultHandler)
     }

+    val executionId = sc.getLocalProperty("spark.sql.execution.id")
+    logInfo(s"submit job : $jobId, executionId is $executionId")
     assert(partitions.nonEmpty)
     val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _]
     val waiter = new JobWaiter[U](this, jobId, partitions.size, resultHandler)
@@ -1317,6 +1319,7 @@ private[spark] class DAGScheduler(
   /** Called when stage's parents are available and we can now do its task. */
   private def submitMissingTasks(stage: Stage, jobId: Int): Unit = {
     logDebug("submitMissingTasks(" + stage + ")")
+    logInfo(s"submit stage ${stage.id} with jobId: $jobId")

     // Before find missing partition, do the intermediate state clean work first.
     // The operation here can make sure for the partially completed intermediate stage,
```

core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala

Lines changed: 1 addition & 1 deletion

```diff
@@ -54,7 +54,7 @@ import org.apache.spark.rdd.RDD
 private[spark] class ResultTask[T, U](
     stageId: Int,
     stageAttemptId: Int,
-    taskBinary: Broadcast[Array[Byte]],
+    val taskBinary: Broadcast[Array[Byte]],
     partition: Partition,
     locs: Seq[TaskLocation],
     val outputId: Int,
```

core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala

Lines changed: 1 addition & 1 deletion

```diff
@@ -52,7 +52,7 @@ import org.apache.spark.rdd.RDD
 private[spark] class ShuffleMapTask(
     stageId: Int,
     stageAttemptId: Int,
-    taskBinary: Broadcast[Array[Byte]],
+    val taskBinary: Broadcast[Array[Byte]],
     partition: Partition,
     @transient private var locs: Seq[TaskLocation],
     localProperties: Properties,
```
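ResultTask and ShuffleMapTask get the same one-word change: promoting `taskBinary` from a plain constructor parameter to a `val` turns it into a public member, which is what lets TaskSetManager read `taskBinary.id` in the next file. In miniature (hypothetical classes):

```scala
class WithoutVal(binary: Long)   // parameter: visible only inside the class body
class WithVal(val binary: Long)  // val parameter: also a public accessor

// new WithoutVal(1L).binary     // does not compile: binary is not a member
new WithVal(1L).binary           // fine: returns 1L
```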

core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala

Lines changed: 7 additions & 0 deletions

```diff
@@ -564,6 +564,13 @@ private[spark] class TaskSetManager(
     if (isZombie && runningTasks == 0) {
       sched.taskSetFinished(this)
       if (tasksSuccessful == numTasks) {
+        val broadcastId = taskSet.tasks.head match {
+          case resultTask: ResultTask[Any, Any] =>
+            resultTask.taskBinary.id
+          case shuffleMapTask: ShuffleMapTask =>
+            shuffleMapTask.taskBinary.id
+        }
+        SparkEnv.get.broadcastManager.unbroadcast(broadcastId, true, false)
         healthTracker.foreach(_.updateExcludedForSuccessfulTaskSet(
           taskSet.stageId,
           taskSet.stageAttemptId,
```
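Note that this match is non-exhaustive: if the head task were ever neither a ResultTask nor a ShuffleMapTask, it would throw a MatchError. Those are the only two concrete Task subclasses the DAGScheduler creates, so the patch is safe in practice; a defensive sketch of the same cleanup (not what the patch does) would be:

```scala
// Hedged variant: tolerate other Task subtypes instead of risking MatchError.
val broadcastId: Option[Long] = taskSet.tasks.head match {
  case resultTask: ResultTask[_, _]   => Some(resultTask.taskBinary.id)
  case shuffleMapTask: ShuffleMapTask => Some(shuffleMapTask.taskBinary.id)
  case _                              => None
}
broadcastId.foreach(id => SparkEnv.get.broadcastManager.unbroadcast(id, true, false))
```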

core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala

Lines changed: 6 additions & 0 deletions

```diff
@@ -340,6 +340,12 @@ class BlockManagerMasterEndpoint(
       }
     }.toSeq

+    val blocksToRemove = blockLocations.keySet().asScala
+      .collect {
+        case broadcastId @ BroadcastBlockId(`broadcastId`, _) =>
+          broadcastId
+      }
+    blocksToRemove.foreach(blockLocations.remove)
     Future.sequence(futures)
   }
```
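The backticked `broadcastId` inside the pattern is Scala's stable-identifier syntax: it compares the block's first field against the value of the enclosing `broadcastId` parameter rather than binding a new variable, while the outer `broadcastId @ ...` captures the whole matching BroadcastBlockId so it can be removed from `blockLocations`. A self-contained illustration of the construct, with made-up values:

```scala
// Mirrors the shape of Spark's storage.BroadcastBlockId for illustration.
case class BroadcastBlockId(broadcastId: Long, field: String)

val broadcastId = 7L  // plays the role of removeBroadcast's parameter
val blocks = Seq(BroadcastBlockId(7L, "piece0"), BroadcastBlockId(8L, "piece0"))

// The backticks match against the existing val above; an unbackticked name
// would bind anything and match every block.
val matching = blocks.collect { case b @ BroadcastBlockId(`broadcastId`, _) => b }
// matching == List(BroadcastBlockId(7, "piece0"))
```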

pom.xml

Lines changed: 1 addition & 0 deletions

```diff
@@ -86,6 +86,7 @@
     <module>common/kvstore</module>
     <module>common/network-common</module>
     <module>common/network-shuffle</module>
+    <module>common/network-yarn</module>
     <module>common/unsafe</module>
     <module>common/tags</module>
     <module>core</module>
```

sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala

Lines changed: 2 additions & 0 deletions

```diff
@@ -21,6 +21,7 @@ import java.util.concurrent.{ConcurrentHashMap, ExecutorService, Future => JFuture}
 import java.util.concurrent.atomic.AtomicLong

 import org.apache.spark.SparkContext
+import org.apache.spark.SparkEnv
 import org.apache.spark.internal.config.Tests.IS_TESTING
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.execution.ui.{PostQueryExecutionForKylin, SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart}
@@ -127,6 +128,7 @@ object SQLExecution {
     } finally {
       executionIdToQueryExecution.remove(executionId)
       sc.setLocalProperty(EXECUTION_ID_KEY, oldExecutionId)
+      SparkEnv.get.broadcastManager.cleanBroadCast(executionId.toString)
     }
   }
```
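This finally-block hook closes the loop: the execution id that SparkContext read from the local property is the same one, rendered as a String, that keys the registry being drained here. Summarizing the call chain the commit introduces (comment-only sketch; `df` is an assumed DataFrame inside a query run with autoClean enabled):

```scala
// spark.broadcast.autoClean.enabled=true
// 1. SQLExecution.withNewExecutionId sets "spark.sql.execution.id".
// 2. SparkContext.broadcast reads the property and passes it to
//    BroadcastManager.newBroadcast, which records the new id in cachedBroadcast
//    and skips ContextCleaner registration.
// 3. When the body finishes (success or failure), the finally block above calls
//    cleanBroadCast(executionId.toString), which unbroadcasts every recorded id.
df.collect()  // any action run inside withNewExecutionId exercises this path
```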
