Propagate TaskContext to writer thread

JoshRosen · JoshRosen · commit 494cde023ffb · 2015-07-27T19:10:14.000-07:00
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
@@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde.serdeConstants
 import org.apache.hadoop.hive.serde2.AbstractSerDe
 import org.apache.hadoop.hive.serde2.objectinspector._
 
-import org.apache.spark.Logging
+import org.apache.spark.{TaskContext, Logging}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.CatalystTypeConverters
@@ -98,7 +98,8 @@ case class ScriptTransformation(
         ioschema,
         outputStream,
         proc,
-        stderrBuffer
+        stderrBuffer,
+        TaskContext.get()
       )
 
       // This nullability is a performance optimization in order to avoid an Option.foreach() call
@@ -221,7 +222,8 @@ private class ScriptTransformationWriterThread(
     ioschema: HiveScriptIOSchema,
     outputStream: OutputStream,
     proc: Process,
-    stderrBuffer: CircularBuffer
+    stderrBuffer: CircularBuffer,
+    taskContext: TaskContext
   ) extends Thread("Thread-ScriptTransformation-Feed") with Logging {
 
   setDaemon(true)
@@ -232,6 +234,8 @@ private class ScriptTransformationWriterThread(
   def exception: Option[Throwable] = Option(_exception)
 
   override def run(): Unit = Utils.logUncaughtExceptions {
+    TaskContext.setTaskContext(taskContext)
+
     val dataOutputStream = new DataOutputStream(outputStream)
 
     // We can't use Utils.tryWithSafeFinally here because we also need a `catch` block, so
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ScriptTransformationSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.hive.execution
 import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
 import org.scalatest.exceptions.TestFailedException
 
+import org.apache.spark.TaskContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.catalyst.InternalRow
@@ -113,6 +114,7 @@ class ScriptTransformationSuite extends SparkPlanTest {
 private case class ExceptionInjectingOperator(child: SparkPlan) extends UnaryNode {
   override protected def doExecute(): RDD[InternalRow] = {
     child.execute().map { x =>
+      assert(TaskContext.get() != null) // Make sure that TaskContext is defined.
       Thread.sleep(1000) // This sleep gives the external process time to start.
       throw new IllegalArgumentException("intentional exception")
     }