@@ -50,7 +50,6 @@ private[spark] class SqlNewHadoopPartition(
 }
 
 /**
- * :: DeveloperApi ::
  * An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS,
  * sources in HBase, or S3), using the new MapReduce API (`org.apache.hadoop.mapreduce`).
  * It is based on [[org.apache.spark.rdd.NewHadoopRDD]]. It has three additions.
@@ -60,13 +59,10 @@ private[spark] class SqlNewHadoopPartition(
  * 3. An optional closure `initLocalJobFuncOpt` that sets configurations at both the driver side
  *    and the executor side to the shared Hadoop Configuration.
  *
- * @param sc The SparkContext to associate the RDD with.
- * @param inputFormatClass Storage format of the data to be read.
- * @param keyClass Class of the key associated with the inputFormatClass.
- * @param valueClass Class of the value associated with the inputFormatClass.
- * @param conf The Hadoop configuration.
+ * Note: This RDD is basically a cloned version of [[org.apache.spark.rdd.NewHadoopRDD]] with
+ * changes based on [[org.apache.spark.rdd.HadoopRDD]]. In future, this functionality will be
+ * folded into core.
  */
-@DeveloperApi
 private[sql] class SqlNewHadoopRDD[K, V](
     @transient sc : SparkContext,
     broadcastedConf: Broadcast[SerializableWritable[Configuration]],
@@ -85,11 +81,22 @@ private[sql] class SqlNewHadoopRDD[K, V](
 
   protected def getJob(): Job = {
     val conf: Configuration = broadcastedConf.value.value
+    // "new Job" will make a copy of the conf. Then, it is
+    // safe to mutate conf properties with initLocalJobFuncOpt
+    // and initDriverSideJobFuncOpt.
     val newJob = new Job(conf)
     initLocalJobFuncOpt.map(f => f(newJob))
     newJob
   }
 
+  def getConf(isDriverSide: Boolean): Configuration = {
+    val job = getJob()
+    if (isDriverSide) {
+      initDriverSideJobFuncOpt.map(f => f(job))
+    }
+    job.getConfiguration
+  }
+
   private val jobTrackerId: String = {
     val formatter = new SimpleDateFormat("yyyyMMddHHmm")
     formatter.format(new Date())
@@ -235,14 +242,6 @@ private[sql] class SqlNewHadoopRDD[K, V](
     }
     super.persist(storageLevel)
   }
-
-  def getConf(isDriverSide: Boolean): Configuration = {
-    val job = getJob()
-    if (isDriverSide) {
-      initDriverSideJobFuncOpt.map(f => f(job))
-    }
-    job.getConfiguration
-  }
 }
 
 private[spark] object SqlNewHadoopRDD {
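
For context on the relocated `getConf`: it builds a fresh Job from the broadcast configuration via `getJob()` and applies `initDriverSideJobFuncOpt` only when called on the driver, so driver-only settings never reach executor-side copies. The following is a simplified, self-contained sketch of that pattern, not the actual Spark class; the object name, property keys, and the two init closures are illustrative stand-ins for `initLocalJobFuncOpt` / `initDriverSideJobFuncOpt` in the diff.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.Job

object GetConfSketch {
  // Stand-in for the broadcast, shared Hadoop configuration.
  val sharedConf = new Configuration()

  // Hypothetical init closures mirroring initLocalJobFuncOpt and
  // initDriverSideJobFuncOpt from the diff.
  val initLocalJobFuncOpt: Option[Job => Unit] =
    Some(job => job.getConfiguration.set("example.local", "true"))
  val initDriverSideJobFuncOpt: Option[Job => Unit] =
    Some(job => job.getConfiguration.set("example.driver.only", "true"))

  def getJob(): Job = {
    // "new Job" copies the conf, so the mutations below never touch sharedConf.
    val newJob = new Job(sharedConf)
    initLocalJobFuncOpt.map(f => f(newJob))
    newJob
  }

  def getConf(isDriverSide: Boolean): Configuration = {
    val job = getJob()
    if (isDriverSide) {
      initDriverSideJobFuncOpt.map(f => f(job))
    }
    job.getConfiguration
  }

  def main(args: Array[String]): Unit = {
    // The driver-side conf carries both settings; the executor-side conf
    // only carries the local one.
    println(getConf(isDriverSide = true).get("example.driver.only"))   // "true"
    println(getConf(isDriverSide = false).get("example.driver.only"))  // null
  }
}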