Commit c5e630b

Mikhail Gorbov authored and ekrivokonmapr committed

Fixed impersonation when data is read from MapR-DB via Spark-Hive. (apache#131)

1 parent 8db8ed9 commit c5e630b
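
In outline, the change teaches HadoopRDD to remember which user created it and to re-assume that identity when partitions are computed: doAsUserName captures UserGroupInformation.getCurrentUser.getUserName when the RDD is constructed, the old compute body is renamed to doCompute, and the new compute override calls doCompute directly when the task is already running as that user, or otherwise runs it inside proxyUgi.doAs(...) with a proxy UGI created via UserGroupInformation.createProxyUser(doAsUserName, ugi). The effect is that reads issued from executor tasks hit MapR-DB as the impersonated (doAs) user rather than whatever user the task happens to be running as. A stand-alone sketch of the same pattern follows the diff.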

File tree

1 file changed: +29 −2 lines

core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala

Lines changed: 29 additions & 2 deletions
@@ -18,6 +18,7 @@
 package org.apache.spark.rdd
 
 import java.io.{FileNotFoundException, IOException}
+import java.security.PrivilegedExceptionAction
 import java.text.SimpleDateFormat
 import java.util.{Date, Locale}
 
@@ -29,6 +30,7 @@ import org.apache.hadoop.mapred._
 import org.apache.hadoop.mapred.lib.CombineFileSplit
 import org.apache.hadoop.mapreduce.TaskType
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
+import org.apache.hadoop.security.UserGroupInformation
 import org.apache.hadoop.util.ReflectionUtils
 
 import org.apache.spark._
@@ -124,6 +126,8 @@ class HadoopRDD[K, V](
       minPartitions)
   }
 
+  private val doAsUserName = UserGroupInformation.getCurrentUser.getUserName
+
   protected val jobConfCacheKey: String = "rdd_%d_job_conf".format(id)
 
   protected val inputFormatCacheKey: String = "rdd_%d_input_format".format(id)
@@ -220,7 +224,7 @@ class HadoopRDD[K, V](
     }
   }
 
-  override def compute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(K, V)] = {
+  def doCompute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(K, V)] = {
     val iter = new NextIterator[(K, V)] {
 
       private val split = theSplit.asInstanceOf[HadoopPartition]
@@ -326,7 +330,7 @@ class HadoopRDD[K, V](
         if (getBytesReadCallback.isDefined) {
           updateBytesRead()
         } else if (split.inputSplit.value.isInstanceOf[FileSplit] ||
-                   split.inputSplit.value.isInstanceOf[CombineFileSplit]) {
+            split.inputSplit.value.isInstanceOf[CombineFileSplit]) {
           // If we can't get the bytes read from the FS stats, fall back to the split size,
           // which may be inaccurate.
           try {
@@ -342,6 +346,29 @@ class HadoopRDD[K, V](
     new InterruptibleIterator[(K, V)](context, iter)
   }
 
+  override def compute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(K, V)] = {
+    val ugi = UserGroupInformation.getCurrentUser
+
+    if (ugi.getUserName == doAsUserName) {
+      doCompute(theSplit: Partition, context: TaskContext)
+    } else {
+      val doAsAction = new PrivilegedExceptionAction[InterruptibleIterator[(K, V)]]() {
+        override def run(): InterruptibleIterator[(K, V)] = {
+          try {
+            doCompute(theSplit: Partition, context: TaskContext)
+          } catch {
+            case e: Exception =>
+              log.error("Error when HadoopRDD computing: ", e)
+              throw e
+          }
+        }
+      }
+
+      val proxyUgi = UserGroupInformation.createProxyUser(doAsUserName, ugi)
+      proxyUgi.doAs(doAsAction)
+    }
+  }
+
   /** Maps over a partition, providing the InputSplit that was used as the base of the partition. */
   @DeveloperApi
   def mapPartitionsWithInputSplit[U: ClassTag](
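
For readers unfamiliar with Hadoop's proxy-user mechanism, here is a minimal stand-alone sketch of the pattern the patch applies inside compute. ProxyUserExample, ownerName and readPartition are hypothetical names invented for this illustration; only UserGroupInformation.getCurrentUser, getUserName, createProxyUser and doAs are the real Hadoop API, and actually impersonating another user on a secured cluster additionally assumes the real user is authorized as a proxy via the hadoop.proxyuser.* configuration.

import java.security.PrivilegedExceptionAction

import org.apache.hadoop.security.UserGroupInformation

object ProxyUserExample {

  // Captured once, up front -- in the commit this happens when the
  // HadoopRDD is constructed.
  private val ownerName: String = UserGroupInformation.getCurrentUser.getUserName

  // Hypothetical stand-in for HadoopRDD.doCompute: any work whose result
  // depends on which user the filesystem sees.
  private def readPartition(): String =
    s"read as ${UserGroupInformation.getCurrentUser.getUserName}"

  def main(args: Array[String]): Unit = {
    val current = UserGroupInformation.getCurrentUser

    val result =
      if (current.getUserName == ownerName) {
        // Fast path: already the expected user, no impersonation needed.
        readPartition()
      } else {
        // Run the read as a proxy of the owner; `current` remains the
        // "real" user underneath, as in the patched compute above.
        val proxyUgi = UserGroupInformation.createProxyUser(ownerName, current)
        proxyUgi.doAs(new PrivilegedExceptionAction[String] {
          override def run(): String = readPartition()
        })
      }

    println(result)
  }
}

Two details of the diff worth noting: the @@ -326,7 +330,7 @@ hunk only changes the continuation indent of the CombineFileSplit condition, and the catch block in the new compute logs the exception and then re-throws it, so task failure semantics are unchanged.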
