Skip to content

Commit 47e0023

Browse files
committed
fix scala style, add config flag, break the chaining
1 parent 04c443c commit 47e0023

File tree

1 file changed

+27
-21
lines changed

1 file changed

+27
-21
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -142,39 +142,45 @@ class HadoopTableReader(
142142
partitionToDeserializer: Map[HivePartition,
143143
Class[_ <: Deserializer]],
144144
filterOpt: Option[PathFilter]): RDD[Row] = {
145-
// SPARK-5068:get FileStatus and do the filtering locally when the path is not exists
146-
147-
var existPathSet =collection.mutable.Set[String]()
148-
var pathPatternSet = collection.mutable.Set[String]()
149-
150-
val hivePartitionRDDs = partitionToDeserializer.filter {
151-
case (partition, partDeserializer) =>
152-
153-
def updateExistPathSetByPathPattern(pathPatternStr:String ){
145+
146+
// SPARK-5068: get FileStatus and do the filtering locally when the path does not exist
147+
def verifyPartitionPath(
148+
partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]]):
149+
Map[HivePartition, Class[_ <: Deserializer]] = {
150+
if (!sc.getConf("spark.sql.hive.verifyPartitionPath", "true").toBoolean) {
151+
partitionToDeserializer
152+
} else {
153+
var existPathSet = collection.mutable.Set[String]()
154+
var pathPatternSet = collection.mutable.Set[String]()
155+
partitionToDeserializer.filter {
156+
case (partition, partDeserializer) =>
157+
def updateExistPathSetByPathPattern(pathPatternStr: String) {
154158
val pathPattern = new Path(pathPatternStr)
155159
val fs = pathPattern.getFileSystem(sc.hiveconf)
156-
val matchs = fs.globStatus(pathPattern);
157-
matchs.map( fileStatus =>(existPathSet+= fileStatus.getPath.toString))
160+
val matches = fs.globStatus(pathPattern)
161+
matches.map(fileStatus => existPathSet += fileStatus.getPath.toString)
158162
}
159163
// convert /demo/data/year/month/day to /demo/data/**/**/**/
160-
def getPathPatternByPath(parNum:Int,tpath:Path):String = {
161-
var path = tpath
162-
for (i <- (1 to parNum)) { path = path.getParent }
163-
val tails = (1 to parNum).map(_ => "*").mkString("/","/","/")
164+
def getPathPatternByPath(parNum: Int, tempPath: Path): String = {
165+
var path = tempPath
166+
for (i <- (1 to parNum)) path = path.getParent
167+
val tails = (1 to parNum).map(_ => "*").mkString("/", "/", "/")
164168
path.toString + tails
165169
}
166170

167171
val partPath = HiveShim.getDataLocationPath(partition)
168172
val partNum = Utilities.getPartitionDesc(partition).getPartSpec.size();
169-
var pathPatternStr = getPathPatternByPath(partNum,partPath)
170-
if(!pathPatternSet.contains(pathPatternStr)){
171-
pathPatternSet+=pathPatternStr
173+
var pathPatternStr = getPathPatternByPath(partNum, partPath)
174+
if (!pathPatternSet.contains(pathPatternStr)) {
175+
pathPatternSet += pathPatternStr
172176
updateExistPathSetByPathPattern(pathPatternStr)
173177
}
174-
existPathSet.contains(partPath.toString)
178+
existPathSet.contains(partPath.toString)
179+
}
180+
}
181+
}
175182

176-
}
177-
.map { case (partition, partDeserializer) =>
183+
val hivePartitionRDDs = verifyPartitionPath(partitionToDeserializer).map {
178184
val partDesc = Utilities.getPartitionDesc(partition)
179185
val partPath = HiveShim.getDataLocationPath(partition)
180186
val inputPathStr = applyFilterIfNeeded(partPath, filterOpt)

0 commit comments

Comments
 (0)