@@ -19,36 +19,27 @@ package org.apache.spark.sql.hive
 
 import java.io.{BufferedReader, File, InputStreamReader, PrintStream}
 import java.sql.{Date, Timestamp}
-import java.util.{ArrayList => JArrayList}
-
-import org.apache.hadoop.hive.common.`type`.HiveDecimal
-import org.apache.spark.sql.catalyst.types.DecimalType
-import org.apache.spark.sql.catalyst.types.decimal.Decimal
 
 import scala.collection.JavaConversions._
 import scala.language.implicitConversions
-import scala.reflect.runtime.universe.{TypeTag, typeTag}
+import scala.reflect.runtime.universe.TypeTag
 
-import org.apache.hadoop.fs.FileSystem
-import org.apache.hadoop.fs.Path
+import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.ql.Driver
 import org.apache.hadoop.hive.ql.metadata.Table
 import org.apache.hadoop.hive.ql.processors._
 import org.apache.hadoop.hive.ql.session.SessionState
-import org.apache.hadoop.hive.serde2.io.TimestampWritable
-import org.apache.hadoop.hive.serde2.io.DateWritable
+import org.apache.hadoop.hive.serde2.io.{DateWritable, TimestampWritable}
 
 import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.ScalaReflection
-import org.apache.spark.sql.catalyst.analysis.{Analyzer, EliminateAnalysisOperators}
-import org.apache.spark.sql.catalyst.analysis.{OverrideCatalog, OverrideFunctionRegistry}
+import org.apache.spark.sql.catalyst.analysis.{Analyzer, EliminateAnalysisOperators, OverrideCatalog, OverrideFunctionRegistry}
 import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.execution.ExtractPythonUdfs
-import org.apache.spark.sql.execution.QueryExecutionException
-import org.apache.spark.sql.execution.{Command => PhysicalCommand}
+import org.apache.spark.sql.catalyst.types.DecimalType
+import org.apache.spark.sql.catalyst.types.decimal.Decimal
+import org.apache.spark.sql.execution.{ExtractPythonUdfs, QueryExecutionException, Command => PhysicalCommand}
 import org.apache.spark.sql.hive.execution.DescribeHiveTableCommand
 import org.apache.spark.sql.sources.DataSourceStrategy
@@ -136,7 +127,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     val relation = EliminateAnalysisOperators(catalog.lookupRelation(None, tableName))
 
     relation match {
-      case relation: MetastoreRelation => {
+      case relation: MetastoreRelation =>
         // This method is mainly based on
         // org.apache.hadoop.hive.ql.stats.StatsUtils.getFileSizeForTable(HiveConf, Table)
         // in Hive 0.13 (except that we do not use fs.getContentSummary).
@@ -147,7 +138,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
         // countFileSize to count the table size.
         def calculateTableSize(fs: FileSystem, path: Path): Long = {
           val fileStatus = fs.getFileStatus(path)
-          val size = if (fileStatus.isDir) {
+          val size = if (fileStatus.isDirectory) {
             fs.listStatus(path).map(status => calculateTableSize(fs, status.getPath)).sum
           } else {
             fileStatus.getLen
@@ -157,7 +148,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
         }
 
         def getFileSizeForTable(conf: HiveConf, table: Table): Long = {
-          val path = table.getPath()
+          val path = table.getPath
           var size: Long = 0L
           try {
             val fs = path.getFileSystem(conf)
@@ -187,15 +178,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
           val hiveTTable = relation.hiveQlTable.getTTable
           hiveTTable.setParameters(tableParameters)
           val tableFullName =
-            relation.hiveQlTable.getDbName() + "." + relation.hiveQlTable.getTableName()
+            relation.hiveQlTable.getDbName + "." + relation.hiveQlTable.getTableName
 
           catalog.client.alterTable(tableFullName, new Table(hiveTTable))
         }
-      }
       case otherRelation =>
         throw new NotImplementedError(
           s"Analyze has only implemented for Hive tables, " +
-            s"but ${tableName} is a ${otherRelation.nodeName}")
+            s"but $tableName is a ${otherRelation.nodeName}")
     }
   }
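For context on the isDir change above: FileStatus.isDir is deprecated in Hadoop 2.x in favour of FileStatus.isDirectory, with identical behaviour. A minimal standalone sketch of the same recursive size walk, assuming a Hadoop 2.x client on the classpath (the object name and main entry point are illustrative, not part of this patch):

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object TableSizeSketch {
  // Recursively sum the byte sizes of all files under `path`,
  // using the non-deprecated FileStatus.isDirectory.
  def calculateTableSize(fs: FileSystem, path: Path): Long = {
    val status = fs.getFileStatus(path)
    if (status.isDirectory) {
      fs.listStatus(path).map(child => calculateTableSize(fs, child.getPath)).sum
    } else {
      status.getLen
    }
  }

  def main(args: Array[String]): Unit = {
    val path = new Path(args(0))
    val fs = path.getFileSystem(new Configuration())
    println(s"total bytes under $path: ${calculateTableSize(fs, path)}")
  }
}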
@@ -374,50 +364,6 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   /** Extends QueryExecution with hive specific features. */
   protected[sql] abstract class QueryExecution extends super.QueryExecution {
 
-    protected val primitiveTypes =
-      Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, ByteType,
-        ShortType, DateType, TimestampType, BinaryType)
-
-    protected[sql] def toHiveString(a: (Any, DataType)): String = a match {
-      case (struct: Row, StructType(fields)) =>
-        struct.zip(fields).map {
-          case (v, t) => s""""${t.name}":${toHiveStructString(v, t.dataType)}"""
-        }.mkString("{", ",", "}")
-      case (seq: Seq[_], ArrayType(typ, _)) =>
-        seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]")
-      case (map: Map[_,_], MapType(kType, vType, _)) =>
-        map.map {
-          case (key, value) =>
-            toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType))
-        }.toSeq.sorted.mkString("{", ",", "}")
-      case (null, _) => "NULL"
-      case (d: Date, DateType) => new DateWritable(d).toString
-      case (t: Timestamp, TimestampType) => new TimestampWritable(t).toString
-      case (bin: Array[Byte], BinaryType) => new String(bin, "UTF-8")
-      case (decimal: Decimal, DecimalType()) => // Hive strips trailing zeros so use its toString
-        HiveShim.createDecimal(decimal.toBigDecimal.underlying()).toString
-      case (other, tpe) if primitiveTypes contains tpe => other.toString
-    }
-
-    /** Hive outputs fields of structs slightly differently than top level attributes. */
-    protected def toHiveStructString(a: (Any, DataType)): String = a match {
-      case (struct: Row, StructType(fields)) =>
-        struct.zip(fields).map {
-          case (v, t) => s""""${t.name}":${toHiveStructString(v, t.dataType)}"""
-        }.mkString("{", ",", "}")
-      case (seq: Seq[_], ArrayType(typ, _)) =>
-        seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]")
-      case (map: Map[_, _], MapType(kType, vType, _)) =>
-        map.map {
-          case (key, value) =>
-            toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType))
-        }.toSeq.sorted.mkString("{", ",", "}")
-      case (null, _) => "null"
-      case (s: String, StringType) => "\"" + s + "\""
-      case (decimal, DecimalType()) => decimal.toString
-      case (other, tpe) if primitiveTypes contains tpe => other.toString
-    }
-
     /**
      * Returns the result as a hive compatible sequence of strings. For native commands, the
      * execution is simply passed back to Hive.
@@ -435,8 +381,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
       // We need the types so we can output struct field names
       val types = analyzed.output.map(_.dataType)
       // Reformat to match hive tab delimited output.
-      val asString = result.map(_.zip(types).map(toHiveString)).map(_.mkString("\t")).toSeq
-      asString
+      result.map(_.zip(types).map(HiveContext.toHiveString)).map(_.mkString("\t")).toSeq
     }
 
     override def simpleString: String =
@@ -447,3 +392,49 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     }
   }
 }
+
+object HiveContext {
+  protected val primitiveTypes =
+    Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, ByteType,
+      ShortType, DateType, TimestampType, BinaryType)
+
+  protected[sql] def toHiveString(a: (Any, DataType)): String = a match {
+    case (struct: Row, StructType(fields)) =>
+      struct.zip(fields).map {
+        case (v, t) => s""""${t.name}":${toHiveStructString(v, t.dataType)}"""
+      }.mkString("{", ",", "}")
+    case (seq: Seq[_], ArrayType(typ, _)) =>
+      seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]")
+    case (map: Map[_,_], MapType(kType, vType, _)) =>
+      map.map {
+        case (key, value) =>
+          toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType))
+      }.toSeq.sorted.mkString("{", ",", "}")
+    case (null, _) => "NULL"
+    case (d: Date, DateType) => new DateWritable(d).toString
+    case (t: Timestamp, TimestampType) => new TimestampWritable(t).toString
+    case (bin: Array[Byte], BinaryType) => new String(bin, "UTF-8")
+    case (decimal: Decimal, DecimalType()) => // Hive strips trailing zeros so use its toString
+      HiveShim.createDecimal(decimal.toBigDecimal.underlying()).toString
+    case (other, tpe) if primitiveTypes contains tpe => other.toString
+  }
+
+  /** Hive outputs fields of structs slightly differently than top level attributes. */
+  protected def toHiveStructString(a: (Any, DataType)): String = a match {
+    case (struct: Row, StructType(fields)) =>
+      struct.zip(fields).map {
+        case (v, t) => s""""${t.name}":${toHiveStructString(v, t.dataType)}"""
+      }.mkString("{", ",", "}")
+    case (seq: Seq[_], ArrayType(typ, _)) =>
+      seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]")
+    case (map: Map[_, _], MapType(kType, vType, _)) =>
+      map.map {
+        case (key, value) =>
+          toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType))
+      }.toSeq.sorted.mkString("{", ",", "}")
+    case (null, _) => "null"
+    case (s: String, StringType) => "\"" + s + "\""
+    case (decimal, DecimalType()) => decimal.toString
+    case (other, tpe) if primitiveTypes contains tpe => other.toString
+  }
+}
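The net effect of the last two hunks is that primitiveTypes, toHiveString, and toHiveStructString move from the per-instance QueryExecution into the HiveContext companion object, so stringResult can call HiveContext.toHiveString without holding a QueryExecution. A standalone sketch of the quoting behaviour these helpers implement, assuming nothing beyond the Scala standard library (names are illustrative, not the Spark API):

object HiveStyleFormat {
  // Top level: strings print bare and null prints as "NULL",
  // matching Hive's console output.
  def topLevel(v: Any): String = v match {
    case null => "NULL"
    case s: String => s
    case seq: Seq[_] => seq.map(nested).mkString("[", ",", "]")
    case other => other.toString
  }

  // Nested in a struct/array: strings are double-quoted and null is lower-case,
  // the "slightly different" behaviour the scaladoc mentions.
  private def nested(v: Any): String = v match {
    case null => "null"
    case s: String => "\"" + s + "\""
    case seq: Seq[_] => seq.map(nested).mkString("[", ",", "]")
    case other => other.toString
  }

  def main(args: Array[String]): Unit = {
    println(topLevel("a"))             // a
    println(topLevel(Seq("a", null)))  // ["a",null]
    println(topLevel(null))            // NULL
  }
}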