Commit 94b4fdc

Spark-2706: hive-0.13.1 support on spark
1 parent 87ebf3b commit 94b4fdc

18 files changed: 481 additions, 84 deletions

assembly/pom.xml

Lines changed: 16 additions & 0 deletions
@@ -188,6 +188,22 @@
       </dependency>
     </dependencies>
   </profile>
+  <profile>
+    <id>hive-0.13</id>
+    <activation>
+      <property>
+        <name>hive.version</name>
+        <value>0.13.1</value>
+      </property>
+    </activation>
+    <dependencies>
+      <dependency>
+        <groupId>org.apache.spark</groupId>
+        <artifactId>spark-hive_${scala.binary.version}</artifactId>
+        <version>${project.version}</version>
+      </dependency>
+    </dependencies>
+  </profile>
   <profile>
     <id>spark-ganglia-lgpl</id>
     <dependencies>
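
Note: the new hive-0.13 profile is property-activated, so it only takes effect when the build is invoked with -Dhive.version=0.13.1. In that case it adds the spark-hive module to the assembly; otherwise it is skipped entirely.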

pom.xml

Lines changed: 25 additions & 3 deletions
@@ -127,7 +127,6 @@
     <hbase.version>0.94.6</hbase.version>
     <flume.version>1.4.0</flume.version>
     <zookeeper.version>3.4.5</zookeeper.version>
-    <hive.version>0.12.0</hive.version>
     <parquet.version>1.4.3</parquet.version>
     <jblas.version>1.2.3</jblas.version>
     <jetty.version>8.1.14.v20131031</jetty.version>
@@ -427,7 +426,7 @@
       <dependency>
         <groupId>org.apache.derby</groupId>
         <artifactId>derby</artifactId>
-        <version>10.4.2.0</version>
+        <version>${derby.version}</version>
       </dependency>
       <dependency>
         <groupId>com.codahale.metrics</groupId>
@@ -1225,7 +1224,18 @@
         </dependency>
       </dependencies>
     </profile>
-
+    <profile>
+      <id>hive-default</id>
+      <activation>
+        <property>
+          <name>!hive.version</name>
+        </property>
+      </activation>
+      <properties>
+        <hive.version>0.12.0</hive.version>
+        <derby.version>10.4.2.0</derby.version>
+      </properties>
+    </profile>
     <profile>
       <id>hive</id>
       <activation>
@@ -1235,6 +1245,18 @@
         <module>sql/hive-thriftserver</module>
       </modules>
     </profile>
+    <profile>
+      <id>hive-0.13</id>
+      <activation>
+        <property>
+          <name>hive.version</name>
+          <value>0.13.1</value>
+        </property>
+      </activation>
+      <properties>
+        <derby.version>10.10.1.1</derby.version>
+      </properties>
+    </profile>
 
   </profiles>
 </project>
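
Taken together, the two new profiles make the Hive version switchable from the command line. With no hive.version property set, hive-default activates (via the !hive.version trigger) and pins Hive 0.12.0 with Derby 10.4.2.0, reproducing the previously hard-coded defaults; building with -Dhive.version=0.13.1 activates the hive-0.13 profiles instead, which bump Derby to 10.10.1.1. Parameterizing the derby dependency as ${derby.version} is what lets the two profiles coexist. A Hive 0.13 build would presumably be invoked as something like mvn -Phive -Dhive.version=0.13.1 package, though the exact flags depend on the rest of the build, which is not shown in this excerpt.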

sql/hive/pom.xml

Lines changed: 144 additions & 36 deletions
@@ -36,11 +36,6 @@
   </properties>
 
   <dependencies>
-    <dependency>
-      <groupId>com.twitter</groupId>
-      <artifactId>parquet-hive-bundle</artifactId>
-      <version>1.5.0</version>
-    </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -51,46 +46,15 @@
       <artifactId>spark-sql_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.spark-project.hive</groupId>
-      <artifactId>hive-metastore</artifactId>
-      <version>${hive.version}</version>
-    </dependency>
     <dependency>
       <groupId>commons-httpclient</groupId>
       <artifactId>commons-httpclient</artifactId>
       <version>3.1</version>
     </dependency>
-    <dependency>
-      <groupId>org.spark-project.hive</groupId>
-      <artifactId>hive-exec</artifactId>
-      <version>${hive.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>commons-logging</groupId>
-          <artifactId>commons-logging</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
     <dependency>
       <groupId>org.codehaus.jackson</groupId>
       <artifactId>jackson-mapper-asl</artifactId>
     </dependency>
-    <dependency>
-      <groupId>org.spark-project.hive</groupId>
-      <artifactId>hive-serde</artifactId>
-      <version>${hive.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>commons-logging</groupId>
-          <artifactId>commons-logging</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>commons-logging</groupId>
-          <artifactId>commons-logging-api</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
     <!-- hive-serde already depends on avro, but this brings in customized config of avro deps from parent -->
     <dependency>
       <groupId>org.apache.avro</groupId>
@@ -109,6 +73,74 @@
   </dependencies>
 
   <profiles>
+    <profile>
+      <id>hive-default</id>
+      <activation>
+        <property>
+          <name>!hive.version</name>
+        </property>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>com.twitter</groupId>
+          <artifactId>parquet-hive-bundle</artifactId>
+          <version>1.5.0</version>
+        </dependency>
+        <dependency>
+          <groupId>org.spark-project.hive</groupId>
+          <artifactId>hive-metastore</artifactId>
+          <version>${hive.version}</version>
+        </dependency>
+        <dependency>
+          <groupId>org.spark-project.hive</groupId>
+          <artifactId>hive-exec</artifactId>
+          <version>${hive.version}</version>
+          <exclusions>
+            <exclusion>
+              <groupId>commons-logging</groupId>
+              <artifactId>commons-logging</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+        <dependency>
+          <groupId>org.spark-project.hive</groupId>
+          <artifactId>hive-serde</artifactId>
+          <version>${hive.version}</version>
+          <exclusions>
+            <exclusion>
+              <groupId>commons-logging</groupId>
+              <artifactId>commons-logging</artifactId>
+            </exclusion>
+            <exclusion>
+              <groupId>commons-logging</groupId>
+              <artifactId>commons-logging-api</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+      </dependencies>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>build-helper-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>add-default-sources</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>add-source</goal>
+                </goals>
+                <configuration>
+                  <sources>
+                    <source>v0.12/src/main/scala</source>
+                  </sources>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
     <profile>
       <id>hive</id>
       <build>
@@ -135,6 +167,82 @@
         </plugins>
       </build>
     </profile>
+    <profile>
+      <id>hive-0.13</id>
+      <activation>
+        <property>
+          <name>hive.version</name>
+          <value>0.13.1</value>
+        </property>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hive</groupId>
+          <artifactId>hive-metastore</artifactId>
+          <version>${hive.version}</version>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hive</groupId>
+          <artifactId>hive-exec</artifactId>
+          <version>${hive.version}</version>
+          <exclusions>
+            <exclusion>
+              <groupId>commons-logging</groupId>
+              <artifactId>commons-logging</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hive</groupId>
+          <artifactId>hive-serde</artifactId>
+          <version>${hive.version}</version>
+          <exclusions>
+            <exclusion>
+              <groupId>commons-logging</groupId>
+              <artifactId>commons-logging</artifactId>
+            </exclusion>
+            <exclusion>
+              <groupId>commons-logging</groupId>
+              <artifactId>commons-logging-api</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+      </dependencies>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>build-helper-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>add-v13-sources</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>add-source</goal>
+                </goals>
+                <configuration>
+                  <sources>
+                    <source>v0.13/src/main/scala</source>
+                  </sources>
+                </configuration>
+              </execution>
+              <execution>
+                <id>add-scala-test-sources</id>
+                <phase>generate-test-sources</phase>
+                <goals>
+                  <goal>add-test-source</goal>
+                </goals>
+                <configuration>
+                  <sources>
+                    <source>src/test/scala</source>
+                  </sources>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
   </profiles>
 
   <build>
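
The build-helper executions above are the heart of the change: each profile compiles an extra, version-specific source root (v0.12/src/main/scala or v0.13/src/main/scala) alongside the shared sources. Presumably each root defines the org.apache.spark.sql.hive.HiveShim object used throughout the Scala changes below. Those shim files are not part of this excerpt; a minimal, hypothetical sketch of the v0.12 flavor, reconstructed only from the call sites visible in this commit, might look like:

package org.apache.spark.sql.hive

import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.ql.processors.{CommandProcessor, CommandProcessorFactory}
import org.apache.hadoop.hive.ql.stats.StatsSetupConst
import org.apache.hadoop.io.Text

private[hive] object HiveShim {
  val version = "0.12.0"

  // Key under which Hive records a table's total size in its parameters map.
  def getStatsSetupConstTotalSize: String = StatsSetupConst.TOTAL_SIZE

  // Hive 0.12 resolves a command processor from a single command token.
  def getCommandProcessor(cmd: Array[String], conf: HiveConf): CommandProcessor =
    CommandProcessorFactory.get(cmd(0), conf)

  // Hive 0.12 object inspectors expect catalyst strings as hadoop Text.
  def convertCatalystString2Hive(s: String): AnyRef = new Text(s)
}

A ShimFileSinkDesc definition (imported by SparkHadoopWriter below) would live in the same version-specific roots.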

sql/hive/src/main/scala/org/apache/spark/SparkHadoopWriter.scala

Lines changed: 2 additions & 1 deletion
@@ -24,11 +24,12 @@ import java.util.Date
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.hive.ql.exec.{FileSinkOperator, Utilities}
 import org.apache.hadoop.hive.ql.io.{HiveFileFormatUtils, HiveOutputFormat}
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc
 import org.apache.hadoop.mapred._
 import org.apache.hadoop.io.Writable
 
 import org.apache.spark.{Logging, SerializableWritable, SparkHadoopWriter}
+import org.apache.spark.sql.hive.{ShimFileSinkDesc => FileSinkDesc}
+import org.apache.spark.sql.hive.HiveShim._
 
 /**
  * Internal helper class that saves an RDD using a Hive OutputFormat.
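
The import rename keeps the rest of SparkHadoopWriter compiling unchanged: FileSinkDesc now refers to a Spark-owned ShimFileSinkDesc rather than Hive's class, whose constructor and serializability presumably differ between Hive 0.12 and 0.13. The real definition sits in the version-specific source roots and is not shown here; a rough, hypothetical sketch of its shape:

package org.apache.spark.sql.hive

import org.apache.hadoop.hive.ql.plan.TableDesc

// Serializable stand-in for Hive's FileSinkDesc; the field names here are an assumption.
private[hive] class ShimFileSinkDesc(
    var dir: String,
    var tableInfo: TableDesc,
    var compressed: Boolean)
  extends Serializable {
  var compressCodec: String = _
  var compressType: String = _
}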

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

Lines changed: 8 additions & 10 deletions
@@ -32,7 +32,6 @@ import org.apache.hadoop.hive.ql.Driver
 import org.apache.hadoop.hive.ql.metadata.Table
 import org.apache.hadoop.hive.ql.processors._
 import org.apache.hadoop.hive.ql.session.SessionState
-import org.apache.hadoop.hive.ql.stats.StatsSetupConst
 import org.apache.hadoop.hive.serde2.io.TimestampWritable
 
 import org.apache.spark.SparkContext
@@ -46,6 +45,8 @@ import org.apache.spark.sql.execution.ExtractPythonUdfs
 import org.apache.spark.sql.execution.QueryExecutionException
 import org.apache.spark.sql.execution.{Command => PhysicalCommand}
 import org.apache.spark.sql.hive.execution.DescribeHiveTableCommand
+import org.apache.spark.sql.hive.HiveShim
+import org.apache.spark.sql.hive.HiveShim._
 
 /**
  * DEPRECATED: Use HiveContext instead.
@@ -170,13 +171,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
 
     val tableParameters = relation.hiveQlTable.getParameters
     val oldTotalSize =
-      Option(tableParameters.get(StatsSetupConst.TOTAL_SIZE)).map(_.toLong).getOrElse(0L)
+      Option(tableParameters.get(HiveShim.getStatsSetupConstTotalSize)).
+        map(_.toLong).getOrElse(0L)
     val newTotalSize = getFileSizeForTable(hiveconf, relation.hiveQlTable)
     // Update the Hive metastore if the total size of the table is different than the size
     // recorded in the Hive metastore.
     // This logic is based on org.apache.hadoop.hive.ql.exec.StatsTask.aggregateStats().
     if (newTotalSize > 0 && newTotalSize != oldTotalSize) {
-      tableParameters.put(StatsSetupConst.TOTAL_SIZE, newTotalSize.toString)
+      tableParameters.put(HiveShim.getStatsSetupConstTotalSize, newTotalSize.toString)
       val hiveTTable = relation.hiveQlTable.getTTable
       hiveTTable.setParameters(tableParameters)
       val tableFullName =
@@ -286,24 +288,20 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     val cmd_trimmed: String = cmd.trim()
     val tokens: Array[String] = cmd_trimmed.split("\\s+")
     val cmd_1: String = cmd_trimmed.substring(tokens(0).length()).trim()
-    val proc: CommandProcessor = CommandProcessorFactory.get(tokens(0), hiveconf)
-
-    SessionState.start(sessionState)
+    val proc: CommandProcessor = HiveShim.getCommandProcessor(Array(tokens(0)), hiveconf)
 
     proc match {
       case driver: Driver =>
-        driver.init()
-
         val results = new JArrayList[String]
         val response: CommandProcessorResponse = driver.run(cmd)
         // Throw an exception if there is an error in query processing.
         if (response.getResponseCode != 0) {
-          driver.destroy()
+          driver.close
          throw new QueryExecutionException(response.getErrorMessage)
        }
        driver.setMaxRows(maxRows)
        driver.getResults(results)
-        driver.destroy()
+        driver.close
        results
      case _ =>
        sessionState.out.println(tokens(0) + " " + cmd_1)
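
The substitutions here are exactly the points where the Hive 0.12 and 0.13 APIs diverge: StatsSetupConst moved packages, CommandProcessorFactory.get changed from taking a single String to taking the whole token array, and the Driver teardown is switched from destroy() to close, with the explicit SessionState.start/driver.init() calls dropped, presumably because the newer command-processor path handles that setup itself. A hypothetical sketch of the matching v0.13 shim entries, complementing the v0.12 sketch above (the 0.13 import location of StatsSetupConst is an assumption):

package org.apache.spark.sql.hive

import org.apache.hadoop.hive.common.StatsSetupConst
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.ql.processors.{CommandProcessor, CommandProcessorFactory}

private[hive] object HiveShim {
  val version = "0.13.1"

  // Same key as 0.12, but StatsSetupConst is no longer under o.a.h.hive.ql.stats in 0.13.
  def getStatsSetupConstTotalSize: String = StatsSetupConst.TOTAL_SIZE

  // Hive 0.13 expects the full token array instead of a single command token.
  def getCommandProcessor(cmd: Array[String], conf: HiveConf): CommandProcessor =
    CommandProcessorFactory.get(cmd, conf)
}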

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala

Lines changed: 2 additions & 1 deletion
@@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.types._
 
 /* Implicit conversions */
 import scala.collection.JavaConversions._
+import org.apache.spark.sql.hive.HiveShim
 
 private[hive] trait HiveInspectors {
 
@@ -137,7 +138,7 @@ private[hive] trait HiveInspectors {
 
   /** Converts native catalyst types to the types expected by Hive */
   def wrap(a: Any): AnyRef = a match {
-    case s: String => new hadoopIo.Text(s) // TODO why should be Text?
+    case s: String => HiveShim.convertCatalystString2Hive(s)
     case i: Int => i: java.lang.Integer
     case b: Boolean => b: java.lang.Boolean
     case f: Float => f: java.lang.Float
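
Routing the String case through HiveShim.convertCatalystString2Hive keeps the shared wrap logic version-agnostic: the 0.12 build can keep producing hadoopIo.Text (as the removed line did), while the 0.13 build is free to return whatever representation its object inspectors expect. That 0.13 implementation lives in the v0.13 source root, which is not part of this excerpt.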

0 commit comments