From d553765cb75c50c918117965d5714361065fa3c6 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Thu, 19 Nov 2020 22:28:01 +0800 Subject: [PATCH 1/8] Update HiveExternalCatalogVersionsSuite.scala --- .../HiveExternalCatalogVersionsSuite.scala | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 38a8c492d77a7..efd62bc0195f3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -52,7 +52,6 @@ import org.apache.spark.util.Utils @ExtendedHiveTest class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { import HiveExternalCatalogVersionsSuite._ - private val isTestAtLeastJava9 = SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9) private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse") private val tmpDataDir = Utils.createTempDir(namePrefix = "test-data") // For local test, you can set `spark.test.cache-dir` to a static value like `/tmp/test-spark`, to @@ -149,7 +148,9 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { new String(Files.readAllBytes(contentPath), StandardCharsets.UTF_8) } - private def prepare(): Unit = { + override def beforeAll(): Unit = { + super.beforeAll() + val tempPyFile = File.createTempFile("test", ".py") // scalastyle:off line.size.limit Files.write(tempPyFile.toPath, @@ -211,16 +212,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { tempPyFile.delete() } - override def beforeAll(): Unit = { - super.beforeAll() - if (!isTestAtLeastJava9) { - prepare() - } - } - test("backward compatibility") { - // TODO SPARK-28704 Test backward compatibility on JDK9+ once we have a version supports JDK9+ - assume(!isTestAtLeastJava9) val args = Seq( "--class", PROCESS_TABLES.getClass.getName.stripSuffix("$"), "--name", "HiveExternalCatalog backward compatibility test", @@ -252,7 +244,9 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { // do not throw exception during object initialization. case NonFatal(_) => Seq("3.0.1", "2.4.7") // A temporary fallback to use a specific version } - versions.filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) + versions + .filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) + .filter(v => v.startsWith("3") || !SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_11)) } protected var spark: SparkSession = _ From b9dd1e40c2c4e48376417e7f0549ca976255bb51 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 20 Nov 2020 08:58:55 +0800 Subject: [PATCH 2/8] Update HiveExternalCatalogVersionsSuite.scala --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index efd62bc0195f3..4cafd3e8ca626 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -200,7 +200,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.7", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--conf", s"spark.sql.test.version.index=$index", @@ -219,7 +219,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.7", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}", @@ -246,7 +246,7 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { } versions .filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) - .filter(v => v.startsWith("3") || !SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_11)) + .filter(v => v.startsWith("3") || !SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) } protected var spark: SparkSession = _ From 67222ca467b59e185621605213db8b964c6278d2 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Sat, 21 Nov 2020 20:28:43 +0800 Subject: [PATCH 3/8] Update package.scala --- .../main/scala/org/apache/spark/sql/hive/client/package.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index 27ba3eca81948..a56920c0f70c8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -106,7 +106,8 @@ package object client { exclusions = Seq("org.apache.calcite:calcite-druid", "org.apache.calcite.avatica:avatica", "org.apache.curator:*", - "org.pentaho:pentaho-aggdesigner-algorithm")) + "org.pentaho:pentaho-aggdesigner-algorithm", + "org.apache.zookeeper:zookeeper")) // Since Hive 3.0, HookUtils uses org.apache.logging.log4j.util.Strings // Since HIVE-14496, Hive.java uses calcite-core From 053974a7c1c8b20f7ecb0e7f68728a9e075175e1 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Sat, 21 Nov 2020 20:44:55 +0800 Subject: [PATCH 4/8] with extra Deps --- .../main/scala/org/apache/spark/sql/hive/client/package.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index a56920c0f70c8..8061f7a3b2177 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -103,11 +103,11 @@ package object client { // Since HIVE-14496, Hive materialized view need calcite-core. // For spark, only VersionsSuite currently creates a hive materialized view for testing. case object v2_3 extends HiveVersion("2.3.7", + extraDeps = Seq("org.apache.zookeeper:zookeeper"), exclusions = Seq("org.apache.calcite:calcite-druid", "org.apache.calcite.avatica:avatica", "org.apache.curator:*", - "org.pentaho:pentaho-aggdesigner-algorithm", - "org.apache.zookeeper:zookeeper")) + "org.pentaho:pentaho-aggdesigner-algorithm")) // Since Hive 3.0, HookUtils uses org.apache.logging.log4j.util.Strings // Since HIVE-14496, Hive.java uses calcite-core From 668db8b31609bb49aada740e335f9af75db277cb Mon Sep 17 00:00:00 2001 From: angerszhu Date: Sat, 21 Nov 2020 22:36:04 +0800 Subject: [PATCH 5/8] Update package.scala --- .../main/scala/org/apache/spark/sql/hive/client/package.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index 8061f7a3b2177..cfdd862189e31 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -103,7 +103,7 @@ package object client { // Since HIVE-14496, Hive materialized view need calcite-core. // For spark, only VersionsSuite currently creates a hive materialized view for testing. case object v2_3 extends HiveVersion("2.3.7", - extraDeps = Seq("org.apache.zookeeper:zookeeper"), + extraDeps = Seq("org.apache.zookeeper:zookeeper:3.4.6"), exclusions = Seq("org.apache.calcite:calcite-druid", "org.apache.calcite.avatica:avatica", "org.apache.curator:*", From b853f5ee653d7afd00bce7644e07d121dc93abf8 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Sun, 22 Nov 2020 01:40:16 +0800 Subject: [PATCH 6/8] test --- .../sql/hive/client/IsolatedClientLoader.scala | 18 +++++++++++++++--- .../apache/spark/sql/hive/client/package.scala | 1 - 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 9663e03ee6a74..ee5906feb1c02 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -121,7 +121,7 @@ private[hive] object IsolatedClientLoader extends Logging { val hiveArtifacts = version.extraDeps ++ Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ - Seq("com.google.guava:guava:14.0.1") ++ hadoopJarNames + Seq("com.google.guava:guava:14.0.1") val extraExclusions = if (hadoopVersion.startsWith("3")) { // this introduced from lower version of Hive could conflict with jars in Hadoop 3.2+, so @@ -131,7 +131,7 @@ private[hive] object IsolatedClientLoader extends Logging { Seq.empty } - val classpath = quietly { + val hiveJarsClasspath = quietly { SparkSubmitUtils.resolveMavenCoordinates( hiveArtifacts.mkString(","), SparkSubmitUtils.buildIvySettings( @@ -139,7 +139,19 @@ private[hive] object IsolatedClientLoader extends Logging { ivyPath), exclusions = version.exclusions ++ extraExclusions) } - val allFiles = classpath.split(",").map(new File(_)).toSet + + val hadoopJarsClasspath = quietly { + SparkSubmitUtils.resolveMavenCoordinates( + hadoopJarNames.mkString(","), + SparkSubmitUtils.buildIvySettings( + Some(remoteRepos), + ivyPath), + exclusions = version.exclusions ++ extraExclusions) + } + + val allFiles = + (hiveJarsClasspath.split(",") ++ hadoopJarsClasspath.split(",")) + .map(new File(_)).toSet // TODO: Remove copy logic. val tempDir = Utils.createTempDir(namePrefix = s"hive-${version}") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index cfdd862189e31..27ba3eca81948 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -103,7 +103,6 @@ package object client { // Since HIVE-14496, Hive materialized view need calcite-core. // For spark, only VersionsSuite currently creates a hive materialized view for testing. case object v2_3 extends HiveVersion("2.3.7", - extraDeps = Seq("org.apache.zookeeper:zookeeper:3.4.6"), exclusions = Seq("org.apache.calcite:calcite-druid", "org.apache.calcite.avatica:avatica", "org.apache.curator:*", From c4543ef16bb0d3964b7edf4bdc8ab9681326f4f5 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Sun, 22 Nov 2020 09:10:35 +0800 Subject: [PATCH 7/8] hive version change with jdk version --- .../sql/hive/client/IsolatedClientLoader.scala | 18 +++--------------- .../HiveExternalCatalogVersionsSuite.scala | 9 +++++++-- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index ee5906feb1c02..f7c4d45e530d0 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -121,7 +121,7 @@ private[hive] object IsolatedClientLoader extends Logging { val hiveArtifacts = version.extraDeps ++ Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ - Seq("com.google.guava:guava:14.0.1") + Seq("com.google.guava:guava:14.0.1") ++ hadoopJarNames val extraExclusions = if (hadoopVersion.startsWith("3")) { // this introduced from lower version of Hive could conflict with jars in Hadoop 3.2+, so @@ -131,7 +131,7 @@ private[hive] object IsolatedClientLoader extends Logging { Seq.empty } - val hiveJarsClasspath = quietly { + val jarsClasspath = quietly { SparkSubmitUtils.resolveMavenCoordinates( hiveArtifacts.mkString(","), SparkSubmitUtils.buildIvySettings( @@ -139,19 +139,7 @@ private[hive] object IsolatedClientLoader extends Logging { ivyPath), exclusions = version.exclusions ++ extraExclusions) } - - val hadoopJarsClasspath = quietly { - SparkSubmitUtils.resolveMavenCoordinates( - hadoopJarNames.mkString(","), - SparkSubmitUtils.buildIvySettings( - Some(remoteRepos), - ivyPath), - exclusions = version.exclusions ++ extraExclusions) - } - - val allFiles = - (hiveJarsClasspath.split(",") ++ hadoopJarsClasspath.split(",")) - .map(new File(_)).toSet + val allFiles = jarsClasspath.split(",").map(new File(_)).toSet // TODO: Remove copy logic. val tempDir = Utils.createTempDir(namePrefix = s"hive-${version}") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 4cafd3e8ca626..cf070f4611f3b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -59,6 +59,11 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { private val sparkTestingDir = Option(System.getProperty(SPARK_TEST_CACHE_DIR_SYSTEM_PROPERTY)) .map(new File(_)).getOrElse(Utils.createTempDir(namePrefix = "test-spark")) private val unusedJar = TestUtils.createJarWithClasses(Seq.empty) + val hiveVersion = if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) { + "2.3.7" + } else { + "1.2.1" + } override def afterAll(): Unit = { try { @@ -200,7 +205,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.7", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=$hiveVersion", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--conf", s"spark.sql.test.version.index=$index", @@ -219,7 +224,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.7", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=$hiveVersion", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}", From 568150a841bb2d189a769db58399c233867d2167 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Sun, 22 Nov 2020 09:12:20 +0800 Subject: [PATCH 8/8] Update IsolatedClientLoader.scala --- .../apache/spark/sql/hive/client/IsolatedClientLoader.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index f7c4d45e530d0..9663e03ee6a74 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -121,7 +121,7 @@ private[hive] object IsolatedClientLoader extends Logging { val hiveArtifacts = version.extraDeps ++ Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ - Seq("com.google.guava:guava:14.0.1") ++ hadoopJarNames + Seq("com.google.guava:guava:14.0.1") ++ hadoopJarNames val extraExclusions = if (hadoopVersion.startsWith("3")) { // this introduced from lower version of Hive could conflict with jars in Hadoop 3.2+, so @@ -131,7 +131,7 @@ private[hive] object IsolatedClientLoader extends Logging { Seq.empty } - val jarsClasspath = quietly { + val classpath = quietly { SparkSubmitUtils.resolveMavenCoordinates( hiveArtifacts.mkString(","), SparkSubmitUtils.buildIvySettings( @@ -139,7 +139,7 @@ private[hive] object IsolatedClientLoader extends Logging { ivyPath), exclusions = version.exclusions ++ extraExclusions) } - val allFiles = jarsClasspath.split(",").map(new File(_)).toSet + val allFiles = classpath.split(",").map(new File(_)).toSet // TODO: Remove copy logic. val tempDir = Utils.createTempDir(namePrefix = s"hive-${version}")