From 310ca4bce40dbd1145e728283d4e6e190ddadb06 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 1 Jul 2025 18:17:10 +0200 Subject: [PATCH 1/4] Disable stable column aliases in tests that it is assumed --- .../apache/spark/sql/internal/SQLConf.scala | 2 +- .../apache/spark/sql/DataFrameShowSuite.scala | 176 +++++++++--------- .../spark/sql/FileBasedDataSourceSuite.scala | 1 + 3 files changed, 92 insertions(+), 87 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 762abfe3b6061..483759cef6d09 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -5580,7 +5580,7 @@ object SQLConf { "and form them via pretty SQL print.") .version("3.5.0") .booleanConf - .createWithDefault(false) + .createWithDefault(true) val LOCAL_RELATION_CACHE_THRESHOLD = buildConf(SqlApiConfHelper.LOCAL_RELATION_CACHE_THRESHOLD_KEY) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameShowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameShowSuite.scala index 86d3ca45fd08e..81b49ef4c42a6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameShowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameShowSuite.scala @@ -239,43 +239,45 @@ class DataFrameShowSuite extends QueryTest with SharedSparkSession { } test("SPARK-33690: showString: escape meta-characters") { - val df1 = spark.sql("SELECT 'aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh'") - assert(df1.showString(1, truncate = 0) === - """+--------------------------------------+ - ||aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh| - |+--------------------------------------+ - ||aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh| - |+--------------------------------------+ - |""".stripMargin) - - val df2 = spark.sql("SELECT array('aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh')") - assert(df2.showString(1, truncate = 0) === - """+---------------------------------------------+ - ||array(aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh)| - |+---------------------------------------------+ - ||[aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh] | - |+---------------------------------------------+ - |""".stripMargin) - - val df3 = - spark.sql("SELECT map('aaa\nbbb\tccc', 'aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh')") - assert(df3.showString(1, truncate = 0) === - """+----------------------------------------------------------+ - ||map(aaa\nbbb\tccc, aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh)| - |+----------------------------------------------------------+ - ||{aaa\nbbb\tccc -> aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh} | - |+----------------------------------------------------------+ - |""".stripMargin) - - val df4 = - spark.sql("SELECT named_struct('v', 'aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh')") - assert(df4.showString(1, truncate = 0) === - """+-------------------------------------------------------+ - ||named_struct(v, aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh)| - |+-------------------------------------------------------+ - ||{aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh} | - |+-------------------------------------------------------+ - |""".stripMargin) + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + val df1 = spark.sql("SELECT 'aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh'") + assert(df1.showString(1, truncate = 0) === + """+--------------------------------------+ + 
||aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh| + |+--------------------------------------+ + ||aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh| + |+--------------------------------------+ + |""".stripMargin) + + val df2 = spark.sql("SELECT array('aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh')") + assert(df2.showString(1, truncate = 0) === + """+---------------------------------------------+ + ||array(aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh)| + |+---------------------------------------------+ + ||[aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh] | + |+---------------------------------------------+ + |""".stripMargin) + + val df3 = + spark.sql("SELECT map('aaa\nbbb\tccc', 'aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh')") + assert(df3.showString(1, truncate = 0) === + """+----------------------------------------------------------+ + ||map(aaa\nbbb\tccc, aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh)| + |+----------------------------------------------------------+ + ||{aaa\nbbb\tccc -> aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh} | + |+----------------------------------------------------------+ + |""".stripMargin) + + val df4 = + spark.sql("SELECT named_struct('v', 'aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh')") + assert(df4.showString(1, truncate = 0) === + """+-------------------------------------------------------+ + ||named_struct(v, aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh)| + |+-------------------------------------------------------+ + ||{aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh} | + |+-------------------------------------------------------+ + |""".stripMargin) + } } test("SPARK-7319 showString") { @@ -434,55 +436,57 @@ class DataFrameShowSuite extends QueryTest with SharedSparkSession { } test("SPARK-34308: printSchema: escape meta-characters") { - val captured = new ByteArrayOutputStream() - - val df1 = spark.sql("SELECT 'aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh'") - Console.withOut(captured) { - df1.printSchema() - } - assert(captured.toString === - """root - | |-- aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh: string (nullable = false) - | - |""".stripMargin) - captured.reset() - - val df2 = spark.sql("SELECT array('aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh')") - Console.withOut(captured) { - df2.printSchema() - } - assert(captured.toString === - """root - | |-- array(aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh): array (nullable = false) - | | |-- element: string (containsNull = false) - | - |""".stripMargin) - captured.reset() - - val df3 = - spark.sql("SELECT map('aaa\nbbb\tccc', 'aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh')") - Console.withOut(captured) { - df3.printSchema() - } - assert(captured.toString === - """root - | |-- map(aaa\nbbb\tccc, aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh): map (nullable = false) - | | |-- key: string - | | |-- value: string (valueContainsNull = false) - | - |""".stripMargin) - captured.reset() - - val df4 = - spark.sql("SELECT named_struct('v', 'aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh')") - Console.withOut(captured) { - df4.printSchema() + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + val captured = new ByteArrayOutputStream() + + val df1 = spark.sql("SELECT 'aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh'") + Console.withOut(captured) { + df1.printSchema() + } + assert(captured.toString === + """root + | |-- aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh: string (nullable = false) + | + |""".stripMargin) + captured.reset() + + val df2 = spark.sql("SELECT array('aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh')") + Console.withOut(captured) { + 
df2.printSchema() + } + assert(captured.toString === + """root + | |-- array(aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh): array (nullable = false) + | | |-- element: string (containsNull = false) + | + |""".stripMargin) + captured.reset() + + val df3 = + spark.sql("SELECT map('aaa\nbbb\tccc', 'aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh')") + Console.withOut(captured) { + df3.printSchema() + } + assert(captured.toString === + """root + | |-- map(aaa\nbbb\tccc, aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh): map (nullable = false) + | | |-- key: string + | | |-- value: string (valueContainsNull = false) + | + |""".stripMargin) + captured.reset() + + val df4 = + spark.sql("SELECT named_struct('v', 'aaa\nbbb\tccc\rddd\feee\bfff\u000Bggg\u0007hhh')") + Console.withOut(captured) { + df4.printSchema() + } + assert(captured.toString === + """root + | |-- named_struct(v, aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh): struct (nullable = false) + | | |-- v: string (nullable = false) + | + |""".stripMargin) } - assert(captured.toString === - """root - | |-- named_struct(v, aaa\nbbb\tccc\rddd\feee\bfff\vggg\ahhh): struct (nullable = false) - | | |-- v: string (nullable = false) - | - |""".stripMargin) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 35bbc6c8a1f4b..71d50a4126e79 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -511,6 +511,7 @@ class FileBasedDataSourceSuite extends QueryTest "" } withSQLConf( + SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false", SQLConf.USE_V1_SOURCE_LIST.key -> useV1List, SQLConf.LEGACY_INTERVAL_ENABLED.key -> "true") { // write path From 26f6c339ebd12be920429abaa55cf851a5b004a7 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 1 Jul 2025 21:23:58 +0200 Subject: [PATCH 2/4] Fix tests --- .../org/apache/spark/sql/avro/AvroSuite.scala | 4 +- .../org/apache/spark/sql/DataFrameSuite.scala | 18 +- .../org/apache/spark/sql/ExplainSuite.scala | 108 +++--- .../apache/spark/sql/SQLQueryTestSuite.scala | 1 + .../sql/SubqueryHintPropagationSuite.scala | 41 +-- .../scala/org/apache/spark/sql/TPCBase.scala | 5 +- .../IndeterminateCollationTestSuite.scala | 46 +-- .../sql/execution/SQLViewTestSuite.scala | 65 ++-- .../sql/execution/SparkSqlParserSuite.scala | 312 +++++++++--------- ...pelineDatasetAsSelectParserSuiteBase.scala | 47 +-- .../spark/sql/sources/InsertSuite.scala | 3 +- .../sql/hive/execution/HiveDDLSuite.scala | 4 +- .../sql/hive/execution/HiveQuerySuite.scala | 8 +- 13 files changed, 354 insertions(+), 308 deletions(-) diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 6f345e069ff78..0a93b41796894 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1762,7 +1762,9 @@ abstract class AvroSuite } test("error handling for unsupported Interval data types") { - withSQLConf(SQLConf.LEGACY_INTERVAL_ENABLED.key -> "true") { + withSQLConf( + SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false", + SQLConf.LEGACY_INTERVAL_ENABLED.key -> "true") { withTempDir { dir => val tempDir = new File(dir, "files").getCanonicalPath checkError( diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 5b88eeefeca75..4dd6c7b798917 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -1043,14 +1043,16 @@ class DataFrameSuite extends QueryTest } test("SPARK-41391: Correct the output column name of groupBy.agg(count_distinct)") { - withTempView("person") { - person.createOrReplaceTempView("person") - val df1 = person.groupBy("id").agg(count_distinct(col("name"))) - val df2 = spark.sql("SELECT id, COUNT(DISTINCT name) FROM person GROUP BY id") - assert(df1.columns === df2.columns) - val df3 = person.groupBy("id").agg(count_distinct(col("*"))) - val df4 = spark.sql("SELECT id, COUNT(DISTINCT *) FROM person GROUP BY id") - assert(df3.columns === df4.columns) + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + withTempView("person") { + person.createOrReplaceTempView("person") + val df1 = person.groupBy("id").agg(count_distinct(col("name"))) + val df2 = spark.sql("SELECT id, COUNT(DISTINCT name) FROM person GROUP BY id") + assert(df1.columns === df2.columns) + val df3 = person.groupBy("id").agg(count_distinct(col("*"))) + val df4 = spark.sql("SELECT id, COUNT(DISTINCT *) FROM person GROUP BY id") + assert(df3.columns === df4.columns) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 9c90e0105a424..871d982740935 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -111,30 +111,32 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite } test("optimized plan should show the rewritten expression") { - withTempView("test_agg") { - sql( - """ - |CREATE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES - | (1, true), (1, false), - | (2, true), - | (3, false), (3, null), - | (4, null), (4, null), - | (5, null), (5, true), (5, false) AS test_agg(k, v) + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + withTempView("test_agg") { + sql( + """ + |CREATE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES + | (1, true), (1, false), + | (2, true), + | (3, false), (3, null), + | (4, null), (4, null), + | (5, null), (5, true), (5, false) AS test_agg(k, v) """.stripMargin) - // simple explain of queries having every/some/any aggregates. Optimized - // plan should show the rewritten aggregate expression. - val df = sql("SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k") - checkKeywordsExistsInExplain(df, - "Aggregate [k#x], [k#x, every(v#x) AS every(v)#x, some(v#x) AS some(v)#x, " + - "any(v#x) AS any(v)#x]") - } + // simple explain of queries having every/some/any aggregates. Optimized + // plan should show the rewritten aggregate expression. 
+ val df = sql("SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k") + checkKeywordsExistsInExplain(df, + "Aggregate [k#x], [k#x, every(v#x) AS every(v)#x, some(v#x) AS some(v)#x, " + + "any(v#x) AS any(v)#x]") + } - withTable("t") { - sql("CREATE TABLE t(col TIMESTAMP) USING parquet") - val df = sql("SELECT date_part('month', col) FROM t") - checkKeywordsExistsInExplain(df, - "Project [month(cast(col#x as date)) AS date_part(month, col)#x]") + withTable("t") { + sql("CREATE TABLE t(col TIMESTAMP) USING parquet") + val df = sql("SELECT date_part('month', col) FROM t") + checkKeywordsExistsInExplain(df, + "Project [month(cast(col#x as date)) AS date_part(month, col)#x]") + } } } @@ -218,39 +220,43 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite } test("check operator precedence") { - // We follow Oracle operator precedence in the table below that lists the levels - // of precedence among SQL operators from high to low: - // --------------------------------------------------------------------------------------- - // Operator Operation - // --------------------------------------------------------------------------------------- - // +, - identity, negation - // *, / multiplication, division - // +, -, || addition, subtraction, concatenation - // =, !=, <, >, <=, >=, IS NULL, LIKE, BETWEEN, IN comparison - // NOT exponentiation, logical negation - // AND conjunction - // OR disjunction - // --------------------------------------------------------------------------------------- - checkKeywordsExistsInExplain(sql("select '1' || 1 + 2"), - "Project [13", " AS (concat(1, 1) + 2)#x") - checkKeywordsExistsInExplain(sql("select 1 - 2 || 'b'"), - "Project [-1b AS concat((1 - 2), b)#x]") - checkKeywordsExistsInExplain(sql("select 2 * 4 + 3 || 'b'"), - "Project [11b AS concat(((2 * 4) + 3), b)#x]") - checkKeywordsExistsInExplain(sql("select 3 + 1 || 'a' || 4 / 2"), - "Project [4a2.0 AS concat(concat((3 + 1), a), (4 / 2))#x]") - checkKeywordsExistsInExplain(sql("select 1 == 1 OR 'a' || 'b' == 'ab'"), - "Project [true AS ((1 = 1) OR (concat(a, b) = ab))#x]") - checkKeywordsExistsInExplain(sql("select 'a' || 'c' == 'ac' AND 2 == 3"), - "Project [false AS ((concat(a, c) = ac) AND (2 = 3))#x]") + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + // We follow Oracle operator precedence in the table below that lists the levels + // of precedence among SQL operators from high to low: + // --------------------------------------------------------------------------------------- + // Operator Operation + // --------------------------------------------------------------------------------------- + // +, - identity, negation + // *, / multiplication, division + // +, -, || addition, subtraction, concatenation + // =, !=, <, >, <=, >=, IS NULL, LIKE, BETWEEN, IN comparison + // NOT exponentiation, logical negation + // AND conjunction + // OR disjunction + // --------------------------------------------------------------------------------------- + checkKeywordsExistsInExplain(sql("select '1' || 1 + 2"), + "Project [13", " AS (concat(1, 1) + 2)#x") + checkKeywordsExistsInExplain(sql("select 1 - 2 || 'b'"), + "Project [-1b AS concat((1 - 2), b)#x]") + checkKeywordsExistsInExplain(sql("select 2 * 4 + 3 || 'b'"), + "Project [11b AS concat(((2 * 4) + 3), b)#x]") + checkKeywordsExistsInExplain(sql("select 3 + 1 || 'a' || 4 / 2"), + "Project [4a2.0 AS concat(concat((3 + 1), a), (4 / 2))#x]") + checkKeywordsExistsInExplain(sql("select 1 == 1 OR 'a' || 'b' 
== 'ab'"), + "Project [true AS ((1 = 1) OR (concat(a, b) = ab))#x]") + checkKeywordsExistsInExplain(sql("select 'a' || 'c' == 'ac' AND 2 == 3"), + "Project [false AS ((concat(a, c) = ac) AND (2 = 3))#x]") + } } test("explain for these functions; use range to avoid constant folding") { - val df = sql("select ifnull(id, 1), nullif(id, 1), nvl(id, 1), nvl2(id, 1, 2) " + - "from range(2)") - checkKeywordsExistsInExplain(df, - "Project [id#xL AS ifnull(id, 1)#xL, if ((id#xL = 1)) null " + - "else id#xL AS nullif(id, 1)#xL, id#xL AS nvl(id, 1)#xL, 1 AS nvl2(id, 1, 2)#x]") + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + val df = sql("select ifnull(id, 1), nullif(id, 1), nvl(id, 1), nvl2(id, 1, 2) " + + "from range(2)") + checkKeywordsExistsInExplain(df, + "Project [id#xL AS ifnull(id, 1)#xL, if ((id#xL = 1)) null " + + "else id#xL AS nullif(id, 1)#xL, id#xL AS nvl(id, 1)#xL, 1 AS nvl2(id, 1, 2)#x]") + } } test("SPARK-26659: explain of DataWritingCommandExec should not contain duplicate cmd.nodeName") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 575a4ae69d1a9..55b2b9081a2b3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -156,6 +156,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper // SPARK-39564: don't print out serde to avoid introducing complicated and error-prone // regex magic. .set("spark.test.noSerdeInExplain", "true") + .set(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED, false) // SPARK-32106 Since we add SQL test 'transform.sql' will use `cat` command, // here we need to ignore it. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubqueryHintPropagationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubqueryHintPropagationSuite.scala index eefb762b59c22..f84923584cc4b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubqueryHintPropagationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubqueryHintPropagationSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression import org.apache.spark.sql.catalyst.plans.{InnerLike, LeftSemi} import org.apache.spark.sql.catalyst.plans.logical.{BROADCAST, HintInfo, Join, JoinHint, LogicalPlan} import org.apache.spark.sql.functions.col +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession class SubqueryHintPropagationSuite extends QueryTest with SharedSparkSession { @@ -175,26 +176,28 @@ class SubqueryHintPropagationSuite extends QueryTest with SharedSparkSession { } test("Scalar subquery with non-equality predicates") { - val queryDf = sql( - s"""SELECT * FROM testData s1 WHERE key = - |(SELECT $hintStringified MAX(key) FROM - |testData s2 WHERE s1.key > s2.key AND s1.value > s2.value) - |""".stripMargin) - val condContainsMax = (condition: Expression) => { - condition.find { - case e: AttributeReference if e.name.contains("max") => - true - case _ => false - }.isDefined - } - val optimizedPlan = queryDf.queryExecution.optimizedPlan - val expectedJoinHint = JoinHint(leftHint = None, rightHint = expectedHint) - val joinsFound = optimizedPlan.collect { - case j: Join if j.condition.nonEmpty && condContainsMax(j.condition.get) => - assert(expectedJoinHint == j.hint) + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + val queryDf = sql( + s"""SELECT * FROM testData s1 WHERE key = + |(SELECT $hintStringified MAX(key) FROM + |testData s2 WHERE s1.key > s2.key AND s1.value > s2.value) + |""".stripMargin) + val condContainsMax = (condition: Expression) => { + condition.find { + case e: AttributeReference if e.name.contains("max") => + true + case _ => false + }.isDefined + } + val optimizedPlan = queryDf.queryExecution.optimizedPlan + val expectedJoinHint = JoinHint(leftHint = None, rightHint = expectedHint) + val joinsFound = optimizedPlan.collect { + case j: Join if j.condition.nonEmpty && condContainsMax(j.condition.get) => + assert(expectedJoinHint == j.hint) + } + assert(joinsFound.size == 1) + checkAnswer(queryDf, spark.emptyDataFrame) } - assert(joinsFound.size == 1) - checkAnswer(queryDf, spark.emptyDataFrame) } test("Scalar subquery nested subquery") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCBase.scala index 17645849225a6..e7042ecb407a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCBase.scala @@ -32,8 +32,11 @@ trait TPCBase extends SharedSparkSession { .set(SQLConf.CBO_ENABLED, true) .set(SQLConf.PLAN_STATS_ENABLED, true) .set(SQLConf.JOIN_REORDER_ENABLED, true) + .set(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED, false) } else { - super.sparkConf.set(SQLConf.MAX_TO_STRING_FIELDS, Int.MaxValue) + super.sparkConf + .set(SQLConf.MAX_TO_STRING_FIELDS, Int.MaxValue) + .set(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED, false) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/collation/IndeterminateCollationTestSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/collation/IndeterminateCollationTestSuite.scala index 163060f7a52d1..5bef79b4ed2a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/collation/IndeterminateCollationTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/collation/IndeterminateCollationTestSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.collation import org.apache.spark.{SparkRuntimeException, SparkThrowable} import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{StringType, StructField, StructType} @@ -190,30 +191,31 @@ class IndeterminateCollationTestSuite extends QueryTest with SharedSparkSession } test("create table as select fails with indeterminate collation") { - withTestTable { - assertIndeterminateCollationInSchemaError("concat(c1, c2)") { - sql(s""" - |CREATE TABLE t AS - |SELECT c1 || c2 FROM $testTableName - |""".stripMargin) - } + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + withTestTable { + assertIndeterminateCollationInSchemaError("concat(c1, c2)") { + sql(s""" + |CREATE TABLE t AS + |SELECT c1 || c2 FROM $testTableName""".stripMargin) + } - assertIndeterminateCollationInSchemaError("col") { - sql(s""" - |CREATE TABLE t AS - |SELECT concat_ws(', ', c1, c2) as col FROM $testTableName - |""".stripMargin) - } + assertIndeterminateCollationInSchemaError("col") { + sql(s""" + |CREATE TABLE t AS + |SELECT concat_ws(', ', c1, c2) as col FROM $testTableName""".stripMargin) + } - assertIndeterminateCollationInSchemaError("arr.element", "map.value", "struct.f1")(sql(s""" - |CREATE TABLE t - |USING $dataSource - |AS SELECT - | array(c1 || c2) AS arr, - | map('a', c1 || c2) AS map, - | named_struct('f1', c1 || c2, 'f2', c2) AS struct - |FROM $testTableName - |""".stripMargin)) + assertIndeterminateCollationInSchemaError("arr.element", "map.value", "struct.f1")(sql( + s""" + |CREATE TABLE t + |USING $dataSource + |AS SELECT + | array(c1 || c2) AS arr, + | map('a', c1 || c2) AS map, + | named_struct('f1', c1 || c2, 'f2', c2) AS struct + |FROM $testTableName + |""".stripMargin)) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index f6d2e096ecacc..39c367eb0eb14 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.withDefaultTimeZone import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.errors.DataTypeErrors.toSQLId +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf._ import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} import org.apache.spark.sql.types.{IntegerType, StructField, StructType} @@ -573,43 +574,49 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { } test("SPARK-35686: error out for creating view with auto gen alias") { - withView("v") { - checkError( - exception = intercept[AnalysisException] { - sql("CREATE VIEW v AS SELECT count(*) FROM VALUES (1), (2), (3) t(a)") - }, - condition = 
"CREATE_PERMANENT_VIEW_WITHOUT_ALIAS", - parameters = Map("name" -> tableIdentifier("v").quotedString, "attr" -> "\"count(1)\"") - ) - sql("CREATE VIEW v AS SELECT count(*) AS cnt FROM VALUES (1), (2), (3) t(a)") - checkAnswer(sql("SELECT * FROM v"), Seq(Row(3))) + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + withView("v") { + checkError( + exception = intercept[AnalysisException] { + sql("CREATE VIEW v AS SELECT count(*) FROM VALUES (1), (2), (3) t(a)") + }, + condition = "CREATE_PERMANENT_VIEW_WITHOUT_ALIAS", + parameters = Map("name" -> tableIdentifier("v").quotedString, "attr" -> "\"count(1)\"") + ) + sql("CREATE VIEW v AS SELECT count(*) AS cnt FROM VALUES (1), (2), (3) t(a)") + checkAnswer(sql("SELECT * FROM v"), Seq(Row(3))) + } } } test("SPARK-35686: error out for creating view with auto gen alias in subquery") { - withView("v") { - checkError( - exception = intercept[AnalysisException] { - sql("CREATE VIEW v AS SELECT * FROM (SELECT a + b FROM VALUES (1, 2) t(a, b))") - }, - condition = "CREATE_PERMANENT_VIEW_WITHOUT_ALIAS", - parameters = Map("name" -> tableIdentifier("v").quotedString, "attr" -> "\"(a + b)\"") - ) - sql("CREATE VIEW v AS SELECT * FROM (SELECT a + b AS col FROM VALUES (1, 2) t(a, b))") - checkAnswer(sql("SELECT * FROM v"), Seq(Row(3))) + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + withView("v") { + checkError( + exception = intercept[AnalysisException] { + sql("CREATE VIEW v AS SELECT * FROM (SELECT a + b FROM VALUES (1, 2) t(a, b))") + }, + condition = "CREATE_PERMANENT_VIEW_WITHOUT_ALIAS", + parameters = Map("name" -> tableIdentifier("v").quotedString, "attr" -> "\"(a + b)\"") + ) + sql("CREATE VIEW v AS SELECT * FROM (SELECT a + b AS col FROM VALUES (1, 2) t(a, b))") + checkAnswer(sql("SELECT * FROM v"), Seq(Row(3))) + } } } test("SPARK-35686: error out for alter view with auto gen alias") { - withView("v") { - sql("CREATE VIEW v AS SELECT 1 AS a") - checkError( - exception = intercept[AnalysisException] { - sql("ALTER VIEW v AS SELECT count(*) FROM VALUES (1), (2), (3) t(a)") - }, - condition = "CREATE_PERMANENT_VIEW_WITHOUT_ALIAS", - parameters = Map("name" -> tableIdentifier("v").quotedString, "attr" -> "\"count(1)\"") - ) + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + withView("v") { + sql("CREATE VIEW v AS SELECT 1 AS a") + checkError( + exception = intercept[AnalysisException] { + sql("ALTER VIEW v AS SELECT count(*) FROM VALUES (1), (2), (3) t(a)") + }, + condition = "CREATE_PERMANENT_VIEW_WITHOUT_ALIAS", + parameters = Map("name" -> tableIdentifier("v").quotedString, "attr" -> "\"count(1)\"") + ) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index fbcc8a582bfbb..b5bbbd26f924a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -537,14 +537,16 @@ class SparkSqlParserSuite extends AnalysisTest with SharedSparkSession { } test("pipeline concatenation") { - val concat = Concat( - Concat(UnresolvedAttribute("a") :: UnresolvedAttribute("b") :: Nil) :: - UnresolvedAttribute("c") :: - Nil - ) - assertEqual( - "SELECT a || b || c FROM t", - Project(UnresolvedAlias(concat) :: Nil, UnresolvedRelation(TableIdentifier("t")))) + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + val 
concat = Concat( + Concat(UnresolvedAttribute("a") :: UnresolvedAttribute("b") :: Nil) :: + UnresolvedAttribute("c") :: + Nil + ) + assertEqual( + "SELECT a || b || c FROM t", + Project(UnresolvedAlias(concat) :: Nil, UnresolvedRelation(TableIdentifier("t")))) + } } test("database and schema tokens are interchangeable") { @@ -579,159 +581,167 @@ class SparkSqlParserSuite extends AnalysisTest with SharedSparkSession { } test("SPARK-32608: script transform with row format delimit") { - val rowFormat = - """ - | ROW FORMAT DELIMITED - | FIELDS TERMINATED BY ',' - | COLLECTION ITEMS TERMINATED BY '#' - | MAP KEYS TERMINATED BY '@' - | LINES TERMINATED BY '\n' - | NULL DEFINED AS 'null' + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + val rowFormat = + """ + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY ',' + | COLLECTION ITEMS TERMINATED BY '#' + | MAP KEYS TERMINATED BY '@' + | LINES TERMINATED BY '\n' + | NULL DEFINED AS 'null' """.stripMargin - val ioSchema = - ScriptInputOutputSchema( - Seq(("TOK_TABLEROWFORMATFIELD", ","), - ("TOK_TABLEROWFORMATCOLLITEMS", "#"), - ("TOK_TABLEROWFORMATMAPKEYS", "@"), - ("TOK_TABLEROWFORMATNULL", "null"), - ("TOK_TABLEROWFORMATLINES", "\n")), - Seq(("TOK_TABLEROWFORMATFIELD", ","), - ("TOK_TABLEROWFORMATCOLLITEMS", "#"), - ("TOK_TABLEROWFORMATMAPKEYS", "@"), - ("TOK_TABLEROWFORMATNULL", "null"), - ("TOK_TABLEROWFORMATLINES", "\n")), None, None, - List.empty, List.empty, None, None, false) - - assertEqual( - s""" - |SELECT TRANSFORM(a, b, c) - | $rowFormat - | USING 'cat' AS (a, b, c) - | $rowFormat - |FROM testData + val ioSchema = + ScriptInputOutputSchema( + Seq(("TOK_TABLEROWFORMATFIELD", ","), + ("TOK_TABLEROWFORMATCOLLITEMS", "#"), + ("TOK_TABLEROWFORMATMAPKEYS", "@"), + ("TOK_TABLEROWFORMATNULL", "null"), + ("TOK_TABLEROWFORMATLINES", "\n")), + Seq(("TOK_TABLEROWFORMATFIELD", ","), + ("TOK_TABLEROWFORMATCOLLITEMS", "#"), + ("TOK_TABLEROWFORMATMAPKEYS", "@"), + ("TOK_TABLEROWFORMATNULL", "null"), + ("TOK_TABLEROWFORMATLINES", "\n")), None, None, + List.empty, List.empty, None, None, false) + + assertEqual( + s""" + |SELECT TRANSFORM(a, b, c) + | $rowFormat + | USING 'cat' AS (a, b, c) + | $rowFormat + |FROM testData """.stripMargin, - ScriptTransformation( - "cat", - Seq(AttributeReference("a", StringType)(), - AttributeReference("b", StringType)(), - AttributeReference("c", StringType)()), - Project(Seq($"a", $"b", $"c"), - UnresolvedRelation(TableIdentifier("testData"))), - ioSchema)) - - assertEqual( - s""" - |SELECT TRANSFORM(a, sum(b), max(c)) - | $rowFormat - | USING 'cat' AS (a, b, c) - | $rowFormat - |FROM testData - |GROUP BY a - |HAVING sum(b) > 10 - """.stripMargin, - ScriptTransformation( - "cat", - Seq(AttributeReference("a", StringType)(), - AttributeReference("b", StringType)(), - AttributeReference("c", StringType)()), - UnresolvedHaving( - GreaterThan( - UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), - Literal(10)), - Aggregate( - Seq($"a"), - Seq( - $"a", - UnresolvedAlias( - UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), None), - UnresolvedAlias( - UnresolvedFunction("max", Seq(UnresolvedAttribute("c")), isDistinct = false), None) - ), - UnresolvedRelation(TableIdentifier("testData")))), - ioSchema)) - - assertEqual( - s""" - |SELECT TRANSFORM(a, sum(b) OVER w, max(c) OVER w) - | $rowFormat - | USING 'cat' AS (a, b, c) - | $rowFormat - |FROM testData - |WINDOW w AS (PARTITION BY a ORDER BY b) + ScriptTransformation( + "cat", + 
Seq(AttributeReference("a", StringType)(), + AttributeReference("b", StringType)(), + AttributeReference("c", StringType)()), + Project(Seq($"a", $"b", $"c"), + UnresolvedRelation(TableIdentifier("testData"))), + ioSchema)) + + assertEqual( + s""" + |SELECT TRANSFORM(a, sum(b), max(c)) + | $rowFormat + | USING 'cat' AS (a, b, c) + | $rowFormat + |FROM testData + |GROUP BY a + |HAVING sum(b) > 10 """.stripMargin, - ScriptTransformation( - "cat", - Seq(AttributeReference("a", StringType)(), - AttributeReference("b", StringType)(), - AttributeReference("c", StringType)()), - WithWindowDefinition( - Map("w" -> WindowSpecDefinition( - Seq($"a"), - Seq(SortOrder($"b", Ascending, NullsFirst, Seq.empty)), - UnspecifiedFrame)), - Project( - Seq( - $"a", - UnresolvedAlias( - UnresolvedWindowExpression( + ScriptTransformation( + "cat", + Seq(AttributeReference("a", StringType)(), + AttributeReference("b", StringType)(), + AttributeReference("c", StringType)()), + UnresolvedHaving( + GreaterThan( + UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), + Literal(10)), + Aggregate( + Seq($"a"), + Seq( + $"a", + UnresolvedAlias( UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), - WindowSpecReference("w")), None), - UnresolvedAlias( - UnresolvedWindowExpression( + None), + UnresolvedAlias( UnresolvedFunction("max", Seq(UnresolvedAttribute("c")), isDistinct = false), - WindowSpecReference("w")), None) - ), - UnresolvedRelation(TableIdentifier("testData"))), - forPipeSQL = false - ), - ioSchema)) - - assertEqual( - s""" - |SELECT TRANSFORM(a, sum(b), max(c)) - | $rowFormat - | USING 'cat' AS (a, b, c) - | $rowFormat - |FROM testData - |LATERAL VIEW explode(array(array(1,2,3))) myTable AS myCol - |LATERAL VIEW explode(myTable.myCol) myTable2 AS myCol2 - |GROUP BY a, myCol, myCol2 - |HAVING sum(b) > 10 + None) + ), + UnresolvedRelation(TableIdentifier("testData")))), + ioSchema)) + + assertEqual( + s""" + |SELECT TRANSFORM(a, sum(b) OVER w, max(c) OVER w) + | $rowFormat + | USING 'cat' AS (a, b, c) + | $rowFormat + |FROM testData + |WINDOW w AS (PARTITION BY a ORDER BY b) + """.stripMargin, + ScriptTransformation( + "cat", + Seq(AttributeReference("a", StringType)(), + AttributeReference("b", StringType)(), + AttributeReference("c", StringType)()), + WithWindowDefinition( + Map("w" -> WindowSpecDefinition( + Seq($"a"), + Seq(SortOrder($"b", Ascending, NullsFirst, Seq.empty)), + UnspecifiedFrame)), + Project( + Seq( + $"a", + UnresolvedAlias( + UnresolvedWindowExpression( + UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), + WindowSpecReference("w")), None), + UnresolvedAlias( + UnresolvedWindowExpression( + UnresolvedFunction("max", Seq(UnresolvedAttribute("c")), isDistinct = false), + WindowSpecReference("w")), None) + ), + UnresolvedRelation(TableIdentifier("testData"))), + forPipeSQL = false + ), + ioSchema)) + + assertEqual( + s""" + |SELECT TRANSFORM(a, sum(b), max(c)) + | $rowFormat + | USING 'cat' AS (a, b, c) + | $rowFormat + |FROM testData + |LATERAL VIEW explode(array(array(1,2,3))) myTable AS myCol + |LATERAL VIEW explode(myTable.myCol) myTable2 AS myCol2 + |GROUP BY a, myCol, myCol2 + |HAVING sum(b) > 10 """.stripMargin, - ScriptTransformation( - "cat", - Seq(AttributeReference("a", StringType)(), - AttributeReference("b", StringType)(), - AttributeReference("c", StringType)()), - UnresolvedHaving( - GreaterThan( - UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), - Literal(10)), - 
Aggregate( - Seq($"a", $"myCol", $"myCol2"), - Seq( - $"a", - UnresolvedAlias( - UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), None), - UnresolvedAlias( - UnresolvedFunction("max", Seq(UnresolvedAttribute("c")), isDistinct = false), None) - ), - Generate( - UnresolvedGenerator( - FunctionIdentifier("explode"), - Seq(UnresolvedAttribute("myTable.myCol"))), - Nil, false, Option("mytable2"), Seq($"myCol2"), + ScriptTransformation( + "cat", + Seq(AttributeReference("a", StringType)(), + AttributeReference("b", StringType)(), + AttributeReference("c", StringType)()), + UnresolvedHaving( + GreaterThan( + UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), + Literal(10)), + Aggregate( + Seq($"a", $"myCol", $"myCol2"), + Seq( + $"a", + UnresolvedAlias( + UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")), isDistinct = false), + None), + UnresolvedAlias( + UnresolvedFunction("max", Seq(UnresolvedAttribute("c")), isDistinct = false), + None) + ), Generate( UnresolvedGenerator( FunctionIdentifier("explode"), - Seq(UnresolvedFunction("array", - Seq( - UnresolvedFunction("array", Seq(Literal(1), Literal(2), Literal(3)), false)), - false))), - Nil, false, Option("mytable"), Seq($"myCol"), - UnresolvedRelation(TableIdentifier("testData")))))), - ioSchema)) + Seq(UnresolvedAttribute("myTable.myCol"))), + Nil, false, Option("mytable2"), Seq($"myCol2"), + Generate( + UnresolvedGenerator( + FunctionIdentifier("explode"), + Seq(UnresolvedFunction("array", + Seq( + UnresolvedFunction("array", Seq(Literal(1), Literal(2), Literal(3)), + false)), + false))), + Nil, false, Option("mytable"), Seq($"myCol"), + UnresolvedRelation(TableIdentifier("testData")))))), + ioSchema)) + + } } test("SPARK-32607: Script Transformation ROW FORMAT DELIMITED" + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreatePipelineDatasetAsSelectParserSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreatePipelineDatasetAsSelectParserSuiteBase.scala index 9eb82853a3168..dafbfd620427c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreatePipelineDatasetAsSelectParserSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreatePipelineDatasetAsSelectParserSuiteBase.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{ColumnDefinition, CreatePipe import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform} import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.execution.command.v1.CommandSuiteBase +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{IntegerType, MetadataBuilder, StringType, StructField, StructType} trait CreatePipelineDatasetAsSelectParserSuiteBase extends CommandSuiteBase { @@ -225,28 +226,30 @@ trait CreatePipelineDatasetAsSelectParserSuiteBase extends CommandSuiteBase { } test("CTAS subquery is parsed into plan correctly") { - Seq( - "SELECT 1", - "SELECT * FROM input", - "SELECT a, b, c FROM input2", - """ - |SELECT o.id, o.amount, c.region - |FROM orders o - |JOIN ( - | SELECT id, region - | FROM customers - | WHERE region IS NOT NULL - |) AS c - |ON o.customer_id = c.id - |""".stripMargin - ).foreach { subquery => - val plan = parser.parsePlan( - s""" - |CREATE $datasetSqlSyntax table1 AS - |$subquery""".stripMargin) - val cmd = plan.asInstanceOf[CreatePipelineDatasetAsSelect] - assert(cmd.originalText.replaceAll("\\s", "") == 
subquery.replaceAll("\\s", "")) - assert(cmd.query == parser.parsePlan(subquery)) + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + Seq( + "SELECT 1", + "SELECT * FROM input", + "SELECT a, b, c FROM input2", + """ + |SELECT o.id, o.amount, c.region + |FROM orders o + |JOIN ( + | SELECT id, region + | FROM customers + | WHERE region IS NOT NULL + |) AS c + |ON o.customer_id = c.id + |""".stripMargin + ).foreach { subquery => + val plan = parser.parsePlan( + s""" + |CREATE $datasetSqlSyntax table1 AS + |$subquery""".stripMargin) + val cmd = plan.asInstanceOf[CreatePipelineDatasetAsSelect] + assert(cmd.originalText.replaceAll("\\s", "") == subquery.replaceAll("\\s", "")) + assert(cmd.query == parser.parsePlan(subquery)) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index af10fbbbc506a..e173ff20175c4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -651,7 +651,8 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { test("Throw exception on unsafe cast with strict casting policy") { withSQLConf( SQLConf.USE_V1_SOURCE_LIST.key -> "parquet", - SQLConf.STORE_ASSIGNMENT_POLICY.key -> SQLConf.StoreAssignmentPolicy.STRICT.toString) { + SQLConf.STORE_ASSIGNMENT_POLICY.key -> SQLConf.StoreAssignmentPolicy.STRICT.toString, + SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { withTable("t") { sql("create table t(i int, d double) using parquet") checkError( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 13e8d3721d81e..31fa75c265d22 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -3213,7 +3213,9 @@ class HiveDDLSuite } test("SPARK-33844, 37969: Insert overwrite directory should check schema too") { - withSQLConf(HiveUtils.CONVERT_METASTORE_INSERT_DIR.key -> "false") { + withSQLConf( + SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false", + HiveUtils.CONVERT_METASTORE_INSERT_DIR.key -> "false") { withView("v") { spark.range(1).createTempView("v") withTempPath { path => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index d58dec6f1126e..58e0bc67bce73 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -1254,7 +1254,9 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd test("Partition spec validation") { withTable("dp_test") { sql("CREATE TABLE dp_test(key INT, value STRING) USING HIVE PARTITIONED BY (dp INT, sp INT)") - withSQLConf("hive.exec.dynamic.partition.mode" -> "strict") { + withSQLConf( + SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false", + "hive.exec.dynamic.partition.mode" -> "strict") { // Should throw when using strict dynamic partition mode without any static partition checkError( exception = intercept[AnalysisException] { @@ -1268,7 +1270,9 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd "tableColumns" -> "`key`, 
`value`, `dp`, `sp`", "dataColumns" -> "`key`, `value`, `(key % 5)`")) } - withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + withSQLConf( + SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false", + "hive.exec.dynamic.partition.mode" -> "nonstrict") { // Should throw when a static partition appears after a dynamic partition checkError( exception = intercept[AnalysisException] { From 7f5b43c54cf24e156f266b3b372a06947ac64bcf Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 1 Jul 2025 23:35:51 +0200 Subject: [PATCH 3/4] Fix tests --- .../org/apache/spark/sql/avro/AvroSuite.scala | 56 ++-- .../sql/connect/ClientE2ETestSuite.scala | 22 +- .../spark/sql/ExpressionsSchemaSuite.scala | 2 + .../org/apache/spark/sql/SQLQuerySuite.scala | 287 +++++++++--------- .../sql/hive/execution/HiveQuerySuite.scala | 28 +- 5 files changed, 205 insertions(+), 190 deletions(-) diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 0a93b41796894..a77677e584739 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -3018,34 +3018,36 @@ abstract class AvroSuite } test("SPARK-33865: CREATE TABLE DDL with avro should check col name") { - withTable("test_ddl") { - withView("v") { - spark.range(1).createTempView("v") - withTempDir { dir => - checkError( - exception = intercept[AnalysisException] { - sql( - s""" - |CREATE TABLE test_ddl USING AVRO - |LOCATION '${dir}' - |AS SELECT ID, IF(ID=1,1,0) FROM v""".stripMargin) - }, - condition = "INVALID_COLUMN_NAME_AS_PATH", - parameters = Map( - "datasource" -> "AvroFileFormat", "columnName" -> "`(IF((ID = 1), 1, 0))`") - ) - } + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + withTable("test_ddl") { + withView("v") { + spark.range(1).createTempView("v") + withTempDir { dir => + checkError( + exception = intercept[AnalysisException] { + sql( + s""" + |CREATE TABLE test_ddl USING AVRO + |LOCATION '${dir}' + |AS SELECT ID, IF(ID=1,1,0) FROM v""".stripMargin) + }, + condition = "INVALID_COLUMN_NAME_AS_PATH", + parameters = Map( + "datasource" -> "AvroFileFormat", "columnName" -> "`(IF((ID = 1), 1, 0))`") + ) + } - withTempDir { dir => - spark.sql( - s""" - |CREATE TABLE test_ddl USING AVRO - |LOCATION '${dir}' - |AS SELECT ID, IF(ID=1,ID,0) AS A, ABS(ID) AS B - |FROM v""".stripMargin) - val expectedSchema = StructType(Seq(StructField("ID", LongType, true), - StructField("A", LongType, true), StructField("B", LongType, true))) - assert(spark.table("test_ddl").schema == expectedSchema) + withTempDir { dir => + spark.sql( + s""" + |CREATE TABLE test_ddl USING AVRO + |LOCATION '${dir}' + |AS SELECT ID, IF(ID=1,ID,0) AS A, ABS(ID) AS B + |FROM v""".stripMargin) + val expectedSchema = StructType(Seq(StructField("ID", LongType, true), + StructField("A", LongType, true), StructField("B", LongType, true))) + assert(spark.table("test_ddl").schema == expectedSchema) + } } } } diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala index 415960e83f9d1..58400a7e61757 100644 --- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala +++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/ClientE2ETestSuite.scala @@ -1277,19 
+1277,21 @@ class ClientE2ETestSuite } test("sql() with positional parameters") { - val result0 = spark.sql("select 1", Array.empty).collect() - assert(result0.length == 1 && result0(0).getInt(0) === 1) + withSQLConf("spark.sql.stableDerivedColumnAlias.enabled" -> "false") { + val result0 = spark.sql("select 1", Array.empty).collect() + assert(result0.length == 1 && result0(0).getInt(0) === 1) - val result1 = spark.sql("select ?", Array(1)).collect() - assert(result1.length == 1 && result1(0).getInt(0) === 1) + val result1 = spark.sql("select ?", Array(1)).collect() + assert(result1.length == 1 && result1(0).getInt(0) === 1) - val result2 = spark.sql("select ?, ?", Array(1, "abc")).collect() - assert(result2.length == 1) - assert(result2(0).getInt(0) === 1) - assert(result2(0).getString(1) === "abc") + val result2 = spark.sql("select ?, ?", Array(1, "abc")).collect() + assert(result2.length == 1) + assert(result2(0).getInt(0) === 1) + assert(result2(0).getString(1) === "abc") - val result3 = spark.sql("select element_at(?, 1)", Array(array(lit(1)))).collect() - assert(result3.length == 1 && result3(0).getInt(0) === 1) + val result3 = spark.sql("select element_at(?, 1)", Array(array(lit(1)))).collect() + assert(result3.length == 1 && result3(0).getInt(0) === 1) + } } test("sql() with named parameters") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index df6fc50dc59db..dff8357093ae2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -22,6 +22,7 @@ import java.io.File import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.tags.ExtendedSQLTest @@ -87,6 +88,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { } test("Check schemas for expression examples") { + assume(!spark.conf.get(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED)) val exampleRe = """^(.+);\n(?s)(.+)$""".r val funInfos = spark.sessionState.functionRegistry.listFunction().map { funcId => spark.sessionState.catalog.lookupFunctionInfo(funcId) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 26aa4b6b5210f..6ad597361bb3e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3488,26 +3488,29 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } test("SPARK-31594: Do not display the seed of rand/randn with no argument in output schema") { - def checkIfSeedExistsInExplain(df: DataFrame): Unit = { - val output = new java.io.ByteArrayOutputStream() - Console.withOut(output) { - df.explain() + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + def checkIfSeedExistsInExplain(df: DataFrame): Unit = { + val output = new java.io.ByteArrayOutputStream() + Console.withOut(output) { + df.explain() + } + val projectExplainOutput = output.toString.split("\n").find(_.contains("Project")).get + assert(projectExplainOutput.matches(""".*randn?\(-?[0-9]+\).*""")) } - val projectExplainOutput = output.toString.split("\n").find(_.contains("Project")).get - 
assert(projectExplainOutput.matches(""".*randn?\(-?[0-9]+\).*""")) - } - val df1 = sql("SELECT rand()") - assert(df1.schema.head.name === "rand()") - checkIfSeedExistsInExplain(df1) - val df2 = sql("SELECT rand(1L)") - assert(df2.schema.head.name === "rand(1)") - checkIfSeedExistsInExplain(df2) - val df3 = sql("SELECT randn()") - assert(df3.schema.head.name === "randn()") - checkIfSeedExistsInExplain(df1) - val df4 = sql("SELECT randn(1L)") - assert(df4.schema.head.name === "randn(1)") - checkIfSeedExistsInExplain(df2) + + val df1 = sql("SELECT rand()") + assert(df1.schema.head.name === "rand()") + checkIfSeedExistsInExplain(df1) + val df2 = sql("SELECT rand(1L)") + assert(df2.schema.head.name === "rand(1)") + checkIfSeedExistsInExplain(df2) + val df3 = sql("SELECT randn()") + assert(df3.schema.head.name === "randn()") + checkIfSeedExistsInExplain(df1) + val df4 = sql("SELECT randn(1L)") + assert(df4.schema.head.name === "randn(1)") + checkIfSeedExistsInExplain(df2) + } } test("SPARK-31670: Trim unnecessary Struct field alias in Aggregate/GroupingSets") { @@ -4712,144 +4715,148 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } test("SPARK-47939: Describe should work with parameterized queries") { - checkAnswer( - spark.sql("describe select ?", Array(1)), - Array( - Row("1", "int", null) + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + checkAnswer( + spark.sql("describe select ?", Array(1)), + Array( + Row("1", "int", null) + ) ) - ) - checkAnswer( - spark.sql("describe select :first", Map("first" -> 1)), - Array( - Row("1", "int", null) + checkAnswer( + spark.sql("describe select :first", Map("first" -> 1)), + Array( + Row("1", "int", null) + ) ) - ) - checkAnswer( - spark.sql("describe select * from values (?, ?) t(x, y)", Array(1, "a")), - Array( - Row("x", "int", null), - Row("y", "string", null) + checkAnswer( + spark.sql("describe select * from values (?, ?) 
t(x, y)", Array(1, "a")), + Array( + Row("x", "int", null), + Row("y", "string", null) + ) ) - ) - checkAnswer( - spark.sql( - "describe select * from values (:first, :second) t(x, y)", - Map("first" -> 1, "second" -> "a") - ), - Array( - Row("x", "int", null), - Row("y", "string", null) + checkAnswer( + spark.sql( + "describe select * from values (:first, :second) t(x, y)", + Map("first" -> 1, "second" -> "a") + ), + Array( + Row("x", "int", null), + Row("y", "string", null) + ) ) - ) + } } test("SPARK-47939: Explain should work with parameterized queries") { - def checkQueryPlan(df: DataFrame, plan: String): Unit = assert( - df.collect() - .map(_.getString(0)) - .map(_.replaceAll("#[0-9]+", "#N")) - === Array(plan.stripMargin) - ) + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + def checkQueryPlan(df: DataFrame, plan: String): Unit = assert( + df.collect() + .map(_.getString(0)) + .map(_.replaceAll("#[0-9]+", "#N")) + === Array(plan.stripMargin) + ) - checkQueryPlan( - spark.sql("explain select ?", Array(1)), - """== Physical Plan == - |*(1) Project [1 AS 1#N] - |+- *(1) Scan OneRowRelation[] + checkQueryPlan( + spark.sql("explain select ?", Array(1)), + """== Physical Plan == + |*(1) Project [1 AS 1#N] + |+- *(1) Scan OneRowRelation[] - |""" - ) - checkQueryPlan( - spark.sql("explain select :first", Map("first" -> 1)), - """== Physical Plan == - |*(1) Project [1 AS 1#N] - |+- *(1) Scan OneRowRelation[] + |""" + ) + checkQueryPlan( + spark.sql("explain select :first", Map("first" -> 1)), + """== Physical Plan == + |*(1) Project [1 AS 1#N] + |+- *(1) Scan OneRowRelation[] - |""" - ) + |""" + ) - checkQueryPlan( - spark.sql("explain explain explain select ?", Array(1)), - """== Physical Plan == - |Execute ExplainCommand - | +- ExplainCommand ExplainCommand 'PosParameterizedQuery [1], SimpleMode, SimpleMode + checkQueryPlan( + spark.sql("explain explain explain select ?", Array(1)), + """== Physical Plan == + |Execute ExplainCommand + | +- ExplainCommand ExplainCommand 'PosParameterizedQuery [1], SimpleMode, SimpleMode - |""" - ) - checkQueryPlan( - spark.sql("explain explain explain select :first", Map("first" -> 1)), - // scalastyle:off - """== Physical Plan == - |Execute ExplainCommand - | +- ExplainCommand ExplainCommand 'NameParameterizedQuery [first], [1], SimpleMode, SimpleMode - - |""" - // scalastyle:on - ) + |""" + ) + checkQueryPlan( + spark.sql("explain explain explain select :first", Map("first" -> 1)), + // scalastyle:off + """== Physical Plan == + |Execute ExplainCommand + | +- ExplainCommand ExplainCommand 'NameParameterizedQuery [first], [1], SimpleMode, SimpleMode + + |""" + // scalastyle:on + ) - checkQueryPlan( - spark.sql("explain describe select ?", Array(1)), - """== Physical Plan == - |Execute DescribeQueryCommand - | +- DescribeQueryCommand select ? + checkQueryPlan( + spark.sql("explain describe select ?", Array(1)), + """== Physical Plan == + |Execute DescribeQueryCommand + | +- DescribeQueryCommand select ? - |""" - ) - checkQueryPlan( - spark.sql("explain describe select :first", Map("first" -> 1)), - """== Physical Plan == - |Execute DescribeQueryCommand - | +- DescribeQueryCommand select :first + |""" + ) + checkQueryPlan( + spark.sql("explain describe select :first", Map("first" -> 1)), + """== Physical Plan == + |Execute DescribeQueryCommand + | +- DescribeQueryCommand select :first - |""" - ) + |""" + ) - checkQueryPlan( - spark.sql("explain extended select * from values (?, ?) 
t(x, y)", Array(1, "a")), - """== Parsed Logical Plan == - |'PosParameterizedQuery [1, a] - |+- 'Project [*] - | +- 'SubqueryAlias t - | +- 'UnresolvedInlineTable [x, y], [[posparameter(39), posparameter(42)]] - - |== Analyzed Logical Plan == - |x: int, y: string - |Project [x#N, y#N] - |+- SubqueryAlias t - | +- LocalRelation [x#N, y#N] - - |== Optimized Logical Plan == - |LocalRelation [x#N, y#N] - - |== Physical Plan == - |LocalTableScan [x#N, y#N] - |""" - ) - checkQueryPlan( - spark.sql( - "explain extended select * from values (:first, :second) t(x, y)", - Map("first" -> 1, "second" -> "a") - ), - """== Parsed Logical Plan == - |'NameParameterizedQuery [first, second], [1, a] - |+- 'Project [*] - | +- 'SubqueryAlias t - | +- 'UnresolvedInlineTable [x, y], [[namedparameter(first), namedparameter(second)]] - - |== Analyzed Logical Plan == - |x: int, y: string - |Project [x#N, y#N] - |+- SubqueryAlias t - | +- LocalRelation [x#N, y#N] - - |== Optimized Logical Plan == - |LocalRelation [x#N, y#N] - - |== Physical Plan == - |LocalTableScan [x#N, y#N] - |""" - ) + checkQueryPlan( + spark.sql("explain extended select * from values (?, ?) t(x, y)", Array(1, "a")), + """== Parsed Logical Plan == + |'PosParameterizedQuery [1, a] + |+- 'Project [*] + | +- 'SubqueryAlias t + | +- 'UnresolvedInlineTable [x, y], [[posparameter(39), posparameter(42)]] + + |== Analyzed Logical Plan == + |x: int, y: string + |Project [x#N, y#N] + |+- SubqueryAlias t + | +- LocalRelation [x#N, y#N] + + |== Optimized Logical Plan == + |LocalRelation [x#N, y#N] + + |== Physical Plan == + |LocalTableScan [x#N, y#N] + |""" + ) + checkQueryPlan( + spark.sql( + "explain extended select * from values (:first, :second) t(x, y)", + Map("first" -> 1, "second" -> "a") + ), + """== Parsed Logical Plan == + |'NameParameterizedQuery [first, second], [1, a] + |+- 'Project [*] + | +- 'SubqueryAlias t + | +- 'UnresolvedInlineTable [x, y], [[namedparameter(first), namedparameter(second)]] + + |== Analyzed Logical Plan == + |x: int, y: string + |Project [x#N, y#N] + |+- SubqueryAlias t + | +- LocalRelation [x#N, y#N] + + |== Optimized Logical Plan == + |LocalRelation [x#N, y#N] + + |== Physical Plan == + |LocalTableScan [x#N, y#N] + |""" + ) + } } test("SPARK-36680: Files hint options should be put into resolveDataSource function") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 58e0bc67bce73..316253522977f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -376,22 +376,24 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd """.stripMargin) test("SPARK-7270: consider dynamic partition when comparing table output") { - withTable("test_partition", "ptest") { - sql(s"CREATE TABLE test_partition (a STRING) USING HIVE PARTITIONED BY (b BIGINT, c STRING)") - sql(s"CREATE TABLE ptest (a STRING, b BIGINT, c STRING)") - - val analyzedPlan = sql( - """ - |INSERT OVERWRITE table test_partition PARTITION (b=1, c) - |SELECT 'a', 'c' from ptest + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + withTable("test_partition", "ptest") { + sql("CREATE TABLE test_partition (a STRING) USING HIVE PARTITIONED BY (b BIGINT, c STRING)") + sql("CREATE TABLE ptest (a STRING, b BIGINT, c STRING)") + + val analyzedPlan = sql( + """ + |INSERT 
OVERWRITE table test_partition PARTITION (b=1, c) + |SELECT 'a', 'c' from ptest """.stripMargin).queryExecution.analyzed - assertResult(false, "Incorrect cast detected\n" + analyzedPlan) { - var hasCast = false - analyzedPlan.collect { - case p: Project => p.transformExpressionsUp { case c: Cast => hasCast = true; c } + assertResult(false, "Incorrect cast detected\n" + analyzedPlan) { + var hasCast = false + analyzedPlan.collect { + case p: Project => p.transformExpressionsUp { case c: Cast => hasCast = true; c } + } + hasCast } - hasCast } } } From 2f734efcf6853c1b3039bfd80890a16186702f0e Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 2 Jul 2025 10:48:07 +0200 Subject: [PATCH 4/4] Fix tests --- .../org/apache/spark/sql/avro/AvroSuite.scala | 50 +++++++++--------- .../sql/catalyst/parser/DDLParserSuite.scala | 14 ++--- .../parser/ExpressionParserSuite.scala | 52 +++++++++++-------- .../sql/catalyst/parser/PlanParserSuite.scala | 8 +-- .../spark/sql/sources/InsertSuite.scala | 9 +++- .../apache/spark/sql/hive/InsertSuite.scala | 4 +- .../sql/hive/orc/HiveOrcSourceSuite.scala | 4 +- 7 files changed, 80 insertions(+), 61 deletions(-) diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index a77677e584739..cb7601fdbc26a 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -3146,31 +3146,33 @@ class AvroV1Suite extends AvroSuite { .set(SQLConf.USE_V1_SOURCE_LIST, "avro") test("SPARK-36271: V1 insert should check schema field name too") { - withView("v") { - spark.range(1).createTempView("v") - withTempDir { dir => - checkError( - exception = intercept[AnalysisException] { - sql("SELECT ID, IF(ID=1,1,0) FROM v").write.mode(SaveMode.Overwrite) - .format("avro").save(dir.getCanonicalPath) - }, - condition = "INVALID_COLUMN_NAME_AS_PATH", - parameters = Map( - "datasource" -> "AvroFileFormat", "columnName" -> "`(IF((ID = 1), 1, 0))`") - ) - } + withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") { + withView("v") { + spark.range(1).createTempView("v") + withTempDir { dir => + checkError( + exception = intercept[AnalysisException] { + sql("SELECT ID, IF(ID=1,1,0) FROM v").write.mode(SaveMode.Overwrite) + .format("avro").save(dir.getCanonicalPath) + }, + condition = "INVALID_COLUMN_NAME_AS_PATH", + parameters = Map( + "datasource" -> "AvroFileFormat", "columnName" -> "`(IF((ID = 1), 1, 0))`") + ) + } - withTempDir { dir => - checkError( - exception = intercept[AnalysisException] { - sql("SELECT NAMED_STRUCT('(IF((ID = 1), 1, 0))', IF(ID=1,ID,0)) AS col1 FROM v") - .write.mode(SaveMode.Overwrite) - .format("avro").save(dir.getCanonicalPath) - }, - condition = "INVALID_COLUMN_NAME_AS_PATH", - parameters = Map( - "datasource" -> "AvroFileFormat", "columnName" -> "`(IF((ID = 1), 1, 0))`") - ) + withTempDir { dir => + checkError( + exception = intercept[AnalysisException] { + sql("SELECT NAMED_STRUCT('(IF((ID = 1), 1, 0))', IF(ID=1,ID,0)) AS col1 FROM v") + .write.mode(SaveMode.Overwrite) + .format("avro").save(dir.getCanonicalPath) + }, + condition = "INVALID_COLUMN_NAME_AS_PATH", + parameters = Map( + "datasource" -> "AvroFileFormat", "columnName" -> "`(IF((ID = 1), 1, 0))`") + ) + } } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
index 0cb6f53f273ab..50f359221051a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
@@ -2474,12 +2474,14 @@ class DDLParserSuite extends AnalysisTest {
   }
 
   test("alter view: AS Query") {
-    val parsed = parsePlan("ALTER VIEW a.b.c AS SELECT 1")
-    val expected = AlterViewAs(
-      UnresolvedView(Seq("a", "b", "c"), "ALTER VIEW ... AS", true, false),
-      "SELECT 1",
-      parsePlan("SELECT 1"))
-    comparePlans(parsed, expected)
+    withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") {
+      val parsed = parsePlan("ALTER VIEW a.b.c AS SELECT 1")
+      val expected = AlterViewAs(
+        UnresolvedView(Seq("a", "b", "c"), "ALTER VIEW ... AS", true, false),
+        "SELECT 1",
+        parsePlan("SELECT 1"))
+      comparePlans(parsed, expected)
+    }
   }
 
   test("DESCRIBE FUNCTION") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index d2b4345072f8f..28503159590a1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -137,9 +137,11 @@ class ExpressionParserSuite extends AnalysisTest {
   }
 
   test("exists expression") {
-    assertEqual(
-      "exists (select 1 from b where b.x = a.x)",
-      Exists(table("b").where($"b.x" === $"a.x").select(1)))
+    withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") {
+      assertEqual(
+        "exists (select 1 from b where b.x = a.x)",
+        Exists(table("b").where($"b.x" === $"a.x").select(1)))
+    }
   }
 
   test("comparison expressions") {
@@ -333,20 +335,22 @@ class ExpressionParserSuite extends AnalysisTest {
   }
 
   test("function expressions with named arguments") {
-    assertEqual("encode(value => 'abc', charset => 'utf-8')",
-      $"encode".function(NamedArgumentExpression("value", Literal("abc")),
-        NamedArgumentExpression("charset", Literal("utf-8"))))
-    assertEqual("encode('abc', charset => 'utf-8')",
-      $"encode".function(Literal("abc"), NamedArgumentExpression("charset", Literal("utf-8"))))
-    assertEqual("encode(charset => 'utf-8', 'abc')",
-      $"encode".function(NamedArgumentExpression("charset", Literal("utf-8")), Literal("abc")))
-    assertEqual("encode('abc', charset => 'utf' || '-8')",
-      $"encode".function(Literal("abc"), NamedArgumentExpression("charset",
-        Concat(Literal("utf") :: Literal("-8") :: Nil))))
-    val unresolvedAlias = Project(Seq(UnresolvedAlias(Literal("1"))), OneRowRelation())
-    assertEqual("encode(value => ((select '1')), charset => 'utf-8')",
-      $"encode".function(NamedArgumentExpression("value", ScalarSubquery(plan = unresolvedAlias)),
-        NamedArgumentExpression("charset", Literal("utf-8"))))
+    withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") {
+      assertEqual("encode(value => 'abc', charset => 'utf-8')",
+        $"encode".function(NamedArgumentExpression("value", Literal("abc")),
+          NamedArgumentExpression("charset", Literal("utf-8"))))
+      assertEqual("encode('abc', charset => 'utf-8')",
+        $"encode".function(Literal("abc"), NamedArgumentExpression("charset", Literal("utf-8"))))
+      assertEqual("encode(charset => 'utf-8', 'abc')",
+        $"encode".function(NamedArgumentExpression("charset", Literal("utf-8")), Literal("abc")))
+      assertEqual("encode('abc', charset => 'utf' || '-8')",
+        $"encode".function(Literal("abc"), NamedArgumentExpression("charset",
+          Concat(Literal("utf") :: Literal("-8") :: Nil))))
+      val unresolvedAlias = Project(Seq(UnresolvedAlias(Literal("1"))), OneRowRelation())
+      assertEqual("encode(value => ((select '1')), charset => 'utf-8')",
+        $"encode".function(NamedArgumentExpression("value", ScalarSubquery(plan = unresolvedAlias)),
+          NamedArgumentExpression("charset", Literal("utf-8"))))
+    }
   }
 
   private def lv(s: Symbol) = UnresolvedNamedLambdaVariable(Seq(s.name))
@@ -477,12 +481,14 @@ class ExpressionParserSuite extends AnalysisTest {
   }
 
   test("scalar sub-query") {
-    assertEqual(
-      "(select max(val) from tbl) > current",
-      ScalarSubquery(table("tbl").select($"max".function($"val"))) > $"current")
-    assertEqual(
-      "a = (select b from s)",
-      $"a" === ScalarSubquery(table("s").select($"b")))
+    withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") {
+      assertEqual(
+        "(select max(val) from tbl) > current",
+        ScalarSubquery(table("tbl").select($"max".function($"val"))) > $"current")
+      assertEqual(
+        "a = (select b from s)",
+        $"a" === ScalarSubquery(table("s").select($"b")))
+    }
   }
 
   test("case when") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 2d35feeb6bfd0..fc54e622bdad2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -257,9 +257,11 @@ class PlanParserSuite extends AnalysisTest {
   }
 
   test("SPARK-42552: select and union without parentheses") {
-    val plan = Distinct(OneRowRelation().select(Literal(1))
-      .union(OneRowRelation().select(Literal(1))))
-    assertEqual("select 1 union select 1", plan)
+    withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") {
+      val plan = Distinct(OneRowRelation().select(Literal(1))
+        .union(OneRowRelation().select(Literal(1))))
+      assertEqual("select 1 union select 1", plan)
+    }
   }
 
   test("set operations") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index e173ff20175c4..8b0a630884b47 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -66,6 +66,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
   override def beforeAll(): Unit = {
     super.beforeAll()
     path = Utils.createTempDir()
+    spark.conf.set(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED, false)
    val ds = (1 to 10).map(i => s"""{"a":$i, "b":"str$i"}""").toDS()
     spark.read.json(ds).createOrReplaceTempView("jt")
     sql(
@@ -1942,13 +1943,17 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
       // Run the test twice, once using SQL for the INSERT operations and again using DataFrames.
       for (useDataFrames <- Seq(false, true)) {
         config.sqlConf.map { kv: (String, String) =>
-          withSQLConf(kv) {
+          withSQLConf(
+            SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false",
+            kv) {
             // Run the test with the pair of custom SQLConf values.
             runTest(testCase.dataSource, config.copy(useDataFrames = useDataFrames))
           }
         }.getOrElse {
           // Run the test with default settings.
-          runTest(testCase.dataSource, config.copy(useDataFrames = useDataFrames))
+          withSQLConf(SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false") {
+            runTest(testCase.dataSource, config.copy(useDataFrames = useDataFrames))
+          }
         }
       }
     }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala
index 3a37ef8d922cd..40ea4d41b8a27 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala
@@ -798,7 +798,9 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter
       test(s"SPARK-25389 INSERT OVERWRITE $local DIRECTORY ... STORED AS with duplicated names" +
         s"(caseSensitivity=$caseSensitivity, format=$format)") {
         withTempDir { dir =>
-          withSQLConf(SQLConf.CASE_SENSITIVE.key -> s"$caseSensitivity") {
+          withSQLConf(
+            SQLConf.STABLE_DERIVED_COLUMN_ALIAS_ENABLED.key -> "false",
+            SQLConf.CASE_SENSITIVE.key -> s"$caseSensitivity") {
             val e = intercept[AnalysisException] {
               sql(
                 s"""
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
index c1084dd4ee7ff..a2de6a650c2bf 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala
@@ -115,11 +115,11 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton {
     // write path
     checkError(
       exception = intercept[AnalysisException] {
-        sql("select interval 1 days").write.mode("overwrite").orc(orcDir)
+        sql("select interval 1 days as col").write.mode("overwrite").orc(orcDir)
       },
       condition = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE",
       parameters = Map(
-        "columnName" -> "`INTERVAL '1' DAY`",
+        "columnName" -> "`col`",
         "columnType" -> "\"INTERVAL DAY\"",
         "format" -> "ORC")
     )