Skip to content

Commit 67f6a3f

Browse files
committed
[SPARK-55043][SQL] Fix time travel with subquery containing table references
### What changes were proposed in this pull request?

This PR fixes an issue where `TIMESTAMP AS OF (subquery)` fails when the subquery references a table. Before this fix, queries like:

```sql
SELECT * FROM t TIMESTAMP AS OF (SELECT MIN(ts) FROM t)
```

would fail with:

```
assertion failed: No plan for SubqueryAlias testcat.t
```

The fix changes `EvalSubqueriesForTimeTravel` to wrap the scalar subquery in a `Project` over `OneRowRelation` and execute it through the normal query execution path (`sessionState.executePlan`), which properly handles table references, including V2 tables.

### Why are the changes needed?

The `EvalSubqueriesForTimeTravel` analyzer rule was directly calling `QueryExecution.prepareExecutedPlan` on the subquery's inner plan, which failed to properly plan V2 table relations.

### Does this PR introduce _any_ user-facing change?

Yes. Users can now use subqueries with table references in `TIMESTAMP AS OF` expressions.

### How was this patch tested?

Added a new test case in `DataSourceV2SQLSuite` that verifies time travel with a subquery containing a table reference.

### Was this patch authored or co-authored using generative AI tooling?

Yes.

Closes #53811 from cloud-fan/udf.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 70a3ab5 commit 67f6a3f

File tree

2 files changed

+23
-24
lines changed

2 files changed

+23
-24
lines changed

sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/EvalSubqueriesForTimeTravel.scala

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,11 @@
1717

1818
package org.apache.spark.sql.catalyst.analysis
1919

20-
import org.apache.spark.sql.catalyst.expressions.{Literal, ScalarSubquery, SubqueryExpression}
21-
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
20+
import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, ScalarSubquery, SubqueryExpression}
21+
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, Project}
2222
import org.apache.spark.sql.catalyst.rules.Rule
2323
import org.apache.spark.sql.catalyst.trees.TreePattern.RELATION_TIME_TRAVEL
2424
import org.apache.spark.sql.classic.SparkSession
25-
import org.apache.spark.sql.execution.{QueryExecution, ScalarSubquery => ScalarSubqueryExec, SubqueryExec}
2625

2726
class EvalSubqueriesForTimeTravel extends Rule[LogicalPlan] {
2827
override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsWithPruning(
@@ -35,26 +34,15 @@ class EvalSubqueriesForTimeTravel extends Rule[LogicalPlan] {
3534
// outer references and should not be correlated.
3635
assert(!s.isCorrelated, "Correlated subquery should not appear in " +
3736
classOf[EvalSubqueriesForTimeTravel].getSimpleName)
38-
SimpleAnalyzer.checkSubqueryExpression(r, s)
39-
val executedPlan = QueryExecution.prepareExecutedPlan(SparkSession.active, s.plan)
40-
val physicalSubquery = ScalarSubqueryExec(
41-
SubqueryExec.createForScalarSubquery(
42-
s"scalar-subquery#${s.exprId.id}", executedPlan),
43-
s.exprId)
44-
evalSubqueries(physicalSubquery)
45-
Literal(physicalSubquery.eval(), s.dataType)
37+
// Wrap the scalar subquery in a Project over OneRowRelation to execute it
38+
// through the normal query execution path. This properly handles table
39+
// references in the subquery (e.g., V2 tables).
40+
val wrappedPlan = Project(Seq(Alias(s, "result")()), OneRowRelation())
41+
val spark = SparkSession.active
42+
val qe = spark.sessionState.executePlan(wrappedPlan)
43+
val result = qe.executedPlan.executeCollect().head.get(0, s.dataType)
44+
Literal(result, s.dataType)
4645
}
4746
r.copy(timestamp = Some(subqueryEvaluated))
4847
}
49-
50-
// Evaluate subqueries in a bottom-up way.
51-
private def evalSubqueries(subquery: ScalarSubqueryExec): Unit = {
52-
subquery.plan.foreachUp { plan =>
53-
plan.expressions.foreach(_.foreachUp {
54-
case s: ScalarSubqueryExec => evalSubqueries(s)
55-
case _ =>
56-
})
57-
}
58-
subquery.updateResult()
59-
}
6048
}

sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3241,13 +3241,16 @@ class DataSourceV2SQLSuiteV1Filter
32413241
DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone))
32423242
val ts1InSeconds = MICROSECONDS.toSeconds(ts1).toString
32433243
val ts2InSeconds = MICROSECONDS.toSeconds(ts2).toString
3244+
3245+
val t = "testcat.t"
32443246
val t3 = s"testcat.t$ts1"
32453247
val t4 = s"testcat.t$ts2"
3246-
3247-
withTable(t3, t4) {
3248+
withTable(t, t3, t4) {
3249+
sql(s"CREATE TABLE $t (ts STRING) USING foo")
32483250
sql(s"CREATE TABLE $t3 (id int) USING foo")
32493251
sql(s"CREATE TABLE $t4 (id int) USING foo")
32503252

3253+
sql(s"INSERT INTO $t VALUES ('2019-01-29 00:37:58')")
32513254
sql(s"INSERT INTO $t3 VALUES (5)")
32523255
sql(s"INSERT INTO $t3 VALUES (6)")
32533256
sql(s"INSERT INTO $t4 VALUES (7)")
@@ -3282,6 +3285,9 @@ class DataSourceV2SQLSuiteV1Filter
32823285
val res10 = sql("SELECT * FROM t TIMESTAMP AS OF (SELECT (SELECT make_date(2021, 1, 29)))")
32833286
.collect()
32843287
assert(res10 === Array(Row(7), Row(8)))
3288+
// Subquery with table reference
3289+
val res11 = sql("SELECT * FROM t TIMESTAMP AS OF (SELECT MIN(ts) FROM t)").collect()
3290+
assert(res11 === Array(Row(5), Row(6)))
32853291

32863292
checkError(
32873293
exception = intercept[AnalysisException] {
@@ -3307,6 +3313,11 @@ class DataSourceV2SQLSuiteV1Filter
33073313
condition = "INVALID_TIME_TRAVEL_TIMESTAMP_EXPR.INPUT",
33083314
parameters = Map("expr" -> "\"abc\""))
33093315

3316+
checkError(
3317+
exception = analysisException(s"SELECT * FROM $t TIMESTAMP AS OF NULL"),
3318+
condition = "INVALID_TIME_TRAVEL_TIMESTAMP_EXPR.INPUT",
3319+
parameters = Map("expr" -> "\"NULL\""))
3320+
33103321
checkError(
33113322
exception = intercept[AnalysisException] {
33123323
spark.read.option("timestampAsOf", "abc").table("t").collect()

0 commit comments

Comments (0)