
Commit 5e15c6d

itholic authored and beliefer committed
[SPARK-40948][SQL][FOLLOWUP] Restore PATH_NOT_FOUND
### What changes were proposed in this pull request?

The original PR that introduced the error class `PATH_NOT_FOUND` was reverted because it broke tests in a different test environment. This PR restores it.

### Why are the changes needed?

To restore the reverted changes with a proper fix.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

The existing CI should pass.

Closes apache#38575 from itholic/SPARK-40948-followup.

Authored-by: itholic <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
1 parent d8eab12 commit 5e15c6d
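For orientation, a minimal sketch of how the restored error class surfaces to users. The path, app name, and printed output are illustrative; `AnalysisException` implements `SparkThrowable`, whose `getErrorClass` exposes the structured class name on builds that include this change:

```scala
import org.apache.spark.sql.{AnalysisException, SparkSession}

object PathNotFoundExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("demo").getOrCreate()
    try {
      // Reading a nonexistent path fails during analysis.
      spark.read.parquet("/no/such/path")
    } catch {
      case e: AnalysisException =>
        // The exception now carries "PATH_NOT_FOUND" rather than the
        // legacy identifier "_LEGACY_ERROR_TEMP_1130".
        println(e.getErrorClass) // PATH_NOT_FOUND
        println(e.getMessage)    // e.g. "[PATH_NOT_FOUND] Path does not exist: file:/no/such/path."
    } finally {
      spark.stop()
    }
  }
}
```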

File tree: 5 files changed, +52 -39 lines


R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 9 additions & 5 deletions

```diff
@@ -3990,12 +3990,16 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume
   expect_error(read.df(source = "json"),
                paste("Error in load : analysis error - Unable to infer schema for JSON.",
                      "It must be specified manually"))
-  expect_error(read.df("arbitrary_path"), "Error in load : analysis error - Path does not exist")
-  expect_error(read.json("arbitrary_path"), "Error in json : analysis error - Path does not exist")
-  expect_error(read.text("arbitrary_path"), "Error in text : analysis error - Path does not exist")
-  expect_error(read.orc("arbitrary_path"), "Error in orc : analysis error - Path does not exist")
+  expect_error(read.df("arbitrary_path"),
+               "Error in load : analysis error - \\[PATH_NOT_FOUND\\].*")
+  expect_error(read.json("arbitrary_path"),
+               "Error in json : analysis error - \\[PATH_NOT_FOUND\\].*")
+  expect_error(read.text("arbitrary_path"),
+               "Error in text : analysis error - \\[PATH_NOT_FOUND\\].*")
+  expect_error(read.orc("arbitrary_path"),
+               "Error in orc : analysis error - \\[PATH_NOT_FOUND\\].*")
   expect_error(read.parquet("arbitrary_path"),
-               "Error in parquet : analysis error - Path does not exist")
+               "Error in parquet : analysis error - \\[PATH_NOT_FOUND\\].*")

   # Arguments checking in R side.
   expect_error(read.df(path = c(3)),
```
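Note on the doubled backslashes above: the new message is prefixed with `[PATH_NOT_FOUND]`, and `expect_error` treats its second argument as a regular expression, so the square brackets must be escaped once for the regex and once for the R string literal. The equivalent check in Scala, using an illustrative message string (the real wording is rendered by Spark at runtime):

```scala
// Illustrative message text only; Spark produces the real one at runtime.
val sample  = "Error in load : analysis error - [PATH_NOT_FOUND] Path does not exist: file:arbitrary_path."
// Unescaped, [PATH_NOT_FOUND] would parse as a character class, not a literal.
val pattern = """Error in load : analysis error - \[PATH_NOT_FOUND\].*""".r
assert(pattern.findFirstIn(sample).isDefined)
```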

core/src/main/resources/error/error-classes.json

Lines changed: 5 additions & 5 deletions

```diff
@@ -912,6 +912,11 @@
     ],
     "sqlState" : "42000"
   },
+  "PATH_NOT_FOUND" : {
+    "message" : [
+      "Path does not exist: <path>."
+    ]
+  },
   "PIVOT_VALUE_DATA_TYPE_MISMATCH" : {
     "message" : [
       "Invalid pivot value '<value>': value data type <valueType> does not match pivot column data type <pivotType>"
@@ -2332,11 +2337,6 @@
       "Unable to infer schema for <format>. It must be specified manually."
     ]
   },
-  "_LEGACY_ERROR_TEMP_1130" : {
-    "message" : [
-      "Path does not exist: <path>."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_1131" : {
     "message" : [
       "Data source <className> does not support <outputMode> output mode."
```

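Each entry's `message` is a template whose `<name>` placeholders are filled from the exception's message parameters. A minimal sketch of that substitution, assuming plain literal replacement (the real rendering in Spark's `SparkThrowableHelper` also handles value quoting and SQLSTATEs):

```scala
// Simplified stand-in for Spark's template rendering, for illustration only.
def render(errorClass: String, template: String, params: Map[String, String]): String = {
  val body = params.foldLeft(template) { case (msg, (k, v)) => msg.replace(s"<$k>", v) }
  s"[$errorClass] $body"
}

render("PATH_NOT_FOUND", "Path does not exist: <path>.", Map("path" -> "file:/tmp/missing"))
// => "[PATH_NOT_FOUND] Path does not exist: file:/tmp/missing."
```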
sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala

Lines changed: 1 addition & 1 deletion

```diff
@@ -1378,7 +1378,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {

   def dataPathNotExistError(path: String): Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1130",
+      errorClass = "PATH_NOT_FOUND",
       messageParameters = Map("path" -> path))
   }
```
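The factory is invoked from path-resolution code such as `DataSource.checkAndGlobPathIfNecessary`. A hedged sketch of a call site; the helper below is hypothetical, and since `QueryCompilationErrors` is `private[sql]`, real callers live under the `org.apache.spark.sql` package:

```scala
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.errors.QueryCompilationErrors

// Hypothetical helper: fs and globPath stand in for the Hadoop FileSystem
// handle and candidate path that the data-source resolution code works with.
def requireExists(fs: FileSystem, globPath: Path): Unit = {
  if (!fs.exists(globPath)) {
    // Raises AnalysisException with errorClass = "PATH_NOT_FOUND"
    // and messageParameters = Map("path" -> globPath.toString).
    throw QueryCompilationErrors.dataPathNotExistError(globPath.toString)
  }
}
```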

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 21 additions & 16 deletions

```diff
@@ -2327,39 +2327,44 @@ class DataFrameSuite extends QueryTest
   test("SPARK-13774: Check error message for non existent path without globbed paths") {
     val uuid = UUID.randomUUID().toString
     val baseDir = Utils.createTempDir()
-    try {
-      val e = intercept[AnalysisException] {
+    checkError(
+      exception = intercept[AnalysisException] {
         spark.read.format("csv").load(
           new File(baseDir, "file").getAbsolutePath,
           new File(baseDir, "file2").getAbsolutePath,
           new File(uuid, "file3").getAbsolutePath,
           uuid).rdd
-      }
-      assert(e.getMessage.startsWith("Path does not exist"))
-    } finally {
-
-    }
-
+      },
+      errorClass = "PATH_NOT_FOUND",
+      parameters = Map("path" -> "file:.*"),
+      matchPVals = true
+    )
   }

   test("SPARK-13774: Check error message for not existent globbed paths") {
     // Non-existent initial path component:
     val nonExistentBasePath = "/" + UUID.randomUUID().toString
     assert(!new File(nonExistentBasePath).exists())
-    val e = intercept[AnalysisException] {
-      spark.read.format("text").load(s"$nonExistentBasePath/*")
-    }
-    assert(e.getMessage.startsWith("Path does not exist"))
+    checkError(
+      exception = intercept[AnalysisException] {
+        spark.read.format("text").load(s"$nonExistentBasePath/*")
+      },
+      errorClass = "PATH_NOT_FOUND",
+      parameters = Map("path" -> s"file:$nonExistentBasePath/*")
+    )

     // Existent initial path component, but no matching files:
     val baseDir = Utils.createTempDir()
     val childDir = Utils.createTempDir(baseDir.getAbsolutePath)
     assert(childDir.exists())
     try {
-      val e1 = intercept[AnalysisException] {
-        spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd
-      }
-      assert(e1.getMessage.startsWith("Path does not exist"))
+      checkError(
+        exception = intercept[AnalysisException] {
+          spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd
+        },
+        errorClass = "PATH_NOT_FOUND",
+        parameters = Map("path" -> s"file:${baseDir.getAbsolutePath}/*/*-xyz.json")
+      )
     } finally {
       Utils.deleteRecursively(baseDir)
     }
```
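`checkError`, from Spark's own test harness, asserts on the structured error class and parameters rather than on the rendered message. Passing `matchPVals = true` makes it treat the expected parameter values as regular expressions, which is what lets the first test cope with the nondeterministic temp-directory path. The idea in miniature, with illustrative values:

```scala
// Miniature version of the matchPVals comparison: expected values are regexes.
val actual   = Map("path" -> "file:/tmp/spark-8a1b2c3d/file") // nondeterministic at runtime
val expected = Map("path" -> "file:.*")                       // regex, not a literal
assert(expected.forall { case (k, regex) => actual(k).matches(regex) })
```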

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala

Lines changed: 16 additions & 12 deletions

```diff
@@ -109,18 +109,22 @@ class DataSourceSuite extends SharedSparkSession with PrivateMethodTester {
   }

   test("test non existent paths") {
-    assertThrows[AnalysisException](
-      DataSource.checkAndGlobPathIfNecessary(
-        Seq(
-          path1.toString,
-          path2.toString,
-          nonExistentPath.toString
-        ),
-        hadoopConf,
-        checkEmptyGlobPath = true,
-        checkFilesExist = true,
-        enableGlobbing = true
-      )
+    checkError(
+      exception = intercept[AnalysisException](
+        DataSource.checkAndGlobPathIfNecessary(
+          Seq(
+            path1.toString,
+            path2.toString,
+            nonExistentPath.toString
+          ),
+          hadoopConf,
+          checkEmptyGlobPath = true,
+          checkFilesExist = true,
+          enableGlobbing = true
+        )
+      ),
+      errorClass = "PATH_NOT_FOUND",
+      parameters = Map("path" -> nonExistentPath.toString)
     )
   }
```
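Unlike the `DataFrameSuite` tests, here `nonExistentPath` is fully known, so the expected `path` parameter is matched literally and `matchPVals` is not needed. For orientation, the internal helper under test has roughly this shape; the signature is reconstructed from the call site above, and the real method in `DataSource` may differ in detail:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

// Reconstructed signature sketch, not the actual implementation.
def checkAndGlobPathIfNecessary(
    paths: Seq[String],
    hadoopConf: Configuration,
    checkEmptyGlobPath: Boolean, // presumably: fail when a glob matches nothing
    checkFilesExist: Boolean,    // presumably: fail when a non-glob path is missing
    enableGlobbing: Boolean): Seq[Path] = ???
```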
