Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
cefc2de
add test catalog
huan233usc Jul 4, 2025
2e30924
add basic test
huan233usc Jul 4, 2025
668d91a
fix build
huan233usc Jul 7, 2025
7e36962
fix attempt2
huan233usc Jul 7, 2025
3400758
fix attempt3
huan233usc Jul 7, 2025
111e86d
fix attempt4
huan233usc Jul 7, 2025
947c606
fix attempt5
huan233usc Jul 7, 2025
926b37c
fix 6
huan233usc Jul 7, 2025
8e1e632
fix 7
huan233usc Jul 7, 2025
ecbdc09
fix 8
huan233usc Jul 7, 2025
895af13
fix 9
huan233usc Jul 7, 2025
e8bc4c8
fix 19
huan233usc Jul 7, 2025
ff5453b
fix 11
huan233usc Jul 7, 2025
79e840d
fix 12
huan233usc Jul 7, 2025
ef7f427
fix python test
huan233usc Jul 7, 2025
44d2b63
fix build
huan233usc Jul 7, 2025
658aa6e
debug log
huan233usc Jul 7, 2025
1c5eaec
debug log
huan233usc Jul 7, 2025
ae7229d
fix
huan233usc Jul 7, 2025
37fbcf3
fix
huan233usc Jul 7, 2025
34e5905
revert and see what happens
huan233usc Jul 8, 2025
c28511a
revert and see what happens
huan233usc Jul 8, 2025
b9dc13f
fix attempt 2
huan233usc Jul 8, 2025
47659b7
fix attempt 3
huan233usc Jul 8, 2025
fcf0d75
fix attempt 4
huan233usc Jul 8, 2025
e7baf3a
fix attempt 5
huan233usc Jul 8, 2025
e23cc4d
fix attempt 6
huan233usc Jul 8, 2025
c27743a
fix attempt 7
huan233usc Jul 8, 2025
7384217
fix attempt 8
huan233usc Jul 8, 2025
6bb9912
fix attempt 10
huan233usc Jul 8, 2025
0852e20
fix attempt 11
huan233usc Jul 8, 2025
3715bf0
fix attempt 12
huan233usc Jul 8, 2025
778eec7
fix attempt 1234
huan233usc Jul 8, 2025
31243a9
fix attempt 1235
huan233usc Jul 8, 2025
f28ddcd
fix attempt 1236
huan233usc Jul 8, 2025
4070b45
fix
huan233usc Jul 8, 2025
d5df56b
revert
huan233usc Jul 9, 2025
35f3c09
revert
huan233usc Jul 9, 2025
cbf839f
skip kernel
huan233usc Jul 9, 2025
c1cb547
skip kernel
huan233usc Jul 9, 2025
6aec27e
skip kernel
huan233usc Jul 9, 2025
feef5aa
update workflow
huan233usc Jul 9, 2025
465470a
update workflow
huan233usc Jul 9, 2025
090a45c
update workflow
huan233usc Jul 9, 2025
1fb90fb
update workflow
huan233usc Jul 9, 2025
f170ee2
update workflow
huan233usc Jul 9, 2025
255e47c
update workflow
huan233usc Jul 9, 2025
d0341cc
update workflow
huan233usc Jul 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/spark_examples_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,15 @@ jobs:
# it is typically only released when publishing for Scala 2.12.
run: |
build/sbt clean
# First publish storage
build/sbt storage/publishM2

# Then publish kernel
build/sbt kernelApi/publishM2
build/sbt kernelDefaults/publishM2

# Then publish sparkDsv2 explicitly (since it depends on kernel modules)
build/sbt sparkDsv2/publishM2
build/sbt "++ $SCALA_VERSION publishM2"
cd examples/scala && build/sbt "++ $SCALA_VERSION compile"
if: steps.git-diff.outputs.diff
2 changes: 1 addition & 1 deletion .github/workflows/spark_master_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
- name: Run Spark Master tests
# when changing TEST_PARALLELISM_COUNT make sure to also change it in spark_test.yaml
run: |
TEST_PARALLELISM_COUNT=4 SHARD_ID=${{matrix.shard}} build/sbt -DsparkVersion=master "++ ${{ matrix.scala }}" clean spark/test
TEST_PARALLELISM_COUNT=4 SHARD_ID=${{matrix.shard}} DISABLE_UNIDOC=true build/sbt -DsparkVersion=master "++ ${{ matrix.scala }}" clean spark/test
TEST_PARALLELISM_COUNT=4 build/sbt -DsparkVersion=master "++ ${{ matrix.scala }}" clean connectServer/test
TEST_PARALLELISM_COUNT=4 build/sbt -DsparkVersion=master "++ ${{ matrix.scala }}" clean connectServer/assembly connectClient/test
if: steps.git-diff.outputs.diff
109 changes: 93 additions & 16 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ val LATEST_RELEASED_SPARK_VERSION = "3.5.3"
val SPARK_MASTER_VERSION = "4.0.1-SNAPSHOT"
val sparkVersion = settingKey[String]("Spark version")
spark / sparkVersion := getSparkVersion()
sparkDsv1 / sparkVersion := getSparkVersion()
connectCommon / sparkVersion := getSparkVersion()
connectClient / sparkVersion := getSparkVersion()
connectServer / sparkVersion := getSparkVersion()
Expand Down Expand Up @@ -349,7 +350,7 @@ lazy val connectClient = (project in file("spark-connect/client"))
// Create a symlink for the log4j properties
val confDir = distributionDir / "conf"
IO.createDirectory(confDir)
val log4jProps = (spark / Test / resourceDirectory).value / "log4j2_spark_master.properties"
val log4jProps = (sparkDsv1 / Test / resourceDirectory).value / "log4j2_spark_master.properties"
val linkedLog4jProps = confDir / "log4j2.properties"
Files.createSymbolicLink(linkedLog4jProps.toPath, log4jProps.toPath)
}
Expand All @@ -362,7 +363,7 @@ lazy val connectClient = (project in file("spark-connect/client"))

lazy val connectServer = (project in file("spark-connect/server"))
.dependsOn(connectCommon % "compile->compile;test->test;provided->provided")
.dependsOn(spark % "compile->compile;test->test;provided->provided")
.dependsOn(sparkDsv1 % "compile->compile;test->test;provided->provided")
.disablePlugins(JavaFormatterPlugin, ScalafmtPlugin)
.settings(
name := "delta-connect-server",
Expand Down Expand Up @@ -431,13 +432,12 @@ lazy val deltaSuiteGenerator = (project in file("spark/delta-suite-generator"))
Compile / mainClass := Some("io.delta.suitegenerator.ModularSuiteGenerator"),
Test / baseDirectory := (ThisBuild / baseDirectory).value,
)

lazy val spark = (project in file("spark"))
lazy val sparkDsv1 = (project in file("spark"))
.dependsOn(storage)
.enablePlugins(Antlr4Plugin)
.disablePlugins(JavaFormatterPlugin, ScalafmtPlugin)
.settings (
name := "delta-spark",
name := "delta-spark-dsv1",
commonSettings,
scalaStyleSettings,
sparkMimaSettings,
Expand All @@ -464,7 +464,7 @@ lazy val spark = (project in file("spark"))
"org.mockito" % "mockito-inline" % "4.11.0" % "test",
),
Compile / packageBin / mappings := (Compile / packageBin / mappings).value ++
listPythonFiles(baseDirectory.value.getParentFile / "python"),
listPythonFiles(baseDirectory.value.getParentFile / "python"),
Antlr4 / antlr4PackageName := Some("io.delta.sql.parser"),
Antlr4 / antlr4GenListener := true,
Antlr4 / antlr4GenVisitor := true,
Expand Down Expand Up @@ -523,7 +523,7 @@ lazy val spark = (project in file("spark"))
)

lazy val contribs = (project in file("contribs"))
.dependsOn(spark % "compile->compile;test->test;provided->provided")
.dependsOn(sparkDsv1 % "compile->compile;test->test;provided->provided")
.disablePlugins(JavaFormatterPlugin, ScalafmtPlugin)
.settings (
name := "delta-contribs",
Expand Down Expand Up @@ -562,7 +562,7 @@ lazy val contribs = (project in file("contribs"))
).configureUnidoc()

lazy val sharing = (project in file("sharing"))
.dependsOn(spark % "compile->compile;test->test;provided->provided")
.dependsOn(sparkDsv1 % "compile->compile;test->test;provided->provided")
.disablePlugins(JavaFormatterPlugin, ScalafmtPlugin)
.settings(
name := "delta-sharing-spark",
Expand Down Expand Up @@ -597,6 +597,8 @@ lazy val kernelApi = (project in file("kernel/kernel-api"))
javaOnlyReleaseSettings,
javafmtCheckSettings,
scalafmtCheckSettings,
// Skip kernel tests when running with Scala 2.13 due to compatibility issues
Test / skip := scalaVersion.value.startsWith("2.13"),
Test / javaOptions ++= Seq("-ea"),
libraryDependencies ++= Seq(
"org.roaringbitmap" % "RoaringBitmap" % "0.9.25",
Expand Down Expand Up @@ -676,7 +678,7 @@ lazy val kernelDefaults = (project in file("kernel/kernel-defaults"))
.dependsOn(kernelApi % "test->test")
.dependsOn(storage)
.dependsOn(storage % "test->test") // Required for InMemoryCommitCoordinator for tests
.dependsOn(spark % "test->test")
.dependsOn(sparkDsv1 % "test->test")
.dependsOn(goldenTables % "test")
.settings(
name := "delta-kernel-defaults",
Expand All @@ -685,6 +687,8 @@ lazy val kernelDefaults = (project in file("kernel/kernel-defaults"))
javaOnlyReleaseSettings,
javafmtCheckSettings,
scalafmtCheckSettings,
// Skip kernel tests when running with Scala 2.13 due to compatibility issues
Test / skip := scalaVersion.value.startsWith("2.13"),
Test / javaOptions ++= Seq("-ea"),
// This allows generating tables with unsupported test table features in delta-spark
Test / envVars += ("DELTA_TESTING", "1"),
Expand Down Expand Up @@ -715,6 +719,79 @@ lazy val kernelDefaults = (project in file("kernel/kernel-defaults"))
unidocSourceFilePatterns += SourceFilePattern("io/delta/kernel/"),
).configureUnidoc(docTitle = "Delta Kernel Defaults")


lazy val sparkDsv2 = (project in file("spark-dsv2"))
.dependsOn(kernelApi)
.dependsOn(kernelDefaults)
.dependsOn(sparkDsv1 % "test->test")
.settings(
name := "delta-spark-dsv2",
commonSettings,
javaOnlyReleaseSettings
).configureUnidoc(
generatedJavaDoc = getSparkVersion() == LATEST_RELEASED_SPARK_VERSION,
generateScalaDoc = getSparkVersion() == LATEST_RELEASED_SPARK_VERSION)

lazy val spark = (project in file("spark-jar"))
.dependsOn(sparkDsv1)
.dependsOn(sparkDsv2)
.aggregate(sparkDsv1, sparkDsv2, kernelApi, kernelDefaults)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needed for DSP workflow

.settings (
name := "delta-spark",
commonSettings,
scalaStyleSettings,
releaseSettings,
crossSparkSettings(),
sparkMimaSettings,
// Task to copy classes from dependent modules to make sure mima works.
Compile / compile := {
val compileResult = (Compile / compile).value
val log = streams.value.log
val sparkClassesDir = (Compile / classDirectory).value
val sparkDsv1ClassesDir = (sparkDsv1 / Compile / classDirectory).value
val sparkDsv2ClassesDir = (sparkDsv2 / Compile / classDirectory).value
// Copy classes from sparkDsv1
if (sparkDsv1ClassesDir.exists()) {
IO.copyDirectory(sparkDsv1ClassesDir, sparkClassesDir, overwrite = true)
}
// Copy classes from sparkDsv2
if (sparkDsv2ClassesDir.exists()) {
IO.copyDirectory(sparkDsv2ClassesDir, sparkClassesDir, overwrite = true)
}
compileResult
},
// Make sure dependent modules are compiled first
Compile / compile := (Compile / compile).dependsOn(
sparkDsv1 / Compile / compile,
sparkDsv2 / Compile / compile
).value,

Compile / packageBin / mappings := (Compile / packageBin / mappings).value ++
listPythonFiles(baseDirectory.value.getParentFile / "python"),

javaOptions += "-Xmx1024m",

// Required for testing table features see https://github.com/delta-io/delta/issues/1602
Test / envVars += ("DELTA_TESTING", "1"),

// Hack to avoid errors related to missing repo-root/target/scala-2.12/classes/
createTargetClassesDir := {
val dir = baseDirectory.value.getParentFile / "target" / "scala-2.12" / "classes"
Files.createDirectories(dir.toPath)
},
Compile / compile := ((Compile / compile) dependsOn createTargetClassesDir).value,
TestParallelization.settings,
)
.configureUnidoc(
generatedJavaDoc = getSparkVersion() == LATEST_RELEASED_SPARK_VERSION,
generateScalaDoc = getSparkVersion() == LATEST_RELEASED_SPARK_VERSION,
// spark-connect has classes with the same name as spark-core, this causes compilation issues
// with unidoc since it concatenates the classpaths from all modules
// ==> thus we exclude such sources
// (mostly) relevant github issue: https://github.com/sbt/sbt-unidoc/issues/77
classPathToSkip = "spark-connect"
)

lazy val unity = (project in file("unity"))
.enablePlugins(ScalafmtPlugin)
.dependsOn(kernelApi % "compile->compile;test->test")
Expand Down Expand Up @@ -764,7 +841,7 @@ lazy val storage = (project in file("storage"))

lazy val storageS3DynamoDB = (project in file("storage-s3-dynamodb"))
.dependsOn(storage % "compile->compile;test->test;provided->provided")
.dependsOn(spark % "test->test")
.dependsOn(sparkDsv1 % "test->test")
.disablePlugins(JavaFormatterPlugin, ScalafmtPlugin)
.settings (
name := "delta-storage-s3-dynamodb",
Expand All @@ -790,7 +867,7 @@ val icebergSparkRuntimeArtifactName = {

lazy val testDeltaIcebergJar = (project in file("testDeltaIcebergJar"))
// delta-iceberg depends on delta-spark! So, we need to include it during our test.
.dependsOn(spark % "test")
.dependsOn(sparkDsv1 % "test")
.disablePlugins(JavaFormatterPlugin, ScalafmtPlugin)
.settings(
name := "test-delta-iceberg-jar",
Expand Down Expand Up @@ -820,7 +897,7 @@ val deltaIcebergSparkIncludePrefixes = Seq(
// It will fail the first time, just re-run it.
// scalastyle:off println
lazy val iceberg = (project in file("iceberg"))
.dependsOn(spark % "compile->compile;test->test;provided->provided")
.dependsOn(sparkDsv1 % "compile->compile;test->test;provided->provided")
.disablePlugins(JavaFormatterPlugin, ScalafmtPlugin)
.settings (
name := "delta-iceberg",
Expand Down Expand Up @@ -890,7 +967,7 @@ lazy val iceberg = (project in file("iceberg"))
lazy val generateIcebergJarsTask = TaskKey[Unit]("generateIcebergJars", "Generate Iceberg JARs")

lazy val icebergShaded = (project in file("icebergShaded"))
.dependsOn(spark % "provided")
.dependsOn(sparkDsv1 % "provided")
.disablePlugins(JavaFormatterPlugin, ScalafmtPlugin)
.settings (
name := "iceberg-shaded",
Expand Down Expand Up @@ -921,7 +998,7 @@ lazy val icebergShaded = (project in file("icebergShaded"))
)

lazy val hudi = (project in file("hudi"))
.dependsOn(spark % "compile->compile;test->test;provided->provided")
.dependsOn(sparkDsv1 % "compile->compile;test->test;provided->provided")
.disablePlugins(JavaFormatterPlugin, ScalafmtPlugin)
.settings (
name := "delta-hudi",
Expand Down Expand Up @@ -1433,7 +1510,7 @@ lazy val compatibility = (project in file("connectors/oss-compatibility-tests"))
*/

lazy val goldenTables = (project in file("connectors/golden-tables"))
.dependsOn(spark % "test") // depends on delta-spark
.dependsOn(sparkDsv1 % "test") // depends on delta-spark
.disablePlugins(JavaFormatterPlugin, ScalafmtPlugin)
.settings(
name := "golden-tables",
Expand All @@ -1460,7 +1537,7 @@ def sqlDeltaImportScalaVersion(scalaBinaryVersion: String): String = {
}

lazy val sqlDeltaImport = (project in file("connectors/sql-delta-import"))
.dependsOn(spark)
.dependsOn(sparkDsv1)
.disablePlugins(JavaFormatterPlugin, ScalafmtPlugin)
.settings (
name := "sql-delta-import",
Expand Down
44 changes: 44 additions & 0 deletions spark-dsv2/src/test/java/io/delta/spark/dsv2/Dsv2BasicTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright (2025) The Delta Lake Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.delta.spark.dsv2;

import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue;

import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
import org.junit.Test;

public class Dsv2BasicTest {

private final SparkSession spark =
SparkSession.builder()
.master("local[*]")
.config(
new SparkConf()
.set("spark.sql.catalog.dsv2", "io.delta.spark.dsv2.catalog.TestCatalog"))
.getOrCreate();

@Test
public void loadTableTest() {
Exception exception =
assertThrows(
Exception.class, () -> spark.sql("select * from dsv2.test_namespace.test_table"));

assertTrue(exception instanceof UnsupportedOperationException);
assertTrue(exception.getMessage().contains("loadTable method is not implemented"));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright (2025) The Delta Lake Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.delta.spark.dsv2.catalog;

import java.util.*;
import org.apache.spark.sql.connector.catalog.*;
import org.apache.spark.sql.connector.expressions.Transform;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.util.CaseInsensitiveStringMap;

public class TestCatalog implements TableCatalog {

private String catalogName;

@Override
public Identifier[] listTables(String[] namespace) {
throw new UnsupportedOperationException("listTables method is not implemented");
}

@Override
public Table loadTable(Identifier ident) {
throw new UnsupportedOperationException("loadTable method is not implemented");
}

@Override
public Table createTable(
Identifier ident, StructType schema, Transform[] partitions, Map<String, String> properties) {
throw new UnsupportedOperationException("createTable method is not implemented");
}

@Override
public Table alterTable(Identifier ident, TableChange... changes) {
throw new UnsupportedOperationException("alterTable method is not implemented");
}

@Override
public boolean dropTable(Identifier ident) {
throw new UnsupportedOperationException("dropTable method is not implemented");
}

@Override
public void renameTable(Identifier oldIdent, Identifier newIdent) {
throw new UnsupportedOperationException("renameTable method is not implemented");
}

@Override
public void initialize(String name, CaseInsensitiveStringMap options) {
this.catalogName = name;
}

@Override
public String name() {
return catalogName;
}
}
Loading