Skip to content

Commit beeaa29

Browse files
authored
fix(duckdb): use the delta extension for reading deltalake data (#10833)
Switch to using the DuckDB `delta` extension for reading deltalake data. Closes #10829.
1 parent b85c642 commit beeaa29

File tree

4 files changed

+18
-24
lines changed

4 files changed

+18
-24
lines changed

ibis/backends/duckdb/__init__.py

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -827,43 +827,43 @@ def _read_parquet_pyarrow_dataset(
827827
# explicitly.
828828

829829
def read_delta(
830-
self, path: str, /, *, table_name: str | None = None, **kwargs: Any
830+
self, path: str | Path, /, *, table_name: str | None = None, **kwargs: Any
831831
) -> ir.Table:
832832
"""Register a Delta Lake table as a table in the current database.
833833
834834
Parameters
835835
----------
836836
path
837-
The data source. Must be a directory
838-
containing a Delta Lake table.
837+
The data source. Must be a directory containing a Delta Lake table.
839838
table_name
840839
An optional name to use for the created table. This defaults to
841840
a sequentially generated name.
842-
**kwargs
841+
kwargs
843842
Additional keyword arguments passed as options to DuckDB's `delta_scan` function.
844843
845844
Returns
846845
-------
847846
ir.Table
848847
The just-registered table.
849-
850848
"""
851-
path = util.normalize_filenames(path)[0]
849+
(path,) = util.normalize_filenames(path)
850+
851+
extensions = ["delta"]
852+
if path.startswith(("http://", "https://", "s3://")):
853+
extensions.append("httpfs")
852854

853855
table_name = table_name or util.gen_name("read_delta")
854856

855-
try:
856-
from deltalake import DeltaTable
857-
except ImportError:
858-
raise ImportError(
859-
"The deltalake extra is required to use the "
860-
"read_delta method. You can install it using pip:\n\n"
861-
"pip install 'ibis-framework[deltalake]'\n"
862-
)
857+
options = [
858+
sg.to_identifier(key).eq(sge.convert(val)) for key, val in kwargs.items()
859+
]
863860

864-
delta_table = DeltaTable(path, **kwargs)
861+
self._load_extensions(extensions)
865862

866-
self.con.register(table_name, delta_table.to_pyarrow_dataset())
863+
self._create_temp_view(
864+
table_name,
865+
sg.select(STAR).from_(self.compiler.f.delta_scan(path, *options)),
866+
)
867867
return self.table(table_name)
868868

869869
def list_tables(

ibis/backends/tests/test_export.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
from ibis import util
1313
from ibis.backends.tests.errors import (
1414
DatabricksServerOperationError,
15-
DuckDBInvalidInputException,
1615
DuckDBNotImplementedException,
1716
DuckDBParserException,
1817
ExaQueryError,
@@ -476,11 +475,6 @@ def test_to_pyarrow_decimal(backend, dtype, pyarrow_dtype):
476475
condition=CI and IS_SPARK_REMOTE,
477476
reason="not supported until pyspark 4",
478477
)
479-
@pytest.mark.xfail_version(
480-
duckdb=["pyarrow>=19"],
481-
raises=DuckDBInvalidInputException,
482-
reason="decoding delta file fails",
483-
)
484478
@pytest.mark.xfail_version(
485479
datafusion=["pyarrow>=19", "datafusion>=44"],
486480
raises=Exception,

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ druid = [
9797
"rich>=12.4.4,<14",
9898
]
9999
duckdb = [
100-
"duckdb>=0.10,<2",
100+
"duckdb>=0.10.3,<2",
101101
"pyarrow>=10.0.1",
102102
"pyarrow-hotfix>=0.4,<1",
103103
"numpy>=1.23.2,<3",

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)