Skip to content

Commit

Permalink
feat: implement read_delta for datafusion
Browse files Browse the repository at this point in the history
  • Loading branch information
lostmygithubaccount committed Jun 9, 2023
1 parent 05cc51e commit ef4a53d
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 3 deletions.
44 changes: 44 additions & 0 deletions ibis/backends/datafusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,50 @@ def read_parquet(
self._context.register_parquet(table_name, path, **kwargs)
return self.table(table_name)

def read_delta(
self, source_table: str | Path, table_name: str | None = None, **kwargs: Any
) -> ir.Table:
"""Register a Delta Lake table as a table in the current database.
Parameters
----------
source_table
The data source. Must be a directory
containing a Delta Lake table.
table_name
An optional name to use for the created table. This defaults to
a sequentially generated name.
**kwargs
Additional keyword arguments passed to deltalake.DeltaTable.
Returns
-------
ir.Table
The just-registered table
"""
source_table = normalize_filename(source_table)

table_name = table_name or gen_name("read_delta")

# Our other backends support overwriting views / tables when reregistering
# TODO: datafusion doesn't natively support Delta Lake tables at this point,
# so we need to add this later?
# self._context.deregister_table(table_name)
# self._context.register_delta(table_name, source_table, **kwargs)

try:
from deltalake import DeltaTable
except ImportError:
raise ImportError(
"The deltalake extra is required to use the "
"read_delta method. You can install it using pip:\n\n"
"pip install ibis-framework[deltalake]\n"
)

delta_table = DeltaTable(source_table, **kwargs)

return self.register(delta_table.to_pyarrow_dataset(), table_name=table_name)

def _get_frame(
self,
expr: ir.Expr,
Expand Down
3 changes: 1 addition & 2 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,8 +489,7 @@ def read_delta(
table_name
An optional name to use for the created table. This defaults to
a sequentially generated name.
kwargs
Additional keyword arguments passed to DuckDB loading function.
**kwargs
Additional keyword arguments passed to deltalake.DeltaTable.
Returns
Expand Down
1 change: 0 additions & 1 deletion ibis/backends/tests/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,6 @@ def test_to_pyarrow_decimal(backend, dtype, pyarrow_dtype):
[
"bigquery",
"clickhouse",
"datafusion",
"impala",
"oracle",
"postgres",
Expand Down

0 comments on commit ef4a53d

Please sign in to comment.