Skip to content

Commit

Permalink
chore(datasets): Remove deprecation warning from Table Dataset (#956)
Browse files Browse the repository at this point in the history
* remove deprecation warning from table dataset

* fix lint

* remove release note

* remove filepath, file_format and load_args

* fix tests

* adjust file permissions

* fix lint

* address PR comments

* fix tests

* restore load_args

* fix lint

* fix tests

* add load_args check

* address PR comments
  • Loading branch information
ravi-kumar-pilla authored Dec 17, 2024
1 parent 9054d99 commit 83dc20f
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 65 deletions.
1 change: 1 addition & 0 deletions kedro-datasets/RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
## Breaking Changes

- Demoted `video.VideoDataset` from core to experimental dataset.
- Removed file handling capabilities from `ibis.TableDataset`. Use `ibis.FileDataset` to load and save files with an Ibis backend instead.

## Community contributions

Expand Down
44 changes: 6 additions & 38 deletions kedro-datasets/kedro_datasets/ibis/table_dataset.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
"""Provide data loading and saving functionality for Ibis's backends."""
from __future__ import annotations

import warnings
from copy import deepcopy
from typing import TYPE_CHECKING, Any, ClassVar

import ibis.expr.types as ir
from kedro.io import AbstractDataset, DatasetError
from kedro.io import AbstractDataset

from kedro_datasets import KedroDeprecationWarning
from kedro_datasets._utils import ConnectionMixin

if TYPE_CHECKING:
Expand Down Expand Up @@ -76,16 +74,14 @@ class TableDataset(ConnectionMixin, AbstractDataset[ir.Table, ir.Table]):
def __init__( # noqa: PLR0913
self,
*,
filepath: str | None = None,
file_format: str | None = None,
table_name: str | None = None,
table_name: str,
database: str | None = None,
connection: dict[str, Any] | None = None,
load_args: dict[str, Any] | None = None,
save_args: dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
) -> None:
"""Creates a new ``TableDataset`` pointing to a table (or file).
"""Creates a new ``TableDataset`` pointing to a table.
``TableDataset`` connects to the Ibis backend object constructed
from the connection configuration. The `backend` key provided in
Expand All @@ -95,8 +91,7 @@ def __init__( # noqa: PLR0913
`ibis.duckdb.connect() <https://ibis-project.org/backends/duckdb\
#ibis.duckdb.connect>`_).
The dataset establishes a connection to the relevant table for the execution
backend. Therefore, Ibis doesn't fetch data on load; all compute
is deferred until materialization, when the expression is saved.
In practice, this happens when another ``TableDataset`` instance
Expand All @@ -122,22 +117,7 @@ def __init__( # noqa: PLR0913
metadata: Any arbitrary metadata. This is ignored by Kedro,
but may be consumed by users or external plugins.
"""
if filepath is None and table_name is None:
raise DatasetError(
"Must provide at least one of `filepath` or `table_name`."
)

if filepath is not None or file_format is not None:
warnings.warn(
"Use 'FileDataset' to load and save files with an Ibis "
"backend; the functionality will be removed from 'Table"
"Dataset' in Kedro-Datasets 6.0.0",
KedroDeprecationWarning,
stacklevel=2,
)

self._filepath = filepath
self._file_format = file_format

self._table_name = table_name
self._database = database
self._connection_config = connection or self.DEFAULT_CONNECTION_CONFIG
Expand Down Expand Up @@ -171,19 +151,9 @@ def connection(self) -> BaseBackend:
return self._connection

def load(self) -> ir.Table:
    """Return a lazy Ibis table expression for the configured table.

    No data is fetched here; Ibis defers all compute until the
    expression is materialized (e.g. by a subsequent ``save``).

    Returns:
        An ``ibis`` table expression bound to ``self._table_name`` on
        the backend connection, with ``self._load_args`` (e.g.
        ``database``) forwarded to ``Backend.table``.
    """
    # File-based loading was removed from this dataset; use
    # ibis.FileDataset for reading files with an Ibis backend.
    return self.connection.table(self._table_name, **self._load_args)

def save(self, data: ir.Table) -> None:
    """Materialize *data* as a table or view on the backend.

    Args:
        data: The Ibis table expression to persist.

    Dispatches to ``Backend.create_table`` or ``Backend.create_view``
    depending on the configured ``self._materialized`` value, passing
    ``self._save_args`` through.
    """
    # self._materialized is either "table" or "view" (from save_args),
    # selecting the corresponding create_* method on the connection.
    writer = getattr(self.connection, f"create_{self._materialized}")
    writer(self._table_name, data, **self._save_args)

Expand All @@ -193,8 +163,6 @@ def _describe(self) -> dict[str, Any]:
load_args.pop("database", None)
save_args.pop("database", None)
return {
"filepath": self._filepath,
"file_format": self._file_format,
"table_name": self._table_name,
"database": self._database,
"backend": self._connection_config["backend"],
Expand Down
33 changes: 6 additions & 27 deletions kedro-datasets/tests/ibis/test_table_dataset.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import duckdb
import ibis
import pytest
from kedro.io import DatasetError
from packaging.version import Version
from pandas.testing import assert_frame_equal

Expand Down Expand Up @@ -48,19 +47,8 @@ def table_dataset(database_name, connection_config, load_args, save_args):


@pytest.fixture
def table_dataset_from_csv(filepath_csv, connection_config, load_args, save_args):
return TableDataset(
filepath=filepath_csv,
file_format="csv",
connection=connection_config,
load_args=load_args,
save_args=save_args,
)


@pytest.fixture
def dummy_table():
    """Return a small in-memory Ibis table used as save/load test data.

    Built with ``ibis.memtable`` so the fixture no longer depends on the
    removed CSV-backed ``table_dataset_from_csv`` fixture.
    """
    return ibis.memtable({"col1": [1, 2], "col2": [4, 5], "col3": [5, 6]})


@pytest.fixture
Expand Down Expand Up @@ -112,10 +100,11 @@ def test_exists(self, table_dataset, dummy_table):
table_dataset.save(dummy_table)
assert table_dataset.exists()

@pytest.mark.parametrize("load_args", [{"database": "test"}], indirect=True)
def test_load_extra_params(self, table_dataset, load_args):
    """Test overriding the default load arguments."""
    # The dataset no longer supports file reads, so instead of loading a
    # CSV and checking a reader-specific key, verify the overridden
    # load_args are stored on the dataset for use by Backend.table().
    for key, value in load_args.items():
        assert table_dataset._load_args[key] == value

@pytest.mark.parametrize("save_args", [{"materialized": "table"}], indirect=True)
def test_save_extra_params(self, table_dataset, save_args, dummy_table, database):
Expand Down Expand Up @@ -172,16 +161,6 @@ def test_database(
"test" in con.sql("SELECT * FROM duckdb_views").fetchnumpy()["schema_name"]
)

def test_no_filepath_or_table_name(connection_config):
pattern = r"Must provide at least one of `filepath` or `table_name`\."
with pytest.raises(DatasetError, match=pattern):
TableDataset(connection=connection_config)

def test_save_no_table_name(self, table_dataset_from_csv, dummy_table):
pattern = r"Must provide `table_name` for materialization\."
with pytest.raises(DatasetError, match=pattern):
table_dataset_from_csv.save(dummy_table)

@pytest.mark.parametrize(
("connection_config", "key"),
[
Expand Down

0 comments on commit 83dc20f

Please sign in to comment.