merge main and test ci
ravi-kumar-pilla committed Dec 17, 2024
2 parents 7ba66a4 + 2df011c commit 3161627
Showing 12 changed files with 5,823 additions and 7 deletions.
1 change: 1 addition & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
@@ -8,6 +8,7 @@

 - [ ] Opened this PR as a 'Draft Pull Request' if it is work-in-progress
 - [ ] Updated the documentation to reflect the code changes
+- [ ] Updated `jsonschema/kedro-catalog-X.XX.json` if necessary
 - [ ] Added a description of this change in the relevant `RELEASE.md` file
 - [ ] Added tests to cover my changes
 - [ ] Received approvals from at least half of the TSC (required for adding a new, non-experimental dataset)
2 changes: 1 addition & 1 deletion .github/workflows/kedro-datasets.yml
@@ -22,7 +22,7 @@ jobs:
     strategy:
       matrix:
         os: [ ubuntu-latest, windows-latest ]
-        python-version: [ "3.10", "3.11", "3.12", "3.13" ]
+        python-version: [ "3.13" ]
     uses: ./.github/workflows/unit-tests.yml
     with:
       plugin: kedro-datasets
2 changes: 1 addition & 1 deletion kedro-datasets/CONTRIBUTING.md
@@ -27,7 +27,7 @@ If you have new ideas for Kedro-Datasets then please open a [GitHub issue](https

 If you're unsure where to begin contributing to Kedro-Datasets, please start by looking through the `good first issue` and `help wanted` on [GitHub](https://github.com/kedro-org/kedro-plugins/issues).
 If you want to contribute a new dataset, read the [tutorial to create and contribute a custom dataset](https://docs.kedro.org/en/stable/data/how_to_create_a_custom_dataset.html) in the Kedro documentation.
-Make sure to add the new dataset to `kedro_datasets.rst` so that it shows up in the API documentation and to `static/jsonschema/kedro-catalog-X.json` for IDE validation.
+Make sure to add the new dataset to `kedro_datasets.rst` so that it shows up in the API documentation and to `kedro-datasets/static/jsonschema/kedro-catalog-X.json` for IDE validation.

 Below is a guide to help you understand the process of contributing a new dataset, whether it falls under the category of core or experimental datasets.
4 changes: 4 additions & 0 deletions kedro-datasets/RELEASE.md
@@ -3,6 +3,7 @@
 ## Major features and improvements

 - Added support for Python 3.13
+- Supported passing `database` to `ibis.TableDataset` for load and save operations.
 - Added functionality to save pandas DataFrames directly to Snowflake, facilitating seamless `.csv` ingestion.
 - Added Python 3.9, 3.10 and 3.11 support for `snowflake.SnowflakeTableDataset`.
 - Enabled connection sharing between `ibis.FileDataset` and `ibis.TableDataset` instances, thereby allowing nodes to save data loaded by one to the other (as long as they share the same connection configuration).
@@ -18,6 +19,7 @@
 - Implemented Snowflake's [local testing framework](https://docs.snowflake.com/en/developer-guide/snowpark/python/testing-locally) for testing purposes.
 - Improved the dependency management for Spark-based datasets by refactoring the Spark and Databricks utility functions used across the datasets.
 - Added deprecation warning for `tracking.MetricsDataset` and `tracking.JSONDataset`.
+- Moved `kedro-catalog` JSON schemas from Kedro core to `kedro-datasets`.

 ## Breaking Changes

@@ -29,6 +31,8 @@ Many thanks to the following Kedroids for contributing PRs to this release:

 - [Thomas d'Hooghe](https://github.com/tdhooghe)
 - [Minura Punchihewa](https://github.com/MinuraPunchihewa)
+- [Mark Druffel](https://github.com/mark-druffel)
+- [Chris Schopp](https://github.com/chrisschopp)

 # Release 5.1.0

23 changes: 20 additions & 3 deletions kedro-datasets/kedro_datasets/ibis/table_dataset.py
@@ -79,6 +79,7 @@ def __init__( # noqa: PLR0913
         filepath: str | None = None,
         file_format: str | None = None,
         table_name: str | None = None,
+        database: str | None = None,
         connection: dict[str, Any] | None = None,
         load_args: dict[str, Any] | None = None,
         save_args: dict[str, Any] | None = None,
@@ -103,6 +104,12 @@
         Args:
             table_name: The name of the table or view to read or create.
+            database: The name of the database to read the table or view
+                from or create the table or view in. If not passed, then
+                the current database is used. Provide a tuple of strings
+                (e.g. `("catalog", "database")`) or a dotted string path
+                (e.g. `"catalog.database"`) to reference a table or view
+                in a multi-level table hierarchy.
             connection: Configuration for connecting to an Ibis backend.
                 If not provided, connect to DuckDB in in-memory mode.
             load_args: Additional arguments passed to the Ibis backend's
@@ -132,17 +139,22 @@ def __init__( # noqa: PLR0913
         self._filepath = filepath
         self._file_format = file_format
         self._table_name = table_name
+        self._database = database
         self._connection_config = connection or self.DEFAULT_CONNECTION_CONFIG
         self.metadata = metadata

         # Set load and save arguments, overwriting defaults if provided.
         self._load_args = deepcopy(self.DEFAULT_LOAD_ARGS)
         if load_args is not None:
             self._load_args.update(load_args)
+        if database is not None:
+            self._load_args["database"] = database

         self._save_args = deepcopy(self.DEFAULT_SAVE_ARGS)
         if save_args is not None:
             self._save_args.update(save_args)
+        if database is not None:
+            self._save_args["database"] = database

         self._materialized = self._save_args.pop("materialized")

@@ -166,7 +178,7 @@ def load(self) -> ir.Table:
             reader = getattr(self.connection, f"read_{self._file_format}")
             return reader(self._filepath, self._table_name, **self._load_args)
         else:
-            return self.connection.table(self._table_name)
+            return self.connection.table(self._table_name, **self._load_args)

     def save(self, data: ir.Table) -> None:
         if self._table_name is None:
@@ -176,13 +188,18 @@ def save(self, data: ir.Table) -> None:
         writer(self._table_name, data, **self._save_args)

     def _describe(self) -> dict[str, Any]:
+        load_args = deepcopy(self._load_args)
+        save_args = deepcopy(self._save_args)
+        load_args.pop("database", None)
+        save_args.pop("database", None)
         return {
             "filepath": self._filepath,
             "file_format": self._file_format,
             "table_name": self._table_name,
+            "database": self._database,
             "backend": self._connection_config["backend"],
-            "load_args": self._load_args,
-            "save_args": self._save_args,
+            "load_args": load_args,
+            "save_args": save_args,
             "materialized": self._materialized,
         }

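For readers skimming the diff, here is a minimal sketch of how the new `database` parameter might be used. The table name, database path, and DuckDB connection settings below are illustrative assumptions, not part of this commit.

```python
from kedro_datasets.ibis import TableDataset

# Hypothetical dataset pointing at a table in a multi-level hierarchy.
# `database` accepts a dotted string path or a tuple of strings,
# e.g. ("analytics", "sales").
dataset = TableDataset(
    table_name="orders",
    database="analytics.sales",
    connection={"backend": "duckdb", "database": "warehouse.db"},
)

table = dataset.load()  # now calls connection.table("orders", database="analytics.sales")
dataset.save(table)     # the create_* writer also receives database="analytics.sales"
```

The same keys work from `catalog.yml`, since Kedro passes catalog entries straight to the dataset constructor. Note that `_describe()` now reports `database` as its own field and strips it from the displayed `load_args`/`save_args` to avoid duplication.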
1 change: 0 additions & 1 deletion kedro-datasets/kedro_datasets/pandas/sql_dataset.py
@@ -1,5 +1,4 @@
 """``SQLDataset`` to load and save data to a SQL backend."""
-
 from __future__ import annotations

 import copy
