diff --git a/analytics/benchmark_data_fetching.ipynb b/analytics/benchmark_data_fetching.ipynb index 94d2652..89bdd66 100644 --- a/analytics/benchmark_data_fetching.ipynb +++ b/analytics/benchmark_data_fetching.ipynb @@ -46,7 +46,7 @@ "from dotenv import load_dotenv\n", "\n", "from coastpy.io.engine import STACQueryEngine\n", - "from coastpy.io.utils import read_items_extent\n", + "from coastpy.stac.utils import read_snapshot\n", "\n", "load_dotenv(override=True)\n", "\n", @@ -85,7 +85,7 @@ " \"https://coclico.blob.core.windows.net/stac/v1/catalog.json\"\n", ")\n", "gcts_collection = coclico_catalog.get_child(\"gcts\")\n", - "gcts_extents = read_items_extent(gcts_collection, columns=[\"geometry\", \"assets\"])\n", + "gcts_extents = read_snapshot(gcts_collection, columns=[\"geometry\", \"assets\"])\n", "gcts_extents.head()" ] }, diff --git a/analytics/hypsometry.ipynb b/analytics/hypsometry.ipynb index 8a0da3f..eb669ba 100644 --- a/analytics/hypsometry.ipynb +++ b/analytics/hypsometry.ipynb @@ -98,7 +98,7 @@ "metadata": {}, "outputs": [], "source": [ - "from coastpy.io.utils import read_items_extent\n", + "from coastpy.stac.utils import read_snapshot\n", "\n", "# Read the CoCliCo STAC catalog\n", "coclico_catalog = pystac.Catalog.from_file(\n", @@ -107,11 +107,11 @@ "\n", "# Read the spatial footprint of the transect partitions.\n", "gcts_collection = coclico_catalog.get_child(\"gcts\")\n", - "gcts_extents = read_items_extent(gcts_collection, columns=[\"geometry\", \"assets\"])\n", + "gcts_extents = read_snapshot(gcts_collection, columns=[\"geometry\", \"assets\"])\n", "\n", "# Read the spatial footprint of the DeltaDTM tiles.\n", "ddtm_collection = coclico_catalog.get_child(\"deltares-delta-dtm\")\n", - "ddtm_extents = read_items_extent(ddtm_collection, columns=[\"geometry\", \"assets\"])" + "ddtm_extents = read_snapshot(ddtm_collection, columns=[\"geometry\", \"assets\"])" ] }, { @@ -174,7 +174,7 @@ " return gpd.GeoDataFrame(df[\"transect_id\"], geometry=geoms, 
crs=4326)\n", "\n", " gcts_collection = coclico_catalog.get_child(\"gcts\")\n", - " gcts_extents = read_items_extent(gcts_collection, columns=[\"geometry\", \"assets\"])\n", + " gcts_extents = read_snapshot(gcts_collection, columns=[\"geometry\", \"assets\"])\n", " gcts_hrefs = gcts_extents.href.to_list()\n", "\n", " # template GDF that matches what is retunred from map_extract_landward_side\n", @@ -262,7 +262,7 @@ " df.to_parquet(f)\n", "\n", " ddtm_collection = coclico_catalog.get_child(\"deltares-delta-dtm\")\n", - " ddtm_extents = read_items_extent(ddtm_collection, columns=[\"geometry\", \"assets\"])\n", + " ddtm_extents = read_snapshot(ddtm_collection, columns=[\"geometry\", \"assets\"])\n", "\n", " tiles = make_mercantiles(MERCANTILES_LEVEL)\n", "\n", diff --git a/src/coastpy/io/engine.py b/src/coastpy/io/engine.py index 4d6b585..36757b4 100644 --- a/src/coastpy/io/engine.py +++ b/src/coastpy/io/engine.py @@ -7,7 +7,7 @@ import shapely from shapely.wkb import loads as wkb_loads -from coastpy.io.utils import read_items_extent +from coastpy.stac.utils import read_snapshot class BaseQueryEngine: @@ -94,7 +94,7 @@ def __init__( columns: list[str] | None = None, ) -> None: super().__init__(storage_backend=storage_backend) - self.extents = read_items_extent( + self.extents = read_snapshot( stac_collection, columns=["geometry", "assets", "proj:epsg", "href"] ) self.proj_epsg = self.extents["proj:epsg"].unique().item() diff --git a/src/coastpy/io/utils.py b/src/coastpy/io/utils.py index c154b33..8f86172 100644 --- a/src/coastpy/io/utils.py +++ b/src/coastpy/io/utils.py @@ -1,4 +1,3 @@ -import copy import json import logging import pathlib @@ -258,50 +257,6 @@ def name_bounds(bounds, crs): return filename -def read_items_extent(collection, columns=None, storage_options=None): - """ - Reads the extent of items from a STAC collection and returns a GeoDataFrame with specified columns. - - Args: - collection: A STAC collection object that contains assets. 
def read_snapshot(collection, columns=None, storage_options=None):
    """
    Read a collection's GeoParquet item snapshot into a GeoDataFrame.

    The snapshot is the parquet file referenced by the collection's
    "geoparquet-stac-items" asset. "href" is not read from the file; when
    requested it is derived from each row's ``assets["data"]["href"]`` entry.

    Args:
        collection: A STAC collection object whose ``assets`` mapping has a
            "geoparquet-stac-items" entry exposing an ``href`` attribute.
        columns: List of columns to return. Default is
            ["geometry", "assets", "href"]. The passed list is never modified.
        storage_options: Storage options to pass to fsspec. Default is
            {"account_name": "coclico"}.

    Returns:
        GeoDataFrame containing the specified columns.
    """
    if storage_options is None:
        storage_options = {"account_name": "coclico"}

    # Set default columns
    if columns is None:
        columns = ["geometry", "assets", "href"]

    wants_href = "href" in columns
    wants_assets = "assets" in columns

    # Build a fresh list for the read so the caller's ``columns`` object is left
    # untouched; "href" is excluded because it is derived, not read from the file.
    read_columns = [c for c in columns if c != "href"]

    # 'assets' only has to be read when it is returned or needed to derive 'href'
    if wants_href and not wants_assets:
        read_columns.append("assets")
        logger.debug("'assets' column added to the list of columns")

    # Open the parquet file and read the specified columns
    href = collection.assets["geoparquet-stac-items"].href
    with fsspec.open(href, mode="rb", **storage_options) as f:
        extents = gpd.read_parquet(f, columns=read_columns)

    # If 'href' is requested, extract it from the 'assets' column
    if wants_href:
        extents["href"] = extents["assets"].apply(lambda x: x["data"]["href"])
        logger.debug("'href' column extracted from 'assets'")

        # Drop 'assets' if it was only read to derive 'href'
        if not wants_assets:
            extents = extents.drop(columns=["assets"])
            logger.debug("'assets' column dropped from the GeoDataFrame")

    return extents
\"https://coclico.blob.core.windows.net/stac/v1/catalog.json\"\n", ")\n", "\n", "ddtm_collection = coclico_catalog.get_child(\"deltares-delta-dtm\")\n", - "ddtm_extents = read_items_extent(\n", - " ddtm_collection, columns=[\"geometry\", \"assets\", \"href\"]\n", + "ddtm_extents = read_snapshot(\n", + " ddtm_collection,\n", + " columns=[\"geometry\", \"assets\", \"href\"],\n", + " storage_options={\"account_name\": \"coclico\"},\n", ")\n", "\n", "ddtm_extents.head()" diff --git a/tutorials/global_coastal_transect_system.ipynb b/tutorials/global_coastal_transect_system.ipynb index cde8b87..b1bdabd 100644 --- a/tutorials/global_coastal_transect_system.ipynb +++ b/tutorials/global_coastal_transect_system.ipynb @@ -81,9 +81,9 @@ "metadata": {}, "outputs": [], "source": [ - "from coastpy.io.utils import read_items_extent\n", + "from coastpy.stac.utils import read_snapshot\n", "\n", - "gcts_extents = read_items_extent(\n", + "gcts_extents = read_snapshot(\n", " gcts_collection,\n", " columns=[\"geometry\", \"assets\", \"href\"],\n", " storage_options=storage_options,\n",