Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ref/snapshot #11

Merged
merged 2 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions analytics/benchmark_data_fetching.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
"from dotenv import load_dotenv\n",
"\n",
"from coastpy.io.engine import STACQueryEngine\n",
"from coastpy.io.utils import read_items_extent\n",
"from coastpy.stac.utils import read_snapshot\n",
"\n",
"load_dotenv(override=True)\n",
"\n",
Expand Down Expand Up @@ -85,7 +85,7 @@
" \"https://coclico.blob.core.windows.net/stac/v1/catalog.json\"\n",
")\n",
"gcts_collection = coclico_catalog.get_child(\"gcts\")\n",
"gcts_extents = read_items_extent(gcts_collection, columns=[\"geometry\", \"assets\"])\n",
"gcts_extents = read_snapshot(gcts_collection, columns=[\"geometry\", \"assets\"])\n",
"gcts_extents.head()"
]
},
Expand Down
10 changes: 5 additions & 5 deletions analytics/hypsometry.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@
"metadata": {},
"outputs": [],
"source": [
"from coastpy.io.utils import read_items_extent\n",
"from coastpy.stac.utils import read_snapshot\n",
"\n",
"# Read the CoCliCo STAC catalog\n",
"coclico_catalog = pystac.Catalog.from_file(\n",
Expand All @@ -107,11 +107,11 @@
"\n",
"# Read the spatial footprint of the transect partitions.\n",
"gcts_collection = coclico_catalog.get_child(\"gcts\")\n",
"gcts_extents = read_items_extent(gcts_collection, columns=[\"geometry\", \"assets\"])\n",
"gcts_extents = read_snapshot(gcts_collection, columns=[\"geometry\", \"assets\"])\n",
"\n",
"# Read the spatial footprint of the DeltaDTM tiles.\n",
"ddtm_collection = coclico_catalog.get_child(\"deltares-delta-dtm\")\n",
"ddtm_extents = read_items_extent(ddtm_collection, columns=[\"geometry\", \"assets\"])"
"ddtm_extents = read_snapshot(ddtm_collection, columns=[\"geometry\", \"assets\"])"
]
},
{
Expand Down Expand Up @@ -174,7 +174,7 @@
" return gpd.GeoDataFrame(df[\"transect_id\"], geometry=geoms, crs=4326)\n",
"\n",
" gcts_collection = coclico_catalog.get_child(\"gcts\")\n",
" gcts_extents = read_items_extent(gcts_collection, columns=[\"geometry\", \"assets\"])\n",
" gcts_extents = read_snapshot(gcts_collection, columns=[\"geometry\", \"assets\"])\n",
" gcts_hrefs = gcts_extents.href.to_list()\n",
"\n",
" # template GDF that matches what is retunred from map_extract_landward_side\n",
Expand Down Expand Up @@ -262,7 +262,7 @@
" df.to_parquet(f)\n",
"\n",
" ddtm_collection = coclico_catalog.get_child(\"deltares-delta-dtm\")\n",
" ddtm_extents = read_items_extent(ddtm_collection, columns=[\"geometry\", \"assets\"])\n",
" ddtm_extents = read_snapshot(ddtm_collection, columns=[\"geometry\", \"assets\"])\n",
"\n",
" tiles = make_mercantiles(MERCANTILES_LEVEL)\n",
"\n",
Expand Down
4 changes: 2 additions & 2 deletions src/coastpy/io/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import shapely
from shapely.wkb import loads as wkb_loads

from coastpy.io.utils import read_items_extent
from coastpy.stac.utils import read_snapshot


class BaseQueryEngine:
Expand Down Expand Up @@ -94,7 +94,7 @@ def __init__(
columns: list[str] | None = None,
) -> None:
super().__init__(storage_backend=storage_backend)
self.extents = read_items_extent(
self.extents = read_snapshot(
stac_collection, columns=["geometry", "assets", "proj:epsg", "href"]
)
self.proj_epsg = self.extents["proj:epsg"].unique().item()
Expand Down
45 changes: 0 additions & 45 deletions src/coastpy/io/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import copy
import json
import logging
import pathlib
Expand Down Expand Up @@ -258,50 +257,6 @@ def name_bounds(bounds, crs):
return filename


def read_items_extent(collection, columns=None, storage_options=None):
    """
    Read the items table of a STAC collection into a GeoDataFrame.

    The table is read from the parquet file referenced by the collection's
    "geoparquet-stac-items" asset.

    Args:
        collection: A STAC collection object whose ``assets`` mapping contains a
            "geoparquet-stac-items" entry.
        columns: Columns to return. Default is ["geometry", "assets", "href"].
            "href" is not read from the file; it is derived per row from
            ``assets["data"]["href"]``. The sequence passed in is never modified.
        storage_options: Storage options to pass to fsspec. Default is None,
            in which case no extra options are passed.

    Returns:
        GeoDataFrame containing the requested columns. When "href" is
        requested it is the last column.
    """
    if storage_options is None:
        storage_options = {}

    # Set default columns
    if columns is None:
        columns = ["geometry", "assets", "href"]

    # Work on a private copy: appending to `columns` directly would leak the
    # extra 'assets' entry into the caller's list.
    requested = list(columns)
    wants_href = "href" in requested
    wants_assets = "assets" in requested

    # 'href' does not exist in the file, so never ask the reader for it.
    read_columns = [c for c in requested if c != "href"]

    # 'assets' is only needed as the source of 'href'; read it temporarily
    # when the caller asked for 'href' but not for 'assets'.
    if wants_href and not wants_assets:
        read_columns.append("assets")
        logger.debug("'assets' column added to the list of columns")

    # Open the parquet file and read the specified columns
    href = collection.assets["geoparquet-stac-items"].href
    with fsspec.open(href, mode="rb", **storage_options) as f:
        extents = gpd.read_parquet(f, columns=read_columns)

    if wants_href:
        extents["href"] = extents["assets"].apply(lambda x: x["data"]["href"])
        logger.debug("'href' column extracted from 'assets'")

        # Drop the helper column again if it was not originally requested
        if not wants_assets:
            extents = extents.drop(columns=["assets"])
            logger.debug("'assets' column dropped from the GeoDataFrame")

    return extents


def write_log_entry(
container: str,
name: str,
Expand Down
50 changes: 50 additions & 0 deletions src/coastpy/stac/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
import copy
import itertools
import logging
import operator

import fsspec
import geopandas as gpd
import pystac
import xarray as xr

logger = logging.getLogger(__name__)


def collate(items: xr.DataArray) -> list[pystac.Item]:
"""
Expand Down Expand Up @@ -67,3 +73,47 @@ def stackstac_to_dataset(stack: xr.DataArray) -> xr.Dataset:

ds = ds.drop_dims("band")
return ds


def read_snapshot(collection, columns=None, storage_options=None):
    """
    Read the items snapshot of a STAC collection into a GeoDataFrame.

    The snapshot is the parquet file referenced by the collection's
    "geoparquet-stac-items" asset.

    Args:
        collection: A STAC collection object whose ``assets`` mapping contains a
            "geoparquet-stac-items" entry.
        columns: Columns to return. Default is ["geometry", "assets", "href"].
            "href" is not read from the file; it is derived per row from
            ``assets["data"]["href"]``. The sequence passed in is never modified.
        storage_options: Storage options to pass to fsspec. Default is
            {"account_name": "coclico"}.

    Returns:
        GeoDataFrame containing the requested columns. When "href" is
        requested it is the last column.
    """
    if storage_options is None:
        storage_options = {"account_name": "coclico"}

    # Set default columns
    if columns is None:
        columns = ["geometry", "assets", "href"]

    # Work on a private copy: appending to `columns` directly would leak the
    # extra 'assets' entry into the caller's list.
    requested = list(columns)
    wants_href = "href" in requested
    wants_assets = "assets" in requested

    # 'href' does not exist in the file, so never ask the reader for it.
    read_columns = [c for c in requested if c != "href"]

    # 'assets' is only needed as the source of 'href'; read it temporarily
    # when the caller asked for 'href' but not for 'assets'.
    if wants_href and not wants_assets:
        read_columns.append("assets")
        logger.debug("'assets' column added to the list of columns")

    # Open the parquet file and read the specified columns
    href = collection.assets["geoparquet-stac-items"].href
    with fsspec.open(href, mode="rb", **storage_options) as f:
        extents = gpd.read_parquet(f, columns=read_columns)

    if wants_href:
        extents["href"] = extents["assets"].apply(lambda x: x["data"]["href"])
        logger.debug("'href' column extracted from 'assets'")

        # Drop the helper column again if it was not originally requested
        if not wants_assets:
            extents = extents.drop(columns=["assets"])
            logger.debug("'assets' column dropped from the GeoDataFrame")

    return extents
12 changes: 6 additions & 6 deletions tutorials/deltadtm.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@
"import rioxarray\n",
"import shapely\n",
"import xarray as xr\n",
"from ipyleaflet import Map, basemaps\n",
"\n",
"storage_options = {\"account_name\": \"coclico\"}"
"from ipyleaflet import Map, basemaps"
]
},
{
Expand All @@ -45,15 +43,17 @@
"metadata": {},
"outputs": [],
"source": [
"from coastpy.io.utils import read_items_extent\n",
"from coastpy.stac.utils import read_snapshot\n",
"\n",
"coclico_catalog = pystac.Catalog.from_file(\n",
" \"https://coclico.blob.core.windows.net/stac/v1/catalog.json\"\n",
")\n",
"\n",
"ddtm_collection = coclico_catalog.get_child(\"deltares-delta-dtm\")\n",
"ddtm_extents = read_items_extent(\n",
" ddtm_collection, columns=[\"geometry\", \"assets\", \"href\"]\n",
"ddtm_extents = read_snapshot(\n",
" ddtm_collection,\n",
" columns=[\"geometry\", \"assets\", \"href\"],\n",
" storage_options={\"account_name\": \"coclico\"},\n",
")\n",
"\n",
"ddtm_extents.head()"
Expand Down
4 changes: 2 additions & 2 deletions tutorials/global_coastal_transect_system.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@
"metadata": {},
"outputs": [],
"source": [
"from coastpy.io.utils import read_items_extent\n",
"from coastpy.stac.utils import read_snapshot\n",
"\n",
"gcts_extents = read_items_extent(\n",
"gcts_extents = read_snapshot(\n",
" gcts_collection,\n",
" columns=[\"geometry\", \"assets\", \"href\"],\n",
" storage_options=storage_options,\n",
Expand Down
Loading