diff --git a/pypgstac/pypgstac/load.py b/pypgstac/pypgstac/load.py index 4e87a9cf..aea85371 100644 --- a/pypgstac/pypgstac/load.py +++ b/pypgstac/pypgstac/load.py @@ -511,8 +511,8 @@ def format_item(self, _item: Union[Path, str, dict]) -> dict: base_item, key, partition_trunc = self.collection_json(item["collection"]) - out["id"] = item.pop("id") - out["collection"] = item.pop("collection") + out["id"] = item.get("id") + out["collection"] = item.get("collection") properties: dict = item.get("properties", {}) dt = properties.get("datetime") @@ -544,9 +544,8 @@ def format_item(self, _item: Union[Path, str, dict]) -> dict: out["partition"] = partition - bbox = item.pop("bbox") - geojson = item.pop("geometry") - if geojson is None and bbox is not None: + geojson = item.get("geometry") + if geojson is None: geometry = None else: geometry = str(Geometry.from_geojson(geojson).wkb) @@ -554,6 +553,12 @@ def format_item(self, _item: Union[Path, str, dict]) -> dict: content = dehydrate(base_item, item) + # Remove keys from the dehydrated item content which are stored directly + # on the table row. 
+ content.pop("id", None) + content.pop("collection", None) + content.pop("geometry", None) + out["content"] = orjson.dumps(content).decode() return out diff --git a/pypgstac/tests/test_load.py b/pypgstac/tests/test_load.py index 29692321..4e2bce2a 100644 --- a/pypgstac/tests/test_load.py +++ b/pypgstac/tests/test_load.py @@ -1,6 +1,7 @@ """Tests for pypgstac.""" +import json from pathlib import Path -from pypgstac.load import Methods, Loader +from pypgstac.load import Methods, Loader, read_json from psycopg.errors import UniqueViolation import pytest @@ -239,3 +240,30 @@ def test_load_items_dehydrated_ignore_succeeds(loader: Loader) -> None: loader.load_items( str(TEST_DEHYDRATED_ITEMS), insert_mode=Methods.ignore, dehydrated=True ) + + +def test_format_items_keys(loader: Loader) -> None: + """Test format_item output keys and that id, collection and geometry are left out of content.""" + loader.load_collections( + str(TEST_COLLECTIONS_JSON), + insert_mode=Methods.ignore, + ) + + items_iter = read_json(str(TEST_ITEMS)) + item_json = next(iter(items_iter)) + out = loader.format_item(item_json) + + # Top level keys expected after format + assert "id" in out + assert "collection" in out + assert "geometry" in out + assert "content" in out + + # Special keys expected not to be in the item content + content_json = json.loads(out["content"]) + assert "id" not in content_json + assert "collection" not in content_json + assert "geometry" not in content_json + + # Ensure bbox is included in content + assert "bbox" in content_json