From 18b738be8ab4eee1b355d1df9a023ef9e2a0e2bc Mon Sep 17 00:00:00 2001 From: Matt McFarland Date: Fri, 13 May 2022 15:24:47 -0400 Subject: [PATCH 1/2] Ensure special keys not in content when loaded The loader popped keys like collection off the item, then dehydrated the item to be used as the 'content'. With collection removed prior to dehydration, it was flagged with the "do-not-merge" marker because the key is on the base_item. Instead, ensure that id, collection, and geometry are not in content as they are stored on the table row and shouldn't participate in hydration. Additionally, bbox was previously a derived value at search runtime but was recently changed to a returned value if it existed on the item. However, during loading, the bbox was dropped, so it would never exist on the persisted item. --- pypgstac/pypgstac/load.py | 14 ++++++++++---- pypgstac/tests/test_load.py | 30 +++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/pypgstac/pypgstac/load.py b/pypgstac/pypgstac/load.py index 4e87a9cf..791d1628 100644 --- a/pypgstac/pypgstac/load.py +++ b/pypgstac/pypgstac/load.py @@ -511,8 +511,8 @@ def format_item(self, _item: Union[Path, str, dict]) -> dict: base_item, key, partition_trunc = self.collection_json(item["collection"]) - out["id"] = item.pop("id") - out["collection"] = item.pop("collection") + out["id"] = item.get("id") + out["collection"] = item.get("collection") properties: dict = item.get("properties", {}) dt = properties.get("datetime") @@ -544,8 +544,8 @@ def format_item(self, _item: Union[Path, str, dict]) -> dict: out["partition"] = partition - bbox = item.pop("bbox") - geojson = item.pop("geometry") + bbox = item.get("bbox") + geojson = item.get("geometry") if geojson is None and bbox is not None: geometry = None else: @@ -554,6 +554,12 @@ def format_item(self, _item: Union[Path, str, dict]) -> dict: content = dehydrate(base_item, item) + # Remove keys from the dehydrated item content which are stored 
directly + # on the table row. + content.pop("id", None) + content.pop("collection", None) + content.pop("geometry", None) + out["content"] = orjson.dumps(content).decode() return out diff --git a/pypgstac/tests/test_load.py b/pypgstac/tests/test_load.py index 29692321..4e2bce2a 100644 --- a/pypgstac/tests/test_load.py +++ b/pypgstac/tests/test_load.py @@ -1,6 +1,7 @@ """Tests for pypgstac.""" +import json from pathlib import Path -from pypgstac.load import Methods, Loader +from pypgstac.load import Methods, Loader, read_json from psycopg.errors import UniqueViolation import pytest @@ -239,3 +240,30 @@ def test_load_items_dehydrated_ignore_succeeds(loader: Loader) -> None: loader.load_items( str(TEST_DEHYDRATED_ITEMS), insert_mode=Methods.ignore, dehydrated=True ) + + +def test_format_items_keys(loader: Loader) -> None: + """Test pypgstac items ignore loader.""" + loader.load_collections( + str(TEST_COLLECTIONS_JSON), + insert_mode=Methods.ignore, + ) + + items_iter = read_json(str(TEST_ITEMS)) + item_json = next(iter(items_iter)) + out = loader.format_item(item_json) + + # Top level keys expected after format + assert "id" in out + assert "collection" in out + assert "geometry" in out + assert "content" in out + + # Special keys expected not to be in the item content + content_json = json.loads(out["content"]) + assert "id" not in content_json + assert "collection" not in content_json + assert "geometry" not in content_json + + # Ensure bbox is included in content + assert "bbox" in content_json From 98968ab0b1543c83a09462c255b61bdb7d905a52 Mon Sep 17 00:00:00 2001 From: Matt McFarland Date: Fri, 13 May 2022 16:58:15 -0400 Subject: [PATCH 2/2] Simplify geometry check --- pypgstac/pypgstac/load.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pypgstac/pypgstac/load.py b/pypgstac/pypgstac/load.py index 791d1628..aea85371 100644 --- a/pypgstac/pypgstac/load.py +++ b/pypgstac/pypgstac/load.py @@ -544,9 +544,8 @@ def format_item(self, _item: 
Union[Path, str, dict]) -> dict: out["partition"] = partition - bbox = item.get("bbox") geojson = item.get("geometry") - if geojson is None and bbox is not None: + if geojson is None: geometry = None else: geometry = str(Geometry.from_geojson(geojson).wkb)