Skip to content

Commit

Permalink
add collection mapping methods (#110)
Browse files Browse the repository at this point in the history
* add collection mapping methods
  • Loading branch information
Phil Varner authored Apr 19, 2024
1 parent b64c612 commit 8d6dc7d
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 18 deletions.
15 changes: 12 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [unreleased]

## Deprecated

- CLI flags `--skip-upload` and `--skip-validation` deprecated in favor of `--upload/--no-upload` and `--validate/--no-validate`
- Task constructor arguments `skip_upload` and `skip_validation` deprecated in favor of `upload` and `validate`

## Fixed

- Several CLI arguments were missing `help` descriptions
Expand All @@ -20,11 +25,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
metadata. Users wishing to retain this field can call the method `Task.add_software_version_to_item(item)` on the resulting item to add it.
- Task logging now identifies the task instance that is logging, e.g.,
`INFO:my-task-name:[my-collection/workflow-my-workflow/task-1] Task did a thing.`
- Collection assignment now assigns the first matching collection expression, rather
than the last.

## Deprecated
## Added

- CLI flags `--skip-upload` and `--skip-validation` deprecated in favor of `--upload/--no-upload` and `--validate/--no-validate`
- Task constructor arguments `skip_upload` and `skip_validation` deprecated in favor of `upload` and `validate`
- Added property `collection_mapping` to `Task` class to retrieve the collection mappings
from upload_options
- Added utility function `find_collection` to retrieve the collection name for
an Item dict

## Added

Expand Down
23 changes: 14 additions & 9 deletions stactask/task.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import argparse
import asyncio
import itertools
import json
import logging
import os
Expand All @@ -25,7 +24,7 @@
)
from .exceptions import FailedValidation
from .logging import TaskLoggerAdapter
from .utils import stac_jsonpath_match
from .utils import find_collection as utils_find_collection

# types
PathLike = Union[str, Path]
Expand Down Expand Up @@ -151,6 +150,14 @@ def upload_options(self) -> Dict[str, Any]:
else:
raise ValueError(f"upload_options is not a dict: {type(upload_options)}")

@property
def collection_mapping(self) -> Dict[str, str]:
    """Return the ``collections`` entry of ``upload_options``.

    When ``upload_options`` has no ``collections`` key, an empty dict is
    returned.

    Returns:
        Dict[str, str]: The value stored under ``collections``.

    Raises:
        ValueError: If the ``collections`` value is not a dict.
    """
    collection_mapping = self.upload_options.get("collections", {})
    # Guard clause: reject anything that is not a dict before handing it back.
    if not isinstance(collection_mapping, dict):
        raise ValueError(f"collections is not a dict: {type(collection_mapping)}")
    return collection_mapping

@property
def items_as_dicts(self) -> List[Dict[str, Any]]:
features = self._payload.get("features", [])
Expand Down Expand Up @@ -223,13 +230,11 @@ def cleanup_workdir(self) -> None:
)

def assign_collections(self) -> None:
    """Assign a collection name to each Item in the payload.

    Collection names come from the ``collections`` attribute of
    ``upload_options``. Each Item receives the collection of the first
    matching expression, in the order the expressions are defined. Items for
    which no collection is found are left unchanged.
    """
    # Resolve (and validate) the mapping once instead of once per Item.
    collection_mapping = self.collection_mapping
    for item in self._payload["features"]:
        if coll := utils_find_collection(collection_mapping, item):
            item["collection"] = coll

def download_item_assets(
self,
Expand Down
33 changes: 27 additions & 6 deletions stactask/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict
from typing import Any, Dict, Optional

from jsonpath_ng.ext import parser

Expand All @@ -18,8 +18,29 @@ def stac_jsonpath_match(item: Dict[str, Any], expr: str) -> bool:
Returns:
Boolean: Returns True if the jsonpath expression matches the STAC Item JSON
"""
result = [x.value for x in parser.parse(expr).find([item])]
if len(result) == 1:
return True
else:
return False
return len([x.value for x in parser.parse(expr).find([item])]) == 1


def find_collection(
    collection_mapping: Dict[str, str], item: Dict[str, Any]
) -> Optional[str]:
    """Look up the collection name for a STAC Item given as a dictionary.

    The mapping is scanned in its iteration order, and the name paired with the
    first JSONPath expression that matches the Item is returned.

    Args:
        collection_mapping (Dict): A dictionary of collection names to JSONPath
            expressions.
        item (Dict): A STAC Item

    Returns:
        Optional[str]: The name of the first collection whose expression
            matches, or None if no expression matches.
    """
    for name, jsonpath_expr in collection_mapping.items():
        if stac_jsonpath_match(item, jsonpath_expr):
            return name
    return None
6 changes: 6 additions & 0 deletions tests/test_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,5 +177,11 @@ def test_parse_args_upload_and_validation() -> None:
assert args["validate"] is True


def test_collection_mapping(nothing_task: Task) -> None:
    """Check the ``collection_mapping`` exposed by the ``nothing_task`` fixture."""
    expected = {"sentinel-2-l2a": "$[?(@.id =~ 'S2[AB].*')]"}
    assert nothing_task.collection_mapping == expected


# When this module is executed directly, call NothingTask.cli() and keep its result.
if __name__ == "__main__":
output = NothingTask.cli()
52 changes: 52 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from stactask.utils import find_collection, stac_jsonpath_match


def test_stac_jsonpath_match() -> None:
    """Check stac_jsonpath_match on id and processing-baseline expressions."""
    baseline_04 = {"properties": {"s2:processing_baseline": "04.00"}}
    baseline_05 = {"properties": {"s2:processing_baseline": "05.00"}}
    at_least_05 = "$[?(@.properties.['s2:processing_baseline'] >= '05.00')]"
    starts_with_04 = "$[?(@.properties.['s2:processing_baseline'] =~ '^04')]"

    # Each case: (item, JSONPath expression, whether a match is expected).
    cases = [
        ({"id": "1"}, "$[?(@.id =~ '.*')]", True),
        ({"id": "1"}, "$[?(@.id == '1')]", True),
        (baseline_04, at_least_05, False),
        (baseline_05, at_least_05, True),
        (baseline_04, starts_with_04, True),
        (baseline_05, starts_with_04, False),
    ]
    for item, expr, expected in cases:
        assert bool(stac_jsonpath_match(item, expr)) is expected


def test_find_collection() -> None:
    """Check find_collection on single-entry, by-id and by-baseline mappings."""
    assert find_collection({"a": "$[?(@.id =~ '.*')]"}, {"id": "1"}) == "a"

    by_id = {"a": "$[?(@.id == '1')]", "b": "$[?(@.id == '2')]"}
    assert find_collection(by_id, {"id": "2"}) == "b"

    by_baseline = {
        "sentinel-2-c1-l2a": "$[?(@.properties.['s2:processing_baseline'] >= '05.00')]",  # noqa: E501
        "sentinel-2-l2a-baseline-04": "$[?(@.properties.['s2:processing_baseline'] =~ '^04')]",  # noqa: E501
    }
    # Processing baseline value -> collection name expected for it.
    expected_by_baseline = {
        "04.00": "sentinel-2-l2a-baseline-04",
        "05.00": "sentinel-2-c1-l2a",
    }
    for baseline, expected in expected_by_baseline.items():
        item = {"properties": {"s2:processing_baseline": baseline}}
        assert find_collection(by_baseline, item) == expected

0 comments on commit 8d6dc7d

Please sign in to comment.