From 6422e576a701087de89647dca457b60b2f49d5bd Mon Sep 17 00:00:00 2001 From: Vincent Privat Date: Fri, 29 Nov 2024 17:05:23 +0100 Subject: [PATCH] Configure whether to open URLs when validating assets --- CHANGELOG.md | 1 + README.md | 2 ++ stac_validator/stac_validator.py | 8 ++++++++ stac_validator/utilities.py | 19 +++++++++++-------- stac_validator/validate.py | 5 ++++- tests/test_assets.py | 29 ++++++++++++++++++++++++++++- 6 files changed, 54 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3b2104..77b4843 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/) ### Added - Added publish.yml to automatically publish new releases to PyPI [#236](https://github.com/stac-utils/stac-validator/pull/236) +- Configure whether to open URLs when validating assets [#238](https://github.com/stac-utils/stac-validator/pull/238) ## [v3.4.0] - 2024-10-08 diff --git a/README.md b/README.md index fd0490e..48d3b0a 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,8 @@ Options: --collections Validate /collections response. --item-collection Validate item collection response. Can be combined with --pages. Defaults to one page. + --no-assets-urls Disables the opening of href links when validating + assets (enabled by default). -p, --pages INTEGER Maximum number of pages to validate via --item- collection. Defaults to one page. -v, --verbose Enables verbose output for recursive mode. diff --git a/stac_validator/stac_validator.py b/stac_validator/stac_validator.py index 899fdd9..2e8becd 100644 --- a/stac_validator/stac_validator.py +++ b/stac_validator/stac_validator.py @@ -109,6 +109,11 @@ def collections_summary(message: List[Dict[str, Any]]) -> None: is_flag=True, help="Validate item collection response. Can be combined with --pages. 
Defaults to one page.", ) +@click.option( + "--no-assets-urls", + is_flag=True, + help="Disables the opening of href links when validating assets (enabled by default).", +) @click.option( "--pages", "-p", @@ -128,6 +133,7 @@ def main( stac_file: str, collections: bool, item_collection: bool, + no_assets_urls: bool, pages: int, recursive: bool, max_depth: int, @@ -147,6 +153,7 @@ def main( stac_file (str): Path to the STAC file to be validated. collections (bool): Validate response from /collections endpoint. item_collection (bool): Whether to validate item collection responses. + no_assets_urls (bool): Whether to disable opening href links when validating assets (opening is enabled by default). pages (int): Maximum number of pages to validate via `item_collection`. recursive (bool): Whether to recursively validate all related STAC objects. max_depth (int): Maximum depth to traverse when recursing. @@ -177,6 +184,7 @@ def main( core=core, links=links, assets=assets, + assets_open_urls=not no_assets_urls, extensions=extensions, custom=custom, verbose=verbose, diff --git a/stac_validator/utilities.py b/stac_validator/utilities.py index 5d6a905..1c70e51 100644 --- a/stac_validator/utilities.py +++ b/stac_validator/utilities.py @@ -152,6 +152,7 @@ def set_schema_addr(version: str, stac_type: str) -> str: def link_request( link: Dict, initial_message: Dict, + open_urls: bool = True, ) -> None: """Makes a request to a URL and appends it to the relevant field of the initial message. @@ -159,6 +160,7 @@ def link_request( link: A dictionary containing a "href" key which is a string representing a URL. initial_message: A dictionary containing lists for "request_valid", "request_invalid", "format_valid", and "format_invalid" URLs. 
+ open_urls: Whether to open the link's href URL. When False, no request is made and only the URL format check is recorded. Returns: None @@ -166,14 +168,15 @@ def link_request( """ if is_url(link["href"]): try: - if "s3" in link["href"]: - context = ssl._create_unverified_context() - response = urlopen(link["href"], context=context) - else: - response = urlopen(link["href"]) - status_code = response.getcode() - if status_code == 200: - initial_message["request_valid"].append(link["href"]) + if open_urls: + if "s3" in link["href"]: + context = ssl._create_unverified_context() + response = urlopen(link["href"], context=context) + else: + response = urlopen(link["href"]) + status_code = response.getcode() + if status_code == 200: + initial_message["request_valid"].append(link["href"]) except Exception: initial_message["request_invalid"].append(link["href"]) initial_message["format_valid"].append(link["href"]) diff --git a/stac_validator/validate.py b/stac_validator/validate.py index c4a0c94..b48d3c0 100644 --- a/stac_validator/validate.py +++ b/stac_validator/validate.py @@ -33,6 +33,7 @@ class StacValidate: core (bool): Whether to only validate the core STAC object (without extensions). links (bool): Whether to additionally validate links (only works in default mode). assets (bool): Whether to additionally validate assets (only works in default mode). + assets_open_urls (bool): Whether to open assets URLs when validating assets. extensions (bool): Whether to only validate STAC object extensions. custom (str): The local filepath or remote URL of a custom JSON schema to validate the STAC object. verbose (bool): Whether to enable verbose output in recursive mode. 
@@ -54,6 +55,7 @@ def __init__( core: bool = False, links: bool = False, assets: bool = False, + assets_open_urls: bool = True, extensions: bool = False, custom: str = "", verbose: bool = False, @@ -67,6 +69,7 @@ def __init__( self.schema = custom self.links = links self.assets = assets + self.assets_open_urls = assets_open_urls self.recursive = recursive self.max_depth = max_depth self.extensions = extensions @@ -122,7 +125,7 @@ def assets_validator(self) -> Dict: assets = self.stac_content.get("assets") if assets: for asset in assets.values(): - link_request(asset, initial_message) + link_request(asset, initial_message, self.assets_open_urls) return initial_message def links_validator(self) -> Dict: diff --git a/tests/test_assets.py b/tests/test_assets.py index 8c33bb5..8ac2f82 100644 --- a/tests/test_assets.py +++ b/tests/test_assets.py @@ -1,5 +1,5 @@ """ -Description: Test --links option +Description: Test --assets option """ @@ -78,6 +78,33 @@ def test_assets_v100(): ] +def test_assets_v100_no_links(): + stac_file = "tests/test_data/v100/simple-item.json" + stac = stac_validator.StacValidate(stac_file, assets=True, assets_open_urls=False) + stac.run() + assert stac.message == [ + { + "version": "1.0.0", + "path": "tests/test_data/v100/simple-item.json", + "schema": [ + "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json" + ], + "valid_stac": True, + "asset_type": "ITEM", + "validation_method": "default", + "assets_validated": { + "format_valid": [ + "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2_test.tif", + "https://storage.googleapis.com/open-cogs/stac-examples/20201211_223832_CS2_test.jpg", + ], + "format_invalid": [], + "request_valid": [], + "request_invalid": [], + }, + } + ] + + def test_assets_on_collection_without_assets_ok(): stac_file = "tests/test_data/v100/collection.json" stac = stac_validator.StacValidate(stac_file, assets=True)