From 12c34fd89145f6a170d35d0c358a19655f3ffdde Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 19 Feb 2022 07:08:53 -0800 Subject: [PATCH 1/3] update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d702729..7fcd6ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,9 @@ All notable changes to this project will be documented in this file. The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). -## [v0.2.0] - 2022-02-02 - 2022-02-20 +## [v0.2.0] - 2022-02-02 - 2022-02-19 ### Added +- Import main validator as stac-validator was updated to 2.3.0 - Added best practices docuument to repo - Recommend 'self' link in links - Check catalogs and collections use 'catalog.json' or 'collection.json' as a file name From 9dda9b54dae4b0e3051e3b830ee36a6487ecb941 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 19 Feb 2022 07:09:02 -0800 Subject: [PATCH 2/3] version 0.2.0 --- setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index f293421..9fdceb2 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ """ from setuptools import setup, find_packages -__version__ = "0.1.3" +__version__ = "0.2.0" with open("README.md", "r") as fh: long_description = fh.read() @@ -19,7 +19,8 @@ "click>=7.1.2", "requests>=2.19.1", "jsonschema>=3.1.2b0", - "pytest" + "pytest", + "stac-validator==2.3.0" ], entry_points={ 'console_scripts': ['stac_check=stac_check.cli:main'] From 30fb7abd9cbdc0b7ea62b994da0d08c104346568 Mon Sep 17 00:00:00 2001 From: jonhealy1 Date: Sat, 19 Feb 2022 07:09:18 -0800 Subject: [PATCH 3/3] import stac validator --- stac_check/lint.py | 6 +- stac_check/utilities.py | 82 ---------- stac_check/validate.py | 325 ---------------------------------------- 3 files changed, 3 insertions(+), 410 deletions(-) delete mode 100644 stac_check/utilities.py delete mode 100644 stac_check/validate.py diff --git a/stac_check/lint.py b/stac_check/lint.py index a3b9da6..e98e2f6 100644 --- a/stac_check/lint.py +++ b/stac_check/lint.py @@ -1,5 +1,5 @@ -from .validate import StacValidate -from .utilities import is_valid_url +from stac_validator.validate import StacValidate +from stac_validator.utilities import is_valid_url import json import os from dataclasses import dataclass @@ -19,7 +19,7 @@ def __post_init__(self): self.message = self.validate_file(self.item) self.asset_type = self.check_asset_type() self.version = self.check_version() - self.validator_version = "2.4.0" + self.validator_version = "2.3.0" self.update_msg = self.set_update_message() self.valid_stac = self.message["valid_stac"] self.error_type = self.check_error_type() diff --git a/stac_check/utilities.py b/stac_check/utilities.py deleted file mode 100644 index dcf6c3f..0000000 --- a/stac_check/utilities.py +++ /dev/null @@ -1,82 +0,0 @@ -import json -from urllib.parse import urlparse -from urllib.request import urlopen - -import requests # type: ignore -from pystac.serialization import identify_stac_object # type: ignore - -NEW_VERSIONS = [ - "1.0.0-beta.2", - "1.0.0-rc.1", - "1.0.0-rc.2", - "1.0.0-rc.3", - "1.0.0-rc.4", - "1.0.0", -] - - -def is_url(url: str): - try: - result = urlparse(url) - return all([result.scheme, result.netloc]) - except ValueError: - return False - - -def is_valid_url(url: str) -> bool: - result = urlparse(url) - if result.scheme in ("http", "https"): - return True - else: - return False - - -def get_stac_type(stac_content) -> str: - try: - content_types = ["Item", "Catalog", "Collection"] - if "type" in stac_content and stac_content["type"] == "Feature": - return "Item" - if "type" in stac_content and stac_content["type"] in content_types: - return stac_content["type"] - stac_object = identify_stac_object(stac_content) - return stac_object.object_type - except TypeError as e: - return str(e) - - -def fetch_and_parse_file(input_path) -> dict: - data = None - if is_valid_url(input_path): - resp = requests.get(input_path) - data = resp.json() - else: - with open(input_path) as f: - data = json.load(f) - - return data - - -# validate new versions at schemas.stacspec.org -def set_schema_addr(version, stac_type: str): - if version in NEW_VERSIONS: - return f"https://schemas.stacspec.org/v{version}/{stac_type}-spec/json-schema/{stac_type}.json" - else: - return f"https://cdn.staclint.com/v{version}/{stac_type}.json" - - -def link_request( - link, - initial_message, -): - if is_url(link["href"]): - try: - response = urlopen(link["href"]) - status_code = response.getcode() - if status_code == 200: - initial_message["request_valid"].append(link["href"]) - except Exception: - initial_message["request_invalid"].append(link["href"]) - initial_message["format_valid"].append(link["href"]) - else: - initial_message["request_invalid"].append(link["href"]) - initial_message["format_invalid"].append(link["href"]) \ No newline at end of file diff --git a/stac_check/validate.py b/stac_check/validate.py deleted file mode 100644 index 7ce3c2b..0000000 --- a/stac_check/validate.py +++ /dev/null @@ -1,325 +0,0 @@ -import json -import os -from json.decoder import JSONDecodeError -from typing import List -from urllib.error import HTTPError, URLError - -import click # type: ignore -import jsonschema # type: ignore -from jsonschema import RefResolver -from requests import exceptions # type: ignore - -from .utilities import ( - fetch_and_parse_file, - get_stac_type, - link_request, - set_schema_addr, -) - - -class StacValidate: - def __init__( - self, - stac_file: str = None, - recursive: int = -2, - core: bool = False, - links: bool = False, - assets: bool = False, - extensions: bool = False, - custom: str = "", - verbose: bool = False, - no_output: bool = False, - log: str = "", - ): - self.stac_file = stac_file - self.message: list = [] - self.custom = custom - self.links = links - self.assets = assets - self.recursive = recursive - self.extensions = extensions - self.core = core - self.stac_content: dict = {} - self.version = "" - self.depth: int = 0 - self.skip_val = False - self.verbose = verbose - self.no_output = False - self.valid = False - self.log = log - - def create_err_msg(self, err_type: str, err_msg: str) -> dict: - self.valid = False - return { - "version": self.version, - "path": self.stac_file, - "schema": [self.custom], - "valid_stac": False, - "error_type": err_type, - "error_message": err_msg, - } - - def create_links_message(self): - format_valid: List[str] = [] - format_invalid: List[str] = [] - request_valid: List[str] = [] - request_invalid: List[str] = [] - return { - "format_valid": format_valid, - "format_invalid": format_invalid, - "request_valid": request_valid, - "request_invalid": request_invalid, - } - - def create_message(self, stac_type: str, val_type: str) -> dict: - return { - "version": self.version, - "path": self.stac_file, - "schema": [self.custom], - "valid_stac": False, - "asset_type": stac_type.upper(), - "validation_method": val_type, - } - - def assets_validator(self) -> dict: - initial_message = self.create_links_message() - for _, value in self.stac_content["assets"].items(): - link_request(value, initial_message) - return initial_message - - def links_validator(self) -> dict: - initial_message = self.create_links_message() - # get root_url for checking relative links - root_url = "" - for link in self.stac_content["links"]: - if link["rel"] == "self" and link["href"][0:4] == "http": - root_url = ( - link["href"].split("/")[0] + "//" + link["href"].split("/")[2] - ) - elif link["rel"] == "alternate" and link["href"][0:4] == "http": - root_url = ( - link["href"].split("/")[0] + "//" + link["href"].split("/")[2] - ) - for link in self.stac_content["links"]: - if link["href"][0:4] != "http": - link["href"] = root_url + link["href"][1:] - link_request(link, initial_message) - - return initial_message - - def extensions_validator(self, stac_type: str) -> dict: - message = self.create_message(stac_type, "extensions") - message["schema"] = [] - valid = True - if stac_type == "ITEM": - try: - if "stac_extensions" in self.stac_content: - # error with the 'proj' extension not being 'projection' in older stac - if "proj" in self.stac_content["stac_extensions"]: - index = self.stac_content["stac_extensions"].index("proj") - self.stac_content["stac_extensions"][index] = "projection" - schemas = self.stac_content["stac_extensions"] - for extension in schemas: - if "http" not in extension: - # where are the extensions for 1.0.0-beta.2 on cdn.staclint.com? - if self.version == "1.0.0-beta.2": - self.stac_content["stac_version"] = "1.0.0-beta.1" - self.version = self.stac_content["stac_version"] - extension = f"https://cdn.staclint.com/v{self.version}/extension/{extension}.json" - self.custom = extension - self.custom_validator() - message["schema"].append(extension) - except jsonschema.exceptions.ValidationError as e: - valid = False - if e.absolute_path: - err_msg = f"{e.message}. Error is in {' -> '.join([str(i) for i in e.absolute_path])}" - else: - err_msg = f"{e.message} of the root of the STAC object" - message = self.create_err_msg("ValidationError", err_msg) - return message - except Exception as e: - valid = False - err_msg = f"{e}. Error in Extensions." - return self.create_err_msg("Exception", err_msg) - else: - self.core_validator(stac_type) - message["schema"] = [self.custom] - self.valid = valid - return message - - def custom_validator(self): - # in case the path to custom json schema is local - # it may contain relative references - schema = fetch_and_parse_file(self.custom) - if os.path.exists(self.custom): - custom_abspath = os.path.abspath(self.custom) - custom_dir = os.path.dirname(custom_abspath).replace("\\", "/") - custom_uri = f"file:///{custom_dir}/" - resolver = RefResolver(custom_uri, self.custom) - jsonschema.validate(self.stac_content, schema, resolver=resolver) - else: - schema = fetch_and_parse_file(self.custom) - jsonschema.validate(self.stac_content, schema) - - def core_validator(self, stac_type: str): - stac_type = stac_type.lower() - self.custom = set_schema_addr(self.version, stac_type.lower()) - self.custom_validator() - - def default_validator(self, stac_type: str) -> dict: - message = self.create_message(stac_type, "default") - message["schema"] = [] - self.core_validator(stac_type) - core_schema = self.custom - message["schema"].append(core_schema) - stac_type = stac_type.upper() - if stac_type == "ITEM": - message = self.extensions_validator(stac_type) - message["validation_method"] = "default" - message["schema"].append(core_schema) - if self.links: - message["links_validated"] = self.links_validator() - if self.assets: - message["assets_validated"] = self.assets_validator() - return message - - def recursive_validator(self, stac_type: str): - if self.skip_val is False: - self.custom = set_schema_addr(self.version, stac_type.lower()) - message = self.create_message(stac_type, "recursive") - message["valid_stac"] = False - try: - _ = self.default_validator(stac_type) - - except jsonschema.exceptions.ValidationError as e: - if e.absolute_path: - err_msg = f"{e.message}. Error is in {' -> '.join([str(i) for i in e.absolute_path])}" - else: - err_msg = f"{e.message} of the root of the STAC object" - message.update(self.create_err_msg("ValidationError", err_msg)) - self.message.append(message) - return - message["valid_stac"] = True - self.message.append(message) - self.depth = self.depth + 1 - if self.recursive > -1: - if self.depth >= int(self.recursive): - self.skip_val = True - base_url = self.stac_file - for link in self.stac_content["links"]: - if link["rel"] == "child" or link["rel"] == "item": - address = link["href"] - if "http" not in address: - x = str(base_url).split("/") - x.pop(-1) - st = x[0] - for i in range(len(x)): - if i > 0: - st = st + "/" + x[i] - self.stac_file = st + "/" + address - else: - self.stac_file = address - self.stac_content = fetch_and_parse_file(self.stac_file) - self.stac_content["stac_version"] = self.version - stac_type = get_stac_type(self.stac_content).lower() - - if link["rel"] == "child": - - if self.verbose is True: - click.echo(json.dumps(message, indent=4)) - self.recursive_validator(stac_type) - - if link["rel"] == "item": - self.custom = set_schema_addr(self.version, stac_type.lower()) - message = self.create_message(stac_type, "recursive") - if self.version == "0.7.0": - schema = fetch_and_parse_file(self.custom) - # this next line prevents this: unknown url type: 'geojson.json' ?? - schema["allOf"] = [{}] - jsonschema.validate(self.stac_content, schema) - else: - msg = self.default_validator(stac_type) - message["schema"] = msg["schema"] - message["valid_stac"] = True - - if self.log != "": - self.message.append(message) - if self.recursive < 5: - self.message.append(message) - if self.verbose is True: - click.echo(json.dumps(message, indent=4)) - - def validate_dict(cls, stac_content): - cls.stac_content = stac_content - return cls.run() - - def run(cls): - message = {} - try: - if cls.stac_file is not None: - cls.stac_content = fetch_and_parse_file(cls.stac_file) - stac_type = get_stac_type(cls.stac_content).upper() - cls.version = cls.stac_content["stac_version"] - - if cls.core is True: - message = cls.create_message(stac_type, "core") - cls.core_validator(stac_type) - message["schema"] = [cls.custom] - cls.valid = True - elif cls.custom != "": - message = cls.create_message(stac_type, "custom") - message["schema"] = [cls.custom] - cls.custom_validator() - cls.valid = True - elif cls.recursive > -2: - cls.recursive_validator(stac_type) - cls.valid = True - elif cls.extensions is True: - message = cls.extensions_validator(stac_type) - else: - cls.valid = True - message = cls.default_validator(stac_type) - - except ValueError as e: - message.update(cls.create_err_msg("ValueError", str(e))) - except URLError as e: - message.update(cls.create_err_msg("URLError", str(e))) - except JSONDecodeError as e: - message.update(cls.create_err_msg("JSONDecodeError", str(e))) - except TypeError as e: - message.update(cls.create_err_msg("TypeError", str(e))) - except FileNotFoundError as e: - message.update(cls.create_err_msg("FileNotFoundError", str(e))) - except ConnectionError as e: - message.update(cls.create_err_msg("ConnectionError", str(e))) - except exceptions.SSLError as e: - message.update(cls.create_err_msg("SSLError", str(e))) - except OSError as e: - message.update(cls.create_err_msg("OSError", str(e))) - except jsonschema.exceptions.ValidationError as e: - if e.absolute_path: - err_msg = f"{e.message}. Error is in {' -> '.join([str(i) for i in e.absolute_path])}" - else: - err_msg = f"{e.message} of the root of the STAC object" - message.update(cls.create_err_msg("ValidationError", err_msg)) - except KeyError as e: - message.update(cls.create_err_msg("KeyError", str(e))) - except HTTPError as e: - message.update(cls.create_err_msg("HTTPError", str(e))) - except Exception as e: - message.update(cls.create_err_msg("Exception", str(e))) - - message["valid_stac"] = cls.valid - - if cls.recursive < -1: - cls.message.append(message) - - if cls.log != "": - f = open(cls.log, "w") - f.write(json.dumps(cls.message, indent=4)) - f.close() - - if cls.valid: - return True - else: - return False \ No newline at end of file