diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b7210c3..e768fe0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@ repos:
     hooks:
       - id: black
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.4.1
+    rev: v1.5.1
     hooks:
       - id: mypy
         additional_dependencies:
@@ -13,8 +13,9 @@ repos:
           - pytest
           - types-aiofiles
           - types-python-dateutil
+          - types-tabulate
           - types-tqdm
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: "v0.0.278"
+    rev: "v0.0.285"
     hooks:
       - id: ruff
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c8a8904..0ef3c65 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - `fail_fast` ([#69](https://github.com/stac-utils/stac-asset/pull/69))
 - `assert_asset_exists`, `asset_exists`, `Client.assert_href_exists`, `Client.href_exists` ([#81](https://github.com/stac-utils/stac-asset/pull/81), [#85](https://github.com/stac-utils/stac-asset/pull/85))
 - Blocking interface ([#86](https://github.com/stac-utils/stac-asset/pull/86))
+- `stac-asset info` CLI subcommand ([#82](https://github.com/stac-utils/stac-asset/pull/83))
 
 ### Changed
 
diff --git a/pyproject.toml b/pyproject.toml
index b8df090..b0e5865 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,12 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-cli = ["click~=8.1.5", "click-logging~=1.0.1", "tqdm~=4.66.1"]
+cli = [
+    "click~=8.1.5",
+    "click-logging~=1.0.1",
+    "tabulate~=0.9.0",
+    "tqdm~=4.66.1",
+]
 dev = [
     "black~=23.3",
     "mypy~=1.3",
@@ -39,6 +44,7 @@ dev = [
     "types-aiofiles~=23.1",
     "types-python-dateutil~=2.8.19",
     "types-tqdm~=4.66.0",
+    "types-tabulate~=0.9.0",
 ]
 docs = ["pydata-sphinx-theme~=0.13", "sphinx~=7.2.2", "sphinx-click~=5.0"]
 
diff --git a/src/stac_asset/_cli.py b/src/stac_asset/_cli.py
index 4303cc6..3c8e971 100644
--- a/src/stac_asset/_cli.py
+++ b/src/stac_asset/_cli.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import asyncio
 import json
 import logging
@@ -6,12 +8,13 @@
 from asyncio import Queue
 from dataclasses import dataclass
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 
 import click
 import click_logging
+import tabulate
 import tqdm
-from pystac import Item, ItemCollection
+from pystac import Asset, Item, ItemCollection
 
 from . import Config, ErrorStrategy, _functions
 from .client import Clients
@@ -38,17 +41,18 @@
 
 
 @click.group()
+@click_logging.simple_verbosity_option(logger)  # type: ignore
 def cli() -> None:
     """Work with STAC assets.
 
     See each subcommand's help text for more information:
 
         $ stac-asset download --help
+        $ stac-asset info --help
     """
 
 
 @cli.command()
-@click_logging.simple_verbosity_option(logger)  # type: ignore
 @click.argument("href", required=False)
 @click.argument("directory", required=False)
 @click.option(
@@ -199,15 +203,11 @@ async def download_async(
         overwrite=overwrite,
     )
 
-    if href is None or href == "-":
-        input_dict = json.load(sys.stdin)
-    else:
-        input_dict = json.loads(await read_file(href, config))
+    input_dict = await read_as_dict(href, config)
     if directory is None:
         directory_str = os.getcwd()
     else:
         directory_str = str(directory)
-
     if quiet:
         messages = None
     else:
@@ -220,7 +220,7 @@ async def download_async(
         sys.exit(1)
     elif type_ == "Feature":
         item = Item.from_dict(input_dict)
-        if href:
+        if href and href != "-":
             item.set_self_href(href)
             item.make_asset_hrefs_absolute()
 
@@ -268,7 +268,18 @@ async def download() -> Union[Item, ItemCollection]:
         json.dump(output.to_dict(transform_hrefs=False), sys.stdout)
 
 
-async def read_file(href: str, config: Config) -> bytes:
+async def read_as_dict(href: Optional[str], config: Config) -> Dict[str, Any]:
+    if href is None or href == "-":
+        data = json.load(sys.stdin)
+    else:
+        data = json.loads(await read(href, config))
+    if not isinstance(data, dict):
+        raise ValueError(f"input is not a dictionary: {type(data).__name__}")
+    else:
+        return data
+
+
+async def read(href: str, config: Config) -> bytes:
     clients = Clients(config)
     async with await clients.get_client(href) as client:
         data = b""
@@ -346,5 +357,175 @@ class Download:
     progress_bar: Tqdm
 
 
+@cli.command()
+@click.argument("HREF", required=False)
+@click.option(
+    "-a",
+    "--alternate-assets",
+    help="Alternate asset hrefs to prefer, if available",
+    multiple=True,
+)
+@click.option(
+    "--s3-requester-pays",
+    help="If checking via the s3 client, enable requester pays",
+    default=False,
+    is_flag=True,
+    show_default=True,
+)
+@click.option(
+    "--s3-retry-mode",
+    help="If checking via the s3 client, the retry mode (standard, legacy, and "
+    "adaptive)",
+    default=DEFAULT_S3_RETRY_MODE,
+)
+@click.option(
+    "--s3-max-attempts",
+    help="If checking via the s3 client, the max number of retries",
+    default=DEFAULT_S3_MAX_ATTEMPTS,
+)
+def info(
+    href: Optional[str],
+    alternate_assets: List[str],
+    s3_requester_pays: bool,
+    s3_retry_mode: str,
+    s3_max_attempts: int,
+) -> None:
+    asyncio.run(
+        info_async(
+            href=href,
+            alternate_assets=alternate_assets,
+            s3_requester_pays=s3_requester_pays,
+            s3_max_attempts=s3_max_attempts,
+            s3_retry_mode=s3_retry_mode,
+        )
+    )
+
+
+async def info_async(
+    href: Optional[str],
+    alternate_assets: List[str],
+    s3_requester_pays: bool,
+    s3_retry_mode: str,
+    s3_max_attempts: int,
+) -> None:
+    """Prints information about an item or item collection.
+
+        $ stac-asset info item.json
+    """
+    config = Config(
+        alternate_assets=alternate_assets,
+        s3_requester_pays=s3_requester_pays,
+        s3_retry_mode=s3_retry_mode,
+        s3_max_attempts=s3_max_attempts,
+    )
+    input_dict = await read_as_dict(href, config)
+    type_ = input_dict.get("type")
+    tasks = set()
+    clients = Clients(config)
+    if type_ is None:
+        print("ERROR: missing 'type' field on input dictionary", file=sys.stderr)
+        sys.exit(1)
+    elif type_ == "Feature":
+        item = Item.from_dict(input_dict)
+        if href and href != "-":
+            item.set_self_href(href)
+            item.make_asset_hrefs_absolute()
+        for key, asset in item.assets.items():
+            tasks.add(asyncio.create_task(get_asset_info(key, asset, config, clients)))
+    elif type_ == "FeatureCollection":
+        item_collection = ItemCollection.from_dict(input_dict)
+        for item in item_collection.items:
+            for key, asset in item.assets.items():
+                tasks.add(
+                    asyncio.create_task(get_asset_info(key, asset, config, clients))
+                )
+    else:
+        print(f"ERROR: invalid 'type' field: {type_}", file=sys.stderr)
+        sys.exit(1)
+
+    asset_infos = await asyncio.gather(*tasks)
+    await clients.close_all()
+
+    table_dict = dict()
+    for asset_info in asset_infos:
+        assert isinstance(asset_info, AssetInfo)
+        if asset_info.key not in table_dict:
+            table_dict[asset_info.key] = {
+                "Key": asset_info.key,
+                "Client": asset_info.client,
+                "Media type": asset_info.media_type,
+                "Exists": asset_info.exists,
+                "Note": asset_info.note,
+            }
+        else:
+            if asset_info.exists != table_dict[asset_info.key]["Exists"]:
+                table_dict[asset_info.key]["Exists"] = "Sometimes"
+            if asset_info.client != table_dict[asset_info.key]["Client"]:
+                table_dict[asset_info.key]["Client"] = "Various"
+            if asset_info.media_type != table_dict[asset_info.key]["Media type"]:
+                table_dict[asset_info.key]["Media type"] = "Various"
+            if asset_info.note:
+                # Keep a note even when the first asset with this key had none.
+                notes = (str(table_dict[asset_info.key]["Note"]), asset_info.note)
+                table_dict[asset_info.key]["Note"] = "\n".join(filter(None, notes))
+
+    keys = sorted(table_dict.keys())
+    headers = ["Asset", "Client", "Exists", "Media type", "Note"]
+    table_data = list()
+    for key in keys:
+        value = table_dict[key]
+        table_data.append(
+            [
+                value["Key"],
+                value["Client"],
+                value["Exists"],
+                value["Media type"],
+                value["Note"],
+            ]
+        )
+    print(tabulate.tabulate(table_data, headers=headers))
+
+
+async def get_asset_info(
+    key: str, asset: Asset, config: Config, clients: Clients
+) -> AssetInfo:
+    # TODO refactor with asset_exists
+    href = _functions.get_absolute_asset_href(asset, config.alternate_assets)
+    if href:
+        client = await clients.get_client(href)
+        try:
+            await client.assert_href_exists(href)
+        except Exception as error:
+            note = str(error)
+            exists = False
+        else:
+            note = ""
+            exists = True
+        # TODO clients should probably specify this explicitly
+        name = type(client).__name__.lower()
+        # We can't use `removesuffix` because it was added in Python 3.9
+        if name.endswith("client"):
+            name = name[: -len("client")]
+        return AssetInfo(
+            key=key, client=name, media_type=asset.media_type, exists=exists, note=note
+        )
+    else:
+        return AssetInfo(
+            key=key,
+            client="n/a",
+            exists=False,
+            note="Could not make absolute href",
+        )
+
+
+@dataclass
+class AssetInfo:
+    key: str
+    client: str
+    exists: bool
+    media_type: Optional[str] = None
+    note: str = ""
+
+
 if __name__ == "__main__":
     cli()
diff --git a/tests/test_cli.py b/tests/test_cli.py
index f58b4d9..ebd3984 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -86,3 +86,12 @@ def test_download_item_s3_requester_pays(tmp_path: Path) -> None:
         ],
     )
     assert result.exit_code == 0
+
+
+def test_info(item_path: Path) -> None:
+    runner = CliRunner()
+    result = runner.invoke(
+        stac_asset._cli.cli,
+        ["info", str(item_path)],
+    )
+    assert result.exit_code == 0, result.stdout