Skip to content

Commit

Permalink
feat: add info command
Browse files Browse the repository at this point in the history
  • Loading branch information
gadomski committed Aug 23, 2023
1 parent 7ccd85f commit 6c0cf64
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 13 deletions.
5 changes: 3 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ repos:
hooks:
- id: black
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.4.1
rev: v1.5.1
hooks:
- id: mypy
additional_dependencies:
Expand All @@ -13,8 +13,9 @@ repos:
- pytest
- types-aiofiles
- types-python-dateutil
- types-tabulate
- types-tqdm
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: "v0.0.278"
rev: "v0.0.285"
hooks:
- id: ruff
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- `fail_fast` ([#69](https://github.com/stac-utils/stac-asset/pull/69))
- `assert_asset_exists`, `asset_exists`, `Client.assert_href_exists`, `Client.href_exists` ([#81](https://github.com/stac-utils/stac-asset/pull/81), [#85](https://github.com/stac-utils/stac-asset/pull/85))
- Blocking interface ([#86](https://github.com/stac-utils/stac-asset/pull/86))
- `stac-asset info` CLI subcommand ([#83](https://github.com/stac-utils/stac-asset/pull/83))

### Changed

Expand Down
8 changes: 7 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@ dependencies = [
]

[project.optional-dependencies]
cli = ["click~=8.1.5", "click-logging~=1.0.1", "tqdm~=4.66.1"]
cli = [
"click~=8.1.5",
"click-logging~=1.0.1",
"tabulate~=0.9.0",
"tqdm~=4.66.1",
]
dev = [
"black~=23.3",
"mypy~=1.3",
Expand All @@ -39,6 +44,7 @@ dev = [
"types-aiofiles~=23.1",
"types-python-dateutil~=2.8.19",
"types-tqdm~=4.66.0",
"types-tabulate~=0.9.0",
]
docs = ["pydata-sphinx-theme~=0.13", "sphinx~=7.2.2", "sphinx-click~=5.0"]

Expand Down
201 changes: 191 additions & 10 deletions src/stac_asset/_cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import asyncio
import json
import logging
Expand All @@ -6,12 +8,13 @@
from asyncio import Queue
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Any, List, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

import click
import click_logging
import tabulate
import tqdm
from pystac import Item, ItemCollection
from pystac import Asset, Item, ItemCollection

from . import Config, ErrorStrategy, _functions
from .client import Clients
Expand All @@ -38,17 +41,18 @@


# Root click command group for the command-line interface. Subcommands are
# registered on it with `@cli.command()`, and click shows the docstring below
# as the top-level help text.
@click.group()
@click_logging.simple_verbosity_option(logger) # type: ignore
def cli() -> None:
"""Work with STAC assets.
See each subcommand's help text for more information:
$ stac-asset download --help
$ stac-asset info --help
"""


@cli.command()
@click_logging.simple_verbosity_option(logger) # type: ignore
@click.argument("href", required=False)
@click.argument("directory", required=False)
@click.option(
Expand Down Expand Up @@ -199,15 +203,11 @@ async def download_async(
overwrite=overwrite,
)

if href is None or href == "-":
input_dict = json.load(sys.stdin)
else:
input_dict = json.loads(await read_file(href, config))
input_dict = await read_as_dict(href, config)
if directory is None:
directory_str = os.getcwd()
else:
directory_str = str(directory)

if quiet:
messages = None
else:
Expand All @@ -220,7 +220,7 @@ async def download_async(
sys.exit(1)
elif type_ == "Feature":
item = Item.from_dict(input_dict)
if href:
if href and href != "-":
item.set_self_href(href)
item.make_asset_hrefs_absolute()

Expand Down Expand Up @@ -268,7 +268,18 @@ async def download() -> Union[Item, ItemCollection]:
json.dump(output.to_dict(transform_hrefs=False), sys.stdout)


async def read_file(href: str, config: Config) -> bytes:
async def read_as_dict(href: Optional[str], config: Config) -> Dict[str, Any]:
    """Reads JSON from an href or from standard input and returns it as a dict.

    Args:
        href: The location to read. If ``None`` or ``"-"``, the JSON is read
            from standard input instead.
        config: The configuration passed through to ``read`` when fetching
            ``href``.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If the decoded JSON is not an object (e.g. it is a list
            or a scalar).
    """
    if href is None or href == "-":
        data = json.load(sys.stdin)
    else:
        data = json.loads(await read(href, config))
    if not isinstance(data, dict):
        # Python objects have no ``__type__`` attribute, so looking it up
        # raised an AttributeError that masked this ValueError.
        raise ValueError(f"input is not a dictionary: {type(data).__name__}")
    return data


async def read(href: str, config: Config) -> bytes:
clients = Clients(config)
async with await clients.get_client(href) as client:
data = b""
Expand Down Expand Up @@ -346,5 +357,175 @@ class Download:
progress_bar: Tqdm


@cli.command()
@click.argument("HREF", required=False)
@click.option(
    "-a",
    "--alternate-assets",
    help="Alternate asset hrefs to prefer, if available",
    multiple=True,
)
@click.option(
    "--s3-requester-pays",
    help="If checking via the s3 client, enable requester pays",
    default=False,
    is_flag=True,
    show_default=True,
)
@click.option(
    "--s3-retry-mode",
    help="If checking via the s3 client, the retry mode (standard, legacy, and "
    "adaptive)",
    default=DEFAULT_S3_RETRY_MODE,
)
@click.option(
    "--s3-max-attempts",
    help="If checking via the s3 client, the max number of retries",
    default=DEFAULT_S3_MAX_ATTEMPTS,
)
def info(
    href: Optional[str],
    alternate_assets: List[str],
    s3_requester_pays: bool,
    s3_retry_mode: str,
    s3_max_attempts: int,
) -> None:
    """Prints information about an item or item collection.

    If HREF is not provided, or is "-", the input is read from standard input.

        $ stac-asset info item.json
    """
    # click builds the `--help` text from the docstring of the decorated
    # function, so the description has to live here rather than only on the
    # coroutine that does the work.
    #
    # This function is a synchronous entry point: it forwards every option
    # unchanged to `info_async` and drives it to completion on a new event loop.
    asyncio.run(
        info_async(
            href=href,
            alternate_assets=alternate_assets,
            s3_requester_pays=s3_requester_pays,
            s3_max_attempts=s3_max_attempts,
            s3_retry_mode=s3_retry_mode,
        )
    )


def _summarize_asset_infos(asset_infos: List[AssetInfo]) -> List[List[Any]]:
    """Collapses per-asset results into one table row per asset key.

    Rows are returned sorted by asset key, with columns in the order
    key, client, exists, media type, note.
    """
    rows: Dict[str, Dict[str, Any]] = {}
    for asset_info in asset_infos:
        row = rows.get(asset_info.key)
        if row is None:
            rows[asset_info.key] = {
                "Key": asset_info.key,
                "Client": asset_info.client,
                "Media type": asset_info.media_type,
                "Exists": asset_info.exists,
                "Note": asset_info.note,
            }
            continue
        # The same key was seen on an earlier item: flag any disagreement
        # instead of silently keeping the first value.
        if asset_info.exists != row["Exists"]:
            row["Exists"] = "Sometimes"
        if asset_info.client != row["Client"]:
            row["Client"] = "Various"
        if asset_info.media_type != row["Media type"]:
            row["Media type"] = "Various"
        if asset_info.note:
            # Keep the new note even when the first result for this key had
            # none; requiring both to be non-empty dropped it.
            if row["Note"]:
                row["Note"] = str(row["Note"]) + "\n" + asset_info.note
            else:
                row["Note"] = asset_info.note
    return [
        [
            rows[key]["Key"],
            rows[key]["Client"],
            rows[key]["Exists"],
            rows[key]["Media type"],
            rows[key]["Note"],
        ]
        for key in sorted(rows)
    ]


async def info_async(
    href: Optional[str],
    alternate_assets: List[str],
    s3_requester_pays: bool,
    s3_retry_mode: str,
    s3_max_attempts: int,
) -> None:
    """Prints a table describing the assets of an item or item collection.

        $ stac-asset info item.json

    The input is read with ``read_as_dict`` and must be a GeoJSON ``Feature``
    (an item) or ``FeatureCollection`` (an item collection). Every asset is
    checked concurrently with ``get_asset_info``, and the results are printed
    to standard output as one row per asset key.

    Args:
        href: The location of the item or item collection. ``None`` or ``"-"``
            reads from standard input.
        alternate_assets: Alternate asset hrefs to prefer, if available.
        s3_requester_pays: Whether to enable requester pays for the s3 client.
        s3_retry_mode: The retry mode for the s3 client.
        s3_max_attempts: The maximum number of retries for the s3 client.

    Raises:
        SystemExit: With exit code 1 if the input's ``type`` field is missing
            or is not ``Feature`` or ``FeatureCollection``.
    """
    config = Config(
        alternate_assets=alternate_assets,
        s3_requester_pays=s3_requester_pays,
        s3_retry_mode=s3_retry_mode,
        s3_max_attempts=s3_max_attempts,
    )
    input_dict = await read_as_dict(href, config)
    type_ = input_dict.get("type")

    # Validate the input before any client is created, so the error exits
    # leave nothing to clean up.
    items: List[Item]
    if type_ is None:
        print("ERROR: missing 'type' field on input dictionary", file=sys.stderr)
        sys.exit(1)
    elif type_ == "Feature":
        item = Item.from_dict(input_dict)
        if href and href != "-":
            # Relative asset hrefs can only be resolved against a known self
            # href, which input from stdin does not have.
            item.set_self_href(href)
            item.make_asset_hrefs_absolute()
        items = [item]
    elif type_ == "FeatureCollection":
        items = list(ItemCollection.from_dict(input_dict).items)
    else:
        print(f"ERROR: invalid 'type' field: {type_}", file=sys.stderr)
        sys.exit(1)

    clients = Clients(config)
    try:
        # `gather` returns results in argument order, so the merged notes come
        # out in a deterministic order.
        asset_infos = await asyncio.gather(
            *(
                get_asset_info(key, asset, config, clients)
                for item in items
                for key, asset in item.assets.items()
            )
        )
    finally:
        # Close the clients even if one of the checks raised.
        await clients.close_all()

    headers = ["Asset", "Client", "Exists", "Media type", "Note"]
    table_data = _summarize_asset_infos(list(asset_infos))
    print(tabulate.tabulate(table_data, headers=headers))


async def get_asset_info(
    key: str, asset: Asset, config: Config, clients: Clients
) -> AssetInfo:
    """Checks whether a single asset exists and describes how it was checked.

    Failures are reported in the returned ``AssetInfo`` (``exists=False`` plus
    a note) rather than raised, so one bad asset does not abort the report for
    all the others.

    Args:
        key: The asset's key on its item.
        asset: The asset to check.
        config: The configuration, used for its ``alternate_assets``.
        clients: The clients used to look up a client for the asset's href.

    Returns:
        The asset's key, client name, media type, existence, and any error
        message.
    """
    # TODO refactor with asset_exists
    href = _functions.get_absolute_asset_href(asset, config.alternate_assets)
    if not href:
        return AssetInfo(
            key=key,
            client="n/a",
            media_type=asset.media_type,
            exists=False,
            note="Could not make absolute href",
        )

    try:
        client = await clients.get_client(href)
    except Exception as error:
        # No client could be obtained for this href; report it as a row
        # instead of letting the exception cancel the whole gather.
        return AssetInfo(
            key=key,
            client="n/a",
            media_type=asset.media_type,
            exists=False,
            note=str(error),
        )

    try:
        await client.assert_href_exists(href)
    except Exception as error:
        note = str(error)
        exists = False
    else:
        note = ""
        exists = True

    # TODO clients should probably specify this explicitly
    name = type(client).__name__.lower()
    # We can't use `removesuffix` because it was added in Python 3.9
    if name.endswith("client"):
        name = name[: -len("client")]
    return AssetInfo(
        key=key, client=name, media_type=asset.media_type, exists=exists, note=note
    )


@dataclass
class AssetInfo:
    """The result of checking one asset, used to build the ``info`` table."""

    # The asset's key on its item.
    key: str
    # Short name of the client that checked the asset, or "n/a" if none did.
    client: str
    # Whether the asset's href was found to exist.
    exists: bool
    # The asset's media type, when one is recorded.
    media_type: Optional[str] = None
    # Error message or other remark; empty when there is nothing to report.
    note: str = ""


if __name__ == "__main__":
cli()
9 changes: 9 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,12 @@ def test_download_item_s3_requester_pays(tmp_path: Path) -> None:
],
)
assert result.exit_code == 0


def test_info(item_path: Path) -> None:
    """The ``info`` subcommand exits successfully for an item file."""
    arguments = ["info", str(item_path)]
    result = CliRunner().invoke(stac_asset._cli.cli, arguments)
    # Attach the captured stdout so a failure shows what the command printed.
    assert result.exit_code == 0, result.stdout

0 comments on commit 6c0cf64

Please sign in to comment.