Skip to content

Commit

Permalink
keep original filenames (#77)
Browse files Browse the repository at this point in the history
* keep original filenames

Closes #6

Update CHANGELOG.md

ignore .python-version file for pyenv-virtualenv

* remove keep_original_filenames from CLI

keep_original_filenames is task specific, not invocation specific

* update CHANGELOG.md

* Update CHANGELOG.md

---------

Co-authored-by: Pete Gadomski <pete.gadomski@gmail.com>
  • Loading branch information
ircwaves and gadomski authored Jan 17, 2024
1 parent f51b9f9 commit 96fc203
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- ([#72]) Given that `_get_file` is part of the `AsyncFileSystem` spec, this
adds the synchronous `get_file` as a way to retrieve files if `_get_file` is
not found.
- ([#77](https://github.com/stac-utils/stac-task/pull/77)) Added option `keep_original_filenames` to download routines to
support legacy applications dependent on filename specifics.

## [v0.3.0] - 2023-12-20

Expand Down
8 changes: 6 additions & 2 deletions stactask/asset_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ async def download_item_assets(
overwrite: bool = False,
path_template: str = "${collection}/${id}",
absolute_path: bool = False,
keep_original_filenames: bool = False,
**kwargs: Any,
) -> Item:
_assets = item.assets.keys() if assets is None else assets
Expand All @@ -61,8 +62,11 @@ async def download_item_assets(
href = item.assets[a].href

# local filename
ext = os.path.splitext(href)[-1]
new_href = os.path.join(path, a + ext)
if keep_original_filenames:
basename = os.path.basename(href)
else:
basename = a + os.path.splitext(href)[1]
new_href = os.path.join(path, basename)
if absolute_path:
new_href = os.path.abspath(new_href)

Expand Down
45 changes: 38 additions & 7 deletions stactask/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,33 +217,64 @@ def download_item_assets(
self,
item: Item,
path_template: str = "${collection}/${id}",
keep_original_filenames: bool = False,
**kwargs: Any,
) -> Item:
"""Download provided asset keys for all items in payload. Assets are
saved in workdir in a directory named by the Item ID, and the items are
updated with the new asset hrefs.
"""Download provided asset keys for the given item. Assets are
saved in workdir in a directory (as specified by path_template), and
the items are updated with the new asset hrefs.
Args:
assets (Optional[List[str]], optional): List of asset keys to
download. Defaults to all assets.
item (pystac.Item): STAC Item for which assets need to be downloaded.
assets (Optional[List[str]]): List of asset keys to download.
Defaults to all assets.
path_template (Optional[str]): String to be interpolated to specify
where to store downloaded files.
keep_original_filenames (Optional[bool]): Controls whether original
file names should be used, or asset key + extension.
"""
outdir = str(self._workdir / path_template)
loop = asyncio.get_event_loop()
item = loop.run_until_complete(
download_item_assets(item, path_template=outdir, **kwargs)
download_item_assets(
item,
path_template=outdir,
keep_original_filenames=keep_original_filenames,
**kwargs,
)
)
return item

def download_items_assets(
self,
items: Iterable[Item],
path_template: str = "${collection}/${id}",
keep_original_filenames: bool = False,
**kwargs: Any,
) -> List[Item]:
"""Download provided asset keys for the given items. Assets are
saved in workdir in a directory (as specified by path_template), and
the items are updated with the new asset hrefs.
Args:
items (List[pystac.Item]): List of STAC Items for which assets need
to be downloaded.
assets (Optional[List[str]]): List of asset keys to download.
Defaults to all assets.
path_template (Optional[str]): String to be interpolated to specify
where to store downloaded files.
keep_original_filenames (Optional[bool]): Controls whether original
file names should be used, or asset key + extension.
"""
outdir = str(self._workdir / path_template)
loop = asyncio.get_event_loop()
items = loop.run_until_complete(
download_items_assets(items, path_template=outdir, **kwargs)
download_items_assets(
items,
path_template=outdir,
keep_original_filenames=keep_original_filenames,
**kwargs,
)
)
return list(items)

Expand Down
15 changes: 15 additions & 0 deletions tests/test_task_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,21 @@ def test_download_item_asset(tmp_path: Path, item_collection: Dict[str, Any]) ->
assert filename.is_file() is True


def test_download_keep_original_filenames(
    tmp_path: Path, item_collection: Dict[str, Any]
) -> None:
    """Download one asset with keep_original_filenames=True and check its name.

    Only the "tileinfo_metadata" asset of the first item is requested; the
    href recorded on the returned item must have the file name
    "tileInfo.json".
    """
    workdir = tmp_path / "test-task-download-item-asset"
    task = NothingTask(item_collection, workdir=workdir)
    downloaded = task.download_item_assets(
        task.items[0],
        assets=["tileinfo_metadata"],
        keep_original_filenames=True,
    )
    # Inspect the serialized item so the check sees the href as it would be
    # written out, not just the in-memory asset object.
    href = downloaded.to_dict()["assets"]["tileinfo_metadata"]["href"]
    assert Path(href).name == "tileInfo.json"


def test_download_item_asset_local(
tmp_path: Path, item_collection: Dict[str, Any]
) -> None:
Expand Down

0 comments on commit 96fc203

Please sign in to comment.