Skip to content

Commit

Permalink
add Task.upload_local_item_assets_to_s3 method (#113)
Browse files Browse the repository at this point in the history
* add Task.upload_local_item_assets_to_s3 method

* update changelog

* fix fake auth config
  • Loading branch information
Phil Varner authored Apr 22, 2024
1 parent d416a3b commit c7ec30a
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 11 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/continuous-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
cache: pip
- name: Install
- name: Install dependencies for linting
run: pip install '.[dev]'
- name: Lint
run: pre-commit run --all-files
- name: Install dependencies for testing
run: pip install '.[test]'
- name: Test
run: pytest
codecov:
Expand All @@ -41,7 +43,7 @@ jobs:
python-version: "3.12"
cache: pip
- name: Install
run: pip install '.[dev]'
run: pip install '.[test]'
- name: Test
run: pytest --cov=stactask
- name: Upload coverage to Codecov
Expand Down
8 changes: 3 additions & 5 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## Added

- Added property `collection_mapping` to `Task` class to retrieve the collection mappings
- Property `collection_mapping` to `Task` class to retrieve the collection mappings
from upload_options
- Added utils method `find_collection` to allow the retrieval of the collection name for
- Utils method `find_collection` to allow the retrieval of the collection name for
an Item dict

## Added

- Task method `upload_local_item_assets_to_s3(item)` to upload all local assets to S3
- Support for using stdin and stdout as input and output for a task, e.g., `cat in.json | src/mytask/mytask.py run --local | tee out.json`

## [v0.4.2] - 2024-03-08
Expand Down
23 changes: 21 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,15 @@ dev = [
"codespell~=2.2.5",
"mypy~=1.9",
"pre-commit~=3.7",
"pytest-cov~=5.0",
"pytest~=8.0",
"ruff~=0.3.1",
"types-setuptools~=69.0",
]
test = [
"pytest~=8.0",
"pytest-cov~=5.0",
"pytest-env~=1.1",
"moto~=5.0.5",
]

[project.urls]
Issues = "https://github.com/stac-utils/stactask/issues"
Expand All @@ -55,3 +59,18 @@ ignore_missing_imports = true

[tool.ruff.lint]
select = ["F", "E", "W", "I", "ERA", "RUF"]

[tool.pytest.ini_options]
addopts = "-rx -q -s -vvv"
log_cli_level = "INFO"
log_cli = true
markers = ["system", "unit"]
env = [
"AWS_DEFAULT_REGION=us-west-2",
"AWS_ACCESS_KEY_ID=foo",
"AWS_SECRET_ACCESS_KEY=bar",
"AWS_SESSION_TOKEN=baz",
]
filterwarnings = [
"ignore::UserWarning:stactask.*:",
]
21 changes: 20 additions & 1 deletion stactask/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import fsspec
from boto3utils import s3
from pystac import Item, ItemCollection
from pystac import Asset, Item, ItemCollection

from .asset_io import (
download_item_assets,
Expand Down Expand Up @@ -316,6 +316,25 @@ def upload_item_assets_to_s3(

return item

def _is_local_asset(self, asset: Asset) -> bool:
    """Return True when the asset's href points inside the task's working directory.

    The comparison is made on whole path components rather than on the raw
    string, so a sibling directory that merely shares a name prefix with the
    working directory (e.g. ``/tmp/work-other`` vs ``/tmp/work``) is not
    mistaken for a local asset.

    Args:
        asset: The asset whose ``href`` is checked.

    Returns:
        True if ``asset.href`` is the working directory itself or a path
        below it, False otherwise (including remote URLs such as ``s3://``).
    """
    from pathlib import PurePath

    try:
        # relative_to raises ValueError when href is not under the workdir
        PurePath(asset.href).relative_to(self._workdir)
    except ValueError:
        return False
    return True

def _get_local_asset_keys(self, item: Item) -> List[str]:
    """Collect the keys of the item's assets that ``_is_local_asset`` accepts.

    Args:
        item: The item whose ``assets`` mapping is scanned.

    Returns:
        The matching asset keys, in the iteration order of ``item.assets``.
    """
    local_keys: List[str] = []
    for asset_key, candidate in item.assets.items():
        if self._is_local_asset(candidate):
            local_keys.append(asset_key)
    return local_keys

def upload_local_item_assets_to_s3(
    self,
    item: Item,
    s3_client: Optional[s3] = None,
) -> Item:
    """Upload only the item's local assets to S3.

    The asset keys are chosen by ``_get_local_asset_keys`` and the upload
    itself is delegated to ``upload_item_assets_to_s3``.

    Args:
        item: The item whose local assets should be uploaded.
        s3_client: Optional client forwarded unchanged to
            ``upload_item_assets_to_s3``.

    Returns:
        Whatever ``upload_item_assets_to_s3`` returns for the item.
    """
    local_keys = self._get_local_asset_keys(item)
    return self.upload_item_assets_to_s3(
        item=item,
        assets=local_keys,
        s3_client=s3_client,
    )

# this should be in PySTAC
@staticmethod
def create_item_from_item(item: Dict[str, Any]) -> Dict[str, Any]:
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/sentinel2-l2a-j2k-payload.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
],
"workflow": "cog-archive",
"upload_options": {
"path_template": "s3://sentinel-cogs/${collection}/${grid:utm_zone}/${grid:latitude_band}/${grid:grid_square}/${year}/${month}/${id}",
"path_template": "s3://sentinel-cogs/${collection}/${mgrs:utm_zone}/${mgrs:latitude_band}/${mgrs:grid_square}/${year}/${month}/${id}",
"public_assets": "ALL",
"collections": {
"sentinel-2-l2a": "$[?(@.id =~ 'S2[AB].*')]"
Expand Down
29 changes: 29 additions & 0 deletions tests/test_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
from pathlib import Path
from typing import Any, Dict, Optional

import boto3
import pytest
from moto import mock_aws
from pystac import Asset

from stactask.exceptions import FailedValidation
from stactask.task import Task
Expand Down Expand Up @@ -183,5 +186,31 @@ def test_collection_mapping(nothing_task: Task) -> None:
}


@mock_aws  # type: ignore
def test_s3_upload(nothing_task: Task) -> None:
    """Upload a working-directory asset and check that its href is rewritten."""
    # create the destination bucket inside the mocked S3
    bucket_config = {"LocationConstraint": "us-west-2"}
    boto3.client("s3").create_bucket(
        Bucket="sentinel-cogs",
        CreateBucketConfiguration=bucket_config,
    )

    # attach a file written under the task's working directory as asset "key1"
    item = nothing_task.items.items[0]
    local_file = nothing_task._workdir / "foo.txt"
    local_file.write_text("some text")
    item.add_asset("key1", Asset(href=str(local_file)))

    uploaded_item = nothing_task.upload_local_item_assets_to_s3(item)

    expected_href = "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-2-l2a/52/H/GH/2022/10/S2A_52HGH_20221007_0_L2A/foo.txt"
    assert uploaded_item.assets["key1"].href == expected_href


if __name__ == "__main__":
output = NothingTask.cli()

0 comments on commit c7ec30a

Please sign in to comment.