From c9dd83fc33a39c8a8a1ab124f74f8792122891fe Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Mon, 28 Oct 2024 10:11:17 -0700 Subject: [PATCH 1/5] breaking: using latest uds-lib + update docker --- docker/stage-in-stage-out/dc-003-upload.yaml | 9 ++++++--- requirements.txt | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docker/stage-in-stage-out/dc-003-upload.yaml b/docker/stage-in-stage-out/dc-003-upload.yaml index 77d6653b..7f9a95ab 100644 --- a/docker/stage-in-stage-out/dc-003-upload.yaml +++ b/docker/stage-in-stage-out/dc-003-upload.yaml @@ -23,17 +23,20 @@ services: VERIFY_SSL: 'FALSE' DAPA_API: 'https://k3a3qmarxh.execute-api.us-west-2.amazonaws.com/dev' - COLLECTION_ID: 'NEW_COLLECTION_EXAMPLE_L1B___9' + + PROJECT: 'NAME of the project if collection name in Granule STAC purely includes the name. It will be part of URN:NASA:UNITY:::' + VENUE: 'VENUE of the project if collection name in Granule STAC purely includes the name. It will be part of URN:NASA:UNITY:::' STAGING_BUCKET: 'uds-test-cumulus-staging' + DELETE_FILES: 'FALSE' - RESULT_PATH_PREFIX: 'stage_out/sample_dir (Optional:defaulted to stage_out if missing. NOTE: No need to begin or end with "/". )' + RESULT_PATH_PREFIX: 'stage_out/sample_dir (Optional:defaulted to stage_out if missing. NOTE: No need to begin or end with "/". It also accepts empty string now as default value)' GRANULES_SEARCH_DOMAIN: 'UNITY' GRANULES_UPLOAD_TYPE: 'UPLOAD_S3_BY_STAC_CATALOG' UPLOAD_DIR: '/etc/snpp_upload_test_1. or empty string' OUTPUT_DIRECTORY: '/some/directory' CATALOG_FILE: 'empty string or /path/to/stac/catalog file' - LOG_LEVEL: '20' + LOG_LEVEL: '20 NOTE: 10=DEBUG 20=info. 30=error' networks: - internal networks: diff --git a/requirements.txt b/requirements.txt index f7989aea..986ee5f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ jsonschema==4.23.0 jsonschema-specifications==2023.12.1 lark==0.12.0 mangum==0.18.0 -mdps-ds-lib==0.5.1.dev1 +mdps-ds-lib==0.5.1.dev10000 pydantic==2.9.2 pydantic_core==2.23.4 pygeofilter==0.2.4 From d803e28cf348d65609a24049bb557c57f01c40dc Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Mon, 28 Oct 2024 12:08:17 -0700 Subject: [PATCH 2/5] feat: use latest uds-lib --- docker/stage-in-stage-out/dc-003-upload.yaml | 1 + requirements.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/stage-in-stage-out/dc-003-upload.yaml b/docker/stage-in-stage-out/dc-003-upload.yaml index 7f9a95ab..d02baca3 100644 --- a/docker/stage-in-stage-out/dc-003-upload.yaml +++ b/docker/stage-in-stage-out/dc-003-upload.yaml @@ -22,6 +22,7 @@ services: COGNITO_URL: 'ask U-CS. ex: https://cognito-idp.us-west-2.amazonaws.com' VERIFY_SSL: 'FALSE' + DRY_RUN: 'TRUE | FALSE . set to TRUE to confirm if all settings are correct bar aws credentials' DAPA_API: 'https://k3a3qmarxh.execute-api.us-west-2.amazonaws.com/dev' PROJECT: 'NAME of the project if collection name in Granule STAC purely includes the name. It will be part of URN:NASA:UNITY:::' diff --git a/requirements.txt b/requirements.txt index 986ee5f1..c0cca764 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ jsonschema==4.23.0 jsonschema-specifications==2023.12.1 lark==0.12.0 mangum==0.18.0 -mdps-ds-lib==0.5.1.dev10000 +mdps-ds-lib==0.5.1.dev010100 pydantic==2.9.2 pydantic_core==2.23.4 pygeofilter==0.2.4 From d178ce938148f46ec76944d1d3243e995e84c7bd Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Mon, 16 Dec 2024 09:24:47 -0600 Subject: [PATCH 3/5] feat: add delete single granule endpoint --- .../cumulus_wrapper/query_granules.py | 44 +++++++++++++++++ .../lib/uds_db/granules_db_index.py | 15 ++++++ .../uds_api/granules_api.py | 47 +++++++++++++++++++ requirements.txt | 2 +- 4 files changed, 107 insertions(+), 1 deletion(-) diff --git a/cumulus_lambda_functions/cumulus_wrapper/query_granules.py b/cumulus_lambda_functions/cumulus_wrapper/query_granules.py index cc4a4829..86eacefb 100644 --- a/cumulus_lambda_functions/cumulus_wrapper/query_granules.py +++ b/cumulus_lambda_functions/cumulus_wrapper/query_granules.py @@ -21,6 +21,7 @@ def __init__(self, cumulus_base: str, cumulus_token: str): super().__init__(cumulus_base, cumulus_token) self._conditions.append('status=completed') self._item_transformer = ItemTransformer() + self.__collection_id = None def with_filter(self, filter_key, filter_values: list): if len(filter_values) < 1: @@ -34,6 +35,7 @@ def with_filter(self, filter_key, filter_values: list): def with_collection_id(self, collection_id: str): self._conditions.append(f'{self.__collection_id_key}={collection_id}') + self.__collection_id = collection_id return self def with_bbox(self): @@ -130,6 +132,48 @@ def query_direct_to_private_api(self, private_api_prefix: str, transform=True): return {'server_error': f'error while invoking:{str(e)}'} return {'results': stac_list} + def delete_entry(self, private_api_prefix: str, granule_id: str): + payload = { + 'httpMethod': 'DELETE', + 'resource': '/{proxy+}', + 'path': f'/{self.__granules_key}/{self.__collection_id}/{granule_id}', + 'queryStringParameters': {**{k[0]: k[1] for k in [k1.split('=') for k1 in self._conditions]}}, + # 'queryStringParameters': {'limit': '30'}, + 'headers': { + 'Content-Type': 'application/json', + }, + # 'body': json.dumps({"action": "removeFromCmr"}) + } + LOGGER.debug(f'payload: {payload}') + try: + query_result = self._invoke_api(payload, private_api_prefix) + """ + {'statusCode': 200, 'body': '{"meta":{"name":"cumulus-api","stack":"am-uds-dev-cumulus","table":"granule","limit":3,"page":1,"count":0},"results":[]}', 'headers': {'x-powered-by': 'Express', 'access-control-allow-origin': '*', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'content-type': 'application/json; charset=utf-8', 'content-length': '120', 'etag': 'W/"78-YdHqDNIH4LuOJMR39jGNA/23yOQ"', 'date': 'Tue, 07 Jun 2022 22:30:44 GMT', 'connection': 'close'}, 'isBase64Encoded': False} + """ + if query_result['statusCode'] >= 500: + LOGGER.error(f'server error status code: {query_result.statusCode}. details: {query_result}') + return {'server_error': query_result} + if query_result['statusCode'] >= 400: + LOGGER.error(f'client error status code: {query_result.statusCode}. details: {query_result}') + return {'client_error': query_result} + query_result = json.loads(query_result['body']) + LOGGER.info(f'json query_result: {query_result}') + """ + { + "detail": "Record deleted" + } + """ + if 'detail' not in query_result: + LOGGER.error(f'missing key: detail. invalid response json: {query_result}') + return {'server_error': f'missing key: detail. invalid response json: {query_result}'} + if query_result['detail'] != 'Record deleted': + LOGGER.error(f'Wrong Message: {query_result}') + return {'server_error': f'Wrong Message: {query_result}'} + except Exception as e: + LOGGER.exception('error while invoking') + return {'server_error': f'error while invoking:{str(e)}'} + return {} + def query(self, transform=True): conditions_str = '&'.join(self._conditions) LOGGER.info(f'cumulus_base: {self.cumulus_base}') diff --git a/cumulus_lambda_functions/lib/uds_db/granules_db_index.py b/cumulus_lambda_functions/lib/uds_db/granules_db_index.py index f993f16e..f23b61d3 100644 --- a/cumulus_lambda_functions/lib/uds_db/granules_db_index.py +++ b/cumulus_lambda_functions/lib/uds_db/granules_db_index.py @@ -201,6 +201,21 @@ def get_entry(self, tenant: str, tenant_venue: str, doc_id: str, ): raise ValueError(f"no such granule: {doc_id}") return result + def delete_entry(self, tenant: str, tenant_venue: str, doc_id: str, ): + read_alias_name = f'{DBConstants.granules_read_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip() + result = self.__es.query({ + 'size': 9999, + 'query': {'term': {'_id': doc_id}} + }, read_alias_name) + if result is None: + raise ValueError(f"no such granule: {doc_id}") + delete_result = self.__es.delete_by_query({ + 'query': {'term': {'_id': doc_id}} + }, read_alias_name) + if delete_result is None: + raise ValueError(f"no such granule: {doc_id}") + return result + def update_entry(self, tenant: str, tenant_venue: str, json_body: dict, doc_id: str, ): write_alias_name = f'{DBConstants.granules_write_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip() json_body['event_time'] = TimeUtils.get_current_unix_milli() diff --git a/cumulus_lambda_functions/uds_api/granules_api.py b/cumulus_lambda_functions/uds_api/granules_api.py index a3961d08..c19a480c 100644 --- a/cumulus_lambda_functions/uds_api/granules_api.py +++ b/cumulus_lambda_functions/uds_api/granules_api.py @@ -2,6 +2,11 @@ import os from typing import Union +from mdps_ds_lib.lib.aws.aws_s3 import AwsS3 +from pystac import Item + +from cumulus_lambda_functions.cumulus_wrapper.query_granules import GranulesQuery + from cumulus_lambda_functions.daac_archiver.daac_archiver_logic import DaacArchiverLogic from cumulus_lambda_functions.uds_api.dapa.daac_archive_crud import DaacArchiveCrud, DaacDeleteModel, DaacAddModel, \ DaacUpdateModel @@ -239,6 +244,48 @@ async def get_single_granule_dapa(request: Request, collection_id: str, granule_ raise HTTPException(status_code=500, detail=str(e)) return granules_result +@router.delete("/{collection_id}/items/{granule_id}") +@router.delete("/{collection_id}/items/{granule_id}/") +async def get_single_granule_dapa(request: Request, collection_id: str, granule_id: str): + authorizer: UDSAuthorizorAbstract = UDSAuthorizerFactory() \ + .get_instance(UDSAuthorizerFactory.cognito, + es_url=os.getenv('ES_URL'), + es_port=int(os.getenv('ES_PORT', '443')) + ) + auth_info = FastApiUtils.get_authorization_info(request) + collection_identifier = UdsCollections.decode_identifier(collection_id) + if not authorizer.is_authorized_for_collection(DBConstants.read, collection_id, + auth_info['ldap_groups'], + collection_identifier.tenant, + collection_identifier.venue): + LOGGER.debug(f'user: {auth_info["username"]} is not authorized for {collection_id}') + raise HTTPException(status_code=403, detail=json.dumps({ + 'message': 'not authorized to execute this action' + })) + try: + LOGGER.debug(f'deleting granule: {granule_id}') + cumulus_lambda_prefix = os.getenv('CUMULUS_LAMBDA_PREFIX') + cumulus = GranulesQuery('https://na/dev', 'NA') + cumulus.with_collection_id(collection_id) + cumulus_delete_result = cumulus.delete_entry(cumulus_lambda_prefix, granule_id) # TODO not sure it is correct granule ID + LOGGER.debug(f'cumulus_delete_result: {cumulus_delete_result}') + es_delete_result = GranulesDbIndex().delete_entry(collection_identifier.tenant, + collection_identifier.venue, + granule_id + ) + LOGGER.debug(f'es_delete_result: {es_delete_result}') + es_delete_result = [Item.from_dict(k['_source']) for k in es_delete_result['hits']['hits']] + s3 = AwsS3() + for each_granule in es_delete_result: + s3_urls = [v.href for k, v in each_granule.assets.items()] + LOGGER.debug(f'deleting S3 for {each_granule.id} - s3_urls: {s3_urls}') + delete_result = s3.delete_multiple(s3_urls=s3_urls) + LOGGER.debug(f'delete_result for {each_granule.id} - delete_result: {delete_result}') + except Exception as e: + LOGGER.exception('failed during get_granules_dapa') + raise HTTPException(status_code=500, detail=str(e)) + return {} + @router.put("/{collection_id}/archive/{granule_id}") @router.put("/{collection_id}/archive/{granule_id}/") diff --git a/requirements.txt b/requirements.txt index 42e7ec78..72949417 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ jsonschema==4.23.0 jsonschema-specifications==2023.12.1 lark==0.12.0 mangum==0.18.0 -mdps-ds-lib==1.1.1 +mdps-ds-lib==1.1.1.dev000200 pydantic==2.9.2 pydantic_core==2.23.4 pygeofilter==0.2.4 From c1f340c458f935a69332bcc28d916b3f09f7200f Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Mon, 16 Dec 2024 10:17:56 -0600 Subject: [PATCH 4/5] fix: deleting individually from real index. not from alias --- .../lib/uds_db/granules_db_index.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cumulus_lambda_functions/lib/uds_db/granules_db_index.py b/cumulus_lambda_functions/lib/uds_db/granules_db_index.py index f23b61d3..24139081 100644 --- a/cumulus_lambda_functions/lib/uds_db/granules_db_index.py +++ b/cumulus_lambda_functions/lib/uds_db/granules_db_index.py @@ -209,11 +209,13 @@ def delete_entry(self, tenant: str, tenant_venue: str, doc_id: str, ): }, read_alias_name) if result is None: raise ValueError(f"no such granule: {doc_id}") - delete_result = self.__es.delete_by_query({ - 'query': {'term': {'_id': doc_id}} - }, read_alias_name) - if delete_result is None: - raise ValueError(f"no such granule: {doc_id}") + for each_granule in result['hits']['hits']: + delete_result = self.__es.delete_by_query({ + 'query': {'term': {'_id': each_granule['_id']}} + }, each_granule['_index']) + LOGGER.debug(f'delete_result: {delete_result}') + if delete_result is None: + raise ValueError(f"error deleting {each_granule}") return result def update_entry(self, tenant: str, tenant_venue: str, json_body: dict, doc_id: str, ): From 0e16e7f4edb978b882bb61c6c473ca8186138bbb Mon Sep 17 00:00:00 2001 From: Wai Phyo Date: Mon, 16 Dec 2024 10:25:29 -0600 Subject: [PATCH 5/5] feat: add test case --- .../test_granules_deletion.py | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 tests/integration_tests/test_granules_deletion.py diff --git a/tests/integration_tests/test_granules_deletion.py b/tests/integration_tests/test_granules_deletion.py new file mode 100644 index 00000000..e9229072 --- /dev/null +++ b/tests/integration_tests/test_granules_deletion.py @@ -0,0 +1,68 @@ +import base64 +import json +import os +from unittest import TestCase + +import requests +from dotenv import load_dotenv +from mdps_ds_lib.lib.cognito_login.cognito_login import CognitoLogin + + +class TestGranulesDeletion(TestCase): + def setUp(self) -> None: + super().setUp() + load_dotenv() + self._url_prefix = f'{os.environ.get("UNITY_URL")}/{os.environ.get("UNITY_STAGE", "sbx-uds-dapa")}' + self.cognito_login = CognitoLogin() \ + .with_client_id(os.environ.get('CLIENT_ID', '')) \ + .with_cognito_url(os.environ.get('COGNITO_URL', '')) \ + .with_verify_ssl(False) \ + .start(base64.standard_b64decode(os.environ.get('USERNAME')).decode(), + base64.standard_b64decode(os.environ.get('PASSWORD')).decode()) + self.bearer_token = self.cognito_login.token + self.stage = os.environ.get("UNITY_URL").split('/')[-1] + self.uds_url = f'{os.environ.get("UNITY_URL")}/{os.environ.get("UNITY_STAGE", "sbx-uds-dapa")}/' + self.custom_metadata_body = { + 'tag': {'type': 'keyword'}, + 'c_data1': {'type': 'long'}, + 'c_data2': {'type': 'boolean'}, + 'c_data3': {'type': 'keyword'}, + } + + self.tenant = 'UDS_LOCAL_TEST_3' # 'uds_local_test' # 'uds_sandbox' + self.tenant_venue = 'DEV' # 'DEV1' # 'dev' + self.collection_name = 'AAA' # 'uds_collection' # 'sbx_collection' + self.collection_version = '24.03.20.14.40'.replace('.', '') # '2402011200' + return + + def test_query_all(self): + collection_id = f'URN:NASA:UNITY:{self.tenant}:{self.tenant_venue}:{self.collection_name}-01___001' + granule_id = f'{collection_id}:test_file01' + post_url = f'{self.uds_url}collections/{collection_id}/items/' # MCP Dev + headers = { + 'Authorization': f'Bearer {self.bearer_token}', + } + print(post_url) + query_result = requests.get(url=post_url, + headers=headers, + ) + self.assertEqual(query_result.status_code, 200, f'wrong status code. {query_result.text}') + response_json = json.loads(query_result.text) + print(json.dumps(response_json, indent=4)) + return + + def test_delete_01(self): + collection_id = f'URN:NASA:UNITY:{self.tenant}:{self.tenant_venue}:{self.collection_name}-01___001' + granule_id = f'{collection_id}:test_file01' + post_url = f'{self.uds_url}collections/{collection_id}/items/{granule_id}' # MCP Dev + headers = { + 'Authorization': f'Bearer {self.bearer_token}', + } + print(post_url) + query_result = requests.delete(url=post_url, + headers=headers, + ) + self.assertEqual(query_result.status_code, 200, f'wrong status code. {query_result.text}') + response_json = json.loads(query_result.text) + print(json.dumps(response_json, indent=4)) + return