diff --git a/cdp_backend/file_store/functions.py b/cdp_backend/file_store/functions.py index 1cdc872a..ff740c2d 100644 --- a/cdp_backend/file_store/functions.py +++ b/cdp_backend/file_store/functions.py @@ -67,6 +67,7 @@ def upload_file( filepath: str, save_name: Optional[str] = None, remove_local: bool = False, + overwrite: bool = False, ) -> str: """ Uploads a file to a Google Cloud file store bucket. @@ -88,6 +89,9 @@ def upload_file( The name to save the file as in the file store. remove_local: bool If True, remove the local file upon successful upload. + overwrite: bool + Boolean value indicating whether or not to overwrite the remote resource with + the same name if it already exists. Returns ------- @@ -107,7 +111,7 @@ def upload_file( uri = get_file_uri(bucket, save_name, credentials_file) # Return existing uri and remove local copy if desired - if uri: + if uri and not overwrite: if remove_local: remove_local_file(resolved_filepath) diff --git a/cdp_backend/pipeline/generate_event_index_pipeline.py b/cdp_backend/pipeline/generate_event_index_pipeline.py index dfebc4a4..3f1065a1 100644 --- a/cdp_backend/pipeline/generate_event_index_pipeline.py +++ b/cdp_backend/pipeline/generate_event_index_pipeline.py @@ -383,6 +383,7 @@ def chunk_index( bucket=bucket_name, filepath=str(local_chunk_path), save_name=f"{REMOTE_INDEX_CHUNK_DIR}/{save_filename}", + overwrite=True, ) diff --git a/cdp_backend/tests/file_store/test_functions.py b/cdp_backend/tests/file_store/test_functions.py index eb92a446..5d38ddd1 100644 --- a/cdp_backend/tests/file_store/test_functions.py +++ b/cdp_backend/tests/file_store/test_functions.py @@ -17,7 +17,7 @@ BUCKET = "bucket" FILEPATH = "fake/path/" + FILENAME SAVE_NAME = "fakeSaveName" -EXISTING_FILE_URI = "gs://bucket/existing_file.json" +EXISTING_FILE_URI = "gs://bucket/" + SAVE_NAME GCS_FILE_URI = functions.GCS_URI.format(bucket=BUCKET, filename=FILENAME) ############################################################################### @@ -56,12 +56,68 @@ def test_get_file_uri( @pytest.mark.parametrize( - "bucket, filepath, save_name, remove_local, existing_file_uri, expected", + "bucket, filepath, save_name, remove_local, overwrite, existing_file_uri, expected", [ - (BUCKET, FILEPATH, SAVE_NAME, True, EXISTING_FILE_URI, EXISTING_FILE_URI), - (BUCKET, FILEPATH, SAVE_NAME, False, EXISTING_FILE_URI, EXISTING_FILE_URI), - (BUCKET, FILEPATH, None, False, None, GCS_FILE_URI), - (BUCKET, FILEPATH, None, True, None, GCS_FILE_URI), + ( + BUCKET, + FILEPATH, + SAVE_NAME, + True, + True, + EXISTING_FILE_URI, + EXISTING_FILE_URI, + ), + ( + BUCKET, + FILEPATH, + SAVE_NAME, + True, + True, + None, + EXISTING_FILE_URI, + ), + ( + BUCKET, + FILEPATH, + SAVE_NAME, + True, + False, + EXISTING_FILE_URI, + EXISTING_FILE_URI, + ), + ( + BUCKET, + FILEPATH, + SAVE_NAME, + False, + True, + EXISTING_FILE_URI, + EXISTING_FILE_URI, + ), + ( + BUCKET, + FILEPATH, + SAVE_NAME, + False, + True, + None, + EXISTING_FILE_URI, + ), + ( + BUCKET, + FILEPATH, + SAVE_NAME, + False, + False, + EXISTING_FILE_URI, + EXISTING_FILE_URI, + ), + (BUCKET, FILEPATH, None, False, True, GCS_FILE_URI, GCS_FILE_URI), + (BUCKET, FILEPATH, None, False, True, None, GCS_FILE_URI), + (BUCKET, FILEPATH, None, False, False, None, GCS_FILE_URI), + (BUCKET, FILEPATH, None, True, True, GCS_FILE_URI, GCS_FILE_URI), + (BUCKET, FILEPATH, None, True, True, None, GCS_FILE_URI), + (BUCKET, FILEPATH, None, True, False, None, GCS_FILE_URI), ], ) def test_upload_file( @@ -69,6 +125,7 @@ def test_upload_file( filepath: str, save_name: Optional[str], remove_local: bool, + overwrite: bool, existing_file_uri: str, expected: str, ) -> None: @@ -82,7 +139,12 @@ def test_upload_file( mock_path.return_value.name = FILENAME assert expected == functions.upload_file( - "path/to/creds", bucket, filepath, save_name, remove_local + "path/to/creds", + bucket, + filepath, + save_name, + remove_local, + overwrite, )