From f26013965a4e7566d83e2a5e36e21eeeefc9b214 Mon Sep 17 00:00:00 2001 From: Rachael Robinson <23265724+rachaelcodes@users.noreply.github.com> Date: Thu, 4 Jul 2024 15:46:26 +0100 Subject: [PATCH 1/2] add new put route --- core_api/src/routes/file.py | 40 ++++++++++++++++++++++++++++++ core_api/tests/routes/test_file.py | 18 ++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/core_api/src/routes/file.py b/core_api/src/routes/file.py index 922546f64..2094916c0 100644 --- a/core_api/src/routes/file.py +++ b/core_api/src/routes/file.py @@ -200,6 +200,46 @@ def delete_file(file_uuid: UUID, user_uuid: Annotated[UUID, Depends(get_user_uui return file +@file_app.put( + "/{file_uuid}", + response_model=File, + tags=["file"], + responses={404: {"model": APIError404, "description": "The file was not found"}}, +) +async def reingest_file(file_uuid: UUID, user_uuid: Annotated[UUID, Depends(get_user_uuid)]) -> File: + """Deletes existing file chunks and regenerates embeddings + + Args: + file_uuid (UUID): The UUID of the file to reingest + user_uuid (UUID): The UUID of the user + + Returns: + File: The file that was reingested + + Raises: + 404: If the file isn't found, or the creator and requester don't match + """ + try: + file = storage_handler.read_item(file_uuid, model_type="File") + except NotFoundError: + return file_not_found_response(file_uuid=file_uuid) + + if file.creator_user_uuid != user_uuid: + return file_not_found_response(file_uuid=file_uuid) + + log.info("reingesting %s", file.uuid) + + # Remove old chunks + chunks = storage_handler.get_file_chunks(file.uuid, user_uuid) + storage_handler.delete_items(chunks) + + # Add new chunks + log.info("publishing %s", file.uuid) + await file_publisher.publish(file) + + return file + + @file_app.get( "/{file_uuid}/chunks", tags=["file"], diff --git a/core_api/tests/routes/test_file.py b/core_api/tests/routes/test_file.py index 06d60ee8a..7a7607247 100644 --- a/core_api/tests/routes/test_file.py +++ 
b/core_api/tests/routes/test_file.py @@ -108,6 +108,24 @@ def test_delete_missing_file(app_client, headers): assert response.status_code == HTTPStatus.NOT_FOUND +def test_reingest_file(app_client, chunked_file, elasticsearch_storage_handler, headers): + """ + Given a previously chunked file + When I PUT it to /file/uuid/ + I Expect the old chunks to be removed + """ + previous_chunks = elasticsearch_storage_handler.get_file_chunks(chunked_file.uuid, chunked_file.creator_user_uuid) + + response = app_client.put(f"/file/{chunked_file.uuid}", headers=headers) + assert response.status_code == HTTPStatus.OK + + elasticsearch_storage_handler.refresh() + assert ( + elasticsearch_storage_handler.get_file_chunks(chunked_file.uuid, chunked_file.creator_user_uuid) + != previous_chunks + ) + + def test_get_file_chunks(app_client, chunked_file, headers): """ Given a previously chunked file From 4920f3e4891cb84be0440032941f88a3d731e649 Mon Sep 17 00:00:00 2001 From: Rachael Robinson <23265724+rachaelcodes@users.noreply.github.com> Date: Fri, 5 Jul 2024 08:05:03 +0100 Subject: [PATCH 2/2] update admin task --- django_app/redbox_app/redbox_core/admin.py | 25 ++++++--------------- django_app/redbox_app/redbox_core/client.py | 6 +++++ 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/django_app/redbox_app/redbox_core/admin.py b/django_app/redbox_app/redbox_core/admin.py index cfea204a9..507e610ac 100644 --- a/django_app/redbox_app/redbox_core/admin.py +++ b/django_app/redbox_app/redbox_core/admin.py @@ -41,28 +41,17 @@ class FileAdmin(admin.ModelAdmin): def reupload(self, request, queryset): # noqa:ARG002 for file in queryset: try: - logger.info("Deleting existing file from core-api: %s", file) - core_api.delete_file(file.core_file_uuid, file.user) + logger.info("Re-uploading file to core-api: %s", file) + core_api.reingest_file(file.core_file_uuid, file.user) except RequestException as e: - logger.exception("Error deleting File model object %s.", file, exc_info=e) - + 
logger.exception("Error re-uploading File model object %s.", file, exc_info=e) + file.status = models.StatusEnum.errored + file.save() else: - file.status = models.StatusEnum.deleted + file.status = models.StatusEnum.uploaded file.save() - try: - logger.info("Re-uploading file to core-api: %s", file) - upload_file_response = core_api.upload_file(file.unique_name, file.user) - except RequestException as e: - logger.exception("Error re-uploading File model object %s.", file, exc_info=e) - file.status = models.StatusEnum.errored - file.save() - else: - file.core_file_uuid = upload_file_response.uuid - file.status = models.StatusEnum.uploaded - file.save() - - logger.info("Successfully reuploaded file %s.", file) + logger.info("Successfully reuploaded file %s.", file) list_display = ["original_file_name", "user", "status", "created_at", "last_referenced"] list_filter = ["user", "status"] diff --git a/django_app/redbox_app/redbox_core/client.py b/django_app/redbox_app/redbox_core/client.py index 7a44ddef6..5a51f6449 100644 --- a/django_app/redbox_app/redbox_core/client.py +++ b/django_app/redbox_app/redbox_core/client.py @@ -114,3 +114,9 @@ def delete_file(self, file_id: UUID, user: User) -> FileOperation: response = requests.delete(url, headers={"Authorization": user.get_bearer_token()}, timeout=60) response.raise_for_status() return FileOperation.schema().loads(response.content) + + def reingest_file(self, file_id: UUID, user: User) -> FileOperation: + url = self.url / "file" / str(file_id) + response = requests.put(url, headers={"Authorization": user.get_bearer_token()}, timeout=60) + response.raise_for_status() + return FileOperation.schema().loads(response.content)