Skip to content

Commit

Permalink
Merge pull request #728 from i-dot-ai/feature/put-endpoint-for-reinge…
Browse files Browse the repository at this point in the history
…stion

New core-api PUT endpoint for file reingestion
  • Loading branch information
rachaelcodes authored Jul 5, 2024
2 parents 6d87a1b + 4920f3e commit 74b9ebd
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 18 deletions.
40 changes: 40 additions & 0 deletions core_api/src/routes/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,46 @@ def delete_file(file_uuid: UUID, user_uuid: Annotated[UUID, Depends(get_user_uui
return file


@file_app.put(
    "/{file_uuid}",
    response_model=File,
    tags=["file"],
    responses={404: {"model": APIError404, "description": "The file was not found"}},
)
async def reingest_file(file_uuid: UUID, user_uuid: Annotated[UUID, Depends(get_user_uuid)]) -> File:
    """Delete a file's existing chunks and publish the file for reingestion.

    Args:
        file_uuid (UUID): The UUID of the file to reingest
        user_uuid (UUID): The UUID of the requesting user

    Returns:
        File: The file that was published for reingestion

    Raises:
        404: If the file isn't found, or the creator and requester don't match
    """
    try:
        stored_file = storage_handler.read_item(file_uuid, model_type="File")
    except NotFoundError:
        requester_is_creator = False
    else:
        requester_is_creator = stored_file.creator_user_uuid == user_uuid

    # A file created by a different user is reported exactly like a missing one.
    if not requester_is_creator:
        return file_not_found_response(file_uuid=file_uuid)

    log.info("reingesting %s", stored_file.uuid)

    # Drop the chunks from the previous ingestion before the file is published again.
    storage_handler.delete_items(storage_handler.get_file_chunks(stored_file.uuid, user_uuid))

    # Publishing hands the file to file_publisher so new chunks get produced.
    log.info("publishing %s", stored_file.uuid)
    await file_publisher.publish(stored_file)

    return stored_file


@file_app.get(
"/{file_uuid}/chunks",
tags=["file"],
Expand Down
18 changes: 18 additions & 0 deletions core_api/tests/routes/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,24 @@ def test_delete_missing_file(app_client, headers):
assert response.status_code == HTTPStatus.NOT_FOUND


def test_reingest_file(app_client, chunked_file, elasticsearch_storage_handler, headers):
    """
    Given a previously chunked file
    When I PUT to /file/<uuid>
    I Expect the stored chunks to differ from the ones present before the request
    """
    file_uuid = chunked_file.uuid
    creator_uuid = chunked_file.creator_user_uuid
    chunks_before = elasticsearch_storage_handler.get_file_chunks(file_uuid, creator_uuid)

    response = app_client.put(f"/file/{file_uuid}", headers=headers)
    assert response.status_code == HTTPStatus.OK

    # Refresh the storage handler before re-reading the chunks.
    elasticsearch_storage_handler.refresh()
    chunks_after = elasticsearch_storage_handler.get_file_chunks(file_uuid, creator_uuid)
    assert chunks_after != chunks_before


def test_get_file_chunks(app_client, chunked_file, headers):
"""
Given a previously chunked file
Expand Down
25 changes: 7 additions & 18 deletions django_app/redbox_app/redbox_core/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,17 @@ class FileAdmin(admin.ModelAdmin):
def reupload(self, request, queryset): # noqa:ARG002
# Admin action body: loops over every file in `queryset`, calls core-api for
# it, and stores the outcome in the file's `status` field.
# NOTE(review): this span looks like a rendered diff hunk with removed and
# added lines interleaved without +/- markers (both the delete_file and the
# reingest_file calls, two `file.status` assignments in one `else`, a
# duplicated success log) and with leading indentation stripped. Confirm
# against the real admin.py which lines are current before relying on it.
for file in queryset:
try:
logger.info("Deleting existing file from core-api: %s", file)
core_api.delete_file(file.core_file_uuid, file.user)
logger.info("Re-uploading file to core-api: %s", file)
core_api.reingest_file(file.core_file_uuid, file.user)
# A failed core-api request is logged and the file is marked as errored.
except RequestException as e:
logger.exception("Error deleting File model object %s.", file, exc_info=e)

logger.exception("Error re-uploading File model object %s.", file, exc_info=e)
file.status = models.StatusEnum.errored
file.save()
else:
file.status = models.StatusEnum.deleted
file.status = models.StatusEnum.uploaded
file.save()

try:
logger.info("Re-uploading file to core-api: %s", file)
upload_file_response = core_api.upload_file(file.unique_name, file.user)
except RequestException as e:
logger.exception("Error re-uploading File model object %s.", file, exc_info=e)
file.status = models.StatusEnum.errored
file.save()
else:
# The upload response's uuid replaces the stored core_file_uuid.
file.core_file_uuid = upload_file_response.uuid
file.status = models.StatusEnum.uploaded
file.save()

logger.info("Successfully reuploaded file %s.", file)
logger.info("Successfully reuploaded file %s.", file)

list_display = ["original_file_name", "user", "status", "created_at", "last_referenced"]
list_filter = ["user", "status"]
Expand Down
6 changes: 6 additions & 0 deletions django_app/redbox_app/redbox_core/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,9 @@ def delete_file(self, file_id: UUID, user: User) -> FileOperation:
response = requests.delete(url, headers={"Authorization": user.get_bearer_token()}, timeout=60)
response.raise_for_status()
return FileOperation.schema().loads(response.content)

def reingest_file(self, file_id: UUID, user: User) -> FileOperation:
    """Send a PUT request for the given file to core-api on behalf of `user`.

    The request goes to `<self.url>/file/<file_id>` with the user's bearer
    token in the `Authorization` header and a 60 second timeout.

    Args:
        file_id: UUID of the file, used as the last path segment.
        user: User whose bearer token authorises the request.

    Returns:
        The response body loaded through the `FileOperation` schema.

    Raises:
        Whatever `response.raise_for_status()` raises for an error status.
    """
    endpoint = self.url / "file" / str(file_id)
    auth_headers = {"Authorization": user.get_bearer_token()}
    response = requests.put(endpoint, headers=auth_headers, timeout=60)
    response.raise_for_status()
    operation_schema = FileOperation.schema()
    return operation_schema.loads(response.content)

0 comments on commit 74b9ebd

Please sign in to comment.