-
Notifications
You must be signed in to change notification settings - Fork 31
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/rationalise file name #1127
Changes from 19 commits
4a5ee1b
8e872fc
b71df76
f62a809
376255d
5eb6412
bad4787
56186e9
583d6f4
f0fbe15
fe9e002
d9c2187
efae1ac
4c6e102
44b3fbf
f531721
370048a
fa45bc3
e211221
988cb5e
ce58e3a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Generated by Django 5.1.2 on 2024-10-28 08:51 | ||
|
||
import redbox_app.redbox_core.models | ||
import storages.backends.s3 | ||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration): | ||
|
||
dependencies = [ | ||
('redbox_core', '0057_citation_text_in_answer'), | ||
] | ||
|
||
operations = [ | ||
migrations.AlterField( | ||
model_name='file', | ||
name='original_file', | ||
field=models.FileField(storage=storages.backends.s3.S3Storage, upload_to=redbox_app.redbox_core.models.build_s3_key), | ||
), | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -536,9 +536,21 @@ def __init__(self, file): | |
super().__init__(f"{file.pk} is inactive, status is {file.status}") | ||
|
||
|
||
def build_s3_key(instance, filename: str) -> str: | ||
"""the s3-key is the primary key for a file, | ||
this needs to be unique so that if a user uploads a file with the same name as | ||
1. an existing file that they own, then it is overwritten | ||
2. an existing file that another user owns then a new file is created | ||
""" | ||
return f"{instance.user.email}/{filename}" | ||
|
||
|
||
class File(UUIDPrimaryKeyBase, TimeStampedModel): | ||
status = models.CharField(choices=StatusEnum.choices, null=False, blank=False) | ||
original_file = models.FileField(storage=settings.STORAGES["default"]["BACKEND"]) | ||
original_file = models.FileField( | ||
storage=settings.STORAGES["default"]["BACKEND"], | ||
upload_to=build_s3_key, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this |
||
) | ||
user = models.ForeignKey(User, on_delete=models.CASCADE) | ||
original_file_name = models.TextField(max_length=2048, blank=True, null=True) | ||
last_referenced = models.DateTimeField(blank=True, null=True) | ||
|
@@ -547,7 +559,7 @@ class File(UUIDPrimaryKeyBase, TimeStampedModel): | |
) | ||
|
||
def __str__(self) -> str: # pragma: no cover | ||
return f"{self.original_file_name} {self.user}" | ||
return self.file_name | ||
|
||
def save(self, *args, **kwargs): | ||
if not self.last_referenced: | ||
|
@@ -573,22 +585,12 @@ def delete_from_elastic(self): | |
if es_client.indices.exists(index=index): | ||
es_client.delete_by_query( | ||
index=index, | ||
body={"query": {"term": {"metadata.file_name.keyword": self.unique_name}}}, | ||
body={"query": {"term": {"metadata.file_name.keyword": self.s3_key}}}, | ||
) | ||
|
||
def update_status_from_core(self, status_label): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. unused |
||
match status_label: | ||
case "complete": | ||
self.status = StatusEnum.complete | ||
case "failed": | ||
self.status = StatusEnum.errored | ||
case _: | ||
self.status = StatusEnum.processing | ||
self.save() | ||
|
||
@property | ||
def file_type(self) -> str: | ||
name = self.original_file.name | ||
name = self.file_name | ||
return name.split(".")[-1] | ||
|
||
@property | ||
|
@@ -608,7 +610,7 @@ def url(self) -> URL | None: | |
ClientMethod="get_object", | ||
Params={ | ||
"Bucket": settings.AWS_STORAGE_BUCKET_NAME, | ||
"Key": self.name, | ||
"Key": self.file_name, | ||
}, | ||
) | ||
return URL(url) | ||
|
@@ -620,18 +622,21 @@ def url(self) -> URL | None: | |
return URL(self.original_file.url) | ||
|
||
@property | ||
def name(self) -> str: | ||
# User-facing name | ||
try: | ||
return self.original_file_name or self.original_file.name | ||
except ValueError as e: | ||
logger.exception("attempt to access non-existent file %s", self.pk, exc_info=e) | ||
def file_name(self) -> str: | ||
if self.original_file_name: # delete me? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for backwards compatability |
||
return self.original_file_name | ||
|
||
# could have a stronger (regex?) way of stripping the users email address? | ||
if "/" not in self.original_file.name: | ||
msg = "expected filename to start with the user's email address" | ||
raise ValueError(msg) | ||
return self.original_file.name.split("/")[1] | ||
|
||
@property | ||
def unique_name(self) -> str: | ||
# Name used when processing files that exist in S3 | ||
def s3_key(self) -> str: | ||
"""primary key for accessing file in s3""" | ||
if self.status in INACTIVE_STATUSES: | ||
logger.exception("Attempt to access unique_name for inactive file %s with status %s", self.pk, self.status) | ||
logger.exception("Attempt to access s3-key for inactive file %s with status %s", self.pk, self.status) | ||
raise InactiveFileError(self) | ||
return self.original_file.name | ||
|
||
|
@@ -780,7 +785,7 @@ def save(self, force_insert=False, force_update=False, using=None, update_fields | |
@property | ||
def uri(self) -> str: | ||
"""returns either the url of an external citation or the file uri of a user-uploaded document""" | ||
return self.url or f"file://{self.file.original_file_name}" | ||
return self.url or f"file://{self.file.file_name}" | ||
|
||
|
||
class ChatMessage(UUIDPrimaryKeyBase, TimeStampedModel): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't we still want to use original_file_name here so we see 'stuff.pdf' in the frontend rather than 'me@cabinetoffice/stuff.pdf'?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
file_name
will strip outme@cabinetoffice/
and just returnstuff.pdf
TBH i was hoping that we could just delete
original_file_name
altogether in 30 days (this PR means that its no longer getting populated)