Skip to content

Commit

Permalink
feat: clean unused glossary languages (#12331)
Browse files Browse the repository at this point in the history
* cleanup stale glossaries translation delete and glossary languages sync

* changelog update

* test fix

* Update weblate/glossary/tasks.py

Co-authored-by: Michal Čihař <michal@cihar.com>

* update glossary.tasks.cleanup_stale_glossaries

* sentence correction

* fix missing argument TypeError

* only cleanup glossaries on translation delete

* update tests

* cleanup task trigger update

* adjust test to changed condition

* Update weblate/glossary/tasks.py

Co-authored-by: Michal Čihař <michal@cihar.com>

---------

Co-authored-by: Michal Čihař <michal@cihar.com>
Co-authored-by: Benjamin Alan Jamie <benjamin@weblate.org>
  • Loading branch information
3 people authored Oct 10, 2024
1 parent 7047c2d commit 1a38bed
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 5 deletions.
1 change: 1 addition & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Not yet released.
* :ref:`autofix` for Devanagari danda now better handles latin script.
* :ref:`autofix` for French and Breton now uses a non-breaking space before colons instead of a narrow one.
* :ref:`api` now has a preview OpenAPI specification.
* Stale, empty glossaries are now automatically removed.
* :kbd:`?` now displays available :ref:`keyboard`.
* Translation and language view in the project now include basic information about the language and plurals.
* :ref:`bulk-edit` shows a preview of matched strings.
Expand Down
9 changes: 7 additions & 2 deletions weblate/api/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3327,7 +3327,11 @@ def test_add_plural(self) -> None:
)

def test_delete(self) -> None:
start_count = Translation.objects.count()
def _translation_count():
# exclude glossaries because stale glossaries are also cleaned out
return Translation.objects.filter(component__is_glossary=False).count()

start_count = _translation_count()
self.do_request(
"api:translation-detail", self.translation_kwargs, method="delete", code=403
)
Expand All @@ -3338,7 +3342,8 @@ def test_delete(self) -> None:
superuser=True,
code=204,
)
self.assertEqual(Translation.objects.count(), start_count - 1)

self.assertEqual(_translation_count(), start_count - 1)


class UnitAPITest(APIBaseTest):
Expand Down
67 changes: 66 additions & 1 deletion weblate/glossary/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,15 @@

from __future__ import annotations

from django.db import transaction
from django.db.models import F

from weblate.auth.models import get_anonymous
from weblate.lang.models import Language
from weblate.trans.models import Component
from weblate.trans.models import Component, Project, Translation
from weblate.utils.celery import app
from weblate.utils.lock import WeblateLockTimeoutError
from weblate.utils.stats import prefetch_stats


@app.task(
Expand Down Expand Up @@ -40,6 +45,66 @@ def sync_glossary_languages(pk: int, component: Component | None = None) -> None
component.create_translations_task()


@app.task(trail=False, autoretry_for=(Project.DoesNotExist, WeblateLockTimeoutError))
def cleanup_stale_glossaries(project: int | Project) -> None:
    """
    Delete stale glossary translations of a project.

    A glossary translation is removed when all of the following hold:

    - its language is not used by any non-glossary component of the project
    - its language differs from the source language of its component
    - it has no translated strings (``stats.translated == 0``)
    - its component is managed by Weblate (``repo == "local:"``)

    Update checks are scheduled on transaction commit for every component
    that had a translation removed.

    NOTE(review): earlier documentation of this task also stated that the
    only glossary of a project is never removed; no such guard is implemented
    here, so it is intentionally not listed above. Confirm whether the guard
    is still wanted.

    :param project: Project instance, or the primary key of one.
    """
    if isinstance(project, int):
        project = Project.objects.get(pk=project)

    # Languages that still have a translation in a regular component;
    # glossary translations in these languages are never considered stale.
    languages_in_non_glossary_components: set[int] = set(
        Translation.objects.filter(
            component__project=project, component__is_glossary=False
        ).values_list("language_id", flat=True)
    )

    glossary_translations = prefetch_stats(
        Translation.objects.filter(
            component__project=project, component__is_glossary=True
        )
        .prefetch()
        .exclude(language__id__in=languages_in_non_glossary_components)
        .exclude(language=F("component__source_language"))
    )

    def can_delete(_glossary: Translation) -> bool:
        """
        Check if a glossary translation can be deleted.

        It can be deleted if:
        - it is managed by Weblate (i.e. repo == 'local:')
        - it has no translated strings
        """
        # Plain attribute comparison first; the statistics are consulted
        # only for glossaries that are managed by Weblate.
        return (
            _glossary.component.repo == "local:" and _glossary.stats.translated == 0
        )

    # Keyed by primary key so each affected component is scheduled once.
    components_to_check: dict[int, Component] = {}

    for glossary in glossary_translations:
        if not can_delete(glossary):
            continue
        glossary.remove(get_anonymous())
        components_to_check.setdefault(glossary.component.pk, glossary.component)

    for component in components_to_check.values():
        transaction.on_commit(component.schedule_update_checks)


@app.task(
trail=False,
autoretry_for=(Component.DoesNotExist, WeblateLockTimeoutError),
Expand Down
34 changes: 33 additions & 1 deletion weblate/glossary/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
from django.urls import reverse

from weblate.glossary.models import get_glossary_terms, get_glossary_tsv
from weblate.glossary.tasks import sync_terminology
from weblate.glossary.tasks import (
cleanup_stale_glossaries,
sync_terminology,
)
from weblate.lang.models import Language
from weblate.trans.models import Unit
from weblate.trans.tests.test_views import ViewTestCase
from weblate.trans.tests.utils import get_test_file
Expand Down Expand Up @@ -472,3 +476,31 @@ def test_tsv(self) -> None:
lines = list(reader)
self.assertEqual(len(lines), 163)
self.assertTrue(all(len(line) == 2 for line in lines))

def test_stale_glossaries_cleanup(self) -> None:
    """Run cleanup_stale_glossaries against external and Weblate-managed glossaries."""

    def glossary_translation_count() -> int:
        # Always re-query so removals done by the task are visible.
        return self.glossary_component.translation_set.count()

    def set_glossary_repo(repo: str) -> None:
        self.glossary_component.repo = repo
        self.glossary_component.save()

    # Start with a glossary that is managed outside of Weblate.
    set_glossary_repo("git://example.com/test/project.git")

    expected = glossary_translation_count()

    # Nothing is expected to be removed at this point.
    cleanup_stale_glossaries(self.project.id)
    self.assertEqual(glossary_translation_count(), expected)

    # Drop the German translation of the regular component.
    language = Language.objects.get(code="de")
    self.component.translation_set.get(language=language).remove(self.user)

    # The glossary is still external, so the count must not change.
    cleanup_stale_glossaries(self.project.id)
    self.assertEqual(glossary_translation_count(), expected)

    # Hand the glossary over to Weblate.
    set_glossary_repo("local:")

    # Exactly one glossary translation is expected to be removed now.
    cleanup_stale_glossaries(self.project.id)
    self.assertEqual(glossary_translation_count(), expected - 1)
6 changes: 5 additions & 1 deletion weblate/trans/models/translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1360,7 +1360,9 @@ def get_export_url(self):
return self.component.get_export_url()

def remove(self, user: User) -> None:
"""Remove translation from the VCS."""
"""Remove translation from the Database and VCS."""
from weblate.glossary.tasks import cleanup_stale_glossaries

author = user.get_author_name()
# Log
self.log_info("removing %s as %s", self.filenames, author)
Expand Down Expand Up @@ -1389,6 +1391,8 @@ def remove(self, user: User) -> None:
user=user,
author=user,
)
if not self.component.is_glossary:
cleanup_stale_glossaries.delay(self.component.project.id)

def handle_store_change(
self,
Expand Down

0 comments on commit 1a38bed

Please sign in to comment.