Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

11168 clean unused glossary languages #12331

Merged
merged 19 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Not yet released.
* :ref:`autofix` for Devanagari danda now better handles latin script.
* :ref:`autofix` for French and Breton now uses a non-breaking space before colons instead of a narrow one.
* :ref:`api` now has a preview OpenAPI specification.
* Stale, empty glossaries are now automatically removed.
* :kbd:`?` now displays available :ref:`keyboard`.
* Translation and language view in the project now include basic information about the language and plurals.
* :ref:`bulk-edit` shows a preview of matched strings.
Expand Down
9 changes: 7 additions & 2 deletions weblate/api/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3327,7 +3327,11 @@ def test_add_plural(self) -> None:
)

def test_delete(self) -> None:
start_count = Translation.objects.count()
def _translation_count():
# exclude glossaries because stale glossaries are also cleaned out
return Translation.objects.filter(component__is_glossary=False).count()

start_count = _translation_count()
self.do_request(
"api:translation-detail", self.translation_kwargs, method="delete", code=403
)
Expand All @@ -3338,7 +3342,8 @@ def test_delete(self) -> None:
superuser=True,
code=204,
)
self.assertEqual(Translation.objects.count(), start_count - 1)

self.assertEqual(_translation_count(), start_count - 1)


class UnitAPITest(APIBaseTest):
Expand Down
67 changes: 66 additions & 1 deletion weblate/glossary/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,15 @@

from __future__ import annotations

from django.db import transaction
from django.db.models import F

from weblate.auth.models import get_anonymous
from weblate.lang.models import Language
from weblate.trans.models import Component
from weblate.trans.models import Component, Project, Translation
from weblate.utils.celery import app
from weblate.utils.lock import WeblateLockTimeoutError
from weblate.utils.stats import prefetch_stats


@app.task(
nijel marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -40,6 +45,66 @@ def sync_glossary_languages(pk: int, component: Component | None = None) -> None
component.create_translations_task()


@app.task(trail=False, autoretry_for=(Project.DoesNotExist, WeblateLockTimeoutError))
def cleanup_stale_glossaries(project: int | Project) -> None:
    """
    Delete stale glossary translations of a project.

    A glossary translation is removed when all of the following hold:

    - its language is not used by any non-glossary component of the project
    - its language differs from the source language of its component
    - it has no translated strings
    - its component is managed by Weblate (i.e. repo == 'local:')

    Every component that lost a translation gets its
    ``schedule_update_checks`` registered to run on transaction commit.

    NOTE: there is no guard that keeps the last remaining glossary of a
    project; only the conditions listed above are evaluated.

    :param project: Project instance or primary key of the project.
    """
    if isinstance(project, int):
        project = Project.objects.get(pk=project)

    # Languages still in use by regular (non-glossary) components
    languages_in_non_glossary_components: set[int] = set(
        Translation.objects.filter(
            component__project=project, component__is_glossary=False
        ).values_list("language_id", flat=True)
    )

    # Candidates: glossary translations in languages that are neither used
    # elsewhere in the project nor the glossary source language
    glossary_translations = prefetch_stats(
        Translation.objects.filter(
            component__project=project, component__is_glossary=True
        )
        .prefetch()
        .exclude(language__id__in=languages_in_non_glossary_components)
        .exclude(language=F("component__source_language"))
    )

    # Components that lost a translation, keyed by primary key so each one is
    # scheduled only once (insertion order is preserved)
    components_to_check: dict[int, Component] = {}

    for glossary in glossary_translations:
        component = glossary.component
        # Only empty glossaries managed by Weblate itself can be removed
        if component.repo == "local:" and glossary.stats.translated == 0:
            glossary.remove(get_anonymous())
            components_to_check.setdefault(component.pk, component)

    for component in components_to_check.values():
        transaction.on_commit(component.schedule_update_checks)


@app.task(
trail=False,
autoretry_for=(Component.DoesNotExist, WeblateLockTimeoutError),
Expand Down
34 changes: 33 additions & 1 deletion weblate/glossary/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
from django.urls import reverse

from weblate.glossary.models import get_glossary_terms, get_glossary_tsv
from weblate.glossary.tasks import sync_terminology
from weblate.glossary.tasks import (
cleanup_stale_glossaries,
sync_terminology,
)
from weblate.lang.models import Language
from weblate.trans.models import Unit
from weblate.trans.tests.test_views import ViewTestCase
from weblate.trans.tests.utils import get_test_file
Expand Down Expand Up @@ -472,3 +476,31 @@ def test_tsv(self) -> None:
lines = list(reader)
self.assertEqual(len(lines), 163)
self.assertTrue(all(len(line) == 2 for line in lines))

def test_stale_glossaries_cleanup(self) -> None:
    """Check that stale glossaries are removed only when managed by Weblate."""

    def glossary_count() -> int:
        return self.glossary_component.translation_set.count()

    def set_glossary_repo(repo: str) -> None:
        self.glossary_component.repo = repo
        self.glossary_component.save()

    # Start with a glossary that is managed outside of Weblate
    set_glossary_repo("git://example.com/test/project.git")
    expected = glossary_count()

    # No translation has been removed yet, the glossary has to stay intact
    cleanup_stale_glossaries(self.project.id)
    self.assertEqual(glossary_count(), expected)

    # Drop German from the regular component
    language = Language.objects.get(code="de")
    self.component.translation_set.get(language=language).remove(self.user)

    # The glossary is still kept while it is managed externally
    cleanup_stale_glossaries(self.project.id)
    self.assertEqual(glossary_count(), expected)

    # Hand the glossary over to Weblate
    set_glossary_repo("local:")

    # Now exactly one glossary translation is expected to be gone
    cleanup_stale_glossaries(self.project.id)
    self.assertEqual(glossary_count(), expected - 1)
6 changes: 5 additions & 1 deletion weblate/trans/models/translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,10 @@
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.stats = TranslationStats(self)
self.addon_commit_files = []

Check failure on line 180 in weblate/trans/models/translation.py

View workflow job for this annotation

GitHub Actions / mypy

Need type annotation for "addon_commit_files" (hint: "addon_commit_files: list[<type>] = ...")
self.reason = ""
self._invalidate_scheduled = False
self.update_changes = []

Check failure on line 183 in weblate/trans/models/translation.py

View workflow job for this annotation

GitHub Actions / mypy

Need type annotation for "update_changes" (hint: "update_changes: list[<type>] = ...")
# Project backup integration
self.original_id = -1

Expand Down Expand Up @@ -321,9 +321,9 @@
except KeyError:
newunit = Unit(translation=self, id_hash=id_hash, state=-1)
# Avoid fetching empty list of checks from the database
newunit.all_checks = []

Check failure on line 324 in weblate/trans/models/translation.py

View workflow job for this annotation

GitHub Actions / mypy

Incompatible types in assignment (expression has type "list[Never]", variable has type "QuerySet[Check, Check]")
# Avoid fetching empty list of variants
newunit._prefetched_objects_cache = { # noqa: SLF001

Check failure on line 326 in weblate/trans/models/translation.py

View workflow job for this annotation

GitHub Actions / mypy

"Unit" has no attribute "_prefetched_objects_cache"
"defined_variants": Variant.objects.none()
}
is_new = True
Expand Down Expand Up @@ -384,7 +384,7 @@
self.component.check_template_valid()

# List of updated units (used for cleanup and duplicates detection)
updated = {}

Check failure on line 387 in weblate/trans/models/translation.py

View workflow job for this annotation

GitHub Actions / mypy

Need type annotation for "updated" (hint: "updated: dict[<type>, <type>] = ...")

try:
store = self.store
Expand Down Expand Up @@ -816,7 +816,7 @@
# Update po file header
now = timezone.now()
if not timezone.is_aware(now):
now = timezone.make_aware(now, timezone.utc)

Check failure on line 819 in weblate/trans/models/translation.py

View workflow job for this annotation

GitHub Actions / mypy

Module has no attribute "utc"

# Prepare headers to update
headers = {
Expand Down Expand Up @@ -1201,7 +1201,7 @@
if has_template:
existing = set(self.unit_set.values_list("context", flat=True))
else:
existing = set(self.unit_set.values_list("context", "source"))

Check failure on line 1204 in weblate/trans/models/translation.py

View workflow job for this annotation

GitHub Actions / mypy

Argument 1 to "set" has incompatible type "QuerySet[Unit, tuple[str, str]]"; expected "Iterable[str]"
for _set_fuzzy, unit in store.iterate_merge(fuzzy, only_translated=False):
idkey = unit.context if has_template else (unit.context, unit.source)
if idkey in existing:
Expand Down Expand Up @@ -1360,7 +1360,9 @@
return self.component.get_export_url()

def remove(self, user: User) -> None:
"""Remove translation from the VCS."""
"""Remove translation from the Database and VCS."""
from weblate.glossary.tasks import cleanup_stale_glossaries

author = user.get_author_name()
# Log
self.log_info("removing %s as %s", self.filenames, author)
Expand Down Expand Up @@ -1389,6 +1391,8 @@
user=user,
author=user,
)
if not self.component.is_glossary:
cleanup_stale_glossaries.delay(self.component.project.id)

def handle_store_change(
self,
Expand All @@ -1414,7 +1418,7 @@

def get_store_change_translations(self):
component = self.component
result = []

Check failure on line 1421 in weblate/trans/models/translation.py

View workflow job for this annotation

GitHub Actions / mypy

Need type annotation for "result" (hint: "result: list[<type>] = ...")
if self.is_source:
result.extend(component.translation_set.exclude(id=self.id))
# Source is always at the end
Expand Down Expand Up @@ -1495,7 +1499,7 @@
elif add_terminology and translation != self:
current_target = ""
else:
current_target = target

Check failure on line 1502 in weblate/trans/models/translation.py

View workflow job for this annotation

GitHub Actions / mypy

Incompatible types in assignment (expression has type "str | list[str] | None", variable has type "str")
if current_target is None:
current_target = ""
if isinstance(current_target, list):
Expand Down
Loading