Proxito: normalize language codes and redirect to them (#10750)

Language codes are being normalized from the root; the old versions of the language codes are still used when building with Sphinx.

Projects affected by this change:

```
In [1]: old_language_codes = [
   ...:     'nb_NO',
   ...:     'pt_BR',
   ...:     'es_MX',
   ...:     'uk_UA',
   ...:     'zh_CN',
   ...:     'zh_TW',
   ...: ]

In [2]: Project.objects.filter(language__in=old_language_codes).count()
Out[2]: 3544
```

We will probably want to publish a small blog post communicating this change.

We will be using the new codes everywhere, even in the API responses. Old paths using an old code will redirect to the new language code.

### How to deploy this change

- Deploy as usual.
- After the webs are out, run the migrations.
- This change has zero downtime for doc serving; the only downtime will be for downloads (until the migration is run).

Closes #2763

---------

Co-authored-by: Manuel Kaufmann <humitos@gmail.com>
readthedocs · Oct 17, 2023 · 0154576 · 0154576
1 parent 1e6e633
commit 0154576
Show file tree

Hide file tree

Showing 10 changed files with 607 additions and 22 deletions.
diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py
@@ -286,13 +286,21 @@ def _match_multiversion_project(
             return None
 
         language = match.group("language")
+        # Normalize old language codes to lowercase with dashes.
+        normalized_language = language.lower().replace("_", "-")
+
+        # TODO: remove after deploy.
+        # This is so we can temporarily support old language codes
+        # while we migrate existing projects.
+        languages = [language, normalized_language]
+
         version_slug = match.group("version")
         filename = self._normalize_filename(match.group("filename"))
 
-        if parent_project.language == language:
+        if parent_project.language in languages:
             project = parent_project
         else:
-            project = parent_project.translations.filter(language=language).first()
+            project = parent_project.translations.filter(language__in=languages).first()
             if not project:
                 raise TranslationNotFoundError(
                     project=parent_project,

diff --git a/readthedocs/doc_builder/backends/sphinx.py b/readthedocs/doc_builder/backends/sphinx.py
@@ -19,7 +19,7 @@
 from readthedocs.builds.models import APIVersion
 from readthedocs.core.utils.filesystem import safe_open
 from readthedocs.doc_builder.exceptions import PDFNotFound
-from readthedocs.projects.constants import PUBLIC
+from readthedocs.projects.constants import OLD_LANGUAGES_CODE_MAPPING, PUBLIC
 from readthedocs.projects.exceptions import ProjectConfigurationError, UserFileNotFound
 from readthedocs.projects.models import Feature
 from readthedocs.projects.templatetags.projects_tags import sort_version_aware
@@ -111,6 +111,10 @@ def __init__(self, *args, **kwargs):
             # because Read the Docs will automatically create one for it.
             pass
 
+    def get_language(self, project):
+        """Get a Sphinx compatible language code."""
+        language = project.language
+        return OLD_LANGUAGES_CODE_MAPPING.get(language, language)
 
     def get_config_params(self):
         """Get configuration parameters to be rendered into the conf file."""
@@ -293,14 +297,15 @@ def build(self):
         ]
         if self.config.sphinx.fail_on_warning:
             build_command.extend(["-W", "--keep-going"])
+        language = self.get_language(project)
         build_command.extend(
             [
                 "-b",
                 self.sphinx_builder,
                 "-d",
                 self.sphinx_doctrees_dir,
                 "-D",
-                f"language={project.language}",
+                f"language={language}",
                 # Sphinx's source directory (SOURCEDIR).
                 # We are executing this command at the location of the `conf.py` file (CWD).
                 # TODO: ideally we should execute it from where the repository was clonned,
@@ -472,6 +477,7 @@ class PdfBuilder(BaseSphinx):
     pdf_file_name = None
 
     def build(self):
+        language = self.get_language(self.project)
         self.run(
             *self.get_sphinx_cmd(),
             "-T",
@@ -481,7 +487,7 @@ def build(self):
             "-d",
             self.sphinx_doctrees_dir,
             "-D",
-            f"language={self.project.language}",
+            f"language={language}",
             # Sphinx's source directory (SOURCEDIR).
             # We are executing this command at the location of the `conf.py` file (CWD).
             # TODO: ideally we should execute it from where the repository was clonned,

diff --git a/readthedocs/projects/constants.py b/readthedocs/projects/constants.py
@@ -279,15 +279,37 @@
     ('zh', 'Chinese'),
     ('zu', 'Zulu'),
     # Try these to test our non-2 letter language support
-    ('nb_NO', 'Norwegian Bokmal'),
-    ('pt_BR', 'Brazilian Portuguese'),
-    ('es_MX', 'Mexican Spanish'),
-    ('uk_UA', 'Ukrainian'),
-    ('zh_CN', 'Simplified Chinese'),
-    ('zh_TW', 'Traditional Chinese'),
+    ("nb-no", "Norwegian Bokmal"),
+    ("pt-br", "Brazilian Portuguese"),
+    ("es-mx", "Mexican Spanish"),
+    ("uk-ua", "Ukrainian"),
+    ("zh-cn", "Simplified Chinese"),
+    ("zh-tw", "Traditional Chinese"),
 )
+LANGUAGE_CODES = [code for code, *_ in LANGUAGES]
 
-LANGUAGES_REGEX = '|'.join([re.escape(code[0]) for code in LANGUAGES])
+# Normalize the language codes to lowercase with dashes,
+# we use them to match the language codes in the URL.
+# The old language codes were uppercase with underscores,
+# and are deprecated, but we still need to support them.
+old_language_codes = [
+    "nb_NO",
+    "pt_BR",
+    "es_MX",
+    "uk_UA",
+    "zh_CN",
+    "zh_TW",
+]
+OLD_LANGUAGES_CODE_MAPPING = {
+    code.lower().replace("_", "-"): code for code in old_language_codes
+}
+
+LANGUAGES_REGEX = "|".join(
+    [
+        re.escape(code)
+        for code in LANGUAGE_CODES + list(OLD_LANGUAGES_CODE_MAPPING.values())
+    ]
+)
 
 PROGRAMMING_LANGUAGES = (
     ('words', 'Only Words'),