Skip to content

Commit

Permalink
Proxito: normalize code languages and redirect to them (#10750)
Browse files Browse the repository at this point in the history
Language codes are now normalized at the root; the old versions of the language codes are still used when building with Sphinx.

Projects affected by this change:


```
In [1]: old_language_codes = [
   ...:     'nb_NO',
   ...:     'pt_BR',
   ...:     'es_MX',
   ...:     'uk_UA',
   ...:     'zh_CN',
   ...:     'zh_TW',
   ...: ]

In [2]: Project.objects.filter(language__in=old_language_codes).count()
Out[2]: 3544
```

We will probably want to publish a short blog post communicating this change.

We will be using the new code everywhere, even in the API responses.
Old paths using the old code will redirect to the new language code.

### How to deploy this change

- Deploy as usual
- After the webs are out, run the migrations
- This change has zero downtime for doc serving; the only downtime will be for downloads (until the migrations are run).

Closes #2763

---------

Co-authored-by: Manuel Kaufmann <humitos@gmail.com>
  • Loading branch information
stsewd and humitos authored Oct 17, 2023
1 parent 1e6e633 commit 0154576
Show file tree
Hide file tree
Showing 10 changed files with 607 additions and 22 deletions.
12 changes: 10 additions & 2 deletions readthedocs/core/unresolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,13 +286,21 @@ def _match_multiversion_project(
return None

language = match.group("language")
# Normalize old language codes to lowercase with dashes.
normalized_language = language.lower().replace("_", "-")

# TODO: remove after deploy.
# This is so we can temporarily support old language codes
# while we migrate existing projects.
languages = [language, normalized_language]

version_slug = match.group("version")
filename = self._normalize_filename(match.group("filename"))

if parent_project.language == language:
if parent_project.language in languages:
project = parent_project
else:
project = parent_project.translations.filter(language=language).first()
project = parent_project.translations.filter(language__in=languages).first()
if not project:
raise TranslationNotFoundError(
project=parent_project,
Expand Down
12 changes: 9 additions & 3 deletions readthedocs/doc_builder/backends/sphinx.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from readthedocs.builds.models import APIVersion
from readthedocs.core.utils.filesystem import safe_open
from readthedocs.doc_builder.exceptions import PDFNotFound
from readthedocs.projects.constants import PUBLIC
from readthedocs.projects.constants import OLD_LANGUAGES_CODE_MAPPING, PUBLIC
from readthedocs.projects.exceptions import ProjectConfigurationError, UserFileNotFound
from readthedocs.projects.models import Feature
from readthedocs.projects.templatetags.projects_tags import sort_version_aware
Expand Down Expand Up @@ -111,6 +111,10 @@ def __init__(self, *args, **kwargs):
# because Read the Docs will automatically create one for it.
pass

def get_language(self, project):
    """
    Return the language code to hand to Sphinx for ``project``.

    The project's language is looked up in ``OLD_LANGUAGES_CODE_MAPPING``.
    When an entry exists, the mapped value is returned; otherwise the
    project's language is returned unchanged.
    """
    code = project.language
    if code in OLD_LANGUAGES_CODE_MAPPING:
        return OLD_LANGUAGES_CODE_MAPPING[code]
    return code

def get_config_params(self):
"""Get configuration parameters to be rendered into the conf file."""
Expand Down Expand Up @@ -293,14 +297,15 @@ def build(self):
]
if self.config.sphinx.fail_on_warning:
build_command.extend(["-W", "--keep-going"])
language = self.get_language(project)
build_command.extend(
[
"-b",
self.sphinx_builder,
"-d",
self.sphinx_doctrees_dir,
"-D",
f"language={project.language}",
f"language={language}",
# Sphinx's source directory (SOURCEDIR).
# We are executing this command at the location of the `conf.py` file (CWD).
# TODO: ideally we should execute it from where the repository was clonned,
Expand Down Expand Up @@ -472,6 +477,7 @@ class PdfBuilder(BaseSphinx):
pdf_file_name = None

def build(self):
language = self.get_language(self.project)
self.run(
*self.get_sphinx_cmd(),
"-T",
Expand All @@ -481,7 +487,7 @@ def build(self):
"-d",
self.sphinx_doctrees_dir,
"-D",
f"language={self.project.language}",
f"language={language}",
# Sphinx's source directory (SOURCEDIR).
# We are executing this command at the location of the `conf.py` file (CWD).
# TODO: ideally we should execute it from where the repository was clonned,
Expand Down
36 changes: 29 additions & 7 deletions readthedocs/projects/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,15 +279,37 @@
('zh', 'Chinese'),
('zu', 'Zulu'),
# Try these to test our non-2 letter language support
('nb_NO', 'Norwegian Bokmal'),
('pt_BR', 'Brazilian Portuguese'),
('es_MX', 'Mexican Spanish'),
('uk_UA', 'Ukrainian'),
('zh_CN', 'Simplified Chinese'),
('zh_TW', 'Traditional Chinese'),
("nb-no", "Norwegian Bokmal"),
("pt-br", "Brazilian Portuguese"),
("es-mx", "Mexican Spanish"),
("uk-ua", "Ukrainian"),
("zh-cn", "Simplified Chinese"),
("zh-tw", "Traditional Chinese"),
)
LANGUAGE_CODES = [code for code, *_ in LANGUAGES]

LANGUAGES_REGEX = '|'.join([re.escape(code[0]) for code in LANGUAGES])
# URLs are matched against normalized language codes: lowercase,
# with dashes as separators.
# The legacy codes below (uppercase region, underscore separator)
# are deprecated, but they still need to be supported.
old_language_codes = [
    "nb_NO",
    "pt_BR",
    "es_MX",
    "uk_UA",
    "zh_CN",
    "zh_TW",
]
# Normalized code -> legacy code, e.g. "pt-br" -> "pt_BR".
OLD_LANGUAGES_CODE_MAPPING = {
    legacy.replace("_", "-").lower(): legacy for legacy in old_language_codes
}

# Alternation of every accepted language code (regex-escaped):
# the current codes first, followed by the deprecated legacy codes.
LANGUAGES_REGEX = "|".join(
    map(re.escape, [*LANGUAGE_CODES, *OLD_LANGUAGES_CODE_MAPPING.values()])
)

PROGRAMMING_LANGUAGES = (
('words', 'Only Words'),
Expand Down
Loading

0 comments on commit 0154576

Please sign in to comment.