Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proxito: move "canonicalizing" logic to docs view #10027

Merged
merged 4 commits into from
Feb 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions readthedocs/proxito/constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
REDIRECT_HTTPS = 'https'
REDIRECT_CANONICAL_CNAME = 'canonical-cname'
REDIRECT_SUBPROJECT_MAIN_DOMAIN = 'subproject-main-domain'
from enum import Enum, auto


class RedirectType(Enum):
http_to_https = auto()
to_canonical_domain = auto()
subproject_to_main_domain = auto()
# Application defined redirect.
system = auto()
# User defined redirect.
user = auto()
28 changes: 0 additions & 28 deletions readthedocs/proxito/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
unresolver,
)
from readthedocs.core.utils import get_cache_tag
from readthedocs.projects.models import Domain, ProjectRelationship
from readthedocs.proxito import constants

log = structlog.get_logger(__name__)

Expand Down Expand Up @@ -185,34 +183,8 @@ def _set_request_attributes(self, request, unresolved_domain):
Set attributes in the request from the unresolved domain.

- Set ``request.unresolved_domain`` to the unresolved domain.
- If the domain needs to redirect, set the canonicalize attribute accordingly.
"""
request.unresolved_domain = unresolved_domain
project = unresolved_domain.project
stsewd marked this conversation as resolved.
Show resolved Hide resolved
if unresolved_domain.is_from_custom_domain:
domain = unresolved_domain.domain
if domain.https and not request.is_secure():
# Redirect HTTP -> HTTPS (302) for this custom domain.
log.debug("Proxito CNAME HTTPS Redirect.", domain=domain.domain)
request.canonicalize = constants.REDIRECT_HTTPS
elif unresolved_domain.is_from_public_domain:
canonical_domain = (
Domain.objects.filter(project=project)
.filter(canonical=True, https=True)
.exists()
)
if canonical_domain:
log.debug(
"Proxito Public Domain -> Canonical Domain Redirect.",
project_slug=project.slug,
)
request.canonicalize = constants.REDIRECT_CANONICAL_CNAME
elif ProjectRelationship.objects.filter(child=project).exists():
log.debug(
"Proxito Public Domain -> Subproject Main Domain Redirect.",
project_slug=project.slug,
)
request.canonicalize = constants.REDIRECT_SUBPROJECT_MAIN_DOMAIN

def process_request(self, request): # noqa
# Initialize our custom request attributes.
Expand Down
71 changes: 37 additions & 34 deletions readthedocs/proxito/tests/test_middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from readthedocs.builds.models import Version
from readthedocs.projects.constants import PUBLIC
from readthedocs.projects.models import Domain, Feature, Project, ProjectRelationship
from readthedocs.proxito.constants import RedirectType
from readthedocs.proxito.middleware import ProxitoMiddleware
from readthedocs.rtd_tests.base import RequestFactoryTestMixin
from readthedocs.rtd_tests.storage import BuildMediaFileSystemStorageTest
Expand Down Expand Up @@ -49,32 +50,32 @@ def test_proper_cname_https_upgrade(self):
get(Domain, project=self.pip, domain=cname, canonical=True, https=True)

for url in (self.url, '/subdir/'):
request = self.request(method='get', path=url, HTTP_HOST=cname)
res = self.run_middleware(request)
self.assertIsNone(res)
self.assertTrue(hasattr(request, 'canonicalize'))
self.assertEqual(request.canonicalize, 'https')
resp = self.client.get(path=url, secure=False, HTTP_HOST=cname)
self.assertEqual(resp.status_code, 302)
self.assertEqual(resp["location"], f"https://{cname}{url}")
self.assertEqual(resp["X-RTD-Redirect"], RedirectType.http_to_https.name)

def test_canonical_cname_redirect(self):
"""Requests to the public domain URL should redirect to the custom domain if the domain is canonical/https."""
cname = 'docs.random.com'
domain = get(Domain, project=self.pip, domain=cname, canonical=False, https=False)

request = self.request(method='get', path=self.url, HTTP_HOST='pip.dev.readthedocs.io')
res = self.run_middleware(request)
self.assertIsNone(res)
self.assertFalse(hasattr(request, 'canonicalize'))
resp = self.client.get(self.url, HTTP_HOST="pip.dev.readthedocs.io")
# This is the / -> /en/latest/ redirect.
self.assertEqual(resp.status_code, 302)
self.assertEqual(resp["X-RTD-Redirect"], RedirectType.system.name)

# Make the domain canonical/https and make sure we redirect
domain.canonical = True
domain.https = True
domain.save()
for url in (self.url, '/subdir/'):
request = self.request(method='get', path=url, HTTP_HOST='pip.dev.readthedocs.io')
res = self.run_middleware(request)
self.assertIsNone(res)
self.assertTrue(hasattr(request, 'canonicalize'))
self.assertEqual(request.canonicalize, 'canonical-cname')
for url in (self.url, "/subdir/"):
resp = self.client.get(url, HTTP_HOST="pip.dev.readthedocs.io")
self.assertEqual(resp.status_code, 302)
self.assertEqual(resp["location"], f"https://{cname}{url}")
self.assertEqual(
resp["X-RTD-Redirect"], RedirectType.to_canonical_domain.name
)

def test_subproject_redirect(self):
"""Requests to a subproject should redirect to the domain of the main project."""
Expand All @@ -92,25 +93,31 @@ def test_subproject_redirect(self):
child=subproject,
)

for url in (self.url, '/subdir/', '/en/latest/'):
request = self.request(method='get', path=url, HTTP_HOST='subproject.dev.readthedocs.io')
res = self.run_middleware(request)
self.assertIsNone(res)
self.assertEqual(getattr(request, 'canonicalize', None), 'subproject-main-domain')
for url in (self.url, "/subdir/", "/en/latest/"):
resp = self.client.get(url, HTTP_HOST="subproject.dev.readthedocs.io")
self.assertEqual(resp.status_code, 302)
self.assertTrue(
resp["location"].startswith(
"http://pip.dev.readthedocs.io/projects/subproject/"
)
)
self.assertEqual(
resp["X-RTD-Redirect"], RedirectType.subproject_to_main_domain.name
)

# Using a custom domain in a subproject isn't supported (or shouldn't be!).
cname = 'docs.random.com'
domain = get(
get(
Domain,
project=subproject,
domain=cname,
canonical=True,
https=True,
)
request = self.request(method='get', path=self.url, HTTP_HOST='subproject.dev.readthedocs.io')
res = self.run_middleware(request)
self.assertIsNone(res)
self.assertEqual(getattr(request, 'canonicalize', None), 'canonical-cname')
resp = self.client.get(self.url, HTTP_HOST="subproject.dev.readthedocs.io")
self.assertEqual(resp.status_code, 302)
self.assertEqual(resp["location"], f"http://pip.dev.readthedocs.io/")
self.assertEqual(resp["X-RTD-Redirect"], RedirectType.to_canonical_domain.name)

# We are not canonicalizing custom domains -> public domain for now
@pytest.mark.xfail(strict=True)
Expand All @@ -119,20 +126,16 @@ def test_canonical_cname_redirect_public_domain(self):
cname = 'docs.random.com'
domain = get(Domain, project=self.pip, domain=cname, canonical=False, https=False)

request = self.request(method='get', path=self.url, HTTP_HOST=cname)
res = self.run_middleware(request)
self.assertIsNone(res)
self.assertTrue(hasattr(request, 'canonicalize'))
self.assertEqual(request.canonicalize, 'noncanonical-cname')
resp = self.client.get(self.url, HTTP_HOST=cname)
self.assertEqual(resp.status_code, 302)
self.assertEqual(resp["X-RTD-Redirect"], "noncanonical-cname")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we create a RedirectType.to_non_canonical_domain and use it here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We aren't setting that value from anywhere, this test is marked as fail too. This looks like a feature we wanted to implement at some point, I'm +1 on just deleting these tests.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Requests to a custom domain should redirect to the public domain or canonical domain if not canonical.

I guess that's when users don't have any custom domain set as canonical we want to redirect to the public domain?

@ericholscher may have more information about this "lost feature"

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#6905 (comment) I think is the conversation. I think it's mostly just awkward to redirect people away from custom domains. There are use cases where people might want to have docs on a custom domain, but not have it be canonical.

So I'm +1 on deleting this I think.


# Make the domain canonical and make sure we don't redirect
domain.canonical = True
domain.save()
for url in (self.url, '/subdir/'):
request = self.request(method='get', path=url, HTTP_HOST=cname)
res = self.run_middleware(request)
self.assertIsNone(res)
self.assertFalse(hasattr(request, 'canonicalize'))
resp = self.client.get(url, HTTP_HOST=cname)
self.assertNotIn("X-RTD-Redirect", resp)

def test_proper_cname_uppercase(self):
get(Domain, project=self.pip, domain='docs.random.com')
Expand Down
18 changes: 10 additions & 8 deletions readthedocs/proxito/tests/test_redirects.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import pytest
from django.test import override_settings

from readthedocs.proxito.constants import RedirectType

from .base import BaseDocServing


Expand Down Expand Up @@ -35,7 +37,7 @@ def test_custom_domain_root_url(self):
self.assertEqual(
r['Location'], f'https://{self.domain.domain}/en/latest/',
)
self.assertEqual(r['X-RTD-Redirect'], 'system')
self.assertEqual(r["X-RTD-Redirect"], RedirectType.system.name)

def test_custom_domain_root_url_no_slash(self):
self.domain.canonical = True
Expand All @@ -46,7 +48,7 @@ def test_custom_domain_root_url_no_slash(self):
self.assertEqual(
r['Location'], f'https://{self.domain.domain}/en/latest/',
)
self.assertEqual(r['X-RTD-Redirect'], 'system')
self.assertEqual(r["X-RTD-Redirect"], RedirectType.system.name)

def test_single_version_root_url_doesnt_redirect(self):
self.project.single_version = True
Expand Down Expand Up @@ -145,15 +147,15 @@ def test_canonicalize_https_redirect(self):
self.assertEqual(
r['Location'], f'https://{self.domain.domain}/',
)
self.assertEqual(r['X-RTD-Redirect'], 'https')
self.assertEqual(r["X-RTD-Redirect"], RedirectType.http_to_https.name)

# We should redirect before 404ing
r = self.client.get('/en/latest/404after302', HTTP_HOST=self.domain.domain)
self.assertEqual(r.status_code, 302)
self.assertEqual(
r['Location'], f'https://{self.domain.domain}/en/latest/404after302',
)
self.assertEqual(r['X-RTD-Redirect'], 'https')
self.assertEqual(r["X-RTD-Redirect"], RedirectType.http_to_https.name)

def test_canonicalize_public_domain_to_cname_redirect(self):
"""Redirect to the CNAME if it is canonical."""
Expand All @@ -165,31 +167,31 @@ def test_canonicalize_public_domain_to_cname_redirect(self):
self.assertEqual(
r['Location'], f'https://{self.domain.domain}/',
)
self.assertEqual(r['X-RTD-Redirect'], 'canonical-cname')
self.assertEqual(r["X-RTD-Redirect"], RedirectType.to_canonical_domain.name)

# We should redirect before 404ing
r = self.client.get('/en/latest/404after302', HTTP_HOST='project.dev.readthedocs.io')
self.assertEqual(r.status_code, 302)
self.assertEqual(
r['Location'], f'https://{self.domain.domain}/en/latest/404after302',
)
self.assertEqual(r['X-RTD-Redirect'], 'canonical-cname')
self.assertEqual(r["X-RTD-Redirect"], RedirectType.to_canonical_domain.name)

def test_translation_redirect(self):
r = self.client.get('/', HTTP_HOST='translation.dev.readthedocs.io')
self.assertEqual(r.status_code, 302)
self.assertEqual(
r['Location'], f'https://project.dev.readthedocs.io/es/latest/',
)
self.assertEqual(r['X-RTD-Redirect'], 'system')
self.assertEqual(r["X-RTD-Redirect"], RedirectType.system.name)

def test_translation_secure_redirect(self):
r = self.client.get('/', HTTP_HOST='translation.dev.readthedocs.io', secure=True)
self.assertEqual(r.status_code, 302)
self.assertEqual(
r['Location'], f'https://project.dev.readthedocs.io/es/latest/',
)
self.assertEqual(r['X-RTD-Redirect'], 'system')
self.assertEqual(r["X-RTD-Redirect"], RedirectType.system.name)

# We are not canonicalizing custom domains -> public domain for now
@pytest.mark.xfail(strict=True)
Expand Down
23 changes: 14 additions & 9 deletions readthedocs/proxito/views/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from readthedocs.builds.constants import EXTERNAL, INTERNAL
from readthedocs.core.resolver import resolve
from readthedocs.projects.constants import MEDIA_TYPE_HTML
from readthedocs.proxito.constants import REDIRECT_CANONICAL_CNAME, REDIRECT_HTTPS
from readthedocs.proxito.constants import RedirectType
from readthedocs.redirects.exceptions import InfiniteRedirectException
from readthedocs.storage import build_media_storage, staticfiles_storage

Expand Down Expand Up @@ -324,7 +324,7 @@ def system_redirect(
"System Redirect.", host=request.get_host(), from_url=filename, to_url=to
)
resp = HttpResponseRedirect(to)
resp['X-RTD-Redirect'] = 'system'
resp["X-RTD-Redirect"] = RedirectType.system.name
return resp

def canonical_redirect(
Expand All @@ -333,7 +333,7 @@ def canonical_redirect(
final_project,
version_slug,
filename,
redirect_type=None,
redirect_type,
is_external_version=False,
):
"""
Expand All @@ -360,9 +360,12 @@ def canonical_redirect(
from_url = request.build_absolute_uri()
parsed_from = urlparse(from_url)

if redirect_type == REDIRECT_HTTPS:
to = parsed_from._replace(scheme='https').geturl()
else:
if redirect_type == RedirectType.http_to_https:
to = parsed_from._replace(scheme="https").geturl()
elif redirect_type in [
RedirectType.to_canonical_domain,
RedirectType.subproject_to_main_domain,
]:
to = resolve(
project=final_project,
version_slug=version_slug,
Expand All @@ -371,12 +374,14 @@ def canonical_redirect(
external=is_external_version,
)
# When a canonical redirect is done, only change the domain.
if redirect_type == REDIRECT_CANONICAL_CNAME:
if redirect_type == RedirectType.to_canonical_domain:
parsed_to = urlparse(to)
to = parsed_from._replace(
scheme=parsed_to.scheme,
netloc=parsed_to.netloc,
).geturl()
else:
raise NotImplementedError

if from_url == to:
# check that we do have a response and avoid infinite redirect
Expand All @@ -388,7 +393,7 @@ def canonical_redirect(

log.info('Canonical Redirect.', host=request.get_host(), from_url=filename, to_url=to)
resp = HttpResponseRedirect(to)
resp["X-RTD-Redirect"] = redirect_type or "unknown"
resp["X-RTD-Redirect"] = redirect_type.name
stsewd marked this conversation as resolved.
Show resolved Hide resolved
return resp

def get_redirect(
Expand Down Expand Up @@ -460,5 +465,5 @@ def get_redirect_response(self, request, redirect_path, proxito_path, http_statu
resp = HttpResponseRedirect(new_path)

# Add a user-visible header to make debugging easier
resp['X-RTD-Redirect'] = 'user'
resp["X-RTD-Redirect"] = RedirectType.user.name
return resp
Loading