Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Allow registering absolute URLs for autorefs #8

Merged
merged 2 commits into from
May 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 37 additions & 12 deletions src/mkdocs_autorefs/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
and fixes them using the previously stored identifier-URL mapping.
"""

import functools
import logging
from typing import Callable, Dict, Optional

Expand All @@ -19,7 +20,7 @@
from mkdocs.structure.toc import AnchorLink
from mkdocs.utils import warning_filter

from mkdocs_autorefs.references import AutorefsExtension, fix_refs
from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url

log = logging.getLogger(f"mkdocs.plugins.{__name__}")
log.addFilter(warning_filter)
Expand All @@ -45,22 +46,36 @@ def __init__(self) -> None:
"""Initialize the object."""
super().__init__()
self._url_map: Dict[str, str] = {}
self.get_fallback_anchor: Callable[[str], Optional[str]] = lambda identifier: None
self._abs_url_map: Dict[str, str] = {}
self.get_fallback_anchor: Optional[Callable[[str], Optional[str]]] = None

def register_anchor(self, page: str, anchor: str):
def register_anchor(self, page: str, identifier: str):
"""Register that an anchor corresponding to an identifier was encountered when rendering the page.

Arguments:
page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'`
anchor: The HTML anchor (without '#') as a string.
identifier: The HTML anchor (without '#') as a string.
"""
self._url_map[anchor] = f"{page}#{anchor}"
self._url_map[identifier] = f"{page}#{identifier}"

def get_item_url(self, anchor: str) -> str:
def register_url(self, identifier: str, url: str):
"""Register that the identifier should be turned into a link to this URL.

Arguments:
identifier: The new identifier.
url: The absolute URL (including anchor, if needed) where this item can be found.
"""
self._abs_url_map[identifier] = url

def get_item_url(
self, identifier: str, from_url: Optional[str] = None, fallback: Optional[Callable[[str], Optional[str]]] = None
) -> str:
"""Return a site-relative URL with anchor to the identifier, if it's present anywhere.

Arguments:
anchor: The anchor (without '#').
identifier: The anchor (without '#').
from_url: The URL of the base page, from which we link towards the targeted pages.
fallback: An optional function to suggest an alternative anchor to try on failure.

Returns:
A site-relative URL.
Expand All @@ -69,13 +84,22 @@ def get_item_url(self, anchor: str) -> str:
KeyError: If there isn't an item by this identifier anywhere on the site.
"""
try:
return self._url_map[anchor]
url = self._url_map[identifier]
except KeyError:
new_anchor = self.get_fallback_anchor(anchor)
if new_anchor and new_anchor in self._url_map:
return self._url_map[new_anchor]
if identifier in self._abs_url_map:
return self._abs_url_map[identifier]

if fallback:
new_identifier = fallback(identifier)
if new_identifier:
return self.get_item_url(new_identifier, from_url)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we risk infinite recursion here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, sorry that there's no explanation for the "cleverness".

The self-call is not exactly the same: fallback is dropped. So, no, there will be only 2 attempts.

I'll add an explicit fallback=None for clarity.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, my bad for not reading this more thoroughly, I would have seen it 🙂
Thanks for the explanation. The fallback=None is not necessary, but you can add it if you want.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And you did. Thanks again ^^

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ooh, let me revert that: there's some failure, and it's a concern for backwards compatibility.
I'll also check in detail why the test failed; I would not have expected that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is resolved btw. There wasn't really a problem, just an implicit merge conflict.


raise

if from_url is not None:
return relative_url(from_url, url)
return url

def on_config(self, config: Config, **kwargs) -> Config: # noqa: W0613,R0201 (unused arguments, cannot be static)
"""Instantiate our Markdown extension.

Expand Down Expand Up @@ -166,7 +190,8 @@ def on_post_page(self, output: str, page: Page, **kwargs) -> str: # noqa: W0613
"""
log.debug(f"{__name__}: Fixing references in page {page.file.src_path}")

fixed_output, unmapped = fix_refs(output, page.url, self.get_item_url)
url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)
fixed_output, unmapped = fix_refs(output, url_mapper)

if unmapped and log.isEnabledFor(logging.WARNING):
for ref in unmapped:
Expand Down
14 changes: 4 additions & 10 deletions src/mkdocs_autorefs/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def relative_url(url_a: str, url_b: str) -> str:
return f"{relative}#{anchor}"


def fix_ref(url_mapper: Callable[[str], str], from_url: str, unmapped: List[str]) -> Callable:
def fix_ref(url_mapper: Callable[[str], str], unmapped: List[str]) -> Callable:
"""Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).

In our context, we match Markdown references and replace them with HTML links.
Expand All @@ -140,7 +140,6 @@ def fix_ref(url_mapper: Callable[[str], str], from_url: str, unmapped: List[str]
Arguments:
url_mapper: A callable that gets an object's site URL by its identifier,
such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][].
from_url: The URL of the base page, from which we link towards the targeted pages.
unmapped: A list to store unmapped identifiers.

Returns:
Expand All @@ -153,7 +152,7 @@ def inner(match: Match):
title = match["title"]

try:
url = relative_url(from_url, url_mapper(unescape(identifier)))
url = url_mapper(unescape(identifier))
except KeyError:
if match["kind"] == "autorefs-optional":
return title
Expand All @@ -167,24 +166,19 @@ def inner(match: Match):
return inner


def fix_refs(
html: str,
from_url: str,
url_mapper: Callable[[str], str],
) -> Tuple[str, List[str]]:
def fix_refs(html: str, url_mapper: Callable[[str], str]) -> Tuple[str, List[str]]:
"""Fix all references in the given HTML text.

Arguments:
html: The text to fix.
from_url: The URL at which this HTML is served.
url_mapper: A callable that gets an object's site URL by its identifier,
such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][].

Returns:
The fixed HTML.
"""
unmapped = [] # type: ignore
html = AUTO_REF_RE.sub(fix_ref(url_mapper, from_url, unmapped), html)
html = AUTO_REF_RE.sub(fix_ref(url_mapper, unmapped), html)
return html, unmapped


Expand Down
42 changes: 42 additions & 0 deletions tests/test_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Tests for the plugin module."""
import pytest

from mkdocs_autorefs.plugin import AutorefsPlugin


def test_url_registration():
"""Check that URLs can be registered, then obtained."""
plugin = AutorefsPlugin()
plugin.register_anchor(identifier="foo", page="foo1.html")
plugin.register_url(identifier="bar", url="https://example.org/bar.html")

assert plugin.get_item_url("foo") == "foo1.html#foo"
assert plugin.get_item_url("bar") == "https://example.org/bar.html"
with pytest.raises(KeyError):
plugin.get_item_url("baz")


def test_url_registration_with_from_url():
"""Check that URLs can be registered, then obtained, relative to a page."""
plugin = AutorefsPlugin()
plugin.register_anchor(identifier="foo", page="foo1.html")
plugin.register_url(identifier="bar", url="https://example.org/bar.html")

assert plugin.get_item_url("foo", from_url="a/b.html") == "../foo1.html#foo"
assert plugin.get_item_url("bar", from_url="a/b.html") == "https://example.org/bar.html"
with pytest.raises(KeyError):
plugin.get_item_url("baz", from_url="a/b.html")


def test_url_registration_with_fallback():
"""Check that URLs can be registered, then obtained through a fallback."""
plugin = AutorefsPlugin()
plugin.register_anchor(identifier="foo", page="foo1.html")
plugin.register_url(identifier="bar", url="https://example.org/bar.html")

assert plugin.get_item_url("baz", fallback=lambda s: "foo") == "foo1.html#foo"
assert plugin.get_item_url("baz", fallback=lambda s: "bar") == "https://example.org/bar.html"
with pytest.raises(KeyError):
plugin.get_item_url("baz", fallback=lambda s: "baaaa")
with pytest.raises(KeyError):
plugin.get_item_url("baz", fallback=lambda s: None)
19 changes: 16 additions & 3 deletions tests/test_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@ def run_references_test(url_map, source, output, unmapped=None, from_url="page.h
md = markdown.Markdown(extensions=[AutorefsExtension()])
content = md.convert(source)

actual_output, actual_unmapped = fix_refs(content, from_url, url_map.__getitem__)
def url_mapper(identifier):
return relative_url(from_url, url_map[identifier])

actual_output, actual_unmapped = fix_refs(content, url_mapper)
assert actual_output == output
assert actual_unmapped == (unmapped or [])

Expand Down Expand Up @@ -89,6 +92,16 @@ def test_reference_with_punctuation():
)


def test_reference_to_relative_path():
"""Check references from a page at a nested path."""
run_references_test(
from_url="sub/sub/page.html",
url_map={"zz": "foo.html#zz"},
source="This [zz][].",
output='<p>This <a href="../../foo.html#zz">zz</a>.</p>',
)


def test_no_reference_with_space():
"""Check that references with spaces are not fixed."""
run_references_test(
Expand Down Expand Up @@ -160,7 +173,7 @@ def test_custom_required_reference():
"""Check that external HTML-based references are expanded or reported missing."""
url_map = {"ok": "ok.html#ok"}
source = "<span data-autorefs-identifier=bar>foo</span> <span data-autorefs-identifier=ok>ok</span>"
output, unmapped = fix_refs(source, "page.html", url_map.__getitem__)
output, unmapped = fix_refs(source, url_map.__getitem__)
assert output == '[foo][bar] <a href="ok.html#ok">ok</a>'
assert unmapped == ["bar"]

Expand All @@ -169,6 +182,6 @@ def test_custom_optional_reference():
"""Check that optional HTML-based references are expanded and never reported missing."""
url_map = {"ok": "ok.html#ok"}
source = '<span data-autorefs-optional="bar">foo</span> <span data-autorefs-optional=ok>ok</span>'
output, unmapped = fix_refs(source, "page.html", url_map.__getitem__)
output, unmapped = fix_refs(source, url_map.__getitem__)
assert output == 'foo <a href="ok.html#ok">ok</a>'
assert unmapped == []