Skip to content

Commit

Permalink
feat: Add option to scan and register HTML anchors
Browse files Browse the repository at this point in the history
  • Loading branch information
tvdboom authored and pawamoy committed Feb 16, 2024
1 parent 143d768 commit 631783e
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 16 deletions.
9 changes: 9 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1 +1,10 @@
--8<-- "CHANGELOG.md"

[](#hello){#hello2}

## Hello

Hello.

Link to [Hello 1][hello1].
Link to [Hello 2][hello2].
9 changes: 9 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
@@ -1 +1,10 @@
--8<-- "README.md"

[](#hello){#hello1}

## Hello

Hello.

Link to [Hello 1][hello1].
Link to [Hello 2][hello2].
3 changes: 3 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ markdown_extensions:
permalink: "¤"

plugins:
- autorefs:
scan_anchors: true
- search
- markdown-exec
- gen-files:
Expand All @@ -109,6 +111,7 @@ plugins:
import:
- https://docs.python.org/3/objects.inv
- https://www.mkdocs.org/objects.inv
- https://python-markdown.github.io/objects.inv
paths: [src]
options:
docstring_options:
Expand Down
50 changes: 34 additions & 16 deletions src/mkdocs_autorefs/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,18 @@
import contextlib
import functools
import logging
import re
from functools import partial
from typing import TYPE_CHECKING, Any, Callable, Sequence
from urllib.parse import urlsplit

from mkdocs.config.base import Config
from mkdocs.config.config_options import Type
from mkdocs.config.defaults import MkDocsConfig
from mkdocs.plugins import BasePlugin
from mkdocs.structure.pages import Page

from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url
from mkdocs_autorefs.references import AnchorScannerTreeProcessor, AutorefsExtension, fix_refs, relative_url

if TYPE_CHECKING:
from mkdocs.config.defaults import MkDocsConfig
Expand All @@ -36,7 +42,14 @@
log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment]


class AutorefsPlugin(BasePlugin):
class AutorefsConfig(Config):
    """Configuration options for the Autorefs plugin.

    Options declared here are set under the `autorefs` entry of the `plugins`
    list in `mkdocs.yml`, for example `scan_anchors: true`.
    """

    scan_anchors = Type(bool, default=False)
    """Whether to scan HTML pages for anchors defining references."""


class AutorefsPlugin(BasePlugin[AutorefsConfig]):
"""An `mkdocs` plugin.
This plugin defines the following event hooks:
Expand All @@ -50,23 +63,28 @@ class AutorefsPlugin(BasePlugin):
"""

scan_toc: bool = True
scan_anchors: bool = False
current_page: str | None = None

_re_anchors = re.compile(r'<a(?:\s+href="([^"]*)")?\s+id="([^"]+)"\s*>')

def __init__(self) -> None:
"""Initialize the object."""
super().__init__()
self._url_map: dict[str, str] = {}
self._abs_url_map: dict[str, str] = {}
self._extension: AutorefsExtension | None = None
self.get_fallback_anchor: Callable[[str], str | None] | None = None
self.current_page: str | None = None

def register_anchor(self, page: str, identifier: str) -> None:
def register_anchor(self, page: str, identifier: str, anchor: str | None = None) -> None:
"""Register that an anchor corresponding to an identifier was encountered when rendering the page.
Arguments:
page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'`
identifier: The HTML anchor (without '#') as a string.
"""
self._url_map[identifier] = f"{page}#{identifier}"
self._url_map[identifier] = f"{page}#{anchor or identifier}"

def register_url(self, identifier: str, url: str) -> None:
"""Register that the identifier should be turned into a link to this URL.
Expand Down Expand Up @@ -133,20 +151,15 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
The modified config.
"""
log.debug("Adding AutorefsExtension to the list")
config["markdown_extensions"].append(AutorefsExtension())
anchor_scanner_factory = (
partial(AnchorScannerTreeProcessor, self) if self.scan_anchors or self.config.scan_anchors else None
)
# anchor_scanner_factory = None
self._extension = AutorefsExtension(anchor_scanner_factory=anchor_scanner_factory)
config["markdown_extensions"].append(self._extension)
return config

def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002
"""Remember which page is the current one.
Arguments:
markdown: Input Markdown.
page: The related MkDocs page instance.
kwargs: Additional arguments passed by MkDocs.
Returns:
The same Markdown. We only use this hook to map anchors to URLs.
"""
def on_page_markdown(self, markdown: str, *, page: Page, **kwargs: Any) -> str | None: # noqa: ARG002, D102
self.current_page = page.url
return markdown

Expand All @@ -170,6 +183,11 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa:
log.debug(f"Mapping identifiers to URLs for page {page.file.src_path}")
for item in page.toc.items:
self.map_urls(page.url, item)

# if self.scan_anchors or self.config.scan_anchors:
# for href, hid in re.findall(self._re_anchors, html):
# self.register_anchor(page.url, identifier=hid, anchor=href.lstrip("#"))

return html

def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
Expand Down
50 changes: 50 additions & 0 deletions src/mkdocs_autorefs/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@
from urllib.parse import urlsplit
from xml.etree.ElementTree import Element

from markdown.core import Markdown
from markdown.extensions import Extension
from markdown.inlinepatterns import REFERENCE_RE, ReferenceInlineProcessor
from markdown.treeprocessors import Treeprocessor
from markdown.util import INLINE_PLACEHOLDER_RE

if TYPE_CHECKING:
from markdown import Markdown

from mkdocs_autorefs.plugin import AutorefsPlugin

AUTO_REF_RE = re.compile(
r"<span data-(?P<kind>autorefs-identifier|autorefs-optional|autorefs-optional-hover)="
r'("?)(?P<identifier>[^"<>]*)\2>(?P<title>.*?)</span>',
Expand Down Expand Up @@ -197,9 +201,48 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str
return html, unmapped


class AnchorScannerTreeProcessor(Treeprocessor):
    """Tree processor to scan and register HTML anchors.

    Every `<a>` element carrying an `id` attribute is reported to the plugin
    through its `register_anchor` method. When the element also has a
    same-page `href` (for example `<a href="#hello" id="hello1">`), the `id`
    is registered as an alias of the fragment named by that `href`.
    """

    def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None:
        """Initialize the tree processor.

        Parameters:
            plugin: A reference to the autorefs plugin, to use its `register_anchor` method.
            md: The Markdown instance, forwarded to the base tree processor.
        """
        super().__init__(md)
        self.plugin = plugin

    def run(self, root: Element) -> None:  # noqa: D102
        # Anchors can only be mapped to a URL once the plugin knows the current page.
        if self.plugin.current_page is not None:
            self._scan_anchors(root)

    def _scan_anchors(self, parent: Element) -> None:
        """Recursively register the identified anchors found below `parent`.

        Parameters:
            parent: The element whose descendants are scanned.
        """
        page = self.plugin.current_page
        if page is None:
            return
        for el in parent:
            if el.tag == "a" and (hid := el.get("id")):
                href = el.get("href", "")
                # Only a same-page fragment (`#something`) can be used as the target
                # anchor. Any other href (relative page, external URL) would yield a
                # URL such as `page#other/#x`, so fall back to the element's own id.
                target = href.lstrip("#") if href.startswith("#") else ""
                self.plugin.register_anchor(page, hid, target or None)
            else:
                # Children of a registered anchor are not scanned, only other subtrees.
                self._scan_anchors(el)


class AutorefsExtension(Extension):
"""Extension that inserts auto-references in Markdown."""

def __init__(
    self,
    anchor_scanner_factory: Callable[[Markdown], AnchorScannerTreeProcessor] | None = None,
    **kwargs: Any,
) -> None:
    """Initialize the Markdown extension.

    Parameters:
        anchor_scanner_factory: A callable that returns an instance of the anchor scanner tree processor.
            When `None` (the default), no anchor scanner is created.
        **kwargs: Keyword arguments passed to the [base constructor][markdown.extensions.Extension].
    """
    super().__init__(**kwargs)
    self.anchor_scanner_factory = anchor_scanner_factory
    # Stays `None` until `extendMarkdown` builds the scanner from the factory.
    self.anchor_scanner: AnchorScannerTreeProcessor | None = None

def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent method's name)
"""Register the extension.
Expand All @@ -213,3 +256,10 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me
"mkdocs-autorefs",
priority=168, # Right after markdown.inlinepatterns.ReferenceInlineProcessor
)
if self.anchor_scanner_factory:
self.anchor_scanner = self.anchor_scanner_factory(md)
md.treeprocessors.register(
self.anchor_scanner,
"mkdocs-autorefs-anchors-scanner",
priority=0,
)
24 changes: 24 additions & 0 deletions tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,27 @@ def test_dont_make_relative_urls_relative_again() -> None:
plugin.get_item_url("hello", from_url="baz/bar/foo.html", fallback=lambda _: ("foo.bar.baz",))
== "../../foo/bar/baz.html#foo.bar.baz"
)


def test_register_html_anchors() -> None:
    """Check that HTML anchors are registered when enabled."""
    # NOTE(review): the regex-based scan in `AutorefsPlugin.on_page_content` looks
    # commented out in this revision of plugin.py; confirm that this test still
    # exercises the code path that fills `_url_map`.
    plugin = AutorefsPlugin()
    # Disable the TOC scan so that only anchor scanning can register identifiers.
    plugin.scan_toc = False
    plugin.scan_anchors = True

    # Minimal stand-in for an MkDocs page: only the `url` attribute is provided.
    class Page:
        url = "/page/url"

    plugin.on_page_content(
        """
<a id="foo.bar">
<a href="#foo.baz">
<a id="foo.qux" href="#fooqux">
<a href="quxfoo" id="qux.foo">
""",
        page=Page(), # type: ignore[arg-type]
    )
    # Identifiers come from the `id` attribute, whatever the attribute order.
    assert "foo.bar" in plugin._url_map
    # `foo.baz` only appears as an `href`, never as an `id`.
    assert "foo.baz" not in plugin._url_map
    assert "foo.qux" in plugin._url_map
    assert "qux.foo" in plugin._url_map

0 comments on commit 631783e

Please sign in to comment.