Skip to content

Commit

Permalink
Merge pull request #9260 from tk0miya/9016_linkcheck_github_anchors
Browse files Browse the repository at this point in the history
Close #9016: linkcheck builder failed to check the anchors of github.com
  • Loading branch information
tk0miya authored Jun 3, 2021
2 parents 69cbf7a + a02f6c5 commit 1418e3a
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 4 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ Features added
text
* #9176: i18n: Emit a debug message if message catalog file not found under
:confval:`locale_dirs`
* #9016: linkcheck: Support checking anchors on github.com
* #9016: linkcheck: Add a new event :event:`linkcheck-process-uri` to modify
URIs before checking hyperlinks
* #1874: py domain: Support union types using ``|`` in info-field-list
* #9268: py domain: :confval:`python_use_unqualified_type_names` supports type
field in info-field-list
Expand Down
8 changes: 8 additions & 0 deletions doc/extdev/appapi.rst
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,14 @@ Here is a more detailed list of these events.
.. versionchanged:: 1.3
The return value can now specify a template name.

.. event:: linkcheck-process-uri (app, uri)

Emitted when the linkcheck builder collects hyperlinks from a document. *uri*
is the collected URI. Event handlers can modify the URI by returning a
string.

.. versionadded:: 4.1

.. event:: build-finished (app, exception)

Emitted when a build has finished, before Sphinx exits, usually used for
Expand Down
28 changes: 27 additions & 1 deletion sphinx/builders/linkcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from threading import Thread
from typing import (Any, Dict, Generator, List, NamedTuple, Optional, Pattern, Set, Tuple,
Union, cast)
from urllib.parse import unquote, urlparse
from urllib.parse import unquote, urlparse, urlunparse

from docutils import nodes
from docutils.nodes import Element
Expand Down Expand Up @@ -627,6 +627,10 @@ def run(self, **kwargs: Any) -> None:
if 'refuri' not in refnode:
continue
uri = refnode['refuri']
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri

lineno = get_node_line(refnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
Expand All @@ -636,12 +640,31 @@ def run(self, **kwargs: Any) -> None:
for imgnode in self.document.traverse(nodes.image):
uri = imgnode['candidates'].get('?')
if uri and '://' in uri:
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri

lineno = get_node_line(imgnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
hyperlinks[uri] = uri_info


def rewrite_github_anchor(app: Sphinx, uri: str) -> Optional[str]:
    """Prefix the anchor of a github.com hyperlink with ``user-content-``.

    The hyperlink anchors in github.com are dynamically generated, so the
    anchor is rewritten before checking to make it comparable.

    :param app: The application object passed to the event handler; it is
                not used here.
    :param uri: The hyperlink to inspect.
    :return: The URI with its fragment prefixed by ``user-content-``, or
             ``None`` when the URI is left unchanged (host is not
             github.com, there is no fragment, or the fragment already
             carries the prefix).
    """
    parts = urlparse(uri)
    anchor = parts.fragment

    # Only github.com links that actually carry an anchor are rewritten.
    if parts.hostname != "github.com" or not anchor:
        return None

    # An anchor that already has the prefix needs no rewrite.
    if anchor.startswith('user-content-'):
        return None

    return urlunparse(parts._replace(fragment=f'user-content-{anchor}'))


def setup(app: Sphinx) -> Dict[str, Any]:
app.add_builder(CheckExternalLinksBuilder)
app.add_post_transform(HyperlinkCollector)
Expand All @@ -658,6 +681,9 @@ def setup(app: Sphinx) -> Dict[str, Any]:
app.add_config_value('linkcheck_anchors_ignore', ["^!"], None)
app.add_config_value('linkcheck_rate_limit_timeout', 300.0, None)

app.add_event('linkcheck-process-uri')
app.connect('linkcheck-process-uri', rewrite_github_anchor)

return {
'version': 'builtin',
'parallel_read_safe': True,
Expand Down
2 changes: 2 additions & 0 deletions tests/roots/test-linkcheck/links.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Some additional anchors to exercise ignore code
* `Complete nonsense <https://localhost:7777/doesnotexist>`_
* `Example valid local file <conf.py>`_
* `Example invalid local file <path/to/notfound>`_
* https://github.com/sphinx-doc/sphinx#documentation
* https://github.com/sphinx-doc/sphinx#user-content-testing

.. image:: https://www.google.com/image.png
.. figure:: https://www.google.com/image2.png
10 changes: 7 additions & 3 deletions tests/test_build_linkcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ def test_defaults_json(app):
"info"]:
assert attr in row

assert len(content.splitlines()) == 10
assert len(rows) == 10
assert len(content.splitlines()) == 12
assert len(rows) == 12
# the output order of the rows is not stable
# due to possible variance in network latency
rowsby = {row["uri"]: row for row in rows}
Expand All @@ -87,7 +87,7 @@ def test_defaults_json(app):
assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
assert rowsby['https://www.google.com/image2.png'] == {
'filename': 'links.txt',
'lineno': 18,
'lineno': 20,
'status': 'broken',
'code': 0,
'uri': 'https://www.google.com/image2.png',
Expand All @@ -101,6 +101,10 @@ def test_defaults_json(app):
# images should fail
assert "Not Found for url: https://www.google.com/image.png" in \
rowsby["https://www.google.com/image.png"]["info"]
# The anchor of the URI for github.com is automatically modified
assert 'https://github.com/sphinx-doc/sphinx#documentation' not in rowsby
assert 'https://github.com/sphinx-doc/sphinx#user-content-documentation' in rowsby
assert 'https://github.com/sphinx-doc/sphinx#user-content-testing' in rowsby


@pytest.mark.sphinx(
Expand Down

0 comments on commit 1418e3a

Please sign in to comment.