Skip to content

Commit

Permalink
Fix linkcheck by excluding web.archive.org (#5335)
Browse files Browse the repository at this point in the history
Also cherry-picked #5333
  • Loading branch information
MetRonnie authored Jan 30, 2023
2 parents 8c855b4 + 7a7cdad commit 0473362
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 41 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test_fast.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,5 +115,5 @@ jobs:
token: ${{ secrets.CODECOV_TOKEN }} # Token not required for public repos, but might reduce chance of random 404 error?

- name: Linkcheck
if: startsWith(matrix.python-version, 3.9)
run: pytest -m linkcheck tests/unit
if: startsWith(matrix.python-version, '3.10')
run: pytest -m linkcheck --dist=load tests/unit
4 changes: 2 additions & 2 deletions conda-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ dependencies:
- psutil >=5.6.0
- python
- pyzmq >=22,<23
- setuptools >=49
- setuptools >=49, <67
- urwid >=2,<3
# Add # [py<3.11] for tomli once Python 3.11 Released
- tomli >=2.0.0
- tomli >=2

# optional dependencies
#- empy >=3.3,<3.4
Expand Down
3 changes: 1 addition & 2 deletions cylc/flow/hostuserutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
"""Host name utilities
ATTRIBUTION:
https://web.archive.org/web/20140606052543/http://www.linux-support.com/cms/
get-local-ip-address-with-python/
https://web.archive.org/web/20140606052543/http://www.linux-support.com/cms/get-local-ip-address-with-python/
Fetching the outgoing IP address of a computer might be a difficult
task. Computers can contain a large set of network devices, each
Expand Down
6 changes: 3 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,13 @@ install_requires =
protobuf>=4.21.2,<4.22.0
psutil>=5.6.0
pyzmq==22.*
setuptools>=49
# https://github.com/pypa/setuptools/issues/3802
setuptools>=49, <67
urwid==2.*
# unpinned transient dependencies used for type checking
rx
promise
# Once Python 3.11 released -'; python_version<"3.11"'
tomli>=2.*
tomli>=2; python_version < "3.11"

[options.packages.find]
include = cylc*
Expand Down
58 changes: 26 additions & 32 deletions tests/unit/test_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,58 +13,55 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Check links inserted into internal documentation.
Reason for doing this here:
- Some links don't appear to be picked up by the Cylc-doc linkcheck.
- As we have more links it's worth checking them here, rather than waiting
for them to show up in Cylc.
"""
from functools import lru_cache

import fnmatch
from pathlib import Path
import re
from shlex import split
from subprocess import run
from time import sleep
import pytest
import urllib

EXCLUDE = [
'http://www.gnu.org/licenses/',
'http://my-site.com/workflows/%(workflow)s/index.html',
'http://ahost/%(owner)s/notes/%(workflow)s',
'http://my-site.com/workflows/%(workflow)s/'
r'*//www.gnu.org/licenses/',
r'*//my-site.com/*',
r'*//ahost/%(owner)s/notes/%(workflow)s',
r'*//web.archive.org/*'
]


def get_links():
searchdir = Path(__file__).parent.parent.parent / 'cylc/flow'
results = {}
for file_ in searchdir.rglob('*.py'):
searchdir = Path(__file__).parent.parent.parent / 'cylc' / 'flow'
return sorted({
url
for file_ in searchdir.rglob('*.py')
for url in re.findall(
r'(https?:\/\/.*?)[\n\s\>`"\',]', file_.read_text()
):
if url not in EXCLUDE and url in results:
results[url].append(file_)
if url not in EXCLUDE and url not in results:
results[url] = [file_]
return results
)
if not any(
fnmatch.fnmatch(url, pattern) for pattern in EXCLUDE
)
})


@pytest.mark.linkcheck
@pytest.mark.parametrize(
'link, files', [
pytest.param(
link,
files,
id=f"{link}"
)
for link, files in get_links().items()
]
)
def test_embedded_url(link, files):
@pytest.mark.parametrize('link', get_links())
def test_embedded_url(link):
"""Check links in the source code are not broken.
TIP: use `--dist=load` when running pytest to enable parametrized tests
to run in parallel
"""
try:
urllib.request.urlopen(link).getcode()
except urllib.error.HTTPError as exc:
except urllib.error.HTTPError:
# Sleep and retry to reduce risk of flakiness:
sleep(10)
try:
Expand All @@ -73,7 +70,4 @@ def test_embedded_url(link, files):
# Allowing 403 - just because a site forbids us doesn't mean the
# link is wrong.
if exc.code != 403:
raise Exception(f'{exc} | {link} | {", ".join(files)}')



raise Exception(f'{exc} | {link}')

0 comments on commit 0473362

Please sign in to comment.