Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.12] gh-101100: Docs: Check Sphinx warnings and fail if improved (GH-106460) #108116

Merged
merged 2 commits into from
Aug 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 10 additions & 17 deletions .github/workflows/reusable-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,35 +26,28 @@ jobs:
cache-dependency-path: 'Doc/requirements.txt'
- name: 'Install build dependencies'
run: make -C Doc/ venv
- name: 'Build HTML documentation'
run: make -C Doc/ SPHINXOPTS="-q" SPHINXERRORHANDLING="-W --keep-going" html

# Add pull request annotations for Sphinx nitpicks (missing references)
# To annotate PRs with Sphinx nitpicks (missing references)
- name: 'Get list of changed files'
if: github.event_name == 'pull_request'
id: changed_files
uses: Ana06/get-changed-files@v2.2.0
with:
filter: "Doc/**"
format: csv # works for paths with spaces
- name: 'Build changed files in nit-picky mode'
if: github.event_name == 'pull_request'
- name: 'Build HTML documentation'
continue-on-error: true
run: |
set -Eeuo pipefail
# Mark files the pull request modified
python Doc/tools/touch-clean-files.py --clean '${{ steps.changed_files.outputs.added_modified }}'
# Build docs with the '-n' (nit-picky) option; convert warnings to annotations
make -C Doc/ PYTHON=../python SPHINXOPTS="-q -n --keep-going" html 2>&1 |
python Doc/tools/warnings-to-gh-actions.py

# Ensure some files always pass Sphinx nit-picky mode (no missing references)
- name: 'Build known-good files in nit-picky mode'
# Build docs with the '-n' (nit-picky) option; write warnings to file
make -C Doc/ PYTHON=../python SPHINXOPTS="-q -n -W --keep-going -w sphinx-warnings.txt" html
- name: 'Check warnings'
if: github.event_name == 'pull_request'
run: |
# Mark files that must pass nit-picky
python Doc/tools/touch-clean-files.py
# Build docs with the '-n' (nit-picky) option, convert warnings to errors (-W)
make -C Doc/ PYTHON=../python SPHINXOPTS="-q -n -W --keep-going" html 2>&1
python Doc/tools/check-warnings.py \
--check-and-annotate '${{ steps.changed_files.outputs.added_modified }}' \
--fail-if-regression \
--fail-if-improved

# This build doesn't use problem matchers or check annotations
build_doc_oldest_supported_sphinx:
Expand Down
8 changes: 2 additions & 6 deletions Doc/tools/.nitignore
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
# All RST files under Doc/ -- except these -- must pass Sphinx nit-picky mode,
# as tested on the CI via touch-clean-files.py in doc.yml.
# Add blank lines between files and keep them sorted lexicographically
# to help avoid merge conflicts.
# as tested on the CI via check-warnings.py in reusable-docs.yml.
# Keep lines sorted lexicographically to help avoid merge conflicts.

Doc/c-api/arg.rst
Doc/c-api/datetime.rst
Doc/c-api/descriptor.rst
Doc/c-api/dict.rst
Doc/c-api/exceptions.rst
Doc/c-api/file.rst
Doc/c-api/float.rst
Doc/c-api/gcsupport.rst
Doc/c-api/import.rst
Doc/c-api/init.rst
Doc/c-api/init_config.rst
Doc/c-api/intro.rst
Expand Down Expand Up @@ -58,7 +55,6 @@ Doc/library/bz2.rst
Doc/library/calendar.rst
Doc/library/cgi.rst
Doc/library/chunk.rst
Doc/library/cmath.rst
Doc/library/cmd.rst
Doc/library/codecs.rst
Doc/library/collections.abc.rst
Expand Down
149 changes: 149 additions & 0 deletions Doc/tools/check-warnings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#!/usr/bin/env python3
"""
Check the output of running Sphinx in nit-picky mode (missing references).
"""
import argparse
import csv
import os
import re
import sys
from pathlib import Path

# Exclude these whether they're dirty or clean,
# because they trigger a rebuild of dirty files.
EXCLUDE_FILES = {
"Doc/whatsnew/changelog.rst",
}

# Subdirectories of Doc/ to exclude.
EXCLUDE_SUBDIRS = {
".env",
".venv",
"env",
"includes",
"venv",
}

PATTERN = re.compile(r"(?P<file>[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)")


def check_and_annotate(warnings: list[str], files_to_check: str) -> None:
"""
Convert Sphinx warning messages to GitHub Actions.

Converts lines like:
.../Doc/library/cgi.rst:98: WARNING: reference target not found
to:
::warning file=.../Doc/library/cgi.rst,line=98::reference target not found

Non-matching lines are echoed unchanged.

see:
https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
"""
files_to_check = next(csv.reader([files_to_check]))
for warning in warnings:
if any(filename in warning for filename in files_to_check):
if match := PATTERN.fullmatch(warning):
print("::warning file={file},line={line}::{msg}".format_map(match))


def fail_if_regression(
warnings: list[str], files_with_expected_nits: set[str], files_with_nits: set[str]
) -> int:
"""
Ensure some files always pass Sphinx nit-picky mode (no missing references).
These are files which are *not* in .nitignore.
"""
all_rst = {
str(rst)
for rst in Path("Doc/").rglob("*.rst")
if rst.parts[1] not in EXCLUDE_SUBDIRS
}
should_be_clean = all_rst - files_with_expected_nits - EXCLUDE_FILES
problem_files = sorted(should_be_clean & files_with_nits)
if problem_files:
print("\nError: must not contain warnings:\n")
for filename in problem_files:
print(filename)
for warning in warnings:
if filename in warning:
if match := PATTERN.fullmatch(warning):
print(" {line}: {msg}".format_map(match))
return -1
return 0


def fail_if_improved(
files_with_expected_nits: set[str], files_with_nits: set[str]
) -> int:
"""
We may have fixed warnings in some files so that the files are now completely clean.
Good news! Let's add them to .nitignore to prevent regression.
"""
files_with_no_nits = files_with_expected_nits - files_with_nits
if files_with_no_nits:
print("\nCongratulations! You improved:\n")
for filename in sorted(files_with_no_nits):
print(filename)
print("\nPlease remove from Doc/tools/.nitignore\n")
return -1
return 0


def main() -> int:
parser = argparse.ArgumentParser()
parser.add_argument(
"--check-and-annotate",
help="Comma-separated list of files to check, "
"and annotate those with warnings on GitHub Actions",
)
parser.add_argument(
"--fail-if-regression",
action="store_true",
help="Fail if known-good files have warnings",
)
parser.add_argument(
"--fail-if-improved",
action="store_true",
help="Fail if new files with no nits are found",
)
args = parser.parse_args()
exit_code = 0

wrong_directory_msg = "Must run this script from the repo root"
assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg

with Path("Doc/sphinx-warnings.txt").open() as f:
warnings = f.read().splitlines()

cwd = str(Path.cwd()) + os.path.sep
files_with_nits = {
warning.removeprefix(cwd).split(":")[0]
for warning in warnings
if "Doc/" in warning
}

with Path("Doc/tools/.nitignore").open() as clean_files:
files_with_expected_nits = {
filename.strip()
for filename in clean_files
if filename.strip() and not filename.startswith("#")
}

if args.check_and_annotate:
check_and_annotate(warnings, args.check_and_annotate)

if args.fail_if_regression:
exit_code += fail_if_regression(
warnings, files_with_expected_nits, files_with_nits
)

if args.fail_if_improved:
exit_code += fail_if_improved(files_with_expected_nits, files_with_nits)

return exit_code


if __name__ == "__main__":
sys.exit(main())
65 changes: 0 additions & 65 deletions Doc/tools/touch-clean-files.py

This file was deleted.

25 changes: 0 additions & 25 deletions Doc/tools/warnings-to-gh-actions.py

This file was deleted.