Skip to content

Commit

Permalink
Only show GitHub check annotations on changed doc paragraphs
Browse files Browse the repository at this point in the history
  • Loading branch information
CAM-Gerlach committed Aug 16, 2023
1 parent dc8fdf5 commit d4f66b9
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 27 deletions.
11 changes: 3 additions & 8 deletions .github/workflows/reusable-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: 'Set up Python'
uses: actions/setup-python@v4
with:
Expand All @@ -28,13 +30,6 @@ jobs:
run: make -C Doc/ venv

# To annotate PRs with Sphinx nitpicks (missing references)
- name: 'Get list of changed files'
if: github.event_name == 'pull_request'
id: changed_files
uses: Ana06/get-changed-files@v2.2.0
with:
filter: "Doc/**"
format: csv # works for paths with spaces
- name: 'Build HTML documentation'
continue-on-error: true
run: |
Expand All @@ -45,7 +40,7 @@ jobs:
if: github.event_name == 'pull_request'
run: |
python Doc/tools/check-warnings.py \
--check-and-annotate '${{ steps.changed_files.outputs.added_modified }}' \
--check-and-annotate-changed 'origin/${{ github.base_ref }}' '${{ github.sha }}' \
--fail-if-regression \
--fail-if-improved
Expand Down
163 changes: 144 additions & 19 deletions Doc/tools/check-warnings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
Check the output of running Sphinx in nit-picky mode (missing references).
"""
import argparse
import csv
import itertools
import os
import re
import subprocess
import sys
from pathlib import Path
from typing import TextIO

# Exclude these whether they're dirty or clean,
# because they trigger a rebuild of dirty files.
Expand All @@ -24,28 +26,150 @@
"venv",
}

PATTERN = re.compile(r"(?P<file>[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)")
# Regex pattern to match the parts of a Sphinx warning
WARNING_PATTERN = re.compile(
r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
)

# Regex pattern to match the line numbers in a Git unified diff
DIFF_PATTERN = re.compile(
r"^@@ -(?P<linea>\d+),(?P<removed>\d+) \+(?P<lineb>\d+),(?P<added>\d+) @@",
flags=re.MULTILINE,
)

def check_and_annotate(warnings: list[str], files_to_check: str) -> None:

def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:
"""List the files changed between two Gif refs, filtered by change type."""
added_files_result = subprocess.run(
[
"git",
"diff",
f"--diff-filter={filter_mode}",
"--name-only",
f"{ref_a}...{ref_b}",
"--",
],
stdout=subprocess.PIPE,
check=True,
text=True,
)

added_files = added_files_result.stdout.strip().split("\n")
return {Path(file.strip()) for file in added_files if file.strip()}


def get_diff_lines(ref_a: str, ref_b: str, file: os.PathLike) -> list[int]:
"""List the lines changed between two Gif refs for a specific file."""
diff_output = subprocess.run(
[
"git",
"diff",
"--unified=0",
"--diff-filter=M",
f"{ref_a}...{ref_b}",
"--",
str(file),
],
stdout=subprocess.PIPE,
check=True,
text=True,
)

line_matches = DIFF_PATTERN.finditer(diff_output.stdout)
line_ints = [
(int(line_match["lineb"]), int(line_match["added"]))
for line_match in line_matches
]
line_ranges = [
range(line_b, line_b + added) for line_b, added in line_ints
]
line_numbers = list(itertools.chain(*line_ranges))
return line_numbers


def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:
"""Get the line numbers of text in a file object, grouped by paragraph."""
paragraphs = []
prev_line = None
for lineno, line in enumerate(file_obj):
lineno = lineno + 1
if prev_line is None or (line.strip() and not prev_line.strip()):
paragraph = [lineno - 1]
paragraphs.append(paragraph)
paragraph.append(lineno)
prev_line = line
return paragraphs


def process_touched_warnings(
warnings: list[str], ref_a: str, ref_b: str
) -> list[re.Match]:
"""Filter a list of Sphinx warnings to those affecting touched lines."""
added_files, modified_files = tuple(
get_diff_files(ref_a, ref_b, filter_mode=mode) for mode in ("A", "M")
)

warnings_added, warnings_modified = tuple(
[
WARNING_PATTERN.fullmatch(warning.strip())
for warning in warnings
if any(str(file) in warning for file in files)
]
for files in (added_files, modified_files)
)
warnings_added, warnings_modified = tuple(
[warning for warning in warning_matches if warning]
for warning_matches in (warnings_added, warnings_modified)
)

modified_files_warned = {
file
for file in modified_files
if any(str(file) in warning["file"] for warning in warnings_modified)
}

warnings_touched = warnings_added.copy()
for file in modified_files_warned:
diff_lines = get_diff_lines(ref_a, ref_b, file)
with file.open(encoding="UTF-8") as file_obj:
paragraphs = get_para_line_numbers(file_obj)
touched_paras = [
para_lines
for para_lines in paragraphs
if set(diff_lines) & set(para_lines)
]
touched_para_lines = set(itertools.chain(*touched_paras))
warnings_infile = [
warning
for warning in warnings_modified
if str(file) in warning["file"]
]
warnings_touched += [
warning
for warning in warnings_infile
if int(warning["line"]) in touched_para_lines
]

return warnings_touched


def check_and_annotate_changed(
warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"
) -> None:
"""
Convert Sphinx warning messages to GitHub Actions.
Convert Sphinx warning messages to GitHub Actions for changed paragraphs.
Converts lines like:
.../Doc/library/cgi.rst:98: WARNING: reference target not found
to:
::warning file=.../Doc/library/cgi.rst,line=98::reference target not found
Non-matching lines are echoed unchanged.
see:
See:
https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
"""
files_to_check = next(csv.reader([files_to_check]))
for warning in warnings:
if any(filename in warning for filename in files_to_check):
if match := PATTERN.fullmatch(warning):
print("::warning file={file},line={line}::{msg}".format_map(match))
warnings_touched = process_touched_warnings(warnings, ref_a, ref_b)
for warning in warnings_touched:
print("::warning file={file},line={line}::{msg}".format_map(warning))


def fail_if_regression(
Expand All @@ -68,7 +192,7 @@ def fail_if_regression(
print(filename)
for warning in warnings:
if filename in warning:
if match := PATTERN.fullmatch(warning):
if match := WARNING_PATTERN.fullmatch(warning):
print(" {line}: {msg}".format_map(match))
return -1
return 0
Expand All @@ -94,9 +218,10 @@ def fail_if_improved(
def main() -> int:
parser = argparse.ArgumentParser()
parser.add_argument(
"--check-and-annotate",
help="Comma-separated list of files to check, "
"and annotate those with warnings on GitHub Actions",
"--check-and-annotate-changed",
nargs=2,
help="Annotate lines changed between two refs "
"with warnings on GitHub Actions",
)
parser.add_argument(
"--fail-if-regression",
Expand All @@ -114,7 +239,7 @@ def main() -> int:
wrong_directory_msg = "Must run this script from the repo root"
assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg

with Path("Doc/sphinx-warnings.txt").open() as f:
with Path("Doc/sphinx-warnings.txt").open(encoding="UTF-8") as f:
warnings = f.read().splitlines()

cwd = str(Path.cwd()) + os.path.sep
Expand All @@ -131,8 +256,8 @@ def main() -> int:
if filename.strip() and not filename.startswith("#")
}

if args.check_and_annotate:
check_and_annotate(warnings, args.check_and_annotate)
if args.check_and_annotate_changed:
check_and_annotate_changed(warnings, *args.check_and_annotate_changed)

if args.fail_if_regression:
exit_code += fail_if_regression(
Expand Down

0 comments on commit d4f66b9

Please sign in to comment.