Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-101100: Only show GitHub check annotations on changed doc paragraphs #108065

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions .github/workflows/reusable-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
CAM-Gerlach marked this conversation as resolved.
Show resolved Hide resolved
- name: 'Set up Python'
uses: actions/setup-python@v4
with:
Expand All @@ -28,13 +30,6 @@ jobs:
run: make -C Doc/ venv

# To annotate PRs with Sphinx nitpicks (missing references)
- name: 'Get list of changed files'
if: github.event_name == 'pull_request'
id: changed_files
uses: Ana06/get-changed-files@v2.2.0
with:
filter: "Doc/**"
format: csv # works for paths with spaces
- name: 'Build HTML documentation'
continue-on-error: true
run: |
Expand All @@ -45,7 +40,7 @@ jobs:
if: github.event_name == 'pull_request'
run: |
python Doc/tools/check-warnings.py \
--check-and-annotate '${{ steps.changed_files.outputs.added_modified }}' \
--check-and-annotate-changed 'origin/${{ github.base_ref }}' '${{ github.sha }}' \
--fail-if-regression \
--fail-if-improved

Expand Down
182 changes: 162 additions & 20 deletions Doc/tools/check-warnings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
Check the output of running Sphinx in nit-picky mode (missing references).
"""
import argparse
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
import csv
import itertools
import os
import re
import subprocess
import sys
from pathlib import Path
from typing import TextIO

# Exclude these whether they're dirty or clean,
# because they trigger a rebuild of dirty files.
Expand All @@ -24,28 +26,167 @@
"venv",
}

PATTERN = re.compile(r"(?P<file>[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)")
# Regex pattern to match the parts of a Sphinx warning
WARNING_PATTERN = re.compile(
r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
)

# Regex pattern to match the line numbers in a Git unified diff
DIFF_PATTERN = re.compile(
r"^@@ -(?P<linea>\d+)(?:,(?P<removed>\d+))? \+(?P<lineb>\d+)(?:,(?P<added>\d+))? @@",
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
flags=re.MULTILINE,
)

def check_and_annotate(warnings: list[str], files_to_check: str) -> None:

def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:
"""List the files changed between two Gif refs, filtered by change type."""
CAM-Gerlach marked this conversation as resolved.
Show resolved Hide resolved
added_files_result = subprocess.run(
[
"git",
"diff",
f"--diff-filter={filter_mode}",
"--name-only",
f"{ref_a}...{ref_b}",
"--",
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
],
stdout=subprocess.PIPE,
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
check=True,
text=True,
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
encoding="UTF-8",
)

added_files = added_files_result.stdout.strip().split("\n")
return {Path(file.strip()) for file in added_files if file.strip()}
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved


def get_diff_lines(ref_a: str, ref_b: str, file: Path) -> list[int]:
"""List the lines changed between two Gif refs for a specific file."""
CAM-Gerlach marked this conversation as resolved.
Show resolved Hide resolved
diff_output = subprocess.run(
[
"git",
"diff",
"--unified=0",
f"{ref_a}...{ref_b}",
"--",
str(file),
],
stdout=subprocess.PIPE,
check=True,
text=True,
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
encoding="UTF-8",
)

# Scrape line offsets + lengths from diff and convert to line numbers
line_matches = DIFF_PATTERN.finditer(diff_output.stdout)
line_match_values = [
line_match.groupdict(default=1) for line_match in line_matches
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
]
line_ints = [
(int(match_value["lineb"]), int(match_value["added"]))
for match_value in line_match_values
]
line_ranges = [
range(line_b, line_b + added) for line_b, added in line_ints
]
line_numbers = list(itertools.chain(*line_ranges))

return line_numbers


def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:
"""Get the line numbers of text in a file object, grouped by paragraph."""
paragraphs = []
prev_line = None
for lineno, line in enumerate(file_obj):
lineno = lineno + 1
if prev_line is None or (line.strip() and not prev_line.strip()):
paragraph = [lineno - 1]
paragraphs.append(paragraph)
paragraph.append(lineno)
prev_line = line
return paragraphs


def parse_and_filter_warnings(
warnings: list[str], files: set[Path]
) -> list[re.Match[str]]:
"""Get the warnings matching passed files and parse them with regex."""
filtered_warnings = [
warning
for warning in warnings
if any(str(file) in warning for file in files)
]
warning_matches = [
WARNING_PATTERN.fullmatch(warning.strip())
for warning in filtered_warnings
]
non_null_matches = [warning for warning in warning_matches if warning]
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
return non_null_matches


def process_touched_warnings(
warnings: list[str], ref_a: str, ref_b: str
) -> list[re.Match[str]]:
"""Filter a list of Sphinx warnings to those affecting touched lines."""
added_files, modified_files = tuple(
get_diff_files(ref_a, ref_b, filter_mode=mode) for mode in ("A", "M")
)

warnings_added = parse_and_filter_warnings(warnings, added_files)
warnings_modified = parse_and_filter_warnings(warnings, modified_files)

modified_files_warned = {
file
for file in modified_files
if any(str(file) in warning["file"] for warning in warnings_modified)
}

warnings_touched = warnings_added.copy()
for file in modified_files_warned:
diff_lines = get_diff_lines(ref_a, ref_b, file)
with file.open(encoding="UTF-8") as file_obj:
paragraphs = get_para_line_numbers(file_obj)
touched_paras = [
para_lines
for para_lines in paragraphs
if set(diff_lines) & set(para_lines)
]
touched_para_lines = set(itertools.chain(*touched_paras))
warnings_infile = [
warning
for warning in warnings_modified
if str(file) in warning["file"]
]
warnings_touched += [
warning
for warning in warnings_infile
if int(warning["line"]) in touched_para_lines
]

return warnings_touched


def check_and_annotate_changed(
warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"
) -> None:
"""
Convert Sphinx warning messages to GitHub Actions.
Convert Sphinx warning messages to GitHub Actions for changed paragraphs.

Converts lines like:
.../Doc/library/cgi.rst:98: WARNING: reference target not found
to:
::warning file=.../Doc/library/cgi.rst,line=98::reference target not found

Non-matching lines are echoed unchanged.

see:
See:
https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
"""
files_to_check = next(csv.reader([files_to_check]))
for warning in warnings:
if any(filename in warning for filename in files_to_check):
if match := PATTERN.fullmatch(warning):
print("::warning file={file},line={line}::{msg}".format_map(match))
warnings_touched = process_touched_warnings(warnings, ref_a, ref_b)
print("Emitting doc warnings matching modified lines:")
for warning in warnings_touched:
print(warning[0])
print("::warning file={file},line={line}::{msg}".format_map(warning))
if not warnings_touched:
print("None")


def fail_if_regression(
Expand All @@ -68,7 +209,7 @@ def fail_if_regression(
print(filename)
for warning in warnings:
if filename in warning:
if match := PATTERN.fullmatch(warning):
if match := WARNING_PATTERN.fullmatch(warning):
print(" {line}: {msg}".format_map(match))
return -1
return 0
Expand All @@ -94,9 +235,10 @@ def fail_if_improved(
def main() -> int:
parser = argparse.ArgumentParser()
parser.add_argument(
"--check-and-annotate",
help="Comma-separated list of files to check, "
"and annotate those with warnings on GitHub Actions",
"--check-and-annotate-changed",
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
nargs=2,
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
AA-Turner marked this conversation as resolved.
Show resolved Hide resolved
help="Annotate lines changed between two refs "
"with warnings on GitHub Actions",
)
parser.add_argument(
"--fail-if-regression",
Expand All @@ -114,7 +256,7 @@ def main() -> int:
wrong_directory_msg = "Must run this script from the repo root"
assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg

with Path("Doc/sphinx-warnings.txt").open() as f:
with Path("Doc/sphinx-warnings.txt").open(encoding="UTF-8") as f:
warnings = f.read().splitlines()

cwd = str(Path.cwd()) + os.path.sep
Expand All @@ -124,15 +266,15 @@ def main() -> int:
if "Doc/" in warning
}

with Path("Doc/tools/.nitignore").open() as clean_files:
with Path("Doc/tools/.nitignore").open(encoding="UTF-8") as clean_files:
files_with_expected_nits = {
filename.strip()
for filename in clean_files
if filename.strip() and not filename.startswith("#")
}

if args.check_and_annotate:
check_and_annotate(warnings, args.check_and_annotate)
if args.check_and_annotate_changed:
check_and_annotate_changed(warnings, *args.check_and_annotate_changed)

if args.fail_if_regression:
exit_code += fail_if_regression(
Expand Down