diff --git a/scripts/fuzz-parser/fuzz.py b/scripts/fuzz-parser/fuzz.py
new file mode 100644
index 0000000000000..d64aba8e327d3
--- /dev/null
+++ b/scripts/fuzz-parser/fuzz.py
@@ -0,0 +1,270 @@
+"""
+Run the parser on randomly generated (but syntactically valid) Python source-code files.
+
+To install all dependencies for this script into an environment using `uv`, run:
+    uv pip install -r scripts/fuzz-parser/requirements.txt
+
+Example invocations of the script:
+- Run the fuzzer using seeds 0, 1, 2, 78 and 93 to generate the code:
+    `python scripts/fuzz-parser/fuzz.py 0-2 78 93`
+- Run the fuzzer concurrently using seeds in range 0-10 inclusive,
+  but only reporting bugs that are new on your branch:
+    `python scripts/fuzz-parser/fuzz.py 0-10 --only-new-bugs`
+- Run the fuzzer concurrently on 10,000 different Python source-code files,
+  and only print a summary at the end:
+    `python scripts/fuzz-parser/fuzz.py 1-10000 --quiet`
+
+N.B. The script takes a few seconds to get started, as the script needs to compile
+your checked out version of ruff with `--release` as a first step before it
+can actually start fuzzing.
+"""
+
+from __future__ import annotations
+
+import argparse
+import concurrent.futures
+import functools
+import subprocess
+from dataclasses import KW_ONLY, dataclass
+from typing import NewType
+
+from pysource_codegen import generate as generate_random_code
+from pysource_minimize import minimize as minimize_repro
+from termcolor import colored
+
+MinimizedSourceCode = NewType("MinimizedSourceCode", str)
+Seed = NewType("Seed", int)
+
+
+def run_ruff(executable_args: list[str], code: str) -> subprocess.CompletedProcess[str]:
+    """Run `<executable_args> check --select=E999 --no-cache -` with `code` on stdin.
+
+    The exit code, stdout and stderr are captured (as text) in the returned object;
+    a non-zero exit code does not raise.
+    """
+    return subprocess.run(
+        [*executable_args, "check", "--select=E999", "--no-cache", "-"],
+        capture_output=True,
+        text=True,
+        input=code,
+    )
+
+
+def contains_bug(code: str, *, only_new_bugs: bool = False) -> bool:
+    """Return True if the code triggers a parser error and False otherwise.
+
+    If `only_new_bugs` is set to `True`,
+    the function also runs an installed version of Ruff on the same source code,
+    and only returns `True` if the bug appears on the branch you have currently
+    checked out but *not* in that installed version.
+    """
+    new_result = run_ruff(["cargo", "run", "--release", "--"], code)
+    if not only_new_bugs:
+        return new_result.returncode != 0
+    if new_result.returncode == 0:
+        return False
+    old_result = run_ruff(["ruff"], code)
+    return old_result.returncode == 0
+
+
+@dataclass(slots=True)
+class FuzzResult:
+    """The outcome of fuzzing the parser with a single seed."""
+
+    # The seed used to generate the random Python file.
+    # The same seed always generates the same file.
+    seed: Seed
+    # If we found a bug, this will be the minimum Python code
+    # required to trigger the bug. If not, it will be `None`.
+    maybe_bug: MinimizedSourceCode | None
+
+    def print_description(self) -> None:
+        """Describe the results of fuzzing the parser with this seed."""
+        if self.maybe_bug is not None:
+            print(colored(f"Ran fuzzer on seed {self.seed}", "red"))
+            print(colored("The following code triggers a bug:", "red"))
+            print()
+            print(self.maybe_bug)
+            print()
+        else:
+            print(colored(f"Ran fuzzer successfully on seed {self.seed}", "green"))
+
+
+def fuzz_code(seed: Seed, only_new_bugs: bool) -> FuzzResult:
+    """Return a `FuzzResult` instance describing the fuzzing result from this seed."""
+    code = generate_random_code(seed)
+    # Detect and minimize with the *same* predicate. Otherwise, under
+    # `only_new_bugs`, the minimizer could shrink the repro into a snippet
+    # that only triggers a bug which the installed release of Ruff also has.
+    has_bug = functools.partial(contains_bug, only_new_bugs=only_new_bugs)
+    if has_bug(code):
+        try:
+            new_code = minimize_repro(code, has_bug)
+        except ValueError:
+            # `pysource_minimize.minimize()` sometimes raises `ValueError` internally.
+            # Just ignore it if so, and use the original generated code;
+            # minimizing the repro is a nice-to-have, but isn't crucial.
+            new_code = code
+        return FuzzResult(seed, MinimizedSourceCode(new_code))
+    return FuzzResult(seed, None)
+
+
+def run_fuzzer_concurrently(args: ResolvedCliArgs) -> list[FuzzResult]:
+    """Fuzz all of `args.seeds` in a process pool and return the buggy results."""
+    print(
+        f"Concurrently running the fuzzer on "
+        f"{len(args.seeds)} randomly generated source-code files..."
+    )
+    bugs: list[FuzzResult] = []
+    with concurrent.futures.ProcessPoolExecutor() as executor:
+        fuzz_result_futures = [
+            executor.submit(fuzz_code, seed, args.only_new_bugs) for seed in args.seeds
+        ]
+        try:
+            for future in concurrent.futures.as_completed(fuzz_result_futures):
+                fuzz_result = future.result()
+                if not args.quiet:
+                    fuzz_result.print_description()
+                if fuzz_result.maybe_bug is not None:
+                    bugs.append(fuzz_result)
+        except KeyboardInterrupt:
+            print("\nShutting down the ProcessPoolExecutor due to KeyboardInterrupt...")
+            print("(This might take a few seconds)")
+            executor.shutdown(cancel_futures=True)
+            raise
+    return bugs
+
+
+def run_fuzzer_sequentially(args: ResolvedCliArgs) -> list[FuzzResult]:
+    """Fuzz all of `args.seeds` one at a time and return the buggy results."""
+    print(
+        f"Sequentially running the fuzzer on "
+        f"{len(args.seeds)} randomly generated source-code files..."
+    )
+    bugs: list[FuzzResult] = []
+    for seed in args.seeds:
+        fuzz_result = fuzz_code(seed, only_new_bugs=args.only_new_bugs)
+        if not args.quiet:
+            fuzz_result.print_description()
+        if fuzz_result.maybe_bug is not None:
+            bugs.append(fuzz_result)
+    return bugs
+
+
+def main(args: ResolvedCliArgs) -> None:
+    """Run the fuzzer over `args.seeds` and print a summary of the seeds with bugs."""
+    if args.only_new_bugs:
+        ruff_version = (
+            subprocess.run(
+                ["ruff", "--version"], text=True, capture_output=True, check=True
+            )
+            .stdout.strip()
+            .split(" ")[1]
+        )
+        print(
+            f"As you have selected `--only-new-bugs`, "
+            f"bugs will only be reported if they appear on your current branch "
+            f"but do *not* appear in `ruff=={ruff_version}`"
+        )
+    # Up to five seeds run sequentially; anything more runs in a process pool.
+    if len(args.seeds) <= 5:
+        bugs = run_fuzzer_sequentially(args)
+    else:
+        bugs = run_fuzzer_concurrently(args)
+    noun_phrase = "New bugs" if args.only_new_bugs else "Bugs"
+    if bugs:
+        print(colored(f"{noun_phrase} found in the following seeds:", "red"))
+        print(*sorted(bug.seed for bug in bugs))
+    else:
+        print(colored(f"No {noun_phrase.lower()} found!", "green"))
+
+
+def parse_seed_argument(arg: str) -> int | range:
+    """Convert one positional CLI argument into a seed or an inclusive seed range.
+
+    `"7"` is parsed as `7`, and `"0-5"` as `range(0, 6)`.
+
+    Raises `argparse.ArgumentTypeError` if a range is malformed, if its end is not
+    greater than its start, or if it contains more than 1_000_000_000 seeds.
+    """
+    if "-" in arg:
+        try:
+            start, end = map(int, arg.split("-"))
+        except ValueError:
+            raise argparse.ArgumentTypeError(
+                f"Error when parsing seed argument {arg!r}: "
+                f"a range must be in the format `<start>-<end>`"
+            ) from None
+        if end <= start:
+            raise argparse.ArgumentTypeError(
+                f"Error when parsing seed argument {arg!r}: "
+                f"range end must be > range start"
+            )
+        seed_range = range(start, end + 1)
+        range_too_long = (
+            f"Error when parsing seed argument {arg!r}: "
+            f"maximum allowed range length is 1_000_000_000"
+        )
+        try:
+            # `len()` of a huge range raises `OverflowError` instead of returning.
+            if len(seed_range) > 1_000_000_000:
+                raise argparse.ArgumentTypeError(range_too_long)
+        except OverflowError:
+            raise argparse.ArgumentTypeError(range_too_long) from None
+        return seed_range
+    return int(arg)
+
+
+@dataclass(slots=True)
+class ResolvedCliArgs:
+    """The command-line arguments, after seed ranges have been expanded."""
+
+    seeds: list[Seed]
+    _: KW_ONLY
+    only_new_bugs: bool
+    quiet: bool
+
+
+def parse_args() -> ResolvedCliArgs:
+    """Parse the command line; seeds are de-duplicated and sorted in the result."""
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawTextHelpFormatter
+    )
+    parser.add_argument(
+        "seeds",
+        type=parse_seed_argument,
+        nargs="+",
+        help="Either a single seed, or an inclusive range of seeds in the format `0-5`",
+    )
+    parser.add_argument(
+        "--only-new-bugs",
+        action="store_true",
+        help=(
+            "Only report bugs if they exist on the current branch, "
+            "but *didn't* exist on the released version of Ruff "
+            "installed into the Python environment we're running in"
+        ),
+    )
+    parser.add_argument(
+        "--quiet",
+        action="store_true",
+        help="Print fewer things to the terminal while running the fuzzer",
+    )
+    args = parser.parse_args()
+    seed_arguments: list[range | int] = args.seeds
+    seen_seeds: set[int] = set()
+    for arg in seed_arguments:
+        if isinstance(arg, int):
+            seen_seeds.add(arg)
+        else:
+            seen_seeds.update(arg)
+    return ResolvedCliArgs(
+        sorted(map(Seed, seen_seeds)),
+        only_new_bugs=args.only_new_bugs,
+        quiet=args.quiet,
+    )
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
diff --git a/scripts/fuzz-parser/requirements.in b/scripts/fuzz-parser/requirements.in
new file mode 100644
index 0000000000000..6582e77e4d0da
--- /dev/null
+++ b/scripts/fuzz-parser/requirements.in
@@ -0,0 +1,4 @@
+pysource-codegen
+pysource-minimize
+ruff
+termcolor
diff --git a/scripts/fuzz-parser/requirements.txt b/scripts/fuzz-parser/requirements.txt
new file mode 100644
index 0000000000000..781d7d6dcc490
--- /dev/null
+++ b/scripts/fuzz-parser/requirements.txt
@@ -0,0 +1,28 @@
+# This file was autogenerated by uv via the following command:
+#    uv pip compile scripts/fuzz-parser/requirements.in --output-file scripts/fuzz-parser/requirements.txt
+asttokens==2.4.1
+    # via pysource-minimize
+astunparse==1.6.3
+    # via pysource-minimize
+click==8.1.7
+    # via pysource-minimize
+markdown-it-py==3.0.0
+    # via rich
+mdurl==0.1.2
+    # via markdown-it-py
+pygments==2.17.2
+    # via rich
+pysource-codegen==0.5.1
+pysource-minimize==0.6.2
+rich==13.7.1
+    # via pysource-minimize
+ruff==0.4.0
+six==1.16.0
+    # via
+    #   asttokens
+    #   astunparse
+termcolor==2.4.0
+typing-extensions==4.11.0
+    # via pysource-codegen
+wheel==0.43.0
+    # via astunparse