diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index 5dd3357..f274d0b 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -36,15 +36,15 @@ jobs:
           source .venv/bin/activate
           pip install --upgrade pip
           pip install -r scripts/requirements-bm.txt
-          deactivate

       - name: Run benchmarks
+        env:
+          PYTHONPATH: "."
         run: |
           ulimit -c unlimited
-
+          echo "core.%p" | sudo tee /proc/sys/kernel/core_pattern
           source .venv/bin/activate
-          python scripts/benchmark.py --format markdown | tee comment.txt
-          deactivate
+          python scripts/benchmark.py --format markdown --last | tee comment.txt

       - name: Post results on PR
         uses: marocchino/sticky-pull-request-comment@v2
diff --git a/scripts/benchmark.py b/scripts/benchmark.py
index 971deae..311e258 100644
--- a/scripts/benchmark.py
+++ b/scripts/benchmark.py
@@ -4,18 +4,15 @@
 import abc
 import re
 import sys
-from textwrap import wrap
 import typing as t
 from argparse import ArgumentParser
 from math import floor, log
 from pathlib import Path
-
-from scipy.stats import ttest_ind
-
-from common import download_release
-
 from test.utils import metadata, target
+from textwrap import wrap
+from common import download_release
+from scipy.stats import ttest_ind


 VERSIONS = ("3.4.1", "3.5.0", "dev")
 SCENARIOS = [
@@ -96,6 +93,8 @@ def get_stats(output: str) -> t.Optional[dict]:


 class Outcome:
+    __critical_p__ = 0.025
+
     def __init__(self, data: list[float]) -> None:
         self.data = data
         self.mean = sum(data) / len(data)
@@ -120,7 +119,7 @@ def __len__(self):

     def __eq__(self, other: "Outcome") -> bool:
         t, p = ttest_ind(self.data, other.data, equal_var=False)
-        return p < 0.05
+        return p < self.__critical_p__


 Results = t.Tuple[str, t.Dict[str, Outcome]]
@@ -312,8 +311,25 @@ def main():
         help="The output format",
     )

+    argp.add_argument(
+        "-l",
+        "--last",
+        action="store_true",
+        help="Run only with the last release of Austin",
+    )
+
+    argp.add_argument(
+        "-p",
+        "--pvalue",
+        type=float,
+        default=0.025,
+        help="The p-value to use when testing for statistical significance",
+    )
+
     opts = argp.parse_args()

+    Outcome.__critical_p__ = opts.pvalue
+
     renderer = {"terminal": TerminalRenderer, "markdown": MarkdownRenderer}[
         opts.format
     ]()
@@ -330,7 +346,7 @@ def main():
             continue

         table: t.List[Results] = []
-        for version in VERSIONS:
+        for version in VERSIONS[-2:] if opts.last else VERSIONS:
             print(f"> Running with Austin {version} ... ", end="\r", file=sys.stderr)
             try:
                 austin = download_release(version, Path("/tmp"), variant_name=variant)
diff --git a/scripts/requirements-bm.txt b/scripts/requirements-bm.txt
index fae94aa..f2db6c7 100644
--- a/scripts/requirements-bm.txt
+++ b/scripts/requirements-bm.txt
@@ -1,2 +1,2 @@
 austin-python~=1.6
-scipy~=1.10.1
+scipy~=1.10
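
The substantive change in scripts/benchmark.py is the significance check in Outcome.__eq__: Welch's t-test (scipy.stats.ttest_ind with equal_var=False) now compares two sets of benchmark timings against a configurable critical p-value instead of the hard-coded 0.05. The following is a minimal, self-contained sketch of that check; the class shape mirrors the diff, but the timing samples and variable names are made up purely for illustration.

# A minimal sketch of the significance check above, assuming only that scipy
# is installed; the timing samples below are hypothetical.
from scipy.stats import ttest_ind


class Outcome:
    # Critical p-value; the diff lets --pvalue override this at runtime.
    __critical_p__ = 0.025

    def __init__(self, data: list[float]) -> None:
        self.data = data
        self.mean = sum(data) / len(data)

    def __eq__(self, other: "Outcome") -> bool:
        # Welch's t-test (unequal variances). Note that "==" is overloaded to
        # mean "the difference between the two samples is statistically
        # significant", i.e. it is True when p falls below the critical value.
        _, p = ttest_ind(self.data, other.data, equal_var=False)
        return p < self.__critical_p__


baseline = Outcome([10.1, 10.3, 9.9, 10.2, 10.0])  # hypothetical 3.5.0 timings
dev = Outcome([12.0, 12.2, 11.9, 12.1, 12.3])      # hypothetical dev timings
print(baseline == dev)  # True here: the difference is statistically significant

With the new flags, a local run along the lines of `python scripts/benchmark.py --format markdown --last --pvalue 0.01` would benchmark only the last release plus dev (VERSIONS[-2:]) and tighten the significance threshold.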