ci(benchmarks): run only the last release

We speed up the benchmarks CI run by running only the latest release and the dev build. Any issues caught this way can be analysed locally if needed.
P403n1x87 · Sep 11, 2023 · 55e8073 · 55e8073
1 parent 6455c21
commit 55e8073
Show file tree

Hide file tree

Showing 3 changed files with 29 additions and 13 deletions.
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
@@ -36,15 +36,15 @@ jobs:
           source .venv/bin/activate
           pip install --upgrade pip
           pip install -r scripts/requirements-bm.txt
-          deactivate
 
       - name: Run benchmarks
+        env:
+          PYTHONPATH: "."
         run: |
           ulimit -c unlimited
-
+          echo "core.%p" | sudo tee /proc/sys/kernel/core_pattern
           source .venv/bin/activate
-          python scripts/benchmark.py --format markdown | tee comment.txt
-          deactivate
+          python scripts/benchmark.py --format markdown --last | tee comment.txt
 
       - name: Post results on PR
         uses: marocchino/sticky-pull-request-comment@v2

diff --git a/scripts/benchmark.py b/scripts/benchmark.py
@@ -4,18 +4,15 @@
 import abc
 import re
 import sys
-from textwrap import wrap
 import typing as t
 from argparse import ArgumentParser
 from math import floor, log
 from pathlib import Path
-
-from scipy.stats import ttest_ind
-
-from common import download_release
-
 from test.utils import metadata, target
+from textwrap import wrap
 
+from common import download_release
+from scipy.stats import ttest_ind
 
 VERSIONS = ("3.4.1", "3.5.0", "dev")
 SCENARIOS = [
@@ -96,6 +93,8 @@ def get_stats(output: str) -> t.Optional[dict]:
 
 
 class Outcome:
+    __critical_p__ = 0.025
+
     def __init__(self, data: list[float]) -> None:
         self.data = data
         self.mean = sum(data) / len(data)
@@ -120,7 +119,7 @@ def __len__(self):
 
     def __eq__(self, other: "Outcome") -> bool:
         t, p = ttest_ind(self.data, other.data, equal_var=False)
-        return p < 0.05
+        return p < self.__critical_p__
 
 
 Results = t.Tuple[str, t.Dict[str, Outcome]]
@@ -312,8 +311,25 @@ def main():
         help="The output format",
     )
 
+    argp.add_argument(
+        "-l",
+        "--last",
+        action="store_true",
+        help="Run only with the last release of Austin",
+    )
+
+    argp.add_argument(
+        "-p",
+        "--pvalue",
+        type=float,
+        default=0.025,
+        help="The p-value to use when testing for statistical significance",
+    )
+
     opts = argp.parse_args()
 
+    Outcome.__critical_p__ = opts.pvalue
+
     renderer = {"terminal": TerminalRenderer, "markdown": MarkdownRenderer}[
         opts.format
     ]()
@@ -330,7 +346,7 @@ def main():
             continue
 
         table: t.List[Results] = []
-        for version in VERSIONS:
+        for version in VERSIONS[-2:] if opts.last else VERSIONS:
             print(f"> Running with Austin {version} ...    ", end="\r", file=sys.stderr)
             try:
                 austin = download_release(version, Path("/tmp"), variant_name=variant)

diff --git a/scripts/requirements-bm.txt b/scripts/requirements-bm.txt
@@ -1,2 +1,2 @@
 austin-python~=1.6
-scipy~=1.10.1
+scipy~=1.10