Run performance comparison in CI using Kani's Benchcomp #8171

Merged
merged 1 commit on Mar 20, 2024
106 changes: 106 additions & 0 deletions .github/workflows/benchcomp-config.yaml
@@ -0,0 +1,106 @@
# Compare CBMC performance of selected benchmarks across two versions of CBMC,
# erroring out on regression. This config file is to be used together with the
# set-up in performance.yaml, because it assumes that there are two CBMC
# checkouts in the 'old' and 'new' directories; benchcomp compares the
# performance of these two checkouts.
#
# This configuration file is based on Benchcomp's perf-regression.yaml that
# ships with Kani.
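#
# A rough sketch of the layout these variants assume (set up by
# performance.yaml; the paths appear verbatim in the variant definitions
# below):
#   /home/runner/work/cbmc/cbmc/old/build/bin    - CBMC built from the old ref
#   /home/runner/work/cbmc/cbmc/new/build/bin    - CBMC built from the new ref
#   /home/runner/work/cbmc/cbmc/aws-c-common.git - benchmark suite checkout
# benchcomp runs each variant's command_line inside its directory, with the
# matching build prepended to PATH.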

variants:
  aws-c-common@old:
    config:
      command_line: cd verification/cbmc/proofs && export PATH=/home/runner/work/cbmc/cbmc/old/build/bin:$PATH &&
        ./run-cbmc-proofs.py
      directory: /home/runner/work/cbmc/cbmc/aws-c-common.git
      env: {}
  aws-c-common@new:
    config:
      command_line: cd verification/cbmc/proofs && export PATH=/home/runner/work/cbmc/cbmc/new/build/bin:$PATH &&
        ./run-cbmc-proofs.py
      directory: /home/runner/work/cbmc/cbmc/aws-c-common.git
      env: {}

run:
  suites:
    aws-c-common:
      parser:
        command: /home/runner/work/cbmc/cbmc/new/.github/workflows/benchcomp-parse_cbmc.py
      variants:
        - aws-c-common@old
        - aws-c-common@new

visualize:
  - type: dump_yaml
    out_file: '-'

  - type: dump_markdown_results_table
    out_file: '-'
    scatterplot: linear
    extra_columns:

      # For these two metrics, display the difference between old and new and
      # embolden if the absolute difference is more than 10% of the old value
      number_vccs:
        - column_name: diff old → new
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + str(b["aws-c-common@new"] - b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
      number_program_steps:
        - column_name: diff old → new
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + str(b["aws-c-common@new"] - b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.1 else "")

      # For 'runtime' metrics, display the % change from old to new, emboldening
      # cells whose absolute change is >50%
      solver_runtime:
        - column_name: "% change old → new"
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + "%.3f%%" % ((b["aws-c-common@new"] - b["aws-c-common@old"]) * 100 / b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
      verification_time:
        - column_name: "% change old → new"
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + "%.3f%%" % ((b["aws-c-common@new"] - b["aws-c-common@old"]) * 100 / b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
      symex_runtime:
        - column_name: "% change old → new"
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")
            + ("+" if b["aws-c-common@new"] > b["aws-c-common@old"] else "")
            + "%.3f%%" % ((b["aws-c-common@new"] - b["aws-c-common@old"]) * 100 / b["aws-c-common@old"])
            + ("**" if abs((b["aws-c-common@new"]-b["aws-c-common@old"])/b["aws-c-common@old"]) > 0.5 else "")

      # For the success metric, display some text if success has changed
      success:
        - column_name: change
          text: >
            lambda b: "" if b["aws-c-common@new"] == b["aws-c-common@old"]
            else "❌ newly failing" if b["aws-c-common@old"]
            else "✅ newly passing"

  - type: error_on_regression
    variant_pairs: [[aws-c-common@old, aws-c-common@new]]
    checks:
      - metric: success
        # Compare the old and new variants of each benchmark. The
        # benchmark has regressed if the lambda returns true.
        test: "lambda old, new: False if not old else not new"
      - metric: solver_runtime
        test: "lambda old, new: False if new < 10 else new/old > 1.5"
      - metric: symex_runtime
        test: "lambda old, new: False if new < 10 else new/old > 1.5"
141 changes: 141 additions & 0 deletions .github/workflows/benchcomp-parse_cbmc.py
@@ -0,0 +1,141 @@
#!/usr/bin/env python3
# Copyright Kani Contributors
# SPDX-License-Identifier: Apache-2.0 OR MIT


import json
import logging
import os
import pathlib
import re
import sys
import textwrap

import yaml


def get_description():
    return textwrap.dedent("""\
        Read CBMC statistics from a Litani run.json file.
        """)


def _get_metrics():
    return {
        "solver_runtime": {
            "pat": re.compile(r"Runtime Solver: (?P<value>[-e\d\.]+)s"),
            "parse": float,
        },
        "removed_program_steps": {
            "pat": re.compile(r"slicing removed (?P<value>\d+) assignments"),
            "parse": int,
        },
        "number_program_steps": {
            "pat": re.compile(r"size of program expression: (?P<value>\d+) steps"),
            "parse": int,
        },
        "number_vccs": {
            "pat": re.compile(
                r"Generated \d+ VCC\(s\), (?P<value>\d+) remaining after simplification"),
            "parse": int,
        },
        "symex_runtime": {
            "pat": re.compile(r"Runtime Symex: (?P<value>[-e\d\.]+)s"),
            "parse": float,
        },
        "success": {
            "pat": re.compile(r"VERIFICATION:- (?P<value>\w+)"),
            "parse": lambda v: v == "SUCCESSFUL",
        },
    }
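
# Illustrative CBMC log lines that the patterns above are meant to match
# (derived from the regexes themselves rather than from any particular CBMC
# version):
#
#   Runtime Solver: 1.234s                                  -> solver_runtime
#   slicing removed 42 assignments                          -> removed_program_steps
#   size of program expression: 1234 steps                  -> number_program_steps
#   Generated 99 VCC(s), 57 remaining after simplification  -> number_vccs
#   Runtime Symex: 0.567s                                   -> symex_runtime
#   VERIFICATION:- SUCCESSFUL                               -> success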


def get_metrics():
    metrics = dict(_get_metrics())
    for _, info in metrics.items():
        for field in ("pat", "parse"):
            info.pop(field)

    # This is not a metric we return; it is used to find the correct value for
    # the number_program_steps metric
    metrics.pop("removed_program_steps", None)

    # We don't parse this directly from the output; main() fills it in from
    # Litani's job duration instead
    metrics["verification_time"] = {}

    return metrics
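
# For illustration, get_metrics() therefore returns (key order aside):
#   {"solver_runtime": {}, "number_program_steps": {}, "number_vccs": {},
#    "symex_runtime": {}, "success": {}, "verification_time": {}}
# i.e. only the metric names are exposed to benchcomp; the regex/parse details
# stay internal and "removed_program_steps" is dropped.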


def get_run(root_dir):
    for fyle in pathlib.Path(root_dir).rglob("run.json"):
        with open(fyle) as handle:
            return json.load(handle)
    logging.error("No run.json found in %s", root_dir)
    sys.exit(1)


def main(root_dir):
    root_dir = pathlib.Path(root_dir)
    run = get_run(root_dir)
    metrics = _get_metrics()
    benchmarks = {}
    for pipe in run["pipelines"]:
        for stage in pipe["ci_stages"]:
            if stage["name"] != "test":
                continue
            for job in stage["jobs"]:
                if not job["wrapper_arguments"]["command"].startswith("cbmc "):
                    continue
                if "cover" in job["wrapper_arguments"]["command"]:
                    continue
                if "--show-properties" in job["wrapper_arguments"]["command"]:
                    continue

                bench_name = f"{run['project']}::{pipe['name']}"
                if not job["complete"]:
                    logging.warning(
                        "Found an incomplete CBMC benchmark '%s'",
                        bench_name)
                    continue

                benchmarks[bench_name] = {
                    "metrics": {
                        "success": job["outcome"] == "success",
                        "verification_time": float(job["duration_ms"]),
                        "solver_runtime": 0.0,
                    }}

                for line in job["stdout"]:
                    for metric, metric_info in metrics.items():
                        m = metric_info["pat"].search(line)
                        if not m:
                            continue

                        parse = metric_info["parse"]
                        try:
                            # CBMC prints out some metrics more than once, e.g.
                            # "Solver" and "decision procedure". Add those
                            # values together
                            benchmarks[bench_name]["metrics"][metric] += parse(m["value"])
                        except (KeyError, TypeError):
                            benchmarks[bench_name]["metrics"][metric] = parse(m["value"])
                        break

    for bench_name, bench_info in benchmarks.items():
        try:
            n_steps = bench_info["metrics"]["number_program_steps"]
            rm_steps = bench_info["metrics"]["removed_program_steps"]
            bench_info["metrics"]["number_program_steps"] = n_steps - rm_steps
            bench_info["metrics"].pop("removed_program_steps", None)
        except KeyError:
            pass

    return {
        "metrics": get_metrics(),
        "benchmarks": benchmarks,
    }
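
# A minimal sketch of the Litani run.json shape that main() relies on; the
# field names come from the accesses above, the values are made up:
#
#   {"project": "aws-c-common",
#    "pipelines": [
#      {"name": "aws_array_list_back",
#       "ci_stages": [
#         {"name": "test",
#          "jobs": [
#            {"wrapper_arguments": {"command": "cbmc --trace ..."},
#             "complete": true,
#             "outcome": "success",
#             "duration_ms": 12345,
#             "stdout": ["Runtime Solver: 1.234s", "..."]}]}]}]}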


if __name__ == "__main__":
    result = main(os.getcwd())
    print(yaml.dump(result, default_flow_style=False))
116 changes: 116 additions & 0 deletions .github/workflows/performance.yaml
@@ -0,0 +1,116 @@
# Run performance benchmarks comparing two different CBMC versions.
name: Performance Benchmarking
on:
  push:
    branches: [ develop ]
  pull_request:
    branches: [ develop ]

jobs:
  perf-benchcomp:
    runs-on: ubuntu-20.04
    steps:
      - name: Save push event HEAD and HEAD~ to environment variables
        if: ${{ github.event_name == 'push' }}
        run: |
          echo "NEW_REF=${{ github.event.after}}" | tee -a "$GITHUB_ENV"
          echo "OLD_REF=${{ github.event.before }}" | tee -a "$GITHUB_ENV"

      - name: Save pull request HEAD and base to environment variables
        if: ${{ contains(fromJSON('["pull_request", "pull_request_target"]'), github.event_name) }}
        run: |
          echo "OLD_REF=${{ github.event.pull_request.base.sha }}" | tee -a "$GITHUB_ENV"
          echo "NEW_REF=${{ github.event.pull_request.head.sha }}" | tee -a "$GITHUB_ENV"

      - name: Check out CBMC (old variant)
        uses: actions/checkout@v4
        with:
          submodules: recursive
          path: ./old
          ref: ${{ env.OLD_REF }}
          fetch-depth: 2

      - name: Check out CBMC (new variant)
        uses: actions/checkout@v4
        with:
          submodules: recursive
          path: ./new
          ref: ${{ env.NEW_REF }}
          fetch-depth: 1

      - name: Fetch dependencies
        env:
          # This is needed in addition to -yq to prevent apt-get from asking for
          # user input
          DEBIAN_FRONTEND: noninteractive
        run: |
          sudo apt-get update
          sudo apt-get install --no-install-recommends -yq cmake ninja-build gcc g++ maven flex bison ccache

      - name: Prepare ccache
        uses: actions/cache/restore@v4
        with:
          path: .ccache
          key: ${{ runner.os }}-20.04-Release-${{ github.ref }}-${{ github.sha }}-PR
          restore-keys: |
            ${{ runner.os }}-20.04-Release-${{ github.ref }}
            ${{ runner.os }}-20.04-Release
      - name: ccache environment
        run: echo "CCACHE_DIR=$PWD/.ccache" >> $GITHUB_ENV
      - name: Zero ccache stats and limit in size
        run: ccache -z --max-size=500M

      - name: Configure using CMake (new variant)
        run: cmake -S new/ -B new/build -G Ninja -DWITH_JBMC=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/bin/gcc -DCMAKE_CXX_COMPILER=/usr/bin/g++ -Dsat_impl="minisat2;cadical"
      - name: ccache environment (new variant)
        run: echo "CCACHE_BASEDIR=$PWD/new" >> $GITHUB_ENV
      - name: Build with Ninja (new variant)
        run: ninja -C new/build -j2
      - name: Print ccache stats (new variant)
        run: ccache -s

      - name: Configure using CMake (old variant)
        run: cmake -S old/ -B old/build -G Ninja -DWITH_JBMC=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/bin/gcc -DCMAKE_CXX_COMPILER=/usr/bin/g++ -Dsat_impl="minisat2;cadical"
      - name: ccache environment (old variant)
        run: echo "CCACHE_BASEDIR=$PWD/old" >> $GITHUB_ENV
      - name: Build with Ninja (old variant)
        run: ninja -C old/build -j2
      - name: Print ccache stats (old variant)
        run: ccache -s

      - name: Obtain benchcomp
        run: git clone --depth 1 https://github.com/model-checking/kani kani.git

      - name: Fetch benchmarks
        run: |
          git clone --depth 1 https://github.com/awslabs/aws-c-common aws-c-common.git
          pushd aws-c-common.git/verification/cbmc/proofs/
          # keep 6 proofs of each kind of data structure
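          # ("kind" here is the second "_"-separated token of a proof
          # directory name, e.g. "array" for aws_array_list_*; the loop below
          # keeps the first six directories of each kind, in ls order, and
          # removes the rest.)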
          for c in $(ls -d aws_* | cut -f2 -d_ | uniq) ; do rm -rf $(ls -d aws_${c}_* | tail -n+7) ; done
          popd
          sudo apt-get install --no-install-recommends -yq gnuplot graphviz universal-ctags python3-pip
          curl -L --remote-name \
            https://github.com/awslabs/aws-build-accumulator/releases/download/1.29.0/litani-1.29.0.deb
          sudo dpkg -i litani-1.29.0.deb
          sudo python3 -m pip install cbmc-viewer

      - name: Run benchcomp
        run: |
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            run
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            collate

      - name: Perf Regression Results Table
        run: |
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            visualize --only dump_markdown_results_table >> "$GITHUB_STEP_SUMMARY"

      - name: Run other visualizations
        run: |
          kani.git/tools/benchcomp/bin/benchcomp \
            --config new/.github/workflows/benchcomp-config.yaml \
            visualize --except dump_markdown_results_table