diff --git a/.github/workflows/benchmark.py b/.github/workflows/benchmark.py
deleted file mode 100644
index fd0d7509c..000000000
--- a/.github/workflows/benchmark.py
+++ /dev/null
@@ -1,270 +0,0 @@
-import re
-import io
-import selectors
-import subprocess
-import sys
-
-
-# ------------------------------------------------------------------------------
-# From 'nawatts/capture-and-print-subprocess-output.py'
-
-def capture_subprocess_output(subprocess_args):
-    # Start subprocess
-    # bufsize = 1 means output is line buffered
-    # universal_newlines = True is required for line buffering
-    process = subprocess.Popen(subprocess_args,
-                               bufsize=1,
-                               stdout=subprocess.PIPE,
-                               stderr=subprocess.STDOUT,
-                               universal_newlines=True)
-
-    # Create callback function for process output
-    buf = io.StringIO()
-    def handle_output(stream, mask):
-        # Because the process' output is line buffered, there's only ever one
-        # line to read when this function is called
-        line = stream.readline()
-        buf.write(line)
-        sys.stdout.write(line)
-
-    # Register callback for an "available for read" event from subprocess' stdout stream
-    selector = selectors.DefaultSelector()
-    selector.register(process.stdout, selectors.EVENT_READ, handle_output)
-
-    # Loop until subprocess is terminated
-    while process.poll() is None:
-        # Wait for events and handle them with their registered callbacks
-        events = selector.select()
-        for key, mask in events:
-            callback = key.data
-            callback(key.fileobj, mask)
-
-    # Get process return code
-    return_code = process.wait()
-    selector.close()
-
-    success = (return_code == 0)
-
-    # Store buffered output
-    output = buf.getvalue()
-    buf.close()
-
-    return (success, output)
-
-
-# ------------------------------------------------------------------------------
-# Branch switching
-def remote_name(remote):
-    return remote.replace('/','_')
-
-
-def git_remove_remote(remote):
-    if (remote == 'origin'):
-        return # origin should not be purged
-
-    subprocess.run(['git', 'remote', 'remove', remote_name(remote)])
-
-
-def git_add_remote(remote):
-    if (remote == 'origin'):
-        return # origin hopefully exists
-
-    git_remove_remote(remote)
-    subprocess.run(['git', 'remote', 'add', remote_name(remote), f'https://github.com/{remote}.git'])
-
-
-def git_fetch():
-    subprocess.run(['git', 'fetch', '--all'])
-
-
-def git_checkout(remote, branch):
-    subprocess.run(['git', 'checkout', f'{remote_name(remote)}/{branch}'])
-    subprocess.run(['git', 'checkout', 'update', '--init', '--recursive'])
-    subprocess.run(['git', 'status'])
-
-# ------------------------------------------------------------------------------
-# Running benchmarks
-def repetitions_for(n):
-    if n <= 12:
-        return 16
-    elif n == 13:
-        return 8
-    elif n == 14:
-        return 4
-    else:
-        return 1
-
-def run_queens(n):
-    (out_success, out_txt) = capture_subprocess_output(['make', 'example/queens', 'N='+str(n), 'M=8096'])
-    if not out_success:
-        exit(255);
-
-    matches = re.findall("time:\s*([0-9\.]+)\s*s", out_txt)
-    timing = sum([float(t) for t in matches])
-    return timing
-
-
-# ------------------------------------------------------------------------------
-# Markdown helper functions
-def markdown_table(args, timings):
-    header = ' | '.join([f'{r}/{b}' for (r,b) in args])
-    line = '-|-'.join([re.sub('.','-',f'{r}/{b}') for (r,b) in args])
-
-    # assumes every value is a list of the same length
-    number_of_rows = len(timings[args[0][0]][args[0][1]])
-    rows = []
-
-    for i in range(0, number_of_rows):
-        rows.append(' | '.join([t + ' ' * (len(b)-len(t))
-                                for (b, t) in [(f'{r}/{b}', f'{timings[r][b][i]:.2f}') for (r,b) in args]]))
-
-    return '\n'.join(['| '+ header +' |', '|-'+ line +'-|'] + ['| '+ r +' |' for r in rows])
-
-
-def bold(txt):
-    return "**"+txt+"**"
-
-
-def spoiler(txt, summary):
-    return '> ' + ('<details>\n' +
-                   f'<summary>{summary}</summary>\n\n'
-                   + txt + '\n' +
-                   '</details>').replace('\n', '\n> ')
-
-
-def performance_report(n, args, timings):
-    output_txt = (f'# Benchmark Report `{args[0]}/{args[1]}`\n' +
-                  f'Minimum running time for {n}-Queens: {min(timings[args[0][1]])}s\n\n' +
-                  spoiler(f'Running times (s) for {n}-Queens:\n' +
-                          ', '.join([str(t) for t in timings[args[0][1]]]), 'Raw Data'))
-
-    return (0, output_txt)
-
-
-def comparison_report(n, args, timings):
-    # Create table of raw data
-    raw_data_txt = spoiler(f'Running times (s) for {n}-Queens:\n' + markdown_table(args, timings),
-                           'Raw Data')
-
-    # Compute minimum
-    for (r,b) in args:
-        timings[r][b] = [min(timings[r][b])]
-
-    minimum_data_txt = (f'Minimum running time (s) for {n}-Queens:\n' +
-                        markdown_table(args, timings) + '\n')
-
-    output_txt = ''
-
-    # Compute differences
-    reference_t = min(timings[args[0][0]][args[0][1]])
-    tested_t = [(rb, min(timings[rb[0]][rb[1]])) for rb in args[1::]]
-
-    diffs = []
-
-    if reference_t > 0.0:
-        diffs = [(rb, (t - reference_t) / reference_t)
-                 for (rb,t) in tested_t]
-    else:
-        diffs = [(rb, float('inf') if t > 0 else float(0))
-                 for (rb,t) in tested_t]
-
-    # Check if any diffs violate the threshold
-    worst_diff = 0.0 if len(diffs) == 0 else max([t for (rb,t) in diffs])
-    report_color = 'green' if worst_diff < 0.01 else ('yellow' if worst_diff < 0.05 else 'red')
-    exit_code = -1 if report_color == 'red' else 0
-
-    diffs_txt = [f'`{rb[0]}/{rb[1]}` ' + ('does not impact performance'
-                                          if t == 0.0
-                                          else ('is '+('an improvement' if t < 0.0 else 'a regression')+
-                                                f' of {abs(t)*100.0:.2f}%'))
-                 for (rb,t) in diffs]
-
-    output_txt = (f'# Benchmark Report :{report_color}_circle:\n' +
-                  bold(' and '.join(diffs_txt) + f' (compared to `{args[0][0]}/{args[0][1]}`).') +
-                  '\n\n' +
-                  minimum_data_txt + '\n' +
-                  raw_data_txt)
-
-    return (exit_code, output_txt)
-
-
-# ------------------------------------------------------------------------------
-# Main
-def main_current(n):
-    timings = []
-
-    print('Queens', end='', flush=True)
-
-    for run in range(0, repetitions_for(n)):
-        print(' .', end='', flush=True)
-        timings.append(run_queens(n))
-
-    print('\n time: ', min(timings), ' s')
-
-def main_compare(n, remote_branch_pairs):
-    for remote in list(set([r for (r,b) in remote_branch_pairs])):
-        git_add_remote(remote)
-
-    git_fetch()
-
-    # Initialise results data
-    timings = { }
-
-    for (remote, branch) in remote_branch_pairs:
-        timings[remote] = {}
-
-    for (remote, branch) in remote_branch_pairs:
-        timings[remote][branch] = []
-
-    # Run benchmarks on all branches as fairly as possible
-    for run in range(0, repetitions_for(n)):
-        for (remote, branch) in remote_branch_pairs:
-            git_checkout(remote, branch)
-            timings[remote][branch].append(run_queens(n))
-
-    for remote in list(set([r for (r,b) in remote_branch_pairs])):
-        git_remove_remote(remote)
-
-    exit_code = 0
-    with open('benchmark.out', 'w') as file:
-        report = (comparison_report(n, remote_branch_pairs, timings)
-                  if len(remote_branch_pairs) > 1
-                  else performance_report(n, remote_branch_pairs, timings))
-
-        exit_code = report[0]
-
-        print(report[1])
-        file.write(report[1])
-
-    return exit_code
-
-
-def main(cmd_args):
-    if len(cmd_args) < 2:
-        print("Please provide the 'N' as the first argument")
-        exit(-1)
-
-    cmd_program = cmd_args[0]
-    cmd_N = int(cmd_args[1])
-    cmd_branches = cmd_args[2::1]
-
-    exit_code = 0
-
-    if len(cmd_branches) == 0:
-        main_current(cmd_N)
-
-    else:
-        if cmd_branches == 1:
-            cmd_branches = ['origin', cmd_branches[1]]
-
-        if len(cmd_branches) % 2 != 0:
-            print('Please provide pairs of remote , branch')
-            exit(-1)
-
-        exit_code = main_compare(cmd_N,
-                                 list(zip(*[iter(cmd_branches[0::])]*2)))
-
-    exit(exit_code)
-
-if __name__ == '__main__':
-    main(sys.argv)
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 0e175a716..bc0032185 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -6,8 +6,8 @@ on:
 
 jobs:
   fetch-branch:
-    name: 'Fetch branch name'
-    runs-on: ubuntu-latest
+    name: 'Fetch branch'
+    runs-on: ubuntu-latest
 
     steps:
     - id: identify
@@ -42,8 +42,8 @@ jobs:
     outputs:
       should_skip: ${{ steps.skip_check.outputs.should_skip }}
 
-  run_benchmark:
-    name: 'Benchmark: ${{ matrix.N }}-Queens'
+  run_qbf:
+    name: 'Performance Regression: QBF ${{ matrix.category }}/${{ matrix.circuit }}'
     runs-on: ubuntu-latest
 
     needs: [fetch-branch, fetch-remote, skip_duplicate]
@@ -53,34 +53,111 @@ jobs:
       fail-fast: false
       matrix:
         include:
-        - N: '9'
-        - N: '12'
-        - N: '14'
+        - { category: breakthrough, circuit: 3x4_19_bwnib }
+        - { category: breakthrough, circuit: 3x5_11_bwnib }
+        - { category: domineering, circuit: 5x5_13_bwnib }
+        - { category: ep, circuit: 8x8_7_e-8-1_p-3-4_bwnib }
 
     steps:
     - uses: actions/checkout@v2
       with:
+        repository: 'ssoelvsten/bdd-benchmark'
         submodules: 'recursive'
+        fetch-depth: '0'
 
-    - name: 'Apt install'
+    - name: 'apt install'
       run: |
         sudo apt update
         sudo apt install libboost-all-dev
 
-    - name: 'Run benchmark.py'
+    - name: 'pip install'
       run: |
-        python3 ./.github/workflows/benchmark.py ${{ matrix.N }} origin main ${{ needs.fetch-remote.outputs.remote }} ${{ needs.fetch-branch.outputs.branch }}
+        sudo pip install wget
 
-    - name: 'Post benchmark.out on PR'
+    - name: 'python3 regression.py'
+      run: |
+        python3 ./regression.py <<< "qbf
+        yes
+        ${{ matrix.category }}
+        ${{ matrix.circuit }}
+        adiar
+        8192
+        origin
+        main
+        ${{ needs.fetch-remote.outputs.remote }}
+        ${{ needs.fetch-branch.outputs.branch }}
+        no
+        no
+        3
+        5
+        "
+
+    - name: 'post regression_adiar.out'
       if: always()
       uses: machine-learning-apps/pr-comment@master
       env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
-        path: ./benchmark.out
+        path: ./regression_adiar.out
+
+  run_picotrav:
+    name: 'Performance Regression: Picotrav ${{ matrix.circuit }}'
+    runs-on: ubuntu-latest
+
+    needs: [fetch-branch, fetch-remote, skip_duplicate]
+    if: ${{ needs.skip_duplicate.outputs.should_skip != 'true' }}
+
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+        - { circuit: adder }
+        - { circuit: arbiter }
+        - { circuit: mem_ctrl }
+
+    steps:
+    - uses: actions/checkout@v2
+      with:
+        repository: 'ssoelvsten/bdd-benchmark'
+        submodules: 'recursive'
+        fetch-depth: '0'
+
+    - name: 'apt install'
+      run: |
+        sudo apt update
+        sudo apt install libboost-all-dev
+
+    - name: 'pip install'
+      run: |
+        sudo pip install wget
+
+    - name: 'python3 regression.py'
+      run: |
+        python3 ./regression.py <<< "picotrav
+        yes
+        ${{ matrix.circuit }}
+        adiar
+        8192
+        origin
+        main
+        ${{ needs.fetch-remote.outputs.remote }}
+        ${{ needs.fetch-branch.outputs.branch }}
+        no
+        no
+        3
+        10
+        "
+
+    - name: 'post regression_adiar.out'
+      if: always()
+      uses: machine-learning-apps/pr-comment@master
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      with:
+        path: ./regression_adiar.out
 
   run_dummy:
-    name: 'Benchmark: ${{ matrix.N }}-Queens'
+    name: 'Performance Regression: ${{ matrix.title }}'
     runs-on: ubuntu-latest
 
     needs: [skip_duplicate]
@@ -89,12 +166,15 @@ jobs:
     strategy:
       matrix:
         include:
-        - N: '9'
-        - N: '12'
-        - N: '14'
+        - title: 'QBF breakthrough/3x4_19_bwnib'
+        - title: 'QBF breakthrough/3x5_11_bwnib'
+        - title: 'QBF domineering/5x5_13_bwnib'
+        - title: 'QBF ep/8x8_7_e-8-1_p-3-4_bwnib'
+        - title: 'Picotrav adder'
+        - title: 'Picotrav arbiter'
+        - title: 'Picotrav mem_ctrl'
 
     steps:
     - name: Echo skip
       run: |
         echo "Running benchmarks is skipped"
-
diff --git a/.github/workflows/system_test.yml b/.github/workflows/system_test.yml
index be5640faf..d0fba5989 100644
--- a/.github/workflows/system_test.yml
+++ b/.github/workflows/system_test.yml
@@ -63,11 +63,12 @@ jobs:
 
     steps:
     - name: Git | checkout SSoelvsten/BDD-Benchmark
-      uses: actions/checkout@v2
+      uses: actions/checkout@v4
       with:
        repository: 'ssoelvsten/bdd-benchmark'
        submodules: 'recursive'
        fetch-depth: '0'
+
    - name: Git | checkout pull request
      run: |
        cd external/adiar
@@ -111,12 +112,17 @@ jobs:
       order: 'INPUT'
 
     steps:
-    - name: Git | checkout SSoelvsten/BDD-Benchmark
-      uses: actions/checkout@v2
+    - name: Git | checkout ssoelvsten/bdd-benchmark
+      uses: actions/checkout@v4
       with:
        repository: 'ssoelvsten/bdd-benchmark'
        submodules: 'recursive'
        fetch-depth: '0'
+
+    - name: Git | clone lsils/benchmarks
+      run: |
+        git clone https://github.com/lsils/benchmarks.git ../epfl
+
    - name: Git | checkout pull request
      run: |
        cd external/adiar
@@ -140,8 +146,8 @@ jobs:
 
     - name: Run Benchmarks
       run: |
         for spec in ${{ matrix.circuits }} ; do \
-          for opt in benchmarks/epfl/best_results/**/$spec* ; do \
-            build/src/adiar_picotrav_bdd -f benchmarks/epfl/${{ matrix.name }}/$spec.blif -f $opt -o ${{ matrix.order }} -M 1024 ; \
+          for opt in ../epfl/best_results/**/$spec* ; do \
+            build/src/adiar_picotrav_bdd -f ../epfl/${{ matrix.name }}/$spec.blif -f $opt -o ${{ matrix.order }} -M 1024 ; \
           done ; \
         done