diff --git a/barretenberg/cpp/scripts/benchmarks.sh b/barretenberg/cpp/scripts/benchmarks.sh
new file mode 100755
index 00000000000..a7ec1dcb1d0
--- /dev/null
+++ b/barretenberg/cpp/scripts/benchmarks.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Configure and build the clang16 preset, then run every benchmark binary and print its output as
+# collapsible GitHub-markdown sections. Each benchmark's output is also saved to build/<name>.out.
+#
+# pipefail is required: without it the status of `bench | tee` is tee's, so a failing benchmark
+# would slip past `set -e` unnoticed.
+set -euo pipefail
+
+# Move above script dir (quoted so a checkout path containing spaces still works).
+cd "$(dirname "$0")/.."
+
+# Configure and build.
+cmake --preset clang16
+cmake --build --preset clang16
+
+cd build
+
+# Run one benchmark and wrap its output in a <details> section.
+#   $1: title shown in the <summary>
+#   $2: executable name under ./bin; the output is also written to <name>.out
+# github markdown style, works in comments and descriptions
+function run_bench() {
+  local title=$1
+  local name=$2
+  echo -e "<details><summary>$title</summary>"
+  echo -e '\n```'
+  "./bin/$name" | tee "$name.out"
+  echo -e '```\n'
+  echo -e "</details>"
+}
+
+run_bench "Standard Plonk" standard_plonk_bench
+run_bench "Ultra Honk Round Breakdown" ultra_honk_rounds_bench
+run_bench "Ultra Plonk Round Breakdown" ultra_plonk_rounds_bench
+run_bench "Ultra Honk" ultra_honk_bench
+run_bench "Ultra Plonk" ultra_plonk_bench
diff --git a/barretenberg/cpp/scripts/collect_profile_information.sh b/barretenberg/cpp/scripts/collect_profile_information.sh
index 28ca73fe4d2..df932c086bc 100755
--- a/barretenberg/cpp/scripts/collect_profile_information.sh
+++ b/barretenberg/cpp/scripts/collect_profile_information.sh
@@ -1,9 +1,14 @@
 #!/bin/bash
 set -eu
 
-PRESET=${1:-xray} # can also be 'xray-1thread'
+# $1: preset name, defaults to 'xray'; can also be 'xray-1thread'
+PRESET=${1:-xray}
+# $2: leave empty ("") to run the benchmark; pass 1 to skip the run and reuse old results
 ONLY_PROCESS=${2:-}
+# $3: the executable name from build/bin, defaults to ultra_honk_rounds_bench
 EXECUTABLE=${3:-ultra_honk_rounds_bench}
+# $4: command to run; by default the executable itself, but an alt command can be given e.g. use taskset and benchmark flags
+COMMAND=${4:-./bin/$EXECUTABLE}
 
 # Move above script dir.
 cd $(dirname $0)/..
@@ -19,7 +24,7 @@ if [ -z "$ONLY_PROCESS" ]; then
   rm -f xray-log.$EXECUTABLE.*
 
   # Run benchmark with profiling.
-  XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/$EXECUTABLE
+  XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" $COMMAND
 fi
 
 function shorten_cpp_names() {
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt
index ed6122bb41e..fa33b5a6567 100644
--- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt
+++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt
@@ -4,6 +4,7 @@ set(BENCHMARK_SOURCES
   ultra_honk.bench.cpp
   ultra_honk_rounds.bench.cpp
   ultra_plonk.bench.cpp
+  ultra_plonk_rounds.bench.cpp
 )
 
 # Required libraries for benchmark suites
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp
index 5053d89bd54..84949504692 100644
--- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp
+++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp
@@ -4,6 +4,7 @@
 
 #include "barretenberg/honk/composer/ultra_composer.hpp"
 #include "barretenberg/honk/proof_system/ultra_prover.hpp"
+#include "barretenberg/plonk/composer/standard_composer.hpp"
 #include "barretenberg/plonk/composer/ultra_composer.hpp"
 #include "barretenberg/proof_system/types/circuit_type.hpp"
 #include "barretenberg/stdlib/encryption/ecdsa/ecdsa.hpp"
@@ -23,35 +24,25 @@ using namespace benchmark;
 
 namespace bench_utils {
 
-struct BenchParams {
-    // Num iterations of the operation of interest in a test circuit, e.g. num sha256 hashes
-    static constexpr size_t MIN_NUM_ITERATIONS = 10;
-    static constexpr size_t MAX_NUM_ITERATIONS = 10;
-
-    // Log num gates; for simple circuits only, e.g. standard arithmetic circuit
-    static constexpr size_t MIN_LOG_NUM_GATES = 16;
-    static constexpr size_t MAX_LOG_NUM_GATES = 16;
-
-    static constexpr size_t NUM_REPETITIONS = 1;
-};
-
 /**
  * @brief Generate test circuit with basic arithmetic operations
  *
- * @param composer
- * @param num_iterations
+ * @param builder circuit builder the test circuit is generated in
+ * @param log2_num_gates log2 of the gate budget; must lie in [5, bit width of size_t), else std::runtime_error
  */
-template <typename Builder> void generate_basic_arithmetic_circuit(Builder& builder, size_t num_gates)
+template <typename Builder> void generate_basic_arithmetic_circuit(Builder& builder, size_t log2_num_gates)
 {
     proof_system::plonk::stdlib::field_t a(
         proof_system::plonk::stdlib::witness_t(&builder, barretenberg::fr::random_element()));
     proof_system::plonk::stdlib::field_t b(
         proof_system::plonk::stdlib::witness_t(&builder, barretenberg::fr::random_element()));
     proof_system::plonk::stdlib::field_t c(&builder);
-    if (num_gates < 4) {
+    // Below 2^5 gates the pass count 2^n / 4 - 4 is not positive; a shift by the full width of size_t or more is UB.
+    if (log2_num_gates < 5 || log2_num_gates >= sizeof(size_t) * 8) {
         throw std::runtime_error("too few gates");
     }
-    for (size_t i = 0; i < (num_gates / 4) - 4; ++i) {
+    const size_t passes = (size_t{ 1 } << log2_num_gates) / 4 - 4;
+    for (size_t i = 0; i < passes; ++i) {
         c = a + b;
         c = a * c;
         a = b * b;
@@ -175,37 +166,7 @@ template <typename Builder> void generate_merkle_membership_test_circuit(Builder
     }
 }
 
-/**
- * @brief Performs proof constuction for benchmarks based on a provided circuit function
- *
- * @details This function assumes state.range refers to num_gates which is the size of the underlying circuit
- *
- * @tparam Builder
- * @param state
- * @param test_circuit_function
- */
-template <typename Composer>
-void construct_proof_with_specified_num_gates(State& state,
-                                              void (*test_circuit_function)(typename Composer::CircuitBuilder&,
-                                                                            size_t)) noexcept
-{
-    barretenberg::srs::init_crs_factory("../srs_db/ignition");
-    auto num_gates = static_cast<size_t>(1 << (size_t)state.range(0));
-    for (auto _ : state) {
-        // Constuct circuit and prover; don't include this part in measurement
-        state.PauseTiming();
-        auto builder = typename Composer::CircuitBuilder();
-        test_circuit_function(builder, num_gates);
-
-        auto composer = Composer();
-        auto ext_prover = composer.create_prover(builder);
-        state.ResumeTiming();
-
-        // Construct proof
-        auto proof = ext_prover.construct_proof();
-    }
-}
-
+// ultrahonk
 inline proof_system::honk::UltraProver get_prover(
     proof_system::honk::UltraComposer& composer,
     void (*test_circuit_function)(proof_system::honk::UltraComposer::CircuitBuilder&, size_t),
@@ -217,6 +178,18 @@ inline proof_system::honk::UltraProver get_prover(
     return composer.create_prover(instance);
 }
 
+// standard plonk: fill a fresh circuit builder via test_circuit_function, then ask the composer for its prover
+inline proof_system::plonk::Prover get_prover(
+    proof_system::plonk::StandardComposer& composer,
+    void (*test_circuit_function)(proof_system::StandardCircuitBuilder&, size_t),
+    size_t num_iterations)
+{
+    proof_system::StandardCircuitBuilder circuit;
+    (*test_circuit_function)(circuit, num_iterations);
+    return composer.create_prover(circuit);
+}
+
+// ultraplonk
 inline proof_system::plonk::UltraProver get_prover(
     proof_system::plonk::UltraComposer& composer,
     void (*test_circuit_function)(proof_system::honk::UltraComposer::CircuitBuilder&, size_t),
@@ -237,15 +210,13 @@ inline proof_system::plonk::UltraProver get_prover(
  * @param test_circuit_function
  */
 template <typename Composer>
-void construct_proof_with_specified_num_iterations(State& state,
-                                                   void (*test_circuit_function)(typename Composer::CircuitBuilder&,
-                                                                                 size_t)) noexcept
+void construct_proof_with_specified_num_iterations(
+    State& state, void (*test_circuit_function)(typename Composer::CircuitBuilder&, size_t), size_t num_iterations)
 {
     barretenberg::srs::init_crs_factory("../srs_db/ignition");
 
     Composer composer;
 
-    auto num_iterations = static_cast<size_t>(state.range(0));
     for (auto _ : state) {
         // Constuct circuit and prover; don't include this part in measurement
         state.PauseTiming();
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/standard_plonk.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/standard_plonk.bench.cpp
index 9ed605a3480..b44f4123f61 100644
--- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/standard_plonk.bench.cpp
+++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/standard_plonk.bench.cpp
@@ -4,30 +4,22 @@
 
 using namespace benchmark;
 
-namespace standard_plonk_bench {
-
 using StandardBuilder = proof_system::StandardCircuitBuilder;
 using StandardPlonk = proof_system::plonk::StandardComposer;
 
-// Log number of gates for test circuit
-constexpr size_t MIN_LOG_NUM_GATES = bench_utils::BenchParams::MIN_LOG_NUM_GATES;
-constexpr size_t MAX_LOG_NUM_GATES = bench_utils::BenchParams::MAX_LOG_NUM_GATES;
-// Number of times to repeat each benchmark
-constexpr size_t NUM_REPETITIONS = bench_utils::BenchParams::NUM_REPETITIONS;
-
 /**
- * @brief Benchmark: Construction of a Standard proof for a circuit determined by the provided circuit function
+ * @brief Benchmark: Construction of a Standard Plonk proof for the basic arithmetic circuit with 2**n gates
+ *
+ * @param state google benchmark state; state.range(0) is n, the log2 of the gate count
  */
-void construct_proof_standard(State& state, void (*test_circuit_function)(StandardBuilder&, size_t)) noexcept
+static void construct_proof_standard_power_of_2(State& state) noexcept
 {
-    bench_utils::construct_proof_with_specified_num_gates<StandardPlonk>(state, test_circuit_function);
+    auto log2_of_gates = static_cast<size_t>(state.range(0));
+    bench_utils::construct_proof_with_specified_num_iterations<StandardPlonk>(
+        state, &bench_utils::generate_basic_arithmetic_circuit<StandardBuilder>, log2_of_gates);
 }
 
-BENCHMARK_CAPTURE(construct_proof_standard,
-                  arithmetic,
-                  &bench_utils::generate_basic_arithmetic_circuit<StandardBuilder>)
-    ->DenseRange(MIN_LOG_NUM_GATES, MAX_LOG_NUM_GATES)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kSecond);
-
-} // namespace standard_plonk_bench
\ No newline at end of file
+BENCHMARK(construct_proof_standard_power_of_2)
+    // 2**13 gates to 2**18 gates
+    ->DenseRange(13, 18)
+    ->Unit(::benchmark::kMillisecond);
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp
index 92933bb4648..7d1bd15b413 100644
--- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp
+++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp
@@ -5,47 +5,49 @@
 #include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp"
 
 using namespace benchmark;
-using namespace proof_system::plonk;
-
-namespace ultra_honk_bench {
-
-using UltraBuilder = proof_system::UltraCircuitBuilder;
-using UltraHonk = proof_system::honk::UltraComposer;
-
-// Number of times to perform operation of interest in the benchmark circuits, e.g. # of hashes to perform
-constexpr size_t MIN_NUM_ITERATIONS = bench_utils::BenchParams::MIN_NUM_ITERATIONS;
-constexpr size_t MAX_NUM_ITERATIONS = bench_utils::BenchParams::MAX_NUM_ITERATIONS;
-// Number of times to repeat each benchmark
-constexpr size_t NUM_REPETITIONS = bench_utils::BenchParams::NUM_REPETITIONS;
+using namespace proof_system;
 
 /**
  * @brief Benchmark: Construction of a Ultra Honk proof for a circuit determined by the provided circuit function
+ *
+ * @param state google benchmark state
+ * @param test_circuit_function circuit generator to benchmark; passed on with num_iterations = 10
  */
-void construct_proof_ultra(State& state, void (*test_circuit_function)(UltraBuilder&, size_t)) noexcept
+static void construct_proof_ultrahonk(State& state,
+                                      void (*test_circuit_function)(UltraCircuitBuilder&, size_t)) noexcept
+{
+    size_t num_iterations = 10; // 10x the circuit
+    bench_utils::construct_proof_with_specified_num_iterations<honk::UltraComposer>(
+        state, test_circuit_function, num_iterations);
+}
+
+/**
+ * @brief Benchmark: Construction of an Ultra Honk proof with 2**n gates
+ *
+ * @param state google benchmark state; state.range(0) is n, the log2 of the gate count
+ */
+static void construct_proof_ultrahonk_power_of_2(State& state) noexcept
 {
-    bench_utils::construct_proof_with_specified_num_iterations<UltraHonk>(state, test_circuit_function);
+    auto log2_of_gates = static_cast<size_t>(state.range(0));
+    bench_utils::construct_proof_with_specified_num_iterations<honk::UltraComposer>(
+        state, &bench_utils::generate_basic_arithmetic_circuit<UltraCircuitBuilder>, log2_of_gates);
 }
 
 // Define benchmarks
-BENCHMARK_CAPTURE(construct_proof_ultra, sha256, &bench_utils::generate_sha256_test_circuit<UltraBuilder>)
-    ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kMillisecond);
-BENCHMARK_CAPTURE(construct_proof_ultra, keccak, &bench_utils::generate_keccak_test_circuit<UltraBuilder>)
-    ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kMillisecond);
-BENCHMARK_CAPTURE(construct_proof_ultra,
+BENCHMARK_CAPTURE(construct_proof_ultrahonk, sha256, &bench_utils::generate_sha256_test_circuit<UltraCircuitBuilder>)
+    ->Unit(kMillisecond);
+BENCHMARK_CAPTURE(construct_proof_ultrahonk, keccak, &bench_utils::generate_keccak_test_circuit<UltraCircuitBuilder>)
+    ->Unit(kMillisecond);
+BENCHMARK_CAPTURE(construct_proof_ultrahonk,
                   ecdsa_verification,
-                  &bench_utils::generate_ecdsa_verification_test_circuit<UltraBuilder>)
-    ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kMillisecond);
-BENCHMARK_CAPTURE(construct_proof_ultra,
+                  &bench_utils::generate_ecdsa_verification_test_circuit<UltraCircuitBuilder>)
+    ->Unit(kMillisecond);
+BENCHMARK_CAPTURE(construct_proof_ultrahonk,
                   merkle_membership,
-                  &bench_utils::generate_merkle_membership_test_circuit<UltraBuilder>)
-    ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kMillisecond);
+                  &bench_utils::generate_merkle_membership_test_circuit<UltraCircuitBuilder>)
+    ->Unit(kMillisecond);
 
-} // namespace ultra_honk_bench
\ No newline at end of file
+BENCHMARK(construct_proof_ultrahonk_power_of_2)
+    // 2**13 gates to 2**18 gates
+    ->DenseRange(13, 18)
+    ->Unit(kMillisecond);
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp
index 8b4e7145596..8f7afaa181d 100644
--- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp
+++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp
@@ -24,39 +24,41 @@ BBERG_PROFILE static void test_round_inner(State& state, honk::UltraProver& prov
     auto time_if_index = [&](size_t target_index, auto&& func) -> void {
         if (index == target_index) {
             state.ResumeTiming();
-            func();
+        }
+        func();
+        if (index == target_index) {
             state.PauseTiming();
-        } else {
-            func();
         }
     };
-    for (auto _ : state) {
-        state.PauseTiming();
-        time_if_index(PREAMBLE, [&] { prover.execute_preamble_round(); });
-        time_if_index(WIRE_COMMITMENTS, [&] { prover.execute_wire_commitments_round(); });
-        time_if_index(SORTED_LIST_ACCUMULATOR, [&] { prover.execute_sorted_list_accumulator_round(); });
-        time_if_index(GRAND_PRODUCT_COMPUTATION, [&] { prover.execute_grand_product_computation_round(); });
-        time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); });
-        time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); });
-        state.ResumeTiming();
-    }
+
+    time_if_index(PREAMBLE, [&] { prover.execute_preamble_round(); });
+    time_if_index(WIRE_COMMITMENTS, [&] { prover.execute_wire_commitments_round(); });
+    time_if_index(SORTED_LIST_ACCUMULATOR, [&] { prover.execute_sorted_list_accumulator_round(); });
+    time_if_index(GRAND_PRODUCT_COMPUTATION, [&] { prover.execute_grand_product_computation_round(); });
+    time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); });
+    time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); });
 }
 BBERG_PROFILE static void test_round(State& state, size_t index) noexcept
 {
     barretenberg::srs::init_crs_factory("../srs_db/ignition");
 
-    honk::UltraComposer composer;
-    // TODO(AD) benchmark both sparse and dense circuits?
-    honk::UltraProver prover =
-        bench_utils::get_prover(composer, &bench_utils::generate_keccak_test_circuit<UltraCircuitBuilder>, 1);
-    test_round_inner(state, prover, index);
+    for (auto _ : state) {
+        state.PauseTiming();
+        honk::UltraComposer composer;
+        // TODO: https://github.com/AztecProtocol/barretenberg/issues/761 benchmark both sparse and dense circuits
+        honk::UltraProver prover = bench_utils::get_prover(
+            composer, &bench_utils::generate_ecdsa_verification_test_circuit<UltraCircuitBuilder>, 10);
+        test_round_inner(state, prover, index);
+        state.ResumeTiming();
+        // NOTE: google bench is very finicky, each iteration must end in ResumeTiming() for correctness
+    }
 }
 #define ROUND_BENCHMARK(round)                                                                                         \
     static void ROUND_##round(State& state) noexcept                                                                   \
     {                                                                                                                  \
         test_round(state, round);                                                                                      \
     }                                                                                                                  \
-    BENCHMARK(ROUND_##round)->Unit(::benchmark::kMillisecond)
+    BENCHMARK(ROUND_##round)->Unit(kMillisecond)
 
 // Fast rounds take a long time to benchmark because of how we compute statistical significance.
 // Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part.
@@ -65,4 +67,4 @@ ROUND_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1);
 ROUND_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1);
 ROUND_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1);
 ROUND_BENCHMARK(RELATION_CHECK);
-ROUND_BENCHMARK(ZEROMORPH);
\ No newline at end of file
+ROUND_BENCHMARK(ZEROMORPH);
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk.bench.cpp
index 74a9fd1acc7..9ea895fe703 100644
--- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk.bench.cpp
+++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk.bench.cpp
@@ -3,45 +3,49 @@
 #include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp"
 
 using namespace benchmark;
+using namespace proof_system;
 
-namespace ultra_plonk_bench {
-
-using UltraBuilder = proof_system::UltraCircuitBuilder;
-using UltraPlonk = proof_system::plonk::UltraComposer;
-
-// Number of times to perform operation of interest in the benchmark circuits, e.g. # of hashes to perform
-constexpr size_t MIN_NUM_ITERATIONS = bench_utils::BenchParams::MIN_NUM_ITERATIONS;
-constexpr size_t MAX_NUM_ITERATIONS = bench_utils::BenchParams::MAX_NUM_ITERATIONS;
-// Number of times to repeat each benchmark
-constexpr size_t NUM_REPETITIONS = bench_utils::BenchParams::NUM_REPETITIONS;
+/**
+ * @brief Benchmark: Construction of an Ultra Plonk proof for a circuit determined by the provided circuit function
+ *
+ * @param state google benchmark state
+ * @param test_circuit_function circuit generator to benchmark; passed on with num_iterations = 10
+ */
+static void construct_proof_ultraplonk(State& state,
+                                       void (*test_circuit_function)(UltraCircuitBuilder&, size_t)) noexcept
+{
+    size_t num_iterations = 10; // 10x the circuit
+    bench_utils::construct_proof_with_specified_num_iterations<plonk::UltraComposer>(
+        state, test_circuit_function, num_iterations);
+}
 
 /**
- * @brief Benchmark: Construction of a Ultra Honk proof for a circuit determined by the provided circuit function
+ * @brief Benchmark: Construction of an Ultra Plonk proof with 2**n gates
+ *
+ * @param state google benchmark state; state.range(0) is n, the log2 of the gate count
  */
-void construct_proof_ultra(State& state, void (*test_circuit_function)(UltraBuilder&, size_t)) noexcept
+static void construct_proof_ultraplonk_power_of_2(State& state) noexcept
 {
-    bench_utils::construct_proof_with_specified_num_iterations<UltraPlonk>(state, test_circuit_function);
+    auto log2_of_gates = static_cast<size_t>(state.range(0));
+    bench_utils::construct_proof_with_specified_num_iterations<plonk::UltraComposer>(
+        state, &bench_utils::generate_basic_arithmetic_circuit<UltraCircuitBuilder>, log2_of_gates);
 }
 
-BENCHMARK_CAPTURE(construct_proof_ultra, sha256, &bench_utils::generate_sha256_test_circuit<UltraBuilder>)
-    ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kMillisecond);
-BENCHMARK_CAPTURE(construct_proof_ultra, keccak, &bench_utils::generate_keccak_test_circuit<UltraBuilder>)
-    ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kMillisecond);
-BENCHMARK_CAPTURE(construct_proof_ultra,
+// Define benchmarks
+BENCHMARK_CAPTURE(construct_proof_ultraplonk, sha256, &bench_utils::generate_sha256_test_circuit<UltraCircuitBuilder>)
+    ->Unit(kMillisecond);
+BENCHMARK_CAPTURE(construct_proof_ultraplonk, keccak, &bench_utils::generate_keccak_test_circuit<UltraCircuitBuilder>)
+    ->Unit(kMillisecond);
+BENCHMARK_CAPTURE(construct_proof_ultraplonk,
                   ecdsa_verification,
-                  &bench_utils::generate_ecdsa_verification_test_circuit<UltraBuilder>)
-    ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kMillisecond);
-BENCHMARK_CAPTURE(construct_proof_ultra,
+                  &bench_utils::generate_ecdsa_verification_test_circuit<UltraCircuitBuilder>)
+    ->Unit(kMillisecond);
+BENCHMARK_CAPTURE(construct_proof_ultraplonk,
                   merkle_membership,
-                  &bench_utils::generate_merkle_membership_test_circuit<UltraBuilder>)
-    ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kMillisecond);
+                  &bench_utils::generate_merkle_membership_test_circuit<UltraCircuitBuilder>)
+    ->Unit(kMillisecond);
 
-} // namespace ultra_plonk_bench
\ No newline at end of file
+BENCHMARK(construct_proof_ultraplonk_power_of_2)
+    // 2**13 gates to 2**18 gates
+    ->DenseRange(13, 18)
+    ->Unit(kMillisecond);
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk_rounds.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk_rounds.bench.cpp
new file mode 100644
index 00000000000..0db10eea970
--- /dev/null
+++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk_rounds.bench.cpp
@@ -0,0 +1,92 @@
+#include <benchmark/benchmark.h>
+
+#include "barretenberg/benchmark/honk_bench/benchmark_utilities.hpp"
+#include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp"
+
+using namespace benchmark;
+using namespace proof_system;
+
+// The rounds to measure
+enum {
+    PREAMBLE,
+    FIRST_WIRE_COMMITMENTS,
+    SECOND_FIAT_SHAMIR_ETA,
+    THIRD_FIAT_SHAMIR_BETA_GAMMA,
+    FOURTH_FIAT_SHAMIR_ALPHA_AND_COMMIT,
+    FIFTH_COMPUTE_QUOTIENT_EVALUATION,
+    SIXTH_BATCH_OPEN
+};
+
+/**
+ * @details Run one prover round and drain the prover's work queue, timing both only if this is the round to measure.
+ * @param state - The google benchmark state; expected to be paused on entry and left paused on return.
+ * @param prover - The ultraplonk prover whose queue is processed after func.
+ * @param target_index - The round that func executes.
+ * @param index - The round to measure.
+ * @param func - Callable that executes the round.
+ **/
+BBERG_PROFILE static void plonk_round(
+    State& state, plonk::UltraProver& prover, size_t target_index, size_t index, auto&& func) noexcept
+{
+    if (index == target_index) {
+        state.ResumeTiming();
+    }
+    func();
+    prover.queue.process_queue();
+    if (index == target_index) {
+        state.PauseTiming();
+    }
+}
+/**
+ * @details Benchmark ultraplonk by performing all the rounds, but only measuring one.
+ * Note: As a result the very short rounds take a long time for statistical significance, so recommended to set
+ * their iterations to 1.
+ * @param state - The google benchmark state.
+ * @param prover - The ultraplonk prover.
+ * @param index - The pass to measure.
+ **/
+BBERG_PROFILE static void test_round_inner(State& state, plonk::UltraProver& prover, size_t index) noexcept
+{
+    plonk_round(state, prover, PREAMBLE, index, [&] { prover.execute_preamble_round(); });
+    plonk_round(state, prover, FIRST_WIRE_COMMITMENTS, index, [&] { prover.execute_first_round(); });
+    plonk_round(state, prover, SECOND_FIAT_SHAMIR_ETA, index, [&] { prover.execute_second_round(); });
+    plonk_round(state, prover, THIRD_FIAT_SHAMIR_BETA_GAMMA, index, [&] { prover.execute_third_round(); });
+    plonk_round(state, prover, FOURTH_FIAT_SHAMIR_ALPHA_AND_COMMIT, index, [&] { prover.execute_fourth_round(); });
+    plonk_round(state, prover, FIFTH_COMPUTE_QUOTIENT_EVALUATION, index, [&] { prover.execute_fifth_round(); });
+    plonk_round(state, prover, SIXTH_BATCH_OPEN, index, [&] { prover.execute_sixth_round(); });
+}
+/**
+ * @details Per benchmark iteration: build a fresh prover with timing paused, then run every round via test_round_inner.
+ * @param state - The google benchmark state.
+ * @param index - The round to measure.
+ **/
+BBERG_PROFILE static void test_round(State& state, size_t index) noexcept
+{
+    barretenberg::srs::init_crs_factory("../srs_db/ignition");
+    for (auto _ : state) {
+        state.PauseTiming();
+        plonk::UltraComposer composer;
+        // TODO: https://github.com/AztecProtocol/barretenberg/issues/761 benchmark both sparse and dense circuits
+        plonk::UltraProver prover = bench_utils::get_prover(
+            composer, &bench_utils::generate_ecdsa_verification_test_circuit<UltraCircuitBuilder>, 10);
+        test_round_inner(state, prover, index);
+        // NOTE: google bench is very finicky, must end in ResumeTiming() for correctness
+        state.ResumeTiming();
+    }
+}
+#define ROUND_BENCHMARK(round)                                                                                         \
+    static void ROUND_##round(State& state) noexcept                                                                   \
+    {                                                                                                                  \
+        test_round(state, round);                                                                                      \
+    }                                                                                                                  \
+    BENCHMARK(ROUND_##round)->Unit(kMillisecond)
+
+// Fast rounds take a long time to benchmark because of how we compute statistical significance.
+// Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part.
+ROUND_BENCHMARK(PREAMBLE)->Iterations(1);
+ROUND_BENCHMARK(FIRST_WIRE_COMMITMENTS);
+ROUND_BENCHMARK(SECOND_FIAT_SHAMIR_ETA);
+ROUND_BENCHMARK(THIRD_FIAT_SHAMIR_BETA_GAMMA);
+ROUND_BENCHMARK(FOURTH_FIAT_SHAMIR_ALPHA_AND_COMMIT);
+ROUND_BENCHMARK(FIFTH_COMPUTE_QUOTIENT_EVALUATION);
+ROUND_BENCHMARK(SIXTH_BATCH_OPEN);
diff --git a/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/c_bind.cpp b/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/c_bind.cpp
deleted file mode 100644
index 18808d3a016..00000000000
--- a/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/c_bind.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-#include "prover.hpp"
-
-using namespace barretenberg;
-
-/**
- * Commenting out now, as we really shouldn't need these if we use native threading.
- * Not deleting because we *may* still want the work queue in some cases, but I'm really hoping we can just not.
- */
-/*
-extern "C" {
-
-using Prover = plonk::UltraProver;
-
-WASM_EXPORT void prover_process_queue(Prover* prover)
-{
-    prover->queue.process_queue();
-}
-
-WASM_EXPORT size_t prover_get_circuit_size(Prover* prover)
-{
-    return prover->get_circuit_size();
-}
-
-WASM_EXPORT void prover_get_work_queue_item_info(Prover* prover, uint8_t* result)
-{
-    auto info = prover->get_queued_work_item_info();
-    memcpy(result, &info, sizeof(info));
-}
-
-WASM_EXPORT fr* prover_get_scalar_multiplication_data(Prover* prover, size_t work_item_number)
-{
-    return prover->get_scalar_multiplication_data(work_item_number).get();
-}
-
-WASM_EXPORT size_t prover_get_scalar_multiplication_size(Prover* prover, size_t work_item_number)
-{
-    return prover->get_scalar_multiplication_size(work_item_number);
-}
-
-WASM_EXPORT void prover_put_scalar_multiplication_data(Prover* prover,
-                                                       g1::element* result,
-                                                       const size_t work_item_number)
-{
-    prover->put_scalar_multiplication_data(*result, work_item_number);
-}
-
-WASM_EXPORT fr* prover_get_fft_data(Prover* prover, fr* shift_factor, size_t work_item_number)
-{
-    auto data = prover->get_fft_data(work_item_number);
-    *shift_factor = data.shift_factor;
-    return data.data.get();
-}
-
-WASM_EXPORT void prover_put_fft_data(Prover* prover, fr* result, size_t work_item_number)
-{
-    prover->put_fft_data(std::shared_ptr<fr[]>(result, aligned_free), work_item_number);
-}
-
-WASM_EXPORT fr* prover_get_ifft_data(Prover* prover, size_t work_item_number)
-{
-    return prover->get_ifft_data(work_item_number).get();
-}
-
-WASM_EXPORT void prover_put_ifft_data(Prover* prover, fr* result, size_t work_item_number)
-{
-    prover->put_ifft_data(std::shared_ptr<fr[]>(result, aligned_free), work_item_number);
-}
-
-WASM_EXPORT void prover_execute_preamble_round(Prover* prover)
-{
-    prover->execute_preamble_round();
-}
-
-WASM_EXPORT void prover_execute_first_round(Prover* prover)
-{
-    prover->execute_first_round();
-}
-
-WASM_EXPORT void prover_execute_second_round(Prover* prover)
-{
-    prover->execute_second_round();
-}
-
-WASM_EXPORT void prover_execute_third_round(Prover* prover)
-{
-    prover->execute_third_round();
-}
-
-WASM_EXPORT void prover_execute_fourth_round(Prover* prover)
-{
-    prover->execute_fourth_round();
-}
-
-WASM_EXPORT void prover_execute_fifth_round(Prover* prover)
-{
-    prover->execute_fifth_round();
-}
-
-WASM_EXPORT void prover_execute_sixth_round(Prover* prover)
-{
-    prover->execute_sixth_round();
-}
-
-WASM_EXPORT size_t prover_export_proof(Prover* prover, uint8_t** proof_data_buf)
-{
-    auto& proof_data = prover->export_proof().proof_data;
-    *proof_data_buf = proof_data.data();
-    return proof_data.size();
-}
-
-WASM_EXPORT void coset_fft_with_generator_shift(fr* coefficients, fr* constant, evaluation_domain* domain)
-{
-    polynomial_arithmetic::coset_fft_with_generator_shift(coefficients, *domain, *constant);
-}
-
-WASM_EXPORT void ifft(fr* coefficients, evaluation_domain* domain)
-{
-    polynomial_arithmetic::ifft(coefficients, *domain);
-}
-
-WASM_EXPORT void* new_evaluation_domain(size_t circuit_size)
-{
-    auto domain = new evaluation_domain(circuit_size);
-    domain->compute_lookup_table();
-    return domain;
-}
-
-WASM_EXPORT void delete_evaluation_domain(void* domain)
-{
-    delete reinterpret_cast<evaluation_domain*>(domain);
-}
-}
-
-*/
diff --git a/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/prover.hpp b/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/prover.hpp
index 76295aafc72..408b9ea3363 100644
--- a/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/prover.hpp
+++ b/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/prover.hpp
@@ -19,13 +19,13 @@ template <typename settings> class ProverBase {
     ProverBase& operator=(const ProverBase& other) = delete;
     ProverBase& operator=(ProverBase&& other);
 
-    void execute_preamble_round();
-    void execute_first_round();
-    void execute_second_round();
-    void execute_third_round();
-    void execute_fourth_round();
-    void execute_fifth_round();
-    void execute_sixth_round();
+    BBERG_PROFILE void execute_preamble_round();
+    BBERG_PROFILE void execute_first_round();
+    BBERG_PROFILE void execute_second_round();
+    BBERG_PROFILE void execute_third_round();
+    BBERG_PROFILE void execute_fourth_round();
+    BBERG_PROFILE void execute_fifth_round();
+    BBERG_PROFILE void execute_sixth_round();
 
     void add_polynomial_evaluations_to_transcript();
     void compute_batch_opening_polynomials();