AztecProtocol · ludamad · Oct 27, 2023 · Oct 20, 2023 · Oct 20, 2023 · Oct 20, 2023
diff --git a/barretenberg/cpp/scripts/benchmarks.sh b/barretenberg/cpp/scripts/benchmarks.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Builds the clang16 preset, then runs each benchmark executable and wraps its
+# output in a collapsible GitHub-markdown <details> section.
+# pipefail: without it a failing benchmark is masked by the exit status of tee.
+set -euo pipefail
+
+# Move above script dir.
+cd "$(dirname "$0")/.."
+
+# Configure and build.
+cmake --preset clang16
+cmake --build --preset clang16
+
+cd build
+
+# Prints one collapsible section (github markdown style, works in comments and
+# descriptions) and also saves the raw benchmark output to <executable>.out.
+# $1: section title, $2: executable name under ./bin
+function run_benchmark() {
+  echo -e "<details><summary>$1</summary>"
+  echo -e '\n```'
+  "./bin/$2" | tee "$2.out"
+  echo -e '```\n'
+  echo -e "</details>"
+}
+
+run_benchmark "Standard Plonk" standard_plonk_bench
+run_benchmark "Ultra Honk Round Breakdown" ultra_honk_rounds_bench
+run_benchmark "Ultra Plonk Round Breakdown" ultra_plonk_rounds_bench
+run_benchmark "Ultra Honk" ultra_honk_bench
+run_benchmark "Ultra Plonk" ultra_plonk_bench
diff --git a/barretenberg/cpp/scripts/collect_profile_information.sh b/barretenberg/cpp/scripts/collect_profile_information.sh
@@ -1,9 +1,14 @@
 #!/bin/bash
 set -eu
 
-PRESET=${1:-xray} # can also be 'xray-1thread'
+# can also be 'xray-1thread'
+PRESET=${1:-xray}
+# pass "" to run the benchmark, or any non-empty value (e.g. 1) to skip the run and reuse old results
 ONLY_PROCESS=${2:-}
+# pass the executable name from build/bin
 EXECUTABLE=${3:-ultra_honk_rounds_bench}
+# by default run the executable, but we can provide an alt command e.g. use taskset and benchmark flags
+COMMAND=${4:-./bin/$EXECUTABLE}
 
 # Move above script dir.
 cd $(dirname $0)/..
@@ -19,7 +24,7 @@ if [ -z "$ONLY_PROCESS" ]; then
   rm -f xray-log.$EXECUTABLE.*
 
   # Run benchmark with profiling.
-  XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/$EXECUTABLE
+  XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" $COMMAND
 fi
 
 function shorten_cpp_names() {

diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt
@@ -4,6 +4,7 @@ set(BENCHMARK_SOURCES
   ultra_honk.bench.cpp
   ultra_honk_rounds.bench.cpp
   ultra_plonk.bench.cpp
+  ultra_plonk_rounds.bench.cpp
 )
 
 # Required libraries for benchmark suites

diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp
@@ -4,6 +4,7 @@
 
 #include "barretenberg/honk/composer/ultra_composer.hpp"
 #include "barretenberg/honk/proof_system/ultra_prover.hpp"
+#include "barretenberg/plonk/composer/standard_composer.hpp"
 #include "barretenberg/plonk/composer/ultra_composer.hpp"
 #include "barretenberg/proof_system/types/circuit_type.hpp"
 #include "barretenberg/stdlib/encryption/ecdsa/ecdsa.hpp"
@@ -23,35 +24,25 @@ using namespace benchmark;
 
 namespace bench_utils {
 
-struct BenchParams {
-    // Num iterations of the operation of interest in a test circuit, e.g. num sha256 hashes
-    static constexpr size_t MIN_NUM_ITERATIONS = 10;
-    static constexpr size_t MAX_NUM_ITERATIONS = 10;
-
-    // Log num gates; for simple circuits only, e.g. standard arithmetic circuit
-    static constexpr size_t MIN_LOG_NUM_GATES = 16;
-    static constexpr size_t MAX_LOG_NUM_GATES = 16;
-
-    static constexpr size_t NUM_REPETITIONS = 1;
-};
-
 /**
  * @brief Generate test circuit with basic arithmetic operations
  *
  * @param composer
  * @param num_iterations
  */
-template <typename Builder> void generate_basic_arithmetic_circuit(Builder& builder, size_t num_gates)
+template <typename Builder> void generate_basic_arithmetic_circuit(Builder& builder, size_t log2_num_gates)
 {
     proof_system::plonk::stdlib::field_t a(
         proof_system::plonk::stdlib::witness_t(&builder, barretenberg::fr::random_element()));
     proof_system::plonk::stdlib::field_t b(
         proof_system::plonk::stdlib::witness_t(&builder, barretenberg::fr::random_element()));
     proof_system::plonk::stdlib::field_t c(&builder);
-    if (num_gates < 4) {
+    size_t passes = (1UL << log2_num_gates) / 4 - 4;
+    if (static_cast<int>(passes) <= 0) {
         throw std::runtime_error("too few gates");
     }
-    for (size_t i = 0; i < (num_gates / 4) - 4; ++i) {
+
+    for (size_t i = 0; i < passes; ++i) {
         c = a + b;
         c = a * c;
         a = b * b;
@@ -175,37 +166,7 @@ template <typename Builder> void generate_merkle_membership_test_circuit(Builder
     }
 }
 
-/**
- * @brief Performs proof constuction for benchmarks based on a provided circuit function
- *
- * @details This function assumes state.range refers to num_gates which is the size of the underlying circuit
- *
- * @tparam Builder
- * @param state
- * @param test_circuit_function
- */
-template <typename Composer>
-void construct_proof_with_specified_num_gates(State& state,
-                                              void (*test_circuit_function)(typename Composer::CircuitBuilder&,
-                                                                            size_t)) noexcept
-{
-    barretenberg::srs::init_crs_factory("../srs_db/ignition");
-    auto num_gates = static_cast<size_t>(1 << (size_t)state.range(0));
-    for (auto _ : state) {
-        // Constuct circuit and prover; don't include this part in measurement
-        state.PauseTiming();
-        auto builder = typename Composer::CircuitBuilder();
-        test_circuit_function(builder, num_gates);
-
-        auto composer = Composer();
-        auto ext_prover = composer.create_prover(builder);
-        state.ResumeTiming();
-
-        // Construct proof
-        auto proof = ext_prover.construct_proof();
-    }
-}
-
+// ultrahonk
 inline proof_system::honk::UltraProver get_prover(
     proof_system::honk::UltraComposer& composer,
     void (*test_circuit_function)(proof_system::honk::UltraComposer::CircuitBuilder&, size_t),
@@ -217,6 +178,18 @@ inline proof_system::honk::UltraProver get_prover(
     return composer.create_prover(instance);
 }
 
+// standard plonk: builds the test circuit, then creates a prover for it
+inline proof_system::plonk::Prover get_prover(
+    proof_system::plonk::StandardComposer& composer,
+    void (*test_circuit_function)(proof_system::StandardCircuitBuilder&, size_t),
+    size_t num_iterations)
+{
+    proof_system::StandardCircuitBuilder circuit_builder;
+    test_circuit_function(circuit_builder, num_iterations);
+    return composer.create_prover(circuit_builder);
+}
+
+// ultraplonk
 inline proof_system::plonk::UltraProver get_prover(
     proof_system::plonk::UltraComposer& composer,
     void (*test_circuit_function)(proof_system::honk::UltraComposer::CircuitBuilder&, size_t),
@@ -237,15 +210,13 @@ inline proof_system::plonk::UltraProver get_prover(
  * @param test_circuit_function
  */
 template <typename Composer>
-void construct_proof_with_specified_num_iterations(State& state,
-                                                   void (*test_circuit_function)(typename Composer::CircuitBuilder&,
-                                                                                 size_t)) noexcept
+void construct_proof_with_specified_num_iterations(
+    State& state, void (*test_circuit_function)(typename Composer::CircuitBuilder&, size_t), size_t num_iterations)
 {
     barretenberg::srs::init_crs_factory("../srs_db/ignition");
 
     Composer composer;
 
-    auto num_iterations = static_cast<size_t>(state.range(0));
     for (auto _ : state) {
         // Constuct circuit and prover; don't include this part in measurement
         state.PauseTiming();

diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/standard_plonk.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/standard_plonk.bench.cpp
@@ -4,30 +4,20 @@
 
 using namespace benchmark;
 
-namespace standard_plonk_bench {
-
 using StandardBuilder = proof_system::StandardCircuitBuilder;
 using StandardPlonk = proof_system::plonk::StandardComposer;
 
-// Log number of gates for test circuit
-constexpr size_t MIN_LOG_NUM_GATES = bench_utils::BenchParams::MIN_LOG_NUM_GATES;
-constexpr size_t MAX_LOG_NUM_GATES = bench_utils::BenchParams::MAX_LOG_NUM_GATES;
-// Number of times to repeat each benchmark
-constexpr size_t NUM_REPETITIONS = bench_utils::BenchParams::NUM_REPETITIONS;
-
 /**
  * @brief Benchmark: Construction of a Standard proof for a circuit determined by the provided circuit function
  */
-void construct_proof_standard(State& state, void (*test_circuit_function)(StandardBuilder&, size_t)) noexcept
+static void construct_proof_standard_power_of_2(State& state) noexcept
 {
-    bench_utils::construct_proof_with_specified_num_gates<StandardPlonk>(state, test_circuit_function);
+    auto log2_of_gates = static_cast<size_t>(state.range(0));
+    bench_utils::construct_proof_with_specified_num_iterations<proof_system::plonk::StandardComposer>(
+        state, &bench_utils::generate_basic_arithmetic_circuit<proof_system::StandardCircuitBuilder>, log2_of_gates);
 }
 
-BENCHMARK_CAPTURE(construct_proof_standard,
-                  arithmetic,
-                  &bench_utils::generate_basic_arithmetic_circuit<StandardBuilder>)
-    ->DenseRange(MIN_LOG_NUM_GATES, MAX_LOG_NUM_GATES)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kSecond);
-
-} // namespace standard_plonk_bench
+BENCHMARK(construct_proof_standard_power_of_2)
+    // 2**13 gates to 2**18 gates
+    ->DenseRange(13, 18)
+    ->Unit(::benchmark::kMillisecond);
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp
@@ -5,47 +5,44 @@
 #include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp"
 
 using namespace benchmark;
-using namespace proof_system::plonk;
-
-namespace ultra_honk_bench {
-
-using UltraBuilder = proof_system::UltraCircuitBuilder;
-using UltraHonk = proof_system::honk::UltraComposer;
-
-// Number of times to perform operation of interest in the benchmark circuits, e.g. # of hashes to perform
-constexpr size_t MIN_NUM_ITERATIONS = bench_utils::BenchParams::MIN_NUM_ITERATIONS;
-constexpr size_t MAX_NUM_ITERATIONS = bench_utils::BenchParams::MAX_NUM_ITERATIONS;
-// Number of times to repeat each benchmark
-constexpr size_t NUM_REPETITIONS = bench_utils::BenchParams::NUM_REPETITIONS;
+using namespace proof_system;
 
 /**
  * @brief Benchmark: Construction of a Ultra Honk proof for a circuit determined by the provided circuit function
  */
-void construct_proof_ultra(State& state, void (*test_circuit_function)(UltraBuilder&, size_t)) noexcept
+static void construct_proof_ultrahonk(State& state,
+                                      void (*test_circuit_function)(UltraCircuitBuilder&, size_t)) noexcept
+{
+    size_t num_iterations = 10; // 10x the circuit
+    bench_utils::construct_proof_with_specified_num_iterations<honk::UltraComposer>(
+        state, test_circuit_function, num_iterations);
+}
+
+/**
+ * @brief Benchmark: Construction of an Ultra Honk proof with 2**n gates
+ */
+static void construct_proof_ultrahonk_power_of_2(State& state) noexcept
 {
-    bench_utils::construct_proof_with_specified_num_iterations<UltraHonk>(state, test_circuit_function);
+    auto log2_of_gates = static_cast<size_t>(state.range(0));
+    bench_utils::construct_proof_with_specified_num_iterations<honk::UltraComposer>(
+        state, &bench_utils::generate_basic_arithmetic_circuit<UltraCircuitBuilder>, log2_of_gates);
 }
 
 // Define benchmarks
-BENCHMARK_CAPTURE(construct_proof_ultra, sha256, &bench_utils::generate_sha256_test_circuit<UltraBuilder>)
-    ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kMillisecond);
-BENCHMARK_CAPTURE(construct_proof_ultra, keccak, &bench_utils::generate_keccak_test_circuit<UltraBuilder>)
-    ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kMillisecond);
-BENCHMARK_CAPTURE(construct_proof_ultra,
+BENCHMARK_CAPTURE(construct_proof_ultrahonk, sha256, &bench_utils::generate_sha256_test_circuit<UltraCircuitBuilder>)
+    ->Unit(kMillisecond);
+BENCHMARK_CAPTURE(construct_proof_ultrahonk, keccak, &bench_utils::generate_keccak_test_circuit<UltraCircuitBuilder>)
+    ->Unit(kMillisecond);
+BENCHMARK_CAPTURE(construct_proof_ultrahonk,
                   ecdsa_verification,
-                  &bench_utils::generate_ecdsa_verification_test_circuit<UltraBuilder>)
-    ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kMillisecond);
-BENCHMARK_CAPTURE(construct_proof_ultra,
+                  &bench_utils::generate_ecdsa_verification_test_circuit<UltraCircuitBuilder>)
+    ->Unit(kMillisecond);
+BENCHMARK_CAPTURE(construct_proof_ultrahonk,
                   merkle_membership,
-                  &bench_utils::generate_merkle_membership_test_circuit<UltraBuilder>)
-    ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS)
-    ->Repetitions(NUM_REPETITIONS)
-    ->Unit(::benchmark::kMillisecond);
+                  &bench_utils::generate_merkle_membership_test_circuit<UltraCircuitBuilder>)
+    ->Unit(kMillisecond);
 
-} // namespace ultra_honk_bench
+BENCHMARK(construct_proof_ultrahonk_power_of_2)
+    // 2**13 gates to 2**18 gates
+    ->DenseRange(13, 18)
+    ->Unit(kMillisecond);
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp
@@ -24,39 +24,41 @@ BBERG_PROFILE static void test_round_inner(State& state, honk::UltraProver& prov
     auto time_if_index = [&](size_t target_index, auto&& func) -> void {
         if (index == target_index) {
             state.ResumeTiming();
-            func();
+        }
+        func();
+        if (index == target_index) {
             state.PauseTiming();
-        } else {
-            func();
         }
     };
-    for (auto _ : state) {
-        state.PauseTiming();
-        time_if_index(PREAMBLE, [&] { prover.execute_preamble_round(); });
-        time_if_index(WIRE_COMMITMENTS, [&] { prover.execute_wire_commitments_round(); });
-        time_if_index(SORTED_LIST_ACCUMULATOR, [&] { prover.execute_sorted_list_accumulator_round(); });
-        time_if_index(GRAND_PRODUCT_COMPUTATION, [&] { prover.execute_grand_product_computation_round(); });
-        time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); });
-        time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); });
-        state.ResumeTiming();
-    }
+
+    time_if_index(PREAMBLE, [&] { prover.execute_preamble_round(); });
+    time_if_index(WIRE_COMMITMENTS, [&] { prover.execute_wire_commitments_round(); });
+    time_if_index(SORTED_LIST_ACCUMULATOR, [&] { prover.execute_sorted_list_accumulator_round(); });
+    time_if_index(GRAND_PRODUCT_COMPUTATION, [&] { prover.execute_grand_product_computation_round(); });
+    time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); });
+    time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); });
 }
 BBERG_PROFILE static void test_round(State& state, size_t index) noexcept
 {
     barretenberg::srs::init_crs_factory("../srs_db/ignition");
 
-    honk::UltraComposer composer;
-    // TODO(AD) benchmark both sparse and dense circuits?
-    honk::UltraProver prover =
-        bench_utils::get_prover(composer, &bench_utils::generate_keccak_test_circuit<UltraCircuitBuilder>, 1);
-    test_round_inner(state, prover, index);
+    for (auto _ : state) {
+        state.PauseTiming();
+        honk::UltraComposer composer;
+        // TODO(AD) benchmark both sparse and dense circuits?
+        honk::UltraProver prover = bench_utils::get_prover(
+            composer, &bench_utils::generate_ecdsa_verification_test_circuit<UltraCircuitBuilder>, 10);
+        test_round_inner(state, prover, index);
+        state.ResumeTiming();
+        // NOTE: google bench is very finicky, must end in ResumeTiming() for correctness
+    }
 }
 #define ROUND_BENCHMARK(round)                                                                                         \
     static void ROUND_##round(State& state) noexcept                                                                   \
     {                                                                                                                  \
         test_round(state, round);                                                                                      \
     }                                                                                                                  \
-    BENCHMARK(ROUND_##round)->Unit(::benchmark::kMillisecond)
+    BENCHMARK(ROUND_##round)->Unit(kMillisecond)
 
 // Fast rounds take a long time to benchmark because of how we compute statistical significance.
 // Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part.
@@ -65,4 +67,4 @@ ROUND_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1);
 ROUND_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1);
 ROUND_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1);
 ROUND_BENCHMARK(RELATION_CHECK);
-ROUND_BENCHMARK(ZEROMORPH);
+ROUND_BENCHMARK(ZEROMORPH);