From c8e1d8b9244c3955f0fea6a34a3cc28a81a29d2c Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 27 Oct 2023 16:19:35 -0400 Subject: [PATCH] feat: measure plonk rounds (#3065) Features: - Adds round benchmarking executables - Adds benchmarking circuits of size `2**13` to `2**18` - Refactoring ultrahonk flame graph: cross-reference with the 'Ultra Honk Round Breakdown' output below Screenshot 2023-10-26 at 8 03 47 PM ultraplonk flame graph: cross-reference with the 'Ultra Plonk Round Breakdown' output below. To read this one, you have to count plonk_round from round 1 through 6. Screenshot 2023-10-26 at 8 24 40 PM Current stats: ultrahonk is ~43-49% faster than ultraplonk on 10x ECDSA, and ~25% faster on the 2**18-gate basic arithmetic circuit. Example output of scripts/benchmarks.sh: MACBOOK STATS, 10 cores
Standard Plonk ``` Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory This does not affect benchmark measurements, only the metadata output. 2023-10-26T19:40:15-04:00 Running ./bin/standard_plonk_bench Run on (10 X 24.1218 MHz CPU s) CPU Caches: L1 Data 64 KiB L1 Instruction 128 KiB L2 Unified 4096 KiB (x10) Load Average: 63.73, 26.17, 14.93 --------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------- construct_proof_standard_power_of_2/13 82.0 ms 70.9 ms 9 construct_proof_standard_power_of_2/14 132 ms 117 ms 6 construct_proof_standard_power_of_2/15 223 ms 202 ms 3 construct_proof_standard_power_of_2/16 413 ms 370 ms 2 construct_proof_standard_power_of_2/17 746 ms 716 ms 1 construct_proof_standard_power_of_2/18 1418 ms 1364 ms 1 ```
Ultra Honk Round Breakdown ``` Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory This does not affect benchmark measurements, only the metadata output. 2023-10-26T19:40:24-04:00 Running ./bin/ultra_honk_rounds_bench Run on (10 X 24.121 MHz CPU s) CPU Caches: L1 Data 64 KiB L1 Instruction 128 KiB L2 Unified 4096 KiB (x10) Load Average: 59.43, 25.90, 14.90 --------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------- ROUND_PREAMBLE/iterations:1 16.7 ms 16.6 ms 1 ROUND_WIRE_COMMITMENTS/iterations:1 205 ms 169 ms 1 ROUND_SORTED_LIST_ACCUMULATOR/iterations:1 189 ms 169 ms 1 ROUND_GRAND_PRODUCT_COMPUTATION/iterations:1 452 ms 419 ms 1 ROUND_RELATION_CHECK 1426 ms 1407 ms 1 ROUND_ZEROMORPH 1204 ms 1158 ms 1 ```
Ultra Plonk Round Breakdown ``` Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory This does not affect benchmark measurements, only the metadata output. 2023-10-26T19:40:56-04:00 Running ./bin/ultra_plonk_rounds_bench Run on (10 X 24.1204 MHz CPU s) CPU Caches: L1 Data 64 KiB L1 Instruction 128 KiB L2 Unified 4096 KiB (x10) Load Average: 33.61, 23.16, 14.35 ------------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------------ ROUND_PREAMBLE/iterations:1 121 ms 112 ms 1 ROUND_FIRST_WIRE_COMMITMENTS 679 ms 657 ms 1 ROUND_SECOND_FIAT_SHAMIR_ETA 636 ms 561 ms 1 ROUND_THIRD_FIAT_SHAMIR_BETA_GAMMA 1255 ms 972 ms 1 ROUND_FOURTH_FIAT_SHAMIR_ALPHA_AND_COMMIT 1944 ms 1893 ms 1 ROUND_FIFTH_COMPUTE_QUOTIENT_EVALUTION 245 ms 187 ms 4 ROUND_SIXTH_BATCH_OPEN 569 ms 558 ms 1 ```
Ultra Honk ``` Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory This does not affect benchmark measurements, only the metadata output. 2023-10-26T19:42:43-04:00 Running ./bin/ultra_honk_bench Run on (10 X 24.1212 MHz CPU s) CPU Caches: L1 Data 64 KiB L1 Instruction 128 KiB L2 Unified 4096 KiB (x10) Load Average: 10.61, 17.83, 13.27 --------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------- construct_proof_ultrahonk/sha256 510 ms 479 ms 2 construct_proof_ultrahonk/keccak 1833 ms 1749 ms 1 construct_proof_ultrahonk/ecdsa_verification 3396 ms 3193 ms 1 construct_proof_ultrahonk/merkle_membership 297 ms 279 ms 3 construct_proof_ultrahonk_power_of_2/13 121 ms 112 ms 6 construct_proof_ultrahonk_power_of_2/14 187 ms 177 ms 4 construct_proof_ultrahonk_power_of_2/15 320 ms 294 ms 2 construct_proof_ultrahonk_power_of_2/16 547 ms 524 ms 1 construct_proof_ultrahonk_power_of_2/17 1029 ms 991 ms 1 construct_proof_ultrahonk_power_of_2/18 1936 ms 1875 ms 1 ```
Ultra Plonk ``` Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory This does not affect benchmark measurements, only the metadata output. 2023-10-26T19:43:02-04:00 Running ./bin/ultra_plonk_bench Run on (10 X 24.1209 MHz CPU s) CPU Caches: L1 Data 64 KiB L1 Instruction 128 KiB L2 Unified 4096 KiB (x10) Load Average: 9.93, 17.21, 13.15 ---------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------- construct_proof_ultraplonk/sha256 672 ms 628 ms 1 construct_proof_ultraplonk/keccak 2477 ms 2374 ms 1 construct_proof_ultraplonk/ecdsa_verification 5086 ms 4582 ms 1 construct_proof_ultraplonk/merkle_membership 459 ms 346 ms 2 construct_proof_ultraplonk_power_of_2/13 184 ms 126 ms 5 construct_proof_ultraplonk_power_of_2/14 293 ms 202 ms 3 construct_proof_ultraplonk_power_of_2/15 463 ms 349 ms 2 construct_proof_ultraplonk_power_of_2/16 772 ms 601 ms 1 construct_proof_ultraplonk_power_of_2/17 1410 ms 1229 ms 1 construct_proof_ultraplonk_power_of_2/18 2468 ms 2394 ms 1 ```
MAINFRAME STATS, ~128 cores
Standard Plonk ``` 2023-10-26T23:39:37+00:00 Running ./bin/standard_plonk_bench Run on (128 X 2649.99 MHz CPU s) CPU Caches: L1 Data 32 KiB (x64) L1 Instruction 32 KiB (x64) L2 Unified 512 KiB (x64) L3 Unified 32768 KiB (x8) Load Average: 15.48, 13.93, 12.70 --------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------- construct_proof_standard_power_of_2/13 128 ms 112 ms 6 construct_proof_standard_power_of_2/14 150 ms 131 ms 5 construct_proof_standard_power_of_2/15 182 ms 155 ms 4 construct_proof_standard_power_of_2/16 261 ms 203 ms 3 construct_proof_standard_power_of_2/17 410 ms 324 ms 2 construct_proof_standard_power_of_2/18 914 ms 710 ms 1 ```
Ultra Honk Round Breakdown ``` 2023-10-26T23:39:46+00:00 Running ./bin/ultra_honk_rounds_bench Run on (128 X 2649.99 MHz CPU s) CPU Caches: L1 Data 32 KiB (x64) L1 Instruction 32 KiB (x64) L2 Unified 512 KiB (x64) L3 Unified 32768 KiB (x8) Load Average: 16.06, 14.13, 12.78 --------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------- ROUND_PREAMBLE/iterations:1 29.8 ms 29.7 ms 1 ROUND_WIRE_COMMITMENTS/iterations:1 100 ms 53.4 ms 1 ROUND_SORTED_LIST_ACCUMULATOR/iterations:1 110 ms 87.6 ms 1 ROUND_GRAND_PRODUCT_COMPUTATION/iterations:1 190 ms 116 ms 1 ROUND_RELATION_CHECK 467 ms 300 ms 2 ROUND_ZEROMORPH 921 ms 807 ms 1 ```
Ultra Plonk Round Breakdown ``` 2023-10-26T23:40:21+00:00 Running ./bin/ultra_plonk_rounds_bench Run on (128 X 2649.99 MHz CPU s) CPU Caches: L1 Data 32 KiB (x64) L1 Instruction 32 KiB (x64) L2 Unified 512 KiB (x64) L3 Unified 32768 KiB (x8) Load Average: 16.26, 14.76, 13.06 ------------------------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------------------------ ROUND_PREAMBLE/iterations:1 238 ms 229 ms 1 ROUND_FIRST_WIRE_COMMITMENTS 375 ms 270 ms 2 ROUND_SECOND_FIAT_SHAMIR_ETA 282 ms 212 ms 3 ROUND_THIRD_FIAT_SHAMIR_BETA_GAMMA 675 ms 513 ms 1 ROUND_FOURTH_FIAT_SHAMIR_ALPHA_AND_COMMIT 775 ms 596 ms 1 ROUND_FIFTH_COMPUTE_QUOTIENT_EVALUTION 76.0 ms 56.9 ms 13 ROUND_SIXTH_BATCH_OPEN 357 ms 291 ms 2 ```
Ultra Honk ``` 2023-10-26T23:43:17+00:00 Running ./bin/ultra_honk_bench Run on (128 X 3444.11 MHz CPU s) CPU Caches: L1 Data 32 KiB (x64) L1 Instruction 32 KiB (x64) L2 Unified 512 KiB (x64) L3 Unified 32768 KiB (x8) Load Average: 36.58, 26.06, 17.88 --------------------------------------------------------------------------------------- Benchmark Time CPU Iterations --------------------------------------------------------------------------------------- construct_proof_ultrahonk/sha256 344 ms 277 ms 3 construct_proof_ultrahonk/keccak 913 ms 706 ms 1 construct_proof_ultrahonk/ecdsa_verification 1645 ms 1270 ms 1 construct_proof_ultrahonk/merkle_membership 231 ms 191 ms 4 construct_proof_ultrahonk_power_of_2/13 149 ms 127 ms 5 construct_proof_ultrahonk_power_of_2/14 184 ms 152 ms 5 construct_proof_ultrahonk_power_of_2/15 243 ms 193 ms 4 construct_proof_ultrahonk_power_of_2/16 365 ms 278 ms 3 construct_proof_ultrahonk_power_of_2/17 545 ms 423 ms 2 construct_proof_ultrahonk_power_of_2/18 1053 ms 750 ms 1 ```
Ultra Plonk ``` 2023-10-26T23:43:37+00:00 Running ./bin/ultra_plonk_bench Run on (128 X 2649.99 MHz CPU s) CPU Caches: L1 Data 32 KiB (x64) L1 Instruction 32 KiB (x64) L2 Unified 512 KiB (x64) L3 Unified 32768 KiB (x8) Load Average: 28.74, 24.91, 17.67 ---------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ---------------------------------------------------------------------------------------- construct_proof_ultraplonk/sha256 473 ms 353 ms 2 construct_proof_ultraplonk/keccak 1379 ms 1044 ms 1 construct_proof_ultraplonk/ecdsa_verification 2363 ms 1785 ms 1 construct_proof_ultraplonk/merkle_membership 275 ms 240 ms 3 construct_proof_ultraplonk_power_of_2/13 196 ms 181 ms 4 construct_proof_ultraplonk_power_of_2/14 226 ms 203 ms 3 construct_proof_ultraplonk_power_of_2/15 287 ms 238 ms 3 construct_proof_ultraplonk_power_of_2/16 363 ms 300 ms 2 construct_proof_ultraplonk_power_of_2/17 656 ms 532 ms 1 construct_proof_ultraplonk_power_of_2/18 1326 ms 1019 ms 1 ``` --------- Co-authored-by: ludamad Co-authored-by: codygunton --- barretenberg/cpp/scripts/benchmarks.sh | 38 +++++ .../scripts/collect_profile_information.sh | 9 +- .../benchmark/honk_bench/CMakeLists.txt | 1 + .../honk_bench/benchmark_utilities.hpp | 71 +++------- .../honk_bench/standard_plonk.bench.cpp | 26 ++-- .../benchmark/honk_bench/ultra_honk.bench.cpp | 63 ++++---- .../honk_bench/ultra_honk_rounds.bench.cpp | 42 +++--- .../honk_bench/ultra_plonk.bench.cpp | 63 ++++---- .../honk_bench/ultra_plonk_rounds.bench.cpp | 79 +++++++++++ .../plonk/proof_system/prover/c_bind.cpp | 134 ------------------ .../plonk/proof_system/prover/prover.hpp | 14 +- 11 files changed, 244 insertions(+), 296 deletions(-) create mode 100755 barretenberg/cpp/scripts/benchmarks.sh create mode 100644 barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk_rounds.bench.cpp delete mode 100644 barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/c_bind.cpp 
diff --git a/barretenberg/cpp/scripts/benchmarks.sh b/barretenberg/cpp/scripts/benchmarks.sh new file mode 100755 index 00000000000..a7ec1dcb1d0 --- /dev/null +++ b/barretenberg/cpp/scripts/benchmarks.sh @@ -0,0 +1,38 @@ +#!/bin/bash +set -eu + +# Move above script dir. +cd $(dirname $0)/.. + +# Configure and build. +cmake --preset clang16 +cmake --build --preset clang16 + +cd build + +# github markdown style, works in comments and descriptions +echo -e "
Standard Plonk" +echo -e '\n```' +./bin/standard_plonk_bench | tee standard_plonk_bench.out +echo -e '```\n' +echo -e "
" +echo -e "
Ultra Honk Round Breakdown" +echo -e '\n```' +./bin/ultra_honk_rounds_bench | tee ultra_honk_rounds_bench.out +echo -e '```\n' +echo -e "
" +echo -e "
Ultra Plonk Round Breakdown" +echo -e '\n```' +./bin/ultra_plonk_rounds_bench | tee ultra_plonk_rounds_bench.out +echo -e '```\n' +echo -e "
" +echo -e "
Ultra Honk" +echo -e '\n```' +./bin/ultra_honk_bench | tee ultra_honk_bench.out +echo -e '```\n' +echo -e "
" +echo -e "
Ultra Plonk" +echo -e '\n```' +./bin/ultra_plonk_bench | tee ultra_plonk_bench.out +echo -e '```\n' +echo -e "
" diff --git a/barretenberg/cpp/scripts/collect_profile_information.sh b/barretenberg/cpp/scripts/collect_profile_information.sh index 28ca73fe4d2..df932c086bc 100755 --- a/barretenberg/cpp/scripts/collect_profile_information.sh +++ b/barretenberg/cpp/scripts/collect_profile_information.sh @@ -1,9 +1,14 @@ #!/bin/bash set -eu -PRESET=${1:-xray} # can also be 'xray-1thread' +# can also be 'xray-1thread' +PRESET=${1:-xray} +# pass "" to run and 1 to reuse old results ONLY_PROCESS=${2:-} +# pass the executable name from build/bin EXECUTABLE=${3:-ultra_honk_rounds_bench} +# by default run the executable, but we can provide an alt command e.g. use taskset and benchmark flags +COMMAND=${4:-./bin/$EXECUTABLE} # Move above script dir. cd $(dirname $0)/.. @@ -19,7 +24,7 @@ if [ -z "$ONLY_PROCESS" ]; then rm -f xray-log.$EXECUTABLE.* # Run benchmark with profiling. - XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/$EXECUTABLE + XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" $COMMAND fi function shorten_cpp_names() { diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt index ed6122bb41e..fa33b5a6567 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt @@ -4,6 +4,7 @@ set(BENCHMARK_SOURCES ultra_honk.bench.cpp ultra_honk_rounds.bench.cpp ultra_plonk.bench.cpp + ultra_plonk_rounds.bench.cpp ) # Required libraries for benchmark suites diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp index 5053d89bd54..84949504692 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/benchmark_utilities.hpp @@ -4,6 +4,7 @@ 
#include "barretenberg/honk/composer/ultra_composer.hpp" #include "barretenberg/honk/proof_system/ultra_prover.hpp" +#include "barretenberg/plonk/composer/standard_composer.hpp" #include "barretenberg/plonk/composer/ultra_composer.hpp" #include "barretenberg/proof_system/types/circuit_type.hpp" #include "barretenberg/stdlib/encryption/ecdsa/ecdsa.hpp" @@ -23,35 +24,25 @@ using namespace benchmark; namespace bench_utils { -struct BenchParams { - // Num iterations of the operation of interest in a test circuit, e.g. num sha256 hashes - static constexpr size_t MIN_NUM_ITERATIONS = 10; - static constexpr size_t MAX_NUM_ITERATIONS = 10; - - // Log num gates; for simple circuits only, e.g. standard arithmetic circuit - static constexpr size_t MIN_LOG_NUM_GATES = 16; - static constexpr size_t MAX_LOG_NUM_GATES = 16; - - static constexpr size_t NUM_REPETITIONS = 1; -}; - /** * @brief Generate test circuit with basic arithmetic operations * * @param composer * @param num_iterations */ -template void generate_basic_arithmetic_circuit(Builder& builder, size_t num_gates) +template void generate_basic_arithmetic_circuit(Builder& builder, size_t log2_num_gates) { proof_system::plonk::stdlib::field_t a( proof_system::plonk::stdlib::witness_t(&builder, barretenberg::fr::random_element())); proof_system::plonk::stdlib::field_t b( proof_system::plonk::stdlib::witness_t(&builder, barretenberg::fr::random_element())); proof_system::plonk::stdlib::field_t c(&builder); - if (num_gates < 4) { + size_t passes = (1UL << log2_num_gates) / 4 - 4; + if (static_cast(passes) <= 0) { throw std::runtime_error("too few gates"); } - for (size_t i = 0; i < (num_gates / 4) - 4; ++i) { + + for (size_t i = 0; i < passes; ++i) { c = a + b; c = a * c; a = b * b; @@ -175,37 +166,7 @@ template void generate_merkle_membership_test_circuit(Builder } } -/** - * @brief Performs proof constuction for benchmarks based on a provided circuit function - * - * @details This function assumes state.range refers to 
num_gates which is the size of the underlying circuit - * - * @tparam Builder - * @param state - * @param test_circuit_function - */ -template -void construct_proof_with_specified_num_gates(State& state, - void (*test_circuit_function)(typename Composer::CircuitBuilder&, - size_t)) noexcept -{ - barretenberg::srs::init_crs_factory("../srs_db/ignition"); - auto num_gates = static_cast(1 << (size_t)state.range(0)); - for (auto _ : state) { - // Constuct circuit and prover; don't include this part in measurement - state.PauseTiming(); - auto builder = typename Composer::CircuitBuilder(); - test_circuit_function(builder, num_gates); - - auto composer = Composer(); - auto ext_prover = composer.create_prover(builder); - state.ResumeTiming(); - - // Construct proof - auto proof = ext_prover.construct_proof(); - } -} - +// ultrahonk inline proof_system::honk::UltraProver get_prover( proof_system::honk::UltraComposer& composer, void (*test_circuit_function)(proof_system::honk::UltraComposer::CircuitBuilder&, size_t), @@ -217,6 +178,18 @@ inline proof_system::honk::UltraProver get_prover( return composer.create_prover(instance); } +// standard plonk +inline proof_system::plonk::Prover get_prover(proof_system::plonk::StandardComposer& composer, + void (*test_circuit_function)(proof_system::StandardCircuitBuilder&, + size_t), + size_t num_iterations) +{ + proof_system::StandardCircuitBuilder builder; + test_circuit_function(builder, num_iterations); + return composer.create_prover(builder); +} + +// ultraplonk inline proof_system::plonk::UltraProver get_prover( proof_system::plonk::UltraComposer& composer, void (*test_circuit_function)(proof_system::honk::UltraComposer::CircuitBuilder&, size_t), @@ -237,15 +210,13 @@ inline proof_system::plonk::UltraProver get_prover( * @param test_circuit_function */ template -void construct_proof_with_specified_num_iterations(State& state, - void (*test_circuit_function)(typename Composer::CircuitBuilder&, - size_t)) noexcept +void 
construct_proof_with_specified_num_iterations( + State& state, void (*test_circuit_function)(typename Composer::CircuitBuilder&, size_t), size_t num_iterations) { barretenberg::srs::init_crs_factory("../srs_db/ignition"); Composer composer; - auto num_iterations = static_cast(state.range(0)); for (auto _ : state) { // Constuct circuit and prover; don't include this part in measurement state.PauseTiming(); diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/standard_plonk.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/standard_plonk.bench.cpp index 9ed605a3480..b44f4123f61 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/standard_plonk.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/standard_plonk.bench.cpp @@ -4,30 +4,20 @@ using namespace benchmark; -namespace standard_plonk_bench { - using StandardBuilder = proof_system::StandardCircuitBuilder; using StandardPlonk = proof_system::plonk::StandardComposer; -// Log number of gates for test circuit -constexpr size_t MIN_LOG_NUM_GATES = bench_utils::BenchParams::MIN_LOG_NUM_GATES; -constexpr size_t MAX_LOG_NUM_GATES = bench_utils::BenchParams::MAX_LOG_NUM_GATES; -// Number of times to repeat each benchmark -constexpr size_t NUM_REPETITIONS = bench_utils::BenchParams::NUM_REPETITIONS; - /** * @brief Benchmark: Construction of a Standard proof for a circuit determined by the provided circuit function */ -void construct_proof_standard(State& state, void (*test_circuit_function)(StandardBuilder&, size_t)) noexcept +static void construct_proof_standard_power_of_2(State& state) noexcept { - bench_utils::construct_proof_with_specified_num_gates(state, test_circuit_function); + auto log2_of_gates = static_cast(state.range(0)); + bench_utils::construct_proof_with_specified_num_iterations( + state, &bench_utils::generate_basic_arithmetic_circuit, log2_of_gates); } -BENCHMARK_CAPTURE(construct_proof_standard, - arithmetic, - 
&bench_utils::generate_basic_arithmetic_circuit) - ->DenseRange(MIN_LOG_NUM_GATES, MAX_LOG_NUM_GATES) - ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kSecond); - -} // namespace standard_plonk_bench \ No newline at end of file +BENCHMARK(construct_proof_standard_power_of_2) + // 2**13 gates to 2**18 gates + ->DenseRange(13, 18) + ->Unit(::benchmark::kMillisecond); \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp index 92933bb4648..7d1bd15b413 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp @@ -5,47 +5,44 @@ #include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp" using namespace benchmark; -using namespace proof_system::plonk; - -namespace ultra_honk_bench { - -using UltraBuilder = proof_system::UltraCircuitBuilder; -using UltraHonk = proof_system::honk::UltraComposer; - -// Number of times to perform operation of interest in the benchmark circuits, e.g. 
# of hashes to perform -constexpr size_t MIN_NUM_ITERATIONS = bench_utils::BenchParams::MIN_NUM_ITERATIONS; -constexpr size_t MAX_NUM_ITERATIONS = bench_utils::BenchParams::MAX_NUM_ITERATIONS; -// Number of times to repeat each benchmark -constexpr size_t NUM_REPETITIONS = bench_utils::BenchParams::NUM_REPETITIONS; +using namespace proof_system; /** * @brief Benchmark: Construction of a Ultra Honk proof for a circuit determined by the provided circuit function */ -void construct_proof_ultra(State& state, void (*test_circuit_function)(UltraBuilder&, size_t)) noexcept +static void construct_proof_ultrahonk(State& state, + void (*test_circuit_function)(UltraCircuitBuilder&, size_t)) noexcept +{ + size_t num_iterations = 10; // 10x the circuit + bench_utils::construct_proof_with_specified_num_iterations( + state, test_circuit_function, num_iterations); +} + +/** + * @brief Benchmark: Construction of a Ultra Plonk proof with 2**n gates + */ +static void construct_proof_ultrahonk_power_of_2(State& state) noexcept { - bench_utils::construct_proof_with_specified_num_iterations(state, test_circuit_function); + auto log2_of_gates = static_cast(state.range(0)); + bench_utils::construct_proof_with_specified_num_iterations( + state, &bench_utils::generate_basic_arithmetic_circuit, log2_of_gates); } // Define benchmarks -BENCHMARK_CAPTURE(construct_proof_ultra, sha256, &bench_utils::generate_sha256_test_circuit) - ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) - ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kMillisecond); -BENCHMARK_CAPTURE(construct_proof_ultra, keccak, &bench_utils::generate_keccak_test_circuit) - ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) - ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kMillisecond); -BENCHMARK_CAPTURE(construct_proof_ultra, +BENCHMARK_CAPTURE(construct_proof_ultrahonk, sha256, &bench_utils::generate_sha256_test_circuit) + ->Unit(kMillisecond); +BENCHMARK_CAPTURE(construct_proof_ultrahonk, keccak, 
&bench_utils::generate_keccak_test_circuit) + ->Unit(kMillisecond); +BENCHMARK_CAPTURE(construct_proof_ultrahonk, ecdsa_verification, - &bench_utils::generate_ecdsa_verification_test_circuit) - ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) - ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kMillisecond); -BENCHMARK_CAPTURE(construct_proof_ultra, + &bench_utils::generate_ecdsa_verification_test_circuit) + ->Unit(kMillisecond); +BENCHMARK_CAPTURE(construct_proof_ultrahonk, merkle_membership, - &bench_utils::generate_merkle_membership_test_circuit) - ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) - ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kMillisecond); + &bench_utils::generate_merkle_membership_test_circuit) + ->Unit(kMillisecond); -} // namespace ultra_honk_bench \ No newline at end of file +BENCHMARK(construct_proof_ultrahonk_power_of_2) + // 2**13 gates to 2**18 gates + ->DenseRange(13, 18) + ->Unit(kMillisecond); \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp index 8b4e7145596..8f7afaa181d 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp @@ -24,39 +24,41 @@ BBERG_PROFILE static void test_round_inner(State& state, honk::UltraProver& prov auto time_if_index = [&](size_t target_index, auto&& func) -> void { if (index == target_index) { state.ResumeTiming(); - func(); + } + func(); + if (index == target_index) { state.PauseTiming(); - } else { - func(); } }; - for (auto _ : state) { - state.PauseTiming(); - time_if_index(PREAMBLE, [&] { prover.execute_preamble_round(); }); - time_if_index(WIRE_COMMITMENTS, [&] { prover.execute_wire_commitments_round(); }); - time_if_index(SORTED_LIST_ACCUMULATOR, [&] { 
prover.execute_sorted_list_accumulator_round(); }); - time_if_index(GRAND_PRODUCT_COMPUTATION, [&] { prover.execute_grand_product_computation_round(); }); - time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); }); - time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); }); - state.ResumeTiming(); - } + + time_if_index(PREAMBLE, [&] { prover.execute_preamble_round(); }); + time_if_index(WIRE_COMMITMENTS, [&] { prover.execute_wire_commitments_round(); }); + time_if_index(SORTED_LIST_ACCUMULATOR, [&] { prover.execute_sorted_list_accumulator_round(); }); + time_if_index(GRAND_PRODUCT_COMPUTATION, [&] { prover.execute_grand_product_computation_round(); }); + time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); }); + time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); }); } BBERG_PROFILE static void test_round(State& state, size_t index) noexcept { barretenberg::srs::init_crs_factory("../srs_db/ignition"); - honk::UltraComposer composer; - // TODO(AD) benchmark both sparse and dense circuits? 
- honk::UltraProver prover = - bench_utils::get_prover(composer, &bench_utils::generate_keccak_test_circuit, 1); - test_round_inner(state, prover, index); + for (auto _ : state) { + state.PauseTiming(); + honk::UltraComposer composer; + // TODO: https://github.com/AztecProtocol/barretenberg/issues/761 benchmark both sparse and dense circuits + honk::UltraProver prover = bench_utils::get_prover( + composer, &bench_utils::generate_ecdsa_verification_test_circuit, 10); + test_round_inner(state, prover, index); + state.ResumeTiming(); + // NOTE: google bench is very finnicky, must end in ResumeTiming() for correctness + } } #define ROUND_BENCHMARK(round) \ static void ROUND_##round(State& state) noexcept \ { \ test_round(state, round); \ } \ - BENCHMARK(ROUND_##round)->Unit(::benchmark::kMillisecond) + BENCHMARK(ROUND_##round)->Unit(kMillisecond) // Fast rounds take a long time to benchmark because of how we compute statistical significance. // Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part. 
@@ -65,4 +67,4 @@ ROUND_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1); ROUND_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1); ROUND_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1); ROUND_BENCHMARK(RELATION_CHECK); -ROUND_BENCHMARK(ZEROMORPH); \ No newline at end of file +ROUND_BENCHMARK(ZEROMORPH); diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk.bench.cpp index 74a9fd1acc7..9ea895fe703 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk.bench.cpp @@ -3,45 +3,44 @@ #include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp" using namespace benchmark; +using namespace proof_system; -namespace ultra_plonk_bench { - -using UltraBuilder = proof_system::UltraCircuitBuilder; -using UltraPlonk = proof_system::plonk::UltraComposer; - -// Number of times to perform operation of interest in the benchmark circuits, e.g. 
# of hashes to perform -constexpr size_t MIN_NUM_ITERATIONS = bench_utils::BenchParams::MIN_NUM_ITERATIONS; -constexpr size_t MAX_NUM_ITERATIONS = bench_utils::BenchParams::MAX_NUM_ITERATIONS; -// Number of times to repeat each benchmark -constexpr size_t NUM_REPETITIONS = bench_utils::BenchParams::NUM_REPETITIONS; +/** + * @brief Benchmark: Construction of an Ultra Plonk proof for a circuit determined by the provided circuit function + */ +static void construct_proof_ultraplonk(State& state, + void (*test_circuit_function)(UltraCircuitBuilder&, size_t)) noexcept +{ + size_t num_iterations = 10; // 10x the circuit + bench_utils::construct_proof_with_specified_num_iterations( + state, test_circuit_function, num_iterations); +} /** - * @brief Benchmark: Construction of a Ultra Honk proof for a circuit determined by the provided circuit function + * @brief Benchmark: Construction of an Ultra Plonk proof with 2**n gates */ -void construct_proof_ultra(State& state, void (*test_circuit_function)(UltraBuilder&, size_t)) noexcept +static void construct_proof_ultraplonk_power_of_2(State& state) noexcept { - bench_utils::construct_proof_with_specified_num_iterations(state, test_circuit_function); + auto log2_of_gates = static_cast(state.range(0)); + bench_utils::construct_proof_with_specified_num_iterations( + state, &bench_utils::generate_basic_arithmetic_circuit, log2_of_gates); } -BENCHMARK_CAPTURE(construct_proof_ultra, sha256, &bench_utils::generate_sha256_test_circuit) - ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) - ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kMillisecond); -BENCHMARK_CAPTURE(construct_proof_ultra, keccak, &bench_utils::generate_keccak_test_circuit) - ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) - ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kMillisecond); -BENCHMARK_CAPTURE(construct_proof_ultra, +// Define benchmarks +BENCHMARK_CAPTURE(construct_proof_ultraplonk, sha256, &bench_utils::generate_sha256_test_circuit) + 
->Unit(kMillisecond); +BENCHMARK_CAPTURE(construct_proof_ultraplonk, keccak, &bench_utils::generate_keccak_test_circuit) + ->Unit(kMillisecond); +BENCHMARK_CAPTURE(construct_proof_ultraplonk, ecdsa_verification, - &bench_utils::generate_ecdsa_verification_test_circuit) - ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) - ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kMillisecond); -BENCHMARK_CAPTURE(construct_proof_ultra, + &bench_utils::generate_ecdsa_verification_test_circuit) + ->Unit(kMillisecond); +BENCHMARK_CAPTURE(construct_proof_ultraplonk, merkle_membership, - &bench_utils::generate_merkle_membership_test_circuit) - ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) - ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kMillisecond); + &bench_utils::generate_merkle_membership_test_circuit) + ->Unit(kMillisecond); -} // namespace ultra_plonk_bench \ No newline at end of file +BENCHMARK(construct_proof_ultraplonk_power_of_2) + // 2**13 gates to 2**18 gates + ->DenseRange(13, 18) + ->Unit(kMillisecond); \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk_rounds.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk_rounds.bench.cpp new file mode 100644 index 00000000000..0db10eea970 --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk_rounds.bench.cpp @@ -0,0 +1,79 @@ +#include + +#include "barretenberg/benchmark/honk_bench/benchmark_utilities.hpp" +#include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp" + +using namespace benchmark; +using namespace proof_system; + +// The rounds to measure +enum { + PREAMBLE, + FIRST_WIRE_COMMITMENTS, + SECOND_FIAT_SHAMIR_ETA, + THIRD_FIAT_SHAMIR_BETA_GAMMA, + FOURTH_FIAT_SHAMIR_ALPHA_AND_COMMIT, + FIFTH_COMPUTE_QUOTIENT_EVALUTION, + SIXTH_BATCH_OPEN +}; + +BBERG_PROFILE static void plonk_round( + State& state, plonk::UltraProver& prover, size_t target_index, size_t index, 
auto&& func) noexcept +{ + if (index == target_index) { + state.ResumeTiming(); + } + func(); + prover.queue.process_queue(); + if (index == target_index) { + state.PauseTiming(); + } +} +/** + * @details Benchmark ultraplonk by performing all the rounds, but only measuring one. + * Note: As a result the very short rounds take a long time for statistical significance, so recommended to set + *their iterations to 1. + * @param state - The google benchmark state. + * @param prover - The ultraplonk prover. + * @param index - The pass to measure. + **/ +BBERG_PROFILE static void test_round_inner(State& state, plonk::UltraProver& prover, size_t index) noexcept +{ + plonk_round(state, prover, PREAMBLE, index, [&] { prover.execute_preamble_round(); }); + plonk_round(state, prover, FIRST_WIRE_COMMITMENTS, index, [&] { prover.execute_first_round(); }); + plonk_round(state, prover, SECOND_FIAT_SHAMIR_ETA, index, [&] { prover.execute_second_round(); }); + plonk_round(state, prover, THIRD_FIAT_SHAMIR_BETA_GAMMA, index, [&] { prover.execute_third_round(); }); + plonk_round(state, prover, FOURTH_FIAT_SHAMIR_ALPHA_AND_COMMIT, index, [&] { prover.execute_fourth_round(); }); + plonk_round(state, prover, FIFTH_COMPUTE_QUOTIENT_EVALUTION, index, [&] { prover.execute_fifth_round(); }); + plonk_round(state, prover, SIXTH_BATCH_OPEN, index, [&] { prover.execute_sixth_round(); }); +} +BBERG_PROFILE static void test_round(State& state, size_t index) noexcept +{ + barretenberg::srs::init_crs_factory("../srs_db/ignition"); + for (auto _ : state) { + state.PauseTiming(); + plonk::UltraComposer composer; + // TODO: https://github.com/AztecProtocol/barretenberg/issues/761 benchmark both sparse and dense circuits + plonk::UltraProver prover = bench_utils::get_prover( + composer, &bench_utils::generate_ecdsa_verification_test_circuit, 10); + test_round_inner(state, prover, index); + // NOTE: google bench is very finicky, must end in ResumeTiming() for correctness + state.ResumeTiming(); + } +} 
+#define ROUND_BENCHMARK(round) \ + static void ROUND_##round(State& state) noexcept \ + { \ + test_round(state, round); \ + } \ + BENCHMARK(ROUND_##round)->Unit(kMillisecond) + +// Fast rounds take a long time to benchmark because of how we compute statistical significance. +// Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part. +ROUND_BENCHMARK(PREAMBLE)->Iterations(1); +ROUND_BENCHMARK(FIRST_WIRE_COMMITMENTS); +ROUND_BENCHMARK(SECOND_FIAT_SHAMIR_ETA); +ROUND_BENCHMARK(THIRD_FIAT_SHAMIR_BETA_GAMMA); +ROUND_BENCHMARK(FOURTH_FIAT_SHAMIR_ALPHA_AND_COMMIT); +ROUND_BENCHMARK(FIFTH_COMPUTE_QUOTIENT_EVALUTION); +ROUND_BENCHMARK(SIXTH_BATCH_OPEN); diff --git a/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/c_bind.cpp b/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/c_bind.cpp deleted file mode 100644 index 18808d3a016..00000000000 --- a/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/c_bind.cpp +++ /dev/null @@ -1,134 +0,0 @@ -#include "prover.hpp" - -using namespace barretenberg; - -/** - * Commenting out now, as we really shouldn't need these if we use native threading. - * Not deleting because we *may* still want the work queue in some cases, but I'm really hoping we can just not. 
- */ -/* -extern "C" { - -using Prover = plonk::UltraProver; - -WASM_EXPORT void prover_process_queue(Prover* prover) -{ - prover->queue.process_queue(); -} - -WASM_EXPORT size_t prover_get_circuit_size(Prover* prover) -{ - return prover->get_circuit_size(); -} - -WASM_EXPORT void prover_get_work_queue_item_info(Prover* prover, uint8_t* result) -{ - auto info = prover->get_queued_work_item_info(); - memcpy(result, &info, sizeof(info)); -} - -WASM_EXPORT fr* prover_get_scalar_multiplication_data(Prover* prover, size_t work_item_number) -{ - return prover->get_scalar_multiplication_data(work_item_number).get(); -} - -WASM_EXPORT size_t prover_get_scalar_multiplication_size(Prover* prover, size_t work_item_number) -{ - return prover->get_scalar_multiplication_size(work_item_number); -} - -WASM_EXPORT void prover_put_scalar_multiplication_data(Prover* prover, - g1::element* result, - const size_t work_item_number) -{ - prover->put_scalar_multiplication_data(*result, work_item_number); -} - -WASM_EXPORT fr* prover_get_fft_data(Prover* prover, fr* shift_factor, size_t work_item_number) -{ - auto data = prover->get_fft_data(work_item_number); - *shift_factor = data.shift_factor; - return data.data.get(); -} - -WASM_EXPORT void prover_put_fft_data(Prover* prover, fr* result, size_t work_item_number) -{ - prover->put_fft_data(std::shared_ptr(result, aligned_free), work_item_number); -} - -WASM_EXPORT fr* prover_get_ifft_data(Prover* prover, size_t work_item_number) -{ - return prover->get_ifft_data(work_item_number).get(); -} - -WASM_EXPORT void prover_put_ifft_data(Prover* prover, fr* result, size_t work_item_number) -{ - prover->put_ifft_data(std::shared_ptr(result, aligned_free), work_item_number); -} - -WASM_EXPORT void prover_execute_preamble_round(Prover* prover) -{ - prover->execute_preamble_round(); -} - -WASM_EXPORT void prover_execute_first_round(Prover* prover) -{ - prover->execute_first_round(); -} - -WASM_EXPORT void prover_execute_second_round(Prover* prover) 
-{ - prover->execute_second_round(); -} - -WASM_EXPORT void prover_execute_third_round(Prover* prover) -{ - prover->execute_third_round(); -} - -WASM_EXPORT void prover_execute_fourth_round(Prover* prover) -{ - prover->execute_fourth_round(); -} - -WASM_EXPORT void prover_execute_fifth_round(Prover* prover) -{ - prover->execute_fifth_round(); -} - -WASM_EXPORT void prover_execute_sixth_round(Prover* prover) -{ - prover->execute_sixth_round(); -} - -WASM_EXPORT size_t prover_export_proof(Prover* prover, uint8_t** proof_data_buf) -{ - auto& proof_data = prover->export_proof().proof_data; - *proof_data_buf = proof_data.data(); - return proof_data.size(); -} - -WASM_EXPORT void coset_fft_with_generator_shift(fr* coefficients, fr* constant, evaluation_domain* domain) -{ - polynomial_arithmetic::coset_fft_with_generator_shift(coefficients, *domain, *constant); -} - -WASM_EXPORT void ifft(fr* coefficients, evaluation_domain* domain) -{ - polynomial_arithmetic::ifft(coefficients, *domain); -} - -WASM_EXPORT void* new_evaluation_domain(size_t circuit_size) -{ - auto domain = new evaluation_domain(circuit_size); - domain->compute_lookup_table(); - return domain; -} - -WASM_EXPORT void delete_evaluation_domain(void* domain) -{ - delete reinterpret_cast(domain); -} -} - -*/ diff --git a/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/prover.hpp b/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/prover.hpp index 76295aafc72..408b9ea3363 100644 --- a/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/prover.hpp +++ b/barretenberg/cpp/src/barretenberg/plonk/proof_system/prover/prover.hpp @@ -19,13 +19,13 @@ template class ProverBase { ProverBase& operator=(const ProverBase& other) = delete; ProverBase& operator=(ProverBase&& other); - void execute_preamble_round(); - void execute_first_round(); - void execute_second_round(); - void execute_third_round(); - void execute_fourth_round(); - void execute_fifth_round(); - void execute_sixth_round(); + 
BBERG_PROFILE void execute_preamble_round(); + BBERG_PROFILE void execute_first_round(); + BBERG_PROFILE void execute_second_round(); + BBERG_PROFILE void execute_third_round(); + BBERG_PROFILE void execute_fourth_round(); + BBERG_PROFILE void execute_fifth_round(); + BBERG_PROFILE void execute_sixth_round(); void add_polynomial_evaluations_to_transcript(); void compute_batch_opening_polynomials();