diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index da2154687a6..02ad94ce29d 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -85,14 +85,25 @@ }, { "name": "tracy", - "displayName": "Release build with tracy", - "description": "Release build with tracy", + "displayName": "Release build with tracy, optimized for memory tracking", + "description": "Release build with tracy, optimized for memory tracking", "inherits": "clang16", "binaryDir": "build-tracy", "cacheVariables": { "ENABLE_TRACY": "ON" } }, + { + "name": "tracy-gates", + "displayName": "Release build with tracy - but hacked for gate tracking", + "description": "Release build with tracy - but hacked for gate tracking", + "inherits": "clang16", + "binaryDir": "build-tracy-gates", + "cacheVariables": { + "ENABLE_TRACY": "ON", + "CMAKE_CXX_FLAGS": "-DTRACY_HACK_GATES_AS_MEMORY" + } + }, { "name": "wasm-tracy", "displayName": "WASM build with tracy", @@ -435,6 +446,11 @@ "inherits": "default", "configurePreset": "tracy" }, + { + "name": "tracy-gates", + "inherits": "default", + "configurePreset": "tracy-gates" + }, { "name": "clang16-dbg-fast", "inherits": "default", diff --git a/barretenberg/cpp/scripts/benchmark_tracy.sh b/barretenberg/cpp/scripts/benchmark_tracy.sh index 4d9c2d3480e..e39f6517bbc 100644 --- a/barretenberg/cpp/scripts/benchmark_tracy.sh +++ b/barretenberg/cpp/scripts/benchmark_tracy.sh @@ -1,31 +1,40 @@ # NOTE: intended to be ran from one's external computer, connecting to Aztec mainframe +# IF ON YOUR LOCAL COMPUTER USE NORMAL INTERACTIVE TRACY WORKFLOW # the benchmark runs with headless capture and then we copy the trace file and run tracy profiler # This is thus only really useful internally at Aztec, sorry external folks. 
It can be easily tweaked -# however for any SSH setup, especially an ubuntu one, and of course if you are just tracing on the -# same machine you can use the normal interactive tracy workflow. +# however for any SSH setup, especially an ubuntu one. +# on local machine run: +# export USER=... +# export PRESET=...tracy for memory or tracy-gates for circuit gates... +# ssh $USER-box "cat ~/aztec-packages/barretenberg/cpp/scripts/benchmark_tracy.sh" | bash /dev/stdin $USER set -eux -USER=$1 +USER=${1:-$USER} BOX=$USER-box -BENCHMARK=${2:-ultra_plonk_bench} -COMMAND=${3:-./bin/$BENCHMARK} +BENCHMARK=${2:-client_ivc_bench} +COMMAND=${3:-./bin/$BENCHMARK --benchmark_filter=ClientIVCBench/Full/6"\$"} + +# Can also set PRESET=tracy-gates env variable +PRESET=${PRESET:-tracy} ssh $BOX " set -eux ; + cd ~/aztec-packages/barretenberg/cpp/ ; + cmake --preset $PRESET && cmake --build --preset $PRESET --target $BENCHMARK ; ! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy ; cd ~/tracy/capture ; - sudo apt-get install libdbus-1-dev libdbus-glib-1-dev ; + git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 ; + sudo apt-get install -y libdbus-1-dev libdbus-glib-1-dev ; mkdir -p build && cd build && cmake .. && make -j ; ./tracy-capture -a 127.0.0.1 -f -o trace-$BENCHMARK & ; sleep 0.1 ; - cd ~/aztec-packages/barretenberg/cpp/ ; - cmake --preset tracy && cmake --build --preset tracy --parallel $BENCHMARK ; - cd build-tracy ; - ninja $BENCHMARK ; + cd ~/aztec-packages/barretenberg/cpp/build-$PRESET ; $COMMAND ; -" +" & +wait # TODO(AD) hack - not sure why needed ! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy cd ~/tracy +git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 # release 0.11.0 cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release cmake --build profiler/build --parallel scp $BOX:/mnt/user-data/$USER/tracy/capture/build/trace-$BENCHMARK . 
diff --git a/barretenberg/cpp/src/barretenberg/common/mem.cpp b/barretenberg/cpp/src/barretenberg/common/mem.cpp index 24a2e14b2c5..d6f4891ada7 100644 --- a/barretenberg/cpp/src/barretenberg/common/mem.cpp +++ b/barretenberg/cpp/src/barretenberg/common/mem.cpp @@ -1,5 +1,6 @@ #include "barretenberg/common/mem.hpp" +#ifdef TRACY_ENABLE void* operator new(std::size_t count) { // NOLINTBEGIN(cppcoreguidelines-no-malloc) @@ -25,3 +26,5 @@ void operator delete(void* ptr, std::size_t size) noexcept free(ptr); // NOLINTEND(cppcoreguidelines-no-malloc) } + +#endif \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/common/mem.hpp b/barretenberg/cpp/src/barretenberg/common/mem.hpp index d07b85d5da9..713161ec9b2 100644 --- a/barretenberg/cpp/src/barretenberg/common/mem.hpp +++ b/barretenberg/cpp/src/barretenberg/common/mem.hpp @@ -7,9 +7,31 @@ #include // This can be altered to capture stack traces, though more expensive -// This is the only reason we wrap TracyAlloc or TracyAllocS +// so wrap TracyAlloc or TracyAllocS. We disable these if gates are being tracked +// Gates are hackishly tracked as if they were memory, for the sweet sweet memory +// stack tree that doesn't seem to be available for other metric types. +#ifndef TRACY_HACK_GATES_AS_MEMORY #define TRACY_ALLOC(t, size) TracyAllocS(t, size, /*stack depth*/ 10) #define TRACY_FREE(t) TracyFreeS(t, /*stack depth*/ 10) +#define TRACY_GATE_ALLOC(t) +#define TRACY_GATE_FREE(t) +#else +#include <mutex> +#include <set> +#define TRACY_ALLOC(t, size) +#define TRACY_FREE(t) + +namespace bb { +// These are hacks to make sure tracy plays along +// If we free an ID not allocated, or allocate an index twice without a free it will complain +// so we hack thread-safety and an incrementing global ID. 
+inline std::mutex GLOBAL_GATE_MUTEX; // inline (not static): a single instance shared by every TU that includes this header +inline size_t GLOBAL_GATE = 0; // inline so gate IDs stay unique across translation units +inline std::set<size_t> FREED_GATES; // hack to prevent instrumentation failures +} // namespace bb +#define TRACY_GATE_ALLOC(index) TracyAllocS(reinterpret_cast<void*>(index), 1, /*stack depth*/ 50) +#define TRACY_GATE_FREE(index) TracyFreeS(reinterpret_cast<void*>(index), /*stack depth*/ 50) +#endif // #define TRACY_ALLOC(t, size) TracyAlloc(t, size) // #define TRACY_FREE(t) TracyFree(t) @@ -108,4 +130,4 @@ inline void tracy_free(void* mem) TRACY_FREE(mem); // NOLINTNEXTLINE(cppcoreguidelines-owning-memory, cppcoreguidelines-no-malloc) free(mem); -} \ No newline at end of file +} diff --git a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/arithmetization.hpp b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/arithmetization.hpp index 5c6b56f6605..8f40ef2a8fd 100644 --- a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/arithmetization.hpp +++ b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/arithmetization.hpp @@ -1,4 +1,5 @@ #pragma once +#include "barretenberg/common/mem.hpp" #include "barretenberg/common/ref_array.hpp" #include "barretenberg/common/slab_allocator.hpp" #include @@ -49,6 +50,18 @@ template class ExecutionTr // If enabled, we keep slow stack traces to be able to correlate gates with code locations where they were added StackTraces stack_traces; #endif +#ifdef TRACY_HACK_GATES_AS_MEMORY + std::vector<size_t> allocated_gates; +#endif + void tracy_gate() + { +#ifdef TRACY_HACK_GATES_AS_MEMORY + std::unique_lock lock(GLOBAL_GATE_MUTEX); + GLOBAL_GATE++; + TRACY_GATE_ALLOC(GLOBAL_GATE); + allocated_gates.push_back(GLOBAL_GATE); +#endif + } Wires wires; // vectors of indices into a witness variables array Selectors selectors; @@ -76,6 +89,18 @@ template class ExecutionTr uint32_t get_fixed_size() const { return fixed_size; } void set_fixed_size(uint32_t size_in) { fixed_size = size_in; } +#ifdef TRACY_HACK_GATES_AS_MEMORY + 
~ExecutionTraceBlock() + { + std::unique_lock lock(GLOBAL_GATE_MUTEX); + for ([[maybe_unused]] size_t gate : allocated_gates) { + if (!FREED_GATES.contains(gate)) { + TRACY_GATE_FREE(gate); + FREED_GATES.insert(gate); + } + } + } +#endif }; } // namespace bb \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/mega_arithmetization.hpp b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/mega_arithmetization.hpp index 0311301b6c6..cca898f800a 100644 --- a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/mega_arithmetization.hpp +++ b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/mega_arithmetization.hpp @@ -124,6 +124,7 @@ template class MegaArith { #ifdef CHECK_CIRCUIT_STACKTRACES this->stack_traces.populate(); #endif + this->tracy_gate(); this->wires[0].emplace_back(idx_1); this->wires[1].emplace_back(idx_2); this->wires[2].emplace_back(idx_3); diff --git a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/standard_arithmetization.hpp b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/standard_arithmetization.hpp index e4cb58ea8a1..b79dcdf443a 100644 --- a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/standard_arithmetization.hpp +++ b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/standard_arithmetization.hpp @@ -17,6 +17,7 @@ template class StandardArith { #ifdef CHECK_CIRCUIT_STACKTRACES this->stack_traces.populate(); #endif + this->tracy_gate(); this->wires[0].emplace_back(idx_1); this->wires[1].emplace_back(idx_2); this->wires[2].emplace_back(idx_3); diff --git a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/ultra_arithmetization.hpp b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/ultra_arithmetization.hpp index 277ef866da9..be4ca5b5fcb 100644 --- 
a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/ultra_arithmetization.hpp +++ b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/ultra_arithmetization.hpp @@ -63,6 +63,7 @@ template class UltraArith { #ifdef CHECK_CIRCUIT_STACKTRACES this->stack_traces.populate(); #endif + this->tracy_gate(); this->wires[0].emplace_back(idx_1); this->wires[1].emplace_back(idx_2); this->wires[2].emplace_back(idx_3);