Skip to content

Commit

Permalink
feat(bb): integrate tracy memory/cpu profiler (#7718)
Browse files Browse the repository at this point in the history
This is an in-depth profiler that has a bit of a learning curve but has
really good thorough results

![image](https://github.com/user-attachments/assets/156c70ba-c7c5-4eab-94c2-f375b867381c)
  • Loading branch information
ludamad authored Aug 1, 2024
1 parent 3270662 commit 67efb8b
Show file tree
Hide file tree
Showing 14 changed files with 164 additions and 13 deletions.
10 changes: 10 additions & 0 deletions barretenberg/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -361,3 +361,13 @@ command script import ~/aztec-packages/barretenberg/cpp/scripts/lldb_format.py
```

Now when you `print` things with e.g. `print bigfield_t.get_value()` or inspect in VSCode (if you opened the debug console and put in these commands) then you will get pretty-printing of these types. This can be expanded fairly easily with more types if needed.


### Using Tracy to Profile Memory/CPU

See the Tracy manual, linked from https://github.com/wolfpld/tracy, for in-depth Tracy documentation.

The basic workflow is to build with the `cmake --preset tracy` preset, run a benchmark while recording a capture file, then
transfer that file to a local machine for interactive inspection in the Tracy profiler UI.

All the steps needed to do this are scripted in `cpp/scripts/benchmark_tracy.sh`.
9 changes: 9 additions & 0 deletions barretenberg/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ option(ENABLE_ASAN "Address sanitizer for debugging tricky memory corruption" OF
option(ENABLE_HEAVY_TESTS "Enable heavy tests when collecting coverage" OFF)
# Note: Must do 'sudo apt-get install libdw-dev' or equivalent
option(CHECK_CIRCUIT_STACKTRACES "Enable (slow) stack traces for check circuit" OFF)
option(ENABLE_TRACY "Enable low-medium overhead profiling for memory and performance with tracy" OFF)

if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
message(STATUS "Compiling for ARM.")
Expand All @@ -52,6 +53,13 @@ if(CHECK_CIRCUIT_STACKTRACES)
add_compile_options(-DCHECK_CIRCUIT_STACKTRACES)
endif()

# Tracy profiling is opt-in. When enabled, -DTRACY_ENABLE is added to the compile options and
# TRACY_LIBS names the Tracy client library; otherwise TRACY_LIBS is left unset so that
# link lists referencing ${TRACY_LIBS} expand to nothing.
if(NOT ENABLE_TRACY)
    set(TRACY_LIBS)
else()
    add_compile_options(-DTRACY_ENABLE)
    set(TRACY_LIBS Tracy::TracyClient)
endif()

if(ENABLE_ASAN)
add_compile_options(-fsanitize=address)
add_link_options(-fsanitize=address)
Expand Down Expand Up @@ -139,6 +147,7 @@ include(cmake/build.cmake)
include(GNUInstallDirs)
include(cmake/arch.cmake)
include(cmake/threading.cmake)
include(cmake/tracy.cmake)
include(cmake/gtest.cmake)
include(cmake/benchmark.cmake)
include(cmake/module.cmake)
Expand Down
25 changes: 25 additions & 0 deletions barretenberg/cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,26 @@
"DISABLE_ASM": "ON"
}
},
{
"name": "tracy",
"displayName": "Release build with tracy",
"description": "Release build with tracy",
"inherits": "clang16",
"binaryDir": "build-tracy",
"cacheVariables": {
"ENABLE_TRACY": "ON"
}
},
{
"name": "wasm-tracy",
"displayName": "WASM build with tracy",
"description": "WASM build with tracy",
"inherits": "clang16",
"binaryDir": "build-wasm-tracy",
"cacheVariables": {
"ENABLE_TRACY": "ON"
}
},
{
"name": "clang16-dbg-fast",
"displayName": "Optimized debug build with Clang-16",
Expand Down Expand Up @@ -410,6 +430,11 @@
"inherits": "default",
"configurePreset": "clang16-dbg"
},
{
"name": "tracy",
"inherits": "default",
"configurePreset": "tracy"
},
{
"name": "clang16-dbg-fast",
"inherits": "default",
Expand Down
5 changes: 5 additions & 0 deletions barretenberg/cpp/cmake/module.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ function(barretenberg_module MODULE_NAME)
${MODULE_NAME}
PUBLIC
${ARGN}
${TRACY_LIBS}
${TBB_IMPORTED_TARGETS}
)

Expand Down Expand Up @@ -89,6 +90,7 @@ function(barretenberg_module MODULE_NAME)
target_link_libraries(
${MODULE_NAME}_test_objects
PRIVATE
${TRACY_LIBS}
GTest::gtest
GTest::gtest_main
GTest::gmock_main
Expand Down Expand Up @@ -147,6 +149,7 @@ function(barretenberg_module MODULE_NAME)
GTest::gtest
GTest::gtest_main
GTest::gmock_main
${TRACY_LIBS}
${TBB_IMPORTED_TARGETS}
)

Expand Down Expand Up @@ -238,6 +241,7 @@ function(barretenberg_module MODULE_NAME)
${BENCHMARK_NAME}_bench_objects
PRIVATE
benchmark::benchmark
${TRACY_LIBS}
${TBB_IMPORTED_TARGETS}
)

Expand All @@ -253,6 +257,7 @@ function(barretenberg_module MODULE_NAME)
${MODULE_LINK_NAME}
${ARGN}
benchmark::benchmark
${TRACY_LIBS}
${TBB_IMPORTED_TARGETS}
)
if(CHECK_CIRCUIT_STACKTRACES)
Expand Down
2 changes: 1 addition & 1 deletion barretenberg/cpp/cmake/msgpack.cmake
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
include(ExternalProject)

# External project: Download msgpack-c from GitHu
# External project: Download msgpack-c from GitHub
set(MSGPACK_PREFIX "${CMAKE_BINARY_DIR}/_deps/msgpack-c")
set(MSGPACK_INCLUDE "${MSGPACK_PREFIX}/src/msgpack-c/include")

Expand Down
16 changes: 16 additions & 0 deletions barretenberg/cpp/cmake/tracy.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
include(FetchContent)

# Location of Tracy's public headers inside the checkout that FetchContent downloads below.
# Exported so other CMake files can add it to their include directories.
set(TRACY_INCLUDE "${CMAKE_BINARY_DIR}/_deps/tracy-src/public")

# Work around an issue finding threads: name the pthread link flag explicitly.
# This has to be set before Tracy's own CMake project is configured below.
set(CMAKE_THREAD_LIBS_INIT "-lpthread")

# Pin Tracy to a fixed commit, download it, and add it to the build (add_subdirectory).
# SYSTEM is optional: it makes the Tracy include directory be treated as a system directory.
FetchContent_Declare(
    tracy
    GIT_REPOSITORY https://github.com/wolfpld/tracy
    GIT_TAG ffb98a972401c246b2348fb5341252e2ba855d00
    SYSTEM
)
FetchContent_MakeAvailable(tracy)
32 changes: 32 additions & 0 deletions barretenberg/cpp/scripts/benchmark_tracy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

# NOTE: intended to be run from one's external computer, connecting to the Aztec mainframe.
# The benchmark runs with headless capture, then we copy the trace file and run the tracy profiler.
# This is thus only really useful internally at Aztec, sorry external folks. It can be easily tweaked
# however for any SSH setup, especially an ubuntu one, and of course if you are just tracing on the
# same machine you can use the normal interactive tracy workflow.
#
# Usage: benchmark_tracy.sh <user> [benchmark] [command]
#   user       used to derive the SSH host name "<user>-box"
#   benchmark  benchmark target to build (default: ultra_plonk_bench)
#   command    command run from build-tracy on the box (default: ./bin/<benchmark>)
set -eux
USER=${1:?usage: $0 <user> [benchmark] [command]}
BOX=$USER-box
BENCHMARK=${2:-ultra_plonk_bench}
COMMAND=${3:-./bin/$BENCHMARK}

# Everything in the quoted string below runs on the box. BENCHMARK and COMMAND are expanded
# locally before the string is sent. Notes on the remote steps:
#  - apt-get gets -y because there is no TTY to answer its confirmation prompt.
#  - tracy-capture is backgrounded with a bare '&' (a ';' after '&' is a shell syntax error).
#  - the benchmark is selected with --target (--parallel only accepts a job count).
#  - the final 'wait' lets tracy-capture finish writing the trace before we copy it.
ssh "$BOX" "
  set -eux ;
  ! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy ;
  cd ~/tracy/capture ;
  sudo apt-get install -y libdbus-1-dev libdbus-glib-1-dev ;
  mkdir -p build && cd build && cmake .. && make -j ;
  ./tracy-capture -a 127.0.0.1 -f -o trace-$BENCHMARK &
  sleep 0.1 ;
  cd ~/aztec-packages/barretenberg/cpp/ ;
  cmake --preset tracy && cmake --build --preset tracy --target $BENCHMARK ;
  cd build-tracy ;
  ninja $BENCHMARK ;
  $COMMAND ;
  wait ;
"

# Build the interactive profiler UI locally (cloning Tracy first if needed).
! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy
cd ~/tracy
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release
cmake --build profiler/build --parallel

# The capture was written under ~/tracy/capture/build on the box; an scp path without a leading
# slash is resolved relative to the remote home directory, so this matches wherever ~ points.
scp "$BOX:tracy/capture/build/trace-$BENCHMARK" .
~/tracy/profiler/build/tracy-profiler "trace-$BENCHMARK"
2 changes: 1 addition & 1 deletion barretenberg/cpp/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ if(WASM)
add_link_options(-Wl,--export-memory,--import-memory,--stack-first,-z,stack-size=1048576,--max-memory=4294967296)
endif()

include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${MSGPACK_INCLUDE})
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${MSGPACK_INCLUDE} ${TRACY_INCLUDE})

# I feel this should be limited to ecc, however it's currently used in headers that go across libraries,
# and there currently isn't an easy way to inherit the DDISABLE_ASM parameter.
Expand Down
1 change: 1 addition & 0 deletions barretenberg/cpp/src/barretenberg/bb/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ if (NOT(FUZZING) AND NOT(WASM))
barretenberg
env
circuit_checker
${TRACY_LIBS}
libdeflate::libdeflate_static
)
if(CHECK_CIRCUIT_STACKTRACES)
Expand Down
4 changes: 3 additions & 1 deletion barretenberg/cpp/src/barretenberg/client_ivc/client_ivc.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "barretenberg/client_ivc/client_ivc.hpp"
#include "tracy/Tracy.hpp"

namespace bb {

Expand Down Expand Up @@ -56,6 +57,7 @@ void ClientIVC::accumulate(ClientCircuit& circuit, const std::shared_ptr<Verific
*/
ClientIVC::Proof ClientIVC::prove()
{
    // Tracy: record this function's scope as a profiling zone.
    ZoneScoped;

    // print minimum structured sizes for each block
    max_block_size_tracker.print();

    // Elements of a braced-init-list are evaluated left to right, so decider_prove() runs
    // before goblin.prove().
    return { fold_output.proof, decider_prove(), goblin.prove() };
}
Expand Down Expand Up @@ -85,7 +87,7 @@ bool ClientIVC::verify(const Proof& proof,
* @param proof
* @return bool
*/
bool ClientIVC::verify(Proof& proof, const std::vector<std::shared_ptr<VerifierInstance>>& verifier_instances)
bool ClientIVC::verify(Proof& proof, const std::vector<std::shared_ptr<VerifierInstance>>& verifier_instances) const
{
auto eccvm_vk = std::make_shared<ECCVMVerificationKey>(goblin.get_eccvm_proving_key());
auto translator_vk = std::make_shared<TranslatorVerificationKey>(goblin.get_translator_proving_key());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class ClientIVC {
const std::shared_ptr<ClientIVC::ECCVMVerificationKey>& eccvm_vk,
const std::shared_ptr<ClientIVC::TranslatorVerificationKey>& translator_vk);

bool verify(Proof& proof, const std::vector<std::shared_ptr<VerifierInstance>>& verifier_instances);
bool verify(Proof& proof, const std::vector<std::shared_ptr<VerifierInstance>>& verifier_instances) const;

bool prove_and_verify();

Expand Down
27 changes: 27 additions & 0 deletions barretenberg/cpp/src/barretenberg/common/mem.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#include "barretenberg/common/mem.hpp"

#include <new>

/**
 * @brief Replacement for the global `operator new` that reports each successful allocation to Tracy.
 *
 * @details A replacement `operator new` must never return nullptr. A zero-byte request is therefore
 * bumped to one byte (malloc(0) is allowed to return nullptr), and an out-of-memory condition throws
 * std::bad_alloc, or aborts when this file is compiled without exception support.
 *
 * @param count Number of bytes requested.
 * @return A non-null pointer to at least `count` bytes obtained from malloc.
 */
void* operator new(std::size_t count)
{
    // NOLINTNEXTLINE(cppcoreguidelines-no-malloc, cppcoreguidelines-owning-memory)
    void* ptr = std::malloc(count == 0 ? 1 : count);
    if (ptr == nullptr) {
#if defined(__cpp_exceptions)
        throw std::bad_alloc();
#else
        std::abort();
#endif
    }
    // Only successful allocations are reported, with the size the caller asked for.
    TRACY_ALLOC(ptr, count);
    return ptr;
}

/**
 * @brief Replacement for the global unsized `operator delete`.
 *
 * @details Reports the free event to Tracy first, then returns the memory with free().
 *
 * @param ptr Pointer being deallocated.
 */
void operator delete(void* ptr) noexcept
{
    TRACY_FREE(ptr);
    // NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
    std::free(ptr);
}

/**
 * @brief Replacement for the global sized `operator delete`.
 *
 * @details Behaves like the unsized form: the free event is reported to Tracy, then the memory is
 * returned with free(). The size hint is not used.
 *
 * @param ptr Pointer being deallocated.
 * @param size Size passed by the caller; ignored.
 */
void operator delete(void* ptr, [[maybe_unused]] std::size_t size) noexcept
{
    TRACY_FREE(ptr);
    // NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
    std::free(ptr);
}
35 changes: 32 additions & 3 deletions barretenberg/cpp/src/barretenberg/common/mem.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
#pragma once
#include "log.hpp"
#include "memory.h"
#include "tracy/Tracy.hpp"
#include "wasm_export.hpp"
#include <cstdlib>
#include <memory>
// #include <malloc.h>

// Wrappers around Tracy's memory-event macros, so the capture mode can be switched in this one place
// (that is the only reason TracyAlloc / TracyAllocS are wrapped at all).
// Active variant: TracyAllocS / TracyFreeS, passed a stack depth of 10. Capturing stack traces
// is more expensive than the plain variant.
#define TRACY_ALLOC(t, size) TracyAllocS(t, size, /*stack depth*/ 10)
#define TRACY_FREE(t) TracyFreeS(t, /*stack depth*/ 10)
// Cheaper alternative without stack traces; swap with the two definitions above to use it:
// #define TRACY_ALLOC(t, size) TracyAlloc(t, size)
// #define TRACY_FREE(t) TracyFree(t)

// Rounds `size` up to the next multiple of `alignment`; a `size` that is already a multiple is returned unchanged.
// Every argument use is parenthesised so that expression arguments such as pad(a + b, n) expand correctly.
// NOTE: both arguments are evaluated more than once, so do not pass expressions with side effects.
#define pad(size, alignment) ((size) - ((size) % (alignment)) + ((((size) % (alignment)) == 0) ? 0 : (alignment)))

Expand All @@ -17,11 +24,13 @@ inline void* aligned_alloc(size_t alignment, size_t size)
info("bad alloc of size: ", size);
std::abort();
}
TRACY_ALLOC(t, size);
return t;
}

/**
 * @brief Releases memory obtained from the matching aligned_alloc.
 *
 * @details The free event is reported to Tracy before the memory is handed back with free().
 *
 * @param mem Pointer to release.
 */
inline void aligned_free(void* mem)
{
    TRACY_FREE(mem);
    std::free(mem);
}
#endif
Expand All @@ -41,13 +50,15 @@ inline void* protected_aligned_alloc(size_t alignment, size_t size)
info("bad alloc of size: ", size);
std::abort();
}
TRACY_ALLOC(t, size);
return t;
}

#define aligned_alloc protected_aligned_alloc

/**
 * @brief Releases memory obtained from protected_aligned_alloc (aliased as aligned_alloc above).
 *
 * @details The free event is reported to Tracy before the memory is handed back with free().
 *
 * @param mem Pointer to release.
 */
inline void aligned_free(void* mem)
{
    TRACY_FREE(mem);
    // NOLINTNEXTLINE(cppcoreguidelines-owning-memory, cppcoreguidelines-no-malloc)
    std::free(mem);
}
Expand All @@ -56,11 +67,14 @@ inline void aligned_free(void* mem)
#ifdef _WIN32
/**
 * @brief Windows implementation of aligned_alloc, built on _aligned_malloc.
 *
 * @details The allocation is reported to Tracy before the pointer is returned. The earlier direct
 * `return _aligned_malloc(...)` is removed because it made the tracking code unreachable.
 *
 * @param alignment Required alignment in bytes.
 * @param size Number of bytes to allocate.
 * @return Pointer returned by _aligned_malloc (note its argument order is size, then alignment).
 */
inline void* aligned_alloc(size_t alignment, size_t size)
{
    void* t = _aligned_malloc(size, alignment);
    TRACY_ALLOC(t, size);
    return t;
}

/**
 * @brief Windows implementation of aligned_free, built on _aligned_free.
 *
 * @details The free event is reported to Tracy before the memory is released.
 *
 * @param mem Pointer to release.
 */
inline void aligned_free(void* mem)
{
    TRACY_FREE(mem);
    _aligned_free(mem);
}
#endif
Expand All @@ -79,4 +93,19 @@ inline void aligned_free(void* mem)
// info("Total allocated space (uordblks): ", minfo.uordblks);
// info("Total free space (fordblks): ", minfo.fordblks);
// info("Top-most, releasable space (keepcost): ", minfo.keepcost);
// }
// }

/**
 * @brief malloc wrapper that also reports the allocation to Tracy.
 *
 * @param size Number of bytes to allocate.
 * @return The pointer returned by malloc, unchanged.
 */
inline void* tracy_malloc(size_t size)
{
    // NOLINTNEXTLINE(cppcoreguidelines-owning-memory, cppcoreguidelines-no-malloc)
    void* ptr = std::malloc(size);
    TRACY_ALLOC(ptr, size);
    return ptr;
}

/**
 * @brief free wrapper that also reports the free event to Tracy; counterpart of tracy_malloc.
 *
 * @details The free event is reported before the memory is handed back with free().
 *
 * @param mem Pointer to release.
 */
inline void tracy_free(void* mem)
{
    TRACY_FREE(mem);
    // NOLINTNEXTLINE(cppcoreguidelines-owning-memory, cppcoreguidelines-no-malloc)
    std::free(mem);
}
7 changes: 1 addition & 6 deletions barretenberg/cpp/src/barretenberg/common/slab_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ std::shared_ptr<void> SlabAllocator::get(size_t req_size)
return { aligned_alloc(32, req_size), aligned_free };
}
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
return { malloc(req_size), free };
return { tracy_malloc(req_size), tracy_free };
}

size_t SlabAllocator::get_total_size()
Expand Down Expand Up @@ -209,11 +209,6 @@ void init_slab_allocator(size_t circuit_subgroup_size)
allocator.init(circuit_subgroup_size);
}

// auto init = ([]() {
// init_slab_allocator(524288);
// return 0;
// })();

std::shared_ptr<void> get_mem_slab(size_t size)
{
return allocator.get(size);
Expand Down

0 comments on commit 67efb8b

Please sign in to comment.