Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ci): tracy gate counter preset #8382

Merged
merged 11 commits into from
Sep 5, 2024
Merged
20 changes: 18 additions & 2 deletions barretenberg/cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,25 @@
},
{
"name": "tracy",
"displayName": "Release build with tracy",
"description": "Release build with tracy",
"displayName": "Release build with tracy, optimized for memory tracking",
"description": "Release build with tracy, optimized for memory tracking",
"inherits": "clang16",
"binaryDir": "build-tracy",
"cacheVariables": {
"ENABLE_TRACY": "ON"
}
},
{
"name": "tracy-gates",
"displayName": "Release build with tracy - but hacked for gate tracking",
"description": "Release build with tracy - but hacker for gate tracking",
"inherits": "clang16",
"binaryDir": "build-tracy-gates",
"cacheVariables": {
"ENABLE_TRACY": "ON",
"CMAKE_CXX_FLAGS": "-DTRACY_HACK_GATES_AS_MEMORY"
}
},
{
"name": "wasm-tracy",
"displayName": "WASM build with tracy",
Expand Down Expand Up @@ -435,6 +446,11 @@
"inherits": "default",
"configurePreset": "tracy"
},
{
"name": "tracy-gates",
"inherits": "default",
"configurePreset": "tracy-gates"
},
{
"name": "clang16-dbg-fast",
"inherits": "default",
Expand Down
31 changes: 20 additions & 11 deletions barretenberg/cpp/scripts/benchmark_tracy.sh
Original file line number Diff line number Diff line change
@@ -1,31 +1,40 @@

# NOTE: intended to be run from one's external computer, connecting to Aztec mainframe
# IF ON YOUR LOCAL COMPUTER USE NORMAL INTERACTIVE TRACY WORKFLOW
# the benchmark runs with headless capture and then we copy the trace file and run tracy profiler
# This is thus only really useful internally at Aztec, sorry external folks. It can be easily tweaked
# however for any SSH setup, especially an ubuntu one, and of course if you are just tracing on the
# same machine you can use the normal interactive tracy workflow.
# however for any SSH setup, especially an ubuntu one.
# on local machine run:
# export USER=...
# export PRESET=...tracy for memory or tracy-gates for circuit gates...
# ssh $USER-box "cat ~/aztec-packages/barretenberg/cpp/scripts/benchmark_tracy.sh" | bash /dev/stdin $USER
set -eux
USER=$1
USER=${1:-$USER}
BOX=$USER-box
BENCHMARK=${2:-ultra_plonk_bench}
COMMAND=${3:-./bin/$BENCHMARK}
BENCHMARK=${2:-client_ivc_bench}
COMMAND=${3:-./bin/$BENCHMARK --benchmark_filter=ClientIVCBench/Full/6"\$"}

# Can also set PRESET=tracy-gates env variable
PRESET=${PRESET:-tracy}

ssh $BOX "
set -eux ;
cd ~/aztec-packages/barretenberg/cpp/ ;
cmake --preset $PRESET && cmake --build --preset $PRESET --target $BENCHMARK ;
! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy ;
cd ~/tracy/capture ;
sudo apt-get install libdbus-1-dev libdbus-glib-1-dev ;
git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 ;
sudo apt-get install -y libdbus-1-dev libdbus-glib-1-dev ;
mkdir -p build && cd build && cmake .. && make -j ;
./tracy-capture -a 127.0.0.1 -f -o trace-$BENCHMARK & ;
sleep 0.1 ;
cd ~/aztec-packages/barretenberg/cpp/ ;
cmake --preset tracy && cmake --build --preset tracy --parallel $BENCHMARK ;
cd build-tracy ;
ninja $BENCHMARK ;
cd ~/aztec-packages/barretenberg/cpp/build-$PRESET ;
$COMMAND ;
"
" &
wait # TODO(AD) hack - not sure why needed
! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy
cd ~/tracy
git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 # release 0.11.0
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release
cmake --build profiler/build --parallel
scp $BOX:/mnt/user-data/$USER/tracy/capture/build/trace-$BENCHMARK .
Expand Down
3 changes: 3 additions & 0 deletions barretenberg/cpp/src/barretenberg/common/mem.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "barretenberg/common/mem.hpp"

#ifdef TRACY_ENABLE
void* operator new(std::size_t count)
{
// NOLINTBEGIN(cppcoreguidelines-no-malloc)
Expand All @@ -25,3 +26,5 @@ void operator delete(void* ptr, std::size_t size) noexcept
free(ptr);
// NOLINTEND(cppcoreguidelines-no-malloc)
}

#endif
26 changes: 24 additions & 2 deletions barretenberg/cpp/src/barretenberg/common/mem.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,31 @@
#include <memory>

// This can be altered to capture stack traces, though more expensive
// This is the only reason we wrap TracyAlloc or TracyAllocS
// so we wrap TracyAlloc or TracyAllocS. We disable these if gates are being tracked.
// Gates are hackishly tracked as if they were memory, for the sweet sweet memory
// stack tree that doesn't seem to be available for other metric types.
#ifndef TRACY_HACK_GATES_AS_MEMORY
#define TRACY_ALLOC(t, size) TracyAllocS(t, size, /*stack depth*/ 10)
#define TRACY_FREE(t) TracyFreeS(t, /*stack depth*/ 10)
#define TRACY_GATE_ALLOC(t)
#define TRACY_GATE_FREE(t)
#else
#include <mutex>
#include <set>
#define TRACY_ALLOC(t, size)
#define TRACY_FREE(t)

namespace bb {
// Shared bookkeeping for the "gates tracked as memory" Tracy hack.
// Tracy complains if an ID is freed without having been allocated, or if the same ID is allocated
// twice without a free in between, so gate IDs are drawn from one incrementing counter that is
// guarded by a mutex.
// These are `inline` variables rather than `static` ones: this is a header, and `static` would give
// every translation unit that includes it a private mutex, counter and freed-set, so IDs handed out
// in different translation units could collide and the lock would not actually be shared.
inline std::mutex GLOBAL_GATE_MUTEX;
inline size_t GLOBAL_GATE = 0;
inline std::set<size_t> FREED_GATES; // IDs already reported as freed; hack to prevent instrumentation failures
} // namespace bb
#define TRACY_GATE_ALLOC(index) TracyAllocS(reinterpret_cast<void*>(index), 1, /*stack depth*/ 50)
#define TRACY_GATE_FREE(index) TracyFreeS(reinterpret_cast<void*>(index), /*stack depth*/ 50)
#endif
// #define TRACY_ALLOC(t, size) TracyAlloc(t, size)
// #define TRACY_FREE(t) TracyFree(t)

Expand Down Expand Up @@ -108,4 +130,4 @@ inline void tracy_free(void* mem)
TRACY_FREE(mem);
// NOLINTNEXTLINE(cppcoreguidelines-owning-memory, cppcoreguidelines-no-malloc)
free(mem);
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#pragma once
#include "barretenberg/common/mem.hpp"
#include "barretenberg/common/ref_array.hpp"
#include "barretenberg/common/slab_allocator.hpp"
#include <cstddef>
Expand Down Expand Up @@ -49,6 +50,18 @@ template <typename FF, size_t NUM_WIRES, size_t NUM_SELECTORS> class ExecutionTr
// If enabled, we keep slow stack traces to be able to correlate gates with code locations where they were added
StackTraces stack_traces;
#endif
#ifdef TRACY_HACK_GATES_AS_MEMORY
std::vector<size_t> allocated_gates;
#endif
/**
 * @brief Register one newly added gate with Tracy when gates are tracked as fake memory allocations.
 * @details A no-op unless TRACY_HACK_GATES_AS_MEMORY is defined. Otherwise, under GLOBAL_GATE_MUTEX,
 * the global gate counter is advanced, the new ID is reported through TRACY_GATE_ALLOC, and the ID is
 * remembered in allocated_gates so the destructor can report it as freed later.
 */
void tracy_gate()
{
#ifdef TRACY_HACK_GATES_AS_MEMORY
    const std::lock_guard<std::mutex> guard(GLOBAL_GATE_MUTEX);
    const size_t gate_id = ++GLOBAL_GATE;
    TRACY_GATE_ALLOC(gate_id);
    allocated_gates.push_back(gate_id);
#endif
}

Wires wires; // vectors of indices into a witness variables array
Selectors selectors;
Expand Down Expand Up @@ -76,6 +89,18 @@ template <typename FF, size_t NUM_WIRES, size_t NUM_SELECTORS> class ExecutionTr

uint32_t get_fixed_size() const { return fixed_size; }
void set_fixed_size(uint32_t size_in) { fixed_size = size_in; }
#ifdef TRACY_HACK_GATES_AS_MEMORY
/**
 * @brief Report every gate ID recorded in allocated_gates to Tracy as freed.
 * @details Holds GLOBAL_GATE_MUTEX for the whole walk. FREED_GATES remembers IDs that have already
 * been released, so the same ID is never passed to TRACY_GATE_FREE twice.
 */
~ExecutionTraceBlock()
{
    const std::lock_guard<std::mutex> guard(GLOBAL_GATE_MUTEX);
    for ([[maybe_unused]] const size_t gate_id : allocated_gates) {
        if (FREED_GATES.contains(gate_id)) {
            continue; // this ID was already reported as freed
        }
        TRACY_GATE_FREE(gate_id);
        FREED_GATES.insert(gate_id);
    }
}
#endif
};

} // namespace bb
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ template <typename FF_> class MegaArith {
#ifdef CHECK_CIRCUIT_STACKTRACES
this->stack_traces.populate();
#endif
this->tracy_gate();
this->wires[0].emplace_back(idx_1);
this->wires[1].emplace_back(idx_2);
this->wires[2].emplace_back(idx_3);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ template <typename FF_> class StandardArith {
#ifdef CHECK_CIRCUIT_STACKTRACES
this->stack_traces.populate();
#endif
this->tracy_gate();
this->wires[0].emplace_back(idx_1);
this->wires[1].emplace_back(idx_2);
this->wires[2].emplace_back(idx_3);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ template <typename FF_> class UltraArith {
#ifdef CHECK_CIRCUIT_STACKTRACES
this->stack_traces.populate();
#endif
this->tracy_gate();
this->wires[0].emplace_back(idx_1);
this->wires[1].emplace_back(idx_2);
this->wires[2].emplace_back(idx_3);
Expand Down
Loading