// PerformanceProfiler.hpp
#pragma once
#include <array>
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <functional>
#include <limits>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
/**
 * @brief Fixed-size ring buffer for profiling events
 *
 * Holds up to BUFFER_SIZE ProfileEvent records in a fixed array and tracks the
 * producer and consumer positions with atomic indices, so that recording an
 * event is intended not to block the instrumented thread. All member
 * functions are defined out of line.
 *
 * NOTE(review): whether more than one thread may call tryWrite() concurrently
 * depends on the out-of-line implementation - confirm there.
 */
class ProfilerRingBuffer {
public:
    // Capacity in events. Must be a power of 2 so that an index can be
    // wrapped with a single AND against BUFFER_MASK.
    static constexpr size_t BUFFER_SIZE = 4096;
    static constexpr size_t BUFFER_MASK = BUFFER_SIZE - 1;

    // Enforce the documented invariant at compile time instead of relying on
    // the comment: masking silently corrupts indices for any other size.
    static_assert(BUFFER_SIZE != 0 && (BUFFER_SIZE & BUFFER_MASK) == 0,
                  "BUFFER_SIZE must be a non-zero power of 2");
    // The buffer is advertised as lock-free; make sure the index type is.
    static_assert(std::atomic<size_t>::is_always_lock_free,
                  "std::atomic<size_t> must be lock-free on this platform");

    /**
     * @brief One recorded profiling event
     */
    struct ProfileEvent {
        std::chrono::high_resolution_clock::time_point timestamp;  // when the event was recorded
        const char* section;      // section name; only the pointer is stored, not a copy
        bool isEntry;             // true for section entry, false for section exit
        uint64_t threadId;        // identifier of the recording thread
        uint64_t additionalData;  // caller-supplied payload
    };

    ProfilerRingBuffer();

    /**
     * @brief Try to write an event to the ring buffer
     *
     * @param section Name of the code section (the pointer itself is stored
     *                in the event, so the string must stay valid until the
     *                event has been consumed)
     * @param isEntry True if entering section, false if exiting
     * @param additionalData Any additional data to store with event
     * @return true if write was successful, false if buffer was full
     */
    bool tryWrite(const char* section, bool isEntry, uint64_t additionalData = 0);

    /**
     * @brief Try to read an event from the ring buffer
     *
     * @param processor Function to process the event
     * @return true if an event was read and processed, false if buffer was empty
     */
    bool tryRead(std::function<void(const ProfileEvent&)> processor);

private:
    // Each member below is aligned to 64 bytes (assumed cache-line size) so
    // the buffer and the two indices do not share a cache line.
    alignas(64) std::array<ProfileEvent, BUFFER_SIZE> m_buffer;
    // Write index (producer). Initialised here because a default-constructed
    // std::atomic holds an indeterminate value before C++20.
    alignas(64) std::atomic<size_t> m_writeIndex{ 0 };
    // Read index (consumer)
    alignas(64) std::atomic<size_t> m_readIndex{ 0 };
};
/**
 * @brief High-performance, non-blocking profiler for real-time applications
 *
 * Instrumented threads report section entry/exit through enterSection() and
 * exitSection(); events pass through a ProfilerRingBuffer and a dedicated
 * writer thread (writerLoop) handles output to the CSV file, so that file I/O
 * stays off the instrumented threads. All member functions are defined out of
 * line.
 *
 * The object owns a thread and an open file stream and is therefore neither
 * copyable nor movable.
 */
class PerformanceProfiler {
public:
    /**
     * @brief Construct a new Performance Profiler
     *
     * @param filepath Path for the output CSV file
     */
    explicit PerformanceProfiler(const std::string& filepath);

    /**
     * @brief Destroy the Performance Profiler
     *
     * Stops the profiler if it's still running and finishes writing data
     */
    ~PerformanceProfiler();

    // Copying would duplicate ownership of the writer thread and the output
    // file. The members already make this impossible; spell it out so the
    // intent is visible and the compiler diagnostic is direct.
    PerformanceProfiler(const PerformanceProfiler&) = delete;
    PerformanceProfiler& operator=(const PerformanceProfiler&) = delete;

    /**
     * @brief Start the profiler
     *
     * @return true if successfully started, false on error
     */
    bool start();

    /**
     * @brief Stop the profiler
     *
     * Waits for the writer thread to finish and writes statistics
     */
    void stop();

    /**
     * @brief Record entering a code section
     *
     * @param section Name of the section being entered
     */
    void enterSection(const char* section);

    /**
     * @brief Record exiting a code section
     *
     * @param section Name of the section being exited
     * @param additionalData Any additional data to record (e.g., batch size)
     */
    void exitSection(const char* section, uint64_t additionalData = 0);

    /**
     * @brief Get the singleton instance
     *
     * @return Reference to the global profiler instance
     */
    static PerformanceProfiler& getInstance();

private:
    // Per-thread record of when each section was entered, keyed by section name
    struct ThreadStorage {
        std::unordered_map<std::string, std::chrono::high_resolution_clock::time_point> entryTimes;
    };
    static thread_local ThreadStorage tls;

    // Aggregated timing statistics for a single code section
    struct SectionStat {
        std::atomic<uint64_t> totalNs{ 0 };
        std::atomic<uint64_t> callCount{ 0 };
        std::atomic<uint64_t> maxNs{ 0 };
        // Starts at the largest value so the first sample always becomes the minimum
        std::atomic<uint64_t> minNs{ std::numeric_limits<uint64_t>::max() };
    };

    std::string m_filepath;
    std::ofstream m_file;
    ProfilerRingBuffer m_ringBuffer;
    // Explicitly starts as "not running": a default-constructed std::atomic
    // holds an indeterminate value before C++20, and the destructor is
    // documented to consult the running state.
    std::atomic<bool> m_isRunning{ false };
    std::thread m_writerThread;
    std::chrono::high_resolution_clock::time_point m_startTime;

    // Per-section statistics; access is guarded by m_statsMutex
    std::unordered_map<std::string, SectionStat> m_stats;
    std::shared_mutex m_statsMutex;

    // Writer thread function
    void writerLoop();
};
// Shorthand for driving the global profiler instance.
// PROFILER_START() / PROFILER_STOP() forward to start() / stop();
// PROFILE_SCOPE_START(name) / PROFILE_SCOPE_END(name) forward to
// enterSection(name) / exitSection(name) for manual (non-RAII) instrumentation.
#define PROFILER_START() PerformanceProfiler::getInstance().start()
#define PROFILER_STOP() PerformanceProfiler::getInstance().stop()
#define PROFILE_SCOPE_START(name) PerformanceProfiler::getInstance().enterSection(name)
#define PROFILE_SCOPE_END(name) PerformanceProfiler::getInstance().exitSection(name)
/**
 * @brief RAII-style automatic section profiler
 *
 * Automatically profiles a scope from construction to destruction.
 *
 * The object is a scope guard: it is neither copyable nor movable, because a
 * second object for the same section would report the section exit a second
 * time when it is destroyed.
 */
class ScopedProfiler {
public:
    /**
     * @brief Construct a new Scoped Profiler
     *
     * @param section Section name to profile (held as a raw pointer, so the
     *                string must outlive this object; string literals are the
     *                expected argument)
     * @param additionalData Optional additional data to record on exit
     */
    explicit ScopedProfiler(const char* section, uint64_t additionalData = 0);

    /**
     * @brief Destroy the Scoped Profiler
     *
     * Records the section exit time
     */
    ~ScopedProfiler();

    // One guard == one entry/exit pair; forbid duplicating or transferring it.
    ScopedProfiler(const ScopedProfiler&) = delete;
    ScopedProfiler& operator=(const ScopedProfiler&) = delete;
    ScopedProfiler(ScopedProfiler&&) = delete;
    ScopedProfiler& operator=(ScopedProfiler&&) = delete;

private:
    const char* m_section;      // section name passed to the constructor
    uint64_t m_additionalData;  // payload passed to the constructor
};
// Convenient macros for RAII profiling.
//
// Each macro declares a local ScopedProfiler whose name embeds the current
// line number, so several of them can be used in the same scope (on different
// lines). The two-level PROFILER_DETAIL_CONCAT is required: operands of ## are
// not macro-expanded, so pasting __LINE__ directly would always produce the
// literal identifier `_profiler___LINE__` and a second use in the same scope
// would be a redeclaration error.
#define PROFILER_DETAIL_CONCAT_IMPL(a, b) a##b
#define PROFILER_DETAIL_CONCAT(a, b) PROFILER_DETAIL_CONCAT_IMPL(a, b)
#define PROFILER_DETAIL_UNIQUE_NAME PROFILER_DETAIL_CONCAT(_profiler_, __LINE__)

// Profile the enclosing function, using its name as the section name.
#define PROFILE_FUNCTION() \
    ScopedProfiler PROFILER_DETAIL_UNIQUE_NAME(__FUNCTION__)
// Profile the rest of the enclosing scope under the given section name.
#define PROFILE_SCOPE(name) \
    ScopedProfiler PROFILER_DETAIL_UNIQUE_NAME(name)
// Same as PROFILE_SCOPE, additionally passing `data` to the ScopedProfiler.
#define PROFILE_SCOPE_WITH_DATA(name, data) \
    ScopedProfiler PROFILER_DETAIL_UNIQUE_NAME(name, data)
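// For release builds, profiling can be compiled out by giving the macros
// empty definitions. Either #undef each macro first or guard the definitions
// above with #ifndef NDEBUG; redefining a macro with a different body is
// otherwise diagnosed by the compiler:
// #ifdef NDEBUG
// #define PROFILE_FUNCTION()
// #define PROFILE_SCOPE(name)
// #define PROFILE_SCOPE_WITH_DATA(name, data)
// #define PROFILE_SCOPE_START(name)
// #define PROFILE_SCOPE_END(name)
// #define PROFILER_START()
// #define PROFILER_STOP()
// #endif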