Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend AllocMonitor #42814

Merged
merged 5 commits into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions PerfTools/AllocMonitor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,25 @@ bin is a power of 2 larger than the previous. The histograms made are
- Amount of bytes actually returned by all deallocation calls

This service is multi-thread safe. Note that when run multi-threaded the maximum reported value will vary from job to job.

### PeriodicAllocMonitor
This service registers a monitor when the service is created (after python parsing is finished but before any modules
have been loaded into cmsRun) and prints its accumulated information to the specified file at specified intervals. Both
the file name and interval are specified by setting parameters of the service in the configuration. The parameters are
- filename: name of file to which to write reports
- millisecondsPerMeasurement: number of milliseconds to wait between making each report

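The first line of the output file is a header naming the columns. Each subsequent line is one report containing the following comma-separated values, in this order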
- The time, in milliseconds, since the service was created
- The total number of Runs which have been started in the job
- The total number of LuminosityBlocks which have been started
- The total number of Events which have been started
- The total number of Events which have finished
- Total amount of bytes requested by all allocation calls since the service started
- The maximum amount of _used_ (i.e. actual size) allocated memory that has been seen up to this point in the job
- The amount of _used_ memory allocated at the time the report was made.
- The largest single allocation request that has been seen up to the time of the report
- Number of calls made to allocation functions
- Number of calls made to deallocation functions

This service is multi-thread safe. Note that when run multi-threaded the maximum reported value will vary from job to job.
10 changes: 6 additions & 4 deletions PerfTools/AllocMonitor/interface/AllocMonitorRegistry.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ namespace cms::perftools {
friend void* ::calloc(size_t, size_t) noexcept;
friend void* ::realloc(void*, size_t) noexcept;
friend void* ::aligned_alloc(size_t, size_t) noexcept;
friend int ::posix_memalign(void**, size_t, size_t) noexcept;
friend void* ::memalign(size_t, size_t) noexcept;
friend void ::free(void*) noexcept;

friend void* ::operator new(std::size_t size);
Expand Down Expand Up @@ -118,12 +120,12 @@ namespace cms::perftools {
return a;
}
template <typename DEALLOC, typename ACT>
void deallocCalled(DEALLOC iDealloc, ACT iGetActual) {
void deallocCalled(void* iPtr, DEALLOC iDealloc, ACT iGetActual) {
[[maybe_unused]] Guard g = makeGuard();
if (g.running()) {
deallocCalled_(iGetActual());
if (g.running() and iPtr != nullptr) {
deallocCalled_(iGetActual(iPtr));
}
iDealloc();
iDealloc(iPtr);
}

AllocMonitorRegistry();
Expand Down
158 changes: 158 additions & 0 deletions PerfTools/AllocMonitor/plugins/PeriodicAllocMonitor.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
// -*- C++ -*-
//
// Package: PerfTools/AllocMonitor
// Class : PeriodicAllocMonitor
//
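// Implementation:
//     A service which starts a background thread that periodically writes
//     the statistics gathered by a registered allocation monitor to a file.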
//
// Original Author: Christopher Jones
// Created: Fri, 15 Sep 2023 14:44:38 GMT
//

// system include files
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <fstream>
#include <mutex>
#include <string>
#include <thread>

// user include files
#include "PerfTools/AllocMonitor/interface/AllocMonitorBase.h"
#include "PerfTools/AllocMonitor/interface/AllocMonitorRegistry.h"
#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"

namespace {
class MonitorAdaptor : public cms::perftools::AllocMonitorBase {
public:
struct Report {
size_t requested_;
size_t presentActual_;
size_t maxActual_;
size_t nAllocations_;
size_t nDeallocations_;
size_t maxSingleRequested_;
};
Report report() const {
Report report;
report.requested_ = requested_.load(std::memory_order_acquire);
report.maxActual_ = maxActual_.load(std::memory_order_acquire);
report.presentActual_ = presentActual_.load(std::memory_order_acquire);
report.nAllocations_ = nAllocations_.load(std::memory_order_acquire);
report.nDeallocations_ = nDeallocations_.load(std::memory_order_acquire);
report.maxSingleRequested_ = maxSingleRequested_.load(std::memory_order_acquire);

return report;
}

private:
void allocCalled(size_t iRequested, size_t iActual) final {
nAllocations_.fetch_add(1, std::memory_order_acq_rel);
requested_.fetch_add(iRequested, std::memory_order_acq_rel);

//returns previous value
auto a = presentActual_.fetch_add(iActual, std::memory_order_acq_rel);
a += iActual;

auto max = maxActual_.load(std::memory_order_relaxed);
while (a > max) {
if (maxActual_.compare_exchange_strong(max, a, std::memory_order_acq_rel)) {
break;
}
}

auto single = maxSingleRequested_.load(std::memory_order_relaxed);
while (iRequested > single) {
if (maxSingleRequested_.compare_exchange_strong(single, iRequested, std::memory_order_acq_rel)) {
break;
}
}
}
void deallocCalled(size_t iActual) final {
if (0 == iActual)
return;
nDeallocations_.fetch_add(1, std::memory_order_acq_rel);
auto present = presentActual_.load(std::memory_order_acquire);
if (present >= iActual) {
presentActual_.fetch_sub(iActual, std::memory_order_acq_rel);
}
}

std::atomic<size_t> requested_ = 0;
std::atomic<size_t> presentActual_ = 0;
std::atomic<size_t> maxActual_ = 0;
std::atomic<size_t> nAllocations_ = 0;
std::atomic<size_t> nDeallocations_ = 0;
std::atomic<size_t> maxSingleRequested_ = 0;
};

} // namespace

/// Service which registers a MonitorAdaptor and starts a background thread
/// that writes one line of allocation statistics, together with counts of the
/// framework transitions seen so far, to a file once per measurement interval.
///
/// Configuration parameters (both untracked):
///  - filename: name of the file to which the reports are written
///  - millisecondsPerMeasurement: time to wait between two reports
class PeriodicAllocMonitor {
public:
  PeriodicAllocMonitor(edm::ParameterSet const& iPS, edm::ActivityRegistry& iAR) {
    auto adaptor = cms::perftools::AllocMonitorRegistry::instance().createAndRegisterMonitor<MonitorAdaptor>();
    auto fileName = iPS.getUntrackedParameter<std::string>("filename");
    auto interval = iPS.getUntrackedParameter<unsigned long long>("millisecondsPerMeasurement");

    thread_ = std::thread([this, fileName, interval, adaptor]() {
      auto const start = std::chrono::steady_clock::now();
      std::ofstream fs(fileName);
      fs << "timestamp, runs-started, lumis-started, events-started, events-finished, total-requested, max-actual, "
            "present-actual, max-single, nAllocs, nDeallocs\n";
      while (continueRunning_.load()) {
        auto rStarted = nRunsStarted_.load(std::memory_order_acquire);
        auto lStarted = nLumisStarted_.load(std::memory_order_acquire);
        auto const now = std::chrono::steady_clock::now();
        auto eStarted = nEventsStarted_.load(std::memory_order_acquire);
        auto eFinished = nEventsFinished_.load(std::memory_order_acquire);
        auto report = adaptor->report();

        // std::endl flushes so each report reaches the file as soon as it is made.
        fs << std::chrono::duration_cast<std::chrono::milliseconds>(now - start).count() << ", " << rStarted << ", "
           << lStarted << ", " << eStarted << ", " << eFinished << ", " << report.requested_ << ", "
           << report.maxActual_ << ", " << report.presentActual_ << ", " << report.maxSingleRequested_ << ", "
           << report.nAllocations_ << ", " << report.nDeallocations_ << std::endl;

        // Wait for the next measurement, but wake up immediately when a
        // shutdown is requested so that join() does not have to wait for the
        // remainder of the interval.
        std::unique_lock<std::mutex> lock(shutdownMutex_);
        shutdownRequested_.wait_for(
            lock, std::chrono::milliseconds(interval), [this]() { return not continueRunning_.load(); });
      }
    });

    iAR.watchPreEvent([this](auto const&) { nEventsStarted_.fetch_add(1, std::memory_order_acq_rel); });
    iAR.watchPostEvent([this](auto const&) { nEventsFinished_.fetch_add(1, std::memory_order_acq_rel); });
    iAR.watchPreGlobalBeginRun([this](auto const&) { nRunsStarted_.fetch_add(1, std::memory_order_acq_rel); });
    iAR.watchPreGlobalBeginLumi([this](auto const&) { nLumisStarted_.fetch_add(1, std::memory_order_acq_rel); });
    iAR.watchPreEndJob([adaptor, this]() {
      // Stop the reporter before deregistering so the thread no longer uses the adaptor.
      stopReporting();
      cms::perftools::AllocMonitorRegistry::instance().deregisterMonitor(adaptor);
    });
  }

  /// Stops the reporting thread if the pre-end-job callback did not already do so.
  ~PeriodicAllocMonitor() { stopReporting(); }

  static void fillDescriptions(edm::ConfigurationDescriptions& iDesc) {
    edm::ParameterSetDescription ps;
    ps.addUntracked<std::string>("filename", "timing.log")->setComment("Name of file to write the reports");
    ps.addUntracked<unsigned long long>("millisecondsPerMeasurement", 1000)
        ->setComment("The frequency at which to write reports");
    iDesc.addDefault(ps);
  }

private:
  /// Asks the reporting thread to finish and waits for it.
  /// Safe to call more than once; later calls find the thread already joined.
  void stopReporting() {
    {
      // The flag is changed while holding the mutex so that the thread cannot
      // miss the notification between testing the flag and starting to wait.
      std::lock_guard<std::mutex> lock(shutdownMutex_);
      continueRunning_ = false;
    }
    shutdownRequested_.notify_all();
    if (thread_.joinable()) {
      thread_.join();
    }
  }

  std::thread thread_;
  std::mutex shutdownMutex_;
  std::condition_variable shutdownRequested_;
  std::atomic<std::size_t> nRunsStarted_ = 0;
  std::atomic<std::size_t> nLumisStarted_ = 0;
  std::atomic<std::size_t> nEventsStarted_ = 0;
  std::atomic<std::size_t> nEventsFinished_ = 0;
  std::atomic<bool> continueRunning_ = true;
};

DEFINE_FWK_SERVICE(PeriodicAllocMonitor);
33 changes: 30 additions & 3 deletions PerfTools/AllocMonitor/test/test_catch2_AllocMonitorRegistry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ namespace cms::perftools {
}

void callDealloc(size_t iActual) {
reg_.deallocCalled([]() {}, [iActual]() { return iActual; });
reg_.deallocCalled(
reinterpret_cast<void*>(1), [](auto) {}, [iActual](auto) { return iActual; });
}

template <typename A>
Expand All @@ -22,7 +23,12 @@ namespace cms::perftools {

template <typename D>
void callDealloc(size_t iActual, D&& iDealloc) {
reg_.deallocCalled(std::forward<D>(iDealloc), [iActual]() { return iActual; });
reg_.deallocCalled(reinterpret_cast<void*>(1), std::forward<D>(iDealloc), [iActual](auto) { return iActual; });
}

void callDeallocNull() {
reg_.deallocCalled(
nullptr, [](auto) {}, [](auto) { return 0; });
}

AllocMonitorRegistry reg_;
Expand Down Expand Up @@ -111,6 +117,27 @@ TEST_CASE("Test API for AllocMonitorRegistry", "[AllocMonitorRegistry]") {
s_stopped = false;
s_calls = 0;
}
SECTION("Null delete") {
{
AllocTester t;
CHECK(s_started == false);
CHECK(s_stopped == false);

auto tester = t.reg_.createAndRegisterMonitor<TestCallMonitor>(1);
CHECK(s_started == true);
CHECK(s_stopped == false);
CHECK(1 == s_calls);
CHECK(tester != nullptr);

t.callDeallocNull();
CHECK(1 == s_calls);
t.reg_.deregisterMonitor(tester);
CHECK(2 == s_calls);
}
s_started = false;
s_stopped = false;
s_calls = 0;
}
SECTION("Recursion in monitor") {
CHECK(0 == s_calls);
CHECK(s_started == false);
Expand Down Expand Up @@ -162,7 +189,7 @@ TEST_CASE("Test API for AllocMonitorRegistry", "[AllocMonitorRegistry]") {
});
CHECK(2 == s_calls);

t.callDealloc(1, [&t]() { t.callDealloc(1); });
t.callDealloc(1, [&t](auto) { t.callDealloc(1); });
CHECK(3 == s_calls);

t.reg_.deregisterMonitor(tester);
Expand Down
39 changes: 36 additions & 3 deletions PerfTools/AllocMonitor/test/test_proxies.cc
Original file line number Diff line number Diff line change
Expand Up @@ -182,17 +182,50 @@ int main() {
exit(1);
}

p = aligned_alloc(128, 32);
if (requested != 32) {
p = aligned_alloc(128, 128 * 3);
if (requested != 128 * 3) {
auto r = requested;
std::cout << "aligned_alloc request size wrong, got " << r << " expected " << 32;
std::cout << "aligned_alloc request size wrong, got " << r << " expected " << 128 * 3;
exit(1);
}
free(p);
if (total != 0) {
std::cout << "aligned_alloc request not cleaned up";
exit(1);
}

p = memalign(256, 24);
if (requested != 24) {
auto r = requested;
std::cout << "memalign request size wrong, got " << r << " expected " << 24;
exit(1);
}
free(p);
if (total != 0) {
std::cout << "memalign request not cleaned up";
exit(1);
}

p = nullptr;
auto ret = posix_memalign(&p, 128, 64);
if (p == nullptr) {
std::cout << "posix_memalign failed to allocate ";
exit(1);
}
if (ret != 0) {
std::cout << "posix_memalign returned failed valued " << ret;
exit(1);
}
if (requested != 64) {
auto r = requested;
std::cout << "posix_memalign request size wrong, got " << r << " expected " << 64;
exit(1);
}
free(p);
if (total != 0) {
std::cout << "posix_memalign request not cleaned up";
exit(1);
}
}
AllocMonitorRegistry::instance().deregisterMonitor(monitor);
}
Expand Down
Loading