Skip to content

Commit

Permalink
Fixing bugs in OMPT offload tracing, task dependencies, flow events. …
Browse files Browse the repository at this point in the history
…All seems working?
  • Loading branch information
khuck committed May 3, 2022
1 parent 5e2d561 commit 337816c
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 24 deletions.
23 changes: 15 additions & 8 deletions src/apex/apex_ompt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,16 @@ class linked_timer {
void *p,
std::shared_ptr<apex::task_wrapper> &parent,
bool auto_start,
const void * codeptr_ra) :
const void * codeptr_ra,
bool is_par_reg) :
prev(p), timing(auto_start), codeptr(codeptr_ra) {
// No GUIDs generated by the runtime? Generate our own.
if (task_id == 0ULL) {
tw = apex::new_task(name, UINT64_MAX, parent);
} else {
tw = apex::new_task(name, task_id, parent);
}
if (is_par_reg) { tw->explicit_trace_start = true; }
if (auto_start) { this->start(); }
}
/* destructor */
Expand Down Expand Up @@ -312,8 +314,12 @@ static void print_record_ompt(ompt_record_ompt_t *rec) {
target_map.erase(rec->target_id);
target_start_times.erase(rec->target_id);
}
apex::base_thread_node node(target_rec.device_num, APEX_ASYNC_KERNEL);
stop_async_task(tt, start, rec->time, rec->target_id, node);
/* If we have a target region with a device id of -1, we might not get
a target region start event - so ignore this end event for now. */
if (tt != nullptr) {
apex::base_thread_node node(target_rec.device_num, APEX_ASYNC_KERNEL);
stop_async_task(tt, start, rec->time, rec->target_id, node);
}
}
break;
}
Expand Down Expand Up @@ -499,7 +505,8 @@ void apex_ompt_start(const char * state,
ompt_data_t * ompt_data,
ompt_data_t * region_data,
bool auto_start,
const void * codeptr_ra = nullptr) {
const void * codeptr_ra = nullptr,
bool is_par_reg = false) {
if (!enabled) { return; }
static std::shared_ptr<apex::task_wrapper> nothing(nullptr);
linked_timer* tmp;
Expand All @@ -509,10 +516,10 @@ void apex_ompt_start(const char * state,
linked_timer* parent = (linked_timer*)(region_data->ptr);
if (parent != nullptr) {
tmp = new linked_timer(state, ompt_data->value, ompt_data->ptr,
parent->tw, auto_start, codeptr_ra);
parent->tw, auto_start, codeptr_ra, is_par_reg);
} else {
tmp = new linked_timer(state, ompt_data->value, ompt_data->ptr,
nothing, auto_start, codeptr_ra);
nothing, auto_start, codeptr_ra, is_par_reg);
}
#if 0
/* if the ompt_data->ptr pointer is not null, that means we have an implicit
Expand All @@ -525,7 +532,7 @@ void apex_ompt_start(const char * state,
#endif
} else {
tmp = new linked_timer(state, ompt_data->value, ompt_data->ptr,
nothing, auto_start, codeptr_ra);
nothing, auto_start, codeptr_ra, is_par_reg);
}

/* Save the address of the scoped timer with the parallel region
Expand Down Expand Up @@ -604,7 +611,7 @@ static void apex_parallel_region_begin (
APEX_UNUSED(flags);
char regionIDstr[128] = {0};
sprintf(regionIDstr, "OpenMP Parallel Region: UNRESOLVED ADDR %p", codeptr_ra);
apex_ompt_start(regionIDstr, parallel_data, encountering_task_data, true, codeptr_ra);
apex_ompt_start(regionIDstr, parallel_data, encountering_task_data, true, codeptr_ra, true);
DEBUG_PRINT("%" PRId64 ": Parallel Region Begin parent: %p, apex_parent: %p, region: %p, apex_region: %p, %s\n", apex_threadid, (void*)encountering_task_data, encountering_task_data->ptr, (void*)parallel_data, parallel_data->ptr, regionIDstr);
}

Expand Down
7 changes: 6 additions & 1 deletion src/apex/task_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ struct task_wrapper {
\brief Time (in microseconds) when this task was started, if started
*/
double start_time;
/**
\brief Whether this event requires separate start/end events in gtrace
*/
bool explicit_trace_start;
/**
\brief Constructor.
*/
Expand All @@ -89,7 +93,8 @@ struct task_wrapper {
tree_node(nullptr),
alias(nullptr),
thread_id(0UL),
start_time(0)
start_time(0),
explicit_trace_start(false)
{ }
/**
\brief Get the task_identifier for this task_wrapper.
Expand Down
59 changes: 46 additions & 13 deletions src/apex/trace_event_listener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,19 +91,36 @@ void trace_event_listener::on_exit_thread(event_data &data) {
return;
}

inline void trace_event_listener::_common_start(std::shared_ptr<task_wrapper> &tt_ptr) {
static APEX_NATIVE_TLS long unsigned int tid = get_thread_id_metadata();
if (!_terminate) {
std::stringstream ss;
ss << fixed;
uint64_t pguid = 0;
if (tt_ptr->parent != nullptr) {
pguid = tt_ptr->parent->guid;
}
ss << "{\"name\":\"" << tt_ptr->get_task_id()->get_name()
<< "\",\"cat\":\"CPU\""
<< ",\"ph\":\"B\",\"pid\":"
<< saved_node_id << ",\"tid\":" << tid
<< ",\"ts\":" << tt_ptr->prof->get_start_us()
<< ",\"args\":{\"GUID\":" << tt_ptr->prof->guid << ",\"Parent GUID\":" << pguid << "}},\n";
write_to_trace(ss);
flush_trace_if_necessary();
}
return;
}

bool trace_event_listener::on_start(std::shared_ptr<task_wrapper> &tt_ptr) {
APEX_UNUSED(tt_ptr);
/*
* Do nothing - we can do a "complete" record at stop
*/
if (tt_ptr->explicit_trace_start) _common_start(tt_ptr);
return true;
}

bool trace_event_listener::on_resume(std::shared_ptr<task_wrapper> &tt_ptr) {
APEX_UNUSED(tt_ptr);
/*
* Do nothing - we can do a "complete" record at stop
*/
if (tt_ptr->explicit_trace_start) _common_start(tt_ptr);
return true;
}

Expand All @@ -125,6 +142,11 @@ long unsigned int trace_event_listener::get_thread_id_metadata() {
return tid;
}

uint64_t get_flow_id() {
static uint64_t flow_id = 0;
return ++flow_id;
}

void write_flow_event(std::stringstream& ss, double ts, char ph,
std::string cat, uint64_t id, uint64_t pid, uint64_t tid, std::string name) {
ss << "{\"ts\":" << ts
Expand All @@ -148,18 +170,28 @@ inline void trace_event_listener::_common_stop(std::shared_ptr<profiler> &p) {
// if the parent tid is not the same, create a flow event BEFORE the single event
if (p->tt_ptr->parent != nullptr && p->tt_ptr->parent->thread_id != tid) {
//std::cout << "FLOWING!" << std::endl;
write_flow_event(ss, p->tt_ptr->parent->start_time, 's', "ControlFlow", pguid,
uint64_t flow_id = get_flow_id();
write_flow_event(ss, p->tt_ptr->parent->start_time, 's', "ControlFlow", flow_id,
saved_node_id, p->tt_ptr->parent->thread_id, p->tt_ptr->parent->task_id->get_name());
write_flow_event(ss, p->get_start_us(), 'f', "ControlFlow", pguid,
write_flow_event(ss, p->get_start_us(), 'f', "ControlFlow", flow_id,
saved_node_id, tid, p->tt_ptr->parent->task_id->get_name());
}
ss << "{\"name\":\"" << p->get_task_id()->get_name()
if (p->tt_ptr->explicit_trace_start) {
ss << "{\"name\":\"" << p->get_task_id()->get_name()
<< "\",\"cat\":\"CPU\""
<< ",\"ph\":\"E\",\"pid\":"
<< saved_node_id << ",\"tid\":" << tid
<< ",\"ts\":" << p->get_stop_us()
<< "},\n";
} else {
ss << "{\"name\":\"" << p->get_task_id()->get_name()
<< "\",\"cat\":\"CPU\""
<< ",\"ph\":\"X\",\"pid\":"
<< saved_node_id << ",\"tid\":" << tid
<< ",\"ts\":" << p->get_start_us() << ",\"dur\":"
<< p->get_stop_us() - p->get_start_us()
<< ",\"args\":{\"GUID\":" << p->guid << ",\"Parent GUID\":" << pguid << "}},\n";
}
write_to_trace(ss);
flush_trace_if_necessary();
}
Expand Down Expand Up @@ -268,16 +300,17 @@ void trace_event_listener::on_async_event(base_thread_node &node,
// write a flow event pair!
// make sure the start of the flow is before the end of the flow, ideally the middle of the parent
if (data.flow) {
uint64_t flow_id = get_flow_id();
if (data.reverse_flow) {
double begin_ts = (p->get_stop_us() + p->get_start_us()) * 0.5;
double end_ts = std::min(p->get_stop_us(), data.parent_ts_stop);
write_flow_event(ss, begin_ts, 's', data.cat, data.id, saved_node_id, atol(tid.c_str()), data.name);
write_flow_event(ss, end_ts, 't', data.cat, data.id, saved_node_id, data.parent_tid, data.name);
write_flow_event(ss, begin_ts, 's', data.cat, flow_id, saved_node_id, atol(tid.c_str()), data.name);
write_flow_event(ss, end_ts, 't', data.cat, flow_id, saved_node_id, data.parent_tid, data.name);
} else {
double begin_ts = std::min(p->get_start_us(), ((data.parent_ts_stop + data.parent_ts_start) * 0.5));
double end_ts = p->get_start_us();
write_flow_event(ss, begin_ts, 's', data.cat, data.id, saved_node_id, data.parent_tid, data.name);
write_flow_event(ss, end_ts, 't', data.cat, data.id, saved_node_id, atol(tid.c_str()), data.name);
write_flow_event(ss, begin_ts, 's', data.cat, flow_id, saved_node_id, data.parent_tid, data.name);
write_flow_event(ss, end_ts, 't', data.cat, flow_id, saved_node_id, atol(tid.c_str()), data.name);
}
}
write_to_trace(ss);
Expand Down
1 change: 1 addition & 0 deletions src/apex/trace_event_listener.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class trace_event_listener : public event_listener {
void flush_trace(void);
void close_trace(void);
void flush_trace_if_necessary(void);
void _common_start(std::shared_ptr<task_wrapper> &tt_ptr);
void _common_stop(std::shared_ptr<profiler> &p);
std::string make_tid (base_thread_node &node);
long unsigned int get_thread_id_metadata();
Expand Down
2 changes: 0 additions & 2 deletions src/openmp/ompt_parallel_region.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include <unistd.h>
#include <stdio.h>
#include <omp.h>
#include "apex.h"

__attribute__((noinline)) void foo(void) {
#pragma omp parallel
Expand All @@ -24,7 +23,6 @@ __attribute__((noinline)) void bar(void) {
}

int main (int argc, char** argv) {
apex_set_use_screen_output(1);
foo();
bar();
return 0;
Expand Down

0 comments on commit 337816c

Please sign in to comment.