Skip to content

Commit

Permalink
Fixing #136.
Browse files Browse the repository at this point in the history
Now have the ability to capture task tree, not just graph.
No more cycles!
  • Loading branch information
khuck committed Mar 12, 2021
1 parent 68e9089 commit 9b89b30
Show file tree
Hide file tree
Showing 12 changed files with 143 additions and 62 deletions.
2 changes: 2 additions & 0 deletions src/apex/CMakeLists.hpx
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ set(apex_headers
apex_policies.hpp
apex_types.h
concurrency_handler.hpp
dependency_tree.hpp
event_listener.hpp
handler.hpp
policy_handler.hpp
Expand All @@ -300,6 +301,7 @@ set(apex_sources
apex_options.cpp
apex_policies.cpp
concurrency_handler.cpp
dependency_tree.cpp
event_listener.cpp
event_filter.cpp
policy_handler.cpp
Expand Down
1 change: 1 addition & 0 deletions src/apex/CMakeLists.standalone
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ ${OMPT_SOURCE}
${OpenACC_SOURCE}
${RAJA_SOURCE}
concurrency_handler.cpp
dependency_tree.cpp
event_listener.cpp
handler.cpp
${OTF2_SOURCE}
Expand Down
4 changes: 2 additions & 2 deletions src/apex/activity_trace_async.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1218,7 +1218,7 @@ bool getBytesIfMalloc(CUpti_CallbackId id, const void* params, std::string conte
} else {
store_sync_counter_data("GPU: Bytes Freed", context, value);
}
totalAllocated = totalAllocated - value;
totalAllocated.fetch_sub(value, std::memory_order_relaxed);
store_sync_counter_data("GPU: Total Bytes Occupied on Device", context, totalAllocated, false, false);
}
// If we are in the exit of a function, and we are allocating memory,
Expand All @@ -1244,7 +1244,7 @@ bool getBytesIfMalloc(CUpti_CallbackId id, const void* params, std::string conte
mapMutex.lock();
memoryMap[ptr] = value;
mapMutex.unlock();
totalAllocated = totalAllocated + value;
totalAllocated.fetch_add(value, std::memory_order_relaxed);
store_sync_counter_data("GPU: Total Bytes Occupied on Device", context, totalAllocated, false, false);
}
}
Expand Down
11 changes: 10 additions & 1 deletion src/apex/apex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
#include "global_constructor_destructor.h"
#else
#include "global_constructor_destructor.h"
#ifdef HAS_CONSTRUCTORS
#if defined(HAS_CONSTRUCTORS)
extern "C" {
DEFINE_CONSTRUCTOR(apex_init_static_void)
DEFINE_DESTRUCTOR(apex_finalize_static_void)
Expand Down Expand Up @@ -499,6 +499,9 @@ inline std::shared_ptr<task_wrapper> _new_task(
// tt_ptr->parent_guid is 0 by default
}
}
if (apex_options::use_tasktree_output()) {
tt_ptr->assign_heritage();
}
if (task_id == UINTMAX_MAX) {
// generate a GUID
tt_ptr->guid = thread_instance::get_guid();
Expand Down Expand Up @@ -1184,6 +1187,9 @@ std::shared_ptr<task_wrapper> update_task(
} else {
wrapper->alias = id;
}
if (apex_options::use_tasktree_output()) {
wrapper->update_heritage();
}
/*
printf("%llu New alias: %s to %s\n", wrapper->guid,
wrapper->task_id->get_name().c_str(), timer_name.c_str());
Expand Down Expand Up @@ -1219,6 +1225,9 @@ std::shared_ptr<task_wrapper> update_task(
} else {
wrapper->alias = id;
}
if (apex_options::use_tasktree_output()) {
wrapper->update_heritage();
}
}
}
return wrapper;
Expand Down
1 change: 1 addition & 0 deletions src/apex/apex_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ inline unsigned int sc_nprocessors_onln()
macro (APEX_PROFILE_OUTPUT, use_profile_output, int, false) \
macro (APEX_CSV_OUTPUT, use_csv_output, int, false) \
macro (APEX_TASKGRAPH_OUTPUT, use_taskgraph_output, bool, false) \
macro (APEX_TASKTREE_OUTPUT, use_tasktree_output, bool, false) \
macro (APEX_PROC_CPUINFO, use_proc_cpuinfo, bool, false) \
macro (APEX_PROC_LOADAVG, use_proc_loadavg, bool, true) \
macro (APEX_PROC_MEMINFO, use_proc_meminfo, bool, false) \
Expand Down
103 changes: 45 additions & 58 deletions src/apex/profiler_listener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,9 @@ std::unordered_set<profile*> free_profiles;
}
}
#endif
if (apex_options::use_tasktree_output()) {
p->tt_ptr->tree_node->addAccumulated(p->elapsed(), p->is_resume);
}
return 1;
}

Expand Down Expand Up @@ -780,64 +783,6 @@ std::unordered_set<profile*> free_profiles;
}
}

/* The following code is from:
http://stackoverflow.com/questions/7706339/
grayscale-to-red-green-blue-matlab-jet-color-scale */
/* Plain RGB triple used when coloring graph nodes. Every channel is a
 * double and starts at 1.0, so a freshly constructed color is white. */
class node_color {
public:
    double red{1.0};
    double green{1.0};
    double blue{1.0};
    node_color() {}
    /* Scale a channel value by 255 and truncate it to an int. */
    int convert(double in) {
        const double scaled = in * 255.0;
        return static_cast<int>(scaled);
    }
} ;

/**
 * Map a value onto a white-to-red ramp.
 *
 * v is first clamped to [vmin, vmax]. Red is always fully on; green and
 * blue fall linearly from 1.0 at vmin to 0.0 at vmax, so the smallest value
 * is white and the largest is pure red.
 *
 * @param v     value to colorize
 * @param vmin  lower bound of the value range
 * @param vmax  upper bound of the value range
 * @return      a node_color allocated with new; nothing in this function
 *              frees it, so the caller is responsible for deleting it.
 */
node_color * get_node_color_visible(double v, double vmin, double vmax) {
    node_color * c = new node_color();

    if (v < vmin)
        v = vmin;
    if (v > vmax)
        v = vmax;
    const double dv = vmax - vmin;
    // A zero-width range (vmin == vmax) would divide by zero and put NaN in
    // the green/blue channels. Only an exactly-zero width is a hazard, since
    // the clamped v keeps the ratio within [0, 1] otherwise. Treat the value
    // as sitting at the bottom of the range, which yields white.
    const double position = (dv == 0.0) ? 0.0 : (v - vmin) / dv;
    const double fraction = 1.0 - position;
    // red should be full on.
    c->red = 1.0;
    // blue should increase as the fraction increases.
    c->blue = fraction;
    // green should increase as the fraction increases.
    c->green = fraction;
    return c;
}

/**
 * Map a value onto a four-segment blue -> cyan -> green -> yellow -> red
 * ("jet" style) color scale.
 *
 * v is first clamped to [vmin, vmax]. The range is split into quarters and
 * in each quarter one channel is ramped while the others keep either the
 * constructor default of 1.0 or an explicit 0.
 *
 * @param v     value to colorize
 * @param vmin  lower bound of the value range
 * @param vmax  upper bound of the value range
 * @return      a node_color allocated with new; nothing in this function
 *              frees it, so the caller is responsible for deleting it.
 */
node_color * get_node_color(double v,double vmin,double vmax)
{
    node_color * c = new node_color();

    if (v < vmin)
        v = vmin;
    if (v > vmax)
        v = vmax;
    const double dv = vmax - vmin;

    if (dv == 0.0) {
        // A zero-width range would divide by zero below and leave NaN in the
        // green channel. Report the same color the first segment produces
        // for v == vmin: red 0, green 0, blue left at its default of 1.0.
        c->red = 0;
        c->green = 0;
        return c;
    }

    if (v < (vmin + 0.25 * dv)) {
        // first quarter: green ramps up from 0
        c->red = 0;
        c->green = 4 * (v - vmin) / dv;
    } else if (v < (vmin + 0.5 * dv)) {
        // second quarter: blue ramps down from 1
        c->red = 0;
        c->blue = 1 + 4 * (vmin + 0.25 * dv - v) / dv;
    } else if (v < (vmin + 0.75 * dv)) {
        // third quarter: red ramps up from 0
        c->red = 4 * (v - vmin - 0.5 * dv) / dv;
        c->blue = 0;
    } else {
        // last quarter: green ramps down from 1
        c->green = 1 + 4 * (vmin + 0.75 * dv - v) / dv;
        c->blue = 0;
    }

    return(c);
}

void profiler_listener::write_taskgraph(void) {
std::cout << "Writing APEX taskgraph..." << std::endl;
{ // we need to lock in case another thread appears
Expand Down Expand Up @@ -976,6 +921,44 @@ node_color * get_node_color(double v,double vmin,double vmax)
myfile << endl;
}

/**
 * Write the task tree as a Graphviz "dot" file.
 *
 * The file is named "tasktree.<node_id>.dot" and is created under
 * apex_options::output_file_path(). It contains a digraph header whose
 * label reports the elapsed time, detected cores, observed worker threads
 * and available CPU time, followed by whatever the root tree node's
 * writeNode() emits, starting from the APEX main wrapper.
 *
 * If the output file cannot be opened, an error is printed to std::cerr and
 * nothing else is done.
 */
void profiler_listener::write_tasktree(void) {
    std::cout << "Writing APEX tasktree..." << std::endl;
    /* before calling parent.get_name(), make sure we create
     * a thread_instance object that is NOT a worker. */
    thread_instance::instance(false);
    std::stringstream dotname;
    dotname << apex_options::output_file_path();
    dotname << filesystem_separator() << "tasktree." << node_id << ".dot";
    std::ofstream myfile(dotname.str().c_str());
    if (!myfile.is_open()) {
        // Without this check every write below is silently discarded and
        // the user is told the tree was written when no file exists.
        std::cerr << "Error: unable to open '" << dotname.str()
                  << "' to write the APEX tasktree." << std::endl;
        return;
    }

    // our TOTAL available time is the elapsed * the number of threads, or cores
    int num_worker_threads = thread_instance::get_num_workers();
#ifdef APEX_HAVE_HPX
    num_worker_threads = num_worker_threads - num_non_worker_threads_registered;
#endif
    double total_main = main_timer->elapsed() * fmin(hardware_concurrency(),
        num_worker_threads);

    // graph-level attributes and the summary label
    myfile << "digraph prof {\n";
    myfile << " label = \"Elapsed Time: " << main_timer->elapsed(true);
    myfile << " seconds\\lCores detected: " << hardware_concurrency();
    myfile << "\\lWorker threads observed: " << num_worker_threads;
    // is scaling this necessary?
    myfile << "\\lAvailable CPU time: " << total_main*profiler::get_cpu_mhz() << " seconds\\l\";\n";
    myfile << " labelloc = \"t\";\n";
    myfile << " labeljust = \"l\";\n";
    myfile << " overlap = false;\n";
    myfile << " splines = true;\n";
    myfile << " rankdir = \"LR\";\n";
    myfile << " node [shape=box];\n";
    auto root = task_wrapper::get_apex_main_wrapper();
    // recursively write out the tree
    root->tree_node->writeNode(myfile, main_timer->elapsed());
    myfile << "}\n";
    myfile.close();
}

/* Write TAU profiles from the collected data. */
void profiler_listener::write_profile() {
ofstream myfile;
Expand Down Expand Up @@ -1425,6 +1408,10 @@ if (rc != 0) cout << "PAPI error! " << name << ": " << PAPI_strerror(rc) << endl
{
write_taskgraph();
}
else if (apex_options::use_tasktree_output())
{
write_tasktree();
}

// output to 1 TAU profile per process?
if (apex_options::use_profile_output() && !apex_options::use_tau()) {
Expand Down
1 change: 1 addition & 0 deletions src/apex/profiler_listener.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ class profiler_listener : public event_listener {
double &total_main, bool timer);
void finalize_profiles(dump_event_data &data);
void write_taskgraph(void);
void write_tasktree(void);
void write_profile(void);
void delete_profiles(void);
#ifdef APEX_HAVE_HPX
Expand Down
3 changes: 2 additions & 1 deletion src/apex/task_identifier.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@ namespace std {
{
std::size_t h1 = std::hash<std::size_t>()(k.address);
std::size_t h2 = std::hash<std::string>()(k.name);
return h1 ^ (h2 << 1);; // instead of boost::hash_combine
std::size_t h3 = h1 ^ (h2 << 1);
return h3; // instead of boost::hash_combine
}
};

Expand Down
15 changes: 15 additions & 0 deletions src/apex/task_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ struct task_wrapper;
#include <memory>
#include <string>
#include <unordered_set>
#include "dependency_tree.hpp"

namespace apex {

Expand Down Expand Up @@ -53,6 +54,10 @@ struct task_wrapper {
\brief A managed pointer to the parent task_wrapper for this task.
*/
std::shared_ptr<task_wrapper> parent;
/**
\brief A node in the task tree representing this task type
*/
dependency::Node* tree_node;
/**
\brief Internal usage, used to manage HPX direct actions when their
parent task is yielded by the runtime.
Expand All @@ -73,6 +78,7 @@ struct task_wrapper {
guid(0ull),
parent_guid(0ull),
parent(nullptr),
tree_node(nullptr),
alias(nullptr)
{ }
/**
Expand All @@ -98,11 +104,20 @@ struct task_wrapper {
const std::string apex_main_str("APEX MAIN");
tt_ptr = std::make_shared<task_wrapper>();
tt_ptr->task_id = task_identifier::get_task_id(apex_main_str);
tt_ptr->tree_node = new dependency::Node(tt_ptr->task_id, nullptr);
}
mtx.unlock();
}
return tt_ptr;
}
void assign_heritage() {
// make/find a node for ourselves
tree_node = parent->tree_node->appendChild(task_id);
}
void update_heritage() {
// make/find a node for ourselves
tree_node = parent->tree_node->replaceChild(task_id, alias);
}
}; // struct task_wrapper

} // namespace apex
49 changes: 49 additions & 0 deletions src/apex/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,55 @@ std::string activity_to_string(apex_cuda_async_activity_t activity) {
}
}

/* The following code is from:
http://stackoverflow.com/questions/7706339/
grayscale-to-red-green-blue-matlab-jet-color-scale */
/**
 * Map a value onto a white-to-red ramp.
 *
 * v is first clamped to [vmin, vmax]. Red is always fully on; green and
 * blue fall linearly from 1.0 at vmin to 0.0 at vmax, so the smallest value
 * is white and the largest is pure red.
 *
 * @param v     value to colorize
 * @param vmin  lower bound of the value range
 * @param vmax  upper bound of the value range
 * @return      a node_color allocated with new; nothing in this function
 *              frees it, so the caller is responsible for deleting it.
 */
node_color * get_node_color_visible(double v, double vmin, double vmax) {
    node_color * c = new node_color();

    if (v < vmin)
        v = vmin;
    if (v > vmax)
        v = vmax;
    const double dv = vmax - vmin;
    // A zero-width range (vmin == vmax) would divide by zero and put NaN in
    // the green/blue channels. Only an exactly-zero width is a hazard, since
    // the clamped v keeps the ratio within [0, 1] otherwise. Treat the value
    // as sitting at the bottom of the range, which yields white.
    const double position = (dv == 0.0) ? 0.0 : (v - vmin) / dv;
    const double fraction = 1.0 - position;
    // red should be full on.
    c->red = 1.0;
    // blue should increase as the fraction increases.
    c->blue = fraction;
    // green should increase as the fraction increases.
    c->green = fraction;
    return c;
}

/**
 * Map a value onto a four-segment blue -> cyan -> green -> yellow -> red
 * ("jet" style) color scale.
 *
 * v is first clamped to [vmin, vmax]. The range is split into quarters and
 * in each quarter one channel is ramped while the others keep either the
 * constructor default of 1.0 or an explicit 0.
 *
 * @param v     value to colorize
 * @param vmin  lower bound of the value range
 * @param vmax  upper bound of the value range
 * @return      a node_color allocated with new; nothing in this function
 *              frees it, so the caller is responsible for deleting it.
 */
node_color * get_node_color(double v,double vmin,double vmax)
{
    node_color * c = new node_color();

    if (v < vmin)
        v = vmin;
    if (v > vmax)
        v = vmax;
    const double dv = vmax - vmin;

    if (dv == 0.0) {
        // A zero-width range would divide by zero below and leave NaN in the
        // green channel. Report the same color the first segment produces
        // for v == vmin: red 0, green 0, blue left at its default of 1.0.
        c->red = 0;
        c->green = 0;
        return c;
    }

    if (v < (vmin + 0.25 * dv)) {
        // first quarter: green ramps up from 0
        c->red = 0;
        c->green = 4 * (v - vmin) / dv;
    } else if (v < (vmin + 0.5 * dv)) {
        // second quarter: blue ramps down from 1
        c->red = 0;
        c->blue = 1 + 4 * (vmin + 0.25 * dv - v) / dv;
    } else if (v < (vmin + 0.75 * dv)) {
        // third quarter: red ramps up from 0
        c->red = 4 * (v - vmin - 0.5 * dv) / dv;
        c->blue = 0;
    } else {
        // last quarter: green ramps down from 1
        c->green = 1 + 4 * (vmin + 0.75 * dv - v) / dv;
        c->blue = 0;
    }

    return(c);
}

} // namespace apex

extern "C" void __cyg_profile_func_enter(void *this_fn, void *call_site) __attribute__((no_instrument_function));
Expand Down
12 changes: 12 additions & 0 deletions src/apex/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,5 +259,17 @@ uint64_t test_for_MPI_comm_size(uint64_t commsize);

std::string activity_to_string(apex_cuda_async_activity_t activity);

/* Plain RGB triple used when coloring graph nodes. Every channel is a
 * double and starts at 1.0, so a freshly constructed color is white. */
class node_color {
public:
    double red{1.0};
    double green{1.0};
    double blue{1.0};
    node_color() {}
    /* Scale a channel value by 255 and truncate it to an int. */
    int convert(double in) {
        const double scaled = in * 255.0;
        return static_cast<int>(scaled);
    }
} ;

node_color * get_node_color_visible(double v, double vmin, double vmax);
node_color * get_node_color(double v,double vmin,double vmax);

}

3 changes: 3 additions & 0 deletions src/unit_tests/C++/apex_hpx_annotated_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ int main (int argc, char** argv) {
tids[i] = i;
pthread_create(&(thread[i]), NULL, someThread, &(tids[i]));
}
for (int i = 0 ; i < num_iterations ; i++) {
innerLoop(&(tids[i]));
}
/* wait for the threads to finish */
for (uint32_t i = 0 ; i < test_numthreads ; i++) {
pthread_join(thread[i], NULL);
Expand Down

0 comments on commit 9b89b30

Please sign in to comment.