From eea16b8b5fd19802695498a6a41914ee67dd6a58 Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Fri, 8 Dec 2023 21:10:15 -0500 Subject: [PATCH] Fixing shutdown bug when finalize is called without dump on frontier --- src/apex/apex.cpp | 23 +++++++---------------- src/apex/apex_api.hpp | 3 ++- src/apex/apex_kokkos.cpp | 4 +++- src/apex/apex_types.h | 1 + src/apex/profile_reducer.cpp | 1 + src/apex/profiler_listener.cpp | 11 ++++++----- src/apex/task_wrapper.hpp | 2 +- src/scripts/apex_exec | 15 +++++++++++++++ 8 files changed, 36 insertions(+), 24 deletions(-) diff --git a/src/apex/apex.cpp b/src/apex/apex.cpp index 7a2b8aa8..959ee3ff 100644 --- a/src/apex/apex.cpp +++ b/src/apex/apex.cpp @@ -130,11 +130,6 @@ std::shared_ptr& top_level_timer() { } */ -std::shared_ptr& main_timer() { - static std::shared_ptr main_timer = nullptr; - return main_timer; -} - /* * The destructor will request power data from RCRToolkit */ @@ -545,7 +540,6 @@ uint64_t init(const char * thread_name, uint64_t comm_rank, // be stopped on the thread that is calling apex::init. You've been warned. main->explicit_trace_start = true; start(main); - main_timer() = main; if (apex_options::top_level_os_threads()) { // start top-level timer for main thread, it will get automatically // stopped when the main wrapper timer is stopped. @@ -1663,10 +1657,12 @@ void finalize_plugins(void) { #endif } -std::string dump(bool reset) { +std::string dump(bool reset, bool finalizing) { in_apex prevent_deadlocks; // if APEX is disabled, do nothing. - if (apex_options::disable() == true) { return(std::string("")); } + if (apex_options::disable() == true || + (!finalizing && apex_options::use_final_output_only())) + { return(std::string("")); } bool old_screen_output = apex_options::use_screen_output(); if (apex_options::use_jupyter_support()) { // force output in the Jupyter notebook @@ -1749,11 +1745,7 @@ void finalize(void) } // Second, stop the main timer, while the infrastructure is still // functioning. 
- tmp = main_timer(); - if (tmp != nullptr) { - stop(tmp); - main_timer() = nullptr; - } + instance->the_profiler_listener->stop_main_timer(); // if not done already... shutdown_throttling(); // stop thread scheduler policies /* Do this before OTF2 grabs a final timestamp - we might have @@ -1763,7 +1755,6 @@ void finalize(void) for (unsigned int i = 0 ; i < instance->listeners.size() ; i++) { instance->listeners[i]->on_pre_shutdown(); } - //instance->the_profiler_listener->stop_main_timer(); stop_all_async_threads(); // stop OS/HW monitoring, including PAPI /* This could take a while */ @@ -1778,7 +1769,7 @@ void finalize(void) apex_options::suspend(true); // now, process all output - dump(false); + dump(false, true); exit_thread(); if (!_measurement_stopped) { @@ -1905,7 +1896,7 @@ void register_thread(const std::string &name, // spawned by the main timer. std::shared_ptr twp = new_task(task_name, UINTMAX_MAX, - (parent == nullptr ? main_timer() : parent)); + (parent == nullptr ? task_wrapper::get_apex_main_wrapper() : parent)); start(twp); //printf("New thread: %p\n", &(*twp)); thread_instance::set_top_level_timer(twp); diff --git a/src/apex/apex_api.hpp b/src/apex/apex_api.hpp index cac9f3b7..992f5a6e 100644 --- a/src/apex/apex_api.hpp +++ b/src/apex/apex_api.hpp @@ -88,10 +88,11 @@ APEX_EXPORT uint64_t init(const char * thread_name, - write a profile to disk (if requested) - output all other visualization data \param reset Whether to reset all statistics + \param finalizing Whether this is the final dump at shutdown \return a string containing the output \sa @ref apex::finalize */ -APEX_EXPORT std::string dump(bool reset); +APEX_EXPORT std::string dump(bool reset, bool finalizing = false); /** \brief Finalize APEX. 
diff --git a/src/apex/apex_kokkos.cpp b/src/apex/apex_kokkos.cpp index 54525d7c..2a95e3c7 100644 --- a/src/apex/apex_kokkos.cpp +++ b/src/apex/apex_kokkos.cpp @@ -148,7 +148,9 @@ void kokkosp_init_library(int loadseq, uint64_t version, */ void kokkosp_finalize_library() { #ifndef APEX_HAVE_HPX - apex::finalize(); + if (!apex::apex_options::use_mpi()) { + apex::finalize(); + } #endif } diff --git a/src/apex/apex_types.h b/src/apex/apex_types.h index 2faa57e8..f340e999 100644 --- a/src/apex/apex_types.h +++ b/src/apex/apex_types.h @@ -280,6 +280,7 @@ inline unsigned int sc_nprocessors_onln(void) macro (APEX_MEASURE_CONCURRENCY, use_concurrency, int, 0, "Periodically sample thread activity and output report at exit.") \ macro (APEX_MEASURE_CONCURRENCY_MAX_TIMERS, concurrency_max_timers, int, 5, "Maximum number of timers in the concurrency report.") \ macro (APEX_MEASURE_CONCURRENCY_PERIOD, concurrency_period, int, 1000000, "Thread concurrency sampling period, in microseconds.") \ + macro (APEX_FINAL_OUTPUT_ONLY, use_final_output_only, bool, false, "Output APEX performance log files only at exit (ignore intermediate dump calls).") \ macro (APEX_SCREEN_OUTPUT, use_screen_output, bool, false, "Output APEX performance summary at exit.") \ macro (APEX_SCREEN_OUTPUT_DETAIL, use_screen_output_detail, bool, false, "Output detailed APEX performance summary at exit.") \ macro (APEX_VERBOSE, use_verbose, bool, false, "Output APEX options at entry.") \ diff --git a/src/apex/profile_reducer.cpp b/src/apex/profile_reducer.cpp index 761575bb..32573359 100644 --- a/src/apex/profile_reducer.cpp +++ b/src/apex/profile_reducer.cpp @@ -370,6 +370,7 @@ std::map reduce_profiles_for_screen() { std::vector> *rows = new std::vector>{}; treemerge::ThreadPool pool{}; pool.Start(); + treemerge::node::reset(); treemerge::node * root{nullptr}; std::cout << "Merging common tree for all ranks... 
"; auto start = high_resolution_clock::now(); diff --git a/src/apex/profiler_listener.cpp b/src/apex/profiler_listener.cpp index e0737f06..ee23ac5f 100644 --- a/src/apex/profiler_listener.cpp +++ b/src/apex/profiler_listener.cpp @@ -710,7 +710,7 @@ std::unordered_set free_profiles; total_time = get_profile(main_id); } #endif // APEX_SYNCHRONOUS_PROCESSING - double wall_clock_main = total_time->get_accumulated_seconds(); + double wall_clock_main = (total_time != nullptr) ? total_time->get_accumulated_seconds() : 0.0; #ifdef APEX_HAVE_HPX num_worker_threads = num_worker_threads - num_non_worker_threads_registered; #endif @@ -953,7 +953,7 @@ std::unordered_set free_profiles; int num_worker_threads = thread_instance::get_num_workers(); auto main_id = task_identifier::get_main_task_id(); profile * total_time = get_profile(*main_id); - double wall_clock_main = total_time->get_accumulated_seconds(); + double wall_clock_main = (total_time != nullptr) ? total_time->get_accumulated_seconds() : 0.0; #ifdef APEX_HAVE_HPX num_worker_threads = num_worker_threads - num_non_worker_threads_registered; #endif @@ -982,7 +982,7 @@ std::unordered_set free_profiles; dep != task_dependencies.end(); dep++) { task_identifier parent = dep->first; string parent_name = parent.get_tree_name(); - if (parent_name.compare("APEX MAIN") == 0 || + if (parent_name.compare(APEX_MAIN_STR) == 0 || parent_name.substr(0, pthread_wrapper.size()) == pthread_wrapper || parent_name.substr(0, preload_main.size()) == preload_main) { auto children = dep->second; @@ -1001,7 +1001,7 @@ std::unordered_set free_profiles; dep != task_dependencies.end(); dep++) { task_identifier parent = dep->first; string parent_name = parent.get_tree_name(); - if (parent_name.compare("APEX MAIN") != 0 && + if (parent_name.compare(APEX_MAIN_STR) != 0 && parent_name.substr(0, pthread_wrapper.size()) != pthread_wrapper && parent_name.substr(0, preload_main.size()) != preload_main) { auto children = dep->second; @@ -1131,7 +1131,7 @@ 
std::unordered_set free_profiles; // our TOTAL available time is the elapsed * the number of threads, or cores auto main_id = task_identifier::get_main_task_id(); profile * total_time = get_profile(*main_id); - double wall_clock_main = total_time->get_accumulated_seconds(); + double wall_clock_main = (total_time != nullptr) ? total_time->get_accumulated_seconds() : 0.0; #if 0 int num_worker_threads = thread_instance::get_num_workers(); @@ -2171,6 +2171,7 @@ if (rc != 0) cout << "PAPI error! " << name << ": " << PAPI_strerror(rc) << endl void profiler_listener::on_pre_shutdown(void) { stop_main_timer(); + push_profiler((unsigned int)thread_instance::get_id(), *main_timer); } void profiler_listener::push_profiler_public(std::shared_ptr &p) { diff --git a/src/apex/task_wrapper.hpp b/src/apex/task_wrapper.hpp index 8e04376c..eb06a995 100644 --- a/src/apex/task_wrapper.hpp +++ b/src/apex/task_wrapper.hpp @@ -121,7 +121,7 @@ struct task_wrapper { if (tt_ptr.get() == nullptr) { mtx.lock(); if (tt_ptr.get() == nullptr) { - const std::string apex_main_str("APEX MAIN"); + const std::string apex_main_str(APEX_MAIN_STR); tt_ptr = std::make_shared(); tt_ptr->task_id = task_identifier::get_task_id(apex_main_str); tt_ptr->tree_node = new dependency::Node(tt_ptr->task_id, nullptr); diff --git a/src/scripts/apex_exec b/src/scripts/apex_exec index b56de1fc..e340e1a8 100755 --- a/src/scripts/apex_exec +++ b/src/scripts/apex_exec @@ -33,6 +33,7 @@ where APEX options are zero or more of: --apex:screen enable screen text output (on by default) --apex:screen-detail enable detailed text output (off by default) --apex:quiet disable screen text output + --apex:final-output-only only output performance data at exit (ignore intermediate dump calls) --apex:csv enable csv text output --apex:tau enable tau profile output --apex:taskgraph enable taskgraph output @@ -97,6 +98,7 @@ where APEX options are zero or more of: exit 1 } +apex_opts=yes openacc=no kokkos=no kokkos_tuning=no @@ -252,6 
+254,10 @@ while (( "$#" )); do # on by default shift ;; + --apex:final-output-only) + export APEX_FINAL_OUTPUT_ONLY=1 + shift + ;; --apex:screen_details|--apex:screen-details) screen=yes export APEX_SCREEN_OUTPUT_DETAIL=1 @@ -481,11 +487,20 @@ while (( "$#" )); do usage fi ;; + --apex:help|--help|-h) + if [ $apex_opts = yes ] ; then + usage + fi + # Could be a program argument! + PARAMS="$PARAMS $1" + shift + ;; --apex:*) # unsupported flags echo "Error: Unsupported flag $1" >&2 usage ;; *) # preserve positional arguments + apex_opts=no if [ "$prog" = "" ] ; then prog=$1 fi