Skip to content

Commit

Permalink
Cleanup changes introduced a bug; this fixes it
Browse files Browse the repository at this point in the history
The cleanup changes caused APEX to ask HPX to schedule profile
processing during shutdown, but by that point HPX had already stopped.
Instead, force synchronous processing of the remaining profile data
from the on_dump() event.
  • Loading branch information
khuck committed Mar 26, 2020
1 parent c002fd4 commit 5e3198f
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 15 deletions.
3 changes: 1 addition & 2 deletions etc/buildbot.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,14 @@ yes_ompt=" -DUSE_OMPT=TRUE -DOMPT_ROOT=/usr/local/ompt/5.0"
#yes_mpi=" -DUSE_MPI=TRUE -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx"
yes_mpi=" -DUSE_MPI=TRUE"
yes_papi=" -DUSE_PAPI=TRUE -DPAPI_ROOT=/usr"
yes_tau=" -DUSE_TAU=TRUE -DTAU_ROOT=/usr/local/tau/git -DTAU_ARCH=x86_64 -DTAU_OPTIONS=-pthread"
yes_tau=" -DUSE_TAU=TRUE "
if [ "$host" == "delphi" ] ; then
yes_bfd=" -DUSE_BFD=TRUE -DBFD_ROOT=/usr/local/packages/binutils/2.27"
yes_malloc=" -DUSE_JEMALLOC=TRUE -DJEMALLOC_ROOT=/usr/local/packages/jemalloc/5.0.1-gcc"
yes_ah=" -DUSE_ACTIVEHARMONY=TRUE -DACTIVEHARMONY_ROOT=/usr/local/packages/activeharmony/4.6.0-gcc -DUSE_PLUGINS=TRUE"
yes_otf=" -DUSE_OTF2=TRUE -DOTF2_ROOT=/usr/local/packages/otf2/2.1"
yes_ompt=" -DUSE_OMPT=TRUE -DOMPT_ROOT=/usr/local/packages/llvm-openmp/2020-03-25"
yes_papi=" -DUSE_PAPI=TRUE -DPAPI_ROOT=/usr/local/packages/papi/5.6.0"
yes_tau=" -DUSE_TAU=TRUE "
fi

# set defaults
Expand Down
28 changes: 24 additions & 4 deletions src/apex/CMakeLists.standalone
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Create a library called "Apex" which includes the source file "apex.cxx".
# The extension is already found. Any number of sources could be listed here.
# Create a library called "Apex" which includes the source file "apex.cxx".
# The extension is already found. Any number of sources could be listed here.

include_directories(${PROJECT_BINARY_DIR}/src/apex)

Expand All @@ -25,7 +25,27 @@ if (OTF2_FOUND)
SET(OTF2_SOURCE otf2_listener.cpp)
endif(OTF2_FOUND)

SET(all_SOURCE task_identifier.cpp apex.cpp thread_instance.cpp event_listener.cpp concurrency_handler.cpp policy_handler.cpp handler.cpp utils.cpp ${tau_SOURCE} profiler_listener.cpp ${bfd_SOURCE} apex_options.cpp apex_policies.cpp perftool_implementation.cpp ${PROC_SOURCE} ${OMPT_SOURCE} ${SENSOR_SOURCE} ${OTF2_SOURCE})
# Try to keep this in alphabetical order
SET(all_SOURCE
apex.cpp
apex_options.cpp
apex_policies.cpp
${bfd_SOURCE}
${OMPT_SOURCE}
concurrency_handler.cpp
event_listener.cpp
handler.cpp
${OTF2_SOURCE}
perftool_implementation.cpp
policy_handler.cpp
${PROC_SOURCE}
profiler_listener.cpp
${SENSOR_SOURCE}
task_identifier.cpp
${tau_SOURCE}
thread_instance.cpp
utils.cpp
)

add_library (apex ${all_SOURCE})
add_library (taudummy tau_dummy.cpp)
Expand Down Expand Up @@ -74,7 +94,7 @@ else()
include_directories("${PROJECT_BINARY_DIR}/src/apex")
endif()

INSTALL(FILES apex.h
INSTALL(FILES apex.h
apex_api.hpp
apex_types.h
apex_policies.h
Expand Down
29 changes: 20 additions & 9 deletions src/apex/profiler_listener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ std::mutex event_set_mutex;
#include <hpx/local_lcos/composable_guard.hpp>
static void apex_schedule_process_profiles(void); // not in apex namespace
const int num_non_worker_threads_registered = 1; // including the main thread
//bool synchronous_flush{false};
#endif

#define APEX_MAIN "APEX MAIN"
Expand Down Expand Up @@ -1094,7 +1095,7 @@ node_color * get_node_color(double v,double vmin,double vmax)
std::shared_ptr<profiler> p;
task_dependency* td;
#ifdef APEX_HAVE_HPX
bool schedule_another_task = false;
//bool schedule_another_task = false;
{
size_t num_queues = 0;
{
Expand All @@ -1105,10 +1106,12 @@ node_color * get_node_color(double v,double vmin,double vmax)
int i = 0;
while(!_done && allqueues[q]->try_dequeue(p)) {
process_profile(p, 0);
if (++i > 1000) {
/*
if (++i > 1000 && !synchronous_flush) {
schedule_another_task = true;
break;
}
*/
}
}
}
Expand All @@ -1122,10 +1125,12 @@ node_color * get_node_color(double v,double vmin,double vmax)
int i = 0;
while(!_done && dependency_queues[q]->try_dequeue(td)) {
process_dependency(td);
if (++i > 1000) {
/*
if (++i > 1000 && !synchronous_flush) {
schedule_another_task = true;
break;
}
*/
}
}
}
Expand Down Expand Up @@ -1171,9 +1176,11 @@ node_color * get_node_color(double v,double vmin,double vmax)
#endif

#ifdef APEX_HAVE_HPX // don't hang out in this task too long.
/*
if (schedule_another_task) {
apex_schedule_process_profiles();
}
*/
#endif

if (apex_options::use_tau()) {
Expand Down Expand Up @@ -1313,20 +1320,24 @@ if (rc != 0) cout << "PAPI error! " << name << ": " << PAPI_strerror(rc) << endl
void profiler_listener::on_dump(dump_event_data &data) {
if (_done) { return; }

// stop the main timer, and process that profile?
main_timer->stop(true);
push_profiler((unsigned int)thread_instance::get_id(), main_timer);

// trigger statistics updating
#ifdef APEX_HAVE_HPX
// schedule an HPX action
apex_schedule_process_profiles();
// We can't schedule an action, because the runtime might be gone
// if we are in the dump() during finalize. So synchronously
// process the queue.
// synchronous_flush = true;
process_profiles_wrapper();
// synchronous_flush = false;
#else
queue_signal.post();
#endif
// wait until any other threads are done processing dependencies
while(consumer_task_running.test_and_set(memory_order_acq_rel)) { }

// stop the main timer, and process that profile?
main_timer->stop(true);
push_profiler((unsigned int)thread_instance::get_id(), main_timer);

// output to screen?
if ((apex_options::use_screen_output() && node_id == 0) ||
apex_options::use_taskgraph_output() ||
Expand Down

0 comments on commit 5e3198f

Please sign in to comment.