Skip to content

Commit

Permalink
Adding new "pre-shutdown" event for listeners
Browse files Browse the repository at this point in the history
The profiler_listener, otf2_listener and trace_event_listener
all need to take a timestamp when the program is finished, but when
CUPTI asynchronous processing has to happen, that can dilate
the trace because the final timestamp doesn't get taken until
long after the buffers are processed.  Now, the timestamp is
taken before the buffers are processed.  All asynchronous background
processing also needs to be disabled, so that there aren't new
events in the trace after the last timestamp.
  • Loading branch information
khuck committed Aug 6, 2020
1 parent 1d58c06 commit f220829
Show file tree
Hide file tree
Showing 11 changed files with 35 additions and 5 deletions.
13 changes: 9 additions & 4 deletions src/apex/apex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1386,17 +1386,22 @@ void finalize()
// FIRST, stop the top level timer, while the infrastructure is still
// functioning.
if (top_level_timer != nullptr) { stop(top_level_timer); }
instance->the_profiler_listener->stop_main_timer();
// if not done already...
shutdown_throttling(); // stop thread scheduler policies
stop_all_async_threads(); // stop OS/HW monitoring
// notify all listeners that we are going to stop soon
for (unsigned int i = 0 ; i < instance->listeners.size() ; i++) {
instance->listeners[i]->on_pre_shutdown();
}
//instance->the_profiler_listener->stop_main_timer();
/* This could take a while */
#ifdef APEX_WITH_CUDA
flushTrace();
#endif
// stop processing new timers/counters/messages/tasks/etc.
apex_options::suspend(true);
// now, process all output
dump(false);
// if not done already...
shutdown_throttling();
stop_all_async_threads();
exit_thread();
if (!_measurement_stopped)
{
Expand Down
1 change: 1 addition & 0 deletions src/apex/concurrency_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class concurrency_handler : public handler, public event_listener {
void on_startup(startup_event_data &data) { APEX_UNUSED(data); };
void on_dump(dump_event_data &data);
void on_reset(task_identifier * id);
void on_pre_shutdown(void) { } ;
void on_shutdown(shutdown_event_data &data);
void on_new_node(node_event_data &data) { APEX_UNUSED(data); };
void on_new_thread(new_thread_event_data &data);
Expand Down
1 change: 1 addition & 0 deletions src/apex/event_listener.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ class event_listener
virtual ~event_listener() {};
// all methods in the interface that a handler has to override
virtual void on_startup(startup_event_data &data) = 0;
virtual void on_pre_shutdown(void) = 0;
virtual void on_shutdown(shutdown_event_data &data) = 0;
virtual void on_dump(dump_event_data &data) = 0;
virtual void on_reset(task_identifier * id) = 0;
Expand Down
9 changes: 8 additions & 1 deletion src/apex/otf2_listener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ namespace apex {
};

uint64_t otf2_listener::globalOffset(0);
uint64_t otf2_listener::saved_end_timestamp(0);
const std::string otf2_listener::empty("");
int otf2_listener::my_saved_node_id(0);
int otf2_listener::my_saved_node_count(1);
Expand Down Expand Up @@ -773,7 +774,7 @@ namespace apex {
void otf2_listener::write_clock_properties(void) {
/* write the clock properties */
uint64_t ticks_per_second = 1e9;
uint64_t traceLength = get_time();
uint64_t traceLength = saved_end_timestamp;
OTF2_GlobalDefWriter_WriteClockProperties( global_def_writer,
ticks_per_second, 0 /* start */, traceLength /* length */ );
}
Expand Down Expand Up @@ -854,6 +855,12 @@ namespace apex {
return;
}

/* Before shutdown, take a timestamp.  The shutdown process can take a
 * really long time, and we don't want to confuse the user with a trace
 * that looks longer than it should.  The value saved here is what
 * write_clock_properties() passes to OTF2 as the trace length. */
void otf2_listener::on_pre_shutdown(void) {
saved_end_timestamp = get_time();
}

/* At shutdown, we need to reduce all the global information,
* and write out the global definitions - strings, regions,
* locations, communicators, groups, metrics, etc.
Expand Down
4 changes: 4 additions & 0 deletions src/apex/otf2_listener.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ namespace apex {
/* The global offset is referenced from the get_time static function,
* so it needs to be static itself. */
static uint64_t globalOffset;
/* We take an end timestamp before shutdown and use it as the trace
 * length, because we don't want to confuse the user with a long trace
 * time caused by slow shutdown processing */
static uint64_t saved_end_timestamp;
/* All OTF2 callback functions have to be declared static, so that they
* can be registered with the OTF2 library */
static OTF2_TimeStamp get_time( void ) {
Expand Down Expand Up @@ -213,6 +216,7 @@ namespace apex {
void on_dump(dump_event_data &data);
void on_reset(task_identifier * id)
{ APEX_UNUSED(id); };
void on_pre_shutdown(void);
void on_shutdown(shutdown_event_data &data);
void on_new_node(node_event_data &data);
void on_new_thread(new_thread_event_data &data);
Expand Down
1 change: 1 addition & 0 deletions src/apex/policy_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ class policy_handler : public handler, public event_listener
void on_dump(dump_event_data &data);
void on_reset(task_identifier * id)
{ APEX_UNUSED(id); };
void on_pre_shutdown(void) {};
void on_shutdown(shutdown_event_data &data);
void on_new_node(node_event_data &data);
void on_new_thread(new_thread_event_data &data);
Expand Down
4 changes: 4 additions & 0 deletions src/apex/profiler_listener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1755,6 +1755,10 @@ if (rc != 0) cout << "PAPI error! " << name << ": " << PAPI_strerror(rc) << endl
}
}

/* Pre-shutdown event handler: stop the main timer.  finalize() delivers
 * this event to every listener before the CUDA trace flush and dump(),
 * so the main timer is stopped before that work runs.
 * NOTE(review): presumably stop_main_timer() is where the end-of-run
 * timestamp is captured -- confirm against its definition. */
void profiler_listener::on_pre_shutdown(void) {
stop_main_timer();
}

void profiler_listener::push_profiler_public(std::shared_ptr<profiler> &p) {
push_profiler(0, p);
}
Expand Down
1 change: 1 addition & 0 deletions src/apex/profiler_listener.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ class profiler_listener : public event_listener {
void on_startup(startup_event_data &data);
void on_dump(dump_event_data &data);
void on_reset(task_identifier * id);
void on_pre_shutdown(void);
void on_shutdown(shutdown_event_data &data);
void on_new_node(node_event_data &data);
void on_new_thread(new_thread_event_data &data);
Expand Down
1 change: 1 addition & 0 deletions src/apex/tau_listener.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class tau_listener : public event_listener {
void on_dump(dump_event_data &data);
void on_reset(task_identifier * id)
{ APEX_UNUSED(id); };
void on_pre_shutdown(void) {};
void on_shutdown(shutdown_event_data &data);
void on_new_node(node_event_data &data);
void on_new_thread(new_thread_event_data &data);
Expand Down
4 changes: 4 additions & 0 deletions src/apex/trace_event_listener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ void trace_event_listener::on_dump(dump_event_data &data) {
return;
}

/* Pre-shutdown event handler: calls end_trace_time() when the
 * pre-shutdown event is delivered.
 * NOTE(review): presumably end_trace_time() records the final timestamp
 * of the trace -- confirm against its definition. */
void trace_event_listener::on_pre_shutdown(void) {
end_trace_time();
}

void trace_event_listener::on_shutdown(shutdown_event_data &data) {
APEX_UNUSED(data);
if (!_terminate) {
Expand Down
1 change: 1 addition & 0 deletions src/apex/trace_event_listener.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class trace_event_listener : public event_listener {
void on_dump(dump_event_data &data);
void on_reset(task_identifier * id)
{ APEX_UNUSED(id); };
void on_pre_shutdown(void);
void on_shutdown(shutdown_event_data &data);
void on_new_node(node_event_data &data);
void on_new_thread(new_thread_event_data &data);
Expand Down

0 comments on commit f220829

Please sign in to comment.