Skip to content

Commit

Permalink
Timer throttling is now a runtime option, disabled by default.
Browse files Browse the repository at this point in the history
  • Loading branch information
khuck committed Jan 13, 2023
1 parent a230050 commit f0db5e7
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 81 deletions.
7 changes: 7 additions & 0 deletions src/apex/apex_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ typedef struct _profile
double bytes_freed; /*!< total bytes freed in this task */
int times_reset; /*!< How many times was this timer reset */
size_t num_threads; /*!< How many threads have seen this timer? */
bool throttled; /*!< Is this timer throttled? */
} apex_profile;

/** Rather than use void pointers everywhere, be explicit about
Expand Down Expand Up @@ -296,6 +297,12 @@ inline unsigned int sc_nprocessors_onln()
macro (APEX_PROC_STAT_DETAILS, use_proc_stat_details, bool, false, "Periodically read detailed data from /proc/self/stat.") \
macro (APEX_PROC_PERIOD, proc_period, int, 1000000, "/proc/* sampling period.") \
macro (APEX_SORT_TIMERS_BY_NAME, sort_timers_by_name, bool, false, "Sort timer screen data by name.") \
macro (APEX_THROTTLE_TIMERS, throttle_timers, \
bool, false, "Enable throttling of short-lived timer events.") \
macro (APEX_THROTTLE_TIMERS_CALLS, throttle_timers_calls, \
int, 1000, "Minimum number of calls for timer throttling.") \
macro (APEX_THROTTLE_TIMERS_PERCALL, throttle_timers_percall, \
int, 10, "Minimum duration per call for timer throttling (microseconds).") \
macro (APEX_THROTTLE_CONCURRENCY, throttle_concurrency, \
bool, false, "Enable thread concurrency throttling.") \
macro (APEX_THROTTLING_MAX_THREADS, throttling_max_threads, \
Expand Down
5 changes: 4 additions & 1 deletion src/apex/profile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class profile {
_profile.bytes_allocated = 0;
_profile.bytes_freed = 0;
_profile.num_threads = 1;
_profile.throttled = false;
};
profile(double initial, int num_metrics, double * papi_metrics, bool
yielded, double allocations, double frees, double bytes_allocated,
Expand Down Expand Up @@ -95,6 +96,7 @@ class profile {
_profile.bytes_allocated = bytes_allocated;
_profile.bytes_freed = bytes_freed;
_profile.num_threads = 1;
_profile.throttled = false;
};
/* This constructor is so that we can create a dummy wrapper around profile
* data after we've done a reduction across ranks. */
Expand Down Expand Up @@ -206,7 +208,8 @@ class profile {
double get_bytes_freed() { return _profile.bytes_freed; }
apex_profile_type get_type() { return _profile.type; }
apex_profile * get_profile() { return &_profile; };

bool get_throttled() { return _profile.throttled; };
/** Flag this profile as throttled.
 *  One-way switch: this only ever sets the flag to true.
 */
void set_throttled() {
    _profile.throttled = true;
}
};

}
Expand Down
6 changes: 4 additions & 2 deletions src/apex/profile_reducer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
#include <limits>
#include <inttypes.h>

/* 9 values per timer/counter by default
/* 10 values per timer/counter by default
* 4 values related to memory allocation tracking
* 8 values (up to) when PAPI enabled */
constexpr size_t num_fields{21};
constexpr size_t num_fields{22};

#if !defined(HPX_HAVE_NETWORKING) && defined(APEX_HAVE_MPI)
#include "mpi.h"
Expand Down Expand Up @@ -169,6 +169,7 @@ std::map<std::string, apex_profile*> reduce_profiles() {
dptr[i++] = p->times_reset;
dptr[i++] = (double)p->type;
dptr[i++] = p->num_threads;
dptr[i++] = (p->throttled ? 1.0 : 0.0);
dptr[i++] = p->allocations;
dptr[i++] = p->frees;
dptr[i++] = p->bytes_allocated;
Expand Down Expand Up @@ -232,6 +233,7 @@ std::map<std::string, apex_profile*> reduce_profiles() {
p->type = (apex_profile_type)(dptr[index++]);
p->num_threads = dptr[index] > p->num_threads ? dptr[index] : p->num_threads;
index++;
p->throttled = (p->throttled || (dptr[index++] > 0.0)) ? true : false;
p->allocations = dptr[index++];
p->frees = dptr[index++];
p->bytes_allocated = dptr[index++];
Expand Down
1 change: 0 additions & 1 deletion src/apex/profiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ class disabled_profiler_exception : public std::exception {
}
};

#define APEX_THROTTLE_PERCALL 0.00001 // 10 microseconds.
#define MYCLOCK std::chrono::system_clock

class profiler {
Expand Down
145 changes: 70 additions & 75 deletions src/apex/profiler_listener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,8 @@
#include <thread>
#include <future>

#if defined(APEX_THROTTLE)
#include "apex_cxx_shared_lock.hpp"
apex::shared_mutex_type throttled_event_set_mutex;
#define APEX_THROTTLE_CALLS 1000
#endif

#if APEX_HAVE_PAPI
#include "papi.h"
Expand Down Expand Up @@ -159,19 +156,19 @@ std::unordered_set<profile*> free_profiles;
std::unique_lock<std::mutex> task_map_lock(_task_map_mutex);
for(it2 = task_map.begin(); it2 != task_map.end(); it2++) {
profile * p = it2->second;
#if defined(APEX_THROTTLE)
if (!apex_options::use_tau()) {
task_identifier id = it2->first;
unordered_set<task_identifier>::const_iterator it4;
{
read_lock_type l(throttled_event_set_mutex);
it4 = throttled_tasks.find(id);
}
if (it4!= throttled_tasks.end()) {
continue;
if (apex_options::throttle_timers()) {
if (!apex_options::use_tau()) {
task_identifier id = it2->first;
unordered_set<task_identifier>::const_iterator it4;
{
read_lock_type l(throttled_event_set_mutex);
it4 = throttled_tasks.find(id);
}
if (it4!= throttled_tasks.end()) {
continue;
}
}
}
#endif
if (p->get_type() == APEX_TIMER) {
non_idle_time += p->get_accumulated();
}
Expand Down Expand Up @@ -311,38 +308,41 @@ std::unordered_set<profile*> free_profiles;
values, p.is_resume, p.thread_id);
}
}
#if defined(APEX_THROTTLE)
if (!apex_options::use_tau()) {
// Is this a lightweight task? If so, we shouldn't measure it any more,
// in order to reduce overhead.
if (theprofile->get_calls() > APEX_THROTTLE_CALLS &&
theprofile->get_mean() < APEX_THROTTLE_PERCALL) {
unordered_set<task_identifier>::const_iterator it2;
{
read_lock_type l(throttled_event_set_mutex);
it2 = throttled_tasks.find(*(p.get_task_id()));
}
if (it2 == throttled_tasks.end()) {
// lock the set for insert
{
if (apex_options::throttle_timers()) {
if (!apex_options::use_tau()) {
// Is this a lightweight task? If so, we shouldn't measure it any more,
// in order to reduce overhead.
if (theprofile->get_calls() > apex_options::throttle_timers_calls() &&
theprofile->get_mean_useconds() < apex_options::throttle_timers_percall()) {
// set the profile to throttled for output reasons
theprofile->set_throttled();
// add the task_identifier to the list of throttled events
unordered_set<task_identifier>::const_iterator it2;
{
read_lock_type l(throttled_event_set_mutex);
it2 = throttled_tasks.find(*(p.get_task_id()));
}
if (it2 == throttled_tasks.end()) {
// lock the set for insert
{
write_lock_type l(throttled_event_set_mutex);
// was it inserted when we were waiting?
it2 = throttled_tasks.find(*(p.get_task_id()));
// no? OK - insert it.
if (it2 == throttled_tasks.end()) {
throttled_tasks.insert(*(p.get_task_id()));
}
}
if (apex_options::use_verbose()) {
cout << "APEX: disabling lightweight timer "
<< p.get_task_id()->get_name()
<< endl;
fflush(stdout);
}
}
}
// was it inserted when we were waiting?
it2 = throttled_tasks.find(*(p.get_task_id()));
// no? OK - insert it.
if (it2 == throttled_tasks.end()) {
throttled_tasks.insert(*(p.get_task_id()));
}
}
if (apex_options::use_verbose()) {
cout << "APEX: disabling lightweight timer "
<< p.get_task_id()->get_name()
<< endl;
fflush(stdout);
}
}
}
}
}
#endif
} else {
// Create a new profile for this name.
if ((apex_options::track_cpu_memory() ||
Expand Down Expand Up @@ -504,22 +504,17 @@ std::unordered_set<profile*> free_profiles;
}
//screen_output << "\"" << shorter << "\", " ;
screen_output << string_format("%52s", shorter.c_str()) << " : ";
#if defined(APEX_THROTTLE)
if (!apex_options::use_tau()) {
// if this profile was throttled, don't output the measurements.
// they are limited and bogus, anyway.
unordered_set<task_identifier>::const_iterator it4;
{
read_lock_type l(throttled_event_set_mutex);
it4 = throttled_tasks.find(task_id);
}
if (it4!= throttled_tasks.end()) {
screen_output << "DISABLED (high frequency, short duration)"
<< endl;
return;
if (apex_options::throttle_timers()) {
if (!apex_options::use_tau()) {
// if this profile was throttled, don't output the measurements.
// they are limited and bogus, anyway.
if (p->get_throttled()) {
screen_output << "DISABLED (high frequency, short duration)"
<< endl;
return;
}
}
}
#endif
if(p->get_calls() == 0 && p->get_times_reset() > 0) {
screen_output << "Not called since reset." << endl;
return;
Expand Down Expand Up @@ -1876,25 +1871,25 @@ if (rc != 0) cout << "PAPI error! " << name << ": " << PAPI_strerror(rc) << endl
&tt_ptr, bool is_resume) {
//std::cout << "Starting " << tt_ptr->get_task_id()->get_name() << std::endl;
if (!_done) {
#if defined(APEX_THROTTLE)
if (!apex_options::use_tau()) {
// if this timer is throttled, return without doing anything
unordered_set<task_identifier>::const_iterator it;
{
read_lock_type l(throttled_event_set_mutex);
it = throttled_tasks.find(*tt_ptr->get_task_id());
}
if (it != throttled_tasks.end()) {
/*
* The throw is removed, because it is a performance penalty on some
* systems on_start now returns a boolean
*/
// to be caught by apex::start/resume
//throw disabled_profiler_exception();
return false;
if (apex_options::throttle_timers()) {
if (!apex_options::use_tau()) {
// if this timer is throttled, return without doing anything
unordered_set<task_identifier>::const_iterator it;
{
read_lock_type l(throttled_event_set_mutex);
it = throttled_tasks.find(*tt_ptr->get_task_id());
}
if (it != throttled_tasks.end()) {
/*
* The throw is removed, because it is a performance penalty on some
* systems on_start now returns a boolean
*/
// to be caught by apex::start/resume
//throw disabled_profiler_exception();
return false;
}
}
}
#endif
// start the profiler object, which starts our timers
//std::shared_ptr<profiler> p = std::make_shared<profiler>(tt_ptr,
//is_resume);
Expand Down
2 changes: 0 additions & 2 deletions src/apex/profiler_listener.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,7 @@ class profiler_listener : public event_listener {
dependency_queue_t * _construct_dependency_queue(void);
dependency_queue_t * dependency_queue(void);
//ConcurrentQueue<task_dependency*> dependency_queue;
#if defined(APEX_THROTTLE)
std::unordered_set<task_identifier> throttled_tasks;
#endif
#if APEX_HAVE_PAPI
int num_papi_counters;
std::vector<std::string> metric_names;
Expand Down

0 comments on commit f0db5e7

Please sign in to comment.