Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trace thread config + paranoid level + preload #176

Merged
merged 1 commit into from
Oct 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.7.0
1.7.1
2 changes: 1 addition & 1 deletion external/timemory
6 changes: 5 additions & 1 deletion source/docs/runtime.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,17 @@ variable to be enabled (i.e., `OMNITRACE_USE_ROCPROFILER=ON`).
Example configuration for hardware counters:

```console
# using papi identifiers
OMNITRACE_PAPI_EVENTS = PAPI_TOT_CYC PAPI_TOT_INS

# using perf identifiers
OMNITRACE_PAPI_EVENTS = perf::INSTRUCTIONS perf::CACHE-REFERENCES perf::CACHE-MISSES
```

#### OMNITRACE_PAPI_EVENTS

In order to collect the majority of hardware counters via PAPI, you need to make sure that `/proc/sys/kernel/perf_event_paranoid`
has a value of less than 2. If you have sudo access, you can use the following command to modify the value:
has a value <= 2. If you have sudo access, you can use the following command to modify the value:

```shell
echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
Expand Down
28 changes: 26 additions & 2 deletions source/lib/omnitrace-dl/dl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,32 @@ get_omnitrace_dl_env()
/// Reports whether the omnitrace-dl library is being used via LD_PRELOAD.
///
/// The result is true only when the OMNITRACE_PRELOAD setting (queried with a
/// fallback of `true`) is enabled AND the LD_PRELOAD environment variable
/// contains "libomnitrace-dl.so".
///
/// The environment is inspected exactly once, on the first call; the result is
/// cached in a function-local static, so later changes to either environment
/// variable are not observed by subsequent calls.
inline bool
get_omnitrace_preload()
{
    static bool _v = []() {
        auto&& _preload      = get_env("OMNITRACE_PRELOAD", true);
        auto&& _preload_libs = get_env("LD_PRELOAD", std::string{});
        return (_preload &&
                _preload_libs.find("libomnitrace-dl.so") != std::string::npos);
    }();
    return _v;
}

/// Removes omnitrace libraries from the LD_PRELOAD environment variable.
///
/// If LD_PRELOAD currently contains "libomnitrace-dl.so", the variable is
/// rewritten (via setenv with overwrite enabled) so that every ':'-separated
/// entry containing "libomnitrace" is dropped and all other entries are kept
/// in their original order. If LD_PRELOAD does not mention
/// "libomnitrace-dl.so", the environment is left untouched.
inline void
reset_omnitrace_preload()
{
    auto&& _preload_libs = get_env("LD_PRELOAD", std::string{});
    if(_preload_libs.find("libomnitrace-dl.so") != std::string::npos)
    {
        auto _modified_preload = std::string{};
        for(const auto& itr : delimit(_preload_libs, ":"))
        {
            // skip any omnitrace library, not only libomnitrace-dl.so
            if(itr.find("libomnitrace") != std::string::npos) continue;
            // NOTE(review): join appears to yield ":" + itr here, which is why a
            // leading ':' is stripped below -- confirm against common::join
            _modified_preload += common::join("", ":", itr);
        }
        // drop the separator that precedes the first retained entry
        if(!_modified_preload.empty() && _modified_preload.find(':') == 0)
            _modified_preload = _modified_preload.substr(1);

        setenv("LD_PRELOAD", _modified_preload.c_str(), 1);
    }
}

// environment priority:
Expand Down Expand Up @@ -940,6 +963,7 @@ omnitrace_preload()

if(_preload)
{
reset_omnitrace_preload();
omnitrace_preinit_library();
OMNITRACE_DL_LOG(1, "[%s] invoking %s(%s)\n", __FUNCTION__, "omnitrace_init",
::omnitrace::join(::omnitrace::QuoteStrings{}, ", ", "sampling",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ pthread_mutex_gotcha::get_hashes()
for(size_t i = 9; i < 12; ++i)
_skip.emplace(i);
}
if(!config::get_trace_thread_barriers()) _skip.emplace(8);
if(!config::get_trace_thread_join()) _skip.emplace(12);
for(size_t i = 0; i < gotcha_capacity; ++i)
{
auto&& _id = _data.at(i).tool_id;
Expand Down Expand Up @@ -132,8 +134,12 @@ pthread_mutex_gotcha::configure()
"pthread_rwlock_unlock" });
}

pthread_mutex_gotcha_t::configure(
comp::gotcha_config<8, int, pthread_barrier_t*>{ "pthread_barrier_wait" });
if(config::get_trace_thread_barriers())
{
pthread_mutex_gotcha_t::configure(
comp::gotcha_config<8, int, pthread_barrier_t*>{
"pthread_barrier_wait" });
}

if(config::get_trace_thread_spin_locks())
{
Expand All @@ -149,8 +155,11 @@ pthread_mutex_gotcha::configure()
"pthread_spin_unlock" });
}

pthread_mutex_gotcha_t::configure(
comp::gotcha_config<12, int, pthread_t, void**>{ "pthread_join" });
if(config::get_trace_thread_join())
{
pthread_mutex_gotcha_t::configure(
comp::gotcha_config<12, int, pthread_t, void**>{ "pthread_join" });
}
};
}

Expand Down
54 changes: 42 additions & 12 deletions source/lib/omnitrace/library/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,14 @@ configure_settings(bool _init)
"cause deadlocks with MPI distributions.",
false, "backend", "parallelism", "gotcha", "advanced");

OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_TRACE_THREAD_BARRIERS",
"Enable tracing calls to pthread_barrier functions.", true,
"backend", "parallelism", "gotcha", "advanced");

OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_TRACE_THREAD_JOIN",
"Enable tracing calls to pthread_join functions.", true,
"backend", "parallelism", "gotcha", "advanced");

OMNITRACE_CONFIG_SETTING(
bool, "OMNITRACE_SAMPLING_KEEP_INTERNAL",
"Configure whether the statistical samples should include call-stack entries "
Expand Down Expand Up @@ -601,7 +609,6 @@ configure_settings(bool _init)
_config->get_max_thread_bookmarks() = 1;
_config->get_timing_units() = "sec";
_config->get_memory_units() = "MB";
_config->get_papi_events() = "PAPI_TOT_CYC";

// settings native to timemory but critically and/or extensively used by omnitrace
auto _add_omnitrace_category = [&_config](auto itr) {
Expand Down Expand Up @@ -685,21 +692,22 @@ configure_settings(bool _init)
if(_fparanoid) _fparanoid >> _paranoid;
}

if(_paranoid > 1)
if(_paranoid > 2)
{
OMNITRACE_BASIC_VERBOSE(0,
"/proc/sys/kernel/perf_event_paranoid has a value of %i. "
"Disabling PAPI (requires a value <= 1)...\n",
"Disabling PAPI (requires a value <= 2)...\n",
_paranoid);
OMNITRACE_BASIC_VERBOSE(0,
"In order to enable PAPI support, run 'echo N | sudo tee "
"/proc/sys/kernel/perf_event_paranoid' where N is < 2\n");
tim::trait::runtime_enabled<comp::papi_common<void>>::set(false);
tim::trait::runtime_enabled<comp::papi_array_t>::set(false);
tim::trait::runtime_enabled<comp::papi_vector>::set(false);
tim::trait::runtime_enabled<comp::cpu_roofline_flops>::set(false);
tim::trait::runtime_enabled<comp::cpu_roofline_dp_flops>::set(false);
tim::trait::runtime_enabled<comp::cpu_roofline_sp_flops>::set(false);
OMNITRACE_BASIC_VERBOSE(
0, "In order to enable PAPI support, run 'echo N | sudo tee "
"/proc/sys/kernel/perf_event_paranoid' where N is <= 2\n");
trait::runtime_enabled<comp::papi_config>::set(false);
trait::runtime_enabled<comp::papi_common<void>>::set(false);
trait::runtime_enabled<comp::papi_array_t>::set(false);
trait::runtime_enabled<comp::papi_vector>::set(false);
trait::runtime_enabled<comp::cpu_roofline_flops>::set(false);
trait::runtime_enabled<comp::cpu_roofline_dp_flops>::set(false);
trait::runtime_enabled<comp::cpu_roofline_sp_flops>::set(false);
_config->get_papi_events() = std::string{};
}
else
Expand Down Expand Up @@ -812,6 +820,14 @@ configure_settings(bool _init)
tim::set_env(std::string{ _dl_verbose->first }, _dl_verbose->second->as_string(),
0);

if(_config->get_papi_events().empty())
{
trait::runtime_enabled<comp::papi_config>::set(false);
trait::runtime_enabled<comp::papi_common<void>>::set(false);
trait::runtime_enabled<comp::papi_array_t>::set(false);
trait::runtime_enabled<comp::papi_vector>::set(false);
}

configure_mode_settings();
configure_signal_handler();
configure_disabled_settings();
Expand Down Expand Up @@ -1886,6 +1902,20 @@ get_trace_thread_spin_locks()
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}

bool
get_trace_thread_barriers()
{
static auto _v = get_config()->find("OMNITRACE_TRACE_THREAD_BARRIERS");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}

bool
get_trace_thread_join()
{
static auto _v = get_config()->find("OMNITRACE_TRACE_THREAD_JOIN");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}

bool
get_debug_tid()
{
Expand Down
6 changes: 6 additions & 0 deletions source/lib/omnitrace/library/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,12 @@ get_trace_thread_rwlocks();
bool
get_trace_thread_spin_locks();

bool
get_trace_thread_barriers();

bool
get_trace_thread_join();

std::string
get_rocm_events();
} // namespace config
Expand Down