diff --git a/VERSION b/VERSION index bd8bf882d..943f9cbc4 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.7.0 +1.7.1 diff --git a/external/timemory b/external/timemory index 46f25fbb4..ddc49db2e 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit 46f25fbb467e11982a2f13b6408f4943d812c568 +Subproject commit ddc49db2ec0f0dc83ad9a0bfc6780be10d215704 diff --git a/source/docs/runtime.md b/source/docs/runtime.md index 71ce68185..789da0321 100644 --- a/source/docs/runtime.md +++ b/source/docs/runtime.md @@ -96,13 +96,17 @@ variable to be enabled (i.e., `OMNITRACE_USE_ROCPROFILER=ON`). Example configuration for hardware counters: ```console +# using papi identifiers +OMNITRACE_PAPI_EVENTS = PAPI_TOT_CYC PAPI_TOT_INS +# using perf identifiers +OMNITRACE_PAPI_EVENTS = perf::INSTRUCTIONS perf::CACHE-REFERENCES perf::CACHE-MISSES ``` #### OMNITRACE_PAPI_EVENTS In order to collect the majority of hardware counters via PAPI, you need to make sure the `/proc/sys/kernel/perf_event_paranoid` -has a value of less than 2. If you have sudo access, you can use the following command to modify the value: +has a value <= 2. If you have sudo access, you can use the following command to modify the value: ```shell echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid diff --git a/source/lib/omnitrace-dl/dl.cpp b/source/lib/omnitrace-dl/dl.cpp index d901b42cd..2cf0e61f2 100644 --- a/source/lib/omnitrace-dl/dl.cpp +++ b/source/lib/omnitrace-dl/dl.cpp @@ -98,9 +98,32 @@ get_omnitrace_dl_env() inline bool get_omnitrace_preload() { - auto&& _preload = get_env("OMNITRACE_PRELOAD", true); + static bool _v = []() { + auto&& _preload = get_env("OMNITRACE_PRELOAD", true); + auto&& _preload_libs = get_env("LD_PRELOAD", std::string{}); + return (_preload && + _preload_libs.find("libomnitrace-dl.so") != std::string::npos); + }(); + return _v; +} + +inline void +reset_omnitrace_preload() +{ auto&& _preload_libs = get_env("LD_PRELOAD", std::string{}); - return (_preload && _preload_libs.find("libomnitrace-dl.so") != std::string::npos); + if(_preload_libs.find("libomnitrace-dl.so") != std::string::npos) + { + auto _modified_preload = std::string{}; + for(const auto& itr : delimit(_preload_libs, ":")) + { + if(itr.find("libomnitrace") != std::string::npos) continue; + _modified_preload += common::join("", ":", itr); + } + if(!_modified_preload.empty() && _modified_preload.find(':') == 0) + _modified_preload = _modified_preload.substr(1); + + setenv("LD_PRELOAD", _modified_preload.c_str(), 1); + } } // environment priority: @@ -940,6 +963,7 @@ omnitrace_preload() if(_preload) { + reset_omnitrace_preload(); omnitrace_preinit_library(); OMNITRACE_DL_LOG(1, "[%s] invoking %s(%s)\n", __FUNCTION__, "omnitrace_init", ::omnitrace::join(::omnitrace::QuoteStrings{}, ", ", "sampling", diff --git a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp index 430537234..f0b3aeeda 100644 --- a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp @@ -71,6 +71,8 @@ pthread_mutex_gotcha::get_hashes() for(size_t i = 9; i < 12; ++i) _skip.emplace(i); } + if(!config::get_trace_thread_barriers()) _skip.emplace(8); + if(!config::get_trace_thread_join()) _skip.emplace(12); for(size_t i = 0; i < gotcha_capacity; ++i) { auto&& _id = _data.at(i).tool_id; @@ -132,8 +134,12 @@ pthread_mutex_gotcha::configure() "pthread_rwlock_unlock" }); } - pthread_mutex_gotcha_t::configure( - comp::gotcha_config<8, int, pthread_barrier_t*>{ "pthread_barrier_wait" }); + if(config::get_trace_thread_barriers()) + { + pthread_mutex_gotcha_t::configure( + comp::gotcha_config<8, int, pthread_barrier_t*>{ + "pthread_barrier_wait" }); + } if(config::get_trace_thread_spin_locks()) { @@ -149,8 +155,11 @@ pthread_mutex_gotcha::configure() "pthread_spin_unlock" }); } - pthread_mutex_gotcha_t::configure( - comp::gotcha_config<12, int, pthread_t, void**>{ "pthread_join" }); + if(config::get_trace_thread_join()) + { + pthread_mutex_gotcha_t::configure( + comp::gotcha_config<12, int, pthread_t, void**>{ "pthread_join" }); + } }; } diff --git a/source/lib/omnitrace/library/config.cpp b/source/lib/omnitrace/library/config.cpp index 6ddd36601..93bde5c7d 100644 --- a/source/lib/omnitrace/library/config.cpp +++ b/source/lib/omnitrace/library/config.cpp @@ -459,6 +459,14 @@ configure_settings(bool _init) "cause deadlocks with MPI distributions.", false, "backend", "parallelism", "gotcha", "advanced"); + OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_TRACE_THREAD_BARRIERS", + "Enable tracing calls to pthread_barrier functions.", true, + "backend", "parallelism", "gotcha", "advanced"); + + OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_TRACE_THREAD_JOIN", + "Enable tracing calls to pthread_join functions.", true, + "backend", "parallelism", "gotcha", "advanced"); + OMNITRACE_CONFIG_SETTING( bool, "OMNITRACE_SAMPLING_KEEP_INTERNAL", "Configure whether the statistical samples should include call-stack entries " @@ -601,7 +609,6 @@ configure_settings(bool _init) _config->get_max_thread_bookmarks() = 1; _config->get_timing_units() = "sec"; _config->get_memory_units() = "MB"; - _config->get_papi_events() = "PAPI_TOT_CYC"; // settings native to timemory but critically and/or extensively used by omnitrace auto _add_omnitrace_category = [&_config](auto itr) { @@ -685,21 +692,22 @@ configure_settings(bool _init) if(_fparanoid) _fparanoid >> _paranoid; } - if(_paranoid > 1) + if(_paranoid > 2) { OMNITRACE_BASIC_VERBOSE(0, "/proc/sys/kernel/perf_event_paranoid has a value of %i. " - "Disabling PAPI (requires a value <= 1)...\n", + "Disabling PAPI (requires a value <= 2)...\n", _paranoid); - OMNITRACE_BASIC_VERBOSE(0, - "In order to enable PAPI support, run 'echo N | sudo tee " - "/proc/sys/kernel/perf_event_paranoid' where N is < 2\n"); - tim::trait::runtime_enabled>::set(false); - tim::trait::runtime_enabled::set(false); - tim::trait::runtime_enabled::set(false); - tim::trait::runtime_enabled::set(false); - tim::trait::runtime_enabled::set(false); - tim::trait::runtime_enabled::set(false); + OMNITRACE_BASIC_VERBOSE( + 0, "In order to enable PAPI support, run 'echo N | sudo tee " + "/proc/sys/kernel/perf_event_paranoid' where N is <= 2\n"); + trait::runtime_enabled::set(false); + trait::runtime_enabled>::set(false); + trait::runtime_enabled::set(false); + trait::runtime_enabled::set(false); + trait::runtime_enabled::set(false); + trait::runtime_enabled::set(false); + trait::runtime_enabled::set(false); _config->get_papi_events() = std::string{}; } else @@ -812,6 +820,14 @@ configure_settings(bool _init) tim::set_env(std::string{ _dl_verbose->first }, _dl_verbose->second->as_string(), 0); + if(_config->get_papi_events().empty()) + { + trait::runtime_enabled::set(false); + trait::runtime_enabled>::set(false); + trait::runtime_enabled::set(false); + trait::runtime_enabled::set(false); + } + configure_mode_settings(); configure_signal_handler(); configure_disabled_settings(); @@ -1886,6 +1902,20 @@ get_trace_thread_spin_locks() return static_cast&>(*_v->second).get(); } +bool +get_trace_thread_barriers() +{ + static auto _v = get_config()->find("OMNITRACE_TRACE_THREAD_BARRIERS"); + return static_cast&>(*_v->second).get(); +} + +bool +get_trace_thread_join() +{ + static auto _v = get_config()->find("OMNITRACE_TRACE_THREAD_JOIN"); + return static_cast&>(*_v->second).get(); +} + bool get_debug_tid() { diff --git a/source/lib/omnitrace/library/config.hpp b/source/lib/omnitrace/library/config.hpp index 2c32c7f0c..d5859627c 100644 --- a/source/lib/omnitrace/library/config.hpp +++ b/source/lib/omnitrace/library/config.hpp @@ -322,6 +322,12 @@ get_trace_thread_rwlocks(); bool get_trace_thread_spin_locks(); +bool +get_trace_thread_barriers(); + +bool +get_trace_thread_join(); + std::string get_rocm_events(); } // namespace config