// NOTE(review): this is not a compilable C++ translation unit. It is a unified
// `git diff` covering six files under source/lib/omnitrace/library/. Its line
// breaks have been collapsed, and the angle-bracket contents (template parameter
// lists, <...> include targets) appear to have been stripped -- presumably by a
// lossy text extraction. Recover the original patch before applying or compiling.
//
// Summary of the visible hunks:
//  * components/category_region.hpp
//      - declares tim::quirk::perfetto and tim::quirk::timemory (both derive from
//        concepts::quirk_type);
//      - start()/stop() take std::string_view instead of const char*;
//      - the push/pop perfetto/timemory calls are additionally gated by
//        `if constexpr` flags (_ct_use_perfetto / _ct_use_timemory) that are true
//        when sizeof...(OptsT) == 0 or a tim::is_one_of check passes;
//      - adds audit() overloads taking a std::string_view name, plus one taking
//        quirk::config that forwards its arguments to audit().
//  * components/fwd.hpp
//      - adds a `pthread` category API and its "pthread" name trait.
//  * components/pthread_create_gotcha.cpp
//      - start_bundle()/stop_bundle() become variadic, return early only when both
//        get_use_timemory() and get_use_perfetto() are false, and call
//        category_region_t{}.audit(...) when extra arguments are supplied and the
//        bundle hash resolves to a non-null name;
//      - the bundle push/start/stop/store/pop calls now run only when
//        get_use_timemory() is true;
//      - wrapper::operator()() uses push_thread_state/pop_thread_state, stores the
//        routine's result in _ret, and invokes the cleanup lambda `_dtor()`
//        explicitly after m_routine returns (it was a scope::destructor before);
//      - pthread_create_gotcha::operator() records the initial thread state and
//        passes audit::incoming / audit::outgoing arguments to the bundle helpers.
//  * components/pthread_mutex_gotcha.cpp
//      - replaces omnitrace_push_region/omnitrace_pop_region with
//        bundle_t::audit(_data, audit::incoming{}, ...) and
//        bundle_t::audit(_data, audit::outgoing{}, _ret).
//  * perfetto.hpp
//      - adds a "pthread" perfetto category and moves the "mpi" entry after "kokkos".
//  * process_sampler.cpp
//      - includes library/runtime.hpp; sampler::poll uses
//        OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal) in place of
//        set_thread_state(ThreadState::Internal).
//
// NOTE(review): items to confirm before merging:
//  * start()/stop() pass name.data() to "%s" format arguments and to the
//    tracing::push_*/pop_* calls. std::string_view::data() is not guaranteed to be
//    null-terminated -- verify every caller supplies a terminated string.
//  * start()/stop() apply std::forward to the same `args` pack for both the
//    perfetto and the timemory call. When both backends are enabled, confirm the
//    first callee cannot move from an rvalue argument the second one still needs.
//  * the wrapper's cleanup now runs only when m_routine returns normally. Confirm
//    the intended behaviour when the thread leaves by another route (presumably
//    pthread_exit or cancellation -- TODO verify).
diff --git a/source/lib/omnitrace/library/components/category_region.hpp b/source/lib/omnitrace/library/components/category_region.hpp index eb9505c5a..d12529e50 100644 --- a/source/lib/omnitrace/library/components/category_region.hpp +++ b/source/lib/omnitrace/library/components/category_region.hpp @@ -28,8 +28,24 @@ #include "library/tracing.hpp" #include +#include +#include #include +#include + +namespace tim +{ +namespace quirk +{ +struct perfetto : concepts::quirk_type +{}; + +struct timemory : concepts::quirk_type +{}; +} // namespace quirk +} // namespace tim + namespace omnitrace { namespace audit = ::tim::audit; @@ -47,23 +63,32 @@ struct category_region : comp::base, void> static std::string label() { return JOIN('_', "omnitrace", category_name, "region"); } - template - static void start(const char* name, Args&&...); + template + static void start(std::string_view name, Args&&...); - template - static void stop(const char* name, Args&&...); + template + static void stop(std::string_view name, Args&&...); - template + template static void audit(const gotcha_data_t&, audit::incoming, Args&&...); - template + template static void audit(const gotcha_data_t&, audit::outgoing, Args&&...); + + template + static void audit(std::string_view, audit::incoming, Args&&...); + + template + static void audit(std::string_view, audit::outgoing, Args&&...); + + template + static void audit(quirk::config, Args&&...); }; template -template +template void -category_region::start(const char* name, Args&&... args) +category_region::start(std::string_view name, Args&&... args) { OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); @@ -72,12 +97,12 @@ category_region::start(const char* name, Args&&... 
args) { OMNITRACE_CONDITIONAL_BASIC_PRINT( tracing::debug_user, "omnitrace_push_region(%s) called during finalization\n", - name); + name.data()); return; } OMNITRACE_CONDITIONAL_BASIC_PRINT(tracing::debug_push, "omnitrace_push_region(%s)\n", - name); + name.data()); // the expectation here is that if the state is not active then the call // to omnitrace_init_tooling_hidden will activate all the appropriate @@ -86,78 +111,140 @@ category_region::start(const char* name, Args&&... args) { static auto _debug = get_debug_env() || get_debug_init(); OMNITRACE_CONDITIONAL_BASIC_PRINT( - _debug, "omnitrace_push_region(%s) ignored :: not active. state = %s\n", name, - std::to_string(get_state()).c_str()); + _debug, "omnitrace_push_region(%s) ignored :: not active. state = %s\n", + name.data(), std::to_string(get_state()).c_str()); return; } - OMNITRACE_DEBUG("[%s] omnitrace_push_region(%s)\n", category_name, name); + OMNITRACE_DEBUG("[%s] omnitrace_push_region(%s)\n", category_name, name.data()); auto _use_timemory = get_use_timemory(); auto _use_perfetto = get_use_perfetto(); + constexpr bool _ct_use_timemory = + (sizeof...(OptsT) == 0 || + tim::is_one_of>::value); + + constexpr bool _ct_use_perfetto = + (sizeof...(OptsT) == 0 || + tim::is_one_of>::value); + if(_use_timemory || _use_perfetto) tracing::thread_init(); + if(_use_perfetto) { - tracing::push_perfetto(CategoryT{}, name, std::forward(args)...); + if constexpr(_ct_use_perfetto) + { + tracing::push_perfetto(CategoryT{}, name.data(), std::forward(args)...); + } } if(_use_timemory) { - tracing::push_timemory(name, std::forward(args)...); + if constexpr(_ct_use_timemory) + { + tracing::push_timemory(name.data(), std::forward(args)...); + } } if(_use_timemory || _use_perfetto) tracing::thread_init_sampling(); } template -template +template void -category_region::stop(const char* name, Args&&... args) +category_region::stop(std::string_view name, Args&&... 
args) { OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + constexpr bool _ct_use_timemory = + (sizeof...(OptsT) == 0 || + tim::is_one_of>::value); + + constexpr bool _ct_use_perfetto = + (sizeof...(OptsT) == 0 || + tim::is_one_of>::value); + // only execute when active if(get_state() == State::Active) { OMNITRACE_CONDITIONAL_PRINT(tracing::debug_pop || get_debug(), - "omnitrace_pop_region(%s)\n", name); + "omnitrace_pop_region(%s)\n", name.data()); if(get_use_timemory()) { - tracing::pop_timemory(name, std::forward(args)...); + if constexpr(_ct_use_timemory) + { + tracing::pop_timemory(name.data(), std::forward(args)...); + } } if(get_use_perfetto()) { - tracing::pop_perfetto(CategoryT{}, name, std::forward(args)...); + if constexpr(_ct_use_perfetto) + { + tracing::pop_perfetto(CategoryT{}, name.data(), + std::forward(args)...); + } } } else { static auto _debug = get_debug_env(); OMNITRACE_CONDITIONAL_BASIC_PRINT( - _debug, "omnitrace_pop_region(%s) ignored :: state = %s\n", name, + _debug, "omnitrace_pop_region(%s) ignored :: state = %s\n", name.data(), std::to_string(get_state()).c_str()); } } template -template +template void category_region::audit(const gotcha_data_t& _data, audit::incoming, Args&&... _args) { OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - start(_data.tool_id.c_str(), "args", - JOIN(", ", - JOIN('=', tim::try_demangle>(), _args)...)); + start( + _data.tool_id.c_str(), "args", + JOIN(", ", + JOIN('=', tim::try_demangle>(), _args)...)); } template -template +template void category_region::audit(const gotcha_data_t& _data, audit::outgoing, Args&&... _args) { OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - stop(_data.tool_id.c_str(), "return", JOIN(", ", _args...)); + stop(_data.tool_id.c_str(), "return", JOIN(", ", _args...)); +} + +template +template +void +category_region::audit(std::string_view _name, audit::incoming, + Args&&... 
_args) +{ + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + start( + _name.data(), "args", + JOIN(", ", + JOIN('=', tim::try_demangle>(), _args)...)); +} + +template +template +void +category_region::audit(std::string_view _name, audit::outgoing, + Args&&... _args) +{ + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + stop(_name.data(), "return", JOIN(", ", _args...)); +} + +template +template +void +category_region::audit(quirk::config, Args&&... _args) +{ + audit(std::forward(_args)...); } } // namespace component } // namespace omnitrace diff --git a/source/lib/omnitrace/library/components/fwd.hpp b/source/lib/omnitrace/library/components/fwd.hpp index c21014fa1..d8e6abd5d 100644 --- a/source/lib/omnitrace/library/components/fwd.hpp +++ b/source/lib/omnitrace/library/components/fwd.hpp @@ -70,6 +70,7 @@ TIMEMORY_DEFINE_NS_API(category, rocm_hip) TIMEMORY_DEFINE_NS_API(category, rocm_hsa) TIMEMORY_DEFINE_NS_API(category, rocm_smi) TIMEMORY_DEFINE_NS_API(category, rocm_roctx) +TIMEMORY_DEFINE_NS_API(category, pthread) TIMEMORY_DEFINE_NS_API(category, kokkos) TIMEMORY_DEFINE_NS_API(category, mpi) TIMEMORY_DEFINE_NS_API(category, ompt) @@ -88,6 +89,7 @@ TIMEMORY_DEFINE_NAME_TRAIT("rocm_smi", category::rocm_smi); TIMEMORY_DEFINE_NAME_TRAIT("rocm_roctx", category::rocm_roctx); TIMEMORY_DEFINE_NAME_TRAIT("sampling", category::sampling); TIMEMORY_DEFINE_NAME_TRAIT("thread_sampling", category::thread_sampling); +TIMEMORY_DEFINE_NAME_TRAIT("pthread", category::pthread); TIMEMORY_DEFINE_NAME_TRAIT("kokkos", category::kokkos); TIMEMORY_DEFINE_NAME_TRAIT("mpi", category::mpi); TIMEMORY_DEFINE_NAME_TRAIT("ompt", category::ompt); diff --git a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp index 40d0c28e6..0a1324deb 100644 --- a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp @@ -21,6 
+21,7 @@ // SOFTWARE. #include "library/components/pthread_create_gotcha.hpp" +#include "library/components/category_region.hpp" #include "library/components/omnitrace.hpp" #include "library/components/pthread_gotcha.hpp" #include "library/components/roctracer.hpp" @@ -53,6 +54,8 @@ namespace mpl = tim::mpl; using bundle_t = tim::lightweight_tuple; using wall_pw_t = mpl::piecewise_select; // only wall-clock using main_pw_t = mpl::piecewise_ignore; // exclude wall-clock +using category_region_t = + tim::lightweight_tuple>; namespace { @@ -67,29 +70,63 @@ auto bundles_dtor = scope::destructor{ []() { bundles_mutex = nullptr; } }; +template inline void -start_bundle(bundle_t& _bundle) +start_bundle(bundle_t& _bundle, Args&&... _args) { - if(!get_use_timemory()) return; + if(!get_use_timemory() && !get_use_perfetto()) return; OMNITRACE_BASIC_VERBOSE_F(3, "starting bundle '%s'...\n", _bundle.key().c_str()); - _bundle.push(); - _bundle.start(); + if constexpr(sizeof...(Args) > 0) + { + const char* _name = nullptr; + if(tim::get_hash_identifier(_bundle.hash(), _name) && _name != nullptr) + { + category_region_t{}.audit(quirk::config{}, + std::string_view{ _name }, _args...); + } + } + else + { + tim::consume_parameters(_args...); + } + if(get_use_timemory()) + { + _bundle.push(); + _bundle.start(); + } } +template inline void -stop_bundle(bundle_t& _bundle, int64_t _tid) +stop_bundle(bundle_t& _bundle, int64_t _tid, Args&&... 
_args) { - if(!get_use_timemory()) return; + if(!get_use_timemory() && !get_use_perfetto()) return; OMNITRACE_BASIC_VERBOSE_F(3, "stopping bundle '%s' in thread %li...\n", _bundle.key().c_str(), _tid); - _bundle.stop(wall_pw_t{}); // stop wall-clock so we can get the value - // update roctracer_data - _bundle.store(std::plus{}, - _bundle.get()->get() * units::sec); - // stop all other components including roctracer_data after update - _bundle.stop(main_pw_t{}); - // exclude popping wall-clock - _bundle.pop(_tid); + if(get_use_timemory()) + { + _bundle.stop(wall_pw_t{}); // stop wall-clock so we can get the value + // update roctracer_data + _bundle.store(std::plus{}, + _bundle.get()->get() * units::sec); + // stop all other components including roctracer_data after update + _bundle.stop(main_pw_t{}); + // exclude popping wall-clock + _bundle.pop(_tid); + } + if constexpr(sizeof...(Args) > 0) + { + const char* _name = nullptr; + if(tim::get_hash_identifier(_bundle.hash(), _name) && _name != nullptr) + { + category_region_t{}.audit(quirk::config{}, + std::string_view{ _name }, _args...); + } + } + else + { + tim::consume_parameters(_args...); + } } } // namespace @@ -117,14 +154,15 @@ pthread_create_gotcha::wrapper::operator()() const return m_routine(m_arg); } - set_thread_state(ThreadState::Internal); + push_thread_state(omnitrace::ThreadState::Internal); int64_t _tid = -1; + void* _ret = nullptr; auto _is_sampling = false; auto _bundle = std::shared_ptr{}; auto _signals = std::set{}; auto _coverage = (get_mode() == omnitrace::Mode::Coverage); - auto _dtor = scope::destructor{ [&]() { + auto _dtor = [&]() { set_thread_state(ThreadState::Internal); if(_is_sampling) { @@ -134,15 +172,18 @@ pthread_create_gotcha::wrapper::operator()() const if(_tid >= 0) { + auto _active = (get_state() == omnitrace::State::Active && + bundles != nullptr && bundles_mutex != nullptr); + if(!_active) return; get_execution_time(_tid)->second = comp::wall_clock::record(); auto& _thr_bundle 
= thread_bundle_data_t::instance(); if(_thr_bundle && _thr_bundle->get() && _thr_bundle->get()->get_is_running()) _thr_bundle->stop(); + if(_bundle) stop_bundle(*_bundle, _tid); pthread_create_gotcha::shutdown(_tid); } - set_thread_state(ThreadState::Completed); - } }; + }; auto _active = (get_state() == omnitrace::State::Active && bundles != nullptr && bundles_mutex != nullptr); @@ -180,11 +221,25 @@ pthread_create_gotcha::wrapper::operator()() const } } + // notify the wrapper that all internal work is completed if(m_promise) m_promise->set_value(); - set_thread_state(ThreadState::Enabled); + // Internal -> Enabled + pop_thread_state(); + + push_thread_state(omnitrace::ThreadState::Enabled); + // execute the original function - return m_routine(m_arg); + _ret = m_routine(m_arg); + + pop_thread_state(); + + // execute the destructor actions + _dtor(); + + set_thread_state(omnitrace::ThreadState::Completed); + + return _ret; } void* @@ -269,6 +324,7 @@ int pthread_create_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr, void* (*start_routine)(void*), void* arg) const { + auto _initial_thread_state = get_thread_state(); OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); bundle_t _bundle{ "pthread_create" }; auto _enable_sampling = pthread_gotcha::sampling_enabled_on_child_threads(); @@ -288,8 +344,14 @@ pthread_create_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr, if(!get_use_sampling() || !_enable_sampling) { auto* _obj = new wrapper(start_routine, arg, _enable_sampling, _tid, nullptr); + if(_active && !_coverage && _enable_sampling && + _initial_thread_state == ThreadState::Enabled) + start_bundle(_bundle, audit::incoming{}, thread, attr, start_routine, arg); // create the thread auto _ret = (*m_wrappee)(thread, attr, &wrapper::wrap, static_cast(_obj)); + if(_active && !_coverage && _enable_sampling && + _initial_thread_state == ThreadState::Enabled) + stop_bundle(_bundle, _tid, audit::outgoing{}, _ret); return _ret; } @@ 
-298,7 +360,7 @@ pthread_create_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr, auto _blocked_signals = get_sampling_signals(); tim::sampling::block_signals(_blocked_signals, tim::sampling::sigmask_scope::process); - start_bundle(_bundle); + start_bundle(_bundle, audit::incoming{}, thread, attr, start_routine, arg); // promise set by thread when signal handler is configured auto _promise = std::promise{}; @@ -312,7 +374,7 @@ pthread_create_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr, OMNITRACE_DEBUG("waiting for child to signal it is setup...\n"); _fut.wait(); - stop_bundle(_bundle, threading::get_id()); + stop_bundle(_bundle, threading::get_id(), audit::outgoing{}, _ret); // unblock the signals in the entire process OMNITRACE_DEBUG("unblocking signals...\n"); diff --git a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp index 6c87ee3de..1c033392c 100644 --- a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp @@ -22,6 +22,7 @@ #include "library/components/pthread_mutex_gotcha.hpp" #include "library.hpp" +#include "library/components/category_region.hpp" #include "library/components/pthread_gotcha.hpp" #include "library/config.hpp" #include "library/critical_trace.hpp" @@ -180,6 +181,8 @@ auto pthread_mutex_gotcha::operator()(uintptr_t&& _id, const comp::gotcha_data& _data, int (*_callee)(Args...), Args... 
_args) const { + using bundle_t = omnitrace::component::category_region; + if(is_disabled()) { if(!_callee) @@ -203,9 +206,9 @@ pthread_mutex_gotcha::operator()(uintptr_t&& _id, const comp::gotcha_data& _data _ts = comp::wall_clock::record(); } - omnitrace_push_region(_data.tool_id.c_str()); + bundle_t::audit(_data, audit::incoming{}, _args...); auto _ret = (*_callee)(_args...); - omnitrace_pop_region(_data.tool_id.c_str()); + bundle_t::audit(_data, audit::outgoing{}, _ret); if(_id < std::numeric_limits::max() && get_use_critical_trace()) { diff --git a/source/lib/omnitrace/library/perfetto.hpp b/source/lib/omnitrace/library/perfetto.hpp index 34db2c8f5..c237c3df8 100644 --- a/source/lib/omnitrace/library/perfetto.hpp +++ b/source/lib/omnitrace/library/perfetto.hpp @@ -79,8 +79,9 @@ perfetto::Category("process_kernel_cpu_time") \ .SetDescription("CPU time of functions executing in kernel-space in " \ "process in seconds (collected in background thread)"), \ - perfetto::Category("mpi").SetDescription("MPI regions"), \ + perfetto::Category("pthread").SetDescription("Pthread functions"), \ perfetto::Category("kokkos").SetDescription("Kokkos regions"), \ + perfetto::Category("mpi").SetDescription("MPI regions"), \ perfetto::Category("ompt").SetDescription("OpenMP Tools regions"), \ perfetto::Category("critical-trace").SetDescription("Combined critical traces"), \ perfetto::Category("host-critical-trace") \ diff --git a/source/lib/omnitrace/library/process_sampler.cpp b/source/lib/omnitrace/library/process_sampler.cpp index 0d1e5a227..b1af4d622 100644 --- a/source/lib/omnitrace/library/process_sampler.cpp +++ b/source/lib/omnitrace/library/process_sampler.cpp @@ -26,6 +26,7 @@ #include "library/config.hpp" #include "library/cpu_freq.hpp" #include "library/debug.hpp" +#include "library/runtime.hpp" #include #include @@ -73,7 +74,8 @@ get_sampler_is_sampling() void sampler::poll(std::atomic* _state, nsec_t _interval, promise_t* _ready) { - 
set_thread_state(ThreadState::Internal); + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + threading::set_thread_name("omni.sampler"); // notify thread started