diff --git a/source/lib/omnitrace/library/components/backtrace.cpp b/source/lib/omnitrace/library/components/backtrace.cpp index 6a91c45a7..8445dd4d0 100644 --- a/source/lib/omnitrace/library/components/backtrace.cpp +++ b/source/lib/omnitrace/library/components/backtrace.cpp @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include #include @@ -110,6 +112,9 @@ using papi_vector_instances = thread_data; namespace { +struct perfetto_rusage +{}; + unique_ptr_t& get_papi_vector(int64_t _tid) { @@ -270,9 +275,13 @@ backtrace::sample(int signum) m_tid = threading::get_id(); m_ts = clock_type::now(); m_thr_cpu_ts = tim::get_clock_thread_now(); - m_mem_peak = tim::get_peak_rss(RUSAGE_THREAD); - m_data = tim::get_unw_backtrace(); - auto* itr = m_data.begin(); + auto _cache = tim::rusage_cache{ RUSAGE_THREAD }; + m_mem_peak = _cache.get_peak_rss(); + m_ctx_swch = _cache.get_num_priority_context_switch() + + _cache.get_num_voluntary_context_switch(); + m_page_flt = _cache.get_num_major_page_faults() + _cache.get_num_minor_page_faults(); + m_data = tim::get_unw_backtrace(); + auto* itr = m_data.begin(); for(; itr != m_data.end(); ++itr, ++m_size) { if(strlen(*itr) == 0) break; @@ -299,16 +308,7 @@ backtrace::sample(int signum) if(tim::trait::runtime_enabled::get()) { assert(get_papi_vector(m_tid).get() != nullptr); - static thread_local auto& _pv = get_papi_vector(m_tid); - auto _hw_counter = _pv->record(); - auto _num_hw_counters = std::min(_hw_counter.size(), num_hw_counters); - for(size_t i = 0; i < _num_hw_counters; ++i) - { - auto& _last = get_last_hwcounters().at(i); - auto itr = _hw_counter.at(i); - m_hw_counter[i] = itr - _last; - _last = itr; - } + m_hw_counter = get_papi_vector(m_tid)->record(); } } } @@ -477,35 +477,51 @@ backtrace::post_process(int64_t _tid) : std::vector{}; auto _process_perfetto_counters = [&](const std::vector& _data) { - if(!perfetto_counter_track::exists(_tid)) + if(!perfetto_counter_track::exists(_tid)) { - auto _thrname = TIMEMORY_JOIN("", "[Thread ", _tid, "] "); - auto addendum = [&](const std::string& _v) { return _thrname + _v + " (S)"; }; - perfetto_counter_track::emplace( - _tid, addendum("Peak Memory Usage"), "MB"); + perfetto_counter_track::emplace( + _tid, JOIN("", "Peak Memory Usage", " [Thread ", _tid, "] (S)"), "MB"); + perfetto_counter_track::emplace( + _tid, JOIN("", "Context Switches", " [Thread ", _tid, "] (S)")); + perfetto_counter_track::emplace( + _tid, JOIN("", "Page Faults", " [Thread ", _tid, "] (S)")); } if(!perfetto_counter_track::exists(_tid) && tim::trait::runtime_enabled::get()) { - auto _thrname = TIMEMORY_JOIN("", "[Thread ", _tid, "] "); - auto addendum = [&](const std::string& _v) { return _thrname + _v + " (S)"; }; for(auto& itr : _hw_cnt_labels) { perfetto_counter_track::emplace( - _tid, addendum(tim::papi::get_event_info(itr).short_descr), ""); + _tid, + JOIN("", tim::papi::get_event_info(itr).short_descr, " [Thread ", + _tid, "] (S)"), + ""); } } + uint64_t _mean_ts = 0; + const backtrace* _last_bt = nullptr; for(const auto& ditr : _data) { const auto* _bt = ditr->get(); if(_bt->m_tid != _tid) continue; auto _ts = static_cast(_bt->m_ts.time_since_epoch().count()); + _last_bt = _bt; + _mean_ts += _ts; + + TRACE_COUNTER("sampling", + perfetto_counter_track::at(_tid, 0), _ts, + _bt->m_mem_peak / units::megabyte); - TRACE_COUNTER("sampling", perfetto_counter_track::at(_tid, 0), - _ts, _bt->m_mem_peak / units::megabyte); + TRACE_COUNTER("sampling", + perfetto_counter_track::at(_tid, 1), _ts, + _bt->m_ctx_swch); + + TRACE_COUNTER("sampling", + perfetto_counter_track::at(_tid, 2), _ts, + _bt->m_page_flt); if(tim::trait::runtime_enabled::get()) { @@ -521,6 +537,38 @@ backtrace::post_process(int64_t _tid) } } } + + if(_tid > 0 && _last_bt) + { + auto _ts = static_cast(_last_bt->m_ts.time_since_epoch().count()) + + (_mean_ts / _data.size()); + uint64_t _zero = 0; + TRACE_COUNTER("sampling", + perfetto_counter_track::at(_tid, 0), _ts, + _zero); + + TRACE_COUNTER("sampling", + perfetto_counter_track::at(_tid, 1), _ts, + _zero); + + TRACE_COUNTER("sampling", + perfetto_counter_track::at(_tid, 2), _ts, + _zero); + + if(tim::trait::runtime_enabled::get()) + { + for(size_t i = 0; i < perfetto_counter_track::size(_tid); + ++i) + { + if(i < _last_bt->m_hw_counter.size()) + { + TRACE_COUNTER("sampling", + perfetto_counter_track::at(_tid, i), + _ts, _zero); + } + } + } + } }; auto _process_perfetto = [&](const std::vector& _data, @@ -611,8 +659,7 @@ backtrace::post_process(int64_t _tid) if(get_timeline_sampling()) _scope += scope::timeline{}; if(get_flat_sampling()) _scope += scope::flat{}; - time_point_type _last_wall_ts = _init->get_timestamp(); - int64_t _last_cpu_ts = _init->get_thread_cpu_timestamp(); + backtrace* _last_bt = _init.get(); for(auto& ditr : _data) { using bundle_t = tim::lightweight_tupleget(); - if(_bt->m_ts < _last_wall_ts) continue; + if(_bt->m_ts < _last_bt->m_ts) continue; - double _elapsed_wc = (_bt->m_ts - _last_wall_ts).count(); - double _elapsed_cc = (_bt->m_thr_cpu_ts - _last_cpu_ts); + double _elapsed_wc = (_bt->m_ts - _last_bt->m_ts).count(); + double _elapsed_cc = (_bt->m_thr_cpu_ts - _last_bt->m_thr_cpu_ts); std::vector _tc{}; _tc.reserve(_bt->size()); @@ -670,17 +717,25 @@ backtrace::post_process(int64_t _tid) } if constexpr(tim::trait::is_available::value) { + auto _hw_cnt_vals = _bt->m_hw_counter; + if(_last_bt && _bt->m_hw_counter.size() == _last_bt->m_hw_counter.size()) + { + for(size_t k = 0; k < _bt->m_hw_counter.size(); ++k) + { + if(_last_bt->m_hw_counter[k] > _hw_cnt_vals[k]) + _hw_cnt_vals[k] -= _last_bt->m_hw_counter[k]; + } + } auto* _hw_counter = itr.get(); if(_hw_counter) { - _hw_counter->set_value(_bt->m_hw_counter); - _hw_counter->set_accum(_bt->m_hw_counter); + _hw_counter->set_value(_hw_cnt_vals); + _hw_counter->set_accum(_hw_cnt_vals); } } itr.pop(); } - _last_wall_ts = _bt->m_ts; - _last_cpu_ts = _bt->m_thr_cpu_ts; + _last_bt = _bt; } for(auto&& ditr : _data) diff --git a/source/lib/omnitrace/library/components/backtrace.hpp b/source/lib/omnitrace/library/components/backtrace.hpp index ac9f93a62..ba3955279 100644 --- a/source/lib/omnitrace/library/components/backtrace.hpp +++ b/source/lib/omnitrace/library/components/backtrace.hpp @@ -95,6 +95,8 @@ struct backtrace int64_t m_tid = 0; int64_t m_thr_cpu_ts = 0; int64_t m_mem_peak = 0; + int64_t m_ctx_swch = 0; + int64_t m_page_flt = 0; size_t m_size = 0; time_point_type m_ts = {}; data_t m_data = {}; diff --git a/source/lib/omnitrace/library/components/rocm_smi.cpp b/source/lib/omnitrace/library/components/rocm_smi.cpp index d1f085b4a..7bb0f1fb4 100644 --- a/source/lib/omnitrace/library/components/rocm_smi.cpp +++ b/source/lib/omnitrace/library/components/rocm_smi.cpp @@ -229,9 +229,8 @@ data::post_process(uint32_t _dev_id) if(itr.m_dev_id != _dev_id) continue; if(!counter_track::exists(_dev_id)) { - auto _devname = TIMEMORY_JOIN("", "[GPU ", _dev_id, "] "); auto addendum = [&](const char* _v) { - return _devname + std::string{ _v }; + return JOIN(" ", "GPU", _v, JOIN("", '[', _dev_id, ']'), "(S)"); }; counter_track::emplace(_dev_id, addendum("Busy"), "%"); counter_track::emplace(_dev_id, addendum("Temperature"), "deg C"); diff --git a/source/lib/omnitrace/library/cpu_freq.cpp b/source/lib/omnitrace/library/cpu_freq.cpp index dcd6c7ce1..cd2e84e4a 100644 --- a/source/lib/omnitrace/library/cpu_freq.cpp +++ b/source/lib/omnitrace/library/cpu_freq.cpp @@ -29,10 +29,14 @@ #include "library/timemory.hpp" #include +#include +#include #include +#include #include #include +#include #include #include #include @@ -43,6 +47,9 @@ namespace omnitrace { namespace cpu_freq { +template +using type_list = tim::type_list; + namespace { struct cpu_freq @@ -51,19 +58,34 @@ struct cpu_page {}; struct cpu_virt {}; -using cpu_data_tuple_t = std::tuple>; +struct cpu_context_switch +{}; +struct cpu_page_fault +{}; +struct cpu_user_mode_time +{}; +struct cpu_kernel_mode_time +{}; +using cpu_data_tuple_t = std::tuple>; std::set enabled_cpu_freqs = {}; std::deque cpu_data = {}; int64_t ncpu = threading::affinity::hw_concurrency(); + +template +void init_perfetto_counter_tracks(type_list) +{ + (perfetto_counter_track::init(), ...); +} } // namespace void setup() { - perfetto_counter_track::init(); - perfetto_counter_track::init(); - perfetto_counter_track::init(); + init_perfetto_counter_tracks( + type_list{}); } void @@ -166,14 +188,51 @@ sample() auto _ts = tim::get_clock_real_now(); + tim::rusage_cache _rcache{ RUSAGE_SELF }; + // user and kernel mode times are in microseconds cpu_data.emplace_back(_ts, tim::get_page_rss(), tim::get_virt_mem(), - std::move(_freqs)); + _rcache.get_num_priority_context_switch() + + _rcache.get_num_voluntary_context_switch(), + _rcache.get_num_major_page_faults() + + _rcache.get_num_minor_page_faults(), + _rcache.get_user_mode_time() * 1000, + _rcache.get_kernel_mode_time() * 1000, std::move(_freqs)); } void shutdown() {} +namespace +{ +template +void +config_perfetto_counter_tracks(type_list, std::array _labels, + std::array _units) +{ + auto _config = [&](auto _t) { + using type = std::decay_t; + using track = perfetto_counter_track; + constexpr auto _idx = tim::index_of>::value; + if(!track::exists(0)) + { + auto addendum = [&](const char* _v) { return JOIN(" ", "CPU", _v, "(S)"); }; + track::emplace(0, addendum(_labels.at(_idx)), _units.at(_idx)); + } + }; + + (_config(Types{}), ...); +} + +template +void +write_perfetto_counter_track(Args... _args) +{ + using track = perfetto_counter_track; + TRACE_COUNTER("sampling", track::at(0, 0), _args...); +} +} // namespace + void post_process() { @@ -183,48 +242,56 @@ post_process() using freq_track = perfetto_counter_track; if(!freq_track::exists(_idx)) { - auto _devname = TIMEMORY_JOIN("", "[CPU ", _idx, "] "); - auto addendum = [&](const char* _v) { return _devname + std::string{ _v }; }; - freq_track::emplace(_idx, addendum("Frequency (S)"), "MHz"); + auto addendum = [&](const char* _v) { + return JOIN(" ", "CPU", _v, JOIN("", '[', _idx, ']'), "(S)"); + }; + freq_track::emplace(_idx, addendum("Frequency"), "MHz"); } for(auto& itr : cpu_data) { uint64_t _ts = std::get<0>(itr); - double _freq = std::get<3>(itr).at(_offset); + double _freq = std::get<7>(itr).at(_offset); TRACE_COUNTER("sampling", freq_track::at(_idx, 0), _ts, _freq); } }; - auto _process_cpu_mem_usage = []() { - using page_track = perfetto_counter_track; - using virt_track = perfetto_counter_track; - - if(!page_track::exists(0)) - { - auto _devname = TIMEMORY_JOIN("", "[CPU] "); - auto addendum = [&](const char* _v) { return _devname + std::string{ _v }; }; - page_track::emplace(0, addendum("Memory Usage (S)"), "MB"); - } - - if(!virt_track::exists(0)) - { - auto _devname = TIMEMORY_JOIN("", "[CPU] "); - auto addendum = [&](const char* _v) { return _devname + std::string{ _v }; }; - virt_track::emplace(0, addendum("Virtual Memory Usage (S)"), "MB"); - } + auto _process_cpu_rusage = []() { + config_perfetto_counter_tracks( + type_list{}, + { "Memory Usage", "Virtual Memory Usage", "Context Switches", "Page Faults", + "User CPU Time", "Kernel CPU Time" }, + { "MB", "MB", "", "", "sec", "sec" }); + cpu_data_tuple_t* _last = nullptr; for(auto& itr : cpu_data) { uint64_t _ts = std::get<0>(itr); double _page = std::get<1>(itr); double _virt = std::get<2>(itr); - TRACE_COUNTER("sampling", page_track::at(0, 0), _ts, _page / units::megabyte); - TRACE_COUNTER("sampling", virt_track::at(0, 0), _ts, _virt / units::megabyte); + uint64_t _cntx = std::get<3>(itr); + uint64_t _flts = std::get<4>(itr); + double _user = std::get<5>(itr); + double _kern = std::get<6>(itr); + if(_last) + { + _cntx -= std::get<3>(*_last); + _flts -= std::get<4>(*_last); + _user -= std::get<5>(*_last); + _kern -= std::get<6>(*_last); + } + write_perfetto_counter_track(_ts, _page / units::megabyte); + write_perfetto_counter_track(_ts, _virt / units::megabyte); + write_perfetto_counter_track(_ts, _cntx); + write_perfetto_counter_track(_ts, _flts); + write_perfetto_counter_track(_ts, _user / units::sec); + write_perfetto_counter_track(_ts, _kern / units::sec); + _last = &itr; } }; - _process_cpu_mem_usage(); + _process_cpu_rusage(); for(auto itr = enabled_cpu_freqs.begin(); itr != enabled_cpu_freqs.end(); ++itr) { auto _idx = *itr;