Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework sampling trace counter names + new trace counters #30

Merged
merged 2 commits into from
Jun 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 87 additions & 32 deletions source/lib/omnitrace/library/components/backtrace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
#include <timemory/components/papi/extern.hpp>
#include <timemory/components/papi/papi_array.hpp>
#include <timemory/components/papi/papi_vector.hpp>
#include <timemory/components/rusage/components.hpp>
#include <timemory/components/rusage/types.hpp>
#include <timemory/components/timing/backends.hpp>
#include <timemory/components/trip_count/extern.hpp>
#include <timemory/macros.hpp>
Expand Down Expand Up @@ -110,6 +112,9 @@ using papi_vector_instances = thread_data<hw_counters, api::sampling>;

namespace
{
/// Empty tag type with no data or behavior of its own.
/// It is used as the template argument of `perfetto_counter_track<...>` to
/// select the set of per-thread sampling counter tracks registered for
/// "Peak Memory Usage", "Context Switches", and "Page Faults".
struct perfetto_rusage
{};

unique_ptr_t<hw_counters>&
get_papi_vector(int64_t _tid)
{
Expand Down Expand Up @@ -270,9 +275,13 @@ backtrace::sample(int signum)
m_tid = threading::get_id();
m_ts = clock_type::now();
m_thr_cpu_ts = tim::get_clock_thread_now<int64_t, std::nano>();
m_mem_peak = tim::get_peak_rss(RUSAGE_THREAD);
m_data = tim::get_unw_backtrace<stack_depth, 4, false>();
auto* itr = m_data.begin();
auto _cache = tim::rusage_cache{ RUSAGE_THREAD };
m_mem_peak = _cache.get_peak_rss();
m_ctx_swch = _cache.get_num_priority_context_switch() +
_cache.get_num_voluntary_context_switch();
m_page_flt = _cache.get_num_major_page_faults() + _cache.get_num_minor_page_faults();
m_data = tim::get_unw_backtrace<stack_depth, 4, false>();
auto* itr = m_data.begin();
for(; itr != m_data.end(); ++itr, ++m_size)
{
if(strlen(*itr) == 0) break;
Expand All @@ -299,16 +308,7 @@ backtrace::sample(int signum)
if(tim::trait::runtime_enabled<hw_counters>::get())
{
assert(get_papi_vector(m_tid).get() != nullptr);
static thread_local auto& _pv = get_papi_vector(m_tid);
auto _hw_counter = _pv->record();
auto _num_hw_counters = std::min<size_t>(_hw_counter.size(), num_hw_counters);
for(size_t i = 0; i < _num_hw_counters; ++i)
{
auto& _last = get_last_hwcounters().at(i);
auto itr = _hw_counter.at(i);
m_hw_counter[i] = itr - _last;
_last = itr;
}
m_hw_counter = get_papi_vector(m_tid)->record();
}
}
}
Expand Down Expand Up @@ -477,35 +477,51 @@ backtrace::post_process(int64_t _tid)
: std::vector<std::string>{};

auto _process_perfetto_counters = [&](const std::vector<sampling::bundle_t*>& _data) {
if(!perfetto_counter_track<comp::peak_rss>::exists(_tid))
if(!perfetto_counter_track<perfetto_rusage>::exists(_tid))
{
auto _thrname = TIMEMORY_JOIN("", "[Thread ", _tid, "] ");
auto addendum = [&](const std::string& _v) { return _thrname + _v + " (S)"; };
perfetto_counter_track<comp::peak_rss>::emplace(
_tid, addendum("Peak Memory Usage"), "MB");
perfetto_counter_track<perfetto_rusage>::emplace(
_tid, JOIN("", "Peak Memory Usage", " [Thread ", _tid, "] (S)"), "MB");
perfetto_counter_track<perfetto_rusage>::emplace(
_tid, JOIN("", "Context Switches", " [Thread ", _tid, "] (S)"));
perfetto_counter_track<perfetto_rusage>::emplace(
_tid, JOIN("", "Page Faults", " [Thread ", _tid, "] (S)"));
}

if(!perfetto_counter_track<hw_counters>::exists(_tid) &&
tim::trait::runtime_enabled<hw_counters>::get())
{
auto _thrname = TIMEMORY_JOIN("", "[Thread ", _tid, "] ");
auto addendum = [&](const std::string& _v) { return _thrname + _v + " (S)"; };
for(auto& itr : _hw_cnt_labels)
{
perfetto_counter_track<hw_counters>::emplace(
_tid, addendum(tim::papi::get_event_info(itr).short_descr), "");
_tid,
JOIN("", tim::papi::get_event_info(itr).short_descr, " [Thread ",
_tid, "] (S)"),
"");
}
}

uint64_t _mean_ts = 0;
const backtrace* _last_bt = nullptr;
for(const auto& ditr : _data)
{
const auto* _bt = ditr->get<backtrace>();
if(_bt->m_tid != _tid) continue;

auto _ts = static_cast<uint64_t>(_bt->m_ts.time_since_epoch().count());
_last_bt = _bt;
_mean_ts += _ts;

TRACE_COUNTER("sampling",
perfetto_counter_track<perfetto_rusage>::at(_tid, 0), _ts,
_bt->m_mem_peak / units::megabyte);

TRACE_COUNTER("sampling", perfetto_counter_track<comp::peak_rss>::at(_tid, 0),
_ts, _bt->m_mem_peak / units::megabyte);
TRACE_COUNTER("sampling",
perfetto_counter_track<perfetto_rusage>::at(_tid, 1), _ts,
_bt->m_ctx_swch);

TRACE_COUNTER("sampling",
perfetto_counter_track<perfetto_rusage>::at(_tid, 2), _ts,
_bt->m_page_flt);

if(tim::trait::runtime_enabled<hw_counters>::get())
{
Expand All @@ -521,6 +537,38 @@ backtrace::post_process(int64_t _tid)
}
}
}

if(_tid > 0 && _last_bt)
{
auto _ts = static_cast<uint64_t>(_last_bt->m_ts.time_since_epoch().count()) +
(_mean_ts / _data.size());
uint64_t _zero = 0;
TRACE_COUNTER("sampling",
perfetto_counter_track<perfetto_rusage>::at(_tid, 0), _ts,
_zero);

TRACE_COUNTER("sampling",
perfetto_counter_track<perfetto_rusage>::at(_tid, 1), _ts,
_zero);

TRACE_COUNTER("sampling",
perfetto_counter_track<perfetto_rusage>::at(_tid, 2), _ts,
_zero);

if(tim::trait::runtime_enabled<hw_counters>::get())
{
for(size_t i = 0; i < perfetto_counter_track<hw_counters>::size(_tid);
++i)
{
if(i < _last_bt->m_hw_counter.size())
{
TRACE_COUNTER("sampling",
perfetto_counter_track<hw_counters>::at(_tid, i),
_ts, _zero);
}
}
}
}
};

auto _process_perfetto = [&](const std::vector<sampling::bundle_t*>& _data,
Expand Down Expand Up @@ -611,19 +659,18 @@ backtrace::post_process(int64_t _tid)
if(get_timeline_sampling()) _scope += scope::timeline{};
if(get_flat_sampling()) _scope += scope::flat{};

time_point_type _last_wall_ts = _init->get_timestamp();
int64_t _last_cpu_ts = _init->get_thread_cpu_timestamp();
backtrace* _last_bt = _init.get();
for(auto& ditr : _data)
{
using bundle_t = tim::lightweight_tuple<comp::trip_count, sampling_wall_clock,
sampling_cpu_clock, hw_counters>;

auto* _bt = ditr->get<backtrace>();

if(_bt->m_ts < _last_wall_ts) continue;
if(_bt->m_ts < _last_bt->m_ts) continue;

double _elapsed_wc = (_bt->m_ts - _last_wall_ts).count();
double _elapsed_cc = (_bt->m_thr_cpu_ts - _last_cpu_ts);
double _elapsed_wc = (_bt->m_ts - _last_bt->m_ts).count();
double _elapsed_cc = (_bt->m_thr_cpu_ts - _last_bt->m_thr_cpu_ts);

std::vector<bundle_t> _tc{};
_tc.reserve(_bt->size());
Expand Down Expand Up @@ -670,17 +717,25 @@ backtrace::post_process(int64_t _tid)
}
if constexpr(tim::trait::is_available<hw_counters>::value)
{
auto _hw_cnt_vals = _bt->m_hw_counter;
if(_last_bt && _bt->m_hw_counter.size() == _last_bt->m_hw_counter.size())
{
for(size_t k = 0; k < _bt->m_hw_counter.size(); ++k)
{
if(_last_bt->m_hw_counter[k] > _hw_cnt_vals[k])
_hw_cnt_vals[k] -= _last_bt->m_hw_counter[k];
}
}
auto* _hw_counter = itr.get<hw_counters>();
if(_hw_counter)
{
_hw_counter->set_value(_bt->m_hw_counter);
_hw_counter->set_accum(_bt->m_hw_counter);
_hw_counter->set_value(_hw_cnt_vals);
_hw_counter->set_accum(_hw_cnt_vals);
}
}
itr.pop();
}
_last_wall_ts = _bt->m_ts;
_last_cpu_ts = _bt->m_thr_cpu_ts;
_last_bt = _bt;
}

for(auto&& ditr : _data)
Expand Down
2 changes: 2 additions & 0 deletions source/lib/omnitrace/library/components/backtrace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ struct backtrace
int64_t m_tid = 0;
int64_t m_thr_cpu_ts = 0;
int64_t m_mem_peak = 0;
int64_t m_ctx_swch = 0;
int64_t m_page_flt = 0;
size_t m_size = 0;
time_point_type m_ts = {};
data_t m_data = {};
Expand Down
3 changes: 1 addition & 2 deletions source/lib/omnitrace/library/components/rocm_smi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,8 @@ data::post_process(uint32_t _dev_id)
if(itr.m_dev_id != _dev_id) continue;
if(!counter_track::exists(_dev_id))
{
auto _devname = TIMEMORY_JOIN("", "[GPU ", _dev_id, "] ");
auto addendum = [&](const char* _v) {
return _devname + std::string{ _v };
return JOIN(" ", "GPU", _v, JOIN("", '[', _dev_id, ']'), "(S)");
};
counter_track::emplace(_dev_id, addendum("Busy"), "%");
counter_track::emplace(_dev_id, addendum("Temperature"), "deg C");
Expand Down
Loading