From 14b9ea36a1d92d3067507c32f6311df40651e1df Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Thu, 20 Jun 2024 17:47:31 -0500 Subject: [PATCH] Workflow, submodules, and thread info Updates (#352) * Update CI workflows - use node20 workflow packages * Update tests/source/CMakeLists.txt - Use OMNITRACE_TRACE and OMNITRACE_PROFILE instead of perfetto/timemory * Update timemory submodule - argparse: requires -> required - parse callbacks * Update thread_info.cpp - fix causal::delay::get_local usage * Update timemory submodule * Update kokkos submodule - release 3.7.02 * Revert opensuse.yml and ubuntu-bionic.yml to use node16 workflows * Update docs.yml --- .github/workflows/docs.yml | 4 ++-- examples/lulesh/external/kokkos | 2 +- external/timemory | 2 +- source/bin/omnitrace-sample/impl.cpp | 8 ++++---- source/lib/core/argparse.cpp | 8 ++++---- source/lib/omnitrace/library/thread_info.cpp | 14 +++++++++++--- tests/source/CMakeLists.txt | 2 +- 7 files changed, 24 insertions(+), 16 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 3731ea82d..c5af45159 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -49,7 +49,7 @@ jobs: conda activate omnitrace-docs ./update-docs.sh - name: Upload artifact - uses: actions/upload-pages-artifact@v1 + uses: actions/upload-pages-artifact@v3 with: path: ./docs @@ -67,4 +67,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v1 + uses: actions/deploy-pages@v4 diff --git a/examples/lulesh/external/kokkos b/examples/lulesh/external/kokkos index 698a67731..1a0c2ff6d 160000 --- a/examples/lulesh/external/kokkos +++ b/examples/lulesh/external/kokkos @@ -1 +1 @@ -Subproject commit 698a67731a4c3b20ef4fcb728298176e49474033 +Subproject commit 1a0c2ff6daf1068c65529ec04c2c046177847869 diff --git a/external/timemory b/external/timemory index 2a1bcba0c..693746c15 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject 
commit 2a1bcba0cad46efd4421c0c7a145e83b161fb934 +Subproject commit 693746c1566de377d8d089195e2df4020fb69976 diff --git a/source/bin/omnitrace-sample/impl.cpp b/source/bin/omnitrace-sample/impl.cpp index 47d38443c..ce5abc848 100644 --- a/source/bin/omnitrace-sample/impl.cpp +++ b/source/bin/omnitrace-sample/impl.cpp @@ -562,7 +562,7 @@ parse_args(int argc, char** argv, std::vector& _env) parser.add_argument({ "--profile-format" }, "Data formats for profiling results") .min_count(1) .max_count(3) - .requires({ "profile|flat-profile" }) + .required({ "profile|flat-profile" }) .choices({ "text", "json", "console" }) .action([&](parser_t& p) { auto _v = p.get>("profile"); @@ -624,7 +624,7 @@ parse_args(int argc, char** argv, std::vector& _env) .add_argument({ "--cpus" }, "CPU IDs for frequency sampling. Supports integers and/or ranges") .dtype("int or range") - .requires({ "host" }) + .required({ "host" }) .action([&](parser_t& p) { update_env( _env, "OMNITRACE_SAMPLING_CPUS", @@ -634,7 +634,7 @@ parse_args(int argc, char** argv, std::vector& _env) .add_argument({ "--gpus" }, "GPU IDs for SMI queries. 
Supports integers and/or ranges") .dtype("int or range") - .requires({ "device" }) + .required({ "device" }) .action([&](parser_t& p) { update_env( _env, "OMNITRACE_SAMPLING_GPUS", @@ -709,7 +709,7 @@ parse_args(int argc, char** argv, std::vector& _env) parser.add_argument({ "--realtime" }, _realtime_desc) .min_count(0) - .requires(std::move(_realtime_reqs)) + .required(std::move(_realtime_reqs)) .action([&](parser_t& p) { auto _v = p.get>("realtime"); update_env(_env, "OMNITRACE_SAMPLING_REALTIME", true); diff --git a/source/lib/core/argparse.cpp b/source/lib/core/argparse.cpp index b69a4c5de..c140aaa50 100644 --- a/source/lib/core/argparse.cpp +++ b/source/lib/core/argparse.cpp @@ -870,7 +870,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data) .min_count(1) .max_count(3) .dtype("string") - .requires({ "profile|flat-profile" }) + .required({ "profile|flat-profile" }) .choices({ "text", "json", "console" }) .action([&](parser_t& p) { auto _v = p.get("profile-format"); @@ -976,7 +976,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data) { "--cpus" }, "CPU IDs for frequency sampling. Supports integers and/or ranges") .dtype("int and/or range") - .requires({ "host" }) + .required({ "host" }) .action([&](parser_t& p) { update_env(_data, "OMNITRACE_SAMPLING_CPUS", join(array_config_t{ "," }, p.get("cpus"))); @@ -992,7 +992,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data) .add_argument({ "--gpus" }, "GPU IDs for SMI queries. 
Supports integers and/or ranges") .dtype("int and/or range") - .requires({ "device" }) + .required({ "device" }) .action([&](parser_t& p) { update_env(_data, "OMNITRACE_SAMPLING_GPUS", join(array_config_t{ "," }, p.get("gpus"))); @@ -1117,7 +1117,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data) _parser.add_argument({ "--sample-realtime" }, _realtime_desc) .min_count(0) .dtype("[freq] [delay] [tids...]") - .requires(std::move(_realtime_reqs)) + .required(std::move(_realtime_reqs)) .action([&](parser_t& p) { auto _v = p.get>("sample-realtime"); update_env(_data, "OMNITRACE_SAMPLING_REALTIME", true); diff --git a/source/lib/omnitrace/library/thread_info.cpp b/source/lib/omnitrace/library/thread_info.cpp index bca571653..01ebf95f1 100644 --- a/source/lib/omnitrace/library/thread_info.cpp +++ b/source/lib/omnitrace/library/thread_info.cpp @@ -35,6 +35,8 @@ #include #include +#include + namespace omnitrace { namespace @@ -105,8 +107,9 @@ init_index_data(int64_t _tid, bool _offset = false) return itr; } -const auto unknown_thread = std::optional{}; -int64_t peak_num_threads = max_supported_threads; +thread_local int64_t offset_causal_count = 0; +const auto unknown_thread = std::optional{}; +int64_t peak_num_threads = max_supported_threads; } // namespace std::string @@ -187,8 +190,13 @@ thread_info::init(bool _offset) _info = thread_info{}; _info->is_offset = threading::offset_this_id(); _info->index_data = init_index_data(_tid, _info->is_offset); - _info->causal_count = &causal::delay::get_local(); _info->lifetime.first = tim::get_clock_real_now(); + + const auto _sequent_tid = _info->index_data->sequent_value; + _info->causal_count = (!_info->is_offset && _sequent_tid < peak_num_threads) + ? 
&causal::delay::get_local(_sequent_tid) + : &offset_causal_count; + if(_info->is_offset) set_thread_state(ThreadState::Disabled); } diff --git a/tests/source/CMakeLists.txt b/tests/source/CMakeLists.txt index e28008475..ea9bae3c4 100644 --- a/tests/source/CMakeLists.txt +++ b/tests/source/CMakeLists.txt @@ -9,7 +9,7 @@ target_compile_definitions(thread-limit PRIVATE MAX_THREADS=${OMNITRACE_MAX_THRE target_link_libraries(thread-limit PRIVATE Threads::Threads tests-compile-options) set(_thread_limit_environment - "${_base_environment}" "OMNITRACE_USE_PERFETTO=ON" "OMNITRACE_USE_TIMEMORY=ON" + "${_base_environment}" "OMNITRACE_TRACE=ON" "OMNITRACE_PROFILE=ON" "OMNITRACE_COUT_OUTPUT=ON" "OMNITRACE_USE_SAMPLING=ON" "OMNITRACE_SAMPLING_FREQ=250" "OMNITRACE_VERBOSE=2" "OMNITRACE_TIMEMORY_COMPONENTS=wall_clock,peak_rss,page_rss")