From d15b2bf68bd3f0a3001f13c407a6372a6296aec8 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Mon, 26 Sep 2022 23:26:31 -0500 Subject: [PATCH 01/15] omnitrace-run executable --- source/bin/CMakeLists.txt | 1 + source/bin/omnitrace-run/CMakeLists.txt | 22 ++ source/bin/omnitrace-run/impl.cpp | 301 ++++++++++++++++++ source/bin/omnitrace-run/omnitrace-run.cpp | 60 ++++ source/bin/omnitrace-run/omnitrace-run.hpp | 43 +++ source/lib/omnitrace-dl/dl.cpp | 12 +- source/lib/omnitrace-dl/dl.hpp | 2 + source/lib/omnitrace-dl/main.c | 5 + source/lib/omnitrace/library.cpp | 16 +- .../components/pthread_create_gotcha.cpp | 3 +- source/lib/omnitrace/library/config.cpp | 2 +- source/lib/omnitrace/library/debug.cpp | 4 +- tests/CMakeLists.txt | 16 +- 13 files changed, 475 insertions(+), 12 deletions(-) create mode 100644 source/bin/omnitrace-run/CMakeLists.txt create mode 100644 source/bin/omnitrace-run/impl.cpp create mode 100644 source/bin/omnitrace-run/omnitrace-run.cpp create mode 100644 source/bin/omnitrace-run/omnitrace-run.hpp diff --git a/source/bin/CMakeLists.txt b/source/bin/CMakeLists.txt index 0d64fcadb..630bf955d 100644 --- a/source/bin/CMakeLists.txt +++ b/source/bin/CMakeLists.txt @@ -16,6 +16,7 @@ endif() # executables add_subdirectory(omnitrace-avail) add_subdirectory(omnitrace-critical-trace) +add_subdirectory(omnitrace-run) add_subdirectory(omnitrace) if(OMNITRACE_BUILD_TESTING OR "$ENV{OMNITRACE_CI}" MATCHES "[1-9]+|ON|on|y|yes") diff --git a/source/bin/omnitrace-run/CMakeLists.txt b/source/bin/omnitrace-run/CMakeLists.txt new file mode 100644 index 000000000..e9cd8298b --- /dev/null +++ b/source/bin/omnitrace-run/CMakeLists.txt @@ -0,0 +1,22 @@ +# ------------------------------------------------------------------------------# +# +# omnitrace-run target +# +# ------------------------------------------------------------------------------# + +add_executable(omnitrace-run ${CMAKE_CURRENT_LIST_DIR}/omnitrace-run.cpp + ${CMAKE_CURRENT_LIST_DIR}/impl.cpp) 
+ +target_include_directories(omnitrace-run PRIVATE ${CMAKE_CURRENT_LIST_DIR}) +target_link_libraries( + omnitrace-run + PRIVATE omnitrace::omnitrace-compile-definitions omnitrace::omnitrace-headers + omnitrace::omnitrace-common-library) +set_target_properties( + omnitrace-run PROPERTIES BUILD_RPATH "\$ORIGIN:\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}" + INSTALL_RPATH "${OMNITRACE_EXE_INSTALL_RPATH}") + +install( + TARGETS omnitrace-run + DESTINATION ${CMAKE_INSTALL_BINDIR} + OPTIONAL) diff --git a/source/bin/omnitrace-run/impl.cpp b/source/bin/omnitrace-run/impl.cpp new file mode 100644 index 000000000..33a6af805 --- /dev/null +++ b/source/bin/omnitrace-run/impl.cpp @@ -0,0 +1,301 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include "common/delimit.hpp" +#include "common/environment.hpp" +#include "common/join.hpp" +#include "common/setup.hpp" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace color = tim::log::color; +using tim::log::stream; +using namespace timemory::join; + +namespace +{ +int verbose = 0; +} + +std::string +get_command(const char* _argv0) +{ + return omnitrace::path::find_path(_argv0, 0, omnitrace::common::get_env("PATH", "")); +} + +void +print_command(const std::vector& _argv) +{ + if(verbose >= 1) + stream(std::cout, color::info()) + << "Executing '" << join(array_config{ " " }, _argv) << "'...\n"; +} + +std::vector +get_environment() +{ + std::vector _environ; + if(environ != nullptr) + { + int idx = 0; + while(environ[idx] != nullptr) + _environ.emplace_back(strdup(environ[idx++])); + } + + auto _exe = std::string_view{ realpath("/proc/self/exe", nullptr) }; + auto _pos = _exe.find_last_of('/'); + auto _dir = std::string{ "./" }; + if(_pos != std::string_view::npos) _dir = _exe.substr(0, _pos); + auto _lib = omnitrace::common::join("/", _dir, "..", "lib", "libomnitrace-dl.so"); + _environ.emplace_back( + strdup(omnitrace::common::join("=", "LD_PRELOAD", realpath(_lib.c_str(), nullptr)) + .c_str())); + + return _environ; +} + +template +void +update_env(std::vector& _environ, std::string_view _env_var, Tp&& _env_val) +{ + for(auto& itr : _environ) + { + if(!itr) continue; + if(std::string_view{ itr }.find(_env_var) == 0) + { + free(itr); + itr = strdup(omnitrace::common::join('=', _env_var, _env_val).c_str()); + return; + } + } + _environ.emplace_back( + strdup(omnitrace::common::join('=', _env_var, _env_val).c_str())); +} + +std::vector +parse_args(int argc, char** argv, std::vector& _env) +{ + using parser_t = tim::argparse::argument_parser; + using parser_err_t = typename parser_t::result_type; + + auto help_check = [](parser_t& p, int _argc, char** _argv) { + std::set help_args = 
{ "-h", "--help", "-?" }; + return (p.exists("help") || _argc == 1 || + (_argc > 1 && help_args.find(_argv[1]) != help_args.end())); + }; + + auto _pec = EXIT_SUCCESS; + auto help_action = [&_pec, argc, argv](parser_t& p) { + if(_pec != EXIT_SUCCESS) + { + std::stringstream msg; + msg << "Error in command:"; + for(int i = 0; i < argc; ++i) + msg << " " << argv[i]; + msg << "\n\n"; + stream(std::cerr, color::fatal()) << msg.str(); + std::cerr << std::flush; + } + + p.print_help(); + exit(_pec); + }; + + auto parser = parser_t(argv[0]); + + parser.enable_help(); + parser.on_error([=, &_pec](parser_t& p, const parser_err_t& _err) { + stream(std::cerr, color::fatal()) << _err << "\n"; + _pec = EXIT_FAILURE; + help_action(p); + }); + + parser.add_argument() + .names({ "--debug" }) + .description("Debug output") + .max_count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_DEBUG", p.get("debug")); + }); + parser.add_argument() + .names({ "-v", "--verbose" }) + .description("Verbose output") + .count(1) + .action([&](parser_t& p) { + auto _v = p.get("verbose"); + verbose = _v; + update_env(_env, "OMNITRACE_VERBOSE", _v); + }); + parser.add_argument({ "-N", "--no-color" }, "Disable colorized output") + .max_count(1) + .dtype("bool") + .action([&](parser_t& p) { + auto _colorized = !p.get("no-color"); + update_env(_env, "OMNITRACE_COLORIZED_LOG", (_colorized) ? "1" : "0"); + update_env(_env, "COLORIZED_LOG", (_colorized) ? 
"1" : "0"); + }); + parser.add_argument() + .names({ "-c", "--config" }) + .description("Configuration file") + .min_count(1) + .action([&](parser_t& p) { + update_env( + _env, "OMNITRACE_CONFIG_FILE", + join(array_config{ ":" }, p.get>("config"))); + }); + parser + .add_argument({ "-d", "--delay" }, + "Set the delay before the sampler starts (seconds)") + .count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_SAMPLING_DELAY", p.get("delay")); + }); + parser + .add_argument({ "-f", "--freq" }, "Set the frequency of the sampler " + "(number of interrupts per second)") + .count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_SAMPLING_FREQ", p.get("freq")); + }); + parser + .add_argument({ "-D", "--duration" }, + "Set the duration of the sampling (seconds)") + .count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_SAMPLING_DURATION", p.get("duration")); + }); + parser + .add_argument( + { "-C", "--cpu-events" }, + "Set the hardware counter events to record (ref: `omnitrace-avail -H -c CPU)") + .action([&](parser_t& p) { + auto _events = + join(array_config{ "," }, p.get>("cpu-events")); + update_env(_env, "OMNITRACE_PAPI_EVENTS", _events); + }); + parser + .add_argument({ "-G", "--gpu-events" }, + "Set the GPU hardware counter events to record (ref: " + "`omnitrace-avail -H -c GPU)") + .action([&](parser_t& p) { + auto _events = + join(array_config{ "," }, p.get>("gpu-events")); + update_env(_env, "OMNITRACE_ROCM_EVENTS", _events); + }); + + auto _args = parser.parse_known_args(argc, argv); + auto _cmdc = std::get<1>(_args); + auto* _cmdv = std::get<2>(_args); + + if(help_check(parser, _cmdc, _cmdv)) help_action(parser); + + std::vector _argv = {}; + _argv.reserve(_cmdc); + for(int i = 1; i < _cmdc; ++i) + _argv.emplace_back(_cmdv[i]); + + return _argv; +} + +/* +void +update_env(char*** envp) +{ + if(!envp) return; + + static constexpr size_t N = 3; + + using pair_t = std::pair; + std::array _locs = { pair_t{ "HSA_TOOLS_LIB", 
-1 }, + pair_t{ "ROCP_TOOL_LIB", -1 }, + pair_t{ "HSA_TOOLS_REPORT_LOAD_FAILURE", -1 } }; + + char**& _envp = *envp; + size_t nenv = 0; + int64_t nadd = _locs.size(); + if(_envp) + { + size_t i = 0; + while(_envp[(i = nenv)]) + { + ++nenv; + for(auto& itr : _locs) + { + if(itr.second < 0 && std::string_view{ _envp[i] }.find(itr.first) == 0) + { + itr.second = i; + --nadd; + fprintf(stderr, "found %s at index %zi\n", itr.first.data(), i); + } + } + } + } + + size_t nsize = nenv + 1; + if(nadd > 0) + { + nsize += nadd; + + size_t _off = 0; + for(auto& itr : _locs) + if(itr.second < 0) itr.second = nenv + _off++; + + char** _envp_new = new char*[nsize]; + memset(_envp_new, 0, nsize * sizeof(char*)); + for(size_t i = 0; i < nenv; ++i) + _envp_new[i] = _envp[i]; + + _envp = _envp_new; + } + + fprintf(stderr, "nsize=%zu, nenv=%zu, nadd=%zu\n", nsize, nenv, nadd); + + using loc_pair_t = std::pair; + std::array _libs = { + loc_pair_t{ "HSA_TOOLS_LIB", omnitrace::dl::get_indirect().get_dl_library() }, + loc_pair_t{ "ROCP_TOOL_LIB", omnitrace::dl::get_indirect().get_omni_library() }, + loc_pair_t{ "HSA_TOOLS_REPORT_LOAD_FAILURE", "1" } + }; + + for(auto itr : _locs) + { + fprintf(stderr, "%s is at index %zu\n", itr.first.data(), itr.second); + for(const auto& litr : _libs) + { + if(itr.first == litr.first) + _envp[itr.second] = + strdup(omnitrace::common::join("=", itr.first, litr.second).c_str()); + } + } +} +*/ diff --git a/source/bin/omnitrace-run/omnitrace-run.cpp b/source/bin/omnitrace-run/omnitrace-run.cpp new file mode 100644 index 000000000..6b2619628 --- /dev/null +++ b/source/bin/omnitrace-run/omnitrace-run.cpp @@ -0,0 +1,60 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "omnitrace-run.hpp" + +#include + +int +main(int argc, char** argv) +{ + auto _env = get_environment(); + + bool _has_double_hyphen = false; + for(int i = 1; i < argc; ++i) + { + auto _arg = std::string_view{ argv[i] }; + if(_arg == "--" || _arg == "-?" 
|| _arg == "-h" || _arg == "--help") + _has_double_hyphen = true; + } + + std::vector _argv = {}; + if(_has_double_hyphen) + { + _argv = parse_args(argc, argv, _env); + } + else + { + _argv.reserve(argc); + for(int i = 1; i < argc; ++i) + _argv.emplace_back(argv[i]); + } + + if(!_argv.empty()) + { + std::string _argv0 = get_command(_argv[0]); + print_command(_argv); + _argv.emplace_back(nullptr); + + return execve(_argv0.c_str(), _argv.data(), _env.data()); + } +} diff --git a/source/bin/omnitrace-run/omnitrace-run.hpp b/source/bin/omnitrace-run/omnitrace-run.hpp new file mode 100644 index 000000000..616b40ec4 --- /dev/null +++ b/source/bin/omnitrace-run/omnitrace-run.hpp @@ -0,0 +1,43 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include +#include +#include + +std::string +get_command(const char*); + +void +print_command(const std::vector& _argv); + +std::vector +get_environment(); + +template +void +update_env(std::vector&, std::string_view, Tp&&); + +std::vector +parse_args(int argc, char** argv, std::vector&); diff --git a/source/lib/omnitrace-dl/dl.cpp b/source/lib/omnitrace-dl/dl.cpp index 23fb06abc..f1b6697e7 100644 --- a/source/lib/omnitrace-dl/dl.cpp +++ b/source/lib/omnitrace-dl/dl.cpp @@ -408,6 +408,8 @@ get_indirect() OMNITRACE_HIDDEN_API; indirect& get_indirect() { + omnitrace_preinit_library(); + static auto _libomni = get_env("OMNITRACE_LIBRARY", "libomnitrace.so"); static auto _libuser = get_env("OMNITRACE_USER_LIBRARY", "libomnitrace-user.so"); static auto _libdlib = get_env("OMNITRACE_DL_LIBRARY", "libomnitrace-dl.so"); @@ -503,6 +505,12 @@ namespace dl = omnitrace::dl; extern "C" { + void omnitrace_preinit_library(void) + { + if(!omnitrace::common::get_env("OMNITRACE_COLORIZED_LOG", tim::log::colorized())) + tim::log::colorized() = false; + } + void omnitrace_init_library(void) { OMNITRACE_DL_INVOKE(get_indirect().omnitrace_init_library_f); @@ -913,6 +921,8 @@ omnitrace_preload() OMNITRACE_HIDDEN_API; bool omnitrace_preload() { + omnitrace_preinit_library(); + auto _preloaded = get_omnitrace_preload(); auto _enabled = get_env("OMNITRACE_ENABLED", true); @@ -922,7 +932,7 @@ omnitrace_preload() if(_preloaded && _enabled) { - OMNITRACE_DL_LOG(0, "[%s] invoking %s(%s)\n", __FUNCTION__, "omnitrace_init", + OMNITRACE_DL_LOG(1, "[%s] invoking %s(%s)\n", __FUNCTION__, "omnitrace_init", ::omnitrace::join(::omnitrace::QuoteStrings{}, ", ", "sampling", false, "main") .c_str()); diff --git a/source/lib/omnitrace-dl/dl.hpp b/source/lib/omnitrace-dl/dl.hpp index 373a94b4f..1281fdd0d 100644 --- a/source/lib/omnitrace-dl/dl.hpp +++ b/source/lib/omnitrace-dl/dl.hpp @@ -87,6 +87,8 @@ extern "C" size_t address) OMNITRACE_PUBLIC_API; #if defined(OMNITRACE_DL_SOURCE) && 
(OMNITRACE_DL_SOURCE > 0) + void omnitrace_preinit_library(void) OMNITRACE_HIDDEN_API; + int omnitrace_user_start_trace_dl(void) OMNITRACE_HIDDEN_API; int omnitrace_user_stop_trace_dl(void) OMNITRACE_HIDDEN_API; diff --git a/source/lib/omnitrace-dl/main.c b/source/lib/omnitrace-dl/main.c index 0ded1bfb1..63d0caeac 100644 --- a/source/lib/omnitrace-dl/main.c +++ b/source/lib/omnitrace-dl/main.c @@ -29,6 +29,9 @@ #include #include +extern +void omnitrace_preinit_library(void); + extern void omnitrace_finalize(void); @@ -84,6 +87,8 @@ __libc_start_main(int (*_main)(int, char**, char**), int _argc, char** _argv, int (*_init)(int, char**, char**), void (*_fini)(void), void (*_rtld_fini)(void), void* _stack_end) { + omnitrace_preinit_library(); + // prevent re-entry static int _reentry = 0; if(_reentry > 0) return -1; diff --git a/source/lib/omnitrace/library.cpp b/source/lib/omnitrace/library.cpp index 149721dd4..e739b058a 100644 --- a/source/lib/omnitrace/library.cpp +++ b/source/lib/omnitrace/library.cpp @@ -20,6 +20,10 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
+// clang-format off +#include +// clang-format on + #include "api.hpp" #include "common/setup.hpp" #include "library/components/category_region.hpp" @@ -331,6 +335,8 @@ omnitrace_init_library_hidden() extern "C" bool omnitrace_init_tooling_hidden() { + if(!get_env("OMNITRACE_COLORIZED_LOG", true, false)) tim::log::colorized() = false; + if(!tim::get_env("OMNITRACE_INIT_TOOLING", true)) { omnitrace_init_library_hidden(); @@ -604,8 +610,14 @@ omnitrace_finalize_hidden(void) auto& _thread_bundle = thread_data::instance(); if(_thread_bundle) _thread_bundle->stop(); - if(dmp::rank() == 0 && get_verbose() >= 0) fprintf(stderr, "\n"); - if(get_verbose() > 0 || get_debug()) config::print_settings(); + if(get_verbose() >= 1 || get_debug()) + { + if(dmp::rank() == 0) + { + fprintf(stderr, "\n"); + config::print_settings(); + } + } OMNITRACE_VERBOSE_F(1, "omnitrace_push_trace :: called %zux\n", _push_count); OMNITRACE_VERBOSE_F(1, "omnitrace_pop_trace :: called %zux\n", _pop_count); diff --git a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp index b0c8829ea..2e56b98ab 100644 --- a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp @@ -290,8 +290,7 @@ pthread_create_gotcha::shutdown() bundles->clear(); OMNITRACE_BASIC_VERBOSE( - 1, - // 2 && _ndangling > 0, + 1 && _ndangling > 0, "[pthread_create_gotcha::shutdown] cleaned up %lu dangling bundles\n", _ndangling); } diff --git a/source/lib/omnitrace/library/config.cpp b/source/lib/omnitrace/library/config.cpp index cdca176cf..aa6f1e261 100644 --- a/source/lib/omnitrace/library/config.cpp +++ b/source/lib/omnitrace/library/config.cpp @@ -812,7 +812,7 @@ configure_settings(bool _init) configure_signal_handler(); configure_disabled_settings(); - OMNITRACE_VERBOSE(1, "configuration complete\n"); + OMNITRACE_VERBOSE(2, "configuration complete\n"); } void diff 
--git a/source/lib/omnitrace/library/debug.cpp b/source/lib/omnitrace/library/debug.cpp index 32c426fa9..021997115 100644 --- a/source/lib/omnitrace/library/debug.cpp +++ b/source/lib/omnitrace/library/debug.cpp @@ -78,8 +78,8 @@ FILE* get_file() { static FILE* _v = []() { - auto&& _fname = tim::get_env("OMNITRACE_LOG_FILE", ""); - tim::log::colorized() = _fname.empty(); + auto&& _fname = tim::get_env("OMNITRACE_LOG_FILE", ""); + if(!_fname.empty()) tim::log::colorized() = false; return (_fname.empty()) ? stderr : tim::filepath::fopen(_fname, "w"); }(); return _v; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e87c5e906..af7b05b85 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -54,7 +54,7 @@ set(_flat_environment set(_lock_environment "OMNITRACE_USE_SAMPLING=ON" "OMNITRACE_USE_PROCESS_SAMPLING=OFF" - "OMNITRACE_SAMPLING_FREQ=250" + "OMNITRACE_SAMPLING_FREQ=750" "OMNITRACE_CRITICAL_TRACE=ON" "OMNITRACE_COLLAPSE_THREADS=ON" "OMNITRACE_TRACE_THREAD_LOCKS=ON" @@ -306,6 +306,12 @@ function(OMNITRACE_ADD_TEST) NAME ${TEST_NAME}-baseline COMMAND ${COMMAND_PREFIX} $ ${TEST_RUN_ARGS} WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) + + add_test( + NAME ${TEST_NAME}-preload + COMMAND ${COMMAND_PREFIX} $ -- + $ ${TEST_RUN_ARGS} + WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) endif() if(NOT TEST_SKIP_REWRITE) @@ -370,7 +376,7 @@ function(OMNITRACE_ADD_TEST) foreach( _TEST - baseline binary-rewrite binary-rewrite-run binary-rewrite-sampling + baseline preload binary-rewrite binary-rewrite-run binary-rewrite-sampling binary-rewrite-sampling-run runtime-instrument runtime-instrument-sampling) string(REGEX REPLACE "-run(-|/)" "\\1" _prefix "${TEST_NAME}-${_TEST}/") set(_environ "${TEST_ENVIRONMENT}") @@ -405,6 +411,8 @@ function(OMNITRACE_ADD_TEST) set(_REGEX_VAR REWRITE) elseif("${_TEST}" MATCHES "baseline") set(_REGEX_VAR BASELINE) + elseif("${_TEST}" MATCHES "preload") + set(_REGEX_VAR BASELINE) else() set(_REGEX_VAR) endif() @@ -836,7 +844,7 @@ 
omnitrace_add_test( -ME [==[lib(gomp|m-)]==] LABELS "kokkos;kokkos-profile-library" - RUN_ARGS -i 10 -s 20 -p + RUN_ARGS -i 25 -s 20 -p ENVIRONMENT "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=ON;OMNITRACE_COUT_OUTPUT=ON;KOKKOS_PROFILE_LIBRARY=libomnitrace-dl.so" REWRITE_RUN_PASS_REGEX "\\|_\\[kokkos\\]" @@ -972,7 +980,7 @@ omnitrace_add_test( REWRITE_TIMEOUT 180 RUNTIME_TIMEOUT 360 ENVIRONMENT - "${_ompt_environment};OMNITRACE_USE_SAMPLING=ON;OMNITRACE_SAMPLING_FREQ=100;OMNITRACE_COUT_OUTPUT=ON" + "${_ompt_environment};OMNITRACE_USE_SAMPLING=ON;OMNITRACE_SAMPLING_FREQ=600;OMNITRACE_COUT_OUTPUT=ON" REWRITE_RUN_PASS_REGEX "${_OMPT_PASS_REGEX}" REWRITE_FAIL_REGEX "0 instrumented loops in procedure") From 8b2b6505355af443844391a38cc2e500841134b2 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Tue, 27 Sep 2022 05:46:39 -0500 Subject: [PATCH 02/15] Fix thread_info::is_valid_time when threads do not terminate - was discarding samples - timemory fixes for BFD support --- .../library/components/pthread_create_gotcha.cpp | 16 ++++++++++++---- source/lib/omnitrace/library/sampling.cpp | 8 ++++---- source/lib/omnitrace/library/thread_info.cpp | 13 +++++++++---- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp index 2e56b98ab..2d09ae3d5 100644 --- a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp @@ -289,10 +289,18 @@ pthread_create_gotcha::shutdown() bundles->clear(); - OMNITRACE_BASIC_VERBOSE( - 1 && _ndangling > 0, - "[pthread_create_gotcha::shutdown] cleaned up %lu dangling bundles\n", - _ndangling); + if(config::settings_are_configured()) + { + OMNITRACE_VERBOSE(2 && _ndangling > 0, + "[pthread_create_gotcha] cleaned up %lu dangling bundles\n", + _ndangling); + } + else + { + OMNITRACE_BASIC_VERBOSE( 
+ 2 && _ndangling > 0, + "[pthread_create_gotcha] cleaned up %lu dangling bundles\n", _ndangling); + } } void diff --git a/source/lib/omnitrace/library/sampling.cpp b/source/lib/omnitrace/library/sampling.cpp index 9c71e4cd0..ffb1a9e76 100644 --- a/source/lib/omnitrace/library/sampling.cpp +++ b/source/lib/omnitrace/library/sampling.cpp @@ -534,10 +534,10 @@ post_process() if(_data.empty()) { - OMNITRACE_VERBOSE( - 3 || get_debug_sampling(), - "Sampler data for thread %lu has %zu valid entries... (skipped)\n", i, - _raw_data.size()); + OMNITRACE_VERBOSE(2 || get_debug_sampling(), + "Sampler data for thread %lu has zero valid entries out of " + "%zu... (skipped)\n", + i, _raw_data.size()); continue; } diff --git a/source/lib/omnitrace/library/thread_info.cpp b/source/lib/omnitrace/library/thread_info.cpp index 7d6bd14f5..675d846e9 100644 --- a/source/lib/omnitrace/library/thread_info.cpp +++ b/source/lib/omnitrace/library/thread_info.cpp @@ -53,17 +53,20 @@ init_index_data(int64_t _tid, bool _offset = false) if(!itr) { threading::offset_this_id(_offset); - itr = thread_index_data{}; + itr = thread_index_data{}; + int _verb = 2; + // if thread created using finalization, bump up the minimum verbosity level + if(get_state() == State::Finalized && _offset) _verb += 2; if(!config::settings_are_configured()) { OMNITRACE_BASIC_VERBOSE_F( - 2, "Thread %li on PID %i (rank: %i) assigned omnitrace TID %li\n", + _verb, "Thread %li on PID %i (rank: %i) assigned omnitrace TID %li\n", itr->system_value, process::get_id(), dmp::rank(), itr->sequent_value); } else { OMNITRACE_VERBOSE_F( - 2, "Thread %li on PID %i (rank: %i) assigned omnitrace TID %li\n", + _verb, "Thread %li on PID %i (rank: %i) assigned omnitrace TID %li\n", itr->system_value, process::get_id(), dmp::rank(), itr->sequent_value); } } @@ -149,10 +152,12 @@ thread_info::set_stop(uint64_t _ts) { for(auto& itr : thread_info_data_t::instances()) { - if(itr && itr->index_data && itr->index_data->internal_value > _tid) + 
if(itr && itr->index_data && itr->index_data->internal_value != _tid) { if(itr->lifetime.second > _v->lifetime.second) itr->lifetime.second = _v->lifetime.second; + else if(itr->lifetime.second == 0) + itr->lifetime.second = _v->lifetime.second; } } } From 51eef5a8c2a4b12748e1090a11dd93f0d1ba8fe6 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Wed, 28 Sep 2022 09:39:47 -0500 Subject: [PATCH 03/15] fork fixes + sampling updates - more options for omnitrace-run - OMNITRACE_SAMPLING_INCLUDE_INLINES - fixed OMPT not being disabled - fixed sampling duration - fixed LD_PRELOAD when forked --- .cmake-format.yaml | 6 + .github/workflows/cpack.yml | 8 +- CMakeLists.txt | 62 +++-- cmake/BuildSettings.cmake | 5 +- source/bin/omnitrace-run/impl.cpp | 257 ++++++++++++++++-- source/lib/omnitrace-dl/dl.cpp | 21 +- source/lib/omnitrace-dl/dl.hpp | 1 + source/lib/omnitrace-dl/main.c | 25 +- .../library/components/fork_gotcha.cpp | 1 + source/lib/omnitrace/library/config.cpp | 19 +- source/lib/omnitrace/library/config.hpp | 3 + source/lib/omnitrace/library/ompt.cpp | 18 +- source/lib/omnitrace/library/sampling.cpp | 151 +++++++--- tests/CMakeLists.txt | 96 +++++-- 14 files changed, 527 insertions(+), 146 deletions(-) diff --git a/.cmake-format.yaml b/.cmake-format.yaml index ed1370b64..ef47853ee 100644 --- a/.cmake-format.yaml +++ b/.cmake-format.yaml @@ -21,6 +21,7 @@ parse: omnitrace_add_test: flags: - SKIP_BASELINE + - SKIP_PRELOAD - SKIP_REWRITE - SKIP_RUNTIME - SKIP_SAMPLING @@ -32,16 +33,21 @@ parse: NUM_PROCS: '*' REWRITE_TIMEOUT: '*' RUNTIME_TIMEOUT: '*' + PRELOAD_TIMEOUT: '*' REWRITE_ARGS: '*' RUNTIME_ARGS: '*' RUN_ARGS: '*' ENVIRONMENT: '*' LABELS: '*' PROPERTIES: '*' + PRELOAD_PASS_REGEX: '*' + PRELOAD_FAIL_REGEX: '*' RUNTIME_PASS_REGEX: '*' RUNTIME_FAIL_REGEX: '*' REWRITE_PASS_REGEX: '*' REWRITE_FAIL_REGEX: '*' + BASELINE_PASS_REGEX: '*' + BASELINE_FAIL_REGEX: '*' REWRITE_RUN_PASS_REGEX: '*' REWRITE_RUN_FAIL_REGEX: '*' omnitrace_target_compile_definitions: diff 
--git a/.github/workflows/cpack.yml b/.github/workflows/cpack.yml index ed2dbbde8..99bcebeeb 100644 --- a/.github/workflows/cpack.yml +++ b/.github/workflows/cpack.yml @@ -78,7 +78,7 @@ jobs: timeout-minutes: 10 uses: actions/upload-artifact@v2 with: - name: ubuntu-stgz-installers + name: ubuntu-${{ matrix.os }}-rocm-${{ matrix.rocm-version }}-stgz-installers path: | build-release/stgz/*.sh @@ -86,7 +86,7 @@ jobs: timeout-minutes: 10 uses: actions/upload-artifact@v2 with: - name: ubuntu-deb-installers + name: ubuntu-${{ matrix.os }}-rocm-${{ matrix.rocm-version }}-deb-installers path: | build-release/deb/*.deb @@ -187,7 +187,7 @@ jobs: timeout-minutes: 10 uses: actions/upload-artifact@v2 with: - name: opensuse-stgz-installers + name: opensuse-${{ matrix.os }}-rocm-${{ matrix.rocm-version }}-stgz-installers path: | build-release/stgz/*.sh @@ -195,7 +195,7 @@ jobs: timeout-minutes: 10 uses: actions/upload-artifact@v2 with: - name: opensuse-rpm-installers + name: opensuse-${{ matrix.os }}-rocm-${{ matrix.rocm-version }}-rpm-installers path: | build-release/rpm/*.rpm diff --git a/CMakeLists.txt b/CMakeLists.txt index b8f9193cb..1a5d35ee6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,8 +90,46 @@ if(DEFINED CMAKE_INSTALL_LIBDIR AND NOT DEFINED CMAKE_DEFAULT_INSTALL_LIBDIR) CACHE STRING "Object code libraries" FORCE) endif() +if(NOT "$ENV{OMNITRACE_CI}" STREQUAL "") + set(CI_BUILD $ENV{OMNITRACE_CI}) +else() + set(CI_BUILD OFF) +endif() + include(GNUInstallDirs) # install directories include(MacroUtilities) # various functions and macros + +if(CI_BUILD) + omnitrace_add_option(OMNITRACE_BUILD_CI "Enable internal asserts, etc." 
ON ADVANCED + NO_FEATURE) + omnitrace_add_option(OMNITRACE_BUILD_TESTING "Enable building the testing suite" ON + ADVANCED) + omnitrace_add_option(OMNITRACE_BUILD_DEBUG + "Enable building with extensive debug symbols" OFF ADVANCED) + omnitrace_add_option(OMNITRACE_CUSTOM_DATA_SOURCE "Enable custom data source" OFF + ADVANCED) + omnitrace_add_option( + OMNITRACE_BUILD_HIDDEN_VISIBILITY + "Build with hidden visibility (disable for Debug builds)" OFF ADVANCED) + omnitrace_add_option(OMNITRACE_STRIP_LIBRARIES "Strip the libraries" OFF ADVANCED) +else() + omnitrace_add_option(OMNITRACE_BUILD_CI "Enable internal asserts, etc." OFF ADVANCED + NO_FEATURE) + omnitrace_add_option(OMNITRACE_BUILD_EXAMPLES "Enable building the examples" OFF + ADVANCED) + omnitrace_add_option(OMNITRACE_BUILD_TESTING "Enable building the testing suite" OFF + ADVANCED) + omnitrace_add_option(OMNITRACE_BUILD_DEBUG + "Enable building with extensive debug symbols" OFF ADVANCED) + omnitrace_add_option(OMNITRACE_CUSTOM_DATA_SOURCE "Enable custom data source" OFF + ADVANCED) + omnitrace_add_option( + OMNITRACE_BUILD_HIDDEN_VISIBILITY + "Build with hidden visibility (disable for Debug builds)" ON ADVANCED) + omnitrace_add_option(OMNITRACE_STRIP_LIBRARIES "Strip the libraries" + ${_STRIP_LIBRARIES_DEFAULT} ADVANCED) +endif() + include(Compilers) # compiler identification include(BuildSettings) # compiler flags @@ -135,22 +173,8 @@ omnitrace_add_option(OMNITRACE_USE_OMPT "Enable OpenMP tools support" ON) omnitrace_add_option(OMNITRACE_USE_PYTHON "Enable Python support" OFF) omnitrace_add_option(OMNITRACE_BUILD_DYNINST "Build dyninst from submodule" OFF) omnitrace_add_option(OMNITRACE_BUILD_LIBUNWIND "Build libunwind from submodule" ON) -omnitrace_add_option(OMNITRACE_BUILD_EXAMPLES "Enable building the examples" OFF ADVANCED) -omnitrace_add_option(OMNITRACE_BUILD_TESTING "Enable building the testing suite" OFF - ADVANCED) -omnitrace_add_option(OMNITRACE_BUILD_DEBUG "Enable building with extensive debug 
symbols" - OFF ADVANCED) -omnitrace_add_option(OMNITRACE_CUSTOM_DATA_SOURCE "Enable custom data source" OFF - ADVANCED) -omnitrace_add_option( - OMNITRACE_BUILD_HIDDEN_VISIBILITY - "Build with hidden visibility (disable for Debug builds)" ON ADVANCED) -omnitrace_add_option(OMNITRACE_BUILD_CI "Enable internal asserts, etc." OFF ADVANCED - NO_FEATURE) omnitrace_add_option(OMNITRACE_INSTALL_PERFETTO_TOOLS "Install perfetto tools (i.e. traced, perfetto, etc.)" OFF) -omnitrace_add_option(OMNITRACE_STRIP_LIBRARIES "Strip the libraries" - ${_STRIP_LIBRARIES_DEFAULT} ADVANCED) if(OMNITRACE_USE_PAPI) omnitrace_add_option(OMNITRACE_BUILD_PAPI "Build PAPI from submodule" ON) @@ -161,16 +185,6 @@ if(OMNITRACE_USE_PYTHON) "Build python bindings with internal pybind11" ON) endif() -if(NOT "$ENV{OMNITRACE_CI}" STREQUAL "") - message( - AUTHOR_WARNING - "OMNITRACE_CI environment variable ($ENV{OMNITRACE_CI}) is overridding the OMNITRACE_BUILD_CI cache value" - ) - set(OMNITRACE_BUILD_CI - "$ENV{OMNITRACE_CI}" - CACHE BOOL "Enable internal asserts, etc" FORCE) -endif() - if(NOT OMNITRACE_USE_HIP) set(OMNITRACE_USE_ROCTRACER OFF diff --git a/cmake/BuildSettings.cmake b/cmake/BuildSettings.cmake index d5dc9f016..286f50d2b 100644 --- a/cmake/BuildSettings.cmake +++ b/cmake/BuildSettings.cmake @@ -12,8 +12,9 @@ include(Compilers) include(FindPackageHandleStandardArgs) include(MacroUtilities) -omnitrace_add_option(OMNITRACE_BUILD_DEVELOPER - "Extra build flags for development like -Werror" OFF) +omnitrace_add_option( + OMNITRACE_BUILD_DEVELOPER "Extra build flags for development like -Werror" + ${OMNITRACE_BUILD_CI}) omnitrace_add_option(OMNITRACE_BUILD_EXTRA_OPTIMIZATIONS "Extra optimization flags" OFF) omnitrace_add_option(OMNITRACE_BUILD_LTO "Build with link-time optimization" OFF) omnitrace_add_option(OMNITRACE_USE_COMPILE_TIMING diff --git a/source/bin/omnitrace-run/impl.cpp b/source/bin/omnitrace-run/impl.cpp index 33a6af805..07107ef9e 100644 --- 
a/source/bin/omnitrace-run/impl.cpp +++ b/source/bin/omnitrace-run/impl.cpp @@ -108,6 +108,9 @@ parse_args(int argc, char** argv, std::vector& _env) using parser_t = tim::argparse::argument_parser; using parser_err_t = typename parser_t::result_type; + update_env(_env, "OMNITRACE_USE_SAMPLING", true); + update_env(_env, "OMNITRACE_CRITICAL_TRACE", false); + auto help_check = [](parser_t& p, int _argc, char** _argv) { std::set help_args = { "-h", "--help", "-?" }; return (p.exists("help") || _argc == 1 || @@ -140,64 +143,258 @@ parse_args(int argc, char** argv, std::vector& _env) help_action(p); }); - parser.add_argument() - .names({ "--debug" }) - .description("Debug output") + const auto* _cputime_desc = + R"(Sample based on a CPU-clock timer. Accepts up to 2 arguments: + %{INDENT}%1. Interrupts per second. E.g., 100 == sample every 10 milliseconds of CPU-time. + %{INDENT}%2. Delay (in seconds of CPU-clock time). I.e., how long each thread should wait before taking first sample.)"; + + const auto* _realtime_desc = + R"(Sample based on a real-clock timer. Accepts up to 2 arguments: + %{INDENT}%1. Interrupts per second. E.g., 100 == sample every 10 milliseconds of realtime. + %{INDENT}%2. Delay (in seconds of real-clock time). I.e., how long each thread should wait before taking first sample.)"; + + const auto* _trace_policy_desc = + R"(Policy for new data when the buffer size limit is reached: + %{INDENT}%- discard : new data is ignored + %{INDENT}%- ring_buffer : new data overwrites oldest data)"; + + parser.add_argument({ "" }, ""); + parser.add_argument({ "--monochrome" }, "Disable colorized output") + .max_count(1) + .dtype("bool") + .action([&](parser_t& p) { + auto _colorized = !p.get("monochrome"); + update_env(_env, "OMNITRACE_COLORIZED_LOG", (_colorized) ? "1" : "0"); + update_env(_env, "COLORIZED_LOG", (_colorized) ? 
"1" : "0"); + }); + parser.add_argument({ "--debug" }, "Debug output") .max_count(1) .action([&](parser_t& p) { update_env(_env, "OMNITRACE_DEBUG", p.get("debug")); }); - parser.add_argument() - .names({ "-v", "--verbose" }) - .description("Verbose output") + parser.add_argument({ "-v", "--verbose" }, "Verbose output") .count(1) .action([&](parser_t& p) { auto _v = p.get("verbose"); verbose = _v; update_env(_env, "OMNITRACE_VERBOSE", _v); }); - parser.add_argument({ "-N", "--no-color" }, "Disable colorized output") - .max_count(1) - .dtype("bool") - .action([&](parser_t& p) { - auto _colorized = !p.get("no-color"); - update_env(_env, "OMNITRACE_COLORIZED_LOG", (_colorized) ? "1" : "0"); - update_env(_env, "COLORIZED_LOG", (_colorized) ? "1" : "0"); - }); - parser.add_argument() - .names({ "-c", "--config" }) - .description("Configuration file") + + parser.add_argument({ "" }, ""); + parser.add_argument({ "-c", "--config" }, "Configuration file") .min_count(1) .action([&](parser_t& p) { update_env( _env, "OMNITRACE_CONFIG_FILE", join(array_config{ ":" }, p.get>("config"))); }); + parser.add_argument({ "-o", "--output" }, "Output path") + .min_count(1) + .max_count(2) + .action([&](parser_t& p) { + auto _v = p.get>("output"); + update_env(_env, "OMNITRACE_OUTPUT_PATH", _v.at(0)); + if(_v.size() > 1) update_env(_env, "OMNITRACE_OUTPUT_PREFIX", _v.at(1)); + }); + + parser.add_argument({ "" }, ""); + parser.add_argument({ "--trace" }, "Generate a detailed trace") + .max_count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_USE_PERFETTO", p.get("trace")); + }); parser - .add_argument({ "-d", "--delay" }, - "Set the delay before the sampler starts (seconds)") + .add_argument({ "--trace-buffer-size" }, + "Size limit for the trace output (in KB)") .count(1) + .dtype("KB") .action([&](parser_t& p) { - update_env(_env, "OMNITRACE_SAMPLING_DELAY", p.get("delay")); + update_env(_env, "OMNITRACE_PERFETTO_BUFFER_SIZE_KB", + p.get("trace-buffer-size")); + }); + 
parser.add_argument({ "--trace-fill-policy" }, _trace_policy_desc) + .count(1) + .choices({ "discard", "ring_buffer" }) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_PERFETTO_FILL_POLICY", + p.get("trace-fill-policy")); + }); + + parser.add_argument({ "" }, ""); + parser.add_argument({ "--profile" }, "Generate a call-stack-based profile") + .min_count(0) + .max_count(3) + .choices({ "text", "json", "console" }) + .action([&](parser_t& p) { + auto _v = p.get>("profile"); + update_env(_env, "OMNITRACE_USE_TIMEMORY", true); + if(!_v.empty()) + { + update_env(_env, "OMNITRACE_TEXT_OUTPUT", _v.count("text") != 0); + update_env(_env, "OMNITRACE_JSON_OUTPUT", _v.count("json") != 0); + update_env(_env, "OMNITRACE_COUT_OUTPUT", _v.count("console") != 0); + } + }); + + parser.add_argument({ "--flat-profile" }, "Generate a flat profile") + .min_count(0) + .max_count(3) + .choices({ "text", "json", "console" }) + .action([&](parser_t& p) { + auto _v = p.get>("flat-profile"); + update_env(_env, "OMNITRACE_USE_TIMEMORY", true); + update_env(_env, "OMNITRACE_FLAT_PROFILE", true); + if(!_v.empty()) + { + update_env(_env, "OMNITRACE_TEXT_OUTPUT", _v.count("text") != 0); + update_env(_env, "OMNITRACE_JSON_OUTPUT", _v.count("json") != 0); + update_env(_env, "OMNITRACE_COUT_OUTPUT", _v.count("console") != 0); + } }); parser - .add_argument({ "-f", "--freq" }, "Set the frequency of the sampler " + .add_argument({ "--diff-profile" }, + "Generate a profile diff from the specified input directory") + .min_count(1) + .max_count(2) + .action([&](parser_t& p) { + auto _v = p.get>("diff-profile"); + update_env(_env, "OMNITRACE_DIFF_OUTPUT", true); + update_env(_env, "OMNITRACE_INPUT_PATH", _v.at(0)); + if(_v.size() > 1) update_env(_env, "OMNITRACE_INPUT_PREFIX", _v.at(1)); + }); + + parser.add_argument({ "" }, ""); + parser + .add_argument({ "-f", "--freq" }, "Set the default sampling frequency " "(number of interrupts per second)") .count(1) .action([&](parser_t& p) { 
update_env(_env, "OMNITRACE_SAMPLING_FREQ", p.get("freq")); }); parser - .add_argument({ "-D", "--duration" }, - "Set the duration of the sampling (seconds)") + .add_argument( + { "-w", "--wait" }, + "Set the default wait time (i.e. delay) before taking first sample " + "(in seconds). This delay time is based on the clock of the sampler, i.e., a " + "delay of 1 second for CPU-clock sampler may not equal 1 second of realtime") .count(1) .action([&](parser_t& p) { - update_env(_env, "OMNITRACE_SAMPLING_DURATION", p.get("duration")); + update_env(_env, "OMNITRACE_SAMPLING_DELAY", p.get("wait")); }); parser .add_argument( - { "-C", "--cpu-events" }, - "Set the hardware counter events to record (ref: `omnitrace-avail -H -c CPU)") + { "-d", "--duration" }, + "Set the duration of the sampling (in seconds of realtime). I.e., it is " + "possible (currently) to set a CPU-clock time delay that exceeds the " + "real-time duration... resulting in zero samples being taken") + .count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_SAMPLING_DURATION", p.get("duration")); + }); + parser + .add_argument({ "-t", "--tids" }, + "Specify the default thread IDs for sampling, where 0 (zero) is " + "the main thread and each thread created by the target application " + "is assigned an atomically incrementing value.") + .min_count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_SAMPLING_TIDS", + join(array_config{ ", " }, p.get>("tids"))); + }); + + parser.add_argument({ "" }, ""); + parser.add_argument({ "--cputime" }, _cputime_desc) + .min_count(0) + .max_count(2) + .action([&](parser_t& p) { + auto _v = p.get>("cputime"); + update_env(_env, "OMNITRACE_SAMPLING_CPUTIME", true); + if(!_v.empty()) update_env(_env, "OMNITRACE_SAMPLING_CPUTIME_FREQ", _v.at(0)); + if(_v.size() > 1) + update_env(_env, "OMNITRACE_SAMPLING_CPUTIME_DELAY", _v.at(1)); + }); + parser.add_argument({ "" }, ""); + parser.add_argument({ "--realtime" }, _realtime_desc) + .min_count(0) +
.max_count(2) + .action([&](parser_t& p) { + auto _v = p.get>("realtime"); + update_env(_env, "OMNITRACE_SAMPLING_REALTIME", true); + if(!_v.empty()) + update_env(_env, "OMNITRACE_SAMPLING_REALTIME_FREQ", _v.at(0)); + if(_v.size() > 1) + update_env(_env, "OMNITRACE_SAMPLING_REALTIME_DELAY", _v.at(1)); + }); + + parser.add_argument({ "" }, ""); + parser.add_argument({ "-E", "--enable" }, "Enable these backends") + .choices({ "all", "kokkosp", "mpip", "ompt", "rcclp", "rocm-smi", "roctracer", + "rocprofiler", "roctx", "mutex-locks", "spin-locks", "rw-locks" }) + .action([&](parser_t& p) { + auto _v = p.get>("enable"); + auto _update = [&](const auto& _opt, bool _cond) { + if(_cond || _v.count("all") > 0) update_env(_env, _opt, true); + }; + _update("OMNITRACE_USE_KOKKOSP", _v.count("kokkosp") > 0); + _update("OMNITRACE_USE_MPIP", _v.count("mpip") > 0); + _update("OMNITRACE_USE_OMPT", _v.count("ompt") > 0); + _update("OMNITRACE_USE_RCCLP", _v.count("rcclp") > 0); + _update("OMNITRACE_USE_ROCTX", _v.count("roctx") > 0); + _update("OMNITRACE_USE_ROCM_SMI", _v.count("rocm-smi") > 0); + _update("OMNITRACE_USE_ROCTRACER", _v.count("roctracer") > 0); + _update("OMNITRACE_USE_ROCPROFILER", _v.count("rocprofiler") > 0); + _update("OMNITRACE_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0); + _update("OMNITRACE_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); + _update("OMNITRACE_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); + }); + + parser.add_argument({ "-D", "--disable" }, "Disable these backends") + .choices({ "all", "kokkosp", "mpip", "ompt", "rcclp", "rocm-smi", "roctracer", + "rocprofiler", "roctx", "mutex-locks", "spin-locks", "rw-locks" }) + .action([&](parser_t& p) { + auto _v = p.get>("disable"); + auto _update = [&](const auto& _opt, bool _cond) { + if(_cond || _v.count("all") > 0) update_env(_env, _opt, false); + }; + _update("OMNITRACE_USE_KOKKOSP", _v.count("kokkosp") > 0); + _update("OMNITRACE_USE_MPIP", _v.count("mpip") > 0); + 
_update("OMNITRACE_USE_OMPT", _v.count("ompt") > 0); + _update("OMNITRACE_USE_RCCLP", _v.count("rcclp") > 0); + _update("OMNITRACE_USE_ROCTX", _v.count("roctx") > 0); + _update("OMNITRACE_USE_ROCM_SMI", _v.count("rocm-smi") > 0); + _update("OMNITRACE_USE_ROCTRACER", _v.count("roctracer") > 0); + _update("OMNITRACE_USE_ROCPROFILER", _v.count("rocprofiler") > 0); + _update("OMNITRACE_TRACE_THREAD_LOCKS", _v.count("mutex-locks") > 0); + _update("OMNITRACE_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); + _update("OMNITRACE_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); + }); + + parser.add_argument({ "" }, ""); + parser + .add_argument({ "--cpus" }, + "CPU IDs for frequency sampling. Supports integers and/or ranges") + .dtype("int or range") + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_USE_PROCESS_SAMPLING", true); + update_env( + _env, "OMNITRACE_PROCESS_SAMPLING_CPUS", + join(array_config{ "," }, p.get>("cpus"))); + }); + parser + .add_argument({ "--gpus" }, + "GPU IDs for SMI queries. 
Supports integers and/or ranges") + .dtype("int or range") + .action([&](parser_t& p) { + update_env( + _env, "OMNITRACE_PROCESS_SAMPLING_GPUS", + join(array_config{ "," }, p.get>("gpus"))); + }); + + parser.add_argument({ "" }, ""); + parser + .add_argument({ "-C", "--cpu-events" }, + "Set the CPU hardware counter events to record (ref: " + "`omnitrace-avail -H -c CPU`)") .action([&](parser_t& p) { auto _events = join(array_config{ "," }, p.get>("cpu-events")); @@ -206,7 +403,7 @@ parse_args(int argc, char** argv, std::vector& _env) parser .add_argument({ "-G", "--gpu-events" }, "Set the GPU hardware counter events to record (ref: " - "`omnitrace-avail -H -c GPU)") + "`omnitrace-avail -H -c GPU`)") .action([&](parser_t& p) { auto _events = join(array_config{ "," }, p.get>("gpu-events")); @@ -217,6 +414,12 @@ parse_args(int argc, char** argv, std::vector& _env) auto _cmdc = std::get<1>(_args); auto* _cmdv = std::get<2>(_args); + if(parser.exists("realtime") && !parser.exists("cputime")) + update_env(_env, "OMNITRACE_SAMPLING_CPUTIME", false); + if(parser.exists("profile") && parser.exists("flat-profile")) + throw std::runtime_error( + "Error! 
'--profile' argument conflicts with '--flat-profile' argument"); + if(help_check(parser, _cmdc, _cmdv)) help_action(parser); std::vector _argv = {}; diff --git a/source/lib/omnitrace-dl/dl.cpp b/source/lib/omnitrace-dl/dl.cpp index f1b6697e7..716cd0fac 100644 --- a/source/lib/omnitrace-dl/dl.cpp +++ b/source/lib/omnitrace-dl/dl.cpp @@ -98,9 +98,9 @@ get_omnitrace_dl_env() inline bool get_omnitrace_preload() { - auto&& _preload = get_env("OMNITRACE_PRELOAD", false); + auto&& _preload = get_env("OMNITRACE_PRELOAD", true); auto&& _preload_libs = get_env("LD_PRELOAD", std::string{}); - return (_preload || _preload_libs.find("libomnitrace-dl.so") != std::string::npos); + return (_preload && _preload_libs.find("libomnitrace-dl.so") != std::string::npos); } // environment priority: @@ -511,6 +511,11 @@ extern "C" tim::log::colorized() = false; } + int omnitrace_preload_library(void) + { + return (::omnitrace::dl::get_omnitrace_preload()) ? 1 : 0; + } + void omnitrace_init_library(void) { OMNITRACE_DL_INVOKE(get_indirect().omnitrace_init_library_f); @@ -921,17 +926,15 @@ omnitrace_preload() OMNITRACE_HIDDEN_API; bool omnitrace_preload() { - omnitrace_preinit_library(); - - auto _preloaded = get_omnitrace_preload(); - auto _enabled = get_env("OMNITRACE_ENABLED", true); + auto _preload = get_omnitrace_preload() && get_env("OMNITRACE_ENABLED", true); static bool _once = false; - if(_once) return _preloaded; + if(_once) return _preload; _once = true; - if(_preloaded && _enabled) + if(_preload) { + omnitrace_preinit_library(); OMNITRACE_DL_LOG(1, "[%s] invoking %s(%s)\n", __FUNCTION__, "omnitrace_init", ::omnitrace::join(::omnitrace::QuoteStrings{}, ", ", "sampling", false, "main") @@ -940,7 +943,7 @@ omnitrace_preload() omnitrace_init_tooling(); } - return _preloaded; + return _preload; } bool _handle_preload = omnitrace::dl::omnitrace_preload(); diff --git a/source/lib/omnitrace-dl/dl.hpp b/source/lib/omnitrace-dl/dl.hpp index 1281fdd0d..b6ad237cd 100644 --- 
a/source/lib/omnitrace-dl/dl.hpp +++ b/source/lib/omnitrace-dl/dl.hpp @@ -88,6 +88,7 @@ extern "C" #if defined(OMNITRACE_DL_SOURCE) && (OMNITRACE_DL_SOURCE > 0) void omnitrace_preinit_library(void) OMNITRACE_HIDDEN_API; + int omnitrace_preload_library(void) OMNITRACE_HIDDEN_API; int omnitrace_user_start_trace_dl(void) OMNITRACE_HIDDEN_API; int omnitrace_user_stop_trace_dl(void) OMNITRACE_HIDDEN_API; diff --git a/source/lib/omnitrace-dl/main.c b/source/lib/omnitrace-dl/main.c index 63d0caeac..c82ebe8cb 100644 --- a/source/lib/omnitrace-dl/main.c +++ b/source/lib/omnitrace-dl/main.c @@ -29,8 +29,8 @@ #include #include -extern -void omnitrace_preinit_library(void); +extern int +omnitrace_preload_library(void); extern void omnitrace_finalize(void); @@ -87,7 +87,7 @@ __libc_start_main(int (*_main)(int, char**, char**), int _argc, char** _argv, int (*_init)(int, char**, char**), void (*_fini)(void), void (*_rtld_fini)(void), void* _stack_end) { - omnitrace_preinit_library(); + int _preload = omnitrace_preload_library(); // prevent re-entry static int _reentry = 0; @@ -103,14 +103,23 @@ __libc_start_main(int (*_main)(int, char**, char**), int _argc, char** _argv, // Find the real __libc_start_main() omnitrace_libc_start_main user_main = dlsym(RTLD_NEXT, "__libc_start_main"); + // disable future LD_PRELOADs + setenv("OMNITRACE_PRELOAD", "0", 1); + if(user_main && user_main != _this_func) { - //if(strcmp(_argv[0], "mpirun") == 0) - // return user_main(_main, _argc, _argv, _init, _fini, _rtld_fini, - // _stack_end); - //else + if(_preload == 0) + { + // call original main + return user_main(main_real, _argc, _argv, _init, _fini, _rtld_fini, + _stack_end); + } + else + { // call omnitrace main function wrapper - return user_main(omnitrace_main, _argc, _argv, _init, _fini, _rtld_fini, _stack_end); + return user_main(omnitrace_main, _argc, _argv, _init, _fini, _rtld_fini, + _stack_end); + } } else { diff --git a/source/lib/omnitrace/library/components/fork_gotcha.cpp 
b/source/lib/omnitrace/library/components/fork_gotcha.cpp index cd4bd31f7..70ddf0466 100644 --- a/source/lib/omnitrace/library/components/fork_gotcha.cpp +++ b/source/lib/omnitrace/library/components/fork_gotcha.cpp @@ -45,6 +45,7 @@ fork_gotcha::configure() void fork_gotcha::audit(const gotcha_data_t&, audit::incoming) { + tim::set_env("OMNITRACE_PRELOAD", "0", 1); OMNITRACE_VERBOSE(1, "fork() called on PID %i (rank: %i), TID %li\n", process::get_id(), dmp::rank(), threading::get_id()); OMNITRACE_BASIC_DEBUG( diff --git a/source/lib/omnitrace/library/config.cpp b/source/lib/omnitrace/library/config.cpp index aa6f1e261..181ce7258 100644 --- a/source/lib/omnitrace/library/config.cpp +++ b/source/lib/omnitrace/library/config.cpp @@ -210,7 +210,7 @@ configure_settings(bool _init) if(_once) return; _once = true; - if(get_state() < State::Init) + if(get_is_continuous_integration() && get_state() < State::Init) { timemory_print_demangled_backtrace<64>(); OMNITRACE_THROW("config::configure_settings() called before " @@ -467,6 +467,10 @@ configure_settings(bool _init) "filter out internal routines from the sampling call-stacks", true, "sampling", "data", "advanced"); + OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_SAMPLING_INCLUDE_INLINES", + "Create entries for inlined functions when available", false, + "sampling", "data", "advanced"); + OMNITRACE_CONFIG_SETTING( bool, "OMNITRACE_SAMPLING_REALTIME", "Enable sampling frequency via a wall-clock timer on child threads. 
This may " @@ -827,8 +831,10 @@ configure_mode_settings() } else { + bool _changed = get_setting_value(_name).second != _v; OMNITRACE_VERBOSE( - 1, "[configure_mode_settings] Overriding %s to %s in %s mode...\n", + 1 && _changed, + "[configure_mode_settings] Overriding %s to %s in %s mode...\n", _name.c_str(), JOIN("", std::boolalpha, _v).c_str(), std::to_string(get_mode()).c_str()); } @@ -1812,6 +1818,13 @@ get_sampling_real_tids() static_cast&>(*_v->second).get(), "thread IDs"); } +bool +get_sampling_include_inlines() +{ + static auto _v = get_config()->find("OMNITRACE_SAMPLING_INCLUDE_INLINES"); + return static_cast&>(*_v->second).get(); +} + int64_t get_critical_trace_count() { @@ -1825,7 +1838,7 @@ get_process_sampling_freq() static auto _v = get_config()->find("OMNITRACE_PROCESS_SAMPLING_FREQ"); auto _val = std::min(static_cast&>(*_v->second).get(), 1000.0); - if(_val < 1.0e-9) return get_sampling_freq(); + if(_val < 1.0e-9) return std::min(get_sampling_freq(), 100.0); return _val; } diff --git a/source/lib/omnitrace/library/config.hpp b/source/lib/omnitrace/library/config.hpp index 4409f874c..2c32c7f0c 100644 --- a/source/lib/omnitrace/library/config.hpp +++ b/source/lib/omnitrace/library/config.hpp @@ -298,6 +298,9 @@ get_sampling_cpu_tids(); std::set get_sampling_real_tids(); +bool +get_sampling_include_inlines(); + double get_process_sampling_freq(); diff --git a/source/lib/omnitrace/library/ompt.cpp b/source/lib/omnitrace/library/ompt.cpp index d1e869823..c70746d70 100644 --- a/source/lib/omnitrace/library/ompt.cpp +++ b/source/lib/omnitrace/library/ompt.cpp @@ -100,16 +100,22 @@ ompt_start_tool(unsigned int omp_version, const char* runtime_version) static auto ompt_initialize = [](ompt_function_lookup_t lookup, int initial_device_num, ompt_data_t* tool_data) -> int { - TIMEMORY_PRINTF(stderr, "OpenMP-tools configuring for initial device %i\n\n", - initial_device_num); - tim::ompt::configure(lookup, initial_device_num, - tool_data); + 
if(omnitrace::config::get_use_ompt()) + { + TIMEMORY_PRINTF(stderr, "OpenMP-tools configuring for initial device %i\n\n", + initial_device_num); + tim::ompt::configure(lookup, initial_device_num, + tool_data); + } return 1; // success }; static auto ompt_finalize = [](ompt_data_t* tool_data) { - TIMEMORY_PRINTF(stderr, "OpenMP-tools finalized\n\n"); - tim::consume_parameters(tool_data); + if(omnitrace::config::get_use_ompt()) + { + TIMEMORY_PRINTF(stderr, "OpenMP-tools finalized\n\n"); + tim::consume_parameters(tool_data); + } }; static auto data = ompt_start_tool_result_t{ ompt_initialize, ompt_finalize, { 0 } }; diff --git a/source/lib/omnitrace/library/sampling.cpp b/source/lib/omnitrace/library/sampling.cpp index ffb1a9e76..a5a92d509 100644 --- a/source/lib/omnitrace/library/sampling.cpp +++ b/source/lib/omnitrace/library/sampling.cpp @@ -103,6 +103,9 @@ namespace sampling { namespace { +std::set +configure(bool _setup, int64_t _tid = threading::get_id()); + template void thread_sigmask(Args... 
_args) @@ -163,6 +166,20 @@ get_sampler_running(int64_t _tid) return _v.at(_tid); } +auto& +get_duration_disabled() +{ + static auto _v = std::atomic{ false }; + return _v; +} + +auto& +get_is_duration_thread() +{ + static thread_local auto _v = false; + return _v; +} + auto& get_duration_cv() { @@ -170,6 +187,13 @@ get_duration_cv() return _v; } +auto& +get_duration_mutex() +{ + static auto _v = std::mutex{}; + return _v; +} + auto& get_duration_thread() { @@ -177,6 +201,28 @@ get_duration_thread() return _v; } +auto +notify_duration_thread() +{ + if(get_duration_thread() && !get_is_duration_thread()) + { + std::unique_lock _lk{ get_duration_mutex(), std::defer_lock }; + if(!_lk.owns_lock()) _lk.lock(); + get_duration_cv().notify_all(); + } +} + +void +stop_duration_thread() +{ + if(get_duration_thread() && !get_is_duration_thread()) + { + notify_duration_thread(); + get_duration_thread()->join(); + get_duration_thread().reset(); + } +} + void start_duration_thread() { @@ -195,12 +241,14 @@ start_duration_thread() config::get_sampling_duration() * units::sec) }; auto _func = [_end]() { thread_info::init(true); - std::mutex _mutex{}; - bool _wait = true; + threading::set_thread_name("omni.samp.dur"); + get_is_duration_thread() = true; + bool _wait = true; while(_wait) { _wait = false; - std::unique_lock _lk{ _mutex }; + std::unique_lock _lk{ get_duration_mutex(), std::defer_lock }; + if(!_lk.owns_lock()) _lk.lock(); get_duration_cv().wait_until(_lk, _end); auto _premature = (std::chrono::steady_clock::now() < _end); auto _finalized = (get_state() == State::Finalized); @@ -218,11 +266,12 @@ start_duration_thread() } else { + get_duration_disabled().store(true); OMNITRACE_VERBOSE(1, "Sampling duration of %f seconds has elapsed. 
" "Shutting down sampling...\n", config::get_sampling_duration()); - shutdown(); + configure(false, 0); } } }; @@ -237,7 +286,7 @@ start_duration_thread() } std::set -configure(bool _setup, int64_t _tid = threading::get_id()) +configure(bool _setup, int64_t _tid) { const auto& _info = thread_info::get(_tid, SequentTID); auto& _sampler = sampling::get_sampler(_tid); @@ -266,6 +315,8 @@ configure(bool _setup, int64_t _tid = threading::get_id()) if(_setup && !_sampler && !_is_running && !_signal_types->empty()) { + if(get_duration_disabled()) return std::set{}; + // if this thread has an offset ID, that means it was created internally // and is probably here bc it called a function which was instrumented. // thus we should not start a sampler for it @@ -356,7 +407,8 @@ configure(bool _setup, int64_t _tid = threading::get_id()) sampling::block_signals(*_signal_types); } - get_duration_cv().notify_one(); + notify_duration_thread(); + if(_tid == 0) { // this propagates to all threads @@ -371,11 +423,7 @@ configure(bool _setup, int64_t _tid = threading::get_id()) } } - if(get_duration_thread()) - { - get_duration_thread()->join(); - get_duration_thread().reset(); - } + stop_duration_thread(); } _sampler->stop(); @@ -416,7 +464,9 @@ setup() std::set shutdown() { - return configure(false); + auto _v = configure(false); + if(utility::get_thread_index() == 0) stop_duration_thread(); + return _v; } void @@ -619,35 +669,64 @@ post_process_perfetto(int64_t _tid, const bundle_t* _init, static std::set _static_strings{}; for(const auto& iitr : backtrace::filter_and_patch(_bt_cs->get())) { - const auto* _name = _static_strings.emplace(iitr.name).first->c_str(); - uint64_t _beg = _last_ts; - uint64_t _end = _bt_ts->get_timestamp(); + uint64_t _beg = _last_ts; + uint64_t _end = _bt_ts->get_timestamp(); if(!_thread_info->is_valid_lifetime({ _beg, _end })) continue; - tracing::push_perfetto_ts( - category::sampling{}, _name, _beg, [&](perfetto::EventContext ctx) { - 
tracing::add_perfetto_annotation(ctx, "begin_ns", _beg); - tracing::add_perfetto_annotation(ctx, "file", iitr.location); - tracing::add_perfetto_annotation(ctx, "pc", - _as_hex(iitr.address)); - tracing::add_perfetto_annotation(ctx, "line_address", - _as_hex(iitr.line_address)); - if(iitr.lineinfo) - { - size_t _n = 0; - for(const auto& litr : iitr.lineinfo.lines) + if(get_sampling_include_inlines() && iitr.lineinfo) + { + auto _lines = iitr.lineinfo.lines; + std::reverse(_lines.begin(), _lines.end()); + size_t _n = 0; + for(const auto& litr : _lines) + { + const auto* _name = + _static_strings.emplace(demangle(litr.name)).first->c_str(); + auto _info = JOIN(':', litr.location, litr.line); + tracing::push_perfetto_ts( + category::sampling{}, _name, _beg, + [&](perfetto::EventContext ctx) { + tracing::add_perfetto_annotation(ctx, "begin_ns", _beg); + tracing::add_perfetto_annotation(ctx, "lineinfo", _info); + tracing::add_perfetto_annotation(ctx, "inlined", + (_n++ > 0)); + }); + tracing::pop_perfetto_ts(category::sampling{}, _name, _end, + "end_ns", _end); + } + } + else + { + const auto* _name = _static_strings.emplace(iitr.name).first->c_str(); + tracing::push_perfetto_ts( + category::sampling{}, _name, _beg, + [&](perfetto::EventContext ctx) { + tracing::add_perfetto_annotation(ctx, "begin_ns", _beg); + tracing::add_perfetto_annotation(ctx, "file", iitr.location); + tracing::add_perfetto_annotation(ctx, "pc", + _as_hex(iitr.address)); + tracing::add_perfetto_annotation(ctx, "line_address", + _as_hex(iitr.line_address)); + + if(iitr.lineinfo) { - auto _label = JOIN('-', "lineinfo", _n++); - tracing::add_perfetto_annotation( - ctx, _label.c_str(), - JOIN('@', demangle(litr.name), - JOIN(':', litr.location, litr.line))); + auto _lines = iitr.lineinfo.lines; + std::reverse(_lines.begin(), _lines.end()); + size_t _n = 0; + for(const auto& litr : _lines) + { + auto _label = JOIN('-', "lineinfo", _n++); + tracing::add_perfetto_annotation( + ctx, _label.c_str(), + 
JOIN('@', demangle(litr.name), + JOIN(':', litr.location, litr.line))); + } } - } - }); + }); - tracing::pop_perfetto_ts(category::sampling{}, _name, _end, "end_ns", - _end); + tracing::pop_perfetto_ts(category::sampling{}, _name, _end, "end_ns", + _end); + } } _last_ts = _bt_ts->get_timestamp(); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index af7b05b85..4b00a50c2 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -47,6 +47,7 @@ set(_flat_environment "OMNITRACE_TIMELINE_PROFILE=OFF" "OMNITRACE_COLLAPSE_PROCESSES=ON" "OMNITRACE_COLLAPSE_THREADS=ON" + "OMNITRACE_SAMPLING_FREQ=50" "OMNITRACE_TIMEMORY_COMPONENTS=wall_clock,trip_count" "${_test_openmp_env}" "${_test_library_path}") @@ -233,7 +234,7 @@ endfunction() # -------------------------------------------------------------------------------------- # function(OMNITRACE_ADD_TEST) - foreach(_PREFIX RUNTIME REWRITE REWRITE_RUN BASELINE) + foreach(_PREFIX PRELOAD RUNTIME REWRITE REWRITE_RUN BASELINE) foreach(_TYPE PASS FAIL SKIP) list(APPEND _REGEX_OPTS "${_PREFIX}_${_TYPE}_REGEX") endforeach() @@ -243,10 +244,9 @@ function(OMNITRACE_ADD_TEST) cmake_parse_arguments( TEST - "SKIP_BASELINE;SKIP_REWRITE;SKIP_RUNTIME;SKIP_SAMPLING;FORCE_SAMPLING" # options - "NAME;TARGET;MPI;GPU;NUM_PROCS;REWRITE_TIMEOUT;RUNTIME_TIMEOUT" # single value - # args - "${_KWARGS}" # multiple value args + "SKIP_BASELINE;SKIP_PRELOAD;SKIP_REWRITE;SKIP_RUNTIME;SKIP_SAMPLING;FORCE_SAMPLING" + "NAME;TARGET;MPI;GPU;NUM_PROCS;REWRITE_TIMEOUT;RUNTIME_TIMEOUT;PRELOAD" + "${_KWARGS}" ${ARGN}) if(TEST_GPU AND NOT _VALID_GPU) @@ -278,6 +278,10 @@ function(OMNITRACE_ADD_TEST) set(TEST_RUNTIME_TIMEOUT 300) endif() + if(NOT TEST_PRELOAD_TIMEOUT) + set(TEST_PRELOAD_TIMEOUT 120) + endif() + if(NOT DEFINED TEST_ENVIRONMENT OR "${TEST_ENVIRONMENT}" STREQUAL "") set(TEST_ENVIRONMENT "${_test_environment}") endif() @@ -306,7 +310,9 @@ function(OMNITRACE_ADD_TEST) NAME ${TEST_NAME}-baseline COMMAND ${COMMAND_PREFIX} $ ${TEST_RUN_ARGS} 
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) + endif() + if(NOT TEST_SKIP_PRELOAD) add_test( NAME ${TEST_NAME}-preload COMMAND ${COMMAND_PREFIX} $ -- @@ -379,24 +385,26 @@ function(OMNITRACE_ADD_TEST) baseline preload binary-rewrite binary-rewrite-run binary-rewrite-sampling binary-rewrite-sampling-run runtime-instrument runtime-instrument-sampling) string(REGEX REPLACE "-run(-|/)" "\\1" _prefix "${TEST_NAME}-${_TEST}/") - set(_environ "${TEST_ENVIRONMENT}") - set(_labels "${_TEST}") - set(_timeout ${TEST_REWRITE_TIMEOUT}) - list(APPEND _environ "OMNITRACE_OUTPUT_PATH=omnitrace-tests-output" - "OMNITRACE_OUTPUT_PREFIX=${_prefix}") + set(_labels "${TEST_LABELS}" "${_TEST}") string(REPLACE "-run" "" _labels "${_TEST}") string(REPLACE "-sampling" ";sampling" _labels "${_labels}") if(TEST_TARGET) list(APPEND _labels "${TEST_TARGET}") endif() - if("${_TEST}" MATCHES "runtime-instrument") + + set(_environ + "${TEST_ENVIRONMENT}" "OMNITRACE_OUTPUT_PATH=omnitrace-tests-output" + "OMNITRACE_OUTPUT_PREFIX=${_prefix}") + + set(_timeout ${TEST_REWRITE_TIMEOUT}) + if("${_TEST}" MATCHES "preload") + set(_timeout ${TEST_PRELOAD_TIMEOUT}) + elseif("${_TEST}" MATCHES "runtime-instrument") set(_timeout ${TEST_RUNTIME_TIMEOUT}) endif() - if("${_TEST}" MATCHES "binary-rewrite-run") - list(APPEND _labels "binary-rewrite-run") - endif() + set(_props) - if("${_TEST}" MATCHES "run|baseline") + if("${_TEST}" MATCHES "run|preload|baseline") set(_props ${TEST_PROPERTIES}) if(NOT "RUN_SERIAL" IN_LIST _props) list(APPEND _props RUN_SERIAL ON) @@ -412,7 +420,7 @@ function(OMNITRACE_ADD_TEST) elseif("${_TEST}" MATCHES "baseline") set(_REGEX_VAR BASELINE) elseif("${_TEST}" MATCHES "preload") - set(_REGEX_VAR BASELINE) + set(_REGEX_VAR PRELOAD) else() set(_REGEX_VAR) endif() @@ -434,7 +442,7 @@ function(OMNITRACE_ADD_TEST) TIMEOUT ${_timeout} LABELS - "${_labels};${TEST_LABELS}" + "${TEST_LABELS};${_labels}" PASS_REGULAR_EXPRESSION "${${_PASS_REGEX}}" FAIL_REGULAR_EXPRESSION @@ -655,7 +663,7 @@ omnitrace_add_test(
NUM_PROCS ${NUM_PROCS} REWRITE_ARGS -e -v 2 -E uniform_int_distribution ENVIRONMENT - "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_COUT_OUTPUT=ON;OMNITRACE_ROCM_EVENTS=${OMNITRACE_ROCM_EVENTS_TEST}" + "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_ROCM_EVENTS=${OMNITRACE_ROCM_EVENTS_TEST}" REWRITE_RUN_PASS_REGEX "rocprof-device-0-GRBM_COUNT.txt(.*)rocprof-device-0-GPUBusy.txt(.*)rocprof-device-0-SQ_WAVES.txt(.*)rocprof-device-0-SQ_INSTS_VALU.txt(.*)rocprof-device-0-VALUInsts.txt(.*)rocprof-device-0-TCC_HIT_sum.txt(.*)rocprof-device-0-TA_TA_BUSY_0.txt(.*)rocprof-device-0-TA_TA_BUSY_11.txt" ) @@ -670,7 +678,7 @@ omnitrace_add_test( NUM_PROCS ${NUM_PROCS} REWRITE_ARGS -e -v 2 -E uniform_int_distribution ENVIRONMENT - "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_COUT_OUTPUT=ON;OMNITRACE_USE_ROCTRACER=OFF;OMNITRACE_ROCM_EVENTS=${OMNITRACE_ROCM_EVENTS_TEST}" + "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_ROCTRACER=OFF;OMNITRACE_ROCM_EVENTS=${OMNITRACE_ROCM_EVENTS_TEST}" REWRITE_RUN_PASS_REGEX "rocprof-device-0-GRBM_COUNT.txt(.*)rocprof-device-0-GPUBusy.txt(.*)rocprof-device-0-SQ_WAVES.txt(.*)rocprof-device-0-SQ_INSTS_VALU.txt(.*)rocprof-device-0-VALUInsts.txt(.*)rocprof-device-0-TCC_HIT_sum.txt(.*)rocprof-device-0-TA_TA_BUSY_0.txt(.*)rocprof-device-0-TA_TA_BUSY_11.txt" REWRITE_RUN_FAIL_REGEX "roctracer.txt") @@ -846,7 +854,7 @@ omnitrace_add_test( LABELS "kokkos;kokkos-profile-library" RUN_ARGS -i 25 -s 20 -p ENVIRONMENT - "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=ON;OMNITRACE_COUT_OUTPUT=ON;KOKKOS_PROFILE_LIBRARY=libomnitrace-dl.so" + "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=ON;OMNITRACE_COUT_OUTPUT=ON;OMNITRACE_SAMPLING_FREQ=50;KOKKOS_PROFILE_LIBRARY=libomnitrace-dl.so" REWRITE_RUN_PASS_REGEX "\\|_\\[kokkos\\]" RUNTIME_PASS_REGEX "\\|_\\[kokkos\\]") @@ -860,9 +868,8 @@ omnitrace_add_test( LABELS "kokkos;kokkos-profile-library" RUN_ARGS -i 10 
-s 20 -p ENVIRONMENT - "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=ON;OMNITRACE_COUT_OUTPUT=ON;KOKKOS_PROFILE_LIBRARY=libomnitrace.so" - BASELINE_PASS_REGEX - "\\|_\\[kokkos\\]") + "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=ON;OMNITRACE_COUT_OUTPUT=ON;OMNITRACE_SAMPLING_FREQ=50;KOKKOS_PROFILE_LIBRARY=libomnitrace.so" + BASELINE_PASS_REGEX "\\|_\\[kokkos\\]") omnitrace_add_test( SKIP_RUNTIME SKIP_REWRITE @@ -874,9 +881,8 @@ omnitrace_add_test( LABELS "kokkos;kokkos-profile-library" RUN_ARGS -i 10 -s 20 -p ENVIRONMENT - "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=ON;OMNITRACE_COUT_OUTPUT=ON;KOKKOS_PROFILE_LIBRARY=libomnitrace-dl.so" - BASELINE_PASS_REGEX - "\\|_\\[kokkos\\]") + "${_base_environment};OMNITRACE_CRITICAL_TRACE=OFF;OMNITRACE_USE_KOKKOSP=ON;OMNITRACE_COUT_OUTPUT=ON;OMNITRACE_SAMPLING_FREQ=50;KOKKOS_PROFILE_LIBRARY=libomnitrace-dl.so" + BASELINE_PASS_REGEX "\\|_\\[kokkos\\]") omnitrace_add_test( SKIP_BASELINE @@ -980,10 +986,46 @@ omnitrace_add_test( REWRITE_TIMEOUT 180 RUNTIME_TIMEOUT 360 ENVIRONMENT - "${_ompt_environment};OMNITRACE_USE_SAMPLING=ON;OMNITRACE_SAMPLING_FREQ=600;OMNITRACE_COUT_OUTPUT=ON" + "${_ompt_environment};OMNITRACE_USE_SAMPLING=ON;OMNITRACE_SAMPLING_FREQ=50;OMNITRACE_COUT_OUTPUT=ON" REWRITE_RUN_PASS_REGEX "${_OMPT_PASS_REGEX}" REWRITE_FAIL_REGEX "0 instrumented loops in procedure") +set(_ompt_preload_environ + "${_ompt_environment}" + "OMNITRACE_USE_OMPT=OFF" + "OMNITRACE_USE_SAMPLING=ON" + "OMNITRACE_USE_PROCESS_SAMPLING=OFF" + "OMNITRACE_SAMPLING_FREQ=100" + "OMNITRACE_SAMPLING_DELAY=0.1" + "OMNITRACE_SAMPLING_DURATION=0.25" + "OMNITRACE_SAMPLING_CPUTIME=ON" + "OMNITRACE_SAMPLING_REALTIME=ON" + "OMNITRACE_SAMPLING_CPUTIME_FREQ=1000" + "OMNITRACE_SAMPLING_REALTIME_FREQ=500") + +set(_ompt_preload_samp_regex + "Sampler for thread 0 will be triggered 1000.0x per second of CPU-time(.*)Sampler for thread 0 will be triggered 500.0x per second of 
wall-time(.*)Sampling will be disabled after 0.250000 seconds(.*)Sampling duration of 0.250000 seconds has elapsed. Shutting down sampling" + ) +set(_ompt_preload_file_regex + "sampling-duration-preload/sampling_percent.(json|txt)(.*)sampling-duration-preload/sampling_cpu_clock.(json|txt)(.*)sampling-duration-preload/sampling_wall_clock.(json|txt)" + ) + +omnitrace_add_test( + SKIP_BASELINE SKIP_RUNTIME SKIP_REWRITE SKIP_SAMPLING + NAME openmp-cg-sampling-duration + TARGET openmp-cg + LABELS "openmp;sampling-duration" + ENVIRONMENT "${_ompt_preload_environ}" + PRELOAD_PASS_REGEX "${_ompt_preload_samp_regex}(.*)${_ompt_preload_file_regex}") + +omnitrace_add_test( + SKIP_BASELINE SKIP_RUNTIME SKIP_REWRITE SKIP_SAMPLING + NAME openmp-lu-sampling-duration + TARGET openmp-lu + LABELS "openmp;sampling-duration" + ENVIRONMENT "${_ompt_preload_environ}" + PRELOAD_PASS_REGEX "${_ompt_preload_samp_regex}(.*)${_ompt_preload_file_regex}") + omnitrace_add_test( SKIP_BASELINE SKIP_SAMPLING NAME code-coverage From 84957d5943724df9020a7e37c69015338180b4a0 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Wed, 28 Sep 2022 09:50:53 -0500 Subject: [PATCH 04/15] Fix mpi-flat tests --- tests/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4b00a50c2..f4ecf61dd 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -798,7 +798,6 @@ if(OMNITRACE_USE_MPI OR OMNITRACE_USE_MPI_HEADERS) --label file line - return args --min-instructions 0 @@ -822,7 +821,6 @@ if(OMNITRACE_USE_MPI OR OMNITRACE_USE_MPI_HEADERS) --label file line - return args --min-instructions 0 From 9bc2159ae3ac823b7afe3d9143c52151b97f5002 Mon Sep 17 00:00:00 2001 From: "Jonathan R. 
Madsen" Date: Wed, 28 Sep 2022 11:53:30 -0500 Subject: [PATCH 05/15] Fix segfault from OMPT after finalization --- source/lib/omnitrace/library/ompt.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/lib/omnitrace/library/ompt.cpp b/source/lib/omnitrace/library/ompt.cpp index c70746d70..f4429a8c6 100644 --- a/source/lib/omnitrace/library/ompt.cpp +++ b/source/lib/omnitrace/library/ompt.cpp @@ -97,10 +97,12 @@ ompt_start_tool(unsigned int omp_version, const char* runtime_version) OMNITRACE_METADATA("OMP_VERSION", omp_version); OMNITRACE_METADATA("OMP_RUNTIME_VERSION", runtime_version); + static bool _use_ompt = omnitrace::config::get_use_ompt(); static auto ompt_initialize = [](ompt_function_lookup_t lookup, int initial_device_num, ompt_data_t* tool_data) -> int { - if(omnitrace::config::get_use_ompt()) + _use_ompt = omnitrace::config::get_use_ompt(); + if(_use_ompt) { TIMEMORY_PRINTF(stderr, "OpenMP-tools configuring for initial device %i\n\n", initial_device_num); @@ -111,7 +113,7 @@ ompt_start_tool(unsigned int omp_version, const char* runtime_version) }; static auto ompt_finalize = [](ompt_data_t* tool_data) { - if(omnitrace::config::get_use_ompt()) + if(_use_ompt) { TIMEMORY_PRINTF(stderr, "OpenMP-tools finalized\n\n"); tim::consume_parameters(tool_data); From 646b7a65e8e8419c08e85beaa93335468db34c6d Mon Sep 17 00:00:00 2001 From: "Jonathan R. 
Madsen" Date: Wed, 28 Sep 2022 12:05:09 -0500 Subject: [PATCH 06/15] omnitrace-run -> omnitrace-sample --- source/bin/CMakeLists.txt | 2 +- source/bin/omnitrace-run/CMakeLists.txt | 22 ------------------- source/bin/omnitrace-sample/CMakeLists.txt | 22 +++++++++++++++++++ .../impl.cpp | 0 .../omnitrace-sample.cpp} | 2 +- .../omnitrace-sample.hpp} | 0 tests/CMakeLists.txt | 2 +- 7 files changed, 25 insertions(+), 25 deletions(-) delete mode 100644 source/bin/omnitrace-run/CMakeLists.txt create mode 100644 source/bin/omnitrace-sample/CMakeLists.txt rename source/bin/{omnitrace-run => omnitrace-sample}/impl.cpp (100%) rename source/bin/{omnitrace-run/omnitrace-run.cpp => omnitrace-sample/omnitrace-sample.cpp} (98%) rename source/bin/{omnitrace-run/omnitrace-run.hpp => omnitrace-sample/omnitrace-sample.hpp} (100%) diff --git a/source/bin/CMakeLists.txt b/source/bin/CMakeLists.txt index 630bf955d..c00f45b99 100644 --- a/source/bin/CMakeLists.txt +++ b/source/bin/CMakeLists.txt @@ -16,7 +16,7 @@ endif() # executables add_subdirectory(omnitrace-avail) add_subdirectory(omnitrace-critical-trace) -add_subdirectory(omnitrace-run) +add_subdirectory(omnitrace-sample) add_subdirectory(omnitrace) if(OMNITRACE_BUILD_TESTING OR "$ENV{OMNITRACE_CI}" MATCHES "[1-9]+|ON|on|y|yes") diff --git a/source/bin/omnitrace-run/CMakeLists.txt b/source/bin/omnitrace-run/CMakeLists.txt deleted file mode 100644 index e9cd8298b..000000000 --- a/source/bin/omnitrace-run/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -# ------------------------------------------------------------------------------# -# -# omnitrace-run target -# -# ------------------------------------------------------------------------------# - -add_executable(omnitrace-run ${CMAKE_CURRENT_LIST_DIR}/omnitrace-run.cpp - ${CMAKE_CURRENT_LIST_DIR}/impl.cpp) - -target_include_directories(omnitrace-run PRIVATE ${CMAKE_CURRENT_LIST_DIR}) -target_link_libraries( - omnitrace-run - PRIVATE omnitrace::omnitrace-compile-definitions 
omnitrace::omnitrace-headers - omnitrace::omnitrace-common-library) -set_target_properties( - omnitrace-run PROPERTIES BUILD_RPATH "\$ORIGIN:\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}" - INSTALL_RPATH "${OMNITRACE_EXE_INSTALL_RPATH}") - -install( - TARGETS omnitrace-run - DESTINATION ${CMAKE_INSTALL_BINDIR} - OPTIONAL) diff --git a/source/bin/omnitrace-sample/CMakeLists.txt b/source/bin/omnitrace-sample/CMakeLists.txt new file mode 100644 index 000000000..e502d4a26 --- /dev/null +++ b/source/bin/omnitrace-sample/CMakeLists.txt @@ -0,0 +1,22 @@ +# ------------------------------------------------------------------------------# +# +# omnitrace-sample target +# +# ------------------------------------------------------------------------------# + +add_executable(omnitrace-sample ${CMAKE_CURRENT_LIST_DIR}/omnitrace-sample.cpp + ${CMAKE_CURRENT_LIST_DIR}/impl.cpp) + +target_include_directories(omnitrace-sample PRIVATE ${CMAKE_CURRENT_LIST_DIR}) +target_link_libraries( + omnitrace-sample + PRIVATE omnitrace::omnitrace-compile-definitions omnitrace::omnitrace-headers + omnitrace::omnitrace-common-library) +set_target_properties( + omnitrace-sample PROPERTIES BUILD_RPATH "\$ORIGIN:\$ORIGIN/../${CMAKE_INSTALL_LIBDIR}" + INSTALL_RPATH "${OMNITRACE_EXE_INSTALL_RPATH}") + +install( + TARGETS omnitrace-sample + DESTINATION ${CMAKE_INSTALL_BINDIR} + OPTIONAL) diff --git a/source/bin/omnitrace-run/impl.cpp b/source/bin/omnitrace-sample/impl.cpp similarity index 100% rename from source/bin/omnitrace-run/impl.cpp rename to source/bin/omnitrace-sample/impl.cpp diff --git a/source/bin/omnitrace-run/omnitrace-run.cpp b/source/bin/omnitrace-sample/omnitrace-sample.cpp similarity index 98% rename from source/bin/omnitrace-run/omnitrace-run.cpp rename to source/bin/omnitrace-sample/omnitrace-sample.cpp index 6b2619628..0e30d7dc9 100644 --- a/source/bin/omnitrace-run/omnitrace-run.cpp +++ b/source/bin/omnitrace-sample/omnitrace-sample.cpp @@ -20,7 +20,7 @@ // OUT OF OR IN CONNECTION WITH THE 
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -#include "omnitrace-run.hpp" +#include "omnitrace-sample.hpp" #include diff --git a/source/bin/omnitrace-run/omnitrace-run.hpp b/source/bin/omnitrace-sample/omnitrace-sample.hpp similarity index 100% rename from source/bin/omnitrace-run/omnitrace-run.hpp rename to source/bin/omnitrace-sample/omnitrace-sample.hpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f4ecf61dd..3a10ffdee 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -315,7 +315,7 @@ function(OMNITRACE_ADD_TEST) if(NOT TEST_SKIP_PRELOAD) add_test( NAME ${TEST_NAME}-preload - COMMAND ${COMMAND_PREFIX} $ -- + COMMAND ${COMMAND_PREFIX} $ -- $ ${TEST_RUN_ARGS} WORKING_DIRECTORY ${PROJECT_BINARY_DIR}) endif() From 94148ad650892f5d27e9953c9dfb53e5bc0ef769 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Wed, 28 Sep 2022 14:32:17 -0500 Subject: [PATCH 07/15] Update timemory with openmp callback connector fix --- external/timemory | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/timemory b/external/timemory index a781a2169..94f5c00e1 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit a781a2169589e375db220f6e615d26714d8b5ba7 +Subproject commit 94f5c00e1239e7c5f340bf749e133a3d6b51ffab From 1c00d4c4ce80a5f9dce562fc8a86e47841666718 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Wed, 28 Sep 2022 15:12:39 -0500 Subject: [PATCH 08/15] Fix shadowed variables and aliases in timemory --- external/timemory | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/timemory b/external/timemory index 94f5c00e1..56b4f0eda 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit 94f5c00e1239e7c5f340bf749e133a3d6b51ffab +Subproject commit 56b4f0eda5489e3c9b81bb86ad18533650b1a1f8 From fd8eccf8a5646f4053925242ef54352495025a30 Mon Sep 17 00:00:00 2001 From: "Jonathan R. 
Madsen" Date: Wed, 28 Sep 2022 15:53:45 -0500 Subject: [PATCH 09/15] Fix OMPT initialization --- source/lib/omnitrace/library/ompt.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/source/lib/omnitrace/library/ompt.cpp b/source/lib/omnitrace/library/ompt.cpp index f4429a8c6..ef6357d32 100644 --- a/source/lib/omnitrace/library/ompt.cpp +++ b/source/lib/omnitrace/library/ompt.cpp @@ -20,6 +20,10 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. +#include "api.hpp" +#include "library/common.hpp" +#include "library/config.hpp" +#include "library/debug.hpp" #include "library/defines.hpp" #include @@ -97,6 +101,16 @@ ompt_start_tool(unsigned int omp_version, const char* runtime_version) OMNITRACE_METADATA("OMP_VERSION", omp_version); OMNITRACE_METADATA("OMP_RUNTIME_VERSION", runtime_version); + if(!omnitrace::settings_are_configured()) + { + OMNITRACE_BASIC_WARNING( + 0, + "[%s] invoked before omnitrace was initialized. In instrumentation mode, " + "settings exported to the environment have not been propagated yet...\n", + __FUNCTION__); + omnitrace::configure_settings(); + } + static bool _use_ompt = omnitrace::config::get_use_ompt(); static auto ompt_initialize = [](ompt_function_lookup_t lookup, int initial_device_num, From e41a4081ee24096fd87780ac3753eeb5e2f2f82d Mon Sep 17 00:00:00 2001 From: "Jonathan R. 
Madsen" Date: Fri, 30 Sep 2022 10:28:29 -0500 Subject: [PATCH 10/15] Improve command line --- source/bin/omnitrace-sample/impl.cpp | 512 ++++++++++++------ .../bin/omnitrace-sample/omnitrace-sample.cpp | 19 +- .../bin/omnitrace-sample/omnitrace-sample.hpp | 10 +- 3 files changed, 367 insertions(+), 174 deletions(-) diff --git a/source/bin/omnitrace-sample/impl.cpp b/source/bin/omnitrace-sample/impl.cpp index 07107ef9e..4d235ac3d 100644 --- a/source/bin/omnitrace-sample/impl.cpp +++ b/source/bin/omnitrace-sample/impl.cpp @@ -20,13 +20,17 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. +#include "omnitrace-sample.hpp" + #include "common/delimit.hpp" #include "common/environment.hpp" #include "common/join.hpp" #include "common/setup.hpp" +#include #include #include +#include #include #include @@ -34,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +46,7 @@ namespace color = tim::log::color; using tim::log::stream; using namespace timemory::join; +using tim::get_env; namespace { @@ -53,6 +59,15 @@ get_command(const char* _argv0) return omnitrace::path::find_path(_argv0, 0, omnitrace::common::get_env("PATH", "")); } +std::string +get_realpath(const std::string& _v) +{ + auto* _tmp = realpath(_v.c_str(), nullptr); + auto _ret = std::string{ _tmp }; + free(_tmp); + return _ret; +} + void print_command(const std::vector& _argv) { @@ -62,39 +77,85 @@ print_command(const std::vector& _argv) } std::vector -get_environment() +get_initial_environment() { - std::vector _environ; + std::vector _env; if(environ != nullptr) { int idx = 0; while(environ[idx] != nullptr) - _environ.emplace_back(strdup(environ[idx++])); + _env.emplace_back(strdup(environ[idx++])); } + update_env(_env, "LD_PRELOAD", + get_realpath(get_internal_libpath("libomnitrace-dl.so")), true); + + auto* _dl_libpath = + realpath(get_internal_libpath("libomnitrace-dl.so").c_str(), nullptr); + auto* _omni_libpath = + 
realpath(get_internal_libpath("libomnitrace.so").c_str(), nullptr); + + update_env(_env, "OMNITRACE_USE_SAMPLING", true); + update_env(_env, "OMNITRACE_CRITICAL_TRACE", false); + update_env(_env, "OMNITRACE_USE_PROCESS_SAMPLING", false); + // update_env(_env, "OMNITRACE_USE_PID", false); + // update_env(_env, "OMNITRACE_TIME_OUTPUT", false); + // update_env(_env, "OMNITRACE_OUTPUT_PATH", "omnitrace-output/%tag%/%launch_time%"); + +#if defined(OMNITRACE_USE_ROCTRACER) || defined(OMNITRACE_USE_ROCPROFILER) + update_env(_env, "HSA_TOOLS_LIB", _dl_libpath); + update_env(_env, "HSA_TOOLS_REPORT_LOAD_FAILURE", "1"); +#endif + +#if defined(OMNITRACE_USE_ROCPROFILER) + update_env(_env, "ROCP_TOOL_LIB", _omni_libpath); + update_env(_env, "ROCP_HSA_INTERCEPT", "1"); +#endif + +#if defined(OMNITRACE_USE_OMPT) + update_env(_env, "OMP_TOOL_LIBRARIES", _dl_libpath); +#endif + + free(_dl_libpath); + free(_omni_libpath); + + return _env; +} + +std::string +get_internal_libpath(const std::string& _lib) +{ auto _exe = std::string_view{ realpath("/proc/self/exe", nullptr) }; auto _pos = _exe.find_last_of('/'); auto _dir = std::string{ "./" }; if(_pos != std::string_view::npos) _dir = _exe.substr(0, _pos); - auto _lib = omnitrace::common::join("/", _dir, "..", "lib", "libomnitrace-dl.so"); - _environ.emplace_back( - strdup(omnitrace::common::join("=", "LD_PRELOAD", realpath(_lib.c_str(), nullptr)) - .c_str())); - - return _environ; + return omnitrace::common::join("/", _dir, "..", "lib", _lib); } template void -update_env(std::vector& _environ, std::string_view _env_var, Tp&& _env_val) +update_env(std::vector& _environ, std::string_view _env_var, Tp&& _env_val, + bool _append) { + auto _key = join("", _env_var, "="); for(auto& itr : _environ) { if(!itr) continue; - if(std::string_view{ itr }.find(_env_var) == 0) + if(std::string_view{ itr }.find(_key) == 0) { - free(itr); - itr = strdup(omnitrace::common::join('=', _env_var, _env_val).c_str()); + if(_append) + { + auto _val = 
std::string{ itr }.substr(_key.length()); + free(itr); + itr = strdup( + omnitrace::common::join('=', _env_var, join(":", _env_val, _val)) + .c_str()); + } + else + { + free(itr); + itr = strdup(omnitrace::common::join('=', _env_var, _env_val).c_str()); + } return; } } @@ -108,9 +169,6 @@ parse_args(int argc, char** argv, std::vector& _env) using parser_t = tim::argparse::argument_parser; using parser_err_t = typename parser_t::result_type; - update_env(_env, "OMNITRACE_USE_SAMPLING", true); - update_env(_env, "OMNITRACE_CRITICAL_TRACE", false); - auto help_check = [](parser_t& p, int _argc, char** _argv) { std::set help_args = { "-h", "--help", "-?" }; return (p.exists("help") || _argc == 1 || @@ -136,34 +194,74 @@ parse_args(int argc, char** argv, std::vector& _env) auto parser = parser_t(argv[0]); - parser.enable_help(); - parser.on_error([=, &_pec](parser_t& p, const parser_err_t& _err) { + parser.on_error([](parser_t&, const parser_err_t& _err) { stream(std::cerr, color::fatal()) << _err << "\n"; - _pec = EXIT_FAILURE; - help_action(p); + exit(EXIT_FAILURE); }); const auto* _cputime_desc = - R"(Sample based on a CPU-clock timer. Accepts up to 2 arguments: + R"(Sample based on a CPU-clock timer (default). Accepts zero or more arguments: + %{INDENT}%0. Enables sampling based on CPU-clock timer. %{INDENT}%1. Interrupts per second. E.g., 100 == sample every 10 milliseconds of CPU-time. - %{INDENT}%2. Delay (in seconds of CPU-clock time). I.e., how long each thread should wait before taking first sample.)"; + %{INDENT}%2. Delay (in seconds of CPU-clock time). I.e., how long each thread should wait before taking first sample. + %{INDENT}%3+ Thread IDs to target for sampling, starting at 0 (the main thread). 
+ %{INDENT}% May be specified as index or range, e.g., '0 2-4' will be interpreted as: + %{INDENT}% sample the main thread (0), do not sample the first child thread but sample the 2nd, 3rd, and 4th child threads)"; const auto* _realtime_desc = - R"(Sample based on a real-clock timer. Accepts up to 2 arguments: + R"(Sample based on a real-clock timer. Accepts zero or more arguments: + %{INDENT}%0. Enables sampling based on real-clock timer. %{INDENT}%1. Interrupts per second. E.g., 100 == sample every 10 milliseconds of realtime. - %{INDENT}%2. Delay (in seconds of real-clock time). I.e., how long each thread should wait before taking first sample.)"; + %{INDENT}%2. Delay (in seconds of real-clock time). I.e., how long each thread should wait before taking first sample. + %{INDENT}%3+ Thread IDs to target for sampling, starting at 0 (the main thread). + %{INDENT}% May be specified as index or range, e.g., '0 2-4' will be interpreted as: + %{INDENT}% sample the main thread (0), do not sample the first child thread but sample the 2nd, 3rd, and 4th child threads + %{INDENT}% When sampling with a real-clock timer, please note that enabling this will cause threads which are typically "idle" + %{INDENT}% to consume more resources since, while idle, the real-clock time increases (and therefore triggers taking samples) + %{INDENT}% whereas the CPU-clock time does not.)"; + + const auto* _hsa_interrupt_desc = + R"(Set the value of the HSA_ENABLE_INTERRUPT environment variable. +%{INDENT}% ROCm version 5.2 and older have a bug which will cause a deadlock if a sample is taken while waiting for the signal +%{INDENT}% that a kernel completed -- which happens when sampling with a real-clock timer. We require this option to be set to +%{INDENT}% when --realtime is specified to make users aware that, while this may fix the bug, it can have a negative impact on +%{INDENT}% performance. 
+%{INDENT}% Values: +%{INDENT}% 0 avoid triggering the bug, potentially at the cost of reduced performance +%{INDENT}% 1 do not modify how ROCm is notified about kernel completion)"; + + auto _realtime_reqs = (get_env("HSA_ENABLE_INTERRUPT", std::string{}, false).empty()) + ? std::initializer_list{ "hsa-interrupt" } + : std::initializer_list{}; const auto* _trace_policy_desc = R"(Policy for new data when the buffer size limit is reached: %{INDENT}%- discard : new data is ignored %{INDENT}%- ring_buffer : new data overwrites oldest data)"; - parser.add_argument({ "" }, ""); + auto _add_separator = [&](std::string _v, const std::string& _desc) { + parser.add_argument({ "" }, ""); + parser + .add_argument({ join("", "[", _v, "]") }, + (_desc.empty()) ? _desc : join({ "", "(", ")" }, _desc)) + .color(tim::log::color::info()); + parser.add_argument({ "" }, ""); + }; + + parser.enable_help(); + + auto _cols = std::get<0>(tim::utility::console::get_columns()); + if(_cols > parser.get_help_width() + 8) + parser.set_description_width( + std::min(_cols - parser.get_help_width() - 8, 120)); + + _add_separator("DEBUG OPTIONS", ""); parser.add_argument({ "--monochrome" }, "Disable colorized output") .max_count(1) .dtype("bool") .action([&](parser_t& p) { auto _colorized = !p.get("monochrome"); + p.set_use_color(_colorized); update_env(_env, "OMNITRACE_COLORIZED_LOG", (_colorized) ? "1" : "0"); update_env(_env, "COLORIZED_LOG", (_colorized) ? 
"1" : "0"); }); @@ -180,15 +278,19 @@ parse_args(int argc, char** argv, std::vector& _env) update_env(_env, "OMNITRACE_VERBOSE", _v); }); - parser.add_argument({ "" }, ""); + _add_separator("GENERAL OPTIONS", ""); parser.add_argument({ "-c", "--config" }, "Configuration file") - .min_count(1) + .min_count(0) + .dtype("filepath") .action([&](parser_t& p) { update_env( _env, "OMNITRACE_CONFIG_FILE", join(array_config{ ":" }, p.get>("config"))); }); - parser.add_argument({ "-o", "--output" }, "Output path") + parser + .add_argument({ "-o", "--output" }, + "Output path. Accepts 1-2 parameters corresponding to the output " + "path and the output prefix") .min_count(1) .max_count(2) .action([&](parser_t& p) { @@ -196,13 +298,63 @@ parse_args(int argc, char** argv, std::vector& _env) update_env(_env, "OMNITRACE_OUTPUT_PATH", _v.at(0)); if(_v.size() > 1) update_env(_env, "OMNITRACE_OUTPUT_PREFIX", _v.at(1)); }); - - parser.add_argument({ "" }, ""); - parser.add_argument({ "--trace" }, "Generate a detailed trace") + parser + .add_argument({ "-T", "--trace" }, "Generate a detailed trace (perfetto output)") .max_count(1) .action([&](parser_t& p) { update_env(_env, "OMNITRACE_USE_PERFETTO", p.get("trace")); }); + parser + .add_argument( + { "-P", "--profile" }, + "Generate a call-stack-based profile (conflicts with --flat-profile)") + .max_count(1) + .conflicts({ "flat-profile" }) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_USE_TIMEMORY", p.get("profile")); + }); + parser + .add_argument({ "-F", "--flat-profile" }, + "Generate a flat profile (conflicts with --profile)") + .max_count(1) + .conflicts({ "profile" }) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_USE_TIMEMORY", p.get("flat-profile")); + update_env(_env, "OMNITRACE_FLAT_PROFILE", p.get("flat-profile")); + }); + parser + .add_argument({ "-H", "--host" }, + "Enable sampling host-based metrics for the process. E.g. 
CPU " + "frequency, memory usage, etc.") + .max_count(1) + .action([&](parser_t& p) { + auto _h = p.get("host"); + auto _d = p.get("device"); + update_env(_env, "OMNITRACE_USE_PROCESS_SAMPLING", _h || _d); + update_env(_env, "OMNITRACE_CPU_FREQ_ENABLED", _h); + }); + parser + .add_argument({ "-D", "--device" }, + "Enable sampling device-based metrics for the process. E.g. GPU " + "temperature, memory usage, etc.") + .max_count(1) + .action([&](parser_t& p) { + auto _h = p.get("host"); + auto _d = p.get("device"); + update_env(_env, "OMNITRACE_USE_PROCESS_SAMPLING", _h || _d); + update_env(_env, "OMNITRACE_USE_ROCM_SMI", _d); + }); + + _add_separator("TRACING OPTIONS", ""); + parser + .add_argument({ "--trace-file" }, + "Specify the trace output filename. Relative filepath will be with " + "respect to output path and output prefix.") + .count(1) + .dtype("filepath") + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_PERFETTO_FILE", p.get("trace-file")); + }); parser .add_argument({ "--trace-buffer-size" }, "Size limit for the trace output (in KB)") @@ -220,10 +372,11 @@ parse_args(int argc, char** argv, std::vector& _env) p.get("trace-fill-policy")); }); - parser.add_argument({ "" }, ""); - parser.add_argument({ "--profile" }, "Generate a call-stack-based profile") - .min_count(0) + _add_separator("PROFILE OPTIONS", ""); + parser.add_argument({ "--profile-format" }, "Data formats for profiling results") + .min_count(1) .max_count(3) + .requires({ "profile|flat-profile" }) .choices({ "text", "json", "console" }) .action([&](parser_t& p) { auto _v = p.get>("profile"); @@ -236,34 +389,70 @@ parse_args(int argc, char** argv, std::vector& _env) } }); - parser.add_argument({ "--flat-profile" }, "Generate a flat profile") - .min_count(0) - .max_count(3) - .choices({ "text", "json", "console" }) - .action([&](parser_t& p) { - auto _v = p.get>("flat-profile"); - update_env(_env, "OMNITRACE_USE_TIMEMORY", true); - update_env(_env, "OMNITRACE_FLAT_PROFILE", true); - 
if(!_v.empty()) - { - update_env(_env, "OMNITRACE_TEXT_OUTPUT", _v.count("text") != 0); - update_env(_env, "OMNITRACE_JSON_OUTPUT", _v.count("json") != 0); - update_env(_env, "OMNITRACE_COUT_OUTPUT", _v.count("console") != 0); - } - }); parser - .add_argument({ "--diff-profile" }, - "Generate a profile diff from the specified input directory") + .add_argument({ "--profile-diff" }, + "Generate a diff output b/t the profile collected and an existing " + "profile from another run Accepts 1-2 parameters corresponding to " + "the input path and the input prefix") .min_count(1) .max_count(2) .action([&](parser_t& p) { - auto _v = p.get>("diff-profile"); + auto _v = p.get>("profile-diff"); update_env(_env, "OMNITRACE_DIFF_OUTPUT", true); update_env(_env, "OMNITRACE_INPUT_PATH", _v.at(0)); if(_v.size() > 1) update_env(_env, "OMNITRACE_INPUT_PREFIX", _v.at(1)); }); - parser.add_argument({ "" }, ""); + _add_separator("HOST/DEVICE (PROCESS SAMPLING) OPTIONS", ""); + parser + .add_argument({ "--process-freq" }, + "Set the default host/device sampling frequency " + "(number of interrupts per second)") + .count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_PROCESS_SAMPLING_FREQ", + p.get("process-freq")); + }); + parser + .add_argument({ "--process-wait" }, "Set the default wait time (i.e. delay) " + "before taking first host/device sample " + "(in seconds of realtime)") + .count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_PROCESS_SAMPLING_DELAY", + p.get("process-wait")); + }); + parser + .add_argument( + { "--process-duration" }, + "Set the duration of the host/device sampling (in seconds of realtime)") + .count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_SAMPLING_PROCESS_DURATION", + p.get("process-duration")); + }); + parser + .add_argument({ "--cpus" }, + "CPU IDs for frequency sampling. 
Supports integers and/or ranges") + .dtype("int or range") + .requires({ "host" }) + .action([&](parser_t& p) { + update_env( + _env, "OMNITRACE_PROCESS_SAMPLING_CPUS", + join(array_config{ "," }, p.get>("cpus"))); + }); + parser + .add_argument({ "--gpus" }, + "GPU IDs for SMI queries. Supports integers and/or ranges") + .dtype("int or range") + .requires({ "device" }) + .action([&](parser_t& p) { + update_env( + _env, "OMNITRACE_PROCESS_SAMPLING_GPUS", + join(array_config{ "," }, p.get>("gpus"))); + }); + + _add_separator("GENERAL SAMPLING OPTIONS", ""); parser .add_argument({ "-f", "--freq" }, "Set the default sampling frequency " "(number of interrupts per second)") @@ -279,7 +468,7 @@ parse_args(int argc, char** argv, std::vector& _env) "delay of 1 second for CPU-clock sampler may not equal 1 second of realtime") .count(1) .action([&](parser_t& p) { - update_env(_env, "OMNITRACE_SAMPLING_DELAY", p.get("delay")); + update_env(_env, "OMNITRACE_SAMPLING_DELAY", p.get("wait")); }); parser .add_argument( @@ -302,36 +491,88 @@ parse_args(int argc, char** argv, std::vector& _env) join(array_config{ ", " }, p.get>("tids"))); }); - parser.add_argument({ "" }, ""); + _add_separator("SAMPLING TIMER OPTIONS", ""); parser.add_argument({ "--cputime" }, _cputime_desc) .min_count(0) - .max_count(2) .action([&](parser_t& p) { - auto _v = p.get>("cputime"); + auto _v = p.get>("cputime"); update_env(_env, "OMNITRACE_SAMPLING_CPUTIME", true); - if(!_v.empty()) update_env(_env, "OMNITRACE_SAMPLING_CPUTIME_FREQ", _v.at(0)); - if(_v.size() > 1) - update_env(_env, "OMNITRACE_SAMPLING_CPUTIME_DELAY", _v.at(1)); + if(!_v.empty()) + { + update_env(_env, "OMNITRACE_SAMPLING_CPUTIME_FREQ", _v.front()); + _v.pop_front(); + } + if(!_v.empty()) + { + update_env(_env, "OMNITRACE_SAMPLING_CPUTIME_DELAY", _v.front()); + _v.pop_front(); + } + if(!_v.empty()) + { + update_env(_env, "OMNITRACE_SAMPLING_CPUTIME_TIDS", + join(array_config{ "," }, _v)); + } }); - parser.add_argument({ "" }, ""); + 
parser.add_argument({ "--realtime" }, _realtime_desc) .min_count(0) - .max_count(2) + .requires(_realtime_reqs) .action([&](parser_t& p) { - auto _v = p.get>("realtime"); + auto _v = p.get>("realtime"); update_env(_env, "OMNITRACE_SAMPLING_REALTIME", true); if(!_v.empty()) - update_env(_env, "OMNITRACE_SAMPLING_REALTIME_FREQ", _v.at(0)); - if(_v.size() > 1) - update_env(_env, "OMNITRACE_SAMPLING_REALTIME_DELAY", _v.at(1)); + { + update_env(_env, "OMNITRACE_SAMPLING_REALTIME_FREQ", _v.front()); + _v.pop_front(); + } + if(!_v.empty()) + { + update_env(_env, "OMNITRACE_SAMPLING_REALTIME_DELAY", _v.front()); + _v.pop_front(); + } + if(!_v.empty()) + { + update_env(_env, "OMNITRACE_SAMPLING_REALTIME_TIDS", + join(array_config{ "," }, _v)); + } }); - parser.add_argument({ "" }, ""); - parser.add_argument({ "-E", "--enable" }, "Enable these backends") - .choices({ "all", "kokkosp", "mpip", "ompt", "rcclp", "rocm-smi", "roctracer", - "rocprofiler", "roctx", "mutex-locks", "spin-locks", "rw-locks" }) + std::set _backend_choices = { "all", "kokkosp", "mpip", + "ompt", "rcclp", "rocm-smi", + "roctracer", "rocprofiler", "roctx", + "mutex-locks", "spin-locks", "rw-locks" }; + +#if !defined(OMNITRACE_USE_MPI) && !defined(OMNITRACE_USE_MPI_HEADERS) + _backend_choices.erase("mpip"); +#endif + +#if !defined(OMNITRACE_USE_OMPT) + _backend_choices.erase("ompt"); +#endif + +#if !defined(OMNITRACE_USE_RCCL) + _backend_choices.erase("rcclp"); +#endif + +#if !defined(OMNITRACE_USE_ROCM_SMI) + _backend_choices.erase("rocm-smi"); +#endif + +#if !defined(OMNITRACE_USE_ROCTRACER) + _backend_choices.erase("roctracer"); + _backend_choices.erase("roctx"); +#endif + +#if !defined(OMNITRACE_USE_ROCPROFILER) + _backend_choices.erase("rocprofiler"); +#endif + + _add_separator("BACKEND OPTIONS", "These options control region information captured " + "w/o sampling or instrumentation"); + parser.add_argument({ "-I", "--include" }, "Include data from these backends") + .choices(_backend_choices) 
.action([&](parser_t& p) { - auto _v = p.get>("enable"); + auto _v = p.get>("include"); auto _update = [&](const auto& _opt, bool _cond) { if(_cond || _v.count("all") > 0) update_env(_env, _opt, true); }; @@ -348,11 +589,10 @@ parse_args(int argc, char** argv, std::vector& _env) _update("OMNITRACE_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); }); - parser.add_argument({ "-D", "--disable" }, "Disable these backends") - .choices({ "all", "kokkosp", "mpip", "ompt", "rcclp", "rocm-smi", "roctracer", - "rocprofiler", "roctx", "mutex-locks", "spin-locks", "rw-locks" }) + parser.add_argument({ "-E", "--exclude" }, "Exclude data from these backends") + .choices(_backend_choices) .action([&](parser_t& p) { - auto _v = p.get>("disable"); + auto _v = p.get>("exclude"); auto _update = [&](const auto& _opt, bool _cond) { if(_cond || _v.count("all") > 0) update_env(_env, _opt, false); }; @@ -369,48 +609,51 @@ parse_args(int argc, char** argv, std::vector& _env) _update("OMNITRACE_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); }); - parser.add_argument({ "" }, ""); - parser - .add_argument({ "--cpus" }, - "CPU IDs for frequency sampling. Supports integers and/or ranges") - .dtype("int or range") - .action([&](parser_t& p) { - update_env(_env, "OMNITRACE_USE_PROCESS_SAMPLING", true); - update_env( - _env, "OMNITRACE_PROCESS_SAMPLING_CPUS", - join(array_config{ "," }, p.get>("cpus"))); - }); - parser - .add_argument({ "--gpus" }, - "GPU IDs for SMI queries. 
Supports integers and/or ranges") - .dtype("int or range") - .action([&](parser_t& p) { - update_env( - _env, "OMNITRACE_PROCESS_SAMPLING_GPUS", - join(array_config{ "," }, p.get>("gpus"))); - }); - - parser.add_argument({ "" }, ""); + _add_separator("HARDWARE COUNTER OPTIONS", ""); parser .add_argument({ "-C", "--cpu-events" }, "Set the CPU hardware counter events to record (ref: " "`omnitrace-avail -H -c CPU`)") + .set_default(std::set{}) .action([&](parser_t& p) { auto _events = join(array_config{ "," }, p.get>("cpu-events")); update_env(_env, "OMNITRACE_PAPI_EVENTS", _events); }); + +#if defined(OMNITRACE_USE_ROCPROFILER) parser .add_argument({ "-G", "--gpu-events" }, "Set the GPU hardware counter events to record (ref: " "`omnitrace-avail -H -c GPU`)") + .set_default(std::set{}) .action([&](parser_t& p) { auto _events = join(array_config{ "," }, p.get>("gpu-events")); update_env(_env, "OMNITRACE_ROCM_EVENTS", _events); }); +#endif + + _add_separator("MISCELLANEOUS OPTIONS", ""); + parser + .add_argument({ "-i", "--inlines" }, + "Include inline info in output when available") + .max_count(1) + .action([&](parser_t& p) { + update_env(_env, "OMNITRACE_SAMPLING_INCLUDE_INLINES", + p.get("inlines")); + }); + + parser.add_argument({ "--hsa-interrupt" }, _hsa_interrupt_desc) + .count(1) + .dtype("int") + .choices({ 0, 1 }) + .action([&](parser_t& p) { + update_env(_env, "HSA_ENABLE_INTERRUPT", p.get("hsa-interrupt")); + }); auto _args = parser.parse_known_args(argc, argv); + auto _cerr = std::get<0>(_args); auto _cmdc = std::get<1>(_args); auto* _cmdv = std::get<2>(_args); @@ -422,6 +665,8 @@ parse_args(int argc, char** argv, std::vector& _env) if(help_check(parser, _cmdc, _cmdv)) help_action(parser); + if(_cerr) throw std::runtime_error(_cerr.what()); + std::vector _argv = {}; _argv.reserve(_cmdc); for(int i = 1; i < _cmdc; ++i) @@ -429,76 +674,3 @@ parse_args(int argc, char** argv, std::vector& _env) return _argv; } - -/* -void -update_env(char*** envp) -{ - 
if(!envp) return; - - static constexpr size_t N = 3; - - using pair_t = std::pair; - std::array _locs = { pair_t{ "HSA_TOOLS_LIB", -1 }, - pair_t{ "ROCP_TOOL_LIB", -1 }, - pair_t{ "HSA_TOOLS_REPORT_LOAD_FAILURE", -1 } }; - - char**& _envp = *envp; - size_t nenv = 0; - int64_t nadd = _locs.size(); - if(_envp) - { - size_t i = 0; - while(_envp[(i = nenv)]) - { - ++nenv; - for(auto& itr : _locs) - { - if(itr.second < 0 && std::string_view{ _envp[i] }.find(itr.first) == 0) - { - itr.second = i; - --nadd; - fprintf(stderr, "found %s at index %zi\n", itr.first.data(), i); - } - } - } - } - - size_t nsize = nenv + 1; - if(nadd > 0) - { - nsize += nadd; - - size_t _off = 0; - for(auto& itr : _locs) - if(itr.second < 0) itr.second = nenv + _off++; - - char** _envp_new = new char*[nsize]; - memset(_envp_new, 0, nsize * sizeof(char*)); - for(size_t i = 0; i < nenv; ++i) - _envp_new[i] = _envp[i]; - - _envp = _envp_new; - } - - fprintf(stderr, "nsize=%zu, nenv=%zu, nadd=%zu\n", nsize, nenv, nadd); - - using loc_pair_t = std::pair; - std::array _libs = { - loc_pair_t{ "HSA_TOOLS_LIB", omnitrace::dl::get_indirect().get_dl_library() }, - loc_pair_t{ "ROCP_TOOL_LIB", omnitrace::dl::get_indirect().get_omni_library() }, - loc_pair_t{ "HSA_TOOLS_REPORT_LOAD_FAILURE", "1" } - }; - - for(auto itr : _locs) - { - fprintf(stderr, "%s is at index %zu\n", itr.first.data(), itr.second); - for(const auto& litr : _libs) - { - if(itr.first == litr.first) - _envp[itr.second] = - strdup(omnitrace::common::join("=", itr.first, litr.second).c_str()); - } - } -} -*/ diff --git a/source/bin/omnitrace-sample/omnitrace-sample.cpp b/source/bin/omnitrace-sample/omnitrace-sample.cpp index 0e30d7dc9..46ec2b691 100644 --- a/source/bin/omnitrace-sample/omnitrace-sample.cpp +++ b/source/bin/omnitrace-sample/omnitrace-sample.cpp @@ -22,12 +22,15 @@ #include "omnitrace-sample.hpp" +#include +#include +#include #include int main(int argc, char** argv) { - auto _env = get_environment(); + auto _env = 
get_initial_environment(); bool _has_double_hyphen = false; for(int i = 1; i < argc; ++i) @@ -40,7 +43,8 @@ main(int argc, char** argv) std::vector _argv = {}; if(_has_double_hyphen) { - _argv = parse_args(argc, argv, _env); + auto _size = _env.size(); + _argv = parse_args(argc, argv, _env); } else { @@ -49,11 +53,22 @@ main(int argc, char** argv) _argv.emplace_back(argv[i]); } + std::sort(_env.begin(), _env.end(), [](auto* _lhs, auto* _rhs) { + if(!_lhs) return false; + if(!_rhs) return true; + return std::string_view{ _lhs } < std::string_view{ _rhs }; + }); + + for(auto* itr : _env) + if(itr != nullptr && std::string_view{ itr }.find("OMNITRACE") == 0) + std::cout << itr << "\n"; + if(!_argv.empty()) { std::string _argv0 = get_command(_argv[0]); print_command(_argv); _argv.emplace_back(nullptr); + _env.emplace_back(nullptr); return execve(_argv0.c_str(), _argv.data(), _env.data()); } diff --git a/source/bin/omnitrace-sample/omnitrace-sample.hpp b/source/bin/omnitrace-sample/omnitrace-sample.hpp index 616b40ec4..8581e36a4 100644 --- a/source/bin/omnitrace-sample/omnitrace-sample.hpp +++ b/source/bin/omnitrace-sample/omnitrace-sample.hpp @@ -29,15 +29,21 @@ std::string get_command(const char*); +std::string +get_realpath(const std::string&); + void print_command(const std::vector& _argv); std::vector -get_environment(); +get_initial_environment(); + +std::string +get_internal_libpath(const std::string& _lib); template void -update_env(std::vector&, std::string_view, Tp&&); +update_env(std::vector&, std::string_view, Tp&&, bool _append = false); std::vector parse_args(int argc, char** argv, std::vector&); From 01b3f6ca6a6c6fae04ab681775f6cfe873b0cd13 Mon Sep 17 00:00:00 2001 From: "Jonathan R. 
Madsen" Date: Fri, 30 Sep 2022 10:28:44 -0500 Subject: [PATCH 11/15] Update docs --- source/docs/about.md | 39 +++++++++++++++++++++++++++++++-------- source/docs/runtime.md | 2 +- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/source/docs/about.md b/source/docs/about.md index 771c76928..acb475c01 100644 --- a/source/docs/about.md +++ b/source/docs/about.md @@ -6,25 +6,48 @@ :maxdepth: 4 ``` +## Overview + > ***[Omnitrace](https://github.com/AMDResearch/omnitrace) is an AMD open source research project and is not supported as part of the ROCm software stack.*** [Browse Omnitrace source code on Github](https://github.com/AMDResearch/omnitrace) -[Omnitrace](https://github.com/AMDResearch/omnitrace) is designed for both high-level and -comprehensive application tracing and profiling on both the CPU and GPU. -[Omnitrace](https://github.com/AMDResearch/omnitrace) supports both binary instrumentation -and sampling as a means of collecting various metrics. +[Omnitrace](https://github.com/AMDResearch/omnitrace) is designed for both high-level profiling and +comprehensive tracing of applications running on the CPU or the CPU+GPU via dynamic binary instrumentation, +call-stack sampling, and various other means for determining currently executing function and line information. -Visualization of the comprehensive omnitrace results can be viewed in any modern web browser by visiting [ui.perfetto.dev](https://ui.perfetto.dev/) -and loading the perfetto output (`.proto` files) produced by omnitrace. +Visualization of the comprehensive omnitrace results can be viewed in any modern web browser by visiting +[ui.perfetto.dev](https://ui.perfetto.dev/) and loading the perfetto output (`.proto` files) produced by omnitrace. Aggregated high-level results are available in text files for human consumption and JSON files for programmatic analysis. 
The JSON output files are compatible with the python package [hatchet](https://github.com/hatchet/hatchet) which converts -the performance data into pandas dataframes and facilitate multi-run comparisons, filtering, visualization in Jupyter notebooks, and much more. +the performance data into pandas dataframes and facilitates multi-run comparisons, filtering, visualization in Jupyter notebooks, +and much more. -[Omnitrace](https://github.com/AMDResearch/omnitrace) has two distinct configuration steps: +[Omnitrace](https://github.com/AMDResearch/omnitrace) has two distinct configuration steps when instrumenting: 1. Configuring which functions and modules are instrumented in the target binaries (i.e. executable and/or libraries) - [Instrumenting with Omnitrace](instrumenting.md) 2. Configuring what the instrumentation does happens when the instrumented binaries are executed - [Customizing Omnitrace Runtime](runtime.md) + +## Omnitrace Use Cases + +When analyzing the performance of an application, ***it is always best to NOT assume you know where the performance bottlenecks are*** +***and why they are happening.*** Omnitrace is a ***tool for the entire execution of an application***. It is the sort of tool which is +ideal for *characterizing* where optimization would have the greatest impact on the end-to-end execution of the application and/or +viewing what else is happening on the system during a performance bottleneck. + +Especially when GPUs are involved, there is a tendency to assume that the quickest path to performance improvement is minimizing +the runtime of the GPU kernels. This is a highly flawed assumption: if you optimize the runtime of a kernel from 1 millisecond +to 1 microsecond (1000x speed-up) but the original application *never spent time waiting* for kernel(s) to complete, +you will see zero statistically significant speed-up in the end-to-end runtime of your application. 
In other words, it does not matter +how fast or slow the code on the GPU is if the application is not bottlenecked waiting on the GPU. + +Use Omnitrace to obtain a high-level view of the entire application. Use it to determine where the performance bottlenecks are and +obtain clues to why these bottlenecks are happening. If you want ***extensive*** insight into the execution of individual kernels +on the GPU, AMD Research is working on another tool for this, but you should start with the tool which characterizes the +broad picture: Omnitrace. + +With regard to the CPU, Omnitrace does not target any specific vendor; it works just as well with non-AMD CPUs as with AMD CPUs. +With regard to the GPU, Omnitrace is currently restricted to the HIP and HSA APIs and kernels executing on AMD GPUs. diff --git a/source/docs/runtime.md b/source/docs/runtime.md index 84795fabe..71ce68185 100644 --- a/source/docs/runtime.md +++ b/source/docs/runtime.md @@ -248,7 +248,7 @@ OMNITRACE_STRICT_CONFIG = true OMNITRACE_SUPPRESS_CONFIG = true OMNITRACE_SUPPRESS_PARSING = true OMNITRACE_TEXT_OUTPUT = true -OMNITRACE_TIME_FORMAT = %F_%I.%M_%p +OMNITRACE_TIME_FORMAT = %F_%H.%M OMNITRACE_TIMELINE_PROFILE = false OMNITRACE_TIMING_PRECISION = 6 OMNITRACE_TIMING_SCIENTIFIC = false From ebc6b9eee22dff7e7435df4894c608513a2fd2d8 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Fri, 30 Sep 2022 10:28:54 -0500 Subject: [PATCH 12/15] Update timemory submodule --- external/timemory | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/timemory b/external/timemory index 56b4f0eda..98e2306ca 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit 56b4f0eda5489e3c9b81bb86ad18533650b1a1f8 +Subproject commit 98e2306ca9226226013335637ce6c33f72bf1e3a From f6b145656f1a4bc9f6955f632c15af1ae9c182f2 Mon Sep 17 00:00:00 2001 From: "Jonathan R. 
Madsen" Date: Fri, 30 Sep 2022 10:29:23 -0500 Subject: [PATCH 13/15] Quiet numa_gotcha warnings --- .../lib/omnitrace/library/components/numa_gotcha.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/source/lib/omnitrace/library/components/numa_gotcha.cpp b/source/lib/omnitrace/library/components/numa_gotcha.cpp index f97fff147..054ed7e2b 100644 --- a/source/lib/omnitrace/library/components/numa_gotcha.cpp +++ b/source/lib/omnitrace/library/components/numa_gotcha.cpp @@ -61,6 +61,16 @@ get_numa_gotcha() void numa_gotcha::configure() { + // don't emit warnings for missing NUMA functions unless debug or verbosity >= 3 + if(get_verbose_env() < 3 && !get_debug_env()) + { + for(size_t i = 0; i < numa_gotcha_t::capacity(); ++i) + { + auto* itr = numa_gotcha_t::at(i); + if(itr) itr->verbose = -1; + } + } + numa_gotcha_t::get_initializer() = []() { numa_gotcha_t::configure<0, long, void*, unsigned long, int, const unsigned long*, unsigned long, unsigned>("mbind"); From 6ee92badef8b4d5ca1e16aa5f76d0374e9cb3ecf Mon Sep 17 00:00:00 2001 From: "Jonathan R. 
Madsen" Date: Fri, 30 Sep 2022 10:30:10 -0500 Subject: [PATCH 14/15] rocm_feature_value variant supports float --- source/lib/omnitrace/library/components/rocprofiler.cpp | 3 +-- source/lib/omnitrace/library/components/rocprofiler.hpp | 2 +- source/lib/omnitrace/library/rocprofiler.cpp | 7 +++++++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/source/lib/omnitrace/library/components/rocprofiler.cpp b/source/lib/omnitrace/library/components/rocprofiler.cpp index 96fba6085..cf3d54235 100644 --- a/source/lib/omnitrace/library/components/rocprofiler.cpp +++ b/source/lib/omnitrace/library/components/rocprofiler.cpp @@ -94,8 +94,7 @@ rocm_event::rocm_event(uint32_t _dev, uint32_t _thr, uint32_t _queue, feature_values.emplace_back(rocm_feature_value{ p->data.result_int32 }); break; case ROCPROFILER_DATA_KIND_FLOAT: - feature_values.emplace_back( - rocm_feature_value{ static_cast(p->data.result_float) }); + feature_values.emplace_back(rocm_feature_value{ p->data.result_float }); break; case ROCPROFILER_DATA_KIND_DOUBLE: feature_values.emplace_back(rocm_feature_value{ p->data.result_double }); diff --git a/source/lib/omnitrace/library/components/rocprofiler.hpp b/source/lib/omnitrace/library/components/rocprofiler.hpp index b6afaacf9..bb3ed6f1f 100644 --- a/source/lib/omnitrace/library/components/rocprofiler.hpp +++ b/source/lib/omnitrace/library/components/rocprofiler.hpp @@ -53,7 +53,7 @@ namespace component { using rocm_metric_type = unsigned long long; using rocm_info_entry = ::tim::hardware_counters::info; -using rocm_feature_value = std::variant; +using rocm_feature_value = std::variant; struct rocm_counter { diff --git a/source/lib/omnitrace/library/rocprofiler.cpp b/source/lib/omnitrace/library/rocprofiler.cpp index f4a31d2fb..213802627 100644 --- a/source/lib/omnitrace/library/rocprofiler.cpp +++ b/source/lib/omnitrace/library/rocprofiler.cpp @@ -572,8 +572,15 @@ post_process_perfetto() { for(size_t i = 0; i < _n; ++i) { +#ifdef __GNUC__ +# pragma 
GCC diagnostic push +# pragma GCC diagnostic ignored "-Wdouble-promotion" +#endif auto _plus = [](auto& _lhs, auto&& _rhs) { _lhs += _rhs; }; std::visit(_plus, _values.at(i), vitr->feature_values.at(i)); +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif } } } From 450e6e7fdc1a47f3fa1309216cdb2cb81f5f49a0 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Fri, 30 Sep 2022 10:31:00 -0500 Subject: [PATCH 15/15] ROCP_TOOL_LIB=omnitrace-dl throws error - rocprofiler does not activate correctly --- source/lib/omnitrace-dl/dl.cpp | 16 +++++++++++----- source/lib/omnitrace-dl/dl.hpp | 3 +-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/source/lib/omnitrace-dl/dl.cpp b/source/lib/omnitrace-dl/dl.cpp index 716cd0fac..d901b42cd 100644 --- a/source/lib/omnitrace-dl/dl.cpp +++ b/source/lib/omnitrace-dl/dl.cpp @@ -381,8 +381,8 @@ struct OMNITRACE_HIDDEN_API indirect // ROCP functions #if OMNITRACE_USE_ROCPROFILER > 0 - void (*rocp_on_load_tool_prop_f)(rocprofiler_settings* settings) = nullptr; - void (*rocp_on_unload_tool_f)() = nullptr; + void (*rocp_on_load_tool_prop_f)(void* settings) = nullptr; + void (*rocp_on_unload_tool_f)() = nullptr; #endif // OpenMP functions @@ -886,9 +886,15 @@ extern "C" // //----------------------------------------------------------------------------------// -#if OMNITRACE_USE_ROCTRACER > 0 - void OnLoadToolProp(rocprofiler_settings* settings) - { +#if OMNITRACE_USE_ROCPROFILER > 0 + void OnLoadToolProp(void* settings) + { + OMNITRACE_DL_LOG(-16, + "invoking %s(rocprofiler_settings_t*) within omnitrace-dl.so " + "will cause a silent failure for rocprofiler. 
ROCP_TOOL_LIB " + "should be set to libomnitrace.so\n", + __FUNCTION__); + abort(); return OMNITRACE_DL_INVOKE(get_indirect().rocp_on_load_tool_prop_f, settings); } diff --git a/source/lib/omnitrace-dl/dl.hpp b/source/lib/omnitrace-dl/dl.hpp index b6ad237cd..c3e4b5aa3 100644 --- a/source/lib/omnitrace-dl/dl.hpp +++ b/source/lib/omnitrace-dl/dl.hpp @@ -167,8 +167,7 @@ extern "C" # if OMNITRACE_USE_ROCPROFILER > 0 // ROCP - struct rocprofiler_settings; - void OnLoadToolProp(rocprofiler_settings* settings) OMNITRACE_PUBLIC_API; + void OnLoadToolProp(void* settings) OMNITRACE_PUBLIC_API; void OnUnloadTool() OMNITRACE_PUBLIC_API; # endif #endif