From 2681cfe7b74bed3a228c83fe939c0aa0ec87b198 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 18 Feb 2021 22:10:50 -0500 Subject: [PATCH] Add `thread_ids::Vector` option to Profile.init() - This option will configure julia's profiler to only run on the provided thread ids! :) - Adds a global int mask to allow toggling profiling for up to 64 threads in a performant way. I think if you have more than 64 threads, it's okay that you can't profile individual threads since it's unlikely to be very meaningful by then... --- src/signal-handling.c | 6 ++++++ src/signals-mach.c | 13 +++++++++---- src/signals-unix.c | 11 ++++++++--- stdlib/Profile/src/Profile.jl | 26 +++++++++++++++++++++----- 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/src/signal-handling.c b/src/signal-handling.c index 80dfdb3b2fc21..69e76c70fd15d 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -23,6 +23,7 @@ static volatile jl_bt_element_t *bt_data_prof = NULL; static volatile size_t bt_size_max = 0; static volatile size_t bt_size_cur = 0; static volatile uint64_t nsecprof = 0; +static volatile uint64_t threadid_mask = 0; // each bit represents a threadid to enable. 
static volatile int running = 0; static const uint64_t GIGA = 1000000000ULL; // Timers to take samples at intervals @@ -256,6 +257,11 @@ void jl_critical_error(int sig, bt_context_t *context, jl_bt_element_t *bt_data, /////////////////////// // Utility functions // /////////////////////// +JL_DLLEXPORT void jl_profile_init_threadid_filter(uint64_t tid_mask) +{ + threadid_mask = tid_mask; +} + JL_DLLEXPORT int jl_profile_init(size_t maxsize, uint64_t delay_nsec) { bt_size_max = maxsize; diff --git a/src/signals-mach.c b/src/signals-mach.c index 3737bab1002cd..cfa39793c0993 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -474,7 +474,7 @@ static kern_return_t profiler_segv_handler void *mach_profile_listener(void *arg) { (void)arg; - int i; + int tid; const int max_size = 512; attach_exception_port(mach_thread_self(), 1); #ifdef LIBOSXUNWIND @@ -491,15 +491,20 @@ void *mach_profile_listener(void *arg) jl_lock_profile(); void *unused = NULL; int keymgr_locked = _keymgr_get_and_lock_processwide_ptr_2(KEYMGR_GCC3_DW2_OBJ_LIST, &unused) == 0; - for (i = jl_n_threads; i-- > 0; ) { + for (tid = jl_n_threads; tid-- > 0; ) { // if there is no space left, break early if (jl_profile_is_buffer_full()) { jl_profile_stop_timer(); break; } + // If the threadid mask is set, skip threads whose bit is clear (64-bit shift; tids >= 64 have no bit in the mask and are skipped). + if (threadid_mask != 0 && (tid >= 64 || (((uint64_t)1 << tid) & threadid_mask) == 0)) { + continue; + } + unw_context_t *uc; - jl_thread_suspend_and_get_state(i, &uc); + jl_thread_suspend_and_get_state(tid, &uc); if (running) { #ifdef LIBOSXUNWIND /* @@ -539,7 +544,7 @@ void *mach_profile_listener(void *arg) bt_data_prof[bt_size_cur++].uintptr = 0; } // We're done! Resume the thread. 
- jl_thread_resume(i, 0); + jl_thread_resume(tid, 0); } if (keymgr_locked) _keymgr_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST); diff --git a/src/signals-unix.c b/src/signals-unix.c index f89d32f09486a..0103fb4a5753f 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -678,9 +678,14 @@ static void *signal_listener(void *arg) // (so that thread zero gets notified last) if (critical || profile) jl_lock_profile(); - for (int i = jl_n_threads; i-- > 0; ) { + for (int tid = jl_n_threads; tid-- > 0; ) { + // Only filter profiling samples (critical signals must still visit every thread): skip threads whose mask bit is clear; tids >= 64 have no bit and are skipped. + if (profile && !critical && threadid_mask != 0 && (tid >= 64 || (((uint64_t)1 << tid) & threadid_mask) == 0)) { + continue; + } + // notify thread to stop - jl_thread_suspend_and_get_state(i, &signal_context); + jl_thread_suspend_and_get_state(tid, &signal_context); // do backtrace on thread contexts for critical signals // this part must be signal-handler safe @@ -720,7 +725,7 @@ static void *signal_listener(void *arg) } // notify thread to resume - jl_thread_resume(i, sig); + jl_thread_resume(tid, sig); } if (critical || profile) jl_unlock_profile(); diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index f297ad12f80a1..8fa53bf83544b 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -37,30 +37,46 @@ end #### """ - init(; n::Integer, delay::Real)) + init(; n::Integer, delay::Real, thread_ids::Vector{<:Integer}) Configure the `delay` between backtraces (measured in seconds), and the number `n` of instruction pointers that may be stored. Each instruction pointer corresponds to a single line of code; backtraces generally consist of a long list of instruction pointers. Current settings can be obtained by calling this function with no arguments, and each can be set -independently using keywords or in the order `(n, delay)`. +independently using keywords or in the order `(n, delay)`. 
Finally, you can configure +profiling to only profile individual threads by passing their (1-based, at most 64) thread ids in `thread_ids`. """ -function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} = nothing) +function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} = nothing, + thread_ids::Vector{<:Integer} = Int[]) n_cur = ccall(:jl_profile_maxlen_data, Csize_t, ()) delay_cur = ccall(:jl_profile_delay_nsec, UInt64, ())/10^9 if n === nothing && delay === nothing + _init_threadid_filter(thread_ids) return Int(n_cur), delay_cur end nnew = (n === nothing) ? n_cur : n delaynew = (delay === nothing) ? delay_cur : delay - init(nnew, delaynew) + init(nnew, delaynew, thread_ids) end -function init(n::Integer, delay::Real) +function init(n::Integer, delay::Real, thread_ids::Vector = Int[]) status = ccall(:jl_profile_init, Cint, (Csize_t, UInt64), n, round(UInt64,10^9*delay)) if status == -1 error("could not allocate space for ", n, " instruction pointers") end + _init_threadid_filter(thread_ids) +end +function _init_threadid_filter(thread_ids::Vector) # Build a 64-bit mask with bit (tid-1) set per 1-based thread id and pass it to the runtime; an empty vector leaves the current filter untouched. + threadid_mask = UInt64(0) + if !isempty(thread_ids) + for tid in thread_ids + if tid < 1 || tid > 64 + error("Cannot enable thread id outside 1:64 via `Profile.init(thread_ids)`: $(tid).") + end + threadid_mask |= UInt64(1) << (tid-1) # shift in 64 bits: the literal 0x1 is a UInt8 and would shift to zero for tid > 8 + end + ccall(:jl_profile_init_threadid_filter, Cvoid, (UInt64,), threadid_mask) + end end # init with default values