diff --git a/base/timing.jl b/base/timing.jl index ab7af23048305b..45a27e33789773 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -55,7 +55,7 @@ function gc_alloc_count(diff::GC_Diff) diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc end -# cumulative total time spent on compilation +# cumulative total time spent on compilation, in nanoseconds cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ()) cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ()) diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 96364d99048160..7917d9300c09f2 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -286,8 +286,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p JL_GC_PUSH1(&src); JL_LOCK(&codegen_lock); uint64_t compiler_start_time = 0; - int tid = jl_threadid(); - if (jl_measure_compile_time[tid]) + uint8_t measure_compile_time_enabled = jl_atomic_load(&jl_measure_compile_time_enabled); + if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); CompilationPolicy policy = (CompilationPolicy) _policy; @@ -415,8 +415,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p } data->M = std::move(clone); - if (jl_measure_compile_time[tid]) - jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time); + if (measure_compile_time_enabled) + jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); if (policy == CompilationPolicy::ImagingMode) imaging_mode = 0; JL_UNLOCK(&codegen_lock); // Might GC @@ -916,8 +916,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper, jl_llvm_functions_t decls; JL_LOCK(&codegen_lock); uint64_t compiler_start_time = 0; - int tid = jl_threadid(); - if (jl_measure_compile_time[tid]) + uint8_t measure_compile_time_enabled = jl_atomic_load(&jl_measure_compile_time_enabled); + if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); std::tie(m, decls) = jl_emit_code(mi, src, jlrettype, output); @@ -942,8 +942,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper, m.release(); // the return object `llvmf` will be the owning pointer } JL_GC_POP(); - if (jl_measure_compile_time[tid]) - jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time); + if (measure_compile_time_enabled) + jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); JL_UNLOCK(&codegen_lock); // Might GC if (F) return F; diff --git a/src/gf.c b/src/gf.c index 41381ccc5178e8..47a5503e6bb9b2 100644 --- a/src/gf.c +++ b/src/gf.c @@ -3160,19 +3160,23 @@ int jl_has_concrete_subtype(jl_value_t *typ) #define typeinf_lock codegen_lock static uint64_t inference_start_time = 0; +static uint8_t inference_is_measuring_compile_time = 0; JL_DLLEXPORT void jl_typeinf_begin(void) { JL_LOCK(&typeinf_lock); - if (jl_measure_compile_time[jl_threadid()]) + if (jl_atomic_load(&jl_measure_compile_time_enabled)) { inference_start_time = jl_hrtime(); + inference_is_measuring_compile_time = 1; + } } JL_DLLEXPORT void jl_typeinf_end(void) { - int tid = jl_threadid(); - if (typeinf_lock.count == 1 && jl_measure_compile_time[tid]) - jl_cumulative_compile_time[tid] += (jl_hrtime() - inference_start_time); + if (typeinf_lock.count == 1 && inference_is_measuring_compile_time) { + jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time)); + inference_is_measuring_compile_time = 0; + } JL_UNLOCK(&typeinf_lock); } diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index e86d6109ff4275..c8efbb97ffb9d0 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -78,16 +78,16 @@ void jl_jit_globals(std::map &globals) extern "C" JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns_before() { - int tid = jl_threadid(); - jl_measure_compile_time[tid] = 1; - return jl_cumulative_compile_time[tid]; + // Increment the flag to allow reentrant callers to `@time`. + jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1); + return jl_atomic_load(&jl_cumulative_compile_time); } extern "C" JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns_after() { - int tid = jl_threadid(); - jl_measure_compile_time[tid] = 0; - return jl_cumulative_compile_time[tid]; + // Decrement the flag when done measuring, allowing other callers to continue measuring. + jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1); + return jl_atomic_load(&jl_cumulative_compile_time); } // this generates llvm code for the lambda info @@ -233,8 +233,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt { JL_LOCK(&codegen_lock); uint64_t compiler_start_time = 0; - int tid = jl_threadid(); - if (jl_measure_compile_time[tid]) + uint8_t measure_compile_time_enabled = jl_atomic_load(&jl_measure_compile_time_enabled); + if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); jl_codegen_params_t params; jl_codegen_params_t *pparams = (jl_codegen_params_t*)p; @@ -258,8 +258,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt if (success && llvmmod == NULL) jl_add_to_ee(std::unique_ptr(into)); } - if (codegen_lock.count == 1 && jl_measure_compile_time[tid]) - jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time); + if (codegen_lock.count == 1 && measure_compile_time_enabled) + jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); JL_UNLOCK(&codegen_lock); return success; } @@ -315,8 +315,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT { JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion uint64_t compiler_start_time = 0; - int tid = jl_threadid(); - if (jl_measure_compile_time[tid]) + uint8_t measure_compile_time_enabled = jl_atomic_load(&jl_measure_compile_time_enabled); + if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); // if we don't have any decls already, try to generate it now jl_code_info_t *src = NULL; @@ -354,8 +354,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT else { codeinst = NULL; } - if (codegen_lock.count == 1 && jl_measure_compile_time[tid]) - jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time); + if (codegen_lock.count == 1 && measure_compile_time_enabled) + jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); JL_UNLOCK(&codegen_lock); JL_GC_POP(); return codeinst; @@ -369,8 +369,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec) } JL_LOCK(&codegen_lock); uint64_t compiler_start_time = 0; - int tid = jl_threadid(); - if (jl_measure_compile_time[tid]) + uint8_t measure_compile_time_enabled = jl_atomic_load(&jl_measure_compile_time_enabled); + if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); if (unspec->invoke == NULL) { jl_code_info_t *src = NULL; @@ -398,8 +398,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec) } JL_GC_POP(); } - if (codegen_lock.count == 1 && jl_measure_compile_time[tid]) - jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time); + if (codegen_lock.count == 1 && measure_compile_time_enabled) + jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); JL_UNLOCK(&codegen_lock); // Might GC } @@ -422,8 +422,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world, // so create an exception here so we can print pretty our lies JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion uint64_t compiler_start_time = 0; - int tid = jl_threadid(); - if (jl_measure_compile_time[tid]) + uint8_t measure_compile_time_enabled = jl_atomic_load(&jl_measure_compile_time_enabled); + if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); specfptr = (uintptr_t)codeinst->specptr.fptr; if (specfptr == 0) { @@ -448,8 +448,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world, } JL_GC_POP(); } - if (jl_measure_compile_time[tid]) - jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time); + if (measure_compile_time_enabled) + jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); JL_UNLOCK(&codegen_lock); } if (specfptr != 0) diff --git a/src/julia_internal.h b/src/julia_internal.h index b997ad0d1214a7..f53f04984d9f05 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -154,8 +154,9 @@ static inline uint64_t cycleclock(void) #include "timing.h" -extern uint8_t *jl_measure_compile_time; -extern uint64_t *jl_cumulative_compile_time; +// Global *atomic* integers controlling *process-wide* measurement of compilation time. +extern uint8_t jl_measure_compile_time_enabled; +extern uint64_t jl_cumulative_compile_time; #ifdef _COMPILER_MICROSOFT_ # define jl_return_address() ((uintptr_t)_ReturnAddress()) diff --git a/src/task.c b/src/task.c index c3ac26fbcf5119..41df80d64e5808 100644 --- a/src/task.c +++ b/src/task.c @@ -561,7 +561,10 @@ static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_M ptls->io_wait = 0; // @time needs its compile timer disabled on error, // and cannot use a try-finally as it would break scope for assignments - jl_measure_compile_time[ptls->tid] = 0; + // We blindly disable compilation time tracking here, for all running Tasks, even though + // it may cause some incorrect measurements. This is a known bug, and is being tracked + // here: https://github.com/JuliaLang/julia/pull/39138 + jl_atomic_store_release(&jl_measure_compile_time_enabled, 0); JL_GC_PUSH1(&exception); jl_gc_unsafe_enter(ptls); if (exception) { diff --git a/src/threading.c b/src/threading.c index 235bb9f870ba19..ffe53c07b45ee3 100644 --- a/src/threading.c +++ b/src/threading.c @@ -287,8 +287,8 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k) #endif jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED; -uint8_t *jl_measure_compile_time = NULL; -uint64_t *jl_cumulative_compile_time = NULL; +uint8_t jl_measure_compile_time_enabled = 0; +uint64_t jl_cumulative_compile_time = 0; // return calling thread's ID // Also update the suspended_threads list in signals-mach when changing the @@ -467,8 +467,6 @@ void jl_init_threading(void) } if (jl_n_threads <= 0) jl_n_threads = 1; - jl_measure_compile_time = (uint8_t*)calloc(jl_n_threads, sizeof(*jl_measure_compile_time)); - jl_cumulative_compile_time = (uint64_t*)calloc(jl_n_threads, sizeof(*jl_cumulative_compile_time)); #ifndef __clang_analyzer__ jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*)); #endif diff --git a/test/misc.jl b/test/misc.jl index e765dc9279b86d..d15b5f64426d99 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -263,6 +263,23 @@ function timev_macro_scope() end @test timev_macro_scope() == 1 +before = Base.cumulative_compile_time_ns_before(); + +# exercise concurrent calls to `@time` for reentrant compilation time measurement. +t1 = @async @time begin + sleep(2) + @eval module M ; f(x,y) = x+y ; end + @eval M.f(2,3) +end +t2 = @async begin + sleep(1) + @time 2 + 2 +end + +after = Base.cumulative_compile_time_ns_after(); +@test after >= before; +@test Base.cumulative_compile_time_ns() >= after; + # interactive utilities struct ambigconvert; end # inject a problematic `convert` method to ensure it still works