Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make jl_cumulative_compile_time_ns global (and reentrant). #41733

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ function gc_alloc_count(diff::GC_Diff)
diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc
end

# cumulative total time spent on compilation
# cumulative total time spent on compilation, in nanoseconds
cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ())
cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ())

Expand Down
16 changes: 8 additions & 8 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
JL_GC_PUSH1(&src);
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();

CompilationPolicy policy = (CompilationPolicy) _policy;
Expand Down Expand Up @@ -407,8 +407,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
}

data->M = std::move(clone);
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (policy == CompilationPolicy::ImagingMode)
imaging_mode = 0;
JL_UNLOCK(&codegen_lock); // Might GC
Expand Down Expand Up @@ -908,8 +908,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
jl_llvm_functions_t decls;
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
std::tie(m, decls) = jl_emit_code(mi, src, jlrettype, output);

Expand All @@ -934,8 +934,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
m.release(); // the return object `llvmf` will be the owning pointer
}
JL_GC_POP();
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock); // Might GC
if (F)
return F;
Expand Down
12 changes: 8 additions & 4 deletions src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3160,19 +3160,23 @@ int jl_has_concrete_subtype(jl_value_t *typ)
#define typeinf_lock codegen_lock

static uint64_t inference_start_time = 0;
static uint8_t inference_is_measuring_compile_time = 0;

JL_DLLEXPORT void jl_typeinf_begin(void)
{
JL_LOCK(&typeinf_lock);
if (jl_measure_compile_time[jl_threadid()])
if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) {
inference_start_time = jl_hrtime();
inference_is_measuring_compile_time = 1;
}
}

JL_DLLEXPORT void jl_typeinf_end(void)
{
int tid = jl_threadid();
if (typeinf_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - inference_start_time);
if (typeinf_lock.count == 1 && inference_is_measuring_compile_time) {
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time));
inference_is_measuring_compile_time = 0;
}
JL_UNLOCK(&typeinf_lock);
}

Expand Down
44 changes: 22 additions & 22 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,16 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
extern "C" JL_DLLEXPORT
uint64_t jl_cumulative_compile_time_ns_before()
{
int tid = jl_threadid();
jl_measure_compile_time[tid] = 1;
return jl_cumulative_compile_time[tid];
// Increment the flag to allow reentrant callers to `@time`.
jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1);
return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
}
extern "C" JL_DLLEXPORT
uint64_t jl_cumulative_compile_time_ns_after()
{
int tid = jl_threadid();
jl_measure_compile_time[tid] = 0;
return jl_cumulative_compile_time[tid];
// Decrement the flag when done measuring, allowing other callers to continue measuring.
jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1);
return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
}

// this generates llvm code for the lambda info
Expand Down Expand Up @@ -231,8 +231,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
{
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
jl_codegen_params_t params;
jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
Expand All @@ -256,8 +256,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
if (success && llvmmod == NULL)
jl_add_to_ee(std::unique_ptr<Module>(into));
}
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock);
return success;
}
Expand Down Expand Up @@ -313,8 +313,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
{
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
// if we don't have any decls already, try to generate it now
jl_code_info_t *src = NULL;
Expand Down Expand Up @@ -352,8 +352,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
else {
codeinst = NULL;
}
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock);
JL_GC_POP();
return codeinst;
Expand All @@ -367,8 +367,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
}
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
if (unspec->invoke == NULL) {
jl_code_info_t *src = NULL;
Expand Down Expand Up @@ -396,8 +396,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
}
JL_GC_POP();
}
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (codegen_lock.count == 1 && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock); // Might GC
}

Expand All @@ -420,8 +420,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
// so create an exception here so we can print pretty our lies
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
uint64_t compiler_start_time = 0;
int tid = jl_threadid();
if (jl_measure_compile_time[tid])
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
specfptr = (uintptr_t)codeinst->specptr.fptr;
if (specfptr == 0) {
Expand All @@ -446,8 +446,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
}
JL_GC_POP();
}
if (jl_measure_compile_time[tid])
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
if (measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&codegen_lock);
}
if (specfptr != 0)
Expand Down
5 changes: 3 additions & 2 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,9 @@ static inline uint64_t cycleclock(void)

#include "timing.h"

extern uint8_t *jl_measure_compile_time;
extern uint64_t *jl_cumulative_compile_time;
// Global *atomic* integers controlling *process-wide* measurement of compilation time.
extern uint8_t jl_measure_compile_time_enabled;
extern uint64_t jl_cumulative_compile_time;

#ifdef _COMPILER_MICROSOFT_
# define jl_return_address() ((uintptr_t)_ReturnAddress())
Expand Down
5 changes: 4 additions & 1 deletion src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,10 @@ static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_M
ptls->io_wait = 0;
// @time needs its compile timer disabled on error,
// and cannot use a try-finally as it would break scope for assignments
jl_measure_compile_time[ptls->tid] = 0;
NHDaly marked this conversation as resolved.
Show resolved Hide resolved
// We blindly disable compilation time tracking here, for all running Tasks, even though
// it may cause some incorrect measurements. This is a known bug, and is being tracked
// here: https://github.com/JuliaLang/julia/pull/39138
jl_atomic_store_relaxed(&jl_measure_compile_time_enabled, 0);
JL_GC_PUSH1(&exception);
jl_gc_unsafe_enter(ptls);
if (exception) {
Expand Down
6 changes: 2 additions & 4 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,8 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k)
#endif

jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
uint8_t *jl_measure_compile_time = NULL;
uint64_t *jl_cumulative_compile_time = NULL;
uint8_t jl_measure_compile_time_enabled = 0;
uint64_t jl_cumulative_compile_time = 0;

// return calling thread's ID
// Also update the suspended_threads list in signals-mach when changing the
Expand Down Expand Up @@ -467,8 +467,6 @@ void jl_init_threading(void)
}
if (jl_n_threads <= 0)
jl_n_threads = 1;
jl_measure_compile_time = (uint8_t*)calloc(jl_n_threads, sizeof(*jl_measure_compile_time));
jl_cumulative_compile_time = (uint64_t*)calloc(jl_n_threads, sizeof(*jl_cumulative_compile_time));
#ifndef __clang_analyzer__
jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*));
#endif
Expand Down
16 changes: 16 additions & 0 deletions test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,22 @@ function timev_macro_scope()
end
@test timev_macro_scope() == 1

before = Base.cumulative_compile_time_ns_before();

# exercise concurrent calls to `@time` for reentrant compilation time measurement.
t1 = @async @time begin
sleep(2)
@eval module M ; f(x,y) = x+y ; end
@eval M.f(2,3)
end
t2 = @async begin
sleep(1)
@time 2 + 2
end

after = Base.cumulative_compile_time_ns_after();
@test after >= before;

# interactive utilities

struct ambigconvert; end # inject a problematic `convert` method to ensure it still works
Expand Down