Skip to content

Commit

Permalink
Remove typeinfer lock altogether (#46825)
Browse files Browse the repository at this point in the history
* Remove typeinfer lock altogether

* Don't remove the typeinf lock functions

* Track reentrancy in current task state

* Fix up some git status

* Initialize task variables

* Promise that jl_typeinf_func is rooted somewhere
  • Loading branch information
pchintalapudi authored Nov 23, 2022
1 parent 27ebaa7 commit 113efb6
Show file tree
Hide file tree
Showing 10 changed files with 65 additions and 42 deletions.
4 changes: 1 addition & 3 deletions base/compiler/typeinfer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -407,9 +407,7 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
if track_newly_inferred[]
m = linfo.def
if isa(m, Method) && m.module != Core
ccall(:jl_typeinf_lock_begin, Cvoid, ())
push!(newly_inferred, linfo)
ccall(:jl_typeinf_lock_end, Cvoid, ())
ccall(:jl_push_newly_inferred, Cvoid, (Any,), linfo)
end
end
end
Expand Down
2 changes: 1 addition & 1 deletion base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1662,6 +1662,7 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto
task_local_storage()[:SOURCE_PATH] = source
end

ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred)
Core.Compiler.track_newly_inferred.x = true
try
Base.include(Base.__toplevel__, input)
Expand All @@ -1672,7 +1673,6 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto
finally
Core.Compiler.track_newly_inferred.x = false
end
ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred)
end

const PRECOMPILE_TRACE_COMPILE = Ref{String}()
Expand Down
1 change: 1 addition & 0 deletions doc/src/devdocs/locks.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ The following is a leaf lock (level 2), and only acquires level 1 locks (safepoi
> * typecache
> * Module->lock
> * JLDebuginfoPlugin::PluginMutex
> * newly_inferred_mutex
The following is a level 3 lock, which can only acquire level 1 or level 2 locks internally:

Expand Down
3 changes: 3 additions & 0 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,8 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
jl_code_info_t *src = NULL;
JL_GC_PUSH1(&src);
JL_LOCK(&jl_codegen_lock);
auto ct = jl_current_task;
ct->reentrant_codegen++;
orc::ThreadSafeContext ctx;
orc::ThreadSafeModule backing;
if (!llvmmod) {
Expand Down Expand Up @@ -425,6 +427,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
if (ctx.getContext()) {
jl_ExecutionEngine->releaseContext(std::move(ctx));
}
ct->reentrant_codegen--;
JL_UNLOCK(&jl_codegen_lock); // Might GC
return (void*)data;
}
Expand Down
15 changes: 13 additions & 2 deletions src/dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ static htable_t external_mis;
// Inference tracks newly-inferred MethodInstances during precompilation
// and registers them by calling jl_set_newly_inferred
static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED;
// Mutex for newly_inferred
static jl_mutex_t newly_inferred_mutex;

// New roots to add to Methods. These can't be added until after
// recaching is complete, so we have to hold on to them separately
Expand Down Expand Up @@ -2894,14 +2896,23 @@ JL_DLLEXPORT void jl_init_restored_modules(jl_array_t *init_order)

// --- entry points ---

// Register all newly-inferred MethodInstances
// This gets called as the final step of Base.include_package_for_output
// Register array of newly-inferred MethodInstances
// This gets called as the first step of Base.include_package_for_output
JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t* _newly_inferred)
{
assert(_newly_inferred == NULL || jl_is_array(_newly_inferred));
newly_inferred = (jl_array_t*) _newly_inferred;
}

JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t* linfo)
{
JL_LOCK(&newly_inferred_mutex);
size_t end = jl_array_len(newly_inferred);
jl_array_grow_end(newly_inferred, 1);
jl_arrayset(newly_inferred, linfo, end);
JL_UNLOCK(&newly_inferred_mutex);
}

// Serialize the modules in `worklist` to file `fname`
JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
{
Expand Down
42 changes: 18 additions & 24 deletions src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,8 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
JL_TIMING(INFERENCE);
if (jl_typeinf_func == NULL)
return NULL;
static int in_inference;
if (in_inference > 2)
jl_task_t *ct = jl_current_task;
if (ct->reentrant_inference > 2)
return NULL;

jl_code_info_t *src = NULL;
Expand All @@ -300,15 +300,14 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
jl_printf(JL_STDERR, "\n");
}
#endif
jl_task_t *ct = jl_current_task;
int last_errno = errno;
#ifdef _OS_WINDOWS_
DWORD last_error = GetLastError();
#endif
size_t last_age = ct->world_age;
ct->world_age = jl_typeinf_world;
mi->inInference = 1;
in_inference++;
ct->reentrant_inference++;
JL_TRY {
src = (jl_code_info_t*)jl_apply(fargs, 3);
}
Expand All @@ -329,7 +328,7 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
src = NULL;
}
ct->world_age = last_age;
in_inference--;
ct->reentrant_inference--;
mi->inInference = 0;
#ifdef _OS_WINDOWS_
SetLastError(last_error);
Expand Down Expand Up @@ -544,7 +543,7 @@ static int reset_mt_caches(jl_methtable_t *mt, void *env)
}


jl_function_t *jl_typeinf_func = NULL;
jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED = NULL;
JL_DLLEXPORT size_t jl_typeinf_world = 1;

JL_DLLEXPORT void jl_set_typeinf_func(jl_value_t *f)
Expand Down Expand Up @@ -3416,44 +3415,39 @@ int jl_has_concrete_subtype(jl_value_t *typ)
return ((jl_datatype_t*)typ)->has_concrete_subtype;
}

// TODO: separate the codegen and typeinf locks
// currently using a coarser lock seems like
// the best way to avoid acquisition priority
// ordering violations
//static jl_mutex_t typeinf_lock;
#define typeinf_lock jl_codegen_lock

static jl_mutex_t inference_timing_mutex;
static uint64_t inference_start_time = 0;
static uint8_t inference_is_measuring_compile_time = 0;

JL_DLLEXPORT void jl_typeinf_timing_begin(void)
{
if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) {
JL_LOCK_NOGC(&inference_timing_mutex);
if (inference_is_measuring_compile_time++ == 0) {
inference_start_time = jl_hrtime();
}
JL_UNLOCK_NOGC(&inference_timing_mutex);
jl_task_t *ct = jl_current_task;
if (ct->inference_start_time == 0 && ct->reentrant_inference == 1)
ct->inference_start_time = jl_hrtime();
}
}

JL_DLLEXPORT void jl_typeinf_timing_end(void)
{
JL_LOCK_NOGC(&inference_timing_mutex);
if (--inference_is_measuring_compile_time == 0) {
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time));
jl_task_t *ct = jl_current_task;
if (ct->inference_start_time != 0 && ct->reentrant_inference == 1) {
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - ct->inference_start_time));
ct->inference_start_time = 0;
}
JL_UNLOCK_NOGC(&inference_timing_mutex);
}

JL_DLLEXPORT void jl_typeinf_lock_begin(void)
{
JL_LOCK(&typeinf_lock);
//Although this is claiming to be a typeinfer lock, it is actually
//affecting the codegen lock count, not type inference's inferencing count
jl_task_t *ct = jl_current_task;
ct->reentrant_codegen++;
}

JL_DLLEXPORT void jl_typeinf_lock_end(void)
{
jl_task_t *ct = jl_current_task;
ct->reentrant_codegen--;
JL_UNLOCK(&typeinf_lock);
}

Expand Down
30 changes: 19 additions & 11 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,8 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi
extern "C" JL_DLLEXPORT
int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
{
JL_LOCK(&jl_codegen_lock);
auto ct = jl_current_task;
ct->reentrant_codegen++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
Expand All @@ -311,6 +312,7 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *
backing = jl_create_llvm_module("cextern", pparams ? pparams->tsctx : ctx, pparams ? pparams->imaging : imaging_default());
into = &backing;
}
JL_LOCK(&jl_codegen_lock);
jl_codegen_params_t params(into->getContext());
if (pparams == NULL)
pparams = &params;
Expand All @@ -330,12 +332,12 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *
if (success && llvmmod == NULL)
jl_ExecutionEngine->addModule(std::move(*into));
}
if (jl_codegen_lock.count == 1 && measure_compile_time_enabled)
JL_UNLOCK(&jl_codegen_lock);
if (!--ct->reentrant_codegen && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (ctx.getContext()) {
jl_ExecutionEngine->releaseContext(std::move(ctx));
}
JL_UNLOCK(&jl_codegen_lock);
return success;
}

Expand Down Expand Up @@ -386,7 +388,8 @@ void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt)
extern "C" JL_DLLEXPORT
jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
{
JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
auto ct = jl_current_task;
ct->reentrant_codegen++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
bool is_recompile = false;
Expand All @@ -395,6 +398,7 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
// if we don't have any decls already, try to generate it now
jl_code_info_t *src = NULL;
JL_GC_PUSH1(&src);
JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
jl_value_t *ci = jl_rettype_inferred(mi, world, world);
jl_code_instance_t *codeinst = (ci == jl_nothing ? NULL : (jl_code_instance_t*)ci);
if (codeinst) {
Expand Down Expand Up @@ -437,13 +441,13 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
else {
codeinst = NULL;
}
if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) {
JL_UNLOCK(&jl_codegen_lock);
if (!--ct->reentrant_codegen && measure_compile_time_enabled) {
uint64_t t_comp = jl_hrtime() - compiler_start_time;
if (is_recompile)
jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp);
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp);
}
JL_UNLOCK(&jl_codegen_lock);
JL_GC_POP();
return codeinst;
}
Expand All @@ -454,11 +458,13 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
if (jl_atomic_load_relaxed(&unspec->invoke) != NULL) {
return;
}
JL_LOCK(&jl_codegen_lock);
auto ct = jl_current_task;
ct->reentrant_codegen++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
JL_LOCK(&jl_codegen_lock);
if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) {
jl_code_info_t *src = NULL;
JL_GC_PUSH1(&src);
Expand Down Expand Up @@ -486,9 +492,9 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
}
JL_GC_POP();
}
if (jl_codegen_lock.count == 1 && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&jl_codegen_lock); // Might GC
if (!--ct->reentrant_codegen && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
}


Expand All @@ -508,11 +514,13 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
// normally we prevent native code from being generated for these functions,
// (using sentinel value `1` instead)
// so create an exception here so we can print pretty our lies
JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
auto ct = jl_current_task;
ct->reentrant_codegen++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
if (specfptr == 0) {
jl_code_info_t *src = jl_type_infer(mi, world, 0);
Expand All @@ -536,7 +544,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
}
JL_GC_POP();
}
if (measure_compile_time_enabled)
if (!--ct->reentrant_codegen && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&jl_codegen_lock);
}
Expand Down
4 changes: 4 additions & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -1768,6 +1768,7 @@ JL_DLLEXPORT void jl_save_system_image(const char *fname);
JL_DLLEXPORT void jl_restore_system_image(const char *fname);
JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len);
JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred);
JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *linfo);
JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist);
JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods);
JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, size_t sz, jl_array_t *depmods);
Expand Down Expand Up @@ -1938,6 +1939,9 @@ typedef struct _jl_task_t {
jl_ucontext_t ctx;
void *stkbuf; // malloc'd memory (either copybuf or stack)
size_t bufsz; // actual sizeof stkbuf
uint64_t inference_start_time; // time when inference started
unsigned int reentrant_inference; // How many times we've reentered inference
unsigned int reentrant_codegen; // How many times we've reentered codegen
unsigned int copy_stack:31; // sizeof stack for copybuf
unsigned int started:1;
} jl_task_t;
Expand Down
2 changes: 1 addition & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m);
extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED;

extern JL_DLLEXPORT size_t jl_page_size;
extern jl_function_t *jl_typeinf_func;
extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED;
extern JL_DLLEXPORT size_t jl_typeinf_world;
extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED;
extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED;
Expand Down
4 changes: 4 additions & 0 deletions src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -938,6 +938,8 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
t->threadpoolid = ct->threadpoolid;
t->ptls = NULL;
t->world_age = ct->world_age;
t->reentrant_codegen = 0;
t->reentrant_inference = 0;

#ifdef COPY_STACKS
if (!t->copy_stack) {
Expand Down Expand Up @@ -1523,6 +1525,8 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
ct->sticky = 1;
ct->ptls = ptls;
ct->world_age = 1; // OK to run Julia code on this task
ct->reentrant_codegen = 0;
ct->reentrant_inference = 0;
ptls->root_task = ct;
jl_atomic_store_relaxed(&ptls->current_task, ct);
JL_GC_PROMISE_ROOTED(ct);
Expand Down

0 comments on commit 113efb6

Please sign in to comment.