Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove typeinfer lock altogether #46825

Merged
merged 6 commits into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions base/compiler/typeinfer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -407,9 +407,7 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
if track_newly_inferred[]
m = linfo.def
if isa(m, Method) && m.module != Core
ccall(:jl_typeinf_lock_begin, Cvoid, ())
push!(newly_inferred, linfo)
ccall(:jl_typeinf_lock_end, Cvoid, ())
ccall(:jl_push_newly_inferred, Cvoid, (Any,), linfo)
end
end
end
Expand Down
2 changes: 1 addition & 1 deletion base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1662,6 +1662,7 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto
task_local_storage()[:SOURCE_PATH] = source
end

ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred)
Core.Compiler.track_newly_inferred.x = true
try
Base.include(Base.__toplevel__, input)
Expand All @@ -1672,7 +1673,6 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto
finally
Core.Compiler.track_newly_inferred.x = false
end
ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred)
end

const PRECOMPILE_TRACE_COMPILE = Ref{String}()
Expand Down
1 change: 1 addition & 0 deletions doc/src/devdocs/locks.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ The following is a leaf lock (level 2), and only acquires level 1 locks (safepoi
> * typecache
> * Module->lock
> * JLDebuginfoPlugin::PluginMutex
> * newly_inferred_mutex

The following is a level 3 lock, which can only acquire level 1 or level 2 locks internally:

Expand Down
3 changes: 3 additions & 0 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,8 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
jl_code_info_t *src = NULL;
JL_GC_PUSH1(&src);
JL_LOCK(&jl_codegen_lock);
auto ct = jl_current_task;
ct->reentrant_codegen++;
orc::ThreadSafeContext ctx;
orc::ThreadSafeModule backing;
if (!llvmmod) {
Expand Down Expand Up @@ -425,6 +427,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
if (ctx.getContext()) {
jl_ExecutionEngine->releaseContext(std::move(ctx));
}
ct->reentrant_codegen--;
JL_UNLOCK(&jl_codegen_lock); // Might GC
return (void*)data;
}
Expand Down
15 changes: 13 additions & 2 deletions src/dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ static htable_t external_mis;
// Inference tracks newly-inferred MethodInstances during precompilation
// and registers them by calling jl_set_newly_inferred
static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED;
// Mutex for newly_inferred
static jl_mutex_t newly_inferred_mutex;

// New roots to add to Methods. These can't be added until after
// recaching is complete, so we have to hold on to them separately
Expand Down Expand Up @@ -2894,14 +2896,23 @@ JL_DLLEXPORT void jl_init_restored_modules(jl_array_t *init_order)

// --- entry points ---

// Register all newly-inferred MethodInstances
// This gets called as the final step of Base.include_package_for_output
// Register array of newly-inferred MethodInstances
// This gets called as the first step of Base.include_package_for_output
JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t* _newly_inferred)
{
assert(_newly_inferred == NULL || jl_is_array(_newly_inferred));
newly_inferred = (jl_array_t*) _newly_inferred;
}

JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t* linfo)
{
JL_LOCK(&newly_inferred_mutex);
size_t end = jl_array_len(newly_inferred);
jl_array_grow_end(newly_inferred, 1);
jl_arrayset(newly_inferred, linfo, end);
JL_UNLOCK(&newly_inferred_mutex);
}

// Serialize the modules in `worklist` to file `fname`
JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
{
Expand Down
42 changes: 18 additions & 24 deletions src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,8 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
JL_TIMING(INFERENCE);
if (jl_typeinf_func == NULL)
return NULL;
static int in_inference;
if (in_inference > 2)
jl_task_t *ct = jl_current_task;
if (ct->reentrant_inference > 2)
return NULL;

jl_code_info_t *src = NULL;
Expand All @@ -300,15 +300,14 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
jl_printf(JL_STDERR, "\n");
}
#endif
jl_task_t *ct = jl_current_task;
int last_errno = errno;
#ifdef _OS_WINDOWS_
DWORD last_error = GetLastError();
#endif
size_t last_age = ct->world_age;
ct->world_age = jl_typeinf_world;
mi->inInference = 1;
in_inference++;
ct->reentrant_inference++;
JL_TRY {
src = (jl_code_info_t*)jl_apply(fargs, 3);
}
Expand All @@ -329,7 +328,7 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
src = NULL;
}
ct->world_age = last_age;
in_inference--;
ct->reentrant_inference--;
mi->inInference = 0;
#ifdef _OS_WINDOWS_
SetLastError(last_error);
Expand Down Expand Up @@ -544,7 +543,7 @@ static int reset_mt_caches(jl_methtable_t *mt, void *env)
}


jl_function_t *jl_typeinf_func = NULL;
jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED = NULL;
JL_DLLEXPORT size_t jl_typeinf_world = 1;

JL_DLLEXPORT void jl_set_typeinf_func(jl_value_t *f)
Expand Down Expand Up @@ -3416,44 +3415,39 @@ int jl_has_concrete_subtype(jl_value_t *typ)
return ((jl_datatype_t*)typ)->has_concrete_subtype;
}

// TODO: separate the codegen and typeinf locks
// currently using a coarser lock seems like
// the best way to avoid acquisition priority
// ordering violations
//static jl_mutex_t typeinf_lock;
#define typeinf_lock jl_codegen_lock

static jl_mutex_t inference_timing_mutex;
static uint64_t inference_start_time = 0;
static uint8_t inference_is_measuring_compile_time = 0;

JL_DLLEXPORT void jl_typeinf_timing_begin(void)
{
if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) {
JL_LOCK_NOGC(&inference_timing_mutex);
if (inference_is_measuring_compile_time++ == 0) {
inference_start_time = jl_hrtime();
}
JL_UNLOCK_NOGC(&inference_timing_mutex);
jl_task_t *ct = jl_current_task;
if (ct->inference_start_time == 0 && ct->reentrant_inference == 1)
ct->inference_start_time = jl_hrtime();
}
}

JL_DLLEXPORT void jl_typeinf_timing_end(void)
{
JL_LOCK_NOGC(&inference_timing_mutex);
if (--inference_is_measuring_compile_time == 0) {
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time));
jl_task_t *ct = jl_current_task;
if (ct->inference_start_time != 0 && ct->reentrant_inference == 1) {
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - ct->inference_start_time));
ct->inference_start_time = 0;
}
JL_UNLOCK_NOGC(&inference_timing_mutex);
}

JL_DLLEXPORT void jl_typeinf_lock_begin(void)
{
JL_LOCK(&typeinf_lock);
//Although this is claiming to be a typeinfer lock, it is actually
//affecting the codegen lock count, not type inference's inferencing count
jl_task_t *ct = jl_current_task;
ct->reentrant_codegen++;
}

JL_DLLEXPORT void jl_typeinf_lock_end(void)
{
jl_task_t *ct = jl_current_task;
ct->reentrant_codegen--;
JL_UNLOCK(&typeinf_lock);
}
Comment on lines 3438 to 3452
Copy link
Member

@vtjnash vtjnash Nov 18, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we instead just remove this now?

Suggested change
JL_DLLEXPORT void jl_typeinf_lock_begin(void)
{
JL_LOCK(&typeinf_lock);
//Although this is claiming to be a typeinfer lock, it is actually
//affecting the codegen lock count, not type inference's inferencing count
jl_task_t *ct = jl_current_task;
ct->reentrant_codegen++;
}
JL_DLLEXPORT void jl_typeinf_lock_end(void)
{
jl_task_t *ct = jl_current_task;
ct->reentrant_codegen--;
JL_UNLOCK(&typeinf_lock);
}

(and related exports in jl_exported_funcs.inc)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since GPUCompiler is depending on those for type inference, I'd rather keep them around until we completely drop locking around our type inference, just in case we locate any hidden bugs along the way.


Expand Down
30 changes: 19 additions & 11 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,8 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi
extern "C" JL_DLLEXPORT
int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
{
JL_LOCK(&jl_codegen_lock);
auto ct = jl_current_task;
ct->reentrant_codegen++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
Expand All @@ -311,6 +312,7 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *
backing = jl_create_llvm_module("cextern", pparams ? pparams->tsctx : ctx, pparams ? pparams->imaging : imaging_default());
into = &backing;
}
JL_LOCK(&jl_codegen_lock);
jl_codegen_params_t params(into->getContext());
if (pparams == NULL)
pparams = &params;
Expand All @@ -330,12 +332,12 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *
if (success && llvmmod == NULL)
jl_ExecutionEngine->addModule(std::move(*into));
}
if (jl_codegen_lock.count == 1 && measure_compile_time_enabled)
JL_UNLOCK(&jl_codegen_lock);
if (!--ct->reentrant_codegen && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (ctx.getContext()) {
jl_ExecutionEngine->releaseContext(std::move(ctx));
}
JL_UNLOCK(&jl_codegen_lock);
return success;
}

Expand Down Expand Up @@ -386,7 +388,8 @@ void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt)
extern "C" JL_DLLEXPORT
jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
{
JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
auto ct = jl_current_task;
ct->reentrant_codegen++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
bool is_recompile = false;
Expand All @@ -395,6 +398,7 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
// if we don't have any decls already, try to generate it now
jl_code_info_t *src = NULL;
JL_GC_PUSH1(&src);
JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
jl_value_t *ci = jl_rettype_inferred(mi, world, world);
jl_code_instance_t *codeinst = (ci == jl_nothing ? NULL : (jl_code_instance_t*)ci);
if (codeinst) {
Expand Down Expand Up @@ -437,13 +441,13 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
else {
codeinst = NULL;
}
if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) {
JL_UNLOCK(&jl_codegen_lock);
if (!--ct->reentrant_codegen && measure_compile_time_enabled) {
uint64_t t_comp = jl_hrtime() - compiler_start_time;
if (is_recompile)
jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp);
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp);
}
JL_UNLOCK(&jl_codegen_lock);
JL_GC_POP();
return codeinst;
}
Expand All @@ -454,11 +458,13 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
if (jl_atomic_load_relaxed(&unspec->invoke) != NULL) {
return;
}
JL_LOCK(&jl_codegen_lock);
auto ct = jl_current_task;
ct->reentrant_codegen++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
JL_LOCK(&jl_codegen_lock);
if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) {
jl_code_info_t *src = NULL;
JL_GC_PUSH1(&src);
Expand Down Expand Up @@ -486,9 +492,9 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
}
JL_GC_POP();
}
if (jl_codegen_lock.count == 1 && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&jl_codegen_lock); // Might GC
if (!--ct->reentrant_codegen && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
}


Expand All @@ -508,11 +514,13 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
// normally we prevent native code from being generated for these functions,
// (using sentinel value `1` instead)
// so create an exception here so we can print pretty our lies
JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
auto ct = jl_current_task;
ct->reentrant_codegen++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
if (specfptr == 0) {
jl_code_info_t *src = jl_type_infer(mi, world, 0);
Expand All @@ -536,7 +544,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
}
JL_GC_POP();
}
if (measure_compile_time_enabled)
if (!--ct->reentrant_codegen && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
JL_UNLOCK(&jl_codegen_lock);
}
Expand Down
4 changes: 4 additions & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -1768,6 +1768,7 @@ JL_DLLEXPORT void jl_save_system_image(const char *fname);
JL_DLLEXPORT void jl_restore_system_image(const char *fname);
JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len);
JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred);
JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t *linfo);
JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist);
JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods);
JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, size_t sz, jl_array_t *depmods);
Expand Down Expand Up @@ -1938,6 +1939,9 @@ typedef struct _jl_task_t {
jl_ucontext_t ctx;
void *stkbuf; // malloc'd memory (either copybuf or stack)
size_t bufsz; // actual sizeof stkbuf
uint64_t inference_start_time; // time when inference started
unsigned int reentrant_inference; // How many times we've reentered inference
unsigned int reentrant_codegen; // How many times we've reentered codegen
unsigned int copy_stack:31; // sizeof stack for copybuf
unsigned int started:1;
} jl_task_t;
Expand Down
2 changes: 1 addition & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ void print_func_loc(JL_STREAM *s, jl_method_t *m);
extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED;

extern JL_DLLEXPORT size_t jl_page_size;
extern jl_function_t *jl_typeinf_func;
extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED;
extern JL_DLLEXPORT size_t jl_typeinf_world;
extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED;
extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED;
Expand Down
4 changes: 4 additions & 0 deletions src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -938,6 +938,8 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
t->threadpoolid = ct->threadpoolid;
t->ptls = NULL;
t->world_age = ct->world_age;
t->reentrant_codegen = 0;
t->reentrant_inference = 0;

#ifdef COPY_STACKS
if (!t->copy_stack) {
Expand Down Expand Up @@ -1523,6 +1525,8 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
ct->sticky = 1;
ct->ptls = ptls;
ct->world_age = 1; // OK to run Julia code on this task
ct->reentrant_codegen = 0;
ct->reentrant_inference = 0;
ptls->root_task = ct;
jl_atomic_store_relaxed(&ptls->current_task, ct);
JL_GC_PROMISE_ROOTED(ct);
Expand Down