From bcabe46a5984dadefe7b703d9ce1f6a992c39280 Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Wed, 28 Dec 2022 20:01:49 -0500 Subject: [PATCH 1/3] Use the same timing reentrancy counter for both inference and codegen --- src/aotcompile.cpp | 15 +++++++++------ src/gf.c | 10 ++-------- src/jitlayers.cpp | 31 +++++++++++++++++++------------ src/julia.h | 2 +- src/task.c | 4 ++-- 5 files changed, 33 insertions(+), 29 deletions(-) diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 6f9345ee18f82..bd4c896a39e11 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -284,7 +284,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm jl_code_info_t *src = NULL; JL_GC_PUSH1(&src); auto ct = jl_current_task; - ct->reentrant_codegen++; + ct->reentrant_timing++; orc::ThreadSafeContext ctx; orc::ThreadSafeModule backing; if (!llvmmod) { @@ -471,12 +471,13 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm } data->M = std::move(clone); - if (measure_compile_time_enabled) - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); + if (!ct->reentrant_timing-- && measure_compile_time_enabled) { + auto end = jl_hrtime(); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); + } if (ctx.getContext()) { jl_ExecutionEngine->releaseContext(std::move(ctx)); } - ct->reentrant_codegen--; return (void*)data; } @@ -1138,8 +1139,10 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz F = cast(m.getModuleUnlocked()->getNamedValue(*fname)); } JL_GC_POP(); - if (measure_compile_time_enabled) - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); + if (measure_compile_time_enabled) { + auto end = jl_hrtime(); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); + } if (F) { dump->TSM = wrap(new orc::ThreadSafeModule(std::move(m))); dump->F = wrap(F); diff --git a/src/gf.c b/src/gf.c index 99c482420e2f2..2828d0e22e2d9 100644 --- a/src/gf.c +++ b/src/gf.c @@ -3542,7 +3542,7 @@ int jl_has_concrete_subtype(jl_value_t *typ) JL_DLLEXPORT void jl_typeinf_timing_begin(void) { jl_task_t *ct = jl_current_task; - if (ct->reentrant_inference == 1) { + if (!ct->reentrant_timing++) { ct->inference_start_time = jl_hrtime(); } } @@ -3550,7 +3550,7 @@ JL_DLLEXPORT void jl_typeinf_timing_begin(void) JL_DLLEXPORT void jl_typeinf_timing_end(void) { jl_task_t *ct = jl_current_task; - if (ct->reentrant_inference == 1) { + if (!--ct->reentrant_timing) { if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) { uint64_t inftime = jl_hrtime() - ct->inference_start_time; jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, inftime); @@ -3562,16 +3562,10 @@ JL_DLLEXPORT void jl_typeinf_timing_end(void) JL_DLLEXPORT void jl_typeinf_lock_begin(void) { JL_LOCK(&jl_codegen_lock); - //Although this is claiming to be a typeinfer lock, it is actually - //affecting the codegen lock count, not type inference's inferencing count - jl_task_t *ct = jl_current_task; - ct->reentrant_codegen++; } JL_DLLEXPORT void jl_typeinf_lock_end(void) { - jl_task_t *ct = jl_current_task; - ct->reentrant_codegen--; JL_UNLOCK(&jl_codegen_lock); } diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index f6ecd64e757d8..fecbc28cf8ea7 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -303,7 +303,7 @@ extern "C" JL_DLLEXPORT int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt) { auto ct = jl_current_task; - ct->reentrant_codegen++; + ct->reentrant_timing++; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) @@ -340,8 +340,10 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * jl_ExecutionEngine->addModule(std::move(*into)); } JL_UNLOCK(&jl_codegen_lock); - if (!--ct->reentrant_codegen && measure_compile_time_enabled) - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); + if (!--ct->reentrant_timing && measure_compile_time_enabled) { + auto end = jl_hrtime(); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); + } if (ctx.getContext()) { jl_ExecutionEngine->releaseContext(std::move(ctx)); } @@ -396,7 +398,7 @@ extern "C" JL_DLLEXPORT jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world) { auto ct = jl_current_task; - ct->reentrant_codegen++; + ct->reentrant_timing++; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); bool is_recompile = false; @@ -449,10 +451,11 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES codeinst = NULL; } JL_UNLOCK(&jl_codegen_lock); - if (!--ct->reentrant_codegen && measure_compile_time_enabled) { + if (!--ct->reentrant_timing && measure_compile_time_enabled) { uint64_t t_comp = jl_hrtime() - compiler_start_time; - if (is_recompile) + if (is_recompile) { jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp); + } jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp); } JL_GC_POP(); @@ -466,7 +469,7 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) return; } auto ct = jl_current_task; - ct->reentrant_codegen++; + ct->reentrant_timing++; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) @@ -500,8 +503,10 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) JL_GC_POP(); } JL_UNLOCK(&jl_codegen_lock); // Might GC - if (!--ct->reentrant_codegen && measure_compile_time_enabled) - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); + if (!--ct->reentrant_timing && measure_compile_time_enabled) { + auto end = jl_hrtime(); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); + } } @@ -522,7 +527,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, // (using sentinel value `1` instead) // so create an exception here so we can print pretty our lies auto ct = jl_current_task; - ct->reentrant_codegen++; + ct->reentrant_timing++; uint64_t compiler_start_time = 0; uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) @@ -552,8 +557,10 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, JL_GC_POP(); } JL_UNLOCK(&jl_codegen_lock); - if (!--ct->reentrant_codegen && measure_compile_time_enabled) - jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time)); + if (!--ct->reentrant_timing && measure_compile_time_enabled) { + auto end = jl_hrtime(); + jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time); + } } if (specfptr != 0) return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo, binary); diff --git a/src/julia.h b/src/julia.h index 84fecd5fe28c4..1395df4501329 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1938,7 +1938,7 @@ typedef struct _jl_task_t { size_t bufsz; // actual sizeof stkbuf uint64_t inference_start_time; // time when inference started uint16_t reentrant_inference; // How many times we've reentered inference - uint16_t reentrant_codegen; // How many times we've reentered codegen + uint16_t reentrant_timing; // How many times we've reentered timing unsigned int copy_stack:31; // sizeof stack for copybuf unsigned int started:1; } jl_task_t; diff --git a/src/task.c b/src/task.c index a5ebc1ce26005..1772127391183 100644 --- a/src/task.c +++ b/src/task.c @@ -938,7 +938,7 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->threadpoolid = ct->threadpoolid; t->ptls = NULL; t->world_age = ct->world_age; - t->reentrant_codegen = 0; + t->reentrant_timing = 0; t->reentrant_inference = 0; t->inference_start_time = 0; @@ -1526,7 +1526,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) ct->sticky = 1; ct->ptls = ptls; ct->world_age = 1; // OK to run Julia code on this task - ct->reentrant_codegen = 0; + ct->reentrant_timing = 0; ct->reentrant_inference = 0; ct->inference_start_time = 0; ptls->root_task = ct; From a2445e735afa7edd8696e8aa1748a68bc6c2c92e Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Wed, 28 Dec 2022 20:34:47 -0500 Subject: [PATCH 2/3] Add some timing tests --- test/misc.jl | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/test/misc.jl b/test/misc.jl index 8182312f45a6a..7359f5c2b96bb 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -353,6 +353,8 @@ end after_comp, after_recomp = Base.cumulative_compile_time_ns() # no need to turn timing off, @time will do that @test after_comp >= before_comp; +@test after_recomp >= before_recomp; +@test after_recomp - before_recomp <= after_comp - before_comp; # should be approximately 60,000,000 ns, we definitely shouldn't exceed 100x that value # failing this probably means an uninitialized variable somewhere @@ -360,6 +362,40 @@ after_comp, after_recomp = Base.cumulative_compile_time_ns() # no need to turn t end # redirect_stdout +# issue #48024, avoid overcounting timers +begin + double(x::Real) = 2x; + calldouble(container) = double(container[1]); + calldouble2(container) = calldouble(container); + + Base.Experimental.@force_compile; + local elapsed = Base.time_ns(); + Base.cumulative_compile_timing(true); + local compiles = Base.cumulative_compile_time_ns(); + @eval calldouble([1.0]); + Base.cumulative_compile_timing(false); + compiles = Base.cumulative_compile_time_ns() .- compiles; + elapsed = Base.time_ns() - elapsed; + + # compile time should be at most total time + @test compiles[1] <= elapsed + # recompile time should be at most compile time + @test compiles[2] <= compiles[1] + + elapsed = Base.time_ns(); + Base.cumulative_compile_timing(true); + compiles = Base.cumulative_compile_time_ns(); + @eval calldouble(1.0); + Base.cumulative_compile_timing(false); + compiles = Base.cumulative_compile_time_ns() .- compiles; + elapsed = Base.time_ns() - elapsed; + + # compile time should be at most total time + @test compiles[1] <= elapsed + # recompile time should be at most compile time + @test compiles[2] <= compiles[1] +end + macro capture_stdout(ex) quote mktemp() do fname, f From 399c0611dc7a24a117e6166ea0c9a4f603b43d51 Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Wed, 28 Dec 2022 21:27:52 -0500 Subject: [PATCH 3/3] Add macro-based timer test --- test/misc.jl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/test/misc.jl b/test/misc.jl index 7359f5c2b96bb..480334fdaf0ae 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -408,6 +408,27 @@ macro capture_stdout(ex) end end +# issue #48024, but with the time macro itself +begin + double(x::Real) = 2x; + calldouble(container) = double(container[1]); + calldouble2(container) = calldouble(container); + + local first = @capture_stdout @time @eval calldouble([1.0]) + local second = @capture_stdout @time @eval calldouble2(1.0) + + # these functions were not recompiled + local matches = collect(eachmatch(r"(\d+(?:\.\d+)?)%", first)) + @test length(matches) == 1 + @test parse(Float64, matches[1][1]) > 0.0 + @test parse(Float64, matches[1][1]) <= 100.0 + + matches = collect(eachmatch(r"(\d+(?:\.\d+)?)%", second)) + @test length(matches) == 1 + @test parse(Float64, matches[1][1]) > 0.0 + @test parse(Float64, matches[1][1]) <= 100.0 +end + # compilation reports in @time, @timev let f = gensym("f"), callf = gensym("callf"), call2f = gensym("call2f") @eval begin