Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't double-count inference time #48033

Merged
merged 3 commits into from
Dec 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
jl_code_info_t *src = NULL;
JL_GC_PUSH1(&src);
auto ct = jl_current_task;
ct->reentrant_codegen++;
ct->reentrant_timing++;
orc::ThreadSafeContext ctx;
orc::ThreadSafeModule backing;
if (!llvmmod) {
Expand Down Expand Up @@ -471,12 +471,13 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
}

data->M = std::move(clone);
if (measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (!ct->reentrant_timing-- && measure_compile_time_enabled) {
auto end = jl_hrtime();
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
}
if (ctx.getContext()) {
jl_ExecutionEngine->releaseContext(std::move(ctx));
}
ct->reentrant_codegen--;
return (void*)data;
}

Expand Down Expand Up @@ -1138,8 +1139,10 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz
F = cast<Function>(m.getModuleUnlocked()->getNamedValue(*fname));
}
JL_GC_POP();
if (measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (measure_compile_time_enabled) {
auto end = jl_hrtime();
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
}
if (F) {
dump->TSM = wrap(new orc::ThreadSafeModule(std::move(m)));
dump->F = wrap(F);
Expand Down
10 changes: 2 additions & 8 deletions src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3542,15 +3542,15 @@ int jl_has_concrete_subtype(jl_value_t *typ)
JL_DLLEXPORT void jl_typeinf_timing_begin(void)
{
jl_task_t *ct = jl_current_task;
if (ct->reentrant_inference == 1) {
if (!ct->reentrant_timing++) {
ct->inference_start_time = jl_hrtime();
}
}

JL_DLLEXPORT void jl_typeinf_timing_end(void)
{
jl_task_t *ct = jl_current_task;
if (ct->reentrant_inference == 1) {
if (!--ct->reentrant_timing) {
if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) {
uint64_t inftime = jl_hrtime() - ct->inference_start_time;
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, inftime);
Expand All @@ -3562,16 +3562,10 @@ JL_DLLEXPORT void jl_typeinf_timing_end(void)
JL_DLLEXPORT void jl_typeinf_lock_begin(void)
{
JL_LOCK(&jl_codegen_lock);
//Although this is claiming to be a typeinfer lock, it is actually
//affecting the codegen lock count, not type inference's inferencing count
jl_task_t *ct = jl_current_task;
ct->reentrant_codegen++;
}

JL_DLLEXPORT void jl_typeinf_lock_end(void)
{
jl_task_t *ct = jl_current_task;
ct->reentrant_codegen--;
JL_UNLOCK(&jl_codegen_lock);
}

Expand Down
31 changes: 19 additions & 12 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ extern "C" JL_DLLEXPORT
int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
{
auto ct = jl_current_task;
ct->reentrant_codegen++;
ct->reentrant_timing++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
Expand Down Expand Up @@ -340,8 +340,10 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *
jl_ExecutionEngine->addModule(std::move(*into));
}
JL_UNLOCK(&jl_codegen_lock);
if (!--ct->reentrant_codegen && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (!--ct->reentrant_timing && measure_compile_time_enabled) {
auto end = jl_hrtime();
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
}
if (ctx.getContext()) {
jl_ExecutionEngine->releaseContext(std::move(ctx));
}
Expand Down Expand Up @@ -396,7 +398,7 @@ extern "C" JL_DLLEXPORT
jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
{
auto ct = jl_current_task;
ct->reentrant_codegen++;
ct->reentrant_timing++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
bool is_recompile = false;
Expand Down Expand Up @@ -449,10 +451,11 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
codeinst = NULL;
}
JL_UNLOCK(&jl_codegen_lock);
if (!--ct->reentrant_codegen && measure_compile_time_enabled) {
if (!--ct->reentrant_timing && measure_compile_time_enabled) {
uint64_t t_comp = jl_hrtime() - compiler_start_time;
if (is_recompile)
if (is_recompile) {
jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp);
}
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp);
}
JL_GC_POP();
Expand All @@ -466,7 +469,7 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
return;
}
auto ct = jl_current_task;
ct->reentrant_codegen++;
ct->reentrant_timing++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
Expand Down Expand Up @@ -500,8 +503,10 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
JL_GC_POP();
}
JL_UNLOCK(&jl_codegen_lock); // Might GC
if (!--ct->reentrant_codegen && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (!--ct->reentrant_timing && measure_compile_time_enabled) {
auto end = jl_hrtime();
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
}
}


Expand All @@ -522,7 +527,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
// (using sentinel value `1` instead)
// so create an exception here so we can print pretty our lies
auto ct = jl_current_task;
ct->reentrant_codegen++;
ct->reentrant_timing++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
Expand Down Expand Up @@ -552,8 +557,10 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
JL_GC_POP();
}
JL_UNLOCK(&jl_codegen_lock);
if (!--ct->reentrant_codegen && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (!--ct->reentrant_timing && measure_compile_time_enabled) {
auto end = jl_hrtime();
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
}
}
if (specfptr != 0)
return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo, binary);
Expand Down
2 changes: 1 addition & 1 deletion src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -1938,7 +1938,7 @@ typedef struct _jl_task_t {
size_t bufsz; // actual sizeof stkbuf
uint64_t inference_start_time; // time when inference started
uint16_t reentrant_inference; // How many times we've reentered inference
uint16_t reentrant_codegen; // How many times we've reentered codegen
uint16_t reentrant_timing; // How many times we've reentered timing
unsigned int copy_stack:31; // sizeof stack for copybuf
unsigned int started:1;
} jl_task_t;
Expand Down
4 changes: 2 additions & 2 deletions src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,7 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
t->threadpoolid = ct->threadpoolid;
t->ptls = NULL;
t->world_age = ct->world_age;
t->reentrant_codegen = 0;
t->reentrant_timing = 0;
t->reentrant_inference = 0;
t->inference_start_time = 0;

Expand Down Expand Up @@ -1526,7 +1526,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
ct->sticky = 1;
ct->ptls = ptls;
ct->world_age = 1; // OK to run Julia code on this task
ct->reentrant_codegen = 0;
ct->reentrant_timing = 0;
ct->reentrant_inference = 0;
ct->inference_start_time = 0;
ptls->root_task = ct;
Expand Down
57 changes: 57 additions & 0 deletions test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -353,13 +353,49 @@ end

after_comp, after_recomp = Base.cumulative_compile_time_ns() # no need to turn timing off, @time will do that
@test after_comp >= before_comp;
@test after_recomp >= before_recomp;
@test after_recomp - before_recomp <= after_comp - before_comp;

# should be approximately 60,000,000 ns, we definitely shouldn't exceed 100x that value
# failing this probably means an uninitialized variable somewhere
@test after_comp - before_comp < 6_000_000_000;

end # redirect_stdout

# issue #48024, avoid overcounting timers
begin
double(x::Real) = 2x;
calldouble(container) = double(container[1]);
calldouble2(container) = calldouble(container);

Base.Experimental.@force_compile;
local elapsed = Base.time_ns();
Base.cumulative_compile_timing(true);
local compiles = Base.cumulative_compile_time_ns();
@eval calldouble([1.0]);
Base.cumulative_compile_timing(false);
compiles = Base.cumulative_compile_time_ns() .- compiles;
elapsed = Base.time_ns() - elapsed;

# compile time should be at most total time
@test compiles[1] <= elapsed
# recompile time should be at most compile time
@test compiles[2] <= compiles[1]

elapsed = Base.time_ns();
Base.cumulative_compile_timing(true);
compiles = Base.cumulative_compile_time_ns();
@eval calldouble(1.0);
Base.cumulative_compile_timing(false);
compiles = Base.cumulative_compile_time_ns() .- compiles;
elapsed = Base.time_ns() - elapsed;

# compile time should be at most total time
@test compiles[1] <= elapsed
# recompile time should be at most compile time
@test compiles[2] <= compiles[1]
end

macro capture_stdout(ex)
quote
mktemp() do fname, f
Expand All @@ -372,6 +408,27 @@ macro capture_stdout(ex)
end
end

# issue #48024, but with the time macro itself
begin
double(x::Real) = 2x;
calldouble(container) = double(container[1]);
calldouble2(container) = calldouble(container);

local first = @capture_stdout @time @eval calldouble([1.0])
local second = @capture_stdout @time @eval calldouble2(1.0)

# these functions were not recompiled
local matches = collect(eachmatch(r"(\d+(?:\.\d+)?)%", first))
@test length(matches) == 1
@test parse(Float64, matches[1][1]) > 0.0
@test parse(Float64, matches[1][1]) <= 100.0

matches = collect(eachmatch(r"(\d+(?:\.\d+)?)%", second))
@test length(matches) == 1
@test parse(Float64, matches[1][1]) > 0.0
@test parse(Float64, matches[1][1]) <= 100.0
end

# compilation reports in @time, @timev
let f = gensym("f"), callf = gensym("callf"), call2f = gensym("call2f")
@eval begin
Expand Down