Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Report any recompilation in time macro #45015

Merged
merged 10 commits into from
Apr 20, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 24 additions & 11 deletions base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,16 @@ function gc_alloc_count(diff::GC_Diff)
end

# cumulative total time spent on compilation, in nanoseconds
cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ())
cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ())
function cumulative_compile_time_ns_before()
comp = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ())
recomp = ccall(:jl_cumulative_recompile_time_ns_before, UInt64, ())
return comp, recomp
end
function cumulative_compile_time_ns_after()
comp = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ())
recomp = ccall(:jl_cumulative_recompile_time_ns_before, UInt64, ())
IanButterworth marked this conversation as resolved.
Show resolved Hide resolved
return comp, recomp
end

# total time spend in garbage collection, in nanoseconds
gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ())
Expand Down Expand Up @@ -114,7 +122,7 @@ function format_bytes(bytes) # also used by InteractiveUtils
end
end

function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, newline=false, _lpad=true)
function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, recompile_time=0, newline=false, _lpad=true)
timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
str = sprint() do io
_lpad && print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
Expand Down Expand Up @@ -142,15 +150,20 @@ function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, ne
end
print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
end
if recompile_time > 0
print(io, " of which ", Ryu.writefixed(Float64(100*recompile_time/compile_time), 0), "% was recompilation")
end
parens && print(io, ")")
end
newline ? println(str) : print(str)
nothing
end

function timev_print(elapsedtime, diff::GC_Diff, compile_time, _lpad)
function timev_print(elapsedtime, diff::GC_Diff, compile_times, _lpad)
allocs = gc_alloc_count(diff)
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, true, _lpad)
compile_time = first(compile_times)
recompile_time = last(compile_times)
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, recompile_time, true, _lpad)
padded_nonzero_print(elapsedtime, "elapsed time (ns)")
padded_nonzero_print(diff.total_time, "gc time (ns)")
padded_nonzero_print(diff.allocd, "bytes allocated")
Expand Down Expand Up @@ -238,16 +251,16 @@ macro time(msg, ex)
Experimental.@force_compile
local stats = gc_num()
local elapsedtime = time_ns()
local compile_elapsedtime = cumulative_compile_time_ns_before()
local compile_elapsedtimes = cumulative_compile_time_ns_before()
local val = @__tryfinally($(esc(ex)),
(elapsedtime = time_ns() - elapsedtime;
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime)
compile_elapsedtimes = cumulative_compile_time_ns_after() .- compile_elapsedtimes)
)
local diff = GC_Diff(gc_num(), stats)
local _msg = $(esc(msg))
local has_msg = !isnothing(_msg)
has_msg && print(_msg, ": ")
time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), compile_elapsedtime, true, !has_msg)
time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), first(compile_elapsedtimes), last(compile_elapsedtimes), true, !has_msg)
val
end
end
Expand Down Expand Up @@ -320,16 +333,16 @@ macro timev(msg, ex)
Experimental.@force_compile
local stats = gc_num()
local elapsedtime = time_ns()
local compile_elapsedtime = cumulative_compile_time_ns_before()
local compile_elapsedtimes = cumulative_compile_time_ns_before()
local val = @__tryfinally($(esc(ex)),
(elapsedtime = time_ns() - elapsedtime;
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime)
compile_elapsedtimes = cumulative_compile_time_ns_after() .- compile_elapsedtimes)
)
local diff = GC_Diff(gc_num(), stats)
local _msg = $(esc(msg))
local has_msg = !isnothing(_msg)
has_msg && print(_msg, ": ")
timev_print(elapsedtime, diff, compile_elapsedtime, !has_msg)
timev_print(elapsedtime, diff, compile_elapsedtimes, !has_msg)
val
end
end
Expand Down
13 changes: 11 additions & 2 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
auto &context = *ctx;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
bool is_recompile = false;
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
// if we don't have any decls already, try to generate it now
Expand All @@ -330,6 +331,10 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
else if (jl_is_method(mi->def.method))
src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
}
else {
// identify whether this is an invalidated method that is being recompiled
is_recompile = jl_atomic_load_relaxed(&mi->cache) != NULL;
}
if (src == NULL && jl_is_method(mi->def.method) &&
jl_symbol_name(mi->def.method->name)[0] != '@') {
if (mi->def.method->source != jl_nothing) {
Expand All @@ -356,8 +361,12 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
else {
codeinst = NULL;
}
if (jl_codegen_lock.count == 1 && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) {
uint64_t t_comp = jl_hrtime() - compiler_start_time;
if (is_recompile)
jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp);
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp);
}
JL_UNLOCK(&jl_codegen_lock);
JL_GC_POP();
return codeinst;
Expand Down
10 changes: 10 additions & 0 deletions src/jlapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,16 @@ JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns_after(void)
return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
}

JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns_before(void)
{
return jl_atomic_load_relaxed(&jl_cumulative_recompile_time);
}

JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns_after(void)
{
return jl_atomic_load_relaxed(&jl_cumulative_recompile_time);
}

JL_DLLEXPORT void jl_get_fenv_consts(int *ret)
{
ret[0] = FE_INEXACT;
Expand Down
1 change: 1 addition & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT
// Global *atomic* integers controlling *process-wide* measurement of compilation time.
extern JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled;
extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time;
extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time;

#define jl_return_address() ((uintptr_t)__builtin_return_address(0))

Expand Down
1 change: 1 addition & 0 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k)
jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled = 0;
JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time = 0;
JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time = 0;

// return calling thread's ID
// Also update the suspended_threads list in signals-mach when changing the
Expand Down
6 changes: 3 additions & 3 deletions test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ function timev_macro_scope()
end
@test timev_macro_scope() == 1

before = Base.cumulative_compile_time_ns_before();
before_comp, before_recomp = Base.cumulative_compile_time_ns_before()

# exercise concurrent calls to `@time` for reentrant compilation time measurement.
t1 = @async @time begin
Expand All @@ -347,8 +347,8 @@ t2 = @async begin
@time 2 + 2
end

after = Base.cumulative_compile_time_ns_after();
@test after >= before;
after_comp, after_recomp = Base.cumulative_compile_time_ns_after()
@test after_comp >= before_comp;

# wait for completion of these tasks before restoring stdout, to suppress their @time prints.
wait(t1); wait(t2)
Expand Down