Skip to content

Commit

Permalink
Report any recompilation in time macro (#45015)
Browse files Browse the repository at this point in the history
`@time` now shows if any of the compilation time was spent recompiling invalidated methods. The new percentage is % of the compilation time, not the total execution time.

(cherry picked from commit 7074f04)
  • Loading branch information
IanButterworth authored and KristofferC committed Jul 4, 2022
1 parent f83a9b3 commit b97d34a
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 33 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ New library features
* TCP socket objects now expose `closewrite` functionality and support half-open mode usage ([#40783]).
* `extrema` now accepts an `init` keyword argument ([#36265], [#43604]).
* `Iterators.countfrom` now accepts any type that defines `+` ([#37747]).
* `@time` now separates out % time spent recompiling invalidated methods ([#45015]).
* `@time_imports` now shows any compilation and recompilation time percentages per import ([#45064]).

Standard library changes
------------------------
Expand Down
50 changes: 36 additions & 14 deletions base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,21 @@ function gc_alloc_count(diff::GC_Diff)
diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc
end

# cumulative total time spent on compilation, in nanoseconds
cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ())
cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ())
# cumulative total time spent on compilation and recompilation, in nanoseconds
function cumulative_compile_time_ns()
comp = ccall(:jl_cumulative_compile_time_ns, UInt64, ())
recomp = ccall(:jl_cumulative_recompile_time_ns, UInt64, ())
return comp, recomp
end

function cumulative_compile_timing(b::Bool)
if b
ccall(:jl_cumulative_compile_timing_enable, Cvoid, ())
else
ccall(:jl_cumulative_compile_timing_disable, Cvoid, ())
end
return
end

# total time spend in garbage collection, in nanoseconds
gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ())
Expand Down Expand Up @@ -114,7 +126,7 @@ function format_bytes(bytes) # also used by InteractiveUtils
end
end

function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, newline=false, _lpad=true)
function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, recompile_time=0, newline=false, _lpad=true)
timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
str = sprint() do io
_lpad && print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
Expand Down Expand Up @@ -142,15 +154,20 @@ function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, ne
end
print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
end
if recompile_time > 0
print(io, ": ", Ryu.writefixed(Float64(100*recompile_time/compile_time), 0), "% of which was recompilation")
end
parens && print(io, ")")
end
newline ? println(str) : print(str)
nothing
end

function timev_print(elapsedtime, diff::GC_Diff, compile_time, _lpad)
function timev_print(elapsedtime, diff::GC_Diff, compile_times, _lpad)
allocs = gc_alloc_count(diff)
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, true, _lpad)
compile_time = first(compile_times)
recompile_time = last(compile_times)
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, recompile_time, true, _lpad)
padded_nonzero_print(elapsedtime, "elapsed time (ns)")
padded_nonzero_print(diff.total_time, "gc time (ns)")
padded_nonzero_print(diff.allocd, "bytes allocated")
Expand Down Expand Up @@ -181,8 +198,8 @@ end
A macro to execute an expression, printing the time it took to execute, the number of
allocations, and the total number of bytes its execution caused to be allocated, before
returning the value of the expression. Any time spent garbage collecting (gc) or
compiling is shown as a percentage.
returning the value of the expression. Any time spent garbage collecting (gc), compiling
new code, or recompiling invalidated code is shown as a percentage.
Optionally provide a description string to print before the time report.
Expand All @@ -201,6 +218,9 @@ See also [`@showtime`](@ref), [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@
!!! compat "Julia 1.8"
The option to add a description was introduced in Julia 1.8.
!!! compat "Julia 1.9"
Recompilation time being shown separately from compilation time was introduced in Julia 1.9
```julia-repl
julia> x = rand(10,10);
Expand Down Expand Up @@ -238,16 +258,18 @@ macro time(msg, ex)
Experimental.@force_compile
local stats = gc_num()
local elapsedtime = time_ns()
local compile_elapsedtime = cumulative_compile_time_ns_before()
cumulative_compile_timing(true)
local compile_elapsedtimes = cumulative_compile_time_ns()
local val = @__tryfinally($(esc(ex)),
(elapsedtime = time_ns() - elapsedtime;
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime)
cumulative_compile_timing(false);
compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes)
)
local diff = GC_Diff(gc_num(), stats)
local _msg = $(esc(msg))
local has_msg = !isnothing(_msg)
has_msg && print(_msg, ": ")
time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), compile_elapsedtime, true, !has_msg)
time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), first(compile_elapsedtimes), last(compile_elapsedtimes), true, !has_msg)
val
end
end
Expand Down Expand Up @@ -320,16 +342,16 @@ macro timev(msg, ex)
Experimental.@force_compile
local stats = gc_num()
local elapsedtime = time_ns()
local compile_elapsedtime = cumulative_compile_time_ns_before()
local compile_elapsedtimes = cumulative_compile_time_ns()
local val = @__tryfinally($(esc(ex)),
(elapsedtime = time_ns() - elapsedtime;
compile_elapsedtime = cumulative_compile_time_ns_after() - compile_elapsedtime)
compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes)
)
local diff = GC_Diff(gc_num(), stats)
local _msg = $(esc(msg))
local has_msg = !isnothing(_msg)
has_msg && print(_msg, ": ")
timev_print(elapsedtime, diff, compile_elapsedtime, !has_msg)
timev_print(elapsedtime, diff, compile_elapsedtimes, !has_msg)
val
end
end
Expand Down
13 changes: 11 additions & 2 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
bool is_recompile = false;
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
// if we don't have any decls already, try to generate it now
Expand All @@ -305,6 +306,10 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
else if (jl_is_method(mi->def.method))
src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
}
else {
// identify whether this is an invalidated method that is being recompiled
is_recompile = jl_atomic_load_relaxed(&mi->cache) != NULL;
}
if (src == NULL && jl_is_method(mi->def.method) &&
jl_symbol_name(mi->def.method->name)[0] != '@') {
if (mi->def.method->source != jl_nothing) {
Expand All @@ -331,8 +336,12 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
else {
codeinst = NULL;
}
if (jl_codegen_lock.count == 1 && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) {
uint64_t t_comp = jl_hrtime() - compiler_start_time;
if (is_recompile)
jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp);
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp);
}
JL_UNLOCK(&jl_codegen_lock);
JL_GC_POP();
return codeinst;
Expand Down
14 changes: 11 additions & 3 deletions src/jlapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -480,20 +480,28 @@ JL_DLLEXPORT void (jl_cpu_wake)(void)
jl_cpu_wake();
}

JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns_before(void)
JL_DLLEXPORT void jl_cumulative_compile_timing_enable(void)
{
// Increment the flag to allow reentrant callers to `@time`.
jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1);
return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
}

JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns_after(void)
JL_DLLEXPORT void jl_cumulative_compile_timing_disable(void)
{
// Decrement the flag when done measuring, allowing other callers to continue measuring.
jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1);
}

JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns(void)
{
return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
}

JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void)
{
return jl_atomic_load_relaxed(&jl_cumulative_recompile_time);
}

JL_DLLEXPORT void jl_get_fenv_consts(int *ret)
{
ret[0] = FE_INEXACT;
Expand Down
1 change: 1 addition & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT
// Global *atomic* integers controlling *process-wide* measurement of compilation time.
extern JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled;
extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time;
extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time;

#define jl_return_address() ((uintptr_t)__builtin_return_address(0))

Expand Down
1 change: 1 addition & 0 deletions src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k)
jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled = 0;
JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time = 0;
JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time = 0;

// return calling thread's ID
// Also update the suspended_threads list in signals-mach when changing the
Expand Down
27 changes: 13 additions & 14 deletions test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -334,24 +334,23 @@ function timev_macro_scope()
end
@test timev_macro_scope() == 1

before = Base.cumulative_compile_time_ns_before();
before_comp, before_recomp = Base.cumulative_compile_time_ns() # no need to turn timing on, @time will do that

# exercise concurrent calls to `@time` for reentrant compilation time measurement.
t1 = @async @time begin
sleep(2)
@eval module M ; f(x,y) = x+y ; end
@eval M.f(2,3)
end
t2 = @async begin
sleep(1)
@time 2 + 2
@sync begin
t1 = @async @time begin
sleep(2)
@eval module M ; f(x,y) = x+y ; end
@eval M.f(2,3)
end
t2 = @async begin
sleep(1)
@time 2 + 2
end
end

after = Base.cumulative_compile_time_ns_after();
@test after >= before;

# wait for completion of these tasks before restoring stdout, to suppress their @time prints.
wait(t1); wait(t2)
after_comp, after_recomp = Base.cumulative_compile_time_ns() # no need to turn timing off, @time will do that
@test after_comp >= before_comp;

end # redirect_stdout

Expand Down

0 comments on commit b97d34a

Please sign in to comment.