Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Show any compile time spent in time and timev macros #37678

Merged
merged 11 commits into from
Sep 30, 2020
55 changes: 41 additions & 14 deletions base/timing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ function gc_alloc_count(diff::GC_Diff)
diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc
end

# cumulative total time spent on compilation
cumulative_compile_time_ns() = ccall(:jl_cumulative_compile_time_ns, UInt64, ())

# total time spend in garbage collection, in nanoseconds
gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ())
Expand Down Expand Up @@ -102,7 +104,7 @@ function format_bytes(bytes) # also used by InteractiveUtils
end
end

function time_print(elapsedtime, bytes=0, gctime=0, allocs=0)
function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0)
timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
length(timestr) < 10 && print(" "^(10 - length(timestr)))
print(timestr, " seconds")
Expand All @@ -118,15 +120,18 @@ function time_print(elapsedtime, bytes=0, gctime=0, allocs=0)
if gctime > 0
print(", ", Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
end
if compile_time > 0
print(", ", Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
end
if bytes != 0 || allocs != 0
print(")")
end
nothing
end

function timev_print(elapsedtime, diff::GC_Diff)
function timev_print(elapsedtime, diff::GC_Diff, compile_time)
allocs = gc_alloc_count(diff)
time_print(elapsedtime, diff.allocd, diff.total_time, allocs)
time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time)
print("\nelapsed time (ns): $elapsedtime\n")
padded_nonzero_print(diff.total_time, "gc time (ns)")
padded_nonzero_print(diff.allocd, "bytes allocated")
Expand All @@ -144,7 +149,8 @@ end

A macro to execute an expression, printing the time it took to execute, the number of
allocations, and the total number of bytes its execution caused to be allocated, before
returning the value of the expression.
returning the value of the expression. Any time spent garbage collecting (gc) or
compiling is shown as a percentage.

See also [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
[`@allocated`](@ref).
Expand All @@ -155,8 +161,13 @@ See also [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
reduce noise.

```julia-repl
julia> @time rand(10^6);
0.001525 seconds (7 allocations: 7.630 MiB)
julia> x = rand(10,10);

julia> @time x * x;
0.606588 seconds (2.19 M allocations: 116.555 MiB, 3.75% gc time, 99.94% compilation time)

julia> @time x * x;
0.000009 seconds (1 allocation: 896 bytes)

julia> @time begin
sleep(0.3)
Expand All @@ -170,12 +181,14 @@ macro time(ex)
quote
while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
local stats = gc_num()
local compile_elapsedtime = cumulative_compile_time_ns()
local elapsedtime = time_ns()
local val = $(esc(ex))
elapsedtime = time_ns() - elapsedtime
compile_elapsedtime = cumulative_compile_time_ns() - compile_elapsedtime
local diff = GC_Diff(gc_num(), stats)
time_print(elapsedtime, diff.allocd, diff.total_time,
gc_alloc_count(diff))
gc_alloc_count(diff), compile_elapsedtime)
println()
val
end
Expand All @@ -192,22 +205,36 @@ See also [`@time`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
[`@allocated`](@ref).

```julia-repl
julia> @timev rand(10^6);
0.001006 seconds (7 allocations: 7.630 MiB)
elapsed time (ns): 1005567
bytes allocated: 8000256
pool allocs: 6
malloc() calls: 1
julia> x = rand(10,10);

julia> @timev x * x;
0.546770 seconds (2.20 M allocations: 116.632 MiB, 4.23% gc time, 99.94% compilation time)
elapsed time (ns): 546769547
gc time (ns): 23115606
bytes allocated: 122297811
pool allocs: 2197930
non-pool GC allocs:1327
malloc() calls: 36
realloc() calls: 5
GC pauses: 3

julia> @timev x * x;
0.000010 seconds (1 allocation: 896 bytes)
elapsed time (ns): 9848
bytes allocated: 896
pool allocs: 1
```
"""
macro timev(ex)
quote
while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
local stats = gc_num()
local compile_elapsedtime = cumulative_compile_time_ns()
local elapsedtime = time_ns()
local val = $(esc(ex))
elapsedtime = time_ns() - elapsedtime
timev_print(elapsedtime, GC_Diff(gc_num(), stats))
compile_elapsedtime = cumulative_compile_time_ns() - compile_elapsedtime
timev_print(elapsedtime, GC_Diff(gc_num(), stats), compile_elapsedtime)
val
end
end
Expand Down
8 changes: 4 additions & 4 deletions doc/src/manual/performance-tips.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ julia> function sum_global()
end;

julia> @time sum_global()
0.017705 seconds (15.28 k allocations: 694.484 KiB)
0.009639 seconds (7.36 k allocations: 300.310 KiB, 98.32% compilation time)
496.84883432553846

julia> @time sum_global()
Expand Down Expand Up @@ -106,15 +106,15 @@ julia> function sum_arg(x)
end;

julia> @time sum_arg(x)
0.007701 seconds (821 allocations: 43.059 KiB)
0.006202 seconds (4.18 k allocations: 217.860 KiB, 99.72% compilation time)
496.84883432553846

julia> @time sum_arg(x)
0.000006 seconds (5 allocations: 176 bytes)
0.000005 seconds (1 allocation: 16 bytes)
496.84883432553846
```

The 5 allocations seen are from running the `@time` macro itself in global scope. If we instead run
The 1 allocation seen is from running the `@time` macro itself in global scope. If we instead run
the timing in a function, we can see that indeed no allocations are performed:

```jldoctest sumarg; filter = r"[0-9\.]+ seconds"
Expand Down
17 changes: 17 additions & 0 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
}
}

static uint64_t cumulative_compile_time = 0;

extern "C" JL_DLLEXPORT
uint64_t jl_cumulative_compile_time_ns() {
return cumulative_compile_time;
}

// this generates llvm code for the lambda info
// and adds the result to the jitlayers
// (and the shadow module),
Expand Down Expand Up @@ -210,6 +217,7 @@ extern "C" JL_DLLEXPORT
void jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
{
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = jl_hrtime();
jl_codegen_params_t params;
jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
if (pparams == NULL)
Expand All @@ -228,6 +236,7 @@ void jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declr
if (llvmmod == NULL)
jl_add_to_ee(std::unique_ptr<Module>(into));
}
cumulative_compile_time += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock);
}

Expand All @@ -251,8 +260,10 @@ void jl_extern_c(jl_value_t *declrt, jl_tupletype_t *sigt)
if (!jl_is_concrete_type(declrt) || jl_is_kind(declrt))
jl_error("@ccallable: return type must be concrete and correspond to a C type");
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = jl_hrtime();
if (!jl_type_mappable_to_c(declrt))
jl_error("@ccallable: return type doesn't correspond to a C type");
cumulative_compile_time += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock);

// validate method signature
Expand Down Expand Up @@ -281,6 +292,7 @@ extern "C"
jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
{
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
uint64_t compiler_start_time = jl_hrtime();
// if we don't have any decls already, try to generate it now
jl_code_info_t *src = NULL;
JL_GC_PUSH1(&src);
Expand Down Expand Up @@ -317,6 +329,7 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
else {
codeinst = NULL;
}
cumulative_compile_time += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock);
JL_GC_POP();
return codeinst;
Expand All @@ -329,6 +342,7 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
return;
}
JL_LOCK(&codegen_lock);
uint64_t compiler_start_time = jl_hrtime();
if (unspec->invoke == NULL) {
jl_code_info_t *src = NULL;
JL_GC_PUSH1(&src);
Expand All @@ -355,6 +369,7 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
}
JL_GC_POP();
}
cumulative_compile_time += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock); // Might GC
}

Expand All @@ -376,6 +391,7 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
// (using sentinel value `1` instead)
// so create an exception here so we can print pretty our lies
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
uint64_t compiler_start_time = jl_hrtime();
specfptr = (uintptr_t)codeinst->specptr.fptr;
if (specfptr == 0) {
jl_code_info_t *src = jl_type_infer(mi, world, 0);
Expand All @@ -399,6 +415,7 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
}
JL_GC_POP();
}
cumulative_compile_time += (jl_hrtime() - compiler_start_time);
JL_UNLOCK(&codegen_lock);
}
if (specfptr != 0)
Expand Down