From 7df7be9dbe6a60effd81372e52c751cecb7f1937 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Wed, 19 Jan 2022 15:51:00 -0500 Subject: [PATCH] Allocation profiler (#42768) ## Overview Record the type and stack of every allocation (or only at a given sample interval), and return as Julia objects. Alternate approach to existing alloc profiler PR: https://github.com/JuliaLang/julia/pull/33467 Complementary to garbage profiler PR: https://github.com/JuliaLang/julia/pull/42658 (maybe there's some nice way to meld them) This may be reinventing the wheel from #33467, but I'm not sure why that one needs stuff like LLVM passes. I mimicked some stuff from it, but this was my attempt to get something up and running. Could easily be missing stuff. ## Usage: ```julia using Profile.Allocs res = Allocs.@profile sample_rate=0.001 my_func() prof = Allocs.fetch() # do something with `prof` ``` See also: https://github.com/JuliaPerf/PProf.jl/pull/46 for support for visualizing these. Co-authored-by: Nathan Daly --- NEWS.md | 3 + doc/src/manual/profile.md | 25 ++++ src/Makefile | 4 +- src/array.c | 1 + src/gc-alloc-profiler.cpp | 139 +++++++++++++++++++ src/gc-alloc-profiler.h | 49 +++++++ src/gc.h | 1 + src/julia_internal.h | 2 + stdlib/Profile/docs/src/index.md | 17 +++ stdlib/Profile/src/Allocs.jl | 230 +++++++++++++++++++++++++++++++ stdlib/Profile/src/Profile.jl | 2 + stdlib/Profile/test/allocs.jl | 147 ++++++++++++++++++++ stdlib/Profile/test/runtests.jl | 2 + 13 files changed, 620 insertions(+), 2 deletions(-) create mode 100644 src/gc-alloc-profiler.cpp create mode 100644 src/gc-alloc-profiler.h create mode 100644 stdlib/Profile/src/Allocs.jl create mode 100644 stdlib/Profile/test/allocs.jl diff --git a/NEWS.md b/NEWS.md index 98baf83ad1c1e9..a0aea442048ba8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -139,6 +139,9 @@ Standard library changes Further, percent utilization is now reported as a total or per-thread, based on whether the thread is idle or not at each sample. `Profile.fetch()` by default strips out the new metadata to ensure backwards compatibility with external profiling data consumers, but can be included with the `include_meta` kwarg. ([#41742]) +* The new `Profile.Allocs` module allows memory allocations to be profiled. The stack trace, type, and size of each + allocation is recorded, and a `sample_rate` argument allows a tunable amount of allocations to be skipped, + reducing performance overhead. ([#42768]) #### Random diff --git a/doc/src/manual/profile.md b/doc/src/manual/profile.md index 5596ebae512aa1..91b3c34a4fb20e 100644 --- a/doc/src/manual/profile.md +++ b/doc/src/manual/profile.md @@ -336,6 +336,31 @@ and how much garbage it collects each time. This can be enabled with [`GC.enable_logging(true)`](@ref), which causes Julia to log to stderr every time a garbage collection happens. +### Allocation Profiler + +The allocation profiler records the stack trace, type, and size of each +allocation while it is running. It can be invoked with +[`Profile.Allocs.@profile`](@ref). + +This information about the allocations is returned as an array of `Alloc` +objects, wrapped in an `AllocResults` object. The best way to visualize +these is currently with the [PProf.jl](https://github.com/JuliaPerf/PProf.jl) +library, which can visualize the call stacks which are making the most +allocations. + +The allocation profiler does have significant overhead, so a `sample_rate` +argument can be passed to speed it up by making it skip some allocations. +Passing `sample_rate=1.0` will make it record everything (which is slow); +`sample_rate=0.1` will record only 10% of the allocations (faster), etc. + +!!! note + + The current implementation of the Allocations Profiler _does not + capture all allocations._ You can read more about the missing allocations + and the plan to improve this, here: https://github.com/JuliaLang/julia/issues/43688. + Calling `Profile.Allocs.fetch()` will print a log line reporting the percentage + of missed allocations, so you can understand the accuracy of your profile. + ## External Profiling Currently Julia supports `Intel VTune`, `OProfile` and `perf` as external profiling tools. diff --git a/src/Makefile b/src/Makefile index 0f6f9abd5ee95b..b7235597fd08c5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -45,7 +45,7 @@ RUNTIME_SRCS := \ jltypes gf typemap smallintset ast builtins module interpreter symbol \ dlload sys init task array dump staticdata toplevel jl_uv datatype \ simplevector runtime_intrinsics precompile \ - threading partr stackwalk gc gc-debug gc-pages gc-stacks method \ + threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \ jlapi signal-handling safepoint timing subtype \ crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage SRCS := jloptions runtime_ccall rtutils @@ -289,7 +289,7 @@ $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR) $(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc builtin_proto.h serialize.h) $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h -$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h +$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h diff --git a/src/array.c b/src/array.c index 104052644428a8..d620278e34b141 100644 --- a/src/array.c +++ b/src/array.c @@ -508,6 +508,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len) s = jl_gc_big_alloc(ptls, allocsz); } jl_set_typeof(s, jl_string_type); + maybe_record_alloc_to_profile(s, len); *(size_t*)s = len; jl_string_data(s)[len] = 0; return s; diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp new file mode 100644 index 00000000000000..d00b2117e2c041 --- /dev/null +++ b/src/gc-alloc-profiler.cpp @@ -0,0 +1,139 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#include "gc-alloc-profiler.h" + +#include "julia_internal.h" +#include "gc.h" + +#include +#include + +using std::string; +using std::vector; + +struct jl_raw_backtrace_t { + jl_bt_element_t *data; + size_t size; +}; + +struct jl_raw_alloc_t { + jl_datatype_t *type_address; + jl_raw_backtrace_t backtrace; + size_t size; +}; + +// == These structs define the global singleton profile buffer that will be used by +// callbacks to store profile results. == +struct jl_per_thread_alloc_profile_t { + vector allocs; +}; + +struct jl_alloc_profile_t { + double sample_rate; + + vector per_thread_profiles; +}; + +struct jl_combined_results { + vector combined_allocs; +}; + +// == Global variables manipulated by callbacks == + +jl_alloc_profile_t g_alloc_profile; +int g_alloc_profile_enabled = false; +jl_combined_results g_combined_results; // Will live forever. + +// === stack stuff === + +jl_raw_backtrace_t get_raw_backtrace() { + // A single large buffer to record backtraces onto + static jl_bt_element_t static_bt_data[JL_MAX_BT_SIZE]; + + size_t bt_size = rec_backtrace(static_bt_data, JL_MAX_BT_SIZE, 2); + + // Then we copy only the needed bytes out of the buffer into our profile. + size_t bt_bytes = bt_size * sizeof(jl_bt_element_t); + jl_bt_element_t *bt_data = (jl_bt_element_t*) malloc(bt_bytes); + memcpy(bt_data, static_bt_data, bt_bytes); + + return jl_raw_backtrace_t{ + bt_data, + bt_size + }; +} + +// == exported interface == + +extern "C" { // Needed since these functions doesn't take any arguments. + +JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate) { + // We only need to do this once, the first time this is called. + while (g_alloc_profile.per_thread_profiles.size() < jl_n_threads) { + g_alloc_profile.per_thread_profiles.push_back(jl_per_thread_alloc_profile_t{}); + } + + g_alloc_profile.sample_rate = sample_rate; + g_alloc_profile_enabled = true; +} + +JL_DLLEXPORT jl_profile_allocs_raw_results_t jl_fetch_alloc_profile() { + // combine allocs + // TODO: interleave to preserve ordering + for (auto& profile : g_alloc_profile.per_thread_profiles) { + for (const auto& alloc : profile.allocs) { + g_combined_results.combined_allocs.push_back(alloc); + } + + profile.allocs.clear(); + } + + return jl_profile_allocs_raw_results_t{ + g_combined_results.combined_allocs.data(), + g_combined_results.combined_allocs.size(), + }; +} + +JL_DLLEXPORT void jl_stop_alloc_profile() { + g_alloc_profile_enabled = false; +} + +JL_DLLEXPORT void jl_free_alloc_profile() { + // Free any allocs that remain in the per-thread profiles, that haven't + // been combined yet (which happens in fetch_alloc_profiles()). + for (auto& profile : g_alloc_profile.per_thread_profiles) { + for (auto alloc : profile.allocs) { + free(alloc.backtrace.data); + } + profile.allocs.clear(); + } + + // Free the allocs that have been already combined into the combined results object. + for (auto alloc : g_combined_results.combined_allocs) { + free(alloc.backtrace.data); + } + + g_combined_results.combined_allocs.clear(); +} + +// == callback called into by the outside == + +void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size) JL_NOTSAFEPOINT { + auto& global_profile = g_alloc_profile; + auto& profile = global_profile.per_thread_profiles[jl_threadid()]; + + auto sample_val = double(rand()) / double(RAND_MAX); + auto should_record = sample_val <= global_profile.sample_rate; + if (!should_record) { + return; + } + + auto type = (jl_datatype_t*)jl_typeof(val); + profile.allocs.emplace_back(jl_raw_alloc_t{ + type, + get_raw_backtrace(), + size + }); +} + +} // extern "C" diff --git a/src/gc-alloc-profiler.h b/src/gc-alloc-profiler.h new file mode 100644 index 00000000000000..3509b77daa1fcf --- /dev/null +++ b/src/gc-alloc-profiler.h @@ -0,0 +1,49 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#ifndef JL_GC_ALLOC_PROFILER_H +#define JL_GC_ALLOC_PROFILER_H + +#include "julia.h" +#include "ios.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// --------------------------------------------------------------------- +// The public interface to call from Julia for allocations profiling +// --------------------------------------------------------------------- + +// Forward-declaration to avoid depenency in header file. +struct jl_raw_alloc_t; // Defined in gc-alloc-profiler.cpp + +typedef struct { + struct jl_raw_alloc_t *allocs; + size_t num_allocs; +} jl_profile_allocs_raw_results_t; + +JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate); +JL_DLLEXPORT jl_profile_allocs_raw_results_t jl_fetch_alloc_profile(void); +JL_DLLEXPORT void jl_stop_alloc_profile(void); +JL_DLLEXPORT void jl_free_alloc_profile(void); + +// --------------------------------------------------------------------- +// Functions to call from GC when alloc profiling is enabled +// --------------------------------------------------------------------- + +void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size) JL_NOTSAFEPOINT; + +extern int g_alloc_profile_enabled; + +static inline void maybe_record_alloc_to_profile(jl_value_t *val, size_t size) JL_NOTSAFEPOINT { + if (__unlikely(g_alloc_profile_enabled)) { + _maybe_record_alloc_to_profile(val, size); + } +} + +#ifdef __cplusplus +} +#endif + + +#endif // JL_GC_ALLOC_PROFILER_H diff --git a/src/gc.h b/src/gc.h index f34ddaee5ee64a..544486d933e105 100644 --- a/src/gc.h +++ b/src/gc.h @@ -24,6 +24,7 @@ #endif #endif #include "julia_assert.h" +#include "gc-alloc-profiler.h" #ifdef __cplusplus extern "C" { diff --git a/src/julia_internal.h b/src/julia_internal.h index da5195b3cd83c1..1fbfb082e4a8f1 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -9,6 +9,7 @@ #include "support/hashing.h" #include "support/ptrhash.h" #include "support/strtod.h" +#include "gc-alloc-profiler.h" #include #if !defined(_WIN32) #include @@ -364,6 +365,7 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) v = jl_gc_big_alloc(ptls, allocsz); } jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz); return v; } diff --git a/stdlib/Profile/docs/src/index.md b/stdlib/Profile/docs/src/index.md index ac60bb92cb5ed8..89894723b11165 100644 --- a/stdlib/Profile/docs/src/index.md +++ b/stdlib/Profile/docs/src/index.md @@ -1,5 +1,7 @@ # [Profiling](@id lib-profiling) +## CPU Profiling + ```@docs Profile.@profile ``` @@ -15,3 +17,18 @@ Profile.retrieve Profile.callers Profile.clear_malloc_data ``` + +## Memory profiling + +```@docs +Profile.Allocs.@profile +``` + +The methods in `Profile.Allocs` are not exported and need to be called e.g. as `Profile.Allocs.fetch()`. + +```@docs +Profile.Allocs.clear +Profile.Allocs.fetch +Profile.Allocs.start +Profile.Allocs.stop +``` diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl new file mode 100644 index 00000000000000..922b31932cb0fd --- /dev/null +++ b/stdlib/Profile/src/Allocs.jl @@ -0,0 +1,230 @@ +module Allocs + +using Base.StackTraces: StackTrace, StackFrame, lookup +using Base: InterpreterIP + +# --- Raw results structs, originally defined in C --- + +# The C jl_bt_element_t object contains either an IP pointer (size_t) or a void*. +const BTElement = Csize_t; + +# matches jl_raw_backtrace_t on the C side +struct RawBacktrace + data::Ptr{BTElement} # in C: *jl_bt_element_t + size::Csize_t +end + +# matches jl_raw_alloc_t on the C side +struct RawAlloc + type::Ptr{Type} + backtrace::RawBacktrace + size::Csize_t +end + +# matches jl_profile_allocs_raw_results_t on the C side +struct RawResults + allocs::Ptr{RawAlloc} + num_allocs::Csize_t +end + +""" + Profile.Allocs.@profile [sample_rate=0.0001] expr + +Profile allocations that happen during `expr`, returning +both the result and and AllocResults struct. + +A sample rate of 1.0 will record everything; 0.0 will record nothing. + +```julia +julia> Profile.Allocs.@profile sample_rate=0.01 peakflops() +1.03733270279065e11 + +julia> results = Profile.Allocs.fetch() + +julia> last(sort(results.allocs, by=x->x.size)) +Profile.Allocs.Alloc(Vector{Any}, Base.StackTraces.StackFrame[_new_array_ at array.c:127, ...], 5576) +``` +""" +macro profile(opts, ex) + _prof_expr(ex, opts) +end +macro profile(ex) + _prof_expr(ex, :(sample_rate=0.0001)) +end + +# globals used for tracking how many allocs we're missing +# vs the alloc counters used by @time +const _g_gc_num_before = Ref{Base.GC_Num}() +const _g_sample_rate = Ref{Real}() +const _g_expected_sampled_allocs = Ref{Float64}(0) + +function _prof_expr(expr, opts) + quote + $start(; $(esc(opts))) + local res = $(esc(expr)) + $stop() + res + end +end + +""" + Profile.Allocs.start(sample_rate::Real) + +Begin recording allocations with the given sample rate +A sample rate of 1.0 will record everything; 0.0 will record nothing. +""" +function start(; sample_rate::Real) + ccall(:jl_start_alloc_profile, Cvoid, (Cdouble,), Float64(sample_rate)) + + _g_sample_rate[] = sample_rate + _g_gc_num_before[] = Base.gc_num() +end + +""" + Profile.Allocs.stop() + +Stop recording allocations. +""" +function stop() + ccall(:jl_stop_alloc_profile, Cvoid, ()) + + # increment a counter of how many allocs we would expect + # the memory profiler to see, based on how many allocs + # actually happened. + gc_num_after = Base.gc_num() + gc_diff = Base.GC_Diff(gc_num_after, _g_gc_num_before[]) + alloc_count = Base.gc_alloc_count(gc_diff) + expected_samples = alloc_count * _g_sample_rate[] + _g_expected_sampled_allocs[] += expected_samples +end + +""" + Profile.Allocs.clear() + +Clear all previously profiled allocation information from memory. +""" +function clear() + ccall(:jl_free_alloc_profile, Cvoid, ()) + + _g_expected_sampled_allocs[] = 0 + return nothing +end + +""" + Profile.Allocs.fetch() + +Retrieve the recorded allocations, and decode them into Julia +objects which can be analyzed. +""" +function fetch() + raw_results = ccall(:jl_fetch_alloc_profile, RawResults, ()) + decoded_results = decode(raw_results) + + # avoid divide-by-0 errors + if _g_expected_sampled_allocs[] > 0 + missed_allocs = max(0, _g_expected_sampled_allocs[] - length(decoded_results.allocs)) + missed_percentage = max(0, round(Int, missed_allocs / _g_expected_sampled_allocs[] * 100)) + if missed_percentage > 0 + @warn("The allocation profiler is not fully implemented, and missed approximately" * + " $(missed_percentage)% (estimated $(round(Int, missed_allocs)) / $(round(Int, + _g_expected_sampled_allocs[]))) " * + "of sampled allocs in the last run. " * + "For more info see https://github.com/JuliaLang/julia/issues/43688") + else + @warn("The allocation profiler is not fully implemented, and may have missed" * + " some of the allocs. " * + "For more info see https://github.com/JuliaLang/julia/issues/43688") + end + end + return decoded_results +end + +# decoded results + +struct Alloc + type::Any + stacktrace::StackTrace + size::Int +end + +struct AllocResults + allocs::Vector{Alloc} +end + +# Without this, the Alloc's stacktrace prints for lines and lines and lines... +function Base.show(io::IO, a::Alloc) + stacktrace_sample = length(a.stacktrace) >= 1 ? "$(a.stacktrace[1]), ..." : "" + print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))") +end + +const BacktraceCache = Dict{BTElement,Vector{StackFrame}} + +# copied from julia_internal.h +const JL_BUFF_TAG = UInt(0x4eadc000) + +struct CorruptType end +struct BufferType end + +function load_type(ptr::Ptr{Type}) + if UInt(ptr) < UInt(4096) + return CorruptType + elseif UInt(ptr) == JL_BUFF_TAG + return BufferType + end + return unsafe_pointer_to_objref(ptr) +end + +function decode_alloc(cache::BacktraceCache, raw_alloc::RawAlloc)::Alloc + Alloc( + load_type(raw_alloc.type), + stacktrace_memoized(cache, load_backtrace(raw_alloc.backtrace)), + UInt(raw_alloc.size) + ) +end + +function decode(raw_results::RawResults)::AllocResults + cache = BacktraceCache() + allocs = [ + decode_alloc(cache, unsafe_load(raw_results.allocs, i)) + for i in 1:raw_results.num_allocs + ] + return AllocResults(allocs) +end + +function load_backtrace(trace::RawBacktrace)::Vector{BTElement} + out = Vector{BTElement}() + for i in 1:trace.size + push!(out, unsafe_load(trace.data, i)) + end + + return out +end + +function stacktrace_memoized( + cache::BacktraceCache, + trace::Vector{BTElement}, + c_funcs::Bool=true +)::StackTrace + stack = StackTrace() + for ip in trace + frames = get(cache, ip) do + res = lookup(ip) + cache[ip] = res + return res + end + for frame in frames + # Skip frames that come from C calls. + if c_funcs || !frame.from_c + push!(stack, frame) + end + end + end + return stack +end + +# Precompile once for the package cache. +@assert precompile(start, ()) +@assert precompile(stop, ()) +@assert precompile(fetch, ()) + +end diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 3409e79bdb128f..f969df5ed040e9 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -1139,4 +1139,6 @@ function warning_empty(;summary = false) end end +include("Allocs.jl") + end # module diff --git a/stdlib/Profile/test/allocs.jl b/stdlib/Profile/test/allocs.jl new file mode 100644 index 00000000000000..4eae5b0cf68c19 --- /dev/null +++ b/stdlib/Profile/test/allocs.jl @@ -0,0 +1,147 @@ +using Test +using Profile: Allocs + +@testset "alloc profiler doesn't segfault" begin + res = Allocs.@profile sample_rate=1.0 begin + # test the allocations during compilation + using Base64 + end + profile = Allocs.fetch() + + @test length(profile.allocs) > 0 + first_alloc = profile.allocs[1] + @test first_alloc.size > 0 + @test length(first_alloc.stacktrace) > 0 + @test length(string(first_alloc.type)) > 0 +end + +@testset "alloc profiler works when there are multiple tasks on multiple threads" begin + NUM_TASKS = 1000 + + # This test is only really meaningful if we're running on + # multiple threads, but this isn't true on the windows tests, + # causing them to fail. So, commenting this assertion out. + # @test Threads.nthreads() > 1 + + function do_work() + ch = Channel{Vector{Float64}}(Inf) + @sync for i in 1:NUM_TASKS + Threads.@spawn begin + # generate garbage + put!(ch, zeros(100)) + end + end + close(ch) + end + + # call once to make sure it's compiled + precompile(do_work, ()) + do_work() + + res = Allocs.@profile sample_rate=1 begin + do_work() + end + profile = Allocs.fetch() + + # expecting at least 2 allocations per task: + # 1. the task + # 2. the vector + @test length(profile.allocs) >= 2*NUM_TASKS + first_alloc = profile.allocs[1] + @test first_alloc.size > 0 + @test length(first_alloc.stacktrace) > 0 + @test length(string(first_alloc.type)) > 0 + + @testset for type in (Task, Vector{Float64},) + @test length(filter(a->a.type <: type, profile.allocs)) >= NUM_TASKS + end + + # TODO: it would be nice to assert that these tasks + # were actually scheduled onto multiple threads, + # and we see allocs from all threads in the profile +end + +@testset "alloc profiler start stop fetch clear" begin + function do_work() + # Compiling allocates a lot + for f in (gensym() for _ in 1:10) + @eval begin + $f() = 10 + $f() + end + end + end + + Allocs.@profile sample_rate=1 do_work() + @test length(Allocs.fetch().allocs) > 10 + + Allocs.clear() + @test length(Allocs.fetch().allocs) == 0 + Allocs.clear() + @test length(Allocs.fetch().allocs) == 0 + + Allocs.@profile sample_rate=1 do_work() + curr_allocs = length(Allocs.fetch().allocs) + @test curr_allocs > 10 + + # Do _more_ work, adding into the same profile + Allocs.@profile sample_rate=1 do_work() + @test length(Allocs.fetch().allocs) > curr_allocs + + Allocs.clear() + @test length(Allocs.fetch().allocs) == 0 + + # Clear without fetching + + Allocs.@profile sample_rate=1 do_work() + Allocs.clear() + @test length(Allocs.fetch().allocs) == 0 + + # And things still work like normal afterwards + + Allocs.@profile sample_rate=1 do_work() + Allocs.@profile sample_rate=1 do_work() + Allocs.@profile sample_rate=1 do_work() + @test length(Allocs.fetch().allocs) > 10 + + Allocs.@profile sample_rate=1 do_work() + Allocs.@profile sample_rate=1 do_work() + @test length(Allocs.fetch().allocs) > 10 + + Allocs.clear() +end + +@testset "alloc profiler warning message" begin + @testset "no allocs" begin + Profile.Allocs.clear() + Profile.Allocs.fetch() + end + @testset "catches all allocations" begin + foo() = [] + precompile(foo, ()) + Profile.Allocs.clear() + Profile.Allocs.@profile sample_rate=1 foo() + # Fake that we expected exactly 1 alloc, since we should have recorded >= 1 + Profile.Allocs._g_expected_sampled_allocs[] = 1 + @assert length(Profile.Allocs.fetch().allocs) >= 1 + end + @testset "misses some allocations" begin + foo() = [] + precompile(foo, ()) + Profile.Allocs.clear() + Profile.Allocs.@profile sample_rate=1 foo() + # Fake some allocs that we missed, to force the print statement + Profile.Allocs._g_expected_sampled_allocs[] += 10 + @assert 1 <= length(Profile.Allocs.fetch().allocs) < 10 + end +end + +@testset "alloc profiler catches strings" begin + Allocs.@profile sample_rate=1 "$(rand())" + + prof = Allocs.fetch() + Allocs.clear() + + @test length(prof.allocs) >= 1 + @test length([a for a in prof.allocs if a.type == String]) >= 1 +end diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index ac7c8baefe09e1..ddddac55fcd931 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -219,3 +219,5 @@ end node = root.down[stackframe(:f1, :file1, 2)] @test only(node.down).first == lidict[8] end + +include("allocs.jl")