From 0aa38fbc6a9de8fe64e4cf5cec58fd5279d1863b Mon Sep 17 00:00:00 2001
From: Pete Vilter <7341+vilterp@users.noreply.github.com>
Date: Mon, 7 Feb 2022 12:16:48 -0500
Subject: [PATCH] allocation profiler: get stacks for pool allocs via wrapper
 function (#43868)

---
 src/array.c                  |  6 ++++--
 src/gc-alloc-profiler.cpp    |  3 +--
 src/gc-alloc-profiler.h      |  8 +++++---
 src/gc.c                     | 23 +++++++++++++++++++++--
 src/julia_internal.h         | 10 ++++++----
 stdlib/Profile/src/Allocs.jl |  4 ++++
 6 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/src/array.c b/src/array.c
index d620278e34b14..fcfe22ed454d4 100644
--- a/src/array.c
+++ b/src/array.c
@@ -500,7 +500,9 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
         int pool_id = jl_gc_szclass_align8(allocsz);
         jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
         int osize = jl_gc_sizeclasses[pool_id];
-        s = jl_gc_pool_alloc(ptls, (char*)p - (char*)ptls, osize);
+        // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
+        // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
+        s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
     }
     else {
         if (allocsz < sz) // overflow in adding offs, size was "negative"
@@ -508,7 +510,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
         s = jl_gc_big_alloc(ptls, allocsz);
     }
     jl_set_typeof(s, jl_string_type);
-    maybe_record_alloc_to_profile(s, len);
+    maybe_record_alloc_to_profile(s, len, jl_string_type);
     *(size_t*)s = len;
     jl_string_data(s)[len] = 0;
     return s;
diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp
index d1859aeae4be4..c799e10143478 100644
--- a/src/gc-alloc-profiler.cpp
+++ b/src/gc-alloc-profiler.cpp
@@ -118,7 +118,7 @@ JL_DLLEXPORT void jl_free_alloc_profile() {
 
 // == callback called into by the outside ==
 
-void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size) JL_NOTSAFEPOINT {
+void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t *type) JL_NOTSAFEPOINT {
     auto& global_profile = g_alloc_profile;
     auto thread_id = jl_atomic_load_relaxed(&jl_current_task->tid);
     auto& profile = global_profile.per_thread_profiles[thread_id];
@@ -129,7 +129,6 @@ void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size) JL_NOTSAFEPOIN
         return;
     }
 
-    auto type = (jl_datatype_t*)jl_typeof(val);
     profile.allocs.emplace_back(jl_raw_alloc_t{
         type,
         get_raw_backtrace(),
diff --git a/src/gc-alloc-profiler.h b/src/gc-alloc-profiler.h
index 3509b77daa1fc..8be6fed21a899 100644
--- a/src/gc-alloc-profiler.h
+++ b/src/gc-alloc-profiler.h
@@ -31,13 +31,15 @@ JL_DLLEXPORT void jl_free_alloc_profile(void);
 // Functions to call from GC when alloc profiling is enabled
 // ---------------------------------------------------------------------
 
-void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size) JL_NOTSAFEPOINT;
+void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t *typ) JL_NOTSAFEPOINT;
 
 extern int g_alloc_profile_enabled;
 
-static inline void maybe_record_alloc_to_profile(jl_value_t *val, size_t size) JL_NOTSAFEPOINT {
+#define jl_gc_unknown_type_tag ((jl_datatype_t*)0xdeadaa03)
+
+static inline void maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t *typ) JL_NOTSAFEPOINT {
     if (__unlikely(g_alloc_profile_enabled)) {
-        _maybe_record_alloc_to_profile(val, size);
+        _maybe_record_alloc_to_profile(val, size, typ);
     }
 }
 
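Note on the new `type` parameter above: before this change, `_maybe_record_alloc_to_profile` derived the type itself with `jl_typeof(val)`. That only works once the tag word in front of the value has been written, which has not yet happened when a pool allocation is recorded from inside the allocator, so the profiler now takes the type explicitly, and callers that cannot know it pass the `jl_gc_unknown_type_tag` poison value (0xdeadaa03). A minimal C sketch of the layout assumption behind this; `taggedvalue_t`, `GC_BITS_MASK`, and `type_from_header` are simplified stand-ins for illustration, not the runtime's definitions:

    #include <stdint.h>

    /* Simplified: a Julia value is preceded by a tag word holding its
     * type pointer, with GC bits packed into the low bits. The word is
     * uninitialized until jl_set_typeof() stores into it. */
    typedef struct {
        uintptr_t header;
    } taggedvalue_t;

    #define GC_BITS_MASK ((uintptr_t)15)

    /* What deriving the type from the value amounts to: read the word
     * just before the object and mask off the GC bits. On a cell freshly
     * handed out by the pool allocator this reads garbage -- hence the
     * explicit parameter. */
    static inline void *type_from_header(void *val)
    {
        taggedvalue_t *tv = (taggedvalue_t*)val - 1;
        return (void*)(tv->header & ~GC_BITS_MASK);
    }
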
diff --git a/src/gc.c b/src/gc.c
index 56b2c31cbe7f1..745f5365510f9 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -1197,7 +1197,7 @@ static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
 }
 
 // Size includes the tag and the tag is not cleared!!
-JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
+static inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset,
                                           int osize)
 {
     // Use the pool offset instead of the pool address as the argument
@@ -1253,6 +1253,23 @@ JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
     return jl_valueof(v);
 }
 
+// Instrumented version of jl_gc_pool_alloc_inner, called into by LLVM-generated code.
+JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
+                                          int osize)
+{
+    jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
+
+    maybe_record_alloc_to_profile(val, osize, jl_gc_unknown_type_tag);
+    return val;
+}
+
+// This wrapper exists only to prevent `jl_gc_pool_alloc_inner` from being inlined into
+// its callers. We provide an external-facing interface for callers, and inline `jl_gc_pool_alloc_inner`
+// into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
+jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize) {
+    return jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
+}
+
 int jl_gc_classify_pools(size_t sz, int *osize)
 {
     if (sz > GC_MAX_SZCLASS)
@@ -3540,6 +3557,8 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
     SetLastError(last_error);
 #endif
     errno = last_errno;
+    // jl_gc_managed_malloc is currently always used for allocating array buffers.
+    maybe_record_alloc_to_profile(b, sz, (jl_datatype_t*)jl_buff_tag);
     return b;
 }
 
@@ -3581,7 +3600,7 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds
     SetLastError(last_error);
 #endif
     errno = last_errno;
-
+    maybe_record_alloc_to_profile(b, sz, jl_gc_unknown_type_tag);
     return b;
 }
 
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 4990268e5f417..67166cd635165 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -229,8 +229,8 @@ extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED;
 JL_DLLEXPORT extern int jl_lineno;
 JL_DLLEXPORT extern const char *jl_filename;
 
-JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
-                                          int osize);
+jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset,
+                                      int osize);
 JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t allocsz);
 JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize);
 extern uv_mutex_t gc_perm_lock;
@@ -358,7 +358,9 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
         int pool_id = jl_gc_szclass(allocsz);
         jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
         int osize = jl_gc_sizeclasses[pool_id];
-        v = jl_gc_pool_alloc(ptls, (char*)p - (char*)ptls, osize);
+        // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
+        // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
+        v = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
     }
     else {
         if (allocsz < sz) // overflow in adding offs, size was "negative"
@@ -366,7 +368,7 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
         v = jl_gc_big_alloc(ptls, allocsz);
     }
     jl_set_typeof(v, ty);
-    maybe_record_alloc_to_profile(v, sz);
+    maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty);
     return v;
 }
 
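Note on the three-way split above: pool allocations arrive from two directions. LLVM-generated code calls the exported `jl_gc_pool_alloc` directly, while runtime C code reaches the pool through `jl_gc_alloc_` or `jl_alloc_string`, both of which record the allocation themselves with a real type; if those paths also went through the instrumented symbol, every such allocation would be counted twice. A self-contained C sketch of the scheme, with `malloc` standing in for the pool and strings standing in for type tags; the `pool_alloc*` and `alloc_string_like` names are invented for this sketch:

    #include <stdio.h>
    #include <stdlib.h>

    static int g_alloc_profile_enabled = 1;
    static int g_recorded = 0;

    static void maybe_record_alloc_to_profile(void *val, size_t size, const char *typ)
    {
        if (g_alloc_profile_enabled) {
            g_recorded++;
            printf("recorded %p: %zu bytes, type %s\n", val, size, typ);
        }
    }

    /* Shared fast path; records nothing (like jl_gc_pool_alloc_inner). */
    static inline void *pool_alloc_inner(size_t size)
    {
        return malloc(size);
    }

    /* Instrumented entry point (like jl_gc_pool_alloc): called by
     * generated code only, which never reaches the C paths below, so
     * recording here cannot double-count. No type is known yet. */
    void *pool_alloc(size_t size)
    {
        void *val = pool_alloc_inner(size);
        maybe_record_alloc_to_profile(val, size, "<unknown>");
        return val;
    }

    /* Uninstrumented entry point (like jl_gc_pool_alloc_noinline): for
     * runtime C callers that record the allocation themselves. */
    void *pool_alloc_noinline(size_t size)
    {
        return pool_alloc_inner(size);
    }

    /* A runtime-side caller in the style of jl_alloc_string: records
     * exactly once, with a concrete type. */
    void *alloc_string_like(size_t len)
    {
        void *s = pool_alloc_noinline(len + 1);
        maybe_record_alloc_to_profile(s, len, "String");
        return s;
    }

    int main(void)
    {
        free(pool_alloc(32));       /* generated-code path: one record */
        free(alloc_string_like(8)); /* runtime path: one record, typed */
        printf("total records: %d\n", g_recorded); /* prints 2, not 3 */
        return 0;
    }

Keeping the uninstrumented variant as an out-of-line wrapper, rather than exposing `jl_gc_pool_alloc_inner` itself, avoids duplicating the pool fast path at every C call site while still letting the instrumented wrapper be the one place that records.
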
diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl
index fae6a686b824c..eb75775ff7a35 100644
--- a/stdlib/Profile/src/Allocs.jl
+++ b/stdlib/Profile/src/Allocs.jl
@@ -163,15 +163,19 @@ const BacktraceCache = Dict{BTElement,Vector{StackFrame}}
 
 # copied from julia_internal.h
 const JL_BUFF_TAG = UInt(0x4eadc000)
+const JL_GC_UNKNOWN_TYPE_TAG = UInt(0xdeadaa03)
 
 struct CorruptType end
 struct BufferType end
+struct UnknownType end
 
 function load_type(ptr::Ptr{Type})
     if UInt(ptr) < UInt(4096)
         return CorruptType
     elseif UInt(ptr) == JL_BUFF_TAG
         return BufferType
+    elseif UInt(ptr) == JL_GC_UNKNOWN_TYPE_TAG
+        return UnknownType
     end
     return unsafe_pointer_to_objref(ptr)
 end
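Note on the decoding side: with the sentinel in place, `Profile.Allocs` can report a pool allocation whose type was unknown at record time as `UnknownType` instead of mis-reading a stale tag. The classification `load_type` performs, restated in C for reference; `type_kind` and `classify_type_ptr` are invented names, while the two constants match the hunk above:

    #include <stdint.h>
    #include <stdio.h>

    #define JL_BUFF_TAG            ((uintptr_t)0x4eadc000)
    #define JL_GC_UNKNOWN_TYPE_TAG ((uintptr_t)0xdeadaa03)

    typedef enum { TYPE_CORRUPT, TYPE_BUFFER, TYPE_UNKNOWN, TYPE_REAL } type_kind;

    /* Mirrors load_type: pointers into the first (never-mapped) page are
     * flagged as corrupt rather than dereferenced; the two sentinels map
     * to placeholder kinds; anything else is a real jl_datatype_t*. */
    static type_kind classify_type_ptr(uintptr_t ptr)
    {
        if (ptr < 4096)
            return TYPE_CORRUPT;
        if (ptr == JL_BUFF_TAG)
            return TYPE_BUFFER;
        if (ptr == JL_GC_UNKNOWN_TYPE_TAG)
            return TYPE_UNKNOWN;
        return TYPE_REAL;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               classify_type_ptr(JL_BUFF_TAG),            /* TYPE_BUFFER  */
               classify_type_ptr(JL_GC_UNKNOWN_TYPE_TAG), /* TYPE_UNKNOWN */
               classify_type_ptr((uintptr_t)0));          /* TYPE_CORRUPT */
        return 0;
    }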