-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
## Overview Record the type and stack of every allocation (or only at a given sample interval), and return as Julia objects. Alternate approach to existing alloc profiler PR: #33467 Complementary to garbage profiler PR: #42658 (maybe there's some nice way to meld them) This may be reinventing the wheel from #33467, but I'm not sure why that one needs stuff like LLVM passes. I mimicked some stuff from it, but this was my attempt to get something up and running. Could easily be missing stuff. ## Usage: ```julia using Profile.Allocs res = Allocs.@profile sample_rate=0.001 my_func() prof = Allocs.fetch() # do something with `prof` ``` See also: JuliaPerf/PProf.jl#46 for support for visualizing these. Co-authored-by: Nathan Daly <[email protected]>
- Loading branch information
Showing
13 changed files
with
620 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
// This file is a part of Julia. License is MIT: https://julialang.org/license | ||
|
||
#include "gc-alloc-profiler.h" | ||
|
||
#include "julia_internal.h" | ||
#include "gc.h" | ||
|
||
#include <string> | ||
#include <vector> | ||
|
||
using std::string; | ||
using std::vector; | ||
|
||
// One captured backtrace, as produced by get_raw_backtrace().
struct jl_raw_backtrace_t { | ||
// malloc'd buffer holding the backtrace elements; released with free() in
// jl_free_alloc_profile().
jl_bt_element_t *data; | ||
// Number of elements (not bytes) stored in `data`.
size_t size; | ||
}; | ||
|
||
// One recorded allocation, as appended by _maybe_record_alloc_to_profile().
struct jl_raw_alloc_t { | ||
// Result of jl_typeof() on the allocated value.
jl_datatype_t *type_address; | ||
// Backtrace captured at the time the allocation was recorded.
jl_raw_backtrace_t backtrace; | ||
// The `size` argument that was passed to the recording callback.
size_t size; | ||
}; | ||
|
||
// == These structs define the global singleton profile buffer that will be used by | ||
// callbacks to store profile results. == | ||
// Allocations recorded by a single thread. Instances live in
// jl_alloc_profile_t::per_thread_profiles and are selected with jl_threadid().
struct jl_per_thread_alloc_profile_t { | ||
// Records not yet moved into the combined results by jl_fetch_alloc_profile().
vector<jl_raw_alloc_t> allocs; | ||
}; | ||
|
||
// State of the profile currently being collected.
struct jl_alloc_profile_t { | ||
// Threshold set by jl_start_alloc_profile(); an allocation is recorded when a
// random draw in [0, 1] is <= this value.
double sample_rate; | ||
|
||
// One entry per thread, indexed by jl_threadid(); grown in jl_start_alloc_profile().
vector<jl_per_thread_alloc_profile_t> per_thread_profiles; | ||
}; | ||
|
||
// Backing storage for the results handed out by jl_fetch_alloc_profile().
struct jl_combined_results { | ||
// Records merged from every per-thread profile; the pointer returned by
// jl_fetch_alloc_profile() points into this vector's storage.
vector<jl_raw_alloc_t> combined_allocs; | ||
}; | ||
|
||
// == Global variables manipulated by callbacks == | ||
|
||
// Per-thread record buffers plus the active sample rate.
jl_alloc_profile_t g_alloc_profile; | ||
// Set to true by jl_start_alloc_profile() and to false by jl_stop_alloc_profile().
// Declared `extern int` in gc-alloc-profiler.h, where the inline hook tests it.
int g_alloc_profile_enabled = false; | ||
// Filled by jl_fetch_alloc_profile(); emptied by jl_free_alloc_profile().
jl_combined_results g_combined_results; // Will live forever. | ||
|
||
// === stack stuff === | ||
|
||
// Capture a backtrace with rec_backtrace() and return an exactly-sized, malloc'd
// copy of it. The caller owns `data` and must release it with free().
//
// Returns an empty backtrace ({nullptr, 0}) when nothing was captured or when
// memory could not be obtained, so callers never see a dangling/NULL-backed
// non-empty result.
jl_raw_backtrace_t get_raw_backtrace() {
    // Scratch buffer that rec_backtrace() writes into. It is per-thread because
    // allocations are recorded from several threads (see per_thread_profiles);
    // a single shared static buffer could be overwritten by another thread in
    // the middle of a capture. It is heap-allocated lazily, once per thread,
    // rather than being a JL_MAX_BT_SIZE-element thread-local array.
    static thread_local jl_bt_element_t *scratch = nullptr;
    if (scratch == nullptr) {
        scratch = (jl_bt_element_t*) malloc(JL_MAX_BT_SIZE * sizeof(jl_bt_element_t));
        if (scratch == nullptr) {
            return jl_raw_backtrace_t{nullptr, 0};
        }
    }

    size_t bt_size = rec_backtrace(scratch, JL_MAX_BT_SIZE, 2);
    if (bt_size == 0) {
        // Avoid malloc(0)/memcpy on a possibly-NULL pointer.
        return jl_raw_backtrace_t{nullptr, 0};
    }

    // Copy only the bytes actually used out of the scratch buffer.
    size_t bt_bytes = bt_size * sizeof(jl_bt_element_t);
    jl_bt_element_t *bt_data = (jl_bt_element_t*) malloc(bt_bytes);
    if (bt_data == nullptr) {
        // Out of memory: drop the backtrace rather than writing through NULL.
        return jl_raw_backtrace_t{nullptr, 0};
    }
    memcpy(bt_data, scratch, bt_bytes);

    return jl_raw_backtrace_t{
        bt_data,
        bt_size
    };
}
|
||
// == exported interface == | ||
|
||
extern "C" { // Needed since these functions don't take any arguments. | ||
|
||
// Enable allocation recording: store `sample_rate` in the global profile and
// raise the g_alloc_profile_enabled flag.
JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate) {
    // Ensure there is one (initially empty) slot per thread. Slots are only ever
    // added, so this does real work the first time and nothing on later calls.
    auto& slots = g_alloc_profile.per_thread_profiles;
    if (slots.size() < jl_n_threads) {
        slots.resize(jl_n_threads);
    }

    g_alloc_profile.sample_rate = sample_rate;
    g_alloc_profile_enabled = true;
}
|
||
// Move every pending per-thread record into the combined results and return a
// view (pointer + count) over the combined vector. The returned pointer refers
// to that vector's storage; records accumulate across calls until
// jl_free_alloc_profile() clears them.
JL_DLLEXPORT jl_profile_allocs_raw_results_t jl_fetch_alloc_profile() {
    auto& merged = g_combined_results.combined_allocs;

    // TODO: interleave to preserve ordering
    for (auto& per_thread : g_alloc_profile.per_thread_profiles) {
        auto& pending = per_thread.allocs;
        merged.insert(merged.end(), pending.begin(), pending.end());
        pending.clear();
    }

    jl_profile_allocs_raw_results_t results;
    results.allocs = merged.data();
    results.num_allocs = merged.size();
    return results;
}
|
||
// Disable allocation recording by lowering the global flag. Already-recorded
// data is left untouched.
JL_DLLEXPORT void jl_stop_alloc_profile() {
    g_alloc_profile_enabled = 0;
}
|
||
// Release every backtrace buffer owned by the profiler and empty all record
// vectors (both the per-thread ones and the combined results).
JL_DLLEXPORT void jl_free_alloc_profile() {
    // Frees the backtrace buffer of each record, then empties the vector.
    auto release_all = [](vector<jl_raw_alloc_t>& records) {
        for (const auto& record : records) {
            free(record.backtrace.data);
        }
        records.clear();
    };

    // Records that have not yet been combined (combining happens in
    // jl_fetch_alloc_profile()).
    for (auto& per_thread : g_alloc_profile.per_thread_profiles) {
        release_all(per_thread.allocs);
    }

    // Records that were already merged into the combined results object.
    release_all(g_combined_results.combined_allocs);
}
|
||
// == callback called into by the outside == | ||
|
||
// Sampling callback: with a random draw in [0, 1] compared against the global
// sample rate, decide whether to record this allocation; if so, append its
// type, a freshly captured backtrace and `size` to the calling thread's profile.
void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size) JL_NOTSAFEPOINT {
    const double draw = double(rand()) / double(RAND_MAX);
    // Written as a negated `<=` so that an unordered comparison (NaN rate) still
    // means "do not record".
    if (!(draw <= g_alloc_profile.sample_rate)) {
        return;
    }

    jl_raw_alloc_t record;
    record.type_address = (jl_datatype_t*)jl_typeof(val);
    record.backtrace = get_raw_backtrace();
    record.size = size;

    g_alloc_profile.per_thread_profiles[jl_threadid()].allocs.push_back(record);
}
|
||
} // extern "C" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
// This file is a part of Julia. License is MIT: https://julialang.org/license | ||
|
||
#ifndef JL_GC_ALLOC_PROFILER_H | ||
#define JL_GC_ALLOC_PROFILER_H | ||
|
||
#include "julia.h" | ||
#include "ios.h" | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
// --------------------------------------------------------------------- | ||
// The public interface to call from Julia for allocations profiling | ||
// --------------------------------------------------------------------- | ||
|
||
// Forward-declaration to avoid dependency in header file. | ||
struct jl_raw_alloc_t; // Defined in gc-alloc-profiler.cpp | ||
|
||
// Pointer + length view over the recorded allocations, returned by
// jl_fetch_alloc_profile().
typedef struct { | ||
// First element of the combined record array (owned by the profiler).
struct jl_raw_alloc_t *allocs; | ||
// Number of records reachable through `allocs`.
size_t num_allocs; | ||
} jl_profile_allocs_raw_results_t; | ||
|
||
// Store `sample_rate` and turn recording on.
JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate); | ||
// Merge the per-thread records into the combined results and return a view of them.
JL_DLLEXPORT jl_profile_allocs_raw_results_t jl_fetch_alloc_profile(void); | ||
// Turn recording off; recorded data is kept.
JL_DLLEXPORT void jl_stop_alloc_profile(void); | ||
// Free all recorded backtraces and clear every record buffer.
JL_DLLEXPORT void jl_free_alloc_profile(void); | ||
|
||
// --------------------------------------------------------------------- | ||
// Functions to call from GC when alloc profiling is enabled | ||
// --------------------------------------------------------------------- | ||
|
||
// Out-of-line recording routine; applies the sampling decision and, if the
// allocation is selected, stores it in the calling thread's profile. Call it
// through maybe_record_alloc_to_profile() below, which first checks the flag.
void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size) JL_NOTSAFEPOINT; | ||
|
||
// Non-zero while allocation profiling is enabled (defined in gc-alloc-profiler.cpp).
extern int g_alloc_profile_enabled; | ||
|
||
// Inline fast-path hook: does nothing unless g_alloc_profile_enabled is set, in
// which case it forwards to the out-of-line _maybe_record_alloc_to_profile().
static inline void maybe_record_alloc_to_profile(jl_value_t *val, size_t size) JL_NOTSAFEPOINT {
    // Profiling is expected to be off most of the time; bail out early.
    if (!__unlikely(g_alloc_profile_enabled)) {
        return;
    }
    _maybe_record_alloc_to_profile(val, size);
}
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
|
||
|
||
#endif // JL_GC_ALLOC_PROFILER_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
ef23d6d
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@nanosoldier
runbenchmarks(ALL, vs="25864e4341b0f11879a566bb6dbc571e230c26691")
ef23d6d
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@nanosoldier
runbenchmarks(ALL, vs="@5864e4341b0f11879a566bb6dbc571e230c26691")
ef23d6d
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Something went wrong when running your job:
Unfortunately, the logs could not be uploaded.
ef23d6d
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://github.com/JuliaCI/NanosoldierReports/blob/87800186ad66a001480d1413173374a57e154c2a/benchmark/by_hash/ef23d6d_vs_5864e43/report.md
ef23d6d
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@nanosoldier
runbenchmarks("broadcast", vs="@5864e4341b0f11879a566bb6dbc571e230c26691")
ef23d6d
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Your benchmark job has completed - possible performance regressions were detected. A full report can be found here.
ef23d6d
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cool, thanks for running Jameson - this appears to be perf neutral.
ef23d6d
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cc: @vilterp