Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more profiling events #49493

Merged
merged 8 commits into from
Apr 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance
extern "C" JL_DLLEXPORT
void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage, size_t _world)
{
JL_TIMING(NATIVE_AOT, NATIVE_Create);
++CreateNativeCalls;
CreateNativeMax.updateMax(jl_array_len(methods));
if (cgparams == NULL)
Expand Down Expand Up @@ -1448,7 +1449,7 @@ void jl_dump_native_impl(void *native_code,
const char *asm_fname,
const char *sysimg_data, size_t sysimg_len, ios_t *s)
{
JL_TIMING(NATIVE_DUMP, NATIVE_DUMP);
JL_TIMING(NATIVE_AOT, NATIVE_Dump);
jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
if (!bc_fname && !unopt_bc_fname && !obj_fname && !asm_fname) {
LLVM_DEBUG(dbgs() << "No output requested, skipping native code dump?\n");
Expand Down
2 changes: 1 addition & 1 deletion src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -2266,7 +2266,6 @@ jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t w
JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous,
size_t world, size_t *min_valid, size_t *max_valid, int *ambig)
{
JL_TIMING(METHOD_MATCH, METHOD_MATCH);
pchintalapudi marked this conversation as resolved.
Show resolved Hide resolved
if (ambig != NULL)
*ambig = 0;
jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)types);
Expand Down Expand Up @@ -3304,6 +3303,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
int intersections, size_t world, int cache_result,
size_t *min_valid, size_t *max_valid, int *ambig)
{
JL_TIMING(METHOD_MATCH, METHOD_MATCH);
if (world > jl_atomic_load_acquire(&jl_world_counter))
return jl_nothing; // the future is not enumerable
int has_ambiguity = 0;
Expand Down
1 change: 1 addition & 0 deletions src/processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ static inline std::vector<TargetData<n>> &get_cmdline_targets(F &&feature_cb)
template<typename F>
static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
{
JL_TIMING(LOAD_IMAGE, LOAD_Processor);
jl_image_t res{};

const jl_image_pointers_t *pointers;
Expand Down
2 changes: 2 additions & 0 deletions src/stackwalk.c
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ static void decode_backtrace(jl_bt_element_t *bt_data, size_t bt_size,

JL_DLLEXPORT jl_value_t *jl_get_backtrace(void)
{
JL_TIMING(STACKWALK, STACKWALK_Backtrace);
topolarity marked this conversation as resolved.
Show resolved Hide resolved
jl_excstack_t *s = jl_current_task->excstack;
jl_bt_element_t *bt_data = NULL;
size_t bt_size = 0;
Expand All @@ -343,6 +344,7 @@ JL_DLLEXPORT jl_value_t *jl_get_backtrace(void)
JL_DLLEXPORT jl_value_t *jl_get_excstack(jl_task_t* task, int include_bt, int max_entries)
{
JL_TYPECHK(current_exceptions, task, (jl_value_t*)task);
JL_TIMING(STACKWALK, STACKWALK_Excstack);
jl_task_t *ct = jl_current_task;
if (task != ct && jl_atomic_load_relaxed(&task->_state) == JL_TASK_STATE_RUNNABLE) {
jl_error("Inspecting the exception stack of a task which might "
Expand Down
3 changes: 2 additions & 1 deletion src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -2672,7 +2672,6 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
jl_array_t **ext_targets, jl_array_t **edges,
char **base, arraylist_t *ccallable_list, pkgcachesizes *cachesizes) JL_GC_DISABLED
{
JL_TIMING(SYSIMG_LOAD, SYSIMG_LOAD);
int en = jl_gc_enable(0);
ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record;
jl_serializer_state s;
Expand Down Expand Up @@ -3195,6 +3194,7 @@ static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_
// TODO?: refactor to make it easier to create the "package inspector"
static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo)
{
JL_TIMING(LOAD_IMAGE, LOAD_Pkgimg);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For a future PR it would be good I guess to also annotate this trace point with the name of the package getting loaded.

uint64_t checksum = 0;
int64_t dataendpos = 0;
int64_t datastartpos = 0;
Expand Down Expand Up @@ -3265,6 +3265,7 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im

static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image, uint32_t checksum)
{
JL_TIMING(LOAD_IMAGE, LOAD_Sysimg);
jl_restore_system_image_from_stream_(f, image, NULL, checksum | ((uint64_t)0xfdfcfbfa << 32), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
}

Expand Down
3 changes: 3 additions & 0 deletions src/staticdata_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,7 @@ static void jl_copy_roots(jl_array_t *method_roots_list, uint64_t key)
// verify that these edges intersect with the same methods as before
static jl_array_t *jl_verify_edges(jl_array_t *targets, size_t minworld)
{
JL_TIMING(VERIFY_IMAGE, VERIFY_Edges);
topolarity marked this conversation as resolved.
Show resolved Hide resolved
size_t i, l = jl_array_len(targets) / 3;
static jl_value_t *ulong_array JL_ALWAYS_LEAFTYPE = NULL;
if (ulong_array == NULL)
Expand Down Expand Up @@ -936,6 +937,7 @@ static jl_array_t *jl_verify_edges(jl_array_t *targets, size_t minworld)
// Combine all edges relevant to a method to initialize the maxvalids list
static jl_array_t *jl_verify_methods(jl_array_t *edges, jl_array_t *maxvalids)
{
JL_TIMING(VERIFY_IMAGE, VERIFY_Methods);
jl_value_t *loctag = NULL;
jl_array_t *maxvalids2 = NULL;
JL_GC_PUSH2(&loctag, &maxvalids2);
Expand Down Expand Up @@ -1049,6 +1051,7 @@ static int jl_verify_graph_edge(size_t *maxvalids2_data, jl_array_t *edges, size
// Visit all entries in edges, verify if they are valid
static void jl_verify_graph(jl_array_t *edges, jl_array_t *maxvalids2)
{
JL_TIMING(VERIFY_IMAGE, VERIFY_Graph);
arraylist_t stack, visited;
arraylist_new(&stack, 0);
size_t i, n = jl_array_len(edges) / 2;
Expand Down
8 changes: 7 additions & 1 deletion src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -744,8 +744,14 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint)
lock->count++;
return;
}
JL_TIMING(LOCK_SPIN, LOCK_SPIN);
// Don't use JL_TIMING for instant acquires, results in large blowup of events
jl_profile_lock_start_wait(lock);
if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
lock->count = 1;
jl_profile_lock_acquired(lock);
return;
}
JL_TIMING(LOCK_SPIN, LOCK_SPIN);
while (1) {
if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
lock->count = 1;
Expand Down
31 changes: 21 additions & 10 deletions src/timing.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,16 +133,17 @@ void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...);
X(MACRO_INVOCATION) \
X(AST_COMPRESS) \
X(AST_UNCOMPRESS) \
X(SYSIMG_LOAD) \
X(SYSIMG_DUMP) \
X(NATIVE_DUMP) \
X(NATIVE_AOT) \
X(ADD_METHOD) \
X(LOAD_MODULE) \
X(LOAD_IMAGE) \
X(VERIFY_IMAGE) \
X(SAVE_MODULE) \
X(INIT_MODULE) \
X(LOCK_SPIN) \
X(DL_OPEN) \

X(STACKWALK) \
X(DL_OPEN) \


#define JL_TIMING_EVENTS \
Expand All @@ -154,6 +155,16 @@ void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...);
X(CODEGEN_LLVM) \
X(CODEGEN_Codeinst) \
X(CODEGEN_Workqueue) \
X(LOAD_Sysimg) \
X(LOAD_Pkgimg) \
X(LOAD_Processor) \
X(VERIFY_Edges) \
X(VERIFY_Methods) \
X(VERIFY_Graph) \
X(STACKWALK_Backtrace) \
X(STACKWALK_Excstack) \
X(NATIVE_Dump) \
X(NATIVE_Create) \


enum jl_timing_owners {
Expand Down Expand Up @@ -209,13 +220,13 @@ enum jl_timing_events {
#endif

#ifdef USE_ITTAPI
#define _ITTAPI_CTX_MEMBER int event;
#define _ITTAPI_CTOR(block, event) block->event = event
#define _ITTAPI_START(block) if (_jl_timing_enabled(block->event)) __itt_event_start(jl_timing_ittapi_events[block->event])
#define _ITTAPI_STOP(block) if (_jl_timing_enabled(block->event)) __itt_event_end(jl_timing_ittapi_events[block->event])
#define _ITTAPI_CTX_MEMBER int owner; int event;
#define _ITTAPI_CTOR(block, owner, event) block->owner = owner; block->event = event
#define _ITTAPI_START(block) if (_jl_timing_enabled(block->owner)) __itt_event_start(jl_timing_ittapi_events[block->event])
#define _ITTAPI_STOP(block) if (_jl_timing_enabled(block->owner)) __itt_event_end(jl_timing_ittapi_events[block->event])
#else
#define _ITTAPI_CTX_MEMBER
#define _ITTAPI_CTOR(block, event)
#define _ITTAPI_CTOR(block, owner, event)
#define _ITTAPI_START(block)
#define _ITTAPI_STOP(block)
#endif
Expand Down Expand Up @@ -287,7 +298,7 @@ STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner, in
uint64_t t = cycleclock(); (void)t;
_COUNTS_CTOR(&block->counts_ctx, owner);
_COUNTS_START(&block->counts_ctx, t);
_ITTAPI_CTOR(block, event);
_ITTAPI_CTOR(block, owner, event);
_ITTAPI_START(block);

jl_task_t *ct = jl_current_task;
Expand Down