diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index fc1d4074e92bb..391c5d3df46fb 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -268,6 +268,7 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance extern "C" JL_DLLEXPORT void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage, size_t _world) { + JL_TIMING(NATIVE_AOT, NATIVE_Create); ++CreateNativeCalls; CreateNativeMax.updateMax(jl_array_len(methods)); if (cgparams == NULL) @@ -1448,7 +1449,7 @@ void jl_dump_native_impl(void *native_code, const char *asm_fname, const char *sysimg_data, size_t sysimg_len, ios_t *s) { - JL_TIMING(NATIVE_DUMP, NATIVE_DUMP); + JL_TIMING(NATIVE_AOT, NATIVE_Dump); jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; if (!bc_fname && !unopt_bc_fname && !obj_fname && !asm_fname) { LLVM_DEBUG(dbgs() << "No output requested, skipping native code dump?\n"); diff --git a/src/gf.c b/src/gf.c index 23ce8d33c82d2..b7d4dd70dc8a4 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2266,7 +2266,6 @@ jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t w JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous, size_t world, size_t *min_valid, size_t *max_valid, int *ambig) { - JL_TIMING(METHOD_MATCH, METHOD_MATCH); if (ambig != NULL) *ambig = 0; jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)types); @@ -3304,6 +3303,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int intersections, size_t world, int cache_result, size_t *min_valid, size_t *max_valid, int *ambig) { + JL_TIMING(METHOD_MATCH, METHOD_MATCH); if (world > jl_atomic_load_acquire(&jl_world_counter)) return jl_nothing; // the future is not enumerable int has_ambiguity = 0; diff --git a/src/processor.cpp b/src/processor.cpp index 0b4f9b1243446..88fcb813d8248 100644 --- a/src/processor.cpp +++ b/src/processor.cpp @@ -629,6 +629,7 @@ static inline std::vector> &get_cmdline_targets(F &&feature_cb) template static inline jl_image_t parse_sysimg(void *hdl, F &&callback) { + JL_TIMING(LOAD_IMAGE, LOAD_Processor); jl_image_t res{}; const jl_image_pointers_t *pointers; diff --git a/src/stackwalk.c b/src/stackwalk.c index caf0705b85be7..093467750d573 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -321,6 +321,7 @@ static void decode_backtrace(jl_bt_element_t *bt_data, size_t bt_size, JL_DLLEXPORT jl_value_t *jl_get_backtrace(void) { + JL_TIMING(STACKWALK, STACKWALK_Backtrace); jl_excstack_t *s = jl_current_task->excstack; jl_bt_element_t *bt_data = NULL; size_t bt_size = 0; @@ -343,6 +344,7 @@ JL_DLLEXPORT jl_value_t *jl_get_backtrace(void) JL_DLLEXPORT jl_value_t *jl_get_excstack(jl_task_t* task, int include_bt, int max_entries) { JL_TYPECHK(current_exceptions, task, (jl_value_t*)task); + JL_TIMING(STACKWALK, STACKWALK_Excstack); jl_task_t *ct = jl_current_task; if (task != ct && jl_atomic_load_relaxed(&task->_state) == JL_TASK_STATE_RUNNABLE) { jl_error("Inspecting the exception stack of a task which might " diff --git a/src/staticdata.c b/src/staticdata.c index b5d3aecaeb96e..2f61e91c8128e 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -2672,7 +2672,6 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl jl_array_t **ext_targets, jl_array_t **edges, char **base, arraylist_t *ccallable_list, pkgcachesizes *cachesizes) JL_GC_DISABLED { - JL_TIMING(SYSIMG_LOAD, SYSIMG_LOAD); int en = jl_gc_enable(0); ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record; jl_serializer_state s; @@ -3195,6 +3194,7 @@ static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_ // TODO?: refactor to make it easier to create the "package inspector" static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo) { + JL_TIMING(LOAD_IMAGE, LOAD_Pkgimg); uint64_t checksum = 0; int64_t dataendpos = 0; int64_t datastartpos = 0; @@ -3265,6 +3265,7 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image, uint32_t checksum) { + JL_TIMING(LOAD_IMAGE, LOAD_Sysimg); jl_restore_system_image_from_stream_(f, image, NULL, checksum | ((uint64_t)0xfdfcfbfa << 32), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); } diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index ad9149bb54526..ca5cf9100f5d7 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -836,6 +836,7 @@ static void jl_copy_roots(jl_array_t *method_roots_list, uint64_t key) // verify that these edges intersect with the same methods as before static jl_array_t *jl_verify_edges(jl_array_t *targets, size_t minworld) { + JL_TIMING(VERIFY_IMAGE, VERIFY_Edges); size_t i, l = jl_array_len(targets) / 3; static jl_value_t *ulong_array JL_ALWAYS_LEAFTYPE = NULL; if (ulong_array == NULL) @@ -936,6 +937,7 @@ static jl_array_t *jl_verify_edges(jl_array_t *targets, size_t minworld) // Combine all edges relevant to a method to initialize the maxvalids list static jl_array_t *jl_verify_methods(jl_array_t *edges, jl_array_t *maxvalids) { + JL_TIMING(VERIFY_IMAGE, VERIFY_Methods); jl_value_t *loctag = NULL; jl_array_t *maxvalids2 = NULL; JL_GC_PUSH2(&loctag, &maxvalids2); @@ -1049,6 +1051,7 @@ static int jl_verify_graph_edge(size_t *maxvalids2_data, jl_array_t *edges, size // Visit all entries in edges, verify if they are valid static void jl_verify_graph(jl_array_t *edges, jl_array_t *maxvalids2) { + JL_TIMING(VERIFY_IMAGE, VERIFY_Graph); arraylist_t stack, visited; arraylist_new(&stack, 0); size_t i, n = jl_array_len(edges) / 2; diff --git a/src/threading.c b/src/threading.c index ea9ec8e16ca45..6718a47f5e836 100644 --- a/src/threading.c +++ b/src/threading.c @@ -744,8 +744,14 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint) lock->count++; return; } - JL_TIMING(LOCK_SPIN, LOCK_SPIN); + // Don't use JL_TIMING for instant acquires, results in large blowup of events jl_profile_lock_start_wait(lock); + if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) { + lock->count = 1; + jl_profile_lock_acquired(lock); + return; + } + JL_TIMING(LOCK_SPIN, LOCK_SPIN); while (1) { if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) { lock->count = 1; diff --git a/src/timing.h b/src/timing.h index 4a30da583badc..4cdd32da8b195 100644 --- a/src/timing.h +++ b/src/timing.h @@ -133,16 +133,17 @@ void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...); X(MACRO_INVOCATION) \ X(AST_COMPRESS) \ X(AST_UNCOMPRESS) \ - X(SYSIMG_LOAD) \ X(SYSIMG_DUMP) \ - X(NATIVE_DUMP) \ + X(NATIVE_AOT) \ X(ADD_METHOD) \ X(LOAD_MODULE) \ + X(LOAD_IMAGE) \ + X(VERIFY_IMAGE) \ X(SAVE_MODULE) \ X(INIT_MODULE) \ X(LOCK_SPIN) \ - X(DL_OPEN) \ - + X(STACKWALK) \ + X(DL_OPEN) \ #define JL_TIMING_EVENTS \ @@ -154,6 +155,16 @@ void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...); X(CODEGEN_LLVM) \ X(CODEGEN_Codeinst) \ X(CODEGEN_Workqueue) \ + X(LOAD_Sysimg) \ + X(LOAD_Pkgimg) \ + X(LOAD_Processor) \ + X(VERIFY_Edges) \ + X(VERIFY_Methods) \ + X(VERIFY_Graph) \ + X(STACKWALK_Backtrace) \ + X(STACKWALK_Excstack) \ + X(NATIVE_Dump) \ + X(NATIVE_Create) \ enum jl_timing_owners { @@ -209,13 +220,13 @@ enum jl_timing_events { #endif #ifdef USE_ITTAPI -#define _ITTAPI_CTX_MEMBER int event; -#define _ITTAPI_CTOR(block, event) block->event = event -#define _ITTAPI_START(block) if (_jl_timing_enabled(block->event)) __itt_event_start(jl_timing_ittapi_events[block->event]) -#define _ITTAPI_STOP(block) if (_jl_timing_enabled(block->event)) __itt_event_end(jl_timing_ittapi_events[block->event]) +#define _ITTAPI_CTX_MEMBER int owner; int event; +#define _ITTAPI_CTOR(block, owner, event) block->owner = owner; block->event = event +#define _ITTAPI_START(block) if (_jl_timing_enabled(block->owner)) __itt_event_start(jl_timing_ittapi_events[block->event]) +#define _ITTAPI_STOP(block) if (_jl_timing_enabled(block->owner)) __itt_event_end(jl_timing_ittapi_events[block->event]) #else #define _ITTAPI_CTX_MEMBER -#define _ITTAPI_CTOR(block, event) +#define _ITTAPI_CTOR(block, owner, event) #define _ITTAPI_START(block) #define _ITTAPI_STOP(block) #endif @@ -287,7 +298,7 @@ STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner, in uint64_t t = cycleclock(); (void)t; _COUNTS_CTOR(&block->counts_ctx, owner); _COUNTS_START(&block->counts_ctx, t); - _ITTAPI_CTOR(block, event); + _ITTAPI_CTOR(block, owner, event); _ITTAPI_START(block); jl_task_t *ct = jl_current_task;