Skip to content

Commit

Permalink
Add more profiling events (#49493)
Browse files Browse the repository at this point in the history
  • Loading branch information
pchintalapudi authored Apr 26, 2023
1 parent 960870e commit 3f7ae8b
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 14 deletions.
3 changes: 2 additions & 1 deletion src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance
extern "C" JL_DLLEXPORT
void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy, int _imaging_mode, int _external_linkage, size_t _world)
{
JL_TIMING(NATIVE_AOT, NATIVE_Create);
++CreateNativeCalls;
CreateNativeMax.updateMax(jl_array_len(methods));
if (cgparams == NULL)
Expand Down Expand Up @@ -1448,7 +1449,7 @@ void jl_dump_native_impl(void *native_code,
const char *asm_fname,
const char *sysimg_data, size_t sysimg_len, ios_t *s)
{
JL_TIMING(NATIVE_DUMP, NATIVE_DUMP);
JL_TIMING(NATIVE_AOT, NATIVE_Dump);
jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
if (!bc_fname && !unopt_bc_fname && !obj_fname && !asm_fname) {
LLVM_DEBUG(dbgs() << "No output requested, skipping native code dump?\n");
Expand Down
2 changes: 1 addition & 1 deletion src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -2266,7 +2266,6 @@ jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t w
JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous,
size_t world, size_t *min_valid, size_t *max_valid, int *ambig)
{
JL_TIMING(METHOD_MATCH, METHOD_MATCH);
if (ambig != NULL)
*ambig = 0;
jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)types);
Expand Down Expand Up @@ -3304,6 +3303,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
int intersections, size_t world, int cache_result,
size_t *min_valid, size_t *max_valid, int *ambig)
{
JL_TIMING(METHOD_MATCH, METHOD_MATCH);
if (world > jl_atomic_load_acquire(&jl_world_counter))
return jl_nothing; // the future is not enumerable
int has_ambiguity = 0;
Expand Down
1 change: 1 addition & 0 deletions src/processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ static inline std::vector<TargetData<n>> &get_cmdline_targets(F &&feature_cb)
template<typename F>
static inline jl_image_t parse_sysimg(void *hdl, F &&callback)
{
JL_TIMING(LOAD_IMAGE, LOAD_Processor);
jl_image_t res{};

const jl_image_pointers_t *pointers;
Expand Down
2 changes: 2 additions & 0 deletions src/stackwalk.c
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ static void decode_backtrace(jl_bt_element_t *bt_data, size_t bt_size,

JL_DLLEXPORT jl_value_t *jl_get_backtrace(void)
{
JL_TIMING(STACKWALK, STACKWALK_Backtrace);
jl_excstack_t *s = jl_current_task->excstack;
jl_bt_element_t *bt_data = NULL;
size_t bt_size = 0;
Expand All @@ -343,6 +344,7 @@ JL_DLLEXPORT jl_value_t *jl_get_backtrace(void)
JL_DLLEXPORT jl_value_t *jl_get_excstack(jl_task_t* task, int include_bt, int max_entries)
{
JL_TYPECHK(current_exceptions, task, (jl_value_t*)task);
JL_TIMING(STACKWALK, STACKWALK_Excstack);
jl_task_t *ct = jl_current_task;
if (task != ct && jl_atomic_load_relaxed(&task->_state) == JL_TASK_STATE_RUNNABLE) {
jl_error("Inspecting the exception stack of a task which might "
Expand Down
3 changes: 2 additions & 1 deletion src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -2672,7 +2672,6 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
jl_array_t **ext_targets, jl_array_t **edges,
char **base, arraylist_t *ccallable_list, pkgcachesizes *cachesizes) JL_GC_DISABLED
{
JL_TIMING(SYSIMG_LOAD, SYSIMG_LOAD);
int en = jl_gc_enable(0);
ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record;
jl_serializer_state s;
Expand Down Expand Up @@ -3195,6 +3194,7 @@ static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_
// TODO?: refactor to make it easier to create the "package inspector"
static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo)
{
JL_TIMING(LOAD_IMAGE, LOAD_Pkgimg);
uint64_t checksum = 0;
int64_t dataendpos = 0;
int64_t datastartpos = 0;
Expand Down Expand Up @@ -3265,6 +3265,7 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im

static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image, uint32_t checksum)
{
JL_TIMING(LOAD_IMAGE, LOAD_Sysimg);
jl_restore_system_image_from_stream_(f, image, NULL, checksum | ((uint64_t)0xfdfcfbfa << 32), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
}

Expand Down
3 changes: 3 additions & 0 deletions src/staticdata_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,7 @@ static void jl_copy_roots(jl_array_t *method_roots_list, uint64_t key)
// verify that these edges intersect with the same methods as before
static jl_array_t *jl_verify_edges(jl_array_t *targets, size_t minworld)
{
JL_TIMING(VERIFY_IMAGE, VERIFY_Edges);
size_t i, l = jl_array_len(targets) / 3;
static jl_value_t *ulong_array JL_ALWAYS_LEAFTYPE = NULL;
if (ulong_array == NULL)
Expand Down Expand Up @@ -936,6 +937,7 @@ static jl_array_t *jl_verify_edges(jl_array_t *targets, size_t minworld)
// Combine all edges relevant to a method to initialize the maxvalids list
static jl_array_t *jl_verify_methods(jl_array_t *edges, jl_array_t *maxvalids)
{
JL_TIMING(VERIFY_IMAGE, VERIFY_Methods);
jl_value_t *loctag = NULL;
jl_array_t *maxvalids2 = NULL;
JL_GC_PUSH2(&loctag, &maxvalids2);
Expand Down Expand Up @@ -1049,6 +1051,7 @@ static int jl_verify_graph_edge(size_t *maxvalids2_data, jl_array_t *edges, size
// Visit all entries in edges, verify if they are valid
static void jl_verify_graph(jl_array_t *edges, jl_array_t *maxvalids2)
{
JL_TIMING(VERIFY_IMAGE, VERIFY_Graph);
arraylist_t stack, visited;
arraylist_new(&stack, 0);
size_t i, n = jl_array_len(edges) / 2;
Expand Down
8 changes: 7 additions & 1 deletion src/threading.c
Original file line number Diff line number Diff line change
Expand Up @@ -744,8 +744,14 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint)
lock->count++;
return;
}
JL_TIMING(LOCK_SPIN, LOCK_SPIN);
// Don't use JL_TIMING for instant acquires, results in large blowup of events
jl_profile_lock_start_wait(lock);
if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
lock->count = 1;
jl_profile_lock_acquired(lock);
return;
}
JL_TIMING(LOCK_SPIN, LOCK_SPIN);
while (1) {
if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
lock->count = 1;
Expand Down
31 changes: 21 additions & 10 deletions src/timing.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,16 +133,17 @@ void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...);
X(MACRO_INVOCATION) \
X(AST_COMPRESS) \
X(AST_UNCOMPRESS) \
X(SYSIMG_LOAD) \
X(SYSIMG_DUMP) \
X(NATIVE_DUMP) \
X(NATIVE_AOT) \
X(ADD_METHOD) \
X(LOAD_MODULE) \
X(LOAD_IMAGE) \
X(VERIFY_IMAGE) \
X(SAVE_MODULE) \
X(INIT_MODULE) \
X(LOCK_SPIN) \
X(DL_OPEN) \

X(STACKWALK) \
X(DL_OPEN) \


#define JL_TIMING_EVENTS \
Expand All @@ -154,6 +155,16 @@ void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...);
X(CODEGEN_LLVM) \
X(CODEGEN_Codeinst) \
X(CODEGEN_Workqueue) \
X(LOAD_Sysimg) \
X(LOAD_Pkgimg) \
X(LOAD_Processor) \
X(VERIFY_Edges) \
X(VERIFY_Methods) \
X(VERIFY_Graph) \
X(STACKWALK_Backtrace) \
X(STACKWALK_Excstack) \
X(NATIVE_Dump) \
X(NATIVE_Create) \


enum jl_timing_owners {
Expand Down Expand Up @@ -209,13 +220,13 @@ enum jl_timing_events {
#endif

#ifdef USE_ITTAPI
#define _ITTAPI_CTX_MEMBER int event;
#define _ITTAPI_CTOR(block, event) block->event = event
#define _ITTAPI_START(block) if (_jl_timing_enabled(block->event)) __itt_event_start(jl_timing_ittapi_events[block->event])
#define _ITTAPI_STOP(block) if (_jl_timing_enabled(block->event)) __itt_event_end(jl_timing_ittapi_events[block->event])
#define _ITTAPI_CTX_MEMBER int owner; int event;
#define _ITTAPI_CTOR(block, owner, event) block->owner = owner; block->event = event
#define _ITTAPI_START(block) if (_jl_timing_enabled(block->owner)) __itt_event_start(jl_timing_ittapi_events[block->event])
#define _ITTAPI_STOP(block) if (_jl_timing_enabled(block->owner)) __itt_event_end(jl_timing_ittapi_events[block->event])
#else
#define _ITTAPI_CTX_MEMBER
#define _ITTAPI_CTOR(block, event)
#define _ITTAPI_CTOR(block, owner, event)
#define _ITTAPI_START(block)
#define _ITTAPI_STOP(block)
#endif
Expand Down Expand Up @@ -287,7 +298,7 @@ STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner, in
uint64_t t = cycleclock(); (void)t;
_COUNTS_CTOR(&block->counts_ctx, owner);
_COUNTS_START(&block->counts_ctx, t);
_ITTAPI_CTOR(block, event);
_ITTAPI_CTOR(block, owner, event);
_ITTAPI_START(block);

jl_task_t *ct = jl_current_task;
Expand Down

0 comments on commit 3f7ae8b

Please sign in to comment.