From ec1afc6092262700aa9dca25e147f3514e612f1e Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 7 Oct 2021 10:37:42 -0400 Subject: [PATCH 01/11] Halfway through draft commit to construct full fieldpath from slot Account for inline-allocated structs in field path. Only partway done. --- src/gc-debug.c | 3 +- src/gc-heap-snapshot.cpp | 67 +++++++++++++++++++++++++++++++++------- src/gc.h | 2 +- 3 files changed, 58 insertions(+), 14 deletions(-) diff --git a/src/gc-debug.c b/src/gc-debug.c index 050ae9c9082ba..b48e2977a00d8 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1205,7 +1205,7 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT +JL_DLLEXPORT int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT { jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); int nf = (int)jl_datatype_nfields(vt); @@ -1217,7 +1217,6 @@ int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT } return -1; } - int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT { char *slot = (char*)_slot; diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 9cf4e612b17e2..34ae82e2d4f76 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -10,9 +10,11 @@ #include #include #include +#include using std::vector; using std::string; +using std::pair; using std::unordered_map; using std::unordered_set; @@ -216,7 +218,7 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { self_size = jl_is_array_type(type) ? jl_array_nbytes((jl_array_t*)a) : (size_t)jl_datatype_size(type); - + // print full type ios_t str_; ios_mem(&str_, 1024); @@ -250,6 +252,49 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { g_snapshot->nodes.push_back(from_node); } +typedef pair inlineallocd_field_type_t; +vector _fieldpath_for_slot(jl_value_t *obj, jl_value_t *slot) { + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + + vector result; + bool found = _fieldpath_for_slot_helper(result, "", vt, obj, slot); + // jl_datatype_t* final_type; + // if (!found) { + // final_type = vt; + // } else { + // final_type = result.back().first; + // } + // NOTE THE RETURNED VECTOR IS REVERSED + return result; +} + +bool _fieldpath_for_slot_helper( + vector& out, const char *fieldname, jl_datatype_t *objtype, + void *obj, jl_value_t *slot) +{ + int nf = (int)jl_datatype_nfields(objtype); + jl_svec_t *field_names = jl_field_names(objtype); + for (int i = 0; i < nf; i++) { + jl_datatype_t *field_type = (jl_datatype_t*)jl_field_type(objtype, i); + void *fieldaddr = (char*)obj + jl_field_offset(objtype, i); + jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, i); + const char *field_name = jl_symbol_name(name); + if (fieldaddr >= slot) { + out.push_back(inlineallocd_field_type_t(objtype, field_name)); + return true; + } + if (jl_stored_inline((jl_value_t*)field_type)) { + bool found = _fieldpath_for_slot_helper(out, field_name, field_type, fieldaddr, slot); + if (found) { + out.push_back(inlineallocd_field_type_t(field_type, field_name)); + return true; + } + } + } + return false; +} + + void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) { record_node_to_gc_snapshot(root); @@ -288,16 +333,16 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size _record_gc_edge("object", "element", from, to, field_index); return; } - if (field_index < 0 || jl_datatype_nfields(type) <= field_index) { - // TODO: We're getting -1 in some cases - //jl_printf(JL_STDERR, "WARNING - incorrect field index (%zu) for type\n", field_index); - //jl_(type); - _record_gc_edge("object", "element", from, to, field_index); - return; - } - jl_svec_t *field_names = jl_field_names(type); - jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, field_index); - const char *field_name = jl_symbol_name(name); + // if (field_index < 0 || jl_datatype_nfields(type) <= field_index) { + // // TODO: We're getting -1 in some cases + // //jl_printf(JL_STDERR, "WARNING - incorrect field index (%zu) for type\n", field_index); + // //jl_(type); + // _record_gc_edge("object", "element", from, to, field_index); + // return; + // } + // jl_svec_t *field_names = jl_field_names(type); + // jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, field_index); + // const char *field_name = jl_symbol_name(name); _record_gc_edge("object", "property", from, to, g_snapshot->names.find_or_create_string_id(field_name)); diff --git a/src/gc.h b/src/gc.h index e846bfb7f8897..479a301f93d4f 100644 --- a/src/gc.h +++ b/src/gc.h @@ -642,7 +642,7 @@ extern int gc_verifying; #endif -int gc_slot_to_fieldidx(void *_obj, void *slot) JL_NOTSAFEPOINT; +JL_DLLEXPORT int gc_slot_to_fieldidx(void *_obj, void *slot) JL_NOTSAFEPOINT; int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT; NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset); From 64ac012558f5cd4dbf84fd5ed7d2a0fd3a839f6c Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 7 Oct 2021 10:55:27 -0400 Subject: [PATCH 02/11] Start hooking up the field paths --- src/gc-heap-snapshot.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 34ae82e2d4f76..1829aa3d7cb04 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -257,19 +257,13 @@ vector _fieldpath_for_slot(jl_value_t *obj, jl_value_ jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); vector result; - bool found = _fieldpath_for_slot_helper(result, "", vt, obj, slot); - // jl_datatype_t* final_type; - // if (!found) { - // final_type = vt; - // } else { - // final_type = result.back().first; - // } + bool found = _fieldpath_for_slot_helper(result, vt, obj, slot); // NOTE THE RETURNED VECTOR IS REVERSED return result; } bool _fieldpath_for_slot_helper( - vector& out, const char *fieldname, jl_datatype_t *objtype, + vector& out, jl_datatype_t *objtype, void *obj, jl_value_t *slot) { int nf = (int)jl_datatype_nfields(objtype); @@ -343,6 +337,13 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size // jl_svec_t *field_names = jl_field_names(type); // jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, field_index); // const char *field_name = jl_symbol_name(name); + auto field_paths = _fieldpath_for_slot(from, to); + // Build the new field name by joining the strings, and/or use the struct + field names + // to create a bunch of edges + nodes + // (iterate the vector in reverse - the last element is the first path) + for (auto it = field_paths.rbegin(); it != field_paths.rend(); ++it) { + // ... + } _record_gc_edge("object", "property", from, to, g_snapshot->names.find_or_create_string_id(field_name)); From af589eb25926413ac641f14ea45ae30864bb71d2 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Tue, 12 Oct 2021 22:13:37 -0400 Subject: [PATCH 03/11] connect up the code from before (but it's still not working) maybe still need to do the off-by-one jameson mentioned? :thinking: --- src/gc-heap-snapshot.cpp | 44 +++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 1829aa3d7cb04..9076377aa15bf 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -253,14 +253,6 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { } typedef pair inlineallocd_field_type_t; -vector _fieldpath_for_slot(jl_value_t *obj, jl_value_t *slot) { - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - - vector result; - bool found = _fieldpath_for_slot_helper(result, vt, obj, slot); - // NOTE THE RETURNED VECTOR IS REVERSED - return result; -} bool _fieldpath_for_slot_helper( vector& out, jl_datatype_t *objtype, @@ -278,7 +270,7 @@ bool _fieldpath_for_slot_helper( return true; } if (jl_stored_inline((jl_value_t*)field_type)) { - bool found = _fieldpath_for_slot_helper(out, field_name, field_type, fieldaddr, slot); + bool found = _fieldpath_for_slot_helper(out, field_type, fieldaddr, slot); if (found) { out.push_back(inlineallocd_field_type_t(field_type, field_name)); return true; @@ -288,6 +280,28 @@ bool _fieldpath_for_slot_helper( return false; } +vector _fieldpath_for_slot(jl_value_t *obj, jl_value_t *slot) { + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + + vector result; + bool found = _fieldpath_for_slot_helper(result, vt, obj, slot); + // TODO: maybe don't need the return value here actually...? + if (!found) { + // TODO: Debug these failures. Some of them seem really wrong, like with the slot + // being _kilobytes_ past the start of the object for an object with 1 pointer and 1 + // field... + // jl_printf(JL_STDERR, "WARNING: No fieldpath found for obj: %p slot: %p ", (void*)obj, (void*)slot); + // jl_datatype_t* type = (jl_datatype_t*)jl_typeof(obj); + // if (jl_is_datatype(type)) { + // jl_printf(JL_STDERR, "typeof: "); + // jl_static_show(JL_STDERR, (jl_value_t*)type); + // } + // jl_printf(JL_STDERR, "\n"); + } + // NOTE THE RETURNED VECTOR IS REVERSED + return result; +} + void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) { record_node_to_gc_snapshot(root); @@ -341,12 +355,22 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size // Build the new field name by joining the strings, and/or use the struct + field names // to create a bunch of edges + nodes // (iterate the vector in reverse - the last element is the first path) + // TODO: Prefer to create intermediate edges and nodes instead of a combined string path. + if (field_paths.size() > 1) { + jl_printf(JL_STDERR, "count: %lu\n", field_paths.size()); + } + + string path; for (auto it = field_paths.rbegin(); it != field_paths.rend(); ++it) { // ... + path += it->second; + if ( it + 1 != field_paths.rend() ) { + path += "."; + } } _record_gc_edge("object", "property", from, to, - g_snapshot->names.find_or_create_string_id(field_name)); + g_snapshot->names.find_or_create_string_id(path)); } void _gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT { // TODO: probably need to inline this here and make some changes From ef9b34bb0d1ad1cf94dcf4329476dc01817ceb94 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Wed, 13 Oct 2021 12:24:18 -0400 Subject: [PATCH 04/11] Add debug logging for objects in `Main` module --- src/gc-heap-snapshot.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 9076377aa15bf..bbccad281cea7 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -254,21 +254,31 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { typedef pair inlineallocd_field_type_t; +static bool debug_log = false; + bool _fieldpath_for_slot_helper( vector& out, jl_datatype_t *objtype, void *obj, jl_value_t *slot) { int nf = (int)jl_datatype_nfields(objtype); jl_svec_t *field_names = jl_field_names(objtype); + if (debug_log) { + jl_((jl_value_t*)objtype); + jl_printf(JL_STDERR, "obj: %p, slot: %p, nf: %d\n", obj, (void*)slot, nf); + } for (int i = 0; i < nf; i++) { jl_datatype_t *field_type = (jl_datatype_t*)jl_field_type(objtype, i); void *fieldaddr = (char*)obj + jl_field_offset(objtype, i); jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, i); const char *field_name = jl_symbol_name(name); + if (debug_log) { + jl_printf(JL_STDERR, "%d - field_name: %s fieldaddr: %p\n", i, field_name, fieldaddr); + } if (fieldaddr >= slot) { out.push_back(inlineallocd_field_type_t(objtype, field_name)); return true; } + // If the field is an inline-allocated struct if (jl_stored_inline((jl_value_t*)field_type)) { bool found = _fieldpath_for_slot_helper(out, field_type, fieldaddr, slot); if (found) { @@ -279,12 +289,24 @@ bool _fieldpath_for_slot_helper( } return false; } +JL_DLLEXPORT void jl_breakpoint(jl_value_t *v) +{ + // put a breakpoint in your debugger here +} + vector _fieldpath_for_slot(jl_value_t *obj, jl_value_t *slot) { jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + if (vt->name->module == jl_main_module) { + // jl_breakpoint(obj); + debug_log = true; + } vector result; bool found = _fieldpath_for_slot_helper(result, vt, obj, slot); + + debug_log = false; + // TODO: maybe don't need the return value here actually...? if (!found) { // TODO: Debug these failures. Some of them seem really wrong, like with the slot From 5578921fb23408ec867f796d0f85c0ee582d3490 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Wed, 13 Oct 2021 12:34:23 -0400 Subject: [PATCH 05/11] Fix silly mistake in slot accounting :) It works now!! ``` - ga::Main.A @302073680 - b.y::Array{Any, 1} @302026704 ``` --- src/gc-heap-snapshot.cpp | 10 +++++----- src/gc-heap-snapshot.h | 6 +++--- src/gc.c | 9 +++------ 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index bbccad281cea7..b8e6e14e45f98 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -258,7 +258,7 @@ static bool debug_log = false; bool _fieldpath_for_slot_helper( vector& out, jl_datatype_t *objtype, - void *obj, jl_value_t *slot) + void *obj, void *slot) { int nf = (int)jl_datatype_nfields(objtype); jl_svec_t *field_names = jl_field_names(objtype); @@ -295,7 +295,7 @@ JL_DLLEXPORT void jl_breakpoint(jl_value_t *v) } -vector _fieldpath_for_slot(jl_value_t *obj, jl_value_t *slot) { +vector _fieldpath_for_slot(jl_value_t *obj, void *slot) { jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); if (vt->name->module == jl_main_module) { // jl_breakpoint(obj); @@ -355,12 +355,12 @@ void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, cha _record_gc_edge("object", "property", (jl_value_t *)from, to, g_snapshot->names.find_or_create_string_id(name)); } -void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT { +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT { jl_datatype_t *type = (jl_datatype_t*)jl_typeof(from); // TODO: It seems like NamedTuples should have field names? Maybe there's another way to get them? if (jl_is_tuple_type(type) || jl_is_namedtuple_type(type)) { // TODO: Maybe not okay to match element and object - _record_gc_edge("object", "element", from, to, field_index); + _record_gc_edge("object", "element", from, to, /* TODO */0); return; } // if (field_index < 0 || jl_datatype_nfields(type) <= field_index) { @@ -373,7 +373,7 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size // jl_svec_t *field_names = jl_field_names(type); // jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, field_index); // const char *field_name = jl_symbol_name(name); - auto field_paths = _fieldpath_for_slot(from, to); + auto field_paths = _fieldpath_for_slot(from, slot); // Build the new field name by joining the strings, and/or use the struct + field names // to create a bunch of edges + nodes // (iterate the vector in reverse - the last element is the first path) diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index cfea57ed6b86c..6b1c6449737ab 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -17,7 +17,7 @@ extern "C" { void _gc_heap_snapshot_record_root(jl_value_t *root, char *name); void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT; void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT; -void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT; // Used for objects managed by GC, but which aren't exposed in the julia object, so have no // field or index. i.e. they're not reacahable from julia code, but we _will_ hit them in // the GC mark phase (so we can check their type tag to get the size). @@ -44,9 +44,9 @@ static inline void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_val _gc_heap_snapshot_record_module_edge(from, to, name); } } -static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT { +static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT { if (__unlikely(gc_heap_snapshot_enabled)) { - _gc_heap_snapshot_record_object_edge(from, to, field_index); + _gc_heap_snapshot_record_object_edge(from, to, slot); } } static inline void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT { diff --git a/src/gc.c b/src/gc.c index 9149ca02ccd5d..4f5b2a866edb4 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1959,8 +1959,7 @@ STATIC_INLINE int gc_mark_scan_obj8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mark if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); - gc_heap_snapshot_record_object_edge(parent, *slot, - gc_slot_to_fieldidx(parent, slot)); + gc_heap_snapshot_record_object_edge(parent, *slot, slot); } if (!gc_try_setmark(*pnew_obj, &obj8->nptr, ptag, pbits)) continue; @@ -1996,8 +1995,7 @@ STATIC_INLINE int gc_mark_scan_obj16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); // TODO: Should this be *parent? Given the way it's used above? - gc_heap_snapshot_record_object_edge(parent, *slot, - gc_slot_to_fieldidx(parent, slot)); + gc_heap_snapshot_record_object_edge(parent, *slot, slot); } if (!gc_try_setmark(*pnew_obj, &obj16->nptr, ptag, pbits)) continue; @@ -2032,8 +2030,7 @@ STATIC_INLINE int gc_mark_scan_obj32(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); - gc_heap_snapshot_record_object_edge(parent, *slot, - gc_slot_to_fieldidx(parent, slot)); + gc_heap_snapshot_record_object_edge(parent, *slot, slot); } if (!gc_try_setmark(*pnew_obj, &obj32->nptr, ptag, pbits)) continue; From 6889aeb349d90e964a8262f9ccd6ba5cf4a79b11 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 14 Oct 2021 12:41:13 -0400 Subject: [PATCH 06/11] Clean up debug logs --- src/gc-heap-snapshot.cpp | 37 +++++++++---------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index b8e6e14e45f98..fff19e1d9f7a1 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -289,17 +289,12 @@ bool _fieldpath_for_slot_helper( } return false; } -JL_DLLEXPORT void jl_breakpoint(jl_value_t *v) -{ - // put a breakpoint in your debugger here -} - vector _fieldpath_for_slot(jl_value_t *obj, void *slot) { jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + // TODO(PR): Remove this debugging code if (vt->name->module == jl_main_module) { - // jl_breakpoint(obj); - debug_log = true; + // debug_log = true; } vector result; @@ -312,13 +307,13 @@ vector _fieldpath_for_slot(jl_value_t *obj, void *slo // TODO: Debug these failures. Some of them seem really wrong, like with the slot // being _kilobytes_ past the start of the object for an object with 1 pointer and 1 // field... - // jl_printf(JL_STDERR, "WARNING: No fieldpath found for obj: %p slot: %p ", (void*)obj, (void*)slot); - // jl_datatype_t* type = (jl_datatype_t*)jl_typeof(obj); - // if (jl_is_datatype(type)) { - // jl_printf(JL_STDERR, "typeof: "); - // jl_static_show(JL_STDERR, (jl_value_t*)type); - // } - // jl_printf(JL_STDERR, "\n"); + jl_printf(JL_STDERR, "WARNING: No fieldpath found for obj: %p slot: %p ", (void*)obj, (void*)slot); + jl_datatype_t* type = (jl_datatype_t*)jl_typeof(obj); + if (jl_is_datatype(type)) { + jl_printf(JL_STDERR, "typeof: "); + jl_static_show(JL_STDERR, (jl_value_t*)type); + } + jl_printf(JL_STDERR, "\n"); } // NOTE THE RETURNED VECTOR IS REVERSED return result; @@ -363,25 +358,11 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void _record_gc_edge("object", "element", from, to, /* TODO */0); return; } - // if (field_index < 0 || jl_datatype_nfields(type) <= field_index) { - // // TODO: We're getting -1 in some cases - // //jl_printf(JL_STDERR, "WARNING - incorrect field index (%zu) for type\n", field_index); - // //jl_(type); - // _record_gc_edge("object", "element", from, to, field_index); - // return; - // } - // jl_svec_t *field_names = jl_field_names(type); - // jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, field_index); - // const char *field_name = jl_symbol_name(name); auto field_paths = _fieldpath_for_slot(from, slot); // Build the new field name by joining the strings, and/or use the struct + field names // to create a bunch of edges + nodes // (iterate the vector in reverse - the last element is the first path) // TODO: Prefer to create intermediate edges and nodes instead of a combined string path. - if (field_paths.size() > 1) { - jl_printf(JL_STDERR, "count: %lu\n", field_paths.size()); - } - string path; for (auto it = field_paths.rbegin(); it != field_paths.rend(); ++it) { // ... From c5a20e7bef5de09f27435d9a7924a16b56cff30b Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 14 Oct 2021 12:50:06 -0400 Subject: [PATCH 07/11] Fixups, comments, todos --- src/gc-debug.c | 3 ++- src/gc-heap-snapshot.cpp | 4 +++- src/gc.h | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/gc-debug.c b/src/gc-debug.c index 6dd8f1b2091c0..372ac844db530 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1205,7 +1205,7 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -JL_DLLEXPORT int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT +int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT { jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); int nf = (int)jl_datatype_nfields(vt); // what happens if you're inlined? lol @@ -1217,6 +1217,7 @@ JL_DLLEXPORT int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT } return -1; } + int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT { char *slot = (char*)_slot; diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 83e49ea68d9f9..91adcfce2afe4 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -419,7 +419,9 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void // TODO: It seems like NamedTuples should have field names? Maybe there's another way to get them? if (jl_is_tuple_type(type) || jl_is_namedtuple_type(type)) { // TODO: Maybe not okay to match element and object - _record_gc_edge("object", "element", from, to, /* TODO */0); + _record_gc_edge("object", "element", from, to, + // TODO: Get the names for tuple elements + g_snapshot->names.find_or_create_string_id("")); return; } auto field_paths = _fieldpath_for_slot(from, slot); diff --git a/src/gc.h b/src/gc.h index 479a301f93d4f..e846bfb7f8897 100644 --- a/src/gc.h +++ b/src/gc.h @@ -642,7 +642,7 @@ extern int gc_verifying; #endif -JL_DLLEXPORT int gc_slot_to_fieldidx(void *_obj, void *slot) JL_NOTSAFEPOINT; +int gc_slot_to_fieldidx(void *_obj, void *slot) JL_NOTSAFEPOINT; int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT; NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset); From 90984d7dd2e08d151fe9479ed4ee4b7c26521145 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 14 Oct 2021 14:45:52 -0400 Subject: [PATCH 08/11] fix merge conflict --- src/gc-heap-snapshot.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 91adcfce2afe4..ed7129ce31fce 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -299,7 +299,7 @@ vector _fieldpath_for_slot(jl_value_t *obj, void *slo jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); // TODO(PR): Remove this debugging code if (vt->name->module == jl_main_module) { - // debug_log = true; + debug_log = true; } vector result; @@ -409,13 +409,6 @@ void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, cha void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT { jl_datatype_t *type = (jl_datatype_t*)jl_typeof(from); - if (field_index < 0 || field_index > jl_datatype_nfields(type)) { - // TODO: We're getting -1 in some cases - jl_printf(JL_STDERR, "WARNING - incorrect field index (%d) for type\n", field_index); - jl_(type); - return; - } - // TODO: It seems like NamedTuples should have field names? Maybe there's another way to get them? if (jl_is_tuple_type(type) || jl_is_namedtuple_type(type)) { // TODO: Maybe not okay to match element and object From da82125c57475cb1e720c577b6d5cd43c17a5d9c Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 14 Oct 2021 15:06:28 -0400 Subject: [PATCH 09/11] Fix Tuple types in fieldpath_for_slot --- src/gc-heap-snapshot.cpp | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index ed7129ce31fce..2ced98ff1f6de 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -14,6 +15,7 @@ using std::vector; using std::string; +using std::ostringstream; using std::pair; using std::unordered_map; using std::unordered_set; @@ -256,7 +258,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { return node_idx; } -typedef pair inlineallocd_field_type_t; +typedef pair inlineallocd_field_type_t; // TODO(PR): remove this static bool debug_log = false; @@ -274,10 +276,19 @@ bool _fieldpath_for_slot_helper( for (int i = 0; i < nf; i++) { jl_datatype_t *field_type = (jl_datatype_t*)jl_field_type(objtype, i); void *fieldaddr = (char*)obj + jl_field_offset(objtype, i); - jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, i); - const char *field_name = jl_symbol_name(name); + ostringstream ss; // NOTE: must have same scope as field_name, below. + string field_name; + // TODO: NamedTuples should maybe have field names? Maybe another way to get them? + if (jl_is_tuple_type(objtype) || jl_is_namedtuple_type(objtype)) { + jl_printf(JL_STDERR, "HERE\n"); + ss << "[" << i << "]"; + field_name = ss.str().c_str(); // See scope comment, above. + } else { + jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, i); + field_name = jl_symbol_name(name); + } if (debug_log) { - jl_printf(JL_STDERR, "%d - field_name: %s fieldaddr: %p\n", i, field_name, fieldaddr); + jl_printf(JL_STDERR, "%d - field_name: %s fieldaddr: %p\n", i, field_name.c_str(), fieldaddr); } if (fieldaddr >= slot) { out.push_back(inlineallocd_field_type_t(objtype, field_name)); @@ -299,7 +310,7 @@ vector _fieldpath_for_slot(jl_value_t *obj, void *slo jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); // TODO(PR): Remove this debugging code if (vt->name->module == jl_main_module) { - debug_log = true; + // debug_log = true; } vector result; @@ -409,14 +420,6 @@ void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, cha void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT { jl_datatype_t *type = (jl_datatype_t*)jl_typeof(from); - // TODO: It seems like NamedTuples should have field names? Maybe there's another way to get them? - if (jl_is_tuple_type(type) || jl_is_namedtuple_type(type)) { - // TODO: Maybe not okay to match element and object - _record_gc_edge("object", "element", from, to, - // TODO: Get the names for tuple elements - g_snapshot->names.find_or_create_string_id("")); - return; - } auto field_paths = _fieldpath_for_slot(from, slot); // Build the new field name by joining the strings, and/or use the struct + field names // to create a bunch of edges + nodes From 3fbd301370fb586120c6f8c456583a017c408e4a Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 14 Oct 2021 15:09:23 -0400 Subject: [PATCH 10/11] Add TODO about overrunning type printing buffer --- src/gc-heap-snapshot.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 2ced98ff1f6de..4faa6be07f7ec 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -223,6 +223,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { : (size_t)jl_datatype_size(type); // print full type + // TODO: We _definitely_ have types longer than 1024 bytes.... ios_t str_; ios_mem(&str_, 1024); JL_STREAM* str = (JL_STREAM*)&str_; From 063b2860dc75541c81b859f7df139ab2fedec8af Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 14 Oct 2021 15:15:33 -0400 Subject: [PATCH 11/11] remove log --- src/gc-heap-snapshot.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 4faa6be07f7ec..979090191b945 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -281,7 +281,6 @@ bool _fieldpath_for_slot_helper( string field_name; // TODO: NamedTuples should maybe have field names? Maybe another way to get them? if (jl_is_tuple_type(objtype) || jl_is_namedtuple_type(objtype)) { - jl_printf(JL_STDERR, "HERE\n"); ss << "[" << i << "]"; field_name = ss.str().c_str(); // See scope comment, above. } else {