From 44a74eab6e9d1c4f88c5e8cedde4028381226572 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 16 Sep 2021 15:57:47 -0400 Subject: [PATCH 001/106] add empty files --- src/gc-heap-snapshot.cpp | 0 src/gc-heap-snapshot.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/gc-heap-snapshot.cpp create mode 100644 src/gc-heap-snapshot.h diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h new file mode 100644 index 0000000000000..e69de29bb2d1d From e207adfea19b6e90d361e6e256fb586c3194f978 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 16 Sep 2021 16:08:57 -0400 Subject: [PATCH 002/106] Initial dummy implementation --- src/gc-heap-snapshot.cpp | 16 ++++++++++++++++ src/gc-heap-snapshot.h | 27 +++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index e69de29bb2d1d..49efbdc695dc2 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -0,0 +1,16 @@ +#include "gc-heap-snapshot.h" + +JL_DLLEXPORT void take_gc_snapshot() { + jl_printf("HELLO\n"); + // Create the snapshot object + + // Enable GC Snapshotting + + // Do GC + // - which will callback into record_edge_to_gc_snapshot()... 
+ + // When we return, the snapshot is full + // Disable snapshotting + + // Dump the snapshot +} diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index e69de29bb2d1d..cf0da76506c80 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -0,0 +1,27 @@ +#ifndef JL_GC_HEAP_SNAPSHOT_H +#define JL_GC_HEAP_SNAPSHOT_H + +#include "julia.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// --------------------------------------------------------------------- +// Functions to call from GC when heap snapshot is enabled +// --------------------------------------------------------------------- +void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b); + +// --------------------------------------------------------------------- +// Functions to call from Julia to start heap snapshot +// --------------------------------------------------------------------- +// ... +JL_DLLEXPORT void take_gc_snapshot(); + + +#ifdef __cplusplus +} +#endif + + +#endif // JL_GC_HEAP_SNAPSHOT_H From 8e0c399063fdadad5ba6bd02fb5a744c922debb0 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 16 Sep 2021 16:14:07 -0400 Subject: [PATCH 003/106] dummy print --- src/gc-heap-snapshot.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 49efbdc695dc2..c732e03e651c7 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -1,7 +1,7 @@ #include "gc-heap-snapshot.h" JL_DLLEXPORT void take_gc_snapshot() { - jl_printf("HELLO\n"); + jl_printf(JL_STDERR, "%s\n", "HELLO"); // Create the snapshot object // Enable GC Snapshotting @@ -14,3 +14,7 @@ JL_DLLEXPORT void take_gc_snapshot() { // Dump the snapshot } + +void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b) { + jl_printf(JL_STDERR, "edge: %p -> %p\n", a, b); +} From 2f591beeadb3567594577de4709c856dc639454d Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 16 Sep 2021 16:15:25 -0400 Subject: [PATCH 004/106] make dummy 
callable --- src/gc-heap-snapshot.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index c732e03e651c7..d95e1b13371e7 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -15,6 +15,7 @@ JL_DLLEXPORT void take_gc_snapshot() { // Dump the snapshot } -void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b) { +// TODO: remove JL_DLLEXPORT +JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b) { jl_printf(JL_STDERR, "edge: %p -> %p\n", a, b); } From ae84768e285991246542ef82191cfe87088ebf41 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 16 Sep 2021 16:26:48 -0400 Subject: [PATCH 005/106] Add us to the makefile --- src/Makefile | 2 +- src/gc-heap-snapshot.cpp | 46 ++++++++++++++++++++++++++++++++++++++++ src/gc-heap-snapshot.h | 3 ++- 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/Makefile b/src/Makefile index 3646f14c75ff5..ba1b65b3fe363 100644 --- a/src/Makefile +++ b/src/Makefile @@ -45,7 +45,7 @@ RUNTIME_SRCS := \ jltypes gf typemap smallintset ast builtins module interpreter symbol \ dlload sys init task array dump staticdata toplevel jl_uv datatype \ simplevector runtime_intrinsics precompile \ - threading partr stackwalk gc gc-debug gc-pages gc-stacks method \ + threading partr stackwalk gc gc-debug gc-heap-snapshot gc-pages gc-stacks method \ jlapi signal-handling safepoint timing subtype \ crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage SRCS := jloptions runtime_ccall rtutils diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index d95e1b13371e7..3232198bebe6d 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -1,5 +1,51 @@ #include "gc-heap-snapshot.h" +#include <vector> +#include <string> + +using std::vector; +using std::string; + + + +// Dump format: +// Nodes +// "node_fields": +// [ "type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness" ] + +// 
Edges +// "edge_fields": +// [ "type", "name_or_index", "to_node" ] + +struct Node { + string type; + string name; + size_t id; + size_t self_size; + int edge_count; + size_t trace_node_id; + // whether the node is attached or dettached from the main application state + // TODO: .... meaning not yet understood. + // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 + int detachedness; // 0 - unknown, 1 - attached; 2 - detached +}; +struct Edge { +}; + +class HeapSnapshot { +public: + +private: + vector<Node> nodes; + vector<Edge> edges; +}; + + + + + + + JL_DLLEXPORT void take_gc_snapshot() { jl_printf(JL_STDERR, "%s\n", "HELLO"); // Create the snapshot object diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index cf0da76506c80..9941f682e5ffd 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -10,7 +10,8 @@ extern "C" { // --------------------------------------------------------------------- // Functions to call from GC when heap snapshot is enabled // --------------------------------------------------------------------- -void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b); +// TODO: remove JL_DLLEXPORT +JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b); // --------------------------------------------------------------------- // Functions to call from Julia to start heap snapshot From 9084709bd0da3218a6779a1538da25d414de130b Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 16 Sep 2021 16:49:41 -0400 Subject: [PATCH 006/106] set up global heap snapshot and serializer function --- src/gc-heap-snapshot.cpp | 21 ++++++++++++++++----- src/gc-heap-snapshot.h | 4 ++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 3232198bebe6d..70cd8a043c1f8 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -40,15 +40,13 @@ class 
HeapSnapshot { vector<Edge> edges; }; - - - - - +HeapSnapshot *g_snapshot = 0; JL_DLLEXPORT void take_gc_snapshot() { jl_printf(JL_STDERR, "%s\n", "HELLO"); // Create the snapshot object + HeapSnapshot snapshot; + g_snapshot = &snapshot; // Enable GC Snapshotting @@ -59,9 +57,22 @@ JL_DLLEXPORT void take_gc_snapshot() { // Disable snapshotting // Dump the snapshot + serialize_heap_snapshot(JL_STDERR, &snapshot); + g_snapshot = 0; } // TODO: remove JL_DLLEXPORT JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b) { + // check if snapshot is 0 + jl_printf(JL_STDERR, "edge: %p -> %p\n", a, b); } + +void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot *snapshot) { + jl_printf(stream, "{"); + + // ... + // ... + + jl_printf(stream, "}"); +} diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 9941f682e5ffd..31de6e3bba8a5 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -7,6 +7,10 @@ extern "C" { #endif +class HeapSnapshot; + +void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot *snapshot); + // --------------------------------------------------------------------- // Functions to call from GC when heap snapshot is enabled // --------------------------------------------------------------------- From 7b9e929188479e61098e5ff9e842c1eb35d97247 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 16 Sep 2021 17:34:25 -0400 Subject: [PATCH 007/106] it builds! 
--- src/gc-heap-snapshot.cpp | 83 ++++++++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 12 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 70cd8a043c1f8..389520b14f232 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -2,21 +2,20 @@ #include <vector> #include <string> +#include <unordered_map> using std::vector; using std::string; +using std::unordered_map; - +struct HeapSnapshot; +void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot); // Dump format: // Nodes // "node_fields": // [ "type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness" ] -// Edges -// "edge_fields": -// [ "type", "name_or_index", "to_node" ] - struct Node { string type; string name; @@ -29,21 +28,43 @@ struct Node { // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; + +// Edges +// "edge_fields": +// [ "type", "name_or_index", "to_node" ] + struct Edge { + string type; + size_t name_or_index; // essentially 'from' + size_t to_node; }; +typedef unordered_map<string, size_t> MapType; + class HeapSnapshot { public: -private: +// private: vector<Node> nodes; vector<Edge> edges; + + MapType names; }; + +size_t find_or_create_string_id(HeapSnapshot& snapshot, string key) { + auto &names = snapshot.names; + + auto val = names.find(key); + if (val == names.end()) { + val = names.insert(val, {key, names.size()}); + } + return val->second; +} + HeapSnapshot *g_snapshot = 0; JL_DLLEXPORT void take_gc_snapshot() { - jl_printf(JL_STDERR, "%s\n", "HELLO"); // Create the snapshot object HeapSnapshot snapshot; g_snapshot = &snapshot; @@ -57,22 +78,60 @@ JL_DLLEXPORT void take_gc_snapshot() { // Disable snapshotting // Dump the snapshot - serialize_heap_snapshot(JL_STDERR, &snapshot); + serialize_heap_snapshot(JL_STDERR, snapshot); g_snapshot = 0; } // TODO: remove JL_DLLEXPORT JL_DLLEXPORT void 
record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b) { - // check if snapshot is 0 + if (!g_snapshot) { + return; + } + + g_snapshot->edges.push_back(Edge{"", (size_t)a, (size_t)b}); jl_printf(JL_STDERR, "edge: %p -> %p\n", a, b); } -void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot *snapshot) { +void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, "{"); - // ... - // ... + jl_printf(stream, "\"nodes\":["); + bool first_node = true; + for (const auto &node : snapshot.nodes) { + if (!first_node) { + jl_printf(stream, ","); + first_node = false; + } + // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] + jl_printf(stream, "%d", find_or_create_string_id(snapshot, node.type)); // type + jl_printf(stream, ",%d", find_or_create_string_id(snapshot, node.name)); // name + jl_printf(stream, ",%d", 0);//XXX); // id + jl_printf(stream, ",%d", 0);//XXX); // self_size + jl_printf(stream, ",%d", 0);//XXX); // edge_count + jl_printf(stream, ",%d", 0);//XXX); // trace_node_id + jl_printf(stream, ",%d", 0);//XXX); // detachedness + jl_printf(stream, "\n"); + } + jl_printf(stream, "],\n"); + + jl_printf(stream, "\"edges\":["); + bool first_edge = true; + for (const auto &edge : snapshot.edges) { + if (!first_edge) { + jl_printf(stream, ","); + first_edge = false; + } + // edge type + jl_printf(stream, ",%d", find_or_create_string_id(snapshot, edge.type)); + // edge from + jl_printf(stream, ",%d", edge.name_or_index); + // edge to + // TODO: don't print comma after the last + jl_printf(stream, ",%d", edge.to_node); + jl_printf(stream, "\n"); + } + jl_printf(stream, "]"); jl_printf(stream, "}"); } From 26f15c2d66371732aa6e45da45f1227e6c372993 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 16 Sep 2021 17:44:34 -0400 Subject: [PATCH 008/106] fix commas --- src/gc-heap-snapshot.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) 
diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 389520b14f232..ac18bbd523fe0 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -62,12 +62,14 @@ size_t find_or_create_string_id(HeapSnapshot& snapshot, string key) { return val->second; } -HeapSnapshot *g_snapshot = 0; +HeapSnapshot *g_snapshot = nullptr; JL_DLLEXPORT void take_gc_snapshot() { // Create the snapshot object - HeapSnapshot snapshot; - g_snapshot = &snapshot; + //HeapSnapshot snapshot; + //g_snapshot = &snapshot; + if (!g_snapshot) + g_snapshot = new HeapSnapshot(); // Enable GC Snapshotting @@ -78,8 +80,9 @@ JL_DLLEXPORT void take_gc_snapshot() { // Disable snapshotting // Dump the snapshot - serialize_heap_snapshot(JL_STDERR, snapshot); - g_snapshot = 0; + serialize_heap_snapshot(JL_STDERR, *g_snapshot); + // TODO(PR): Put this back, but disabled for debugging + //g_snapshot = nullptr; } // TODO: remove JL_DLLEXPORT @@ -99,9 +102,10 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, "\"nodes\":["); bool first_node = true; for (const auto &node : snapshot.nodes) { - if (!first_node) { - jl_printf(stream, ","); + if (first_node) { first_node = false; + } else { + jl_printf(stream, ","); } // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] jl_printf(stream, "%d", find_or_create_string_id(snapshot, node.type)); // type @@ -118,12 +122,13 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, "\"edges\":["); bool first_edge = true; for (const auto &edge : snapshot.edges) { - if (!first_edge) { - jl_printf(stream, ","); + if (first_edge) { first_edge = false; + } else { + jl_printf(stream, ","); } // edge type - jl_printf(stream, ",%d", find_or_create_string_id(snapshot, edge.type)); + jl_printf(stream, "%d", find_or_create_string_id(snapshot, edge.type)); // edge from jl_printf(stream, ",%d", edge.name_or_index); // edge to From 
e3713bb81df71ee860a9c9f13d3dc3a7f8f33647 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 16 Sep 2021 18:11:16 -0400 Subject: [PATCH 009/106] add nodes; print addresses correctly --- src/gc-heap-snapshot.cpp | 76 +++++++++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index ac18bbd523fe0..a5495b62af3af 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -3,10 +3,12 @@ #include <vector> #include <string> #include <unordered_map> +#include <unordered_set> using std::vector; using std::string; using std::unordered_map; +using std::unordered_set; struct HeapSnapshot; void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot); @@ -49,18 +51,27 @@ class HeapSnapshot { vector<Edge> edges; MapType names; + unordered_set<size_t> seen_node_ids; }; +template<typename K, typename V> +auto find_or_insert_iter(unordered_map<K, V>& map, const K &key) { + auto val = map.find(key); + if (val == map.end()) { + val = map.insert(val, {key, map.size()}); + } + return val; +} size_t find_or_create_string_id(HeapSnapshot& snapshot, string key) { auto &names = snapshot.names; - auto val = names.find(key); - if (val == names.end()) { - val = names.insert(val, {key, names.size()}); - } - return val->second; + return find_or_insert_iter(names, key)->second; } +// TODO: Do we need to refer to nodes by their index in the node array? 
+//size_t find_or_create_node_id(HeapSnapshot& snapshot, string key) { +// return find_or_insert_iter(snapshot.nodes_map, key)->second; +//} HeapSnapshot *g_snapshot = nullptr; @@ -85,19 +96,45 @@ JL_DLLEXPORT void take_gc_snapshot() { //g_snapshot = nullptr; } +JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { + auto val = g_snapshot->seen_node_ids.find((size_t)a); + if (val != g_snapshot->seen_node_ids.end()) { + return; + } + // Insert a new Node + g_snapshot->seen_node_ids.insert(val, (size_t)a); + + Node node{ + "", // string type; + "", // string name; + (size_t)a, // size_t id; + 0, // size_t self_size; + 0, // int edge_count; + 0, // size_t trace_node_id; + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + }; + g_snapshot->nodes.push_back(node); +} + // TODO: remove JL_DLLEXPORT JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b) { if (!g_snapshot) { return; } + record_node_to_gc_snapshot(a); + record_node_to_gc_snapshot(b); g_snapshot->edges.push_back(Edge{"", (size_t)a, (size_t)b}); jl_printf(JL_STDERR, "edge: %p -> %p\n", a, b); } void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { - jl_printf(stream, "{"); + jl_printf(stream, "{\"snapshot\":{"); + jl_printf(stream, "\"meta\":{"); + jl_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"]"); + // jl_printf(stream, "\"node_types\":XXX"); + jl_printf(stream, "},\n"); // end "meta" jl_printf(stream, "\"nodes\":["); bool first_node = true; @@ -108,13 +145,13 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, ","); } // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] - jl_printf(stream, "%d", find_or_create_string_id(snapshot, node.type)); // type - jl_printf(stream, ",%d", find_or_create_string_id(snapshot, node.name)); // name - jl_printf(stream, ",%d", 0);//XXX); // id - jl_printf(stream, 
",%d", 0);//XXX); // self_size - jl_printf(stream, ",%d", 0);//XXX); // edge_count - jl_printf(stream, ",%d", 0);//XXX); // trace_node_id - jl_printf(stream, ",%d", 0);//XXX); // detachedness + jl_printf(stream, "%zu", find_or_create_string_id(snapshot, node.type)); + jl_printf(stream, ",%zu", find_or_create_string_id(snapshot, node.name)); + jl_printf(stream, ",%zu", node.id); + jl_printf(stream, ",%zu", 0);//XXX); // self_size + jl_printf(stream, ",%zu", 0);//XXX); // edge_count + jl_printf(stream, ",%zu", 0);//XXX); // trace_node_id + jl_printf(stream, ",%zu", 0);//XXX); // detachedness jl_printf(stream, "\n"); } jl_printf(stream, "],\n"); @@ -127,16 +164,13 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { } else { jl_printf(stream, ","); } - // edge type - jl_printf(stream, "%d", find_or_create_string_id(snapshot, edge.type)); - // edge from - jl_printf(stream, ",%d", edge.name_or_index); - // edge to - // TODO: don't print comma after the last - jl_printf(stream, ",%d", edge.to_node); + jl_printf(stream, "%zu", find_or_create_string_id(snapshot, edge.type)); + jl_printf(stream, ",%zu", edge.name_or_index); + jl_printf(stream, ",%zu", edge.to_node); jl_printf(stream, "\n"); } - jl_printf(stream, "]"); + jl_printf(stream, "]"); // end "edges" + jl_printf(stream, "}"); // end "snapshot" jl_printf(stream, "}"); } From 208a5ea42752db5d3883d60018b09844627c1922 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 16 Sep 2021 18:39:10 -0400 Subject: [PATCH 010/106] fill out metadata header --- src/gc-heap-snapshot.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index a5495b62af3af..b9bf9fb32dc59 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -124,16 +124,28 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b) { record_node_to_gc_snapshot(a); 
record_node_to_gc_snapshot(b); - g_snapshot->edges.push_back(Edge{"", (size_t)a, (size_t)b}); + g_snapshot->edges.push_back(Edge{"property", (size_t)a, (size_t)b}); jl_printf(JL_STDERR, "edge: %p -> %p\n", a, b); } void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { + // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 jl_printf(stream, "{\"snapshot\":{"); jl_printf(stream, "\"meta\":{"); - jl_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"]"); - // jl_printf(stream, "\"node_types\":XXX"); + jl_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); + jl_printf(stream, "\"node_types\":[["); + // TODO: print string table + jl_printf(stream, "], \"string\", \"number\", \"number\", \"number\", \"number\", \"number\"]"); + jl_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); + jl_printf(stream, "\"edge_types\":[[\"property\"],\"string_or_number\",\"node\"],"); + jl_printf(stream, "\"trace_function_info_fields\":[],"); + jl_printf(stream, "\"trace_node_fields\":[],"); + jl_printf(stream, "\"sample_fields\":[],"); + jl_printf(stream, "\"location_fields\":[],"); + jl_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); + jl_printf(stream, "\"edge_count\":%zu,", snapshot.edges.size()); + jl_printf(stream, "\"trace_function_count\":0"); jl_printf(stream, "},\n"); // end "meta" jl_printf(stream, "\"nodes\":["); From ec48cffe3e9ef85262ecba95256996adc41efd2b Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 16 Sep 2021 19:00:40 -0400 Subject: [PATCH 011/106] factor out string table --- src/gc-heap-snapshot.cpp | 86 +++++++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 24 deletions(-) diff --git 
a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index b9bf9fb32dc59..4ff1c70360d70 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -41,33 +41,65 @@ struct Edge { size_t to_node; }; -typedef unordered_map<string, size_t> MapType; +//template<typename K, typename V> +//auto find_or_insert_iter(unordered_map<K, V>& map, const K &key) { +//} + +struct StringTable { + typedef unordered_map<string, size_t> MapType; + + MapType map; + vector<string> strings; + + StringTable() {} + StringTable(std::initializer_list<string> strs) : strings(strs) { + for (const auto& str : strs) { + map.insert({str, map.size()}); + } + } + + size_t find_or_create_string_id(string key) { + auto val = map.find(key); + if (val == map.end()) { + val = map.insert(val, {key, map.size()}); + strings.push_back(key); + } + return val->second; + } + + void print_json_array(JL_STREAM *stream, bool newlines) { + jl_printf(stream, "["); + bool first = true; + for (const auto &str : strings) { + if (first) { + first = false; + } else { + jl_printf(stream, ","); + if (newlines) { + jl_printf(stream, "\n"); + } + } + jl_printf(stream, "\"%s\"", str.c_str()); + } + jl_printf(stream, "]"); + } +}; -class HeapSnapshot { +struct HeapSnapshot { public: // private: vector<Node> nodes; + vector<Edge> edges; - MapType names; + StringTable names; + StringTable node_types = {"node_type1", "node_type2"}; + StringTable edge_types = {"edge_type1", "edge_type2"}; unordered_set<size_t> seen_node_ids; }; -template<typename K, typename V> -auto find_or_insert_iter(unordered_map<K, V>& map, const K &key) { - auto val = map.find(key); - if (val == map.end()) { - val = map.insert(val, {key, map.size()}); - } - return val; -} -size_t find_or_create_string_id(HeapSnapshot& snapshot, string key) { - auto &names = snapshot.names; - - return find_or_insert_iter(names, key)->second; -} // TODO: Do we need to refer to nodes by their index in the node array? 
//size_t find_or_create_node_id(HeapSnapshot& snapshot, string key) { // return find_or_insert_iter(snapshot.nodes_map, key)->second; @@ -134,11 +166,13 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, "{\"snapshot\":{"); jl_printf(stream, "\"meta\":{"); jl_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); - jl_printf(stream, "\"node_types\":[["); - // TODO: print string table - jl_printf(stream, "], \"string\", \"number\", \"number\", \"number\", \"number\", \"number\"]"); + jl_printf(stream, "\"node_types\":["); + snapshot.node_types.print_json_array(stream, false); + jl_printf(stream, ",\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"]"); jl_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); - jl_printf(stream, "\"edge_types\":[[\"property\"],\"string_or_number\",\"node\"],"); + jl_printf(stream, "\"edge_types\":["); + snapshot.edge_types.print_json_array(stream, false); + jl_printf(stream, "\"string_or_number\",\"node\"],"); jl_printf(stream, "\"trace_function_info_fields\":[],"); jl_printf(stream, "\"trace_node_fields\":[],"); jl_printf(stream, "\"sample_fields\":[],"); @@ -157,8 +191,8 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, ","); } // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] - jl_printf(stream, "%zu", find_or_create_string_id(snapshot, node.type)); - jl_printf(stream, ",%zu", find_or_create_string_id(snapshot, node.name)); + jl_printf(stream, "%zu", snapshot.names.find_or_create_string_id(node.type)); + jl_printf(stream, ",%zu", snapshot.names.find_or_create_string_id(node.name)); jl_printf(stream, ",%zu", node.id); jl_printf(stream, ",%zu", 0);//XXX); // self_size jl_printf(stream, ",%zu", 0);//XXX); // edge_count @@ -176,12 +210,16 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot 
&snapshot) { } else { jl_printf(stream, ","); } - jl_printf(stream, "%zu", find_or_create_string_id(snapshot, edge.type)); + jl_printf(stream, "%zu", snapshot.names.find_or_create_string_id(edge.type)); jl_printf(stream, ",%zu", edge.name_or_index); jl_printf(stream, ",%zu", edge.to_node); jl_printf(stream, "\n"); } - jl_printf(stream, "]"); // end "edges" + jl_printf(stream, "],\n"); // end "edges" + + jl_printf(stream, "\"strings\":"); + + snapshot.names.print_json_array(stream, true); jl_printf(stream, "}"); // end "snapshot" jl_printf(stream, "}"); From 6e78dadae235cb8407d193d63fa53c7301d9a596 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 16 Sep 2021 19:01:17 -0400 Subject: [PATCH 012/106] Remove stderr println in record_edge --- src/gc-heap-snapshot.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 4ff1c70360d70..2cd2920dbebfb 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -157,8 +157,6 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b) { record_node_to_gc_snapshot(a); record_node_to_gc_snapshot(b); g_snapshot->edges.push_back(Edge{"property", (size_t)a, (size_t)b}); - - jl_printf(JL_STDERR, "edge: %p -> %p\n", a, b); } void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { From 5fcdd6d58e5f927334a5650338fb65c9fecc950e Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 16 Sep 2021 18:31:31 -0400 Subject: [PATCH 013/106] Plug the snapsshotter into GC fo real do --- src/gc-heap-snapshot.h | 6 +++--- src/gc.c | 18 +++++++++--------- src/gc.h | 13 +++++++++++++ 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 31de6e3bba8a5..172344b67ec8a 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -7,9 +7,9 @@ extern "C" { #endif -class HeapSnapshot; +struct HeapSnapshot; -void 
serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot *snapshot); +void serialize_heap_snapshot(JL_STREAM *stream, struct HeapSnapshot *snapshot); // --------------------------------------------------------------------- // Functions to call from GC when heap snapshot is enabled @@ -21,7 +21,7 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b); // Functions to call from Julia to start heap snapshot // --------------------------------------------------------------------- // ... -JL_DLLEXPORT void take_gc_snapshot(); +JL_DLLEXPORT void take_gc_snapshot(void); #ifdef __cplusplus diff --git a/src/gc.c b/src/gc.c index 577ac5839eb87..7847de114bbad 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1818,7 +1818,7 @@ STATIC_INLINE int gc_mark_scan_objarray(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, for (; begin < end; begin += objary->step) { *pnew_obj = *begin; if (*pnew_obj) - verify_parent2("obj array", objary->parent, begin, "elem(%d)", + gc_debug_edge2("obj array", objary->parent, begin, "elem(%d)", gc_slot_to_arrayidx(objary->parent, begin)); if (!gc_try_setmark(*pnew_obj, &objary->nptr, ptag, pbits)) continue; @@ -1854,7 +1854,7 @@ STATIC_INLINE int gc_mark_scan_array8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, jl_value_t **slot = &begin[*elem_begin]; *pnew_obj = *slot; if (*pnew_obj) - verify_parent2("array", ary8->elem.parent, slot, "elem(%d)", + gc_debug_edge2("array", ary8->elem.parent, slot, "elem(%d)", gc_slot_to_arrayidx(ary8->elem.parent, begin)); if (!gc_try_setmark(*pnew_obj, &ary8->elem.nptr, ptag, pbits)) continue; @@ -1902,7 +1902,7 @@ STATIC_INLINE int gc_mark_scan_array16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, jl_value_t **slot = &begin[*elem_begin]; *pnew_obj = *slot; if (*pnew_obj) - verify_parent2("array", ary16->elem.parent, slot, "elem(%d)", + gc_debug_edge2("array", ary16->elem.parent, slot, "elem(%d)", gc_slot_to_arrayidx(ary16->elem.parent, begin)); if (!gc_try_setmark(*pnew_obj, &ary16->elem.nptr, ptag, pbits)) continue; @@ -1948,7 
+1948,7 @@ STATIC_INLINE int gc_mark_scan_obj8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mark jl_value_t **slot = &((jl_value_t**)parent)[*begin]; *pnew_obj = *slot; if (*pnew_obj) - verify_parent2("object", parent, slot, "field(%d)", + gc_debug_edge2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); if (!gc_try_setmark(*pnew_obj, &obj8->nptr, ptag, pbits)) continue; @@ -1981,7 +1981,7 @@ STATIC_INLINE int gc_mark_scan_obj16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar jl_value_t **slot = &((jl_value_t**)parent)[*begin]; *pnew_obj = *slot; if (*pnew_obj) - verify_parent2("object", parent, slot, "field(%d)", + gc_debug_edge2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); if (!gc_try_setmark(*pnew_obj, &obj16->nptr, ptag, pbits)) continue; @@ -2014,7 +2014,7 @@ STATIC_INLINE int gc_mark_scan_obj32(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar jl_value_t **slot = &((jl_value_t**)parent)[*begin]; *pnew_obj = *slot; if (*pnew_obj) - verify_parent2("object", parent, slot, "field(%d)", + gc_debug_edge2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); if (!gc_try_setmark(*pnew_obj, &obj32->nptr, ptag, pbits)) continue; @@ -2402,12 +2402,12 @@ module_binding: { gc_setmark_buf_(ptls, b, mbits, sizeof(jl_binding_t)); } void *vb = jl_astaggedvalue(b); - verify_parent1("module", binding->parent, &vb, "binding_buff"); + gc_debug_edge1("module", binding->parent, &vb, "binding_buff"); (void)vb; jl_value_t *value = jl_atomic_load_relaxed(&b->value); jl_value_t *globalref = jl_atomic_load_relaxed(&b->globalref); if (value) { - verify_parent2("module", binding->parent, + gc_debug_edge2("module", binding->parent, &b->value, "binding(%s)", jl_symbol_name(b->name)); if (gc_try_setmark(value, &binding->nptr, &tag, &bits)) { new_obj = value; @@ -2539,7 +2539,7 @@ mark: { objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_array_t)); if (flags.how == 1) { void *val_buf = jl_astaggedvalue((char*)a->data - a->offset * 
a->elsize); - verify_parent1("array", new_obj, &val_buf, "buffer ('loc' addr is meaningless)"); + gc_debug_edge1("array", new_obj, &val_buf, "buffer ('loc' addr is meaningless)"); (void)val_buf; gc_setmark_buf_(ptls, (char*)a->data - a->offset * a->elsize, bits, jl_array_nbytes(a)); diff --git a/src/gc.h b/src/gc.h index 8b420d28cffbc..5d014d5ddf493 100644 --- a/src/gc.h +++ b/src/gc.h @@ -24,6 +24,7 @@ #endif #endif #include "julia_assert.h" +#include "gc-heap-snapshot.h" #ifdef __cplusplus extern "C" { @@ -631,6 +632,18 @@ extern int gc_verifying; #define verify_parent2(ty,obj,slot,arg1,arg2) do {} while (0) #define gc_verifying (0) #endif + + +// For GC Debugging +#define gc_debug_edge1(ty,obj,slot,arg1) do { \ + verify_parent1(ty,obj,slot,arg1); \ + record_edge_to_gc_snapshot(obj,slot); \ +} while (0) +#define gc_debug_edge2(ty,obj,slot,arg1,arg2) do { \ + verify_parent2(ty,obj,slot,arg1,arg2); \ + record_edge_to_gc_snapshot(obj,slot); \ +} while (0) + int gc_slot_to_fieldidx(void *_obj, void *slot); int gc_slot_to_arrayidx(void *_obj, void *begin); NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset); From e398fe009b4a4b5cc44e364e851edb8ab165ed04 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 16 Sep 2021 19:04:39 -0400 Subject: [PATCH 014/106] add todos --- src/gc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gc.h b/src/gc.h index 5d014d5ddf493..1e2f38cd7bbae 100644 --- a/src/gc.h +++ b/src/gc.h @@ -635,12 +635,14 @@ extern int gc_verifying; // For GC Debugging +// TODO: Is slot the right target object? #define gc_debug_edge1(ty,obj,slot,arg1) do { \ - verify_parent1(ty,obj,slot,arg1); \ + verify_parent1(ty,obj,slot,arg1); \ record_edge_to_gc_snapshot(obj,slot); \ } while (0) +// TODO: Is slot the right target object? 
#define gc_debug_edge2(ty,obj,slot,arg1,arg2) do { \ - verify_parent2(ty,obj,slot,arg1,arg2); \ + verify_parent2(ty,obj,slot,arg1,arg2); \ record_edge_to_gc_snapshot(obj,slot); \ } while (0) From 6db1ea5a1664aa349041a3726464e95f72545e9a Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 16 Sep 2021 19:27:02 -0400 Subject: [PATCH 015/106] direct output to a stream --- src/gc-heap-snapshot.cpp | 10 +++++----- src/gc-heap-snapshot.h | 6 +----- src/gc.h | 4 ++-- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 2cd2920dbebfb..ab22b6a062ef5 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -107,7 +107,7 @@ struct HeapSnapshot { HeapSnapshot *g_snapshot = nullptr; -JL_DLLEXPORT void take_gc_snapshot() { +JL_DLLEXPORT void take_gc_snapshot(JL_STREAM *stream) { // Create the snapshot object //HeapSnapshot snapshot; //g_snapshot = &snapshot; @@ -123,7 +123,7 @@ JL_DLLEXPORT void take_gc_snapshot() { // Disable snapshotting // Dump the snapshot - serialize_heap_snapshot(JL_STDERR, *g_snapshot); + serialize_heap_snapshot(stream, *g_snapshot); // TODO(PR): Put this back, but disabled for debugging //g_snapshot = nullptr; } @@ -174,11 +174,12 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, "\"trace_function_info_fields\":[],"); jl_printf(stream, "\"trace_node_fields\":[],"); jl_printf(stream, "\"sample_fields\":[],"); - jl_printf(stream, "\"location_fields\":[],"); + jl_printf(stream, "\"location_fields\":[]"); + jl_printf(stream, "},\n"); // end "meta" jl_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); jl_printf(stream, "\"edge_count\":%zu,", snapshot.edges.size()); jl_printf(stream, "\"trace_function_count\":0"); - jl_printf(stream, "},\n"); // end "meta" + jl_printf(stream, "},\n"); // end "snapshot" jl_printf(stream, "\"nodes\":["); bool first_node = true; @@ -219,6 +220,5 @@ void 
serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { snapshot.names.print_json_array(stream, true); - jl_printf(stream, "}"); // end "snapshot" jl_printf(stream, "}"); } diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 172344b67ec8a..5ec65bba2306f 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -7,10 +7,6 @@ extern "C" { #endif -struct HeapSnapshot; - -void serialize_heap_snapshot(JL_STREAM *stream, struct HeapSnapshot *snapshot); - // --------------------------------------------------------------------- // Functions to call from GC when heap snapshot is enabled // --------------------------------------------------------------------- @@ -21,7 +17,7 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b); // Functions to call from Julia to start heap snapshot // --------------------------------------------------------------------- // ... -JL_DLLEXPORT void take_gc_snapshot(void); +JL_DLLEXPORT void take_gc_snapshot(JL_STREAM *stream); #ifdef __cplusplus diff --git a/src/gc.h b/src/gc.h index 1e2f38cd7bbae..acb040867c58d 100644 --- a/src/gc.h +++ b/src/gc.h @@ -638,12 +638,12 @@ extern int gc_verifying; // TODO: Is slot the right target object? #define gc_debug_edge1(ty,obj,slot,arg1) do { \ verify_parent1(ty,obj,slot,arg1); \ - record_edge_to_gc_snapshot(obj,slot); \ + record_edge_to_gc_snapshot(obj,*slot); \ } while (0) // TODO: Is slot the right target object? 
#define gc_debug_edge2(ty,obj,slot,arg1,arg2) do { \ verify_parent2(ty,obj,slot,arg1,arg2); \ - record_edge_to_gc_snapshot(obj,slot); \ + record_edge_to_gc_snapshot(obj,*slot); \ } while (0) int gc_slot_to_fieldidx(void *_obj, void *slot); From 00506ca3d35489c79b01dbbaa3ffc97ebde8776b Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 16 Sep 2021 19:26:12 -0400 Subject: [PATCH 016/106] fix commas --- src/gc-heap-snapshot.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index ab22b6a062ef5..4605d63138376 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -46,7 +46,7 @@ struct Edge { //} struct StringTable { - typedef unordered_map MapType; + typedef unordered_map MapType; MapType map; vector strings; @@ -166,10 +166,12 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); jl_printf(stream, "\"node_types\":["); snapshot.node_types.print_json_array(stream, false); - jl_printf(stream, ",\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"]"); + jl_printf(stream, ","); + jl_printf(stream, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); jl_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); jl_printf(stream, "\"edge_types\":["); snapshot.edge_types.print_json_array(stream, false); + jl_printf(stream, ","); jl_printf(stream, "\"string_or_number\",\"node\"],"); jl_printf(stream, "\"trace_function_info_fields\":[],"); jl_printf(stream, "\"trace_node_fields\":[],"); From 7671eff6ef387fa76181866854463207e5219840 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Fri, 17 Sep 2021 09:20:27 -0400 Subject: [PATCH 017/106] rename to take_heap_snapshot --- src/gc-heap-snapshot.cpp | 2 +- src/gc-heap-snapshot.h | 4 ++-- 2 
files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 4605d63138376..4879808d73266 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -107,7 +107,7 @@ struct HeapSnapshot { HeapSnapshot *g_snapshot = nullptr; -JL_DLLEXPORT void take_gc_snapshot(JL_STREAM *stream) { +JL_DLLEXPORT void take_heap_snapshot(JL_STREAM *stream) { // Create the snapshot object //HeapSnapshot snapshot; //g_snapshot = &snapshot; diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 5ec65bba2306f..0394b878c79e6 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -14,10 +14,10 @@ extern "C" { JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b); // --------------------------------------------------------------------- -// Functions to call from Julia to start heap snapshot +// Functions to call from Julia to take heap snapshot // --------------------------------------------------------------------- // ... 
-JL_DLLEXPORT void take_gc_snapshot(JL_STREAM *stream); +JL_DLLEXPORT void take_heap_snapshot(JL_STREAM *stream); #ifdef __cplusplus From 612c6b8ad839a56ef359269017e92d359d40b97a Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Fri, 17 Sep 2021 09:47:48 -0400 Subject: [PATCH 018/106] add Julia wrapper function GC.take_heap_snapshot --- base/gcutils.jl | 11 +++++++++++ src/gc-heap-snapshot.cpp | 22 ++++++++-------------- src/gc-heap-snapshot.h | 3 +-- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/base/gcutils.jl b/base/gcutils.jl index b794bd32a55da..bdfc2f3a1bb00 100644 --- a/base/gcutils.jl +++ b/base/gcutils.jl @@ -105,6 +105,17 @@ Control whether garbage collection is enabled using a boolean argument (`true` f """ enable(on::Bool) = ccall(:jl_gc_enable, Int32, (Int32,), on) != 0 +""" + GC.take_heap_snapshot(io::IOStream) + +Write a snapshot of the heap, in the JSON format expected by the Chrome +Devtools Heap Snapshot viewer (.heapsnapshot extension), to the given +IO stream. +""" +function take_heap_snapshot(io) + ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid},), (io::IOStream).handle::Ptr{Cvoid}) +end + """ GC.enable_finalizers(on::Bool) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 4879808d73266..080362c1f69c9 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -107,25 +107,19 @@ struct HeapSnapshot { HeapSnapshot *g_snapshot = nullptr; -JL_DLLEXPORT void take_heap_snapshot(JL_STREAM *stream) { - // Create the snapshot object - //HeapSnapshot snapshot; - //g_snapshot = &snapshot; - if (!g_snapshot) - g_snapshot = new HeapSnapshot(); +JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { + // Enable snapshotting + g_snapshot = new HeapSnapshot(); - // Enable GC Snapshotting - - // Do GC - // - which will callback into record_edge_to_gc_snapshot()... + // Do GC, which will callback into record_edge_to_gc_snapshot()... 
+ jl_gc_collect(JL_GC_FULL); // When we return, the snapshot is full - // Disable snapshotting - // Dump the snapshot serialize_heap_snapshot(stream, *g_snapshot); - // TODO(PR): Put this back, but disabled for debugging - //g_snapshot = nullptr; + + // Disable snapshotting + g_snapshot = nullptr; } JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 0394b878c79e6..5e27693506f22 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -16,8 +16,7 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b); // --------------------------------------------------------------------- // Functions to call from Julia to take heap snapshot // --------------------------------------------------------------------- -// ... -JL_DLLEXPORT void take_heap_snapshot(JL_STREAM *stream); +JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream); #ifdef __cplusplus From cb0c8092d52f305e9e8dd25d1d6368481bda8996 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 17 Sep 2021 09:33:15 -0400 Subject: [PATCH 019/106] Add commented out code for getting size once we know the type --- src/gc-heap-snapshot.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 080362c1f69c9..de23ff184d1d3 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -134,7 +134,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { "", // string type; "", // string name; (size_t)a, // size_t id; - 0, // size_t self_size; + /*jl_datatype_size(type)*/ 0, // size_t self_size; 0, // int edge_count; 0, // size_t trace_node_id; 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached From 3087e8fe9132f8d7ef5e8755ae27831627a8c1e2 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 17 Sep 2021 10:13:42 -0400 Subject: [PATCH 020/106] Make every node an "object" type for now. 
--- src/gc-heap-snapshot.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index de23ff184d1d3..8f188401e2443 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -94,7 +94,7 @@ struct HeapSnapshot { vector edges; StringTable names; - StringTable node_types = {"node_type1", "node_type2"}; + StringTable node_types = {"object"}; StringTable edge_types = {"edge_type1", "edge_type2"}; unordered_set seen_node_ids; }; @@ -131,7 +131,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { g_snapshot->seen_node_ids.insert(val, (size_t)a); Node node{ - "", // string type; + "object", // string type; "", // string name; (size_t)a, // size_t id; /*jl_datatype_size(type)*/ 0, // size_t self_size; From c78e2107b348a1804052a976423276466fe872d6 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Fri, 17 Sep 2021 10:19:48 -0400 Subject: [PATCH 021/106] update hardcoded string tables --- src/gc-heap-snapshot.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 8f188401e2443..fbcc12c003e02 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -95,7 +95,7 @@ struct HeapSnapshot { StringTable names; StringTable node_types = {"object"}; - StringTable edge_types = {"edge_type1", "edge_type2"}; + StringTable edge_types = {"property"}; unordered_set seen_node_ids; }; From 632b52d225fe1664790ec60e13bc14fb91faee73 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 17 Sep 2021 10:18:39 -0400 Subject: [PATCH 022/106] trying to get type but also segfault --- src/gc-heap-snapshot.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index fbcc12c003e02..09b676eef654d 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -130,11 +130,16 @@ JL_DLLEXPORT void
record_node_to_gc_snapshot(jl_value_t *a) { // Insert a new Node g_snapshot->seen_node_ids.insert(val, (size_t)a); + jl_value_t* type = jl_typeof(a); + Node node{ "object", // string type; "", // string name; (size_t)a, // size_t id; - /*jl_datatype_size(type)*/ 0, // size_t self_size; + // TODO: This currently segfaults: + //(size_t)jl_datatype_size(type), // size_t self_size; + 0, // size_t self_size; + 0, // int edge_count; 0, // size_t trace_node_id; 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached From ace0d4ee99d910e14ebebcaf3aacb8364847d1a5 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Fri, 17 Sep 2021 15:49:03 -0400 Subject: [PATCH 023/106] don't write out empty arrays which confuse chrome --- src/gc-heap-snapshot.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 09b676eef654d..fc42a9c508c9a 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -171,15 +171,10 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, "\"edge_types\":["); snapshot.edge_types.print_json_array(stream, false); jl_printf(stream, ","); - jl_printf(stream, "\"string_or_number\",\"node\"],"); - jl_printf(stream, "\"trace_function_info_fields\":[],"); - jl_printf(stream, "\"trace_node_fields\":[],"); - jl_printf(stream, "\"sample_fields\":[],"); - jl_printf(stream, "\"location_fields\":[]"); + jl_printf(stream, "\"string_or_number\",\"node\"]"); jl_printf(stream, "},\n"); // end "meta" jl_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); - jl_printf(stream, "\"edge_count\":%zu,", snapshot.edges.size()); - jl_printf(stream, "\"trace_function_count\":0"); + jl_printf(stream, "\"edge_count\":%zu", snapshot.edges.size()); jl_printf(stream, "},\n"); // end "snapshot" jl_printf(stream, "\"nodes\":["); From 9471734bb40cb20e1efde519497b344a4c39a677 Mon Sep 17 00:00:00 2001 From: 
Nathan Daly Date: Fri, 17 Sep 2021 10:24:15 -0400 Subject: [PATCH 024/106] Re-enable the (Crashing) type --- src/gc-heap-snapshot.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index fc42a9c508c9a..d15bb9321352b 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -131,14 +131,16 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { g_snapshot->seen_node_ids.insert(val, (size_t)a); jl_value_t* type = jl_typeof(a); + jl_printf(JL_STDERR, "value: %p\n", a); + jl_printf(JL_STDERR, "type: %p\n", type); Node node{ "object", // string type; "", // string name; (size_t)a, // size_t id; // TODO: This currently segfaults: - //(size_t)jl_datatype_size(type), // size_t self_size; - 0, // size_t self_size; + (size_t)jl_datatype_size(type), // size_t self_size; + //0, // size_t self_size; 0, // int edge_count; 0, // size_t trace_node_id; From 5990e8ad4d7973bd7792f2835ac58b2c0b916c06 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 17 Sep 2021 13:24:08 -0400 Subject: [PATCH 025/106] guard against nullptr types for non-objects --- src/gc-heap-snapshot.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index d15bb9321352b..0d701de35a1b0 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -133,13 +133,21 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { jl_value_t* type = jl_typeof(a); jl_printf(JL_STDERR, "value: %p\n", a); jl_printf(JL_STDERR, "type: %p\n", type); + jl_static_show(JL_STDERR, a); + + size_t self_size = 0; + string name = ""; + if (type != nullptr) { + self_size = (size_t)jl_datatype_size(type); + name = "..."; + } Node node{ "object", // string type; - "", // string name; + name, // string name; (size_t)a, // size_t id; // TODO: This currently segfaults: - (size_t)jl_datatype_size(type), // size_t self_size; + self_size, // size_t 
self_size; //0, // size_t self_size; 0, // int edge_count; From aa635a6cc5cf471c6ca04de0c97c487260fadeb4 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 17 Sep 2021 13:47:26 -0400 Subject: [PATCH 026/106] Start to support size and name, but still crashing, so i commented out --- src/gc-heap-snapshot.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 0d701de35a1b0..be7f74c2f7258 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -114,12 +114,12 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { // Do GC, which will callback into record_edge_to_gc_snapshot()... jl_gc_collect(JL_GC_FULL); + // Disable snapshotting + g_snapshot = nullptr; + // When we return, the snapshot is full // Dump the snapshot serialize_heap_snapshot(stream, *g_snapshot); - - // Disable snapshotting - g_snapshot = nullptr; } JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { @@ -131,14 +131,14 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { g_snapshot->seen_node_ids.insert(val, (size_t)a); jl_value_t* type = jl_typeof(a); - jl_printf(JL_STDERR, "value: %p\n", a); - jl_printf(JL_STDERR, "type: %p\n", type); - jl_static_show(JL_STDERR, a); + // jl_printf(JL_STDERR, "value: %p\n", a); + // jl_printf(JL_STDERR, "type: %p\n", type); + // jl_static_show(JL_STDERR, a); size_t self_size = 0; string name = ""; if (type != nullptr) { - self_size = (size_t)jl_datatype_size(type); + //self_size = (size_t)jl_datatype_size(type); name = "..."; } From dfb017ac8173757573f3f72b5635edb7e54a9189 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 17 Sep 2021 15:52:09 -0400 Subject: [PATCH 027/106] Print node/edge counts & fix memory leak & build warnings --- src/gc-heap-snapshot.cpp | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 
be7f74c2f7258..4005195f94de8 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -107,9 +107,13 @@ struct HeapSnapshot { HeapSnapshot *g_snapshot = nullptr; +JL_DLLEXPORT int count_nodes = 0; +JL_DLLEXPORT int count_edges = 0; + JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { // Enable snapshotting - g_snapshot = new HeapSnapshot(); + HeapSnapshot snapshot; + g_snapshot = &snapshot; // Do GC, which will callback into record_edge_to_gc_snapshot()... jl_gc_collect(JL_GC_FULL); @@ -119,7 +123,10 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { // When we return, the snapshot is full // Dump the snapshot - serialize_heap_snapshot(stream, *g_snapshot); + serialize_heap_snapshot(stream, snapshot); + + jl_printf(JL_STDERR, "nodes: %d\n", count_nodes); + jl_printf(JL_STDERR, "edges: %d\n", count_edges); } JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { @@ -129,15 +136,16 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { } // Insert a new Node g_snapshot->seen_node_ids.insert(val, (size_t)a); + count_nodes += 1; jl_value_t* type = jl_typeof(a); - // jl_printf(JL_STDERR, "value: %p\n", a); - // jl_printf(JL_STDERR, "type: %p\n", type); - // jl_static_show(JL_STDERR, a); size_t self_size = 0; string name = ""; if (type != nullptr) { + //jl_printf(JL_STDERR, "value: %p\n", a); + //jl_printf(JL_STDERR, "type: %p\n", type); + //jl_static_show(JL_STDERR, a); //self_size = (size_t)jl_datatype_size(type); name = "..."; } @@ -166,6 +174,8 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b) { record_node_to_gc_snapshot(a); record_node_to_gc_snapshot(b); g_snapshot->edges.push_back(Edge{"property", (size_t)a, (size_t)b}); + + count_edges += 1; } void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { @@ -199,10 +209,10 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, "%zu", 
snapshot.names.find_or_create_string_id(node.type)); jl_printf(stream, ",%zu", snapshot.names.find_or_create_string_id(node.name)); jl_printf(stream, ",%zu", node.id); - jl_printf(stream, ",%zu", 0);//XXX); // self_size - jl_printf(stream, ",%zu", 0);//XXX); // edge_count - jl_printf(stream, ",%zu", 0);//XXX); // trace_node_id - jl_printf(stream, ",%zu", 0);//XXX); // detachedness + jl_printf(stream, ",%zu", (size_t)0);//XXX); // self_size + jl_printf(stream, ",%zu", (size_t)0);//XXX); // edge_count + jl_printf(stream, ",%zu", (size_t)0);//XXX); // trace_node_id + jl_printf(stream, ",%zu", (size_t)0);//XXX); // detachedness jl_printf(stream, "\n"); } jl_printf(stream, "],\n"); From e77c09e01da4c6081c6b91588dc6c135ad0a62ff Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 17 Sep 2021 15:59:19 -0400 Subject: [PATCH 028/106] Use node indexes instead of IDs --- src/gc-heap-snapshot.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 4005195f94de8..cb67c8fe07b81 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -96,7 +96,7 @@ struct HeapSnapshot { StringTable names; StringTable node_types = {"object"}; StringTable edge_types = {"property"}; - unordered_set seen_node_ids; + unordered_map node_ptr_to_index_map; }; @@ -130,14 +130,11 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { } JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { - auto val = g_snapshot->seen_node_ids.find((size_t)a); - if (val != g_snapshot->seen_node_ids.end()) { + auto val = g_snapshot->node_ptr_to_index_map.find((void*)a); + if (val != g_snapshot->node_ptr_to_index_map.end()) { return; } // Insert a new Node - g_snapshot->seen_node_ids.insert(val, (size_t)a); - count_nodes += 1; - jl_value_t* type = jl_typeof(a); size_t self_size = 0; @@ -150,6 +147,10 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { name = "..."; } + + 
g_snapshot->node_ptr_to_index_map.insert(val, {a, g_snapshot->nodes.size()}); + count_nodes += 1; + Node node{ "object", // string type; name, // string name; @@ -163,6 +164,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; g_snapshot->nodes.push_back(node); + } // TODO: remove JL_DLLEXPORT @@ -173,7 +175,9 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b) { record_node_to_gc_snapshot(a); record_node_to_gc_snapshot(b); - g_snapshot->edges.push_back(Edge{"property", (size_t)a, (size_t)b}); + g_snapshot->edges.push_back(Edge{"property", + g_snapshot->node_ptr_to_index_map[a], + g_snapshot->node_ptr_to_index_map[b]}); count_edges += 1; } From d567018c5ab5d39dc530d33f99db51a6afa454a4 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 17 Sep 2021 16:02:50 -0400 Subject: [PATCH 029/106] fixup writing indices correctly --- src/gc-heap-snapshot.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index cb67c8fe07b81..ffb0fc11fdc28 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -18,6 +18,7 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot); // "node_fields": // [ "type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness" ] +const int k_node_number_of_fields = 7; struct Node { string type; string name; @@ -35,6 +36,8 @@ struct Node { // "edge_fields": // [ "type", "name_or_index", "to_node" ] +const int k_edge_number_of_fields = 3; + struct Edge { string type; size_t name_or_index; // essentially 'from' @@ -148,7 +151,8 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { } - g_snapshot->node_ptr_to_index_map.insert(val, {a, g_snapshot->nodes.size()}); + g_snapshot->node_ptr_to_index_map.insert(val, + {a, g_snapshot->nodes.size() * k_node_number_of_fields}); count_nodes += 1; Node node{ From 
81890b1a0a2d845d42e8a43e134a698cbbf42019 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 17 Sep 2021 16:42:09 -0400 Subject: [PATCH 030/106] Added edge counts to the Node records --- src/gc-heap-snapshot.cpp | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index ffb0fc11fdc28..a0f04ed96ef3e 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -4,7 +4,9 @@ #include #include #include +#include +using std::cout; using std::endl; using std::vector; using std::string; using std::unordered_map; @@ -24,7 +26,7 @@ struct Node { string name; size_t id; size_t self_size; - int edge_count; + size_t edge_count; size_t trace_node_id; // whether the node is attached or dettached from the main application state // TODO: .... meaning not yet understood. @@ -136,6 +138,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { auto val = g_snapshot->node_ptr_to_index_map.find((void*)a); if (val != g_snapshot->node_ptr_to_index_map.end()) { return; + //return &g_snapshot->nodes[val->second]; } // Insert a new Node jl_value_t* type = jl_typeof(a); @@ -152,7 +155,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { g_snapshot->node_ptr_to_index_map.insert(val, - {a, g_snapshot->nodes.size() * k_node_number_of_fields}); + {a, g_snapshot->nodes.size()}); count_nodes += 1; Node node{ @@ -168,7 +171,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; g_snapshot->nodes.push_back(node); - + //return &g_snapshot->nodes.back(); } // TODO: remove JL_DLLEXPORT @@ -179,6 +182,11 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b) { record_node_to_gc_snapshot(a); record_node_to_gc_snapshot(b); + + auto from_node_idx = g_snapshot->node_ptr_to_index_map[a]; + //cout << from_node_idx << endl; + + g_snapshot->nodes[from_node_idx].edge_count += 1; 
g_snapshot->edges.push_back(Edge{"property", g_snapshot->node_ptr_to_index_map[a], g_snapshot->node_ptr_to_index_map[b]}); @@ -217,10 +225,10 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, "%zu", snapshot.names.find_or_create_string_id(node.type)); jl_printf(stream, ",%zu", snapshot.names.find_or_create_string_id(node.name)); jl_printf(stream, ",%zu", node.id); - jl_printf(stream, ",%zu", (size_t)0);//XXX); // self_size - jl_printf(stream, ",%zu", (size_t)0);//XXX); // edge_count - jl_printf(stream, ",%zu", (size_t)0);//XXX); // trace_node_id - jl_printf(stream, ",%zu", (size_t)0);//XXX); // detachedness + jl_printf(stream, ",%zu", node.self_size); + jl_printf(stream, ",%zu", node.edge_count); + jl_printf(stream, ",%zu", node.trace_node_id); + jl_printf(stream, ",%d", node.detachedness); jl_printf(stream, "\n"); } jl_printf(stream, "],\n"); @@ -234,8 +242,8 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, ","); } jl_printf(stream, "%zu", snapshot.names.find_or_create_string_id(edge.type)); - jl_printf(stream, ",%zu", edge.name_or_index); - jl_printf(stream, ",%zu", edge.to_node); + jl_printf(stream, ",%zu", edge.name_or_index * k_node_number_of_fields); + jl_printf(stream, ",%zu", edge.to_node * k_node_number_of_fields); jl_printf(stream, "\n"); } jl_printf(stream, "],\n"); // end "edges" From e08f16bec58751c8c1187cf60ae38c02dcf357c3 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 17 Sep 2021 16:49:11 -0400 Subject: [PATCH 031/106] DUH: OBJECTS NEED SIZES TO SHOW UP IN THE HEAP SNAPSHOT! 
--- src/gc-heap-snapshot.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index a0f04ed96ef3e..255e0e0719114 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -27,7 +27,7 @@ struct Node { size_t id; size_t self_size; size_t edge_count; - size_t trace_node_id; + size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots. // whether the node is attached or dettached from the main application state // TODO: .... meaning not yet understood. // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 @@ -143,14 +143,14 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { // Insert a new Node jl_value_t* type = jl_typeof(a); - size_t self_size = 0; + size_t self_size = 1; string name = ""; if (type != nullptr) { //jl_printf(JL_STDERR, "value: %p\n", a); //jl_printf(JL_STDERR, "type: %p\n", type); //jl_static_show(JL_STDERR, a); //self_size = (size_t)jl_datatype_size(type); - name = "..."; + name = "name"; } From be0491aa82e993c6f9b134b18fadeb38a4d5a0c9 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 17 Sep 2021 17:25:14 -0400 Subject: [PATCH 032/106] Get the size iff it's a datatype!! that's progress! 
--- src/gc-heap-snapshot.cpp | 58 ++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 255e0e0719114..d39f8a681c9f5 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -130,8 +130,9 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { // Dump the snapshot serialize_heap_snapshot(stream, snapshot); - jl_printf(JL_STDERR, "nodes: %d\n", count_nodes); - jl_printf(JL_STDERR, "edges: %d\n", count_edges); + // Debugging + //jl_printf(JL_STDERR, "nodes: %d\n", count_nodes); + //jl_printf(JL_STDERR, "edges: %d\n", count_edges); } JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { @@ -145,14 +146,55 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { size_t self_size = 1; string name = ""; - if (type != nullptr) { - //jl_printf(JL_STDERR, "value: %p\n", a); - //jl_printf(JL_STDERR, "type: %p\n", type); - //jl_static_show(JL_STDERR, a); - //self_size = (size_t)jl_datatype_size(type); - name = "name"; + //self_size = jl_f_sizeof(a); + + if (jl_is_datatype(a)) { + self_size = (size_t)jl_datatype_size(type); + name = jl_typeof_str(a); } + // // Copied from jl_static_show_x_: + // if ((uintptr_t)type < 4096U) { + // // Handle non-julia values: + // // TODO: sprintf the type pointer + // //name = sprintf(..., "", (void*)type); + // name = ""; + // } else if ((uintptr_t)a < 4096U) { + // // TODO: understand this case? 
+ // // n += jl_printf(out, ""); + // } + // else if (v == (jl_value_t*)jl_simplevector_type) { + // //n += jl_printf(out, "Core.SimpleVector"); + // } + // else if (v == (jl_value_t*)jl_typename_type) { + // //n += jl_printf(out, "Core.TypeName"); + // } + // else if (v == (jl_value_t*)jl_symbol_type) { + // //n += jl_printf(out, "Symbol"); + // } + // else if (v == (jl_value_t*)jl_methtable_type) { + // //n += jl_printf(out, "Core.MethodTable"); + // } + // else if (v == (jl_value_t*)jl_any_type) { + // //n += jl_printf(out, "Any"); + // } + // else if (v == (jl_value_t*)jl_type_type) { + // //n += jl_printf(out, "Type"); + // } + // else if (vt == jl_method_type) { + // //jl_method_t *m = (jl_method_t*)v; + // //n += jl_static_show_func_sig(out, m->sig); + // } else { + // // Handle julia values: + // //jl_printf(JL_STDERR, "value: %p\n", a); + // //jl_printf(JL_STDERR, "type: %p\n", type); + // //jl_static_show(JL_STDERR, a); + // self_size = (size_t)jl_datatype_size(type); + // name = "name"; + // } + g_snapshot->node_ptr_to_index_map.insert(val, {a, g_snapshot->nodes.size()}); From 348b327b26e2f63f12a1ca256c59ab2ad34ee995 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 17 Sep 2021 17:32:26 -0400 Subject: [PATCH 033/106] bugfix for DataType size --- src/gc-heap-snapshot.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index d39f8a681c9f5..b59bc59d9efd6 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -149,7 +149,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { //self_size = jl_f_sizeof(a); if (jl_is_datatype(a)) { - self_size = (size_t)jl_datatype_size(type); + self_size = (size_t)jl_datatype_size(a); name = jl_typeof_str(a); } From 8d40da82b81dd0d1f866095427c0b40d5391597f Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sun, 19 Sep 2021 23:35:17 -0400 Subject: [PATCH 034/106] Tried adding the object type to the snapshots It doesn't seem to 
show up in Chrome though... i noticed it was already being passed-in to the `gc_debug_edge`, so i figured we could try to use it. --- src/gc-heap-snapshot.cpp | 3 ++- src/gc-heap-snapshot.h | 2 +- src/gc.h | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index b59bc59d9efd6..9ad064c6eb7ad 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -217,7 +217,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { } // TODO: remove JL_DLLEXPORT -JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b) { +JL_DLLEXPORT void record_edge_to_gc_snapshot(char *type_description, jl_value_t *a, jl_value_t *b) { if (!g_snapshot) { return; } @@ -228,6 +228,7 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b) { auto from_node_idx = g_snapshot->node_ptr_to_index_map[a]; //cout << from_node_idx << endl; + g_snapshot->nodes[from_node_idx].type = type_description; g_snapshot->nodes[from_node_idx].edge_count += 1; g_snapshot->edges.push_back(Edge{"property", g_snapshot->node_ptr_to_index_map[a], diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 5e27693506f22..06ea015048644 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -11,7 +11,7 @@ extern "C" { // Functions to call from GC when heap snapshot is enabled // --------------------------------------------------------------------- // TODO: remove JL_DLLEXPORT -JL_DLLEXPORT void record_edge_to_gc_snapshot(jl_value_t *a, jl_value_t *b); +JL_DLLEXPORT void record_edge_to_gc_snapshot(char *ty, jl_value_t *a, jl_value_t *b); // --------------------------------------------------------------------- // Functions to call from Julia to take heap snapshot diff --git a/src/gc.h b/src/gc.h index acb040867c58d..1347473736873 100644 --- a/src/gc.h +++ b/src/gc.h @@ -638,12 +638,12 @@ extern int gc_verifying; // TODO: Is slot the right target object? 
#define gc_debug_edge1(ty,obj,slot,arg1) do { \ verify_parent1(ty,obj,slot,arg1); \ - record_edge_to_gc_snapshot(obj,*slot); \ + record_edge_to_gc_snapshot(ty, obj,*slot); \ } while (0) -// TODO: Is slot the right target object? +// TODO: Is slot the right target object? #define gc_debug_edge2(ty,obj,slot,arg1,arg2) do { \ verify_parent2(ty,obj,slot,arg1,arg2); \ - record_edge_to_gc_snapshot(obj,*slot); \ + record_edge_to_gc_snapshot(ty, obj, *slot); \ } while (0) int gc_slot_to_fieldidx(void *_obj, void *slot); From b6a4e861a8effe201a34afdfbcd5606fce716990 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 20 Sep 2021 12:59:25 -0400 Subject: [PATCH 035/106] get jl_value_t* from jl_taggedvalue_t* --- src/gc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc.h b/src/gc.h index 1347473736873..027fad799454f 100644 --- a/src/gc.h +++ b/src/gc.h @@ -638,7 +638,7 @@ extern int gc_verifying; // TODO: Is slot the right target object? #define gc_debug_edge1(ty,obj,slot,arg1) do { \ verify_parent1(ty,obj,slot,arg1); \ - record_edge_to_gc_snapshot(ty, obj,*slot); \ + record_edge_to_gc_snapshot(ty, obj, jl_valueof(*slot)); \ } while (0) // TODO: Is slot the right target object? 
#define gc_debug_edge2(ty,obj,slot,arg1,arg2) do { \ From b923c905c7c29b643805081dac6b9e4a95e1e3a7 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 20 Sep 2021 13:01:52 -0400 Subject: [PATCH 036/106] Use jl_value_t* to get name and size --- src/gc-heap-snapshot.cpp | 69 +++++++++------------------------------- 1 file changed, 15 insertions(+), 54 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 9ad064c6eb7ad..ed120d18a57a5 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -1,5 +1,8 @@ #include "gc-heap-snapshot.h" +#include "julia_internal.h" +#include "gc.h" + #include #include #include @@ -142,60 +145,22 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { //return &g_snapshot->nodes[val->second]; } // Insert a new Node - jl_value_t* type = jl_typeof(a); + jl_datatype_t* type = (jl_datatype_t*)jl_typeof(a); size_t self_size = 1; string name = ""; - //self_size = jl_f_sizeof(a); - if (jl_is_datatype(a)) { - self_size = (size_t)jl_datatype_size(a); - name = jl_typeof_str(a); + if ((uintptr_t)type < 4096U) { + name = ""; + } else if (type == (jl_datatype_t*)jl_buff_tag) { + name = ""; + } else if (type == (jl_datatype_t*)jl_malloc_tag) { + name = ""; + } else if (jl_is_datatype(type)) { + self_size = (size_t)jl_datatype_size(type); + name = jl_typename_str((jl_value_t*)type); } - // // Copied from jl_static_show_x_: - // if ((uintptr_t)type < 4096U) { - // // Handle non-julia values: - // // TODO: sprintf the type pointer - // //name = sprintf(..., "", (void*)type); - // name = ""; - // } else if ((uintptr_t)a < 4096U) { - // // TODO: understand this case? 
- // // n += jl_printf(out, ""); - // } - // else if (v == (jl_value_t*)jl_simplevector_type) { - // //n += jl_printf(out, "Core.SimpleVector"); - // } - // else if (v == (jl_value_t*)jl_typename_type) { - // //n += jl_printf(out, "Core.TypeName"); - // } - // else if (v == (jl_value_t*)jl_symbol_type) { - // //n += jl_printf(out, "Symbol"); - // } - // else if (v == (jl_value_t*)jl_methtable_type) { - // //n += jl_printf(out, "Core.MethodTable"); - // } - // else if (v == (jl_value_t*)jl_any_type) { - // //n += jl_printf(out, "Any"); - // } - // else if (v == (jl_value_t*)jl_type_type) { - // //n += jl_printf(out, "Type"); - // } - // else if (vt == jl_method_type) { - // //jl_method_t *m = (jl_method_t*)v; - // //n += jl_static_show_func_sig(out, m->sig); - // } else { - // // Handle julia values: - // //jl_printf(JL_STDERR, "value: %p\n", a); - // //jl_printf(JL_STDERR, "type: %p\n", type); - // //jl_static_show(JL_STDERR, a); - // self_size = (size_t)jl_datatype_size(type); - // name = "name"; - // } - - g_snapshot->node_ptr_to_index_map.insert(val, {a, g_snapshot->nodes.size()}); count_nodes += 1; @@ -204,16 +169,13 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { "object", // string type; name, // string name; (size_t)a, // size_t id; - // TODO: This currently segfaults: self_size, // size_t self_size; - //0, // size_t self_size; - 0, // int edge_count; + 0, // int edge_count, will be incremented on every outgoing edge 0, // size_t trace_node_id; - 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; g_snapshot->nodes.push_back(node); - //return &g_snapshot->nodes.back(); } // TODO: remove JL_DLLEXPORT @@ -226,7 +188,6 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot(char *type_description, jl_value_t record_node_to_gc_snapshot(b); auto from_node_idx = g_snapshot->node_ptr_to_index_map[a]; - //cout << from_node_idx << endl; g_snapshot->nodes[from_node_idx].type 
= type_description; g_snapshot->nodes[from_node_idx].edge_count += 1; From 7f4b0687f7d2bdf42c33a6a2bd8bba8361acf0b0 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 14:03:59 -0400 Subject: [PATCH 037/106] Pretty-print type names!!! :D --- src/gc-heap-snapshot.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index ed120d18a57a5..eb61454f94db7 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -157,8 +157,17 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { } else if (type == (jl_datatype_t*)jl_malloc_tag) { name = ""; } else if (jl_is_datatype(type)) { + + ios_t str_; + ios_mem(&str_, 1024); + JL_STREAM* str = (JL_STREAM*)&str_; + + jl_static_show(str, (jl_value_t*)type); + + name = string((const char*)str_.buf, str_.size); + ios_close(&str_); + self_size = (size_t)jl_datatype_size(type); - name = jl_typename_str((jl_value_t*)type); } g_snapshot->node_ptr_to_index_map.insert(val, @@ -172,7 +181,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { self_size, // size_t self_size; 0, // int edge_count, will be incremented on every outgoing edge - 0, // size_t trace_node_id; + 0, // size_t trace_node_id (unused) 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; g_snapshot->nodes.push_back(node); From e823805ad329fb7901aafc8d275137190d4e4e42 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 14:03:25 -0400 Subject: [PATCH 038/106] JSON-escape strings before printing them --- src/gc-heap-snapshot.cpp | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index eb61454f94db7..0c3bc1c8a51e0 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -15,6 +15,29 @@ using std::string; using std::unordered_map; using std::unordered_set; +// https://stackoverflow.com/a/33799784/751061 +void 
print_str_escape_json(JL_STREAM *stream, const std::string &s) { + jl_printf(stream, "\""); + for (auto c = s.cbegin(); c != s.cend(); c++) { + switch (*c) { + case '"': jl_printf(stream, "\\\""); break; + case '\\': jl_printf(stream, "\\\\"); break; + case '\b': jl_printf(stream, "\\b"); break; + case '\f': jl_printf(stream, "\\f"); break; + case '\n': jl_printf(stream, "\\n"); break; + case '\r': jl_printf(stream, "\\r"); break; + case '\t': jl_printf(stream, "\\t"); break; + default: + if ('\x00' <= *c && *c <= '\x1f') { + jl_printf(stream, "\\u%04x", (int)*c); + } else { + jl_printf(stream, "%c", *c); + } + } + } + jl_printf(stream, "\""); +} + struct HeapSnapshot; void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot); @@ -87,7 +110,9 @@ struct StringTable { jl_printf(stream, "\n"); } } - jl_printf(stream, "\"%s\"", str.c_str()); + // Escape strings for JSON + // TODO + print_str_escape_json(stream, str); } jl_printf(stream, "]"); } From 5b1277c08c9274926c40f9d245a2452d96cb88db Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 15:29:45 -0400 Subject: [PATCH 039/106] add assert (fails) that the edges are in order --- src/gc-heap-snapshot.cpp | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 0c3bc1c8a51e0..212ccf92f9d46 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -50,7 +50,7 @@ const int k_node_number_of_fields = 7; struct Node { string type; string name; - size_t id; + size_t id; // (vilterp) the memory address, right? size_t self_size; size_t edge_count; size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots. @@ -67,9 +67,12 @@ struct Node { const int k_edge_number_of_fields = 3; struct Edge { - string type; - size_t name_or_index; // essentially 'from' + string type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. 
+ size_t name_or_index; // name of the field (for objects/modules) or index of array size_t to_node; + + // Book-keeping fields (not used for serialization) + size_t from_node; // For asserting that we built the edges in the right order }; //template @@ -225,9 +228,13 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot(char *type_description, jl_value_t g_snapshot->nodes[from_node_idx].type = type_description; g_snapshot->nodes[from_node_idx].edge_count += 1; + g_snapshot->edges.push_back(Edge{"property", - g_snapshot->node_ptr_to_index_map[a], - g_snapshot->node_ptr_to_index_map[b]}); + g_snapshot->names.find_or_create_string_id(fieldname), // name or index + g_snapshot->node_ptr_to_index_map[b], // to + // book-keeping + g_snapshot->node_ptr_to_index_map[a], // from + }); count_edges += 1; } @@ -272,11 +279,14 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, "],\n"); jl_printf(stream, "\"edges\":["); - bool first_edge = true; - for (const auto &edge : snapshot.edges) { - if (first_edge) { - first_edge = false; - } else { + for (int i = 0; i < snapshot.edges.size(); ++i) { + // Check that we constructed our nodes & edges correctly. 
+ assert(i == 0 || + snapshot.edges[i - 1].from_node <= snapshot.edges[i].from_node); + + const auto &edge = snapshot.edges[i]; + + if (i != 0) { jl_printf(stream, ","); } jl_printf(stream, "%zu", snapshot.names.find_or_create_string_id(edge.type)); From 1100ddb36e5910045656d0e2250afa2ee24273cf Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 15:30:02 -0400 Subject: [PATCH 040/106] add hacked fieldname to gc_debug_edge --- src/gc-heap-snapshot.cpp | 3 +++ src/gc-heap-snapshot.h | 1 + src/gc.h | 6 +++++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 212ccf92f9d46..bdfa5bc20ce48 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -217,6 +217,9 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { // TODO: remove JL_DLLEXPORT JL_DLLEXPORT void record_edge_to_gc_snapshot(char *type_description, jl_value_t *a, jl_value_t *b) { + record_edge_to_gc_snapshot2(type_description, a, b, ""); +} +JL_DLLEXPORT void record_edge_to_gc_snapshot2(char *type_description, jl_value_t *a, jl_value_t *b, char *fieldname) { if (!g_snapshot) { return; } diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 06ea015048644..a2e4d7875c9db 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -12,6 +12,7 @@ extern "C" { // --------------------------------------------------------------------- // TODO: remove JL_DLLEXPORT JL_DLLEXPORT void record_edge_to_gc_snapshot(char *ty, jl_value_t *a, jl_value_t *b); +JL_DLLEXPORT void record_edge_to_gc_snapshot2(char *ty, jl_value_t *a, jl_value_t *b, char *name); // --------------------------------------------------------------------- // Functions to call from Julia to take heap snapshot diff --git a/src/gc.h b/src/gc.h index 027fad799454f..71ed349dd2ce5 100644 --- a/src/gc.h +++ b/src/gc.h @@ -643,7 +643,11 @@ extern int gc_verifying; // TODO: Is slot the right target object? 
#define gc_debug_edge2(ty,obj,slot,arg1,arg2) do { \ verify_parent2(ty,obj,slot,arg1,arg2); \ - record_edge_to_gc_snapshot(ty, obj, *slot); \ + if (strcmp(ty, "module") == 0) { \ + record_edge_to_gc_snapshot2(ty, obj, *slot, arg2); \ + } else { \ + record_edge_to_gc_snapshot2(ty, obj, *slot, ""); \ + } \ } while (0) int gc_slot_to_fieldidx(void *_obj, void *slot); From 02603d25948a90fbed674fdf9dbedc52d8973658 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 15:49:11 -0400 Subject: [PATCH 041/106] Change to the real format now that we understand it: - The edges array is *ordered* according to the parent of each edge in the Nodes table, such that the edges for each Node are contiguous. --- src/gc-heap-snapshot.cpp | 117 ++++++++++++++++++++------------------- 1 file changed, 59 insertions(+), 58 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index bdfa5bc20ce48..8bccad739e51e 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -41,7 +41,18 @@ void print_str_escape_json(JL_STREAM *stream, const std::string &s) { struct HeapSnapshot; void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot); -// Dump format: +// Edges +// "edge_fields": +// [ "type", "name_or_index", "to_node" ] + +struct Edge { + string type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. + size_t name_or_index; // name of the field (for objects/modules) or index of array + size_t to_node; + + // Book-keeping fields (not used for serialization) +}; + // Nodes // "node_fields": // [ "type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness" ] @@ -54,30 +65,15 @@ struct Node { size_t self_size; size_t edge_count; size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots. - // whether the node is attached or dettached from the main application state + // whether the from_node is attached or dettached from the main application state // TODO: .... 
meaning not yet understood. - // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 + // https://github.com/nodejs/from_node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 int detachedness; // 0 - unknown, 1 - attached; 2 - detached -}; - -// Edges -// "edge_fields": -// [ "type", "name_or_index", "to_node" ] - -const int k_edge_number_of_fields = 3; - -struct Edge { - string type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. - size_t name_or_index; // name of the field (for objects/modules) or index of array - size_t to_node; // Book-keeping fields (not used for serialization) - size_t from_node; // For asserting that we built the edges in the right order + vector edges; // For asserting that we built the edges in the right order }; -//template -//auto find_or_insert_iter(unordered_map& map, const K &key) { -//} struct StringTable { typedef unordered_map MapType; @@ -126,17 +122,18 @@ struct HeapSnapshot { // private: vector nodes; - - vector edges; + // edges are stored on each from_node StringTable names; StringTable node_types = {"object"}; StringTable edge_types = {"property"}; unordered_map node_ptr_to_index_map; + + size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. }; -// TODO: Do we need to refer to nodes by their index in the node array? +// TODO: Do we need to refer to nodes by their index in the from_node array? 
//size_t find_or_create_node_id(HeapSnapshot& snapshot, string key) { // return find_or_insert_iter(snapshot.nodes_map, key)->second; //} @@ -202,7 +199,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { {a, g_snapshot->nodes.size()}); count_nodes += 1; - Node node{ + Node from_node{ "object", // string type; name, // string name; (size_t)a, // size_t id; @@ -210,9 +207,12 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { 0, // int edge_count, will be incremented on every outgoing edge 0, // size_t trace_node_id (unused) - 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + + // Book-keeping: todo + vector(), }; - g_snapshot->nodes.push_back(node); + g_snapshot->nodes.push_back(from_node); } // TODO: remove JL_DLLEXPORT @@ -229,21 +229,23 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot2(char *type_description, jl_value_t auto from_node_idx = g_snapshot->node_ptr_to_index_map[a]; - g_snapshot->nodes[from_node_idx].type = type_description; - g_snapshot->nodes[from_node_idx].edge_count += 1; + auto &from_node = g_snapshot->nodes[from_node_idx]; + from_node.type = type_description; + from_node.edge_count += 1; - g_snapshot->edges.push_back(Edge{"property", - g_snapshot->names.find_or_create_string_id(fieldname), // name or index - g_snapshot->node_ptr_to_index_map[b], // to - // book-keeping - g_snapshot->node_ptr_to_index_map[a], // from - }); + from_node.edges.push_back(Edge{ + "property", + g_snapshot->names.find_or_create_string_id(fieldname), // name or index + g_snapshot->node_ptr_to_index_map[b], // to + // book-keeping + }); - count_edges += 1; + g_snapshot->num_edges += 1; + count_edges += 1; // debugging } void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { - // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 + // mimicking 
https://github.com/nodejs/from_node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 jl_printf(stream, "{\"snapshot\":{"); jl_printf(stream, "\"meta\":{"); jl_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); @@ -255,47 +257,46 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, "\"edge_types\":["); snapshot.edge_types.print_json_array(stream, false); jl_printf(stream, ","); - jl_printf(stream, "\"string_or_number\",\"node\"]"); + jl_printf(stream, "\"string_or_number\",\"from_node\"]"); jl_printf(stream, "},\n"); // end "meta" jl_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); - jl_printf(stream, "\"edge_count\":%zu", snapshot.edges.size()); + jl_printf(stream, "\"edge_count\":%zu", snapshot.num_edges); jl_printf(stream, "},\n"); // end "snapshot" jl_printf(stream, "\"nodes\":["); bool first_node = true; - for (const auto &node : snapshot.nodes) { + for (const auto &from_node : snapshot.nodes) { if (first_node) { first_node = false; } else { jl_printf(stream, ","); } // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] - jl_printf(stream, "%zu", snapshot.names.find_or_create_string_id(node.type)); - jl_printf(stream, ",%zu", snapshot.names.find_or_create_string_id(node.name)); - jl_printf(stream, ",%zu", node.id); - jl_printf(stream, ",%zu", node.self_size); - jl_printf(stream, ",%zu", node.edge_count); - jl_printf(stream, ",%zu", node.trace_node_id); - jl_printf(stream, ",%d", node.detachedness); + jl_printf(stream, "%zu", snapshot.names.find_or_create_string_id(from_node.type)); + jl_printf(stream, ",%zu", snapshot.names.find_or_create_string_id(from_node.name)); + jl_printf(stream, ",%zu", from_node.id); + jl_printf(stream, ",%zu", from_node.self_size); + jl_printf(stream, ",%zu", from_node.edge_count); + jl_printf(stream, ",%zu", 
from_node.trace_node_id); + jl_printf(stream, ",%d", from_node.detachedness); jl_printf(stream, "\n"); } jl_printf(stream, "],\n"); jl_printf(stream, "\"edges\":["); - for (int i = 0; i < snapshot.edges.size(); ++i) { - // Check that we constructed our nodes & edges correctly. - assert(i == 0 || - snapshot.edges[i - 1].from_node <= snapshot.edges[i].from_node); - - const auto &edge = snapshot.edges[i]; - - if (i != 0) { - jl_printf(stream, ","); + bool first_edge = true; + for (const auto &from_node : snapshot.nodes) { + for (const auto &edge : from_node.edges) { + if (first_edge) { + first_edge = false; + } else { + jl_printf(stream, ","); + } + jl_printf(stream, "%zu", snapshot.names.find_or_create_string_id(edge.type)); + jl_printf(stream, ",%zu", edge.name_or_index * k_node_number_of_fields); + jl_printf(stream, ",%zu", edge.to_node * k_node_number_of_fields); + jl_printf(stream, "\n"); } - jl_printf(stream, "%zu", snapshot.names.find_or_create_string_id(edge.type)); - jl_printf(stream, ",%zu", edge.name_or_index * k_node_number_of_fields); - jl_printf(stream, ",%zu", edge.to_node * k_node_number_of_fields); - jl_printf(stream, "\n"); } jl_printf(stream, "],\n"); // end "edges" From 128eefbccb2ae5ecc2ac8536dea5a4baf67dad1d Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 16:28:42 -0400 Subject: [PATCH 042/106] Fix node_type and edge_type string table printing --- src/gc-heap-snapshot.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 8bccad739e51e..97a00e3c3c9d9 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -272,7 +272,7 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, ","); } // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] - jl_printf(stream, "%zu", snapshot.names.find_or_create_string_id(from_node.type)); + jl_printf(stream, "%zu", 
snapshot.node_types.find_or_create_string_id(from_node.type)); jl_printf(stream, ",%zu", snapshot.names.find_or_create_string_id(from_node.name)); jl_printf(stream, ",%zu", from_node.id); jl_printf(stream, ",%zu", from_node.self_size); @@ -292,7 +292,7 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { } else { jl_printf(stream, ","); } - jl_printf(stream, "%zu", snapshot.names.find_or_create_string_id(edge.type)); + jl_printf(stream, "%zu", snapshot.edge_types.find_or_create_string_id(edge.type)); jl_printf(stream, ",%zu", edge.name_or_index * k_node_number_of_fields); jl_printf(stream, ",%zu", edge.to_node * k_node_number_of_fields); jl_printf(stream, "\n"); From ff20542a1cbb35d690a0707c311810d5a2530de9 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 16:51:04 -0400 Subject: [PATCH 043/106] Fix node_name and edge_name serialization --- src/gc-heap-snapshot.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 97a00e3c3c9d9..1b192795456f4 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -46,7 +46,7 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot); // [ "type", "name_or_index", "to_node" ] struct Edge { - string type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. + size_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. size_t name_or_index; // name of the field (for objects/modules) or index of array size_t to_node; @@ -59,7 +59,7 @@ struct Edge { const int k_node_number_of_fields = 7; struct Node { - string type; + size_t type; string name; size_t id; // (vilterp) the memory address, right? 
size_t self_size; @@ -200,7 +200,9 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { count_nodes += 1; Node from_node{ - "object", // string type; + // We pick a default type here, which will be set for the _targets_ of edges. + // TODO: What's a good default? + g_snapshot->node_types.find_or_create_string_id("object"), // string type; name, // string name; (size_t)a, // size_t id; self_size, // size_t self_size; @@ -230,11 +232,11 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot2(char *type_description, jl_value_t auto from_node_idx = g_snapshot->node_ptr_to_index_map[a]; auto &from_node = g_snapshot->nodes[from_node_idx]; - from_node.type = type_description; + from_node.type = g_snapshot->node_types.find_or_create_string_id(type_description); from_node.edge_count += 1; from_node.edges.push_back(Edge{ - "property", + g_snapshot->edge_types.find_or_create_string_id("property"), g_snapshot->names.find_or_create_string_id(fieldname), // name or index g_snapshot->node_ptr_to_index_map[b], // to // book-keeping @@ -272,7 +274,7 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, ","); } // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] - jl_printf(stream, "%zu", snapshot.node_types.find_or_create_string_id(from_node.type)); + jl_printf(stream, "%zu", from_node.type); jl_printf(stream, ",%zu", snapshot.names.find_or_create_string_id(from_node.name)); jl_printf(stream, ",%zu", from_node.id); jl_printf(stream, ",%zu", from_node.self_size); @@ -292,7 +294,7 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { } else { jl_printf(stream, ","); } - jl_printf(stream, "%zu", snapshot.edge_types.find_or_create_string_id(edge.type)); + jl_printf(stream, "%zu", edge.type); jl_printf(stream, ",%zu", edge.name_or_index * k_node_number_of_fields); jl_printf(stream, ",%zu", edge.to_node * k_node_number_of_fields); jl_printf(stream, "\n"); From 
2044c6f5e8dfb17325246f9f4ff74abbc257a49b Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 17:05:55 -0400 Subject: [PATCH 044/106] Add todo list --- src/gc-heap-snapshot.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 1b192795456f4..ed1ea766c10fb 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -15,6 +15,28 @@ using std::string; using std::unordered_map; using std::unordered_set; +// TODOs: +// - Proper node types/kinds +// 1. map from the names provided to verify_parent to the correct JS names +// 2. change the names at the callsite +// 3. introduce an enum to be set a the callsite and mapped to correct names +// - Field names bitwise or-ing +// - Array indexes +// - already have these, just have to be able to let them know we're at an array +// - Field index -> names +// - same as above, but we need to convert field index to field name (either inside, or at caller) +// - array sizes +// - string sizes + +// Need three functions: +// - gc_heap_snapshot_record_array_edge(from, to, int index) +// - gc_heap_snapshot_record_module_edge(from, to, char* name) +// - gc_heap_snapshot_record_object_edge(from, to, int field_index) + + + + + // https://stackoverflow.com/a/33799784/751061 void print_str_escape_json(JL_STREAM *stream, const std::string &s) { jl_printf(stream, "\""); From b6a5b037bbc4298193f644f4ae869359bff159ef Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 17:13:44 -0400 Subject: [PATCH 045/106] remove TODO on the bitwise or-ing, it isn't needed --- src/gc-heap-snapshot.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index ed1ea766c10fb..b464c14b05bd2 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -20,7 +20,6 @@ using std::unordered_set; // 1. map from the names provided to verify_parent to the correct JS names // 2. 
change the names at the callsite // 3. introduce an enum to be set a the callsite and mapped to correct names -// - Field names bitwise or-ing // - Array indexes // - already have these, just have to be able to let them know we're at an array // - Field index -> names From dea884cf4ec5b079435f08d69761c760a8bbac72 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 17:18:55 -0400 Subject: [PATCH 046/106] Re-Split out the debug functions --- src/gc.c | 34 ++++++++++++++++++++++++++++------ src/gc.h | 2 -- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/gc.c b/src/gc.c index 7847de114bbad..78fab173f0797 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1817,9 +1817,12 @@ STATIC_INLINE int gc_mark_scan_objarray(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, (void)jl_assume(objary == (gc_mark_objarray_t*)sp->data); for (; begin < end; begin += objary->step) { *pnew_obj = *begin; - if (*pnew_obj) + if (*pnew_obj) { + verify_parent2("obj array", objary->parent, begin, "elem(%d)", + gc_slot_to_arrayidx(objary->parent, begin)); gc_debug_edge2("obj array", objary->parent, begin, "elem(%d)", gc_slot_to_arrayidx(objary->parent, begin)); + } if (!gc_try_setmark(*pnew_obj, &objary->nptr, ptag, pbits)) continue; begin += objary->step; @@ -1853,9 +1856,12 @@ STATIC_INLINE int gc_mark_scan_array8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, for (; elem_begin < elem_end; elem_begin++) { jl_value_t **slot = &begin[*elem_begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { + verify_parent2("array", ary8->elem.parent, slot, "elem(%d)", + gc_slot_to_arrayidx(ary8->elem.parent, begin)); gc_debug_edge2("array", ary8->elem.parent, slot, "elem(%d)", gc_slot_to_arrayidx(ary8->elem.parent, begin)); + } if (!gc_try_setmark(*pnew_obj, &ary8->elem.nptr, ptag, pbits)) continue; elem_begin++; @@ -1901,9 +1907,12 @@ STATIC_INLINE int gc_mark_scan_array16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, for (; elem_begin < elem_end; elem_begin++) { jl_value_t **slot = &begin[*elem_begin]; 
*pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { + verify_parent2("array", ary16->elem.parent, slot, "elem(%d)", + gc_slot_to_arrayidx(ary16->elem.parent, begin)); gc_debug_edge2("array", ary16->elem.parent, slot, "elem(%d)", gc_slot_to_arrayidx(ary16->elem.parent, begin)); + } if (!gc_try_setmark(*pnew_obj, &ary16->elem.nptr, ptag, pbits)) continue; elem_begin++; @@ -1947,9 +1956,12 @@ STATIC_INLINE int gc_mark_scan_obj8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mark for (; begin < end; begin++) { jl_value_t **slot = &((jl_value_t**)parent)[*begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { + verify_parent2("object", parent, slot, "field(%d)", + gc_slot_to_fieldidx(parent, slot)); gc_debug_edge2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); + } if (!gc_try_setmark(*pnew_obj, &obj8->nptr, ptag, pbits)) continue; begin++; @@ -1980,9 +1992,12 @@ STATIC_INLINE int gc_mark_scan_obj16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar for (; begin < end; begin++) { jl_value_t **slot = &((jl_value_t**)parent)[*begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { + verify_parent2("object", parent, slot, "field(%d)", + gc_slot_to_fieldidx(parent, slot)); gc_debug_edge2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); + } if (!gc_try_setmark(*pnew_obj, &obj16->nptr, ptag, pbits)) continue; begin++; @@ -2013,9 +2028,12 @@ STATIC_INLINE int gc_mark_scan_obj32(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar for (; begin < end; begin++) { jl_value_t **slot = &((jl_value_t**)parent)[*begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { + verify_parent2("object", parent, slot, "field(%d)", + gc_slot_to_fieldidx(parent, slot)); gc_debug_edge2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); + } if (!gc_try_setmark(*pnew_obj, &obj32->nptr, ptag, pbits)) continue; begin++; @@ -2402,11 +2420,14 @@ module_binding: { gc_setmark_buf_(ptls, b, mbits, sizeof(jl_binding_t)); } void *vb = 
jl_astaggedvalue(b); + verify_parent1("module", binding->parent, &vb, "binding_buff"); gc_debug_edge1("module", binding->parent, &vb, "binding_buff"); (void)vb; jl_value_t *value = jl_atomic_load_relaxed(&b->value); jl_value_t *globalref = jl_atomic_load_relaxed(&b->globalref); if (value) { + verify_parent2("module", binding->parent, + &b->value, "binding(%s)", jl_symbol_name(b->name)); gc_debug_edge2("module", binding->parent, &b->value, "binding(%s)", jl_symbol_name(b->name)); if (gc_try_setmark(value, &binding->nptr, &tag, &bits)) { @@ -2539,6 +2560,7 @@ mark: { objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_array_t)); if (flags.how == 1) { void *val_buf = jl_astaggedvalue((char*)a->data - a->offset * a->elsize); + verify_parent1("array", new_obj, &val_buf, "buffer ('loc' addr is meaningless)"); gc_debug_edge1("array", new_obj, &val_buf, "buffer ('loc' addr is meaningless)"); (void)val_buf; gc_setmark_buf_(ptls, (char*)a->data - a->offset * a->elsize, diff --git a/src/gc.h b/src/gc.h index 71ed349dd2ce5..28aa9834d8823 100644 --- a/src/gc.h +++ b/src/gc.h @@ -637,12 +637,10 @@ extern int gc_verifying; // For GC Debugging // TODO: Is slot the right target object? #define gc_debug_edge1(ty,obj,slot,arg1) do { \ - verify_parent1(ty,obj,slot,arg1); \ record_edge_to_gc_snapshot(ty, obj, jl_valueof(*slot)); \ } while (0) // TODO: Is slot the right target object? 
#define gc_debug_edge2(ty,obj,slot,arg1,arg2) do { \ - verify_parent2(ty,obj,slot,arg1,arg2); \ if (strcmp(ty, "module") == 0) { \ record_edge_to_gc_snapshot2(ty, obj, *slot, arg2); \ } else { \ From 62da715fcb5a3e31acfd1c0bad0885f3985d94c8 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 17:21:01 -0400 Subject: [PATCH 047/106] better organize TODO --- src/gc-heap-snapshot.cpp | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index b464c14b05bd2..8b059d6a0f1df 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -16,21 +16,25 @@ using std::unordered_map; using std::unordered_set; // TODOs: -// - Proper node types/kinds -// 1. map from the names provided to verify_parent to the correct JS names -// 2. change the names at the callsite -// 3. introduce an enum to be set a the callsite and mapped to correct names -// - Array indexes -// - already have these, just have to be able to let them know we're at an array -// - Field index -> names -// - same as above, but we need to convert field index to field name (either inside, or at caller) -// - array sizes -// - string sizes +// Field Names & node types +// - Proper node types/kinds +// 1. map from the names provided to verify_parent to the correct JS names +// 2. change the names at the callsite +// 3. 
introduce an enum to be set a the callsite and mapped to correct names +// - Array indexes +// - already have these, just have to be able to let them know we're at an array +// - Field index -> names +// - same as above, but we need to convert field index to field name (either inside, or at caller) +// Rich field names +// - Convert a field index to field name +// Sizes +// - array sizes +// - string sizes // Need three functions: -// - gc_heap_snapshot_record_array_edge(from, to, int index) -// - gc_heap_snapshot_record_module_edge(from, to, char* name) -// - gc_heap_snapshot_record_object_edge(from, to, int field_index) +// - gc_heap_snapshot_record_array_edge(value* from, value* to, int index) +// - gc_heap_snapshot_record_module_edge(value* from, value* to, char* name) +// - gc_heap_snapshot_record_object_edge(value* from, value* to, int field_index) From 2a977166561cbb521264c9f016cae3804ca0fd31 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 17:44:45 -0400 Subject: [PATCH 048/106] Change all GC callsites to use newly named debug functions --- src/gc-heap-snapshot.cpp | 6 +++--- src/gc-heap-snapshot.h | 12 ++++++++++-- src/gc.c | 24 ++++++++++++++---------- src/gc.h | 14 -------------- 4 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 8b059d6a0f1df..2a7b519199009 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -243,9 +243,9 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { } // TODO: remove JL_DLLEXPORT -JL_DLLEXPORT void record_edge_to_gc_snapshot(char *type_description, jl_value_t *a, jl_value_t *b) { - record_edge_to_gc_snapshot2(type_description, a, b, ""); -} +//JL_DLLEXPORT void record_edge_to_gc_snapshot(char *type_description, jl_value_t *a, jl_value_t *b) { +// record_edge_to_gc_snapshot2(type_description, a, b, ""); +//} JL_DLLEXPORT void record_edge_to_gc_snapshot2(char *type_description, jl_value_t *a, jl_value_t *b, char 
*fieldname) { if (!g_snapshot) { return; diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index a2e4d7875c9db..a8a2d7ac6efb6 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -11,8 +11,16 @@ extern "C" { // Functions to call from GC when heap snapshot is enabled // --------------------------------------------------------------------- // TODO: remove JL_DLLEXPORT -JL_DLLEXPORT void record_edge_to_gc_snapshot(char *ty, jl_value_t *a, jl_value_t *b); -JL_DLLEXPORT void record_edge_to_gc_snapshot2(char *ty, jl_value_t *a, jl_value_t *b, char *name); +JL_DLLEXPORT void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, int index); +JL_DLLEXPORT void gc_heap_snapshot_record_module_edge(jl_value_t *from, jl_value_t *to, char *name); +JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, int field_index); +// Used for objects managed by GC, but which aren't exposed in the julia object, so have no +// field or index. i.e. they're not reacahable from julia code, but we _will_ hit them in +// the GC mark phase (so we can check their type tag to get the size). +JL_DLLEXPORT void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to); +// Used for objects manually allocated in C (outside julia GC), to still tell the heap snapshot about the +// size of the object, even though we're never going to mark that object. 
+JL_DLLEXPORT void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, jl_value_t *to, size_t bytes); // --------------------------------------------------------------------- // Functions to call from Julia to take heap snapshot diff --git a/src/gc.c b/src/gc.c index 78fab173f0797..fba5ff9e139f2 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1820,7 +1820,7 @@ STATIC_INLINE int gc_mark_scan_objarray(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, if (*pnew_obj) { verify_parent2("obj array", objary->parent, begin, "elem(%d)", gc_slot_to_arrayidx(objary->parent, begin)); - gc_debug_edge2("obj array", objary->parent, begin, "elem(%d)", + gc_heap_snapshot_record_array_edge(objary->parent, *begin, gc_slot_to_arrayidx(objary->parent, begin)); } if (!gc_try_setmark(*pnew_obj, &objary->nptr, ptag, pbits)) @@ -1859,7 +1859,7 @@ STATIC_INLINE int gc_mark_scan_array8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, if (*pnew_obj) { verify_parent2("array", ary8->elem.parent, slot, "elem(%d)", gc_slot_to_arrayidx(ary8->elem.parent, begin)); - gc_debug_edge2("array", ary8->elem.parent, slot, "elem(%d)", + gc_heap_snapshot_record_array_edge(ary8->elem.parent, *slot, gc_slot_to_arrayidx(ary8->elem.parent, begin)); } if (!gc_try_setmark(*pnew_obj, &ary8->elem.nptr, ptag, pbits)) @@ -1910,7 +1910,7 @@ STATIC_INLINE int gc_mark_scan_array16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, if (*pnew_obj) { verify_parent2("array", ary16->elem.parent, slot, "elem(%d)", gc_slot_to_arrayidx(ary16->elem.parent, begin)); - gc_debug_edge2("array", ary16->elem.parent, slot, "elem(%d)", + gc_heap_snapshot_record_array_edge(ary16->elem.parent, *slot, gc_slot_to_arrayidx(ary16->elem.parent, begin)); } if (!gc_try_setmark(*pnew_obj, &ary16->elem.nptr, ptag, pbits)) @@ -1959,7 +1959,7 @@ STATIC_INLINE int gc_mark_scan_obj8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mark if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); - gc_debug_edge2("object", parent, slot, "field(%d)", + 
gc_heap_snapshot_record_object_edge(parent, slot, gc_slot_to_fieldidx(parent, slot)); } if (!gc_try_setmark(*pnew_obj, &obj8->nptr, ptag, pbits)) @@ -1995,7 +1995,7 @@ STATIC_INLINE int gc_mark_scan_obj16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); - gc_debug_edge2("object", parent, slot, "field(%d)", + gc_heap_snapshot_record_object_edge(parent, slot, gc_slot_to_fieldidx(parent, slot)); } if (!gc_try_setmark(*pnew_obj, &obj16->nptr, ptag, pbits)) @@ -2031,7 +2031,7 @@ STATIC_INLINE int gc_mark_scan_obj32(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); - gc_debug_edge2("object", parent, slot, "field(%d)", + gc_heap_snapshot_record_object_edge(parent, slot, gc_slot_to_fieldidx(parent, slot)); } if (!gc_try_setmark(*pnew_obj, &obj32->nptr, ptag, pbits)) @@ -2421,15 +2421,16 @@ module_binding: { } void *vb = jl_astaggedvalue(b); verify_parent1("module", binding->parent, &vb, "binding_buff"); - gc_debug_edge1("module", binding->parent, &vb, "binding_buff"); + // Record the size used for the box for non-const bindings + gc_heap_snapshot_record_internal_edge(binding->parent, b); (void)vb; jl_value_t *value = jl_atomic_load_relaxed(&b->value); jl_value_t *globalref = jl_atomic_load_relaxed(&b->globalref); if (value) { verify_parent2("module", binding->parent, &b->value, "binding(%s)", jl_symbol_name(b->name)); - gc_debug_edge2("module", binding->parent, - &b->value, "binding(%s)", jl_symbol_name(b->name)); + gc_heap_snapshot_record_module_edge(binding->parent, &b->value, + jl_symbol_name(b->name)); if (gc_try_setmark(value, &binding->nptr, &tag, &bits)) { new_obj = value; begin += 2; @@ -2561,7 +2562,7 @@ mark: { if (flags.how == 1) { void *val_buf = jl_astaggedvalue((char*)a->data - a->offset * a->elsize); verify_parent1("array", new_obj, &val_buf, "buffer ('loc' addr 
is meaningless)"); - gc_debug_edge1("array", new_obj, &val_buf, "buffer ('loc' addr is meaningless)"); + gc_heap_snapshot_record_internal_edge(new_obj, jl_valueof(val_buf)); (void)val_buf; gc_setmark_buf_(ptls, (char*)a->data - a->offset * a->elsize, bits, jl_array_nbytes(a)); @@ -2570,6 +2571,7 @@ mark: { if (update_meta || foreign_alloc) { objprofile_count(jl_malloc_tag, bits == GC_OLD_MARKED, jl_array_nbytes(a)); + gc_heap_snapshot_record_hidden_edge(new_obj, jl_malloc_tag, jl_array_nbytes(a)); if (bits == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += jl_array_nbytes(a); } @@ -2581,6 +2583,8 @@ mark: { else if (flags.how == 3) { jl_value_t *owner = jl_array_data_owner(a); uintptr_t nptr = (1 << 2) | (bits & GC_OLD); + // TODO: Keep an eye on the edge type here, we're _pretty sure_ it's right.. + gc_heap_snapshot_record_internal_edge(new_obj, owner); int markowner = gc_try_setmark(owner, &nptr, &tag, &bits); gc_mark_push_remset(ptls, new_obj, nptr); if (markowner) { diff --git a/src/gc.h b/src/gc.h index 28aa9834d8823..a5ea9de7eb711 100644 --- a/src/gc.h +++ b/src/gc.h @@ -634,20 +634,6 @@ extern int gc_verifying; #endif -// For GC Debugging -// TODO: Is slot the right target object? -#define gc_debug_edge1(ty,obj,slot,arg1) do { \ - record_edge_to_gc_snapshot(ty, obj, jl_valueof(*slot)); \ -} while (0) -// TODO: Is slot the right target object? 
-#define gc_debug_edge2(ty,obj,slot,arg1,arg2) do { \ - if (strcmp(ty, "module") == 0) { \ - record_edge_to_gc_snapshot2(ty, obj, *slot, arg2); \ - } else { \ - record_edge_to_gc_snapshot2(ty, obj, *slot, ""); \ - } \ -} while (0) - int gc_slot_to_fieldidx(void *_obj, void *slot); int gc_slot_to_arrayidx(void *_obj, void *begin); NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset); From e02330ceec1eb8b51c9c0d738f2b234400637564 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 18:24:42 -0400 Subject: [PATCH 049/106] Woohoo~ Restructured code to support last major changes - Array indices now work correctly - malloc'd buffers (e.g. arrays) (maybe) work (more) correctly --- src/gc-heap-snapshot.cpp | 75 +++++++++++++++++++++++++++------------- src/gc-heap-snapshot.h | 4 +-- src/gc.c | 8 ++--- 3 files changed, 57 insertions(+), 30 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 2a7b519199009..9aa898052cb45 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -37,7 +37,7 @@ using std::unordered_set; // - gc_heap_snapshot_record_object_edge(value* from, value* to, int field_index) - +static inline void _record_gc_node(const char *node_type, const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index); // https://stackoverflow.com/a/33799784/751061 @@ -194,30 +194,35 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { return; //return &g_snapshot->nodes[val->second]; } - // Insert a new Node - jl_datatype_t* type = (jl_datatype_t*)jl_typeof(a); + // Insert a new Node size_t self_size = 1; string name = ""; - if ((uintptr_t)type < 4096U) { - name = ""; - } else if (type == (jl_datatype_t*)jl_buff_tag) { - name = ""; - } else if (type == (jl_datatype_t*)jl_malloc_tag) { + if (a == (jl_value_t*)jl_malloc_tag) { name = ""; - } else if (jl_is_datatype(type)) { + } else { + jl_datatype_t* type = (jl_datatype_t*)jl_typeof(a); + + if ((uintptr_t)type < 
4096U) { + name = ""; + } else if (type == (jl_datatype_t*)jl_buff_tag) { + name = ""; + } else if (type == (jl_datatype_t*)jl_malloc_tag) { + name = ""; + } else if (jl_is_datatype(type)) { - ios_t str_; - ios_mem(&str_, 1024); - JL_STREAM* str = (JL_STREAM*)&str_; + ios_t str_; + ios_mem(&str_, 1024); + JL_STREAM* str = (JL_STREAM*)&str_; - jl_static_show(str, (jl_value_t*)type); + jl_static_show(str, (jl_value_t*)type); - name = string((const char*)str_.buf, str_.size); - ios_close(&str_); + name = string((const char*)str_.buf, str_.size); + ios_close(&str_); - self_size = (size_t)jl_datatype_size(type); + self_size = (size_t)jl_datatype_size(type); + } } g_snapshot->node_ptr_to_index_map.insert(val, @@ -242,11 +247,33 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { g_snapshot->nodes.push_back(from_node); } -// TODO: remove JL_DLLEXPORT -//JL_DLLEXPORT void record_edge_to_gc_snapshot(char *type_description, jl_value_t *a, jl_value_t *b) { -// record_edge_to_gc_snapshot2(type_description, a, b, ""); -//} -JL_DLLEXPORT void record_edge_to_gc_snapshot2(char *type_description, jl_value_t *a, jl_value_t *b, char *fieldname) { +JL_DLLEXPORT void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, int index) { + _record_gc_node("array", "element", from, to, index); +} +JL_DLLEXPORT void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) { + _record_gc_node("object", "property", (jl_value_t*)from, to, + g_snapshot->names.find_or_create_string_id(name)); +} +JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, int field_index) { + // TODO: Field name + const char *field_name = ""; + _record_gc_node("object", "property", from, to, + g_snapshot->names.find_or_create_string_id(field_name)); +} +JL_DLLEXPORT void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) { + // TODO: probably need to inline this here and make some changes + 
_record_gc_node("object", "internal", from, to, + g_snapshot->names.find_or_create_string_id("")); +} +JL_DLLEXPORT void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) { + // TODO: probably need to inline this here and make some changes + _record_gc_node("native", "hidden", from, (jl_value_t*)jl_malloc_tag, + g_snapshot->names.find_or_create_string_id("")); + + g_snapshot->nodes.back().self_size = bytes; +} + +static inline void _record_gc_node(const char *node_type, const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index) { if (!g_snapshot) { return; } @@ -257,12 +284,12 @@ JL_DLLEXPORT void record_edge_to_gc_snapshot2(char *type_description, jl_value_t auto from_node_idx = g_snapshot->node_ptr_to_index_map[a]; auto &from_node = g_snapshot->nodes[from_node_idx]; - from_node.type = g_snapshot->node_types.find_or_create_string_id(type_description); + from_node.type = g_snapshot->node_types.find_or_create_string_id(node_type); from_node.edge_count += 1; from_node.edges.push_back(Edge{ - g_snapshot->edge_types.find_or_create_string_id("property"), - g_snapshot->names.find_or_create_string_id(fieldname), // name or index + g_snapshot->edge_types.find_or_create_string_id(edge_type), + name_or_index, g_snapshot->node_ptr_to_index_map[b], // to // book-keeping }); diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index a8a2d7ac6efb6..2e1163571d5ff 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -12,7 +12,7 @@ extern "C" { // --------------------------------------------------------------------- // TODO: remove JL_DLLEXPORT JL_DLLEXPORT void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, int index); -JL_DLLEXPORT void gc_heap_snapshot_record_module_edge(jl_value_t *from, jl_value_t *to, char *name); +JL_DLLEXPORT void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name); JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t 
*to, int field_index); // Used for objects managed by GC, but which aren't exposed in the julia object, so have no // field or index. i.e. they're not reacahable from julia code, but we _will_ hit them in @@ -20,7 +20,7 @@ JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value JL_DLLEXPORT void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to); // Used for objects manually allocated in C (outside julia GC), to still tell the heap snapshot about the // size of the object, even though we're never going to mark that object. -JL_DLLEXPORT void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, jl_value_t *to, size_t bytes); +JL_DLLEXPORT void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes); // --------------------------------------------------------------------- // Functions to call from Julia to take heap snapshot diff --git a/src/gc.c b/src/gc.c index fba5ff9e139f2..50a8b5a28b9ac 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1959,7 +1959,7 @@ STATIC_INLINE int gc_mark_scan_obj8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mark if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); - gc_heap_snapshot_record_object_edge(parent, slot, + gc_heap_snapshot_record_object_edge(parent, *slot, gc_slot_to_fieldidx(parent, slot)); } if (!gc_try_setmark(*pnew_obj, &obj8->nptr, ptag, pbits)) @@ -1995,7 +1995,7 @@ STATIC_INLINE int gc_mark_scan_obj16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); - gc_heap_snapshot_record_object_edge(parent, slot, + gc_heap_snapshot_record_object_edge(parent, *slot, gc_slot_to_fieldidx(parent, slot)); } if (!gc_try_setmark(*pnew_obj, &obj16->nptr, ptag, pbits)) @@ -2031,7 +2031,7 @@ STATIC_INLINE int gc_mark_scan_obj32(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", 
gc_slot_to_fieldidx(parent, slot)); - gc_heap_snapshot_record_object_edge(parent, slot, + gc_heap_snapshot_record_object_edge(parent, *slot, gc_slot_to_fieldidx(parent, slot)); } if (!gc_try_setmark(*pnew_obj, &obj32->nptr, ptag, pbits)) @@ -2571,7 +2571,7 @@ mark: { if (update_meta || foreign_alloc) { objprofile_count(jl_malloc_tag, bits == GC_OLD_MARKED, jl_array_nbytes(a)); - gc_heap_snapshot_record_hidden_edge(new_obj, jl_malloc_tag, jl_array_nbytes(a)); + gc_heap_snapshot_record_hidden_edge(new_obj, jl_array_nbytes(a)); if (bits == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += jl_array_nbytes(a); } From 83f7da67c9427b54ea15258a1e10feb875a51b12 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 18:45:36 -0400 Subject: [PATCH 050/106] Implement Field Names!!!!!!!!! woohooo --- src/gc-heap-snapshot.cpp | 23 +++++++++++++++++++---- src/gc-heap-snapshot.h | 4 ++-- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 9aa898052cb45..d9f47caa3a7b3 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -247,16 +247,31 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { g_snapshot->nodes.push_back(from_node); } -JL_DLLEXPORT void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, int index) { +JL_DLLEXPORT void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) { _record_gc_node("array", "element", from, to, index); } JL_DLLEXPORT void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) { _record_gc_node("object", "property", (jl_value_t*)from, to, g_snapshot->names.find_or_create_string_id(name)); } -JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, int field_index) { - // TODO: Field name - const char *field_name = ""; +JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) { + 
jl_datatype_t *type = (jl_datatype_t*)jl_typeof(from); + if (jl_is_tuple_type(type)) { + // TODO: Maybe not okay to match element and object + _record_gc_node("object", "element", from, to, field_index); + return; + } + if (field_index < 0 || jl_datatype_nfields(type) <= field_index) { + // TODO: We're getting -1 in some cases + jl_printf(JL_STDERR, "WARNING - incorrect field index (%zu) for type\n", field_index); + jl_(type); + _record_gc_node("object", "element", from, to, field_index); + return; + } + jl_svec_t *field_names = jl_field_names(type); + jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, field_index); + const char *field_name = jl_symbol_name(name); + _record_gc_node("object", "property", from, to, g_snapshot->names.find_or_create_string_id(field_name)); } diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 2e1163571d5ff..1db31e42ec150 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -11,9 +11,9 @@ extern "C" { // Functions to call from GC when heap snapshot is enabled // --------------------------------------------------------------------- // TODO: remove JL_DLLEXPORT -JL_DLLEXPORT void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, int index); +JL_DLLEXPORT void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index); JL_DLLEXPORT void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name); -JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, int field_index); +JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index); // Used for objects managed by GC, but which aren't exposed in the julia object, so have no // field or index. i.e. they're not reacahable from julia code, but we _will_ hit them in // the GC mark phase (so we can check their type tag to get the size). 
From c2dcb53c2c992c03c4d3990bf144a41dc957e061 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 18:47:23 -0400 Subject: [PATCH 051/106] Update TODO list --- src/gc-heap-snapshot.cpp | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index d9f47caa3a7b3..c11cccbffddec 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -16,26 +16,14 @@ using std::unordered_map; using std::unordered_set; // TODOs: +// Roots +// - Correctly reporting the roots to JS somehow..? +// - Making sure we're actually encoding all the roots. // Field Names & node types -// - Proper node types/kinds -// 1. map from the names provided to verify_parent to the correct JS names -// 2. change the names at the callsite -// 3. introduce an enum to be set a the callsite and mapped to correct names -// - Array indexes -// - already have these, just have to be able to let them know we're at an array -// - Field index -> names -// - same as above, but we need to convert field index to field name (either inside, or at caller) -// Rich field names -// - Convert a field index to field name +// - Bug fixing // Sizes -// - array sizes // - string sizes -// Need three functions: -// - gc_heap_snapshot_record_array_edge(value* from, value* to, int index) -// - gc_heap_snapshot_record_module_edge(value* from, value* to, char* name) -// - gc_heap_snapshot_record_object_edge(value* from, value* to, int field_index) - static inline void _record_gc_node(const char *node_type, const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index); From 4f6a03c360232f2f5718701c5c35fa4b04b464b2 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 22:21:24 -0400 Subject: [PATCH 052/106] Fix normal path GC code to only debug when enabled. 
--- src/gc-heap-snapshot.cpp | 19 +++++++++++++++---- src/gc.c | 1 + 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index c11cccbffddec..bc960f0de7854 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -236,13 +236,22 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { } JL_DLLEXPORT void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) { + if (!g_snapshot) { + return; + } _record_gc_node("array", "element", from, to, index); } JL_DLLEXPORT void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) { + if (!g_snapshot) { + return; + } _record_gc_node("object", "property", (jl_value_t*)from, to, g_snapshot->names.find_or_create_string_id(name)); } JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) { + if (!g_snapshot) { + return; + } jl_datatype_t *type = (jl_datatype_t*)jl_typeof(from); if (jl_is_tuple_type(type)) { // TODO: Maybe not okay to match element and object @@ -264,11 +273,17 @@ JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value g_snapshot->names.find_or_create_string_id(field_name)); } JL_DLLEXPORT void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) { + if (!g_snapshot) { + return; + } // TODO: probably need to inline this here and make some changes _record_gc_node("object", "internal", from, to, g_snapshot->names.find_or_create_string_id("")); } JL_DLLEXPORT void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) { + if (!g_snapshot) { + return; + } // TODO: probably need to inline this here and make some changes _record_gc_node("native", "hidden", from, (jl_value_t*)jl_malloc_tag, g_snapshot->names.find_or_create_string_id("")); @@ -277,10 +292,6 @@ JL_DLLEXPORT void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t b } static inline void 
_record_gc_node(const char *node_type, const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index) { - if (!g_snapshot) { - return; - } - record_node_to_gc_snapshot(a); record_node_to_gc_snapshot(b); diff --git a/src/gc.c b/src/gc.c index 50a8b5a28b9ac..7531d73cf1e09 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1995,6 +1995,7 @@ STATIC_INLINE int gc_mark_scan_obj16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); + // TODO: Should this be *parent? Given the way it's used above? gc_heap_snapshot_record_object_edge(parent, *slot, gc_slot_to_fieldidx(parent, slot)); } From 392c590305280d2becb04d8742e9036ca7025932 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 23:07:51 -0400 Subject: [PATCH 053/106] Fix major bug in field name string table indexing: We were accidentally still multiplying `edge.name_or_index` by number of node fields, from back when we thought this field was a from-node-index! 
haha :) --- src/gc-heap-snapshot.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index bc960f0de7854..1be2fa45945e5 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -361,7 +361,7 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, ","); } jl_printf(stream, "%zu", edge.type); - jl_printf(stream, ",%zu", edge.name_or_index * k_node_number_of_fields); + jl_printf(stream, ",%zu", edge.name_or_index); jl_printf(stream, ",%zu", edge.to_node * k_node_number_of_fields); jl_printf(stream, "\n"); } From 556c7806de6e2acf3ddd69deea446920fbba67c3 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 23:23:48 -0400 Subject: [PATCH 054/106] Fix some bugs with registering edges from Modules :) --- src/gc-heap-snapshot.cpp | 12 +++++++++--- src/gc.c | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 1be2fa45945e5..9b3f1d3ccb712 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -184,7 +184,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { } // Insert a new Node - size_t self_size = 1; + size_t self_size = 0; string name = ""; if (a == (jl_value_t*)jl_malloc_tag) { @@ -223,7 +223,9 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { g_snapshot->node_types.find_or_create_string_id("object"), // string type; name, // string name; (size_t)a, // size_t id; - self_size, // size_t self_size; + // We add 1 to self-size for the type tag that all heap-allocated objects have. + // Also because the Chrome Snapshot viewer ignores size-0 leaves! 
+ self_size + 1, // size_t self_size; 0, // int edge_count, will be incremented on every outgoing edge 0, // size_t trace_node_id (unused) @@ -245,6 +247,7 @@ JL_DLLEXPORT void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_valu if (!g_snapshot) { return; } + //jl_printf(JL_STDERR, "module: %p binding:%p name:%s\n", from, to, name); _record_gc_node("object", "property", (jl_value_t*)from, to, g_snapshot->names.find_or_create_string_id(name)); } @@ -288,16 +291,19 @@ JL_DLLEXPORT void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t b _record_gc_node("native", "hidden", from, (jl_value_t*)jl_malloc_tag, g_snapshot->names.find_or_create_string_id("")); - g_snapshot->nodes.back().self_size = bytes; + // Add the size to the "unknown malloc" tag + g_snapshot->nodes[g_snapshot->node_ptr_to_index_map[(jl_value_t*)jl_malloc_tag]].self_size += bytes; } static inline void _record_gc_node(const char *node_type, const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index) { record_node_to_gc_snapshot(a); record_node_to_gc_snapshot(b); + // Have to look this up because it might not be created for this edge auto from_node_idx = g_snapshot->node_ptr_to_index_map[a]; auto &from_node = g_snapshot->nodes[from_node_idx]; + // TODO: can these ever disagree?: from_node.type = g_snapshot->node_types.find_or_create_string_id(node_type); from_node.edge_count += 1; diff --git a/src/gc.c b/src/gc.c index 7531d73cf1e09..5bb122af07ca5 100644 --- a/src/gc.c +++ b/src/gc.c @@ -2430,7 +2430,7 @@ module_binding: { if (value) { verify_parent2("module", binding->parent, &b->value, "binding(%s)", jl_symbol_name(b->name)); - gc_heap_snapshot_record_module_edge(binding->parent, &b->value, + gc_heap_snapshot_record_module_edge(binding->parent, value, jl_symbol_name(b->name)); if (gc_try_setmark(value, &binding->nptr, &tag, &bits)) { new_obj = value; From 3b83fd403c7e1774e8c575adb03fd733748e7d94 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 20 Sep 2021 
23:43:42 -0400 Subject: [PATCH 055/106] Handle field names of NamedTuples without crashing --- src/gc-heap-snapshot.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 9b3f1d3ccb712..41e280862d541 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -256,7 +256,8 @@ JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value return; } jl_datatype_t *type = (jl_datatype_t*)jl_typeof(from); - if (jl_is_tuple_type(type)) { + // TODO: It seems like NamedTuples should have field names? Maybe there's another way to get them? + if (jl_is_tuple_type(type) || jl_is_namedtuple_type(type)) { // TODO: Maybe not okay to match element and object _record_gc_node("object", "element", from, to, field_index); return; From 6397269cdac3afe9475ab126a3fdf3a106a8c9c4 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Tue, 21 Sep 2021 10:47:19 -0400 Subject: [PATCH 056/106] rename _record_gc_node => _record_gc_edge --- src/gc-heap-snapshot.cpp | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 41e280862d541..87340f4a8ed34 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -25,7 +25,8 @@ using std::unordered_set; // - string sizes -static inline void _record_gc_node(const char *node_type, const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index); +static inline void _record_gc_edge(const char *node_type, const char *edge_type, + jl_value_t *a, jl_value_t *b, size_t name_or_index); // https://stackoverflow.com/a/33799784/751061 @@ -213,8 +214,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { } } - g_snapshot->node_ptr_to_index_map.insert(val, - {a, g_snapshot->nodes.size()}); + g_snapshot->node_ptr_to_index_map.insert(val, {a, g_snapshot->nodes.size()}); count_nodes += 
1; Node from_node{ @@ -241,15 +241,15 @@ JL_DLLEXPORT void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_ if (!g_snapshot) { return; } - _record_gc_node("array", "element", from, to, index); + _record_gc_edge("array", "element", from, to, index); } JL_DLLEXPORT void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) { if (!g_snapshot) { return; } //jl_printf(JL_STDERR, "module: %p binding:%p name:%s\n", from, to, name); - _record_gc_node("object", "property", (jl_value_t*)from, to, - g_snapshot->names.find_or_create_string_id(name)); + _record_gc_edge("object", "property", (jl_value_t *)from, to, + g_snapshot->names.find_or_create_string_id(name)); } JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) { if (!g_snapshot) { @@ -259,44 +259,47 @@ JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value // TODO: It seems like NamedTuples should have field names? Maybe there's another way to get them? 
if (jl_is_tuple_type(type) || jl_is_namedtuple_type(type)) { // TODO: Maybe not okay to match element and object - _record_gc_node("object", "element", from, to, field_index); + _record_gc_edge("object", "element", from, to, field_index); return; } if (field_index < 0 || jl_datatype_nfields(type) <= field_index) { // TODO: We're getting -1 in some cases jl_printf(JL_STDERR, "WARNING - incorrect field index (%zu) for type\n", field_index); jl_(type); - _record_gc_node("object", "element", from, to, field_index); + _record_gc_edge("object", "element", from, to, field_index); return; } jl_svec_t *field_names = jl_field_names(type); jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, field_index); const char *field_name = jl_symbol_name(name); - _record_gc_node("object", "property", from, to, - g_snapshot->names.find_or_create_string_id(field_name)); + _record_gc_edge("object", "property", from, to, + g_snapshot->names.find_or_create_string_id(field_name)); } -JL_DLLEXPORT void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) { +JL_DLLEXPORT void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) +{ if (!g_snapshot) { return; } // TODO: probably need to inline this here and make some changes - _record_gc_node("object", "internal", from, to, - g_snapshot->names.find_or_create_string_id("")); + _record_gc_edge("object", "internal", from, to, + g_snapshot->names.find_or_create_string_id("")); } JL_DLLEXPORT void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) { if (!g_snapshot) { return; } // TODO: probably need to inline this here and make some changes - _record_gc_node("native", "hidden", from, (jl_value_t*)jl_malloc_tag, - g_snapshot->names.find_or_create_string_id("")); + _record_gc_edge("native", "hidden", from, (jl_value_t *)jl_malloc_tag, + g_snapshot->names.find_or_create_string_id("")); // Add the size to the "unknown malloc" tag 
g_snapshot->nodes[g_snapshot->node_ptr_to_index_map[(jl_value_t*)jl_malloc_tag]].self_size += bytes; } -static inline void _record_gc_node(const char *node_type, const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index) { +static inline void _record_gc_edge(const char *node_type, const char *edge_type, + jl_value_t *a, jl_value_t *b, size_t name_or_index) +{ record_node_to_gc_snapshot(a); record_node_to_gc_snapshot(b); @@ -309,10 +312,8 @@ static inline void _record_gc_node(const char *node_type, const char *edge_type, from_node.edge_count += 1; from_node.edges.push_back(Edge{ - g_snapshot->edge_types.find_or_create_string_id(edge_type), - name_or_index, + g_snapshot->edge_types.find_or_create_string_id(edge_type), name_or_index, g_snapshot->node_ptr_to_index_map[b], // to - // book-keeping }); g_snapshot->num_edges += 1; From b01cd819580403cd62dd7adf8d8c18974e265e39 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Tue, 21 Sep 2021 11:59:17 -0400 Subject: [PATCH 057/106] capture string and symbol values; tag them for viewer --- src/gc-heap-snapshot.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 87340f4a8ed34..64eefd8d6d00a 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -139,7 +139,7 @@ struct HeapSnapshot { // edges are stored on each from_node StringTable names; - StringTable node_types = {"object"}; + StringTable node_types = {"object", "string", "symbol"}; StringTable edge_types = {"property"}; unordered_map node_ptr_to_index_map; @@ -177,6 +177,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { //jl_printf(JL_STDERR, "edges: %d\n", count_edges); } +// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { auto val = 
g_snapshot->node_ptr_to_index_map.find((void*)a); if (val != g_snapshot->node_ptr_to_index_map.end()) { @@ -187,6 +188,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { // Insert a new Node size_t self_size = 0; string name = ""; + string node_type = "object"; if (a == (jl_value_t*)jl_malloc_tag) { name = ""; @@ -199,8 +201,13 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { name = ""; } else if (type == (jl_datatype_t*)jl_malloc_tag) { name = ""; + } else if (jl_is_string(a)) { + node_type = "string"; + name = jl_string_data(a); // string value + } else if (jl_is_symbol(a)) { + node_type = "symbol"; + name = jl_symbol_name((jl_sym_t*)a); } else if (jl_is_datatype(type)) { - ios_t str_; ios_mem(&str_, 1024); JL_STREAM* str = (JL_STREAM*)&str_; @@ -220,7 +227,7 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { Node from_node{ // We pick a default type here, which will be set for the _targets_ of edges. // TODO: What's a good default? - g_snapshot->node_types.find_or_create_string_id("object"), // string type; + g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; name, // string name; (size_t)a, // size_t id; // We add 1 to self-size for the type tag that all heap-allocated objects have. 
From fe635679d00a6c099a63954dd3622ef7c4ecb9da Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Tue, 21 Sep 2021 12:13:51 -0400 Subject: [PATCH 058/106] string and symbol sizes --- src/gc-heap-snapshot.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 64eefd8d6d00a..5253e3ebff8bb 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -203,10 +203,12 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { name = ""; } else if (jl_is_string(a)) { node_type = "string"; - name = jl_string_data(a); // string value + name = jl_string_data(a); + self_size = jl_string_len(a); } else if (jl_is_symbol(a)) { node_type = "symbol"; name = jl_symbol_name((jl_sym_t*)a); + self_size = name.length(); } else if (jl_is_datatype(type)) { ios_t str_; ios_mem(&str_, 1024); From 3337ba5c4e6b91495b0e735ca36c3889765db534 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Wed, 22 Sep 2021 14:39:46 -0400 Subject: [PATCH 059/106] add gc annotations --- src/gc-heap-snapshot.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 5253e3ebff8bb..6ee63377b71b8 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -73,7 +73,7 @@ struct Edge { const int k_node_number_of_fields = 7; struct Node { - size_t type; + size_t type; // TODO: point at actual type here? string name; size_t id; // (vilterp) the memory address, right? 
size_t self_size; @@ -178,7 +178,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { } // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 -JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { +void record_node_to_gc_snapshot(jl_value_t *a) JL_GC_DISABLED { auto val = g_snapshot->node_ptr_to_index_map.find((void*)a); if (val != g_snapshot->node_ptr_to_index_map.end()) { return; @@ -246,13 +246,13 @@ JL_DLLEXPORT void record_node_to_gc_snapshot(jl_value_t *a) { g_snapshot->nodes.push_back(from_node); } -JL_DLLEXPORT void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) { +void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_GC_DISABLED { if (!g_snapshot) { return; } _record_gc_edge("array", "element", from, to, index); } -JL_DLLEXPORT void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) { +void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_GC_DISABLED { if (!g_snapshot) { return; } @@ -260,7 +260,7 @@ JL_DLLEXPORT void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_valu _record_gc_edge("object", "property", (jl_value_t *)from, to, g_snapshot->names.find_or_create_string_id(name)); } -JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) { +void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_GC_DISABLED { if (!g_snapshot) { return; } @@ -285,8 +285,7 @@ JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value _record_gc_edge("object", "property", from, to, g_snapshot->names.find_or_create_string_id(field_name)); } -JL_DLLEXPORT void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) -{ +void gc_heap_snapshot_record_internal_edge(jl_value_t *from, 
jl_value_t *to) JL_GC_DISABLED { if (!g_snapshot) { return; } @@ -294,7 +293,7 @@ JL_DLLEXPORT void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_val _record_gc_edge("object", "internal", from, to, g_snapshot->names.find_or_create_string_id("")); } -JL_DLLEXPORT void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) { +void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_GC_DISABLED { if (!g_snapshot) { return; } From fc77bfa62ffba34fc93946e6df41d811b03469de Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 22 Sep 2021 15:34:49 -0400 Subject: [PATCH 060/106] add gc-heap-snapshot.h to gc.o dependency list --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index ba1b65b3fe363..ea9b0f6e8fc83 100644 --- a/src/Makefile +++ b/src/Makefile @@ -284,7 +284,7 @@ $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR) $(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc builtin_proto.h serialize.h) $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h -$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h +$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h From ad6f1d57cf9634898cceba1b384bf7267a42d3a8 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 22 Sep 2021 15:37:17 -0400 Subject: [PATCH 061/106] [GCHEAP] add JL_NOTSAFEPOINT --- src/gc-debug.c | 4 ++-- src/gc-heap-snapshot.cpp | 14 +++++++------- src/gc-heap-snapshot.h | 11 +++++------ src/gc.h | 4 ++-- 4 files changed, 16 insertions(+), 17 
deletions(-) diff --git a/src/gc-debug.c b/src/gc-debug.c index 8d2fcf67a75af..206e3982599f7 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1205,7 +1205,7 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot) +int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT { jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); int nf = (int)jl_datatype_nfields(vt); @@ -1218,7 +1218,7 @@ int gc_slot_to_fieldidx(void *obj, void *slot) return -1; } -int gc_slot_to_arrayidx(void *obj, void *_slot) +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT { char *slot = (char*)_slot; jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 6ee63377b71b8..e5ed211e5ef83 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -178,7 +178,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { } // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 -void record_node_to_gc_snapshot(jl_value_t *a) JL_GC_DISABLED { +void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { auto val = g_snapshot->node_ptr_to_index_map.find((void*)a); if (val != g_snapshot->node_ptr_to_index_map.end()) { return; @@ -246,13 +246,13 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_GC_DISABLED { g_snapshot->nodes.push_back(from_node); } -void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_GC_DISABLED { +void gc_heap_snapshot_record_array_edge(jl_value_t *from , jl_value_t *to , size_t index) JL_NOTSAFEPOINT { if (!g_snapshot) { return; } _record_gc_edge("array", "element", from, to, index); } -void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_GC_DISABLED { +void gc_heap_snapshot_record_module_edge(jl_module_t *from , jl_value_t *to , char 
*name) JL_NOTSAFEPOINT { if (!g_snapshot) { return; } @@ -260,7 +260,7 @@ void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char _record_gc_edge("object", "property", (jl_value_t *)from, to, g_snapshot->names.find_or_create_string_id(name)); } -void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_GC_DISABLED { +void gc_heap_snapshot_record_object_edge(jl_value_t *from , jl_value_t *to , size_t field_index) JL_NOTSAFEPOINT { if (!g_snapshot) { return; } @@ -285,7 +285,7 @@ void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_ _record_gc_edge("object", "property", from, to, g_snapshot->names.find_or_create_string_id(field_name)); } -void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_GC_DISABLED { +void gc_heap_snapshot_record_internal_edge(jl_value_t *from , jl_value_t *to ) JL_NOTSAFEPOINT { if (!g_snapshot) { return; } @@ -293,7 +293,7 @@ void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_ _record_gc_edge("object", "internal", from, to, g_snapshot->names.find_or_create_string_id("")); } -void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_GC_DISABLED { +void gc_heap_snapshot_record_hidden_edge(jl_value_t *from , size_t bytes ) JL_NOTSAFEPOINT { if (!g_snapshot) { return; } @@ -306,7 +306,7 @@ void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_GC_D } static inline void _record_gc_edge(const char *node_type, const char *edge_type, - jl_value_t *a, jl_value_t *b, size_t name_or_index) + jl_value_t *a, jl_value_t *b, size_t name_or_index) JL_NOTSAFEPOINT { record_node_to_gc_snapshot(a); record_node_to_gc_snapshot(b); diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 1db31e42ec150..de4418ccc1e02 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -10,17 +10,16 @@ extern "C" { // 
--------------------------------------------------------------------- // Functions to call from GC when heap snapshot is enabled // --------------------------------------------------------------------- -// TODO: remove JL_DLLEXPORT -JL_DLLEXPORT void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index); -JL_DLLEXPORT void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name); -JL_DLLEXPORT void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index); +void gc_heap_snapshot_record_array_edge(jl_value_t *from , jl_value_t *to , size_t index) JL_NOTSAFEPOINT; +void gc_heap_snapshot_record_module_edge(jl_module_t *from , jl_value_t *to , char *name) JL_NOTSAFEPOINT; +void gc_heap_snapshot_record_object_edge(jl_value_t *from , jl_value_t *to , size_t field_index) JL_NOTSAFEPOINT; // Used for objects managed by GC, but which aren't exposed in the julia object, so have no // field or index. i.e. they're not reacahable from julia code, but we _will_ hit them in // the GC mark phase (so we can check their type tag to get the size). -JL_DLLEXPORT void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to); +void gc_heap_snapshot_record_internal_edge(jl_value_t *from , jl_value_t *to ) JL_NOTSAFEPOINT; // Used for objects manually allocated in C (outside julia GC), to still tell the heap snapshot about the // size of the object, even though we're never going to mark that object. 
-JL_DLLEXPORT void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes); +void gc_heap_snapshot_record_hidden_edge(jl_value_t *from , size_t bytes ) JL_NOTSAFEPOINT; // --------------------------------------------------------------------- // Functions to call from Julia to take heap snapshot diff --git a/src/gc.h b/src/gc.h index a5ea9de7eb711..c404557f3e50d 100644 --- a/src/gc.h +++ b/src/gc.h @@ -634,8 +634,8 @@ extern int gc_verifying; #endif -int gc_slot_to_fieldidx(void *_obj, void *slot); -int gc_slot_to_arrayidx(void *_obj, void *begin); +int gc_slot_to_fieldidx(void *_obj, void *slot) JL_NOTSAFEPOINT; +int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT; NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset); #ifdef GC_DEBUG_ENV From 8b8cb43940f68fede37a1fc3d78798cb4ca7578f Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Wed, 22 Sep 2021 15:48:34 -0400 Subject: [PATCH 062/106] Disable GC Snapshotting w/ inline check for performance Ensure that the check for GC Heap Snapshot enabled is done in inline functions so that it will be performed in the original code unit, and avoid a jump if disabled. 
--- src/gc-heap-snapshot.cpp | 33 +++++++++++---------------------- src/gc-heap-snapshot.h | 38 +++++++++++++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index e5ed211e5ef83..9056f93335bec 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -25,6 +25,8 @@ using std::unordered_set; // - string sizes +int gc_heap_snapshot_enabled = 0; + static inline void _record_gc_edge(const char *node_type, const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index); @@ -161,11 +163,13 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { // Enable snapshotting HeapSnapshot snapshot; g_snapshot = &snapshot; + gc_heap_snapshot_enabled = true; // Do GC, which will callback into record_edge_to_gc_snapshot()... jl_gc_collect(JL_GC_FULL); // Disable snapshotting + gc_heap_snapshot_enabled = false; g_snapshot = nullptr; // When we return, the snapshot is full @@ -246,24 +250,15 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { g_snapshot->nodes.push_back(from_node); } -void gc_heap_snapshot_record_array_edge(jl_value_t *from , jl_value_t *to , size_t index) JL_NOTSAFEPOINT { - if (!g_snapshot) { - return; - } +void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { _record_gc_edge("array", "element", from, to, index); } -void gc_heap_snapshot_record_module_edge(jl_module_t *from , jl_value_t *to , char *name) JL_NOTSAFEPOINT { - if (!g_snapshot) { - return; - } +void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT { //jl_printf(JL_STDERR, "module: %p binding:%p name:%s\n", from, to, name); _record_gc_edge("object", "property", (jl_value_t *)from, to, g_snapshot->names.find_or_create_string_id(name)); } -void gc_heap_snapshot_record_object_edge(jl_value_t *from , jl_value_t *to , size_t field_index) JL_NOTSAFEPOINT { - if 
(!g_snapshot) { - return; - } +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT { jl_datatype_t *type = (jl_datatype_t*)jl_typeof(from); // TODO: It seems like NamedTuples should have field names? Maybe there's another way to get them? if (jl_is_tuple_type(type) || jl_is_namedtuple_type(type)) { @@ -273,8 +268,8 @@ void gc_heap_snapshot_record_object_edge(jl_value_t *from , jl_value_t *to , siz } if (field_index < 0 || jl_datatype_nfields(type) <= field_index) { // TODO: We're getting -1 in some cases - jl_printf(JL_STDERR, "WARNING - incorrect field index (%zu) for type\n", field_index); - jl_(type); + //jl_printf(JL_STDERR, "WARNING - incorrect field index (%zu) for type\n", field_index); + //jl_(type); _record_gc_edge("object", "element", from, to, field_index); return; } @@ -285,18 +280,12 @@ void gc_heap_snapshot_record_object_edge(jl_value_t *from , jl_value_t *to , siz _record_gc_edge("object", "property", from, to, g_snapshot->names.find_or_create_string_id(field_name)); } -void gc_heap_snapshot_record_internal_edge(jl_value_t *from , jl_value_t *to ) JL_NOTSAFEPOINT { - if (!g_snapshot) { - return; - } +void _gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT { // TODO: probably need to inline this here and make some changes _record_gc_edge("object", "internal", from, to, g_snapshot->names.find_or_create_string_id("")); } -void gc_heap_snapshot_record_hidden_edge(jl_value_t *from , size_t bytes ) JL_NOTSAFEPOINT { - if (!g_snapshot) { - return; - } +void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOTSAFEPOINT { // TODO: probably need to inline this here and make some changes _record_gc_edge("native", "hidden", from, (jl_value_t *)jl_malloc_tag, g_snapshot->names.find_or_create_string_id("")); diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index de4418ccc1e02..e47375c4f7902 100644 --- a/src/gc-heap-snapshot.h +++ 
b/src/gc-heap-snapshot.h @@ -7,19 +7,47 @@ extern "C" { #endif + // --------------------------------------------------------------------- // Functions to call from GC when heap snapshot is enabled // --------------------------------------------------------------------- -void gc_heap_snapshot_record_array_edge(jl_value_t *from , jl_value_t *to , size_t index) JL_NOTSAFEPOINT; -void gc_heap_snapshot_record_module_edge(jl_module_t *from , jl_value_t *to , char *name) JL_NOTSAFEPOINT; -void gc_heap_snapshot_record_object_edge(jl_value_t *from , jl_value_t *to , size_t field_index) JL_NOTSAFEPOINT; +// TODO: remove JL_DLLEXPORT +JL_DLLEXPORT void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT; +JL_DLLEXPORT void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT; +JL_DLLEXPORT void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT; // Used for objects managed by GC, but which aren't exposed in the julia object, so have no // field or index. i.e. they're not reacahable from julia code, but we _will_ hit them in // the GC mark phase (so we can check their type tag to get the size). -void gc_heap_snapshot_record_internal_edge(jl_value_t *from , jl_value_t *to ) JL_NOTSAFEPOINT; +JL_DLLEXPORT void _gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT; // Used for objects manually allocated in C (outside julia GC), to still tell the heap snapshot about the // size of the object, even though we're never going to mark that object. 
-void gc_heap_snapshot_record_hidden_edge(jl_value_t *from , size_t bytes ) JL_NOTSAFEPOINT; +JL_DLLEXPORT void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOTSAFEPOINT; + + +extern int gc_heap_snapshot_enabled; +#define RETURN_IF_HEAP_SNAPSHOT_NOT_ENABLED() if (!gc_heap_snapshot_enabled) {return;} + + +static inline void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) { + RETURN_IF_HEAP_SNAPSHOT_NOT_ENABLED(); + _gc_heap_snapshot_record_array_edge(from, to, index); +} +static inline void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) { + RETURN_IF_HEAP_SNAPSHOT_NOT_ENABLED(); + _gc_heap_snapshot_record_module_edge(from, to, name); +} +static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) { + RETURN_IF_HEAP_SNAPSHOT_NOT_ENABLED(); + _gc_heap_snapshot_record_object_edge(from, to, field_index); +} +static inline void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) { + RETURN_IF_HEAP_SNAPSHOT_NOT_ENABLED(); + _gc_heap_snapshot_record_internal_edge(from, to); +} +static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) { + RETURN_IF_HEAP_SNAPSHOT_NOT_ENABLED(); + _gc_heap_snapshot_record_hidden_edge(from, bytes); +} // --------------------------------------------------------------------- // Functions to call from Julia to take heap snapshot From 3e11756141f7b1bbb5ff156040ceace567800859 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Wed, 22 Sep 2021 15:51:35 -0400 Subject: [PATCH 063/106] mark the gc_heap_snapshot_enabled check __unlikely --- src/gc-heap-snapshot.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index e47375c4f7902..341dff7dc5778 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -25,28 +25,31 @@ JL_DLLEXPORT void 
_gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t extern int gc_heap_snapshot_enabled; -#define RETURN_IF_HEAP_SNAPSHOT_NOT_ENABLED() if (!gc_heap_snapshot_enabled) {return;} - static inline void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) { - RETURN_IF_HEAP_SNAPSHOT_NOT_ENABLED(); - _gc_heap_snapshot_record_array_edge(from, to, index); + if (__unlikely(gc_heap_snapshot_enabled)) { + _gc_heap_snapshot_record_array_edge(from, to, index); + } } static inline void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) { - RETURN_IF_HEAP_SNAPSHOT_NOT_ENABLED(); - _gc_heap_snapshot_record_module_edge(from, to, name); + if (__unlikely(gc_heap_snapshot_enabled)) { + _gc_heap_snapshot_record_module_edge(from, to, name); + } } static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) { - RETURN_IF_HEAP_SNAPSHOT_NOT_ENABLED(); - _gc_heap_snapshot_record_object_edge(from, to, field_index); + if (__unlikely(gc_heap_snapshot_enabled)) { + _gc_heap_snapshot_record_object_edge(from, to, field_index); + } } static inline void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) { - RETURN_IF_HEAP_SNAPSHOT_NOT_ENABLED(); - _gc_heap_snapshot_record_internal_edge(from, to); + if (__unlikely(gc_heap_snapshot_enabled)) { + _gc_heap_snapshot_record_internal_edge(from, to); + } } static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) { - RETURN_IF_HEAP_SNAPSHOT_NOT_ENABLED(); - _gc_heap_snapshot_record_hidden_edge(from, bytes); + if (__unlikely(gc_heap_snapshot_enabled)) { + _gc_heap_snapshot_record_hidden_edge(from, bytes); + } } // --------------------------------------------------------------------- From fc29e39d6b4f22dda0d93eb0400f062929a931ee Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Wed, 22 Sep 2021 15:56:52 -0400 Subject: [PATCH 064/106] fix annotations on new inline funcs --- 
src/gc-heap-snapshot.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 341dff7dc5778..3eef973e10ebb 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -26,27 +26,27 @@ JL_DLLEXPORT void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t extern int gc_heap_snapshot_enabled; -static inline void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) { +static inline void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { if (__unlikely(gc_heap_snapshot_enabled)) { _gc_heap_snapshot_record_array_edge(from, to, index); } } -static inline void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) { +static inline void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT { if (__unlikely(gc_heap_snapshot_enabled)) { _gc_heap_snapshot_record_module_edge(from, to, name); } } -static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) { +static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT { if (__unlikely(gc_heap_snapshot_enabled)) { _gc_heap_snapshot_record_object_edge(from, to, field_index); } } -static inline void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) { +static inline void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT { if (__unlikely(gc_heap_snapshot_enabled)) { _gc_heap_snapshot_record_internal_edge(from, to); } } -static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) { +static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOTSAFEPOINT { if (__unlikely(gc_heap_snapshot_enabled)) { _gc_heap_snapshot_record_hidden_edge(from, bytes); } From 
e612f29ed46501fae80863b4a47daec276f322a4 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Wed, 22 Sep 2021 16:20:14 -0400 Subject: [PATCH 065/106] revert to just type name because we can't safely print full types, because they GC --- src/gc-heap-snapshot.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 9056f93335bec..652df266327d3 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -214,16 +214,16 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { name = jl_symbol_name((jl_sym_t*)a); self_size = name.length(); } else if (jl_is_datatype(type)) { - ios_t str_; - ios_mem(&str_, 1024); - JL_STREAM* str = (JL_STREAM*)&str_; - - jl_static_show(str, (jl_value_t*)type); - - name = string((const char*)str_.buf, str_.size); - ios_close(&str_); - self_size = (size_t)jl_datatype_size(type); + // TODO: get the entire type, including type parameters + // - option 1: jl_static_show it here: + // crashes because it might do GC, and we are already inside GC. + // + // - option 2: put type jl_datatype_t* directly in the Node struct: + // bad because the jl_datatype_t might be freed before we serialize + // the snapshot. Maybe we should allocate some gc-rooted object + // and attach all the jl_datatype_t*'s we need to it? 
+ name = jl_symbol_name(type->name->name); } } From d9116bbc2a6070e021777bce0f13c881a12fa6fa Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Wed, 22 Sep 2021 14:30:24 -0400 Subject: [PATCH 066/106] start adding roots --- src/gc-heap-snapshot.cpp | 47 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 652df266327d3..29e6ce793e9f1 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -141,9 +141,10 @@ struct HeapSnapshot { // edges are stored on each from_node StringTable names; - StringTable node_types = {"object", "string", "symbol"}; + StringTable node_types = {"object", "string", "symbol", "synthetic"}; StringTable edge_types = {"property"}; unordered_map node_ptr_to_index_map; + Node *uber_root; size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. }; @@ -165,6 +166,8 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { g_snapshot = &snapshot; gc_heap_snapshot_enabled = true; + _add_uber_root(&snapshot); + // Do GC, which will callback into record_edge_to_gc_snapshot()... jl_gc_collect(JL_GC_FULL); @@ -181,6 +184,27 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { //jl_printf(JL_STDERR, "edges: %d\n", count_edges); } +// adds a node at id 0 which is the "uber root": +// a synthetic node which points to all the GC roots. +void _add_uber_root(HeapSnapshot *snapshot) { + // TODO: DRY this up with node construction in record_node_to_gc_snapshot? 
+ Node uber_root{ + snapshot->node_types.find_or_create_string_id("synthetic"), + "(uber root)", // name + 0, // id: uber root must have id 0 + 0, // size + + 0, // int edge_count, will be incremented on every outgoing edge + 0, // size_t trace_node_id (unused) + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + + // outgoing edges + vector(), + }; + snapshot->nodes.push_back(uber_root); + snapshot->uber_root = &uber_root; +} + // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { auto val = g_snapshot->node_ptr_to_index_map.find((void*)a); @@ -242,15 +266,30 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { 0, // int edge_count, will be incremented on every outgoing edge 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - // Book-keeping: todo + // outgoing edges vector(), }; g_snapshot->nodes.push_back(from_node); } -void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { +JL_DLLEXPORT void gc_heap_snapshot_record_root(jl_value_t *root) { + auto &from_node = g_snapshot->uber_root; + record_node_to_gc_snapshot(root); + + // add synthetic edge from 0 to it + XXX + + from_node.edges.push_back(Edge{ + g_snapshot->edge_types.find_or_create_string_id("synthetic"), + }); +} + +void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { + if (!g_snapshot) { + return; + } _record_gc_edge("array", "element", from, to, index); } void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT { From a1cee9ed11b84441ed1646d57e80100706e82170 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> 
Date: Wed, 22 Sep 2021 16:48:59 -0400 Subject: [PATCH 067/106] function to add edges from uber root to roots --- src/gc-heap-snapshot.cpp | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 29e6ce793e9f1..6aa56cb692986 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -27,10 +27,6 @@ using std::unordered_set; int gc_heap_snapshot_enabled = 0; -static inline void _record_gc_edge(const char *node_type, const char *edge_type, - jl_value_t *a, jl_value_t *b, size_t name_or_index); - - // https://stackoverflow.com/a/33799784/751061 void print_str_escape_json(JL_STREAM *stream, const std::string &s) { jl_printf(stream, "\""); @@ -56,6 +52,9 @@ void print_str_escape_json(JL_STREAM *stream, const std::string &s) { struct HeapSnapshot; void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot); +static inline void _record_gc_edge(const char *node_type, const char *edge_type, + jl_value_t *a, jl_value_t *b, size_t name_or_index); +void _add_uber_root(HeapSnapshot *snapshot); // Edges // "edge_fields": @@ -275,18 +274,28 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { } JL_DLLEXPORT void gc_heap_snapshot_record_root(jl_value_t *root) { - auto &from_node = g_snapshot->uber_root; + auto &uber_root = g_snapshot->uber_root; record_node_to_gc_snapshot(root); - // add synthetic edge from 0 to it - XXX + // add synthetic edge from uber root to it + record_node_to_gc_snapshot(root); - from_node.edges.push_back(Edge{ + uber_root->edges.push_back(Edge{ g_snapshot->edge_types.find_or_create_string_id("synthetic"), }); + uber_root->edge_count++; } -void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { + +void gc_heap_snapshot_record_root(jl_value_t *root, jl_value_t *to) JL_NOTSAFEPOINT { + if (!g_snapshot) { + return; + } + _record_gc_edge("synthetic", "element", 
(jl_value_t*)nullptr, root, 0 /* array index */); +} + + +void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { if (!g_snapshot) { return; } From 8c3bb4a41417a1a0fd4761ecc8c55fe003ad053a Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Wed, 22 Sep 2021 17:34:48 -0400 Subject: [PATCH 068/106] add uber root and gc roots object --- src/gc-heap-snapshot.cpp | 61 ++++++++++++++++++++++++++-------------- src/gc-heap-snapshot.h | 12 ++++---- src/gc.c | 11 ++++++-- 3 files changed, 55 insertions(+), 29 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 6aa56cb692986..eda2e69b9f62c 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -143,7 +143,7 @@ struct HeapSnapshot { StringTable node_types = {"object", "string", "symbol", "synthetic"}; StringTable edge_types = {"property"}; unordered_map node_ptr_to_index_map; - Node *uber_root; + Node *gc_roots_node; size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. }; @@ -186,11 +186,10 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { // adds a node at id 0 which is the "uber root": // a synthetic node which points to all the GC roots. void _add_uber_root(HeapSnapshot *snapshot) { - // TODO: DRY this up with node construction in record_node_to_gc_snapshot? 
Node uber_root{ snapshot->node_types.find_or_create_string_id("synthetic"), "(uber root)", // name - 0, // id: uber root must have id 0 + 1, // id: uber root must have id 1 0, // size 0, // int edge_count, will be incremented on every outgoing edge @@ -201,7 +200,30 @@ void _add_uber_root(HeapSnapshot *snapshot) { vector(), }; snapshot->nodes.push_back(uber_root); - snapshot->uber_root = &uber_root; + + Node gc_roots_node{ + snapshot->node_types.find_or_create_string_id("synthetic"), + "(GC roots)", // name + 3, // id: GC root 2 higher than uber root + 0, // size + + 0, // int edge_count, will be incremented on every outgoing edge + 0, // size_t trace_node_id (unused) + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + + // outgoing edges + vector(), + }; + snapshot->nodes.push_back(gc_roots_node); + snapshot->gc_roots_node = &gc_roots_node; + + // add edge from uber root to gc roots node + uber_root.edges.push_back(Edge{ + snapshot->names.find_or_create_string_id("internal"), + snapshot->names.find_or_create_string_id("uber_root to gc_roots_node"), + 1, + }); + uber_root.edge_count++; } // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 @@ -273,28 +295,24 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { g_snapshot->nodes.push_back(from_node); } -JL_DLLEXPORT void gc_heap_snapshot_record_root(jl_value_t *root) { - auto &uber_root = g_snapshot->uber_root; - record_node_to_gc_snapshot(root); +void _gc_heap_snapshot_record_root(jl_value_t *root) { + auto &gc_roots_node = g_snapshot->gc_roots_node; // add synthetic edge from uber root to it record_node_to_gc_snapshot(root); - - uber_root->edges.push_back(Edge{ - g_snapshot->edge_types.find_or_create_string_id("synthetic"), + + // TODO: just make record_node_to_gc_snapshot return this + auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; + auto name_or_index = 
g_snapshot->names.find_or_create_string_id("root"); + + gc_roots_node->edges.push_back(Edge{ + g_snapshot->edge_types.find_or_create_string_id("internal"), + name_or_index, + to_node_idx, }); - uber_root->edge_count++; + gc_roots_node->edge_count++; } - -void gc_heap_snapshot_record_root(jl_value_t *root, jl_value_t *to) JL_NOTSAFEPOINT { - if (!g_snapshot) { - return; - } - _record_gc_edge("synthetic", "element", (jl_value_t*)nullptr, root, 0 /* array index */); -} - - void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { if (!g_snapshot) { return; @@ -357,7 +375,8 @@ static inline void _record_gc_edge(const char *node_type, const char *edge_type, from_node.edge_count += 1; from_node.edges.push_back(Edge{ - g_snapshot->edge_types.find_or_create_string_id(edge_type), name_or_index, + g_snapshot->edge_types.find_or_create_string_id(edge_type), + name_or_index, g_snapshot->node_ptr_to_index_map[b], // to }); diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 3eef973e10ebb..de67ae3340ddb 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -11,17 +11,17 @@ extern "C" { // --------------------------------------------------------------------- // Functions to call from GC when heap snapshot is enabled // --------------------------------------------------------------------- -// TODO: remove JL_DLLEXPORT -JL_DLLEXPORT void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT; -JL_DLLEXPORT void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT; -JL_DLLEXPORT void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_root(jl_value_t *root); +void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_module_edge(jl_module_t *from, 
jl_value_t *to, char *name) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT; // Used for objects managed by GC, but which aren't exposed in the julia object, so have no // field or index. i.e. they're not reacahable from julia code, but we _will_ hit them in // the GC mark phase (so we can check their type tag to get the size). -JL_DLLEXPORT void _gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT; // Used for objects manually allocated in C (outside julia GC), to still tell the heap snapshot about the // size of the object, even though we're never going to mark that object. -JL_DLLEXPORT void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOTSAFEPOINT; extern int gc_heap_snapshot_enabled; diff --git a/src/gc.c b/src/gc.c index 5bb122af07ca5..7581639ce2cdb 100644 --- a/src/gc.c +++ b/src/gc.c @@ -2822,8 +2822,10 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) { // modules gc_mark_queue_obj(gc_cache, sp, jl_main_module); + _gc_heap_snapshot_record_root(jl_main_module); // tasks + // TODO: add tasks as roots jl_gc_mark_enqueued_tasks(gc_cache, sp); // invisible builtin values @@ -2834,16 +2836,21 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) for (size_t i = 0; i < jl_current_modules.size; i += 2) { if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { gc_mark_queue_obj(gc_cache, sp, jl_current_modules.table[i]); + _gc_heap_snapshot_record_root(jl_current_modules.table[i]); } } gc_mark_queue_obj(gc_cache, sp, jl_anytuple_type_type); for (size_t i = 0; i < N_CALL_CACHE; i++) { jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); - if (v != NULL) + if (v != NULL) { 
gc_mark_queue_obj(gc_cache, sp, v); + _gc_heap_snapshot_record_root(v); + } } - if (jl_all_methods != NULL) + if (jl_all_methods != NULL) { gc_mark_queue_obj(gc_cache, sp, jl_all_methods); + _gc_heap_snapshot_record_root(jl_all_methods); + } if (_jl_debug_method_invalidation != NULL) gc_mark_queue_obj(gc_cache, sp, _jl_debug_method_invalidation); From 6f7fd5aaea9d77c6a23ac1b1cccdfa5856e0da7a Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Wed, 22 Sep 2021 17:53:54 -0400 Subject: [PATCH 069/106] attempt to add subroots node and edges --- src/gc-heap-snapshot.cpp | 53 ++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index eda2e69b9f62c..fa84e297e210b 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -143,7 +143,7 @@ struct HeapSnapshot { StringTable node_types = {"object", "string", "symbol", "synthetic"}; StringTable edge_types = {"property"}; unordered_map node_ptr_to_index_map; - Node *gc_roots_node; + size_t gc_subroots_node_idx; // all gc roots hang off of this size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. }; @@ -186,9 +186,9 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { // adds a node at id 0 which is the "uber root": // a synthetic node which points to all the GC roots. 
void _add_uber_root(HeapSnapshot *snapshot) { - Node uber_root{ + Node internal_root{ snapshot->node_types.find_or_create_string_id("synthetic"), - "(uber root)", // name + "(internal root)", // name 1, // id: uber root must have id 1 0, // size @@ -199,12 +199,12 @@ void _add_uber_root(HeapSnapshot *snapshot) { // outgoing edges vector(), }; - snapshot->nodes.push_back(uber_root); + snapshot->nodes.push_back(internal_root); - Node gc_roots_node{ + Node root_node{ snapshot->node_types.find_or_create_string_id("synthetic"), - "(GC roots)", // name - 3, // id: GC root 2 higher than uber root + "(GC root)", // name + 3, // id: GC root 2 higher than internal root 0, // size 0, // int edge_count, will be incremented on every outgoing edge @@ -214,16 +214,39 @@ void _add_uber_root(HeapSnapshot *snapshot) { // outgoing edges vector(), }; - snapshot->nodes.push_back(gc_roots_node); - snapshot->gc_roots_node = &gc_roots_node; + snapshot->nodes.push_back(root_node); // add edge from uber root to gc roots node - uber_root.edges.push_back(Edge{ + internal_root.edges.push_back(Edge{ snapshot->names.find_or_create_string_id("internal"), - snapshot->names.find_or_create_string_id("uber_root to gc_roots_node"), + snapshot->names.find_or_create_string_id("internal_root to gc_root_node"), 1, }); - uber_root.edge_count++; + internal_root.edge_count++; + + Node subroot_node{ + snapshot->node_types.find_or_create_string_id("synthetic"), + "(GC subroot)", // name + 5, // id: GC subroot 2 higher than root + 0, // size + + 0, // int edge_count, will be incremented on every outgoing edge + 0, // size_t trace_node_id (unused) + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + + // outgoing edges + vector(), + }; + snapshot->nodes.push_back(subroot_node); + snapshot->gc_subroots_node_idx = 2; + + // add edge from uber root to gc roots node + root_node.edges.push_back(Edge{ + snapshot->names.find_or_create_string_id("internal"), + 
snapshot->names.find_or_create_string_id("root_node to sub_root_node"), + 2, + }); + root_node.edge_count++; } // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 @@ -296,7 +319,7 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { } void _gc_heap_snapshot_record_root(jl_value_t *root) { - auto &gc_roots_node = g_snapshot->gc_roots_node; + auto &gc_subroots_node = g_snapshot->nodes[g_snapshot->gc_subroots_node_idx]; // add synthetic edge from uber root to it record_node_to_gc_snapshot(root); @@ -305,12 +328,12 @@ void _gc_heap_snapshot_record_root(jl_value_t *root) { auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; auto name_or_index = g_snapshot->names.find_or_create_string_id("root"); - gc_roots_node->edges.push_back(Edge{ + gc_subroots_node.edges.push_back(Edge{ g_snapshot->edge_types.find_or_create_string_id("internal"), name_or_index, to_node_idx, }); - gc_roots_node->edge_count++; + gc_subroots_node.edge_count++; } void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { From 21e0f7f8ca475740e06d90b2f0d06e5680938cd2 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Wed, 22 Sep 2021 19:58:12 -0400 Subject: [PATCH 070/106] add internal_root --- src/gc-heap-snapshot.cpp | 74 +++++++--------------------------------- 1 file changed, 13 insertions(+), 61 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index fa84e297e210b..03a0bdaf0a8e6 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -54,7 +54,7 @@ struct HeapSnapshot; void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot); static inline void _record_gc_edge(const char *node_type, const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index); -void _add_uber_root(HeapSnapshot *snapshot); +void _add_internal_root(HeapSnapshot *snapshot); // Edges // 
"edge_fields": @@ -143,7 +143,6 @@ struct HeapSnapshot { StringTable node_types = {"object", "string", "symbol", "synthetic"}; StringTable edge_types = {"property"}; unordered_map node_ptr_to_index_map; - size_t gc_subroots_node_idx; // all gc roots hang off of this size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. }; @@ -165,7 +164,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { g_snapshot = &snapshot; gc_heap_snapshot_enabled = true; - _add_uber_root(&snapshot); + _add_internal_root(&snapshot); // Do GC, which will callback into record_edge_to_gc_snapshot()... jl_gc_collect(JL_GC_FULL); @@ -185,28 +184,13 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { // adds a node at id 0 which is the "uber root": // a synthetic node which points to all the GC roots. -void _add_uber_root(HeapSnapshot *snapshot) { +void _add_internal_root(HeapSnapshot *snapshot) { Node internal_root{ snapshot->node_types.find_or_create_string_id("synthetic"), "(internal root)", // name - 1, // id: uber root must have id 1 + 1, // id: internal root must have id 1 0, // size - - 0, // int edge_count, will be incremented on every outgoing edge - 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - - // outgoing edges - vector(), - }; - snapshot->nodes.push_back(internal_root); - Node root_node{ - snapshot->node_types.find_or_create_string_id("synthetic"), - "(GC root)", // name - 3, // id: GC root 2 higher than internal root - 0, // size - 0, // int edge_count, will be incremented on every outgoing edge 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached @@ -214,39 +198,7 @@ void _add_uber_root(HeapSnapshot *snapshot) { // outgoing edges vector(), }; - snapshot->nodes.push_back(root_node); - - // add edge from uber root to gc roots node - internal_root.edges.push_back(Edge{ - 
snapshot->names.find_or_create_string_id("internal"), - snapshot->names.find_or_create_string_id("internal_root to gc_root_node"), - 1, - }); - internal_root.edge_count++; - - Node subroot_node{ - snapshot->node_types.find_or_create_string_id("synthetic"), - "(GC subroot)", // name - 5, // id: GC subroot 2 higher than root - 0, // size - - 0, // int edge_count, will be incremented on every outgoing edge - 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - - // outgoing edges - vector(), - }; - snapshot->nodes.push_back(subroot_node); - snapshot->gc_subroots_node_idx = 2; - - // add edge from uber root to gc roots node - root_node.edges.push_back(Edge{ - snapshot->names.find_or_create_string_id("internal"), - snapshot->names.find_or_create_string_id("root_node to sub_root_node"), - 2, - }); - root_node.edge_count++; + snapshot->nodes.push_back(internal_root); } // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 @@ -319,21 +271,21 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { } void _gc_heap_snapshot_record_root(jl_value_t *root) { - auto &gc_subroots_node = g_snapshot->nodes[g_snapshot->gc_subroots_node_idx]; - - // add synthetic edge from uber root to it + auto &internal_root = g_snapshot->nodes.front(); + + // add synthetic edge from internal root to our root + // TODO: We could label these with a root type record_node_to_gc_snapshot(root); - + // TODO: just make record_node_to_gc_snapshot return this auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; - auto name_or_index = g_snapshot->names.find_or_create_string_id("root"); - gc_subroots_node.edges.push_back(Edge{ + internal_root.edges.push_back(Edge{ g_snapshot->edge_types.find_or_create_string_id("internal"), - name_or_index, + internal_root.edge_count, to_node_idx, }); - gc_subroots_node.edge_count++; + 
internal_root.edge_count++; } void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { From 0daf9ac8ab02e1e2128c87f38f98c7c0c3472727 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 23 Sep 2021 11:30:22 -0400 Subject: [PATCH 071/106] Fix segfault in _gc_heap_snapshot_record_root() --- src/gc-heap-snapshot.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 03a0bdaf0a8e6..c4b5e55c8a08a 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -271,8 +271,6 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { } void _gc_heap_snapshot_record_root(jl_value_t *root) { - auto &internal_root = g_snapshot->nodes.front(); - // add synthetic edge from internal root to our root // TODO: We could label these with a root type record_node_to_gc_snapshot(root); @@ -280,6 +278,7 @@ void _gc_heap_snapshot_record_root(jl_value_t *root) { // TODO: just make record_node_to_gc_snapshot return this auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; + auto &internal_root = g_snapshot->nodes.front(); internal_root.edges.push_back(Edge{ g_snapshot->edge_types.find_or_create_string_id("internal"), internal_root.edge_count, From 8c84a6458544ec9d5c4c03cfea0bb5abdc2f17e6 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 23 Sep 2021 15:30:24 -0400 Subject: [PATCH 072/106] fix edge count --- src/gc-heap-snapshot.cpp | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index c4b5e55c8a08a..ea9333ff71822 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -78,7 +78,6 @@ struct Node { string name; size_t id; // (vilterp) the memory address, right? size_t self_size; - size_t edge_count; size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots. 
// whether the from_node is attached or dettached from the main application state // TODO: .... meaning not yet understood. @@ -140,8 +139,8 @@ struct HeapSnapshot { // edges are stored on each from_node StringTable names; - StringTable node_types = {"object", "string", "symbol", "synthetic"}; - StringTable edge_types = {"property"}; + StringTable node_types; + StringTable edge_types; unordered_map node_ptr_to_index_map; size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. @@ -188,10 +187,9 @@ void _add_internal_root(HeapSnapshot *snapshot) { Node internal_root{ snapshot->node_types.find_or_create_string_id("synthetic"), "(internal root)", // name - 1, // id: internal root must have id 1 - 0, // size + 0, // id + 1, // size - 0, // int edge_count, will be incremented on every outgoing edge 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached @@ -260,7 +258,6 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { // Also because the Chrome Snapshot viewer ignores size-0 leaves! 
self_size + 1, // size_t self_size; - 0, // int edge_count, will be incremented on every outgoing edge 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached @@ -271,20 +268,22 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { } void _gc_heap_snapshot_record_root(jl_value_t *root) { - // add synthetic edge from internal root to our root - // TODO: We could label these with a root type record_node_to_gc_snapshot(root); // TODO: just make record_node_to_gc_snapshot return this auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; auto &internal_root = g_snapshot->nodes.front(); + auto edge_type = g_snapshot->edge_types.find_or_create_string_id("internal"); + auto edge_label = g_snapshot->names.find_or_create_string_id("internal_root to root"); + internal_root.edges.push_back(Edge{ - g_snapshot->edge_types.find_or_create_string_id("internal"), - internal_root.edge_count, + edge_type, + edge_label, to_node_idx, }); - internal_root.edge_count++; + + g_snapshot->num_edges++; } void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { @@ -346,7 +345,6 @@ static inline void _record_gc_edge(const char *node_type, const char *edge_type, auto &from_node = g_snapshot->nodes[from_node_idx]; // TODO: can these ever disagree?: from_node.type = g_snapshot->node_types.find_or_create_string_id(node_type); - from_node.edge_count += 1; from_node.edges.push_back(Edge{ g_snapshot->edge_types.find_or_create_string_id(edge_type), @@ -390,7 +388,7 @@ void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { jl_printf(stream, ",%zu", snapshot.names.find_or_create_string_id(from_node.name)); jl_printf(stream, ",%zu", from_node.id); jl_printf(stream, ",%zu", from_node.self_size); - jl_printf(stream, ",%zu", from_node.edge_count); + jl_printf(stream, ",%zu", from_node.edges.size()); jl_printf(stream, ",%zu", from_node.trace_node_id); jl_printf(stream, ",%d", 
from_node.detachedness); jl_printf(stream, "\n"); From dce89e9ca4bec3dfa291bf6c221816630436afb3 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 23 Sep 2021 16:24:21 -0400 Subject: [PATCH 073/106] make gc root recording conditional on heap snapshotting being on --- src/gc-heap-snapshot.h | 5 +++++ src/gc.c | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index de67ae3340ddb..6c1fb6b3ecb7d 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -26,6 +26,11 @@ void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOT extern int gc_heap_snapshot_enabled; +static inline void gc_heap_snapshot_record_root(jl_value_t *root) { + if (__unlikely(gc_heap_snapshot_enabled)) { + _gc_heap_snapshot_record_root(root); + } +} static inline void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { if (__unlikely(gc_heap_snapshot_enabled)) { _gc_heap_snapshot_record_array_edge(from, to, index); diff --git a/src/gc.c b/src/gc.c index 7581639ce2cdb..0532dc93f5cb7 100644 --- a/src/gc.c +++ b/src/gc.c @@ -2822,7 +2822,7 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) { // modules gc_mark_queue_obj(gc_cache, sp, jl_main_module); - _gc_heap_snapshot_record_root(jl_main_module); + gc_heap_snapshot_record_root(jl_main_module); // tasks // TODO: add tasks as roots @@ -2836,7 +2836,7 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) for (size_t i = 0; i < jl_current_modules.size; i += 2) { if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { gc_mark_queue_obj(gc_cache, sp, jl_current_modules.table[i]); - _gc_heap_snapshot_record_root(jl_current_modules.table[i]); + gc_heap_snapshot_record_root(jl_current_modules.table[i]); } } gc_mark_queue_obj(gc_cache, sp, jl_anytuple_type_type); @@ -2844,12 +2844,12 @@ static void 
mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); if (v != NULL) { gc_mark_queue_obj(gc_cache, sp, v); - _gc_heap_snapshot_record_root(v); + gc_heap_snapshot_record_root(v); } } if (jl_all_methods != NULL) { gc_mark_queue_obj(gc_cache, sp, jl_all_methods); - _gc_heap_snapshot_record_root(jl_all_methods); + gc_heap_snapshot_record_root(jl_all_methods); } if (_jl_debug_method_invalidation != NULL) gc_mark_queue_obj(gc_cache, sp, _jl_debug_method_invalidation); From 6805334d18f52f852824f2ffcc5eee3c30b8f3c5 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 23 Sep 2021 16:57:13 -0400 Subject: [PATCH 074/106] mark gc_heap_snapshot_record_root as JL_NOTSAFEPOINT --- src/gc-heap-snapshot.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 6c1fb6b3ecb7d..a7158f47420b2 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -26,7 +26,7 @@ void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOT extern int gc_heap_snapshot_enabled; -static inline void gc_heap_snapshot_record_root(jl_value_t *root) { +static inline void gc_heap_snapshot_record_root(jl_value_t *root) JL_NOTSAFEPOINT { if (__unlikely(gc_heap_snapshot_enabled)) { _gc_heap_snapshot_record_root(root); } From bed9004ae12b0140fabcc6c5794992e43435c4c5 Mon Sep 17 00:00:00 2001 From: Pete Vilter Date: Fri, 24 Sep 2021 00:44:20 -0400 Subject: [PATCH 075/106] better property names for gc roots --- src/gc-heap-snapshot.cpp | 4 ++-- src/gc-heap-snapshot.h | 6 +++--- src/gc.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index ea9333ff71822..3b46f42d06940 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -267,7 +267,7 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { 
g_snapshot->nodes.push_back(from_node); } -void _gc_heap_snapshot_record_root(jl_value_t *root) { +void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) { record_node_to_gc_snapshot(root); // TODO: just make record_node_to_gc_snapshot return this @@ -275,7 +275,7 @@ void _gc_heap_snapshot_record_root(jl_value_t *root) { auto &internal_root = g_snapshot->nodes.front(); auto edge_type = g_snapshot->edge_types.find_or_create_string_id("internal"); - auto edge_label = g_snapshot->names.find_or_create_string_id("internal_root to root"); + auto edge_label = g_snapshot->names.find_or_create_string_id(name); internal_root.edges.push_back(Edge{ edge_type, diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index a7158f47420b2..c26ba5384368f 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -11,7 +11,7 @@ extern "C" { // --------------------------------------------------------------------- // Functions to call from GC when heap snapshot is enabled // --------------------------------------------------------------------- -void _gc_heap_snapshot_record_root(jl_value_t *root); +void _gc_heap_snapshot_record_root(jl_value_t *root, char *name); void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT; void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT; void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT; @@ -26,9 +26,9 @@ void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOT extern int gc_heap_snapshot_enabled; -static inline void gc_heap_snapshot_record_root(jl_value_t *root) JL_NOTSAFEPOINT { +static inline void gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT { if (__unlikely(gc_heap_snapshot_enabled)) { - _gc_heap_snapshot_record_root(root); + _gc_heap_snapshot_record_root(root, name); } } static inline void 
gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { diff --git a/src/gc.c b/src/gc.c index 0532dc93f5cb7..0397167ffba3e 100644 --- a/src/gc.c +++ b/src/gc.c @@ -2822,7 +2822,7 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) { // modules gc_mark_queue_obj(gc_cache, sp, jl_main_module); - gc_heap_snapshot_record_root(jl_main_module); + gc_heap_snapshot_record_root(jl_main_module, "main_module"); // tasks // TODO: add tasks as roots @@ -2836,7 +2836,7 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) for (size_t i = 0; i < jl_current_modules.size; i += 2) { if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { gc_mark_queue_obj(gc_cache, sp, jl_current_modules.table[i]); - gc_heap_snapshot_record_root(jl_current_modules.table[i]); + gc_heap_snapshot_record_root(jl_current_modules.table[i], "current_module"); } } gc_mark_queue_obj(gc_cache, sp, jl_anytuple_type_type); @@ -2844,12 +2844,12 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); if (v != NULL) { gc_mark_queue_obj(gc_cache, sp, v); - gc_heap_snapshot_record_root(v); + gc_heap_snapshot_record_root(v, "type_map"); } } if (jl_all_methods != NULL) { gc_mark_queue_obj(gc_cache, sp, jl_all_methods); - gc_heap_snapshot_record_root(jl_all_methods); + gc_heap_snapshot_record_root(jl_all_methods, "all_methods"); } if (_jl_debug_method_invalidation != NULL) gc_mark_queue_obj(gc_cache, sp, _jl_debug_method_invalidation); From 5e801cb9a2a94e0ab119008c297d86cd58efc287 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Sun, 26 Sep 2021 21:08:00 -0400 Subject: [PATCH 076/106] Apply suggestions from code review Apply @vtjnash's PR review feedback. 
Co-authored-by: Jameson Nash --- src/gc-heap-snapshot.cpp | 13 ++++++------- src/gc-heap-snapshot.h | 2 ++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 3b46f42d06940..657f1b3dafd65 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -1,3 +1,5 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + #include "gc-heap-snapshot.h" #include "julia_internal.h" @@ -81,8 +83,8 @@ struct Node { size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots. // whether the from_node is attached or dettached from the main application state // TODO: .... meaning not yet understood. - // https://github.com/nodejs/from_node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 - int detachedness; // 0 - unknown, 1 - attached; 2 - detached + // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 + int detachedness; // 0 - unknown, 1 - attached, 2 - detached // Book-keeping fields (not used for serialization) vector edges; // For asserting that we built the edges in the right order @@ -118,10 +120,7 @@ struct StringTable { if (first) { first = false; } else { - jl_printf(stream, ","); - if (newlines) { - jl_printf(stream, "\n"); - } + jl_printf(stream, newlines ? 
",\n" : ","); } // Escape strings for JSON // TODO @@ -357,7 +356,7 @@ static inline void _record_gc_edge(const char *node_type, const char *edge_type, } void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { - // mimicking https://github.com/nodejs/from_node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 + // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 jl_printf(stream, "{\"snapshot\":{"); jl_printf(stream, "\"meta\":{"); jl_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index c26ba5384368f..050fecdec5c1b 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -1,3 +1,5 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + #ifndef JL_GC_HEAP_SNAPSHOT_H #define JL_GC_HEAP_SNAPSHOT_H From c470ea94764dfee825b96551b72bfdf2c2910112 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Tue, 28 Sep 2021 11:35:25 -0400 Subject: [PATCH 077/106] more PR feedback --- src/gc-heap-snapshot.cpp | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 657f1b3dafd65..bdb1b7e02dab8 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -78,7 +78,7 @@ const int k_node_number_of_fields = 7; struct Node { size_t type; // TODO: point at actual type here? string name; - size_t id; // (vilterp) the memory address, right? + size_t id; // This should be a globally-unique counter, but we use the memory address size_t self_size; size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots. 
// whether the from_node is attached or dettached from the main application state @@ -122,8 +122,6 @@ struct StringTable { } else { jl_printf(stream, newlines ? ",\n" : ","); } - // Escape strings for JSON - // TODO print_str_escape_json(stream, str); } jl_printf(stream, "]"); @@ -145,16 +143,10 @@ struct HeapSnapshot { size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. }; - -// TODO: Do we need to refer to nodes by their index in the from_node array? -//size_t find_or_create_node_id(HeapSnapshot& snapshot, string key) { -// return find_or_insert_iter(snapshot.nodes_map, key)->second; -//} - +// global heap snapshot, mutated by garbage collector +// when snapshotting is on. HeapSnapshot *g_snapshot = nullptr; -JL_DLLEXPORT int count_nodes = 0; -JL_DLLEXPORT int count_edges = 0; JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { // Enable snapshotting @@ -164,8 +156,8 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { _add_internal_root(&snapshot); - // Do GC, which will callback into record_edge_to_gc_snapshot()... 
- jl_gc_collect(JL_GC_FULL); + // Do a full GC mark (and incremental sweep), which will invoke our callbacks on `g_snapshot` + jl_gc_collect(JL_GC_INCREMENTAL); // Disable snapshotting gc_heap_snapshot_enabled = false; @@ -174,10 +166,6 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { // When we return, the snapshot is full // Dump the snapshot serialize_heap_snapshot(stream, snapshot); - - // Debugging - //jl_printf(JL_STDERR, "nodes: %d\n", count_nodes); - //jl_printf(JL_STDERR, "edges: %d\n", count_edges); } // adds a node at id 0 which is the "uber root": @@ -217,7 +205,7 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { jl_datatype_t* type = (jl_datatype_t*)jl_typeof(a); if ((uintptr_t)type < 4096U) { - name = ""; + name = ""; } else if (type == (jl_datatype_t*)jl_buff_tag) { name = ""; } else if (type == (jl_datatype_t*)jl_malloc_tag) { @@ -245,7 +233,6 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { } g_snapshot->node_ptr_to_index_map.insert(val, {a, g_snapshot->nodes.size()}); - count_nodes += 1; Node from_node{ // We pick a default type here, which will be set for the _targets_ of edges. 
@@ -352,7 +339,6 @@ static inline void _record_gc_edge(const char *node_type, const char *edge_type, }); g_snapshot->num_edges += 1; - count_edges += 1; // debugging } void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { From a1f09888f3fd6fe73ffc197b587886ae79c12f92 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Tue, 28 Sep 2021 11:40:43 -0400 Subject: [PATCH 078/106] comments pointing at Node source --- src/gc-heap-snapshot.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index bdb1b7e02dab8..b754ac87e3bbd 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -17,16 +17,6 @@ using std::string; using std::unordered_map; using std::unordered_set; -// TODOs: -// Roots -// - Correctly reporting the roots to JS somehow..? -// - Making sure we're actually encoding all the roots. -// Field Names & node types -// - Bug fixing -// Sizes -// - string sizes - - int gc_heap_snapshot_enabled = 0; // https://stackoverflow.com/a/33799784/751061 @@ -61,6 +51,7 @@ void _add_internal_root(HeapSnapshot *snapshot); // Edges // "edge_fields": // [ "type", "name_or_index", "to_node" ] +// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2598-L2601 struct Edge { size_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index.
@@ -73,6 +64,7 @@ struct Edge { // Nodes // "node_fields": // [ "type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness" ] +// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2568-L2575 const int k_node_number_of_fields = 7; struct Node { From 4266a5c2b7f71e8b06ed962d65b51aafc757f12c Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Tue, 28 Sep 2021 15:50:38 -0400 Subject: [PATCH 079/106] use ios_t instead of JL_STREAM --- src/gc-heap-snapshot.cpp | 111 +++++++++++++++++++-------------------- src/gc-heap-snapshot.h | 3 +- 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index b754ac87e3bbd..88492fa34ae90 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -11,7 +11,6 @@ #include #include -using std::cout; using std::endl; using std::vector; using std::string; using std::unordered_map; @@ -20,30 +19,30 @@ using std::unordered_set; int gc_heap_snapshot_enabled = 0; // https://stackoverflow.com/a/33799784/751061 -void print_str_escape_json(JL_STREAM *stream, const std::string &s) { - jl_printf(stream, "\""); +void print_str_escape_json(ios_t *stream, const std::string &s) { + ios_printf(stream, "\""); for (auto c = s.cbegin(); c != s.cend(); c++) { switch (*c) { - case '"': jl_printf(stream, "\\\""); break; - case '\\': jl_printf(stream, "\\\\"); break; - case '\b': jl_printf(stream, "\\b"); break; - case '\f': jl_printf(stream, "\\f"); break; - case '\n': jl_printf(stream, "\\n"); break; - case '\r': jl_printf(stream, "\\r"); break; - case '\t': jl_printf(stream, "\\t"); break; + case '"': ios_printf(stream, "\\\""); break; + case '\\': ios_printf(stream, "\\\\"); break; + case '\b': ios_printf(stream, "\\b"); break; + case '\f': ios_printf(stream, "\\f"); break; + case '\n': ios_printf(stream, "\\n"); break; + case '\r': 
ios_printf(stream, "\\r"); break; + case '\t': ios_printf(stream, "\\t"); break; default: if ('\x00' <= *c && *c <= '\x1f') { - jl_printf(stream, "\\u%04x", (int)*c); + ios_printf(stream, "\\u%04x", (int)*c); } else { - jl_printf(stream, "%c", *c); + ios_printf(stream, "%c", *c); } } } - jl_printf(stream, "\""); + ios_printf(stream, "\""); } struct HeapSnapshot; -void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot); +void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot); static inline void _record_gc_edge(const char *node_type, const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index); void _add_internal_root(HeapSnapshot *snapshot); @@ -105,18 +104,18 @@ struct StringTable { return val->second; } - void print_json_array(JL_STREAM *stream, bool newlines) { - jl_printf(stream, "["); + void print_json_array(ios_t *stream, bool newlines) { + ios_printf(stream, "["); bool first = true; for (const auto &str : strings) { if (first) { first = false; } else { - jl_printf(stream, newlines ? ",\n" : ","); + ios_printf(stream, newlines ? 
",\n" : ","); } print_str_escape_json(stream, str); } - jl_printf(stream, "]"); + ios_printf(stream, "]"); } }; @@ -140,7 +139,7 @@ struct HeapSnapshot { HeapSnapshot *g_snapshot = nullptr; -JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream) { // Enable snapshotting HeapSnapshot snapshot; g_snapshot = &snapshot; @@ -157,7 +156,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream) { // When we return, the snapshot is full // Dump the snapshot - serialize_heap_snapshot(stream, snapshot); + serialize_heap_snapshot((ios_t*)stream, snapshot); } // adds a node at id 0 which is the "uber root": @@ -333,65 +332,65 @@ static inline void _record_gc_edge(const char *node_type, const char *edge_type, g_snapshot->num_edges += 1; } -void serialize_heap_snapshot(JL_STREAM *stream, HeapSnapshot &snapshot) { +void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot) { // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 - jl_printf(stream, "{\"snapshot\":{"); - jl_printf(stream, "\"meta\":{"); - jl_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); - jl_printf(stream, "\"node_types\":["); + ios_printf(stream, "{\"snapshot\":{"); + ios_printf(stream, "\"meta\":{"); + ios_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); + ios_printf(stream, "\"node_types\":["); snapshot.node_types.print_json_array(stream, false); - jl_printf(stream, ","); - jl_printf(stream, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); - jl_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); - jl_printf(stream, "\"edge_types\":["); + ios_printf(stream, ","); + ios_printf(stream, "\"string\", \"number\", \"number\", 
\"number\", \"number\", \"number\"],"); + ios_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); + ios_printf(stream, "\"edge_types\":["); snapshot.edge_types.print_json_array(stream, false); - jl_printf(stream, ","); - jl_printf(stream, "\"string_or_number\",\"from_node\"]"); - jl_printf(stream, "},\n"); // end "meta" - jl_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); - jl_printf(stream, "\"edge_count\":%zu", snapshot.num_edges); - jl_printf(stream, "},\n"); // end "snapshot" - - jl_printf(stream, "\"nodes\":["); + ios_printf(stream, ","); + ios_printf(stream, "\"string_or_number\",\"from_node\"]"); + ios_printf(stream, "},\n"); // end "meta" + ios_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); + ios_printf(stream, "\"edge_count\":%zu", snapshot.num_edges); + ios_printf(stream, "},\n"); // end "snapshot" + + ios_printf(stream, "\"nodes\":["); bool first_node = true; for (const auto &from_node : snapshot.nodes) { if (first_node) { first_node = false; } else { - jl_printf(stream, ","); + ios_printf(stream, ","); } // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] - jl_printf(stream, "%zu", from_node.type); - jl_printf(stream, ",%zu", snapshot.names.find_or_create_string_id(from_node.name)); - jl_printf(stream, ",%zu", from_node.id); - jl_printf(stream, ",%zu", from_node.self_size); - jl_printf(stream, ",%zu", from_node.edges.size()); - jl_printf(stream, ",%zu", from_node.trace_node_id); - jl_printf(stream, ",%d", from_node.detachedness); - jl_printf(stream, "\n"); + ios_printf(stream, "%zu", from_node.type); + ios_printf(stream, ",%zu", snapshot.names.find_or_create_string_id(from_node.name)); + ios_printf(stream, ",%zu", from_node.id); + ios_printf(stream, ",%zu", from_node.self_size); + ios_printf(stream, ",%zu", from_node.edges.size()); + ios_printf(stream, ",%zu", from_node.trace_node_id); + ios_printf(stream, ",%d", from_node.detachedness); + ios_printf(stream, "\n"); } - 
jl_printf(stream, "],\n"); + ios_printf(stream, "],\n"); - jl_printf(stream, "\"edges\":["); + ios_printf(stream, "\"edges\":["); bool first_edge = true; for (const auto &from_node : snapshot.nodes) { for (const auto &edge : from_node.edges) { if (first_edge) { first_edge = false; } else { - jl_printf(stream, ","); + ios_printf(stream, ","); } - jl_printf(stream, "%zu", edge.type); - jl_printf(stream, ",%zu", edge.name_or_index); - jl_printf(stream, ",%zu", edge.to_node * k_node_number_of_fields); - jl_printf(stream, "\n"); + ios_printf(stream, "%zu", edge.type); + ios_printf(stream, ",%zu", edge.name_or_index); + ios_printf(stream, ",%zu", edge.to_node * k_node_number_of_fields); + ios_printf(stream, "\n"); } } - jl_printf(stream, "],\n"); // end "edges" + ios_printf(stream, "],\n"); // end "edges" - jl_printf(stream, "\"strings\":"); + ios_printf(stream, "\"strings\":"); snapshot.names.print_json_array(stream, true); - jl_printf(stream, "}"); + ios_printf(stream, "}"); } diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 050fecdec5c1b..cfea57ed6b86c 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -4,6 +4,7 @@ #define JL_GC_HEAP_SNAPSHOT_H #include "julia.h" +#include "ios.h" #ifdef __cplusplus extern "C" { @@ -62,7 +63,7 @@ static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t // --------------------------------------------------------------------- // Functions to call from Julia to take heap snapshot // --------------------------------------------------------------------- -JL_DLLEXPORT void jl_gc_take_heap_snapshot(JL_STREAM *stream); +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream); #ifdef __cplusplus From 47fe30d915f07f906c2f599769a0299d087ea218 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Tue, 28 Sep 2021 16:41:13 -0400 Subject: [PATCH 080/106] print full types --- src/gc-heap-snapshot.cpp | 19 ++++++++++--------- 1 file changed, 10 
insertions(+), 9 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 88492fa34ae90..da39a12b9d02a 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -211,15 +211,16 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { self_size = name.length(); } else if (jl_is_datatype(type)) { self_size = (size_t)jl_datatype_size(type); - // TODO: get the entire type, including type parameters - // - option 1: jl_static_show it here: - // crashes because it might do GC, and we are already inside GC. - // - // - option 2: put type jl_datatype_t* directly in the Node struct: - // bad because the jl_datatype_t might be freed before we serialize - // the snapshot. Maybe we should allocate some gc-rooted object - // and attach all the jl_datatype_t*'s we need to it? - name = jl_symbol_name(type->name->name); + + // print full type + ios_t str_; + ios_mem(&str_, 1024); + JL_STREAM* str = (JL_STREAM*)&str_; + + jl_static_show(str, (jl_value_t*)type); + + name = string((const char*)str_.buf, str_.size); + ios_close(&str_); } } From ae7bf349cce59aaa5a888c8b157215b3b4220088 Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 30 Sep 2021 00:42:02 -0400 Subject: [PATCH 081/106] array sizes --- src/gc-heap-snapshot.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index da39a12b9d02a..918be0725fff2 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -210,7 +210,9 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { name = jl_symbol_name((jl_sym_t*)a); self_size = name.length(); } else if (jl_is_datatype(type)) { - self_size = (size_t)jl_datatype_size(type); + self_size = jl_is_array_type(type) + ? 
jl_array_nbytes((jl_array_t*)a) + : (size_t)jl_datatype_size(type); // print full type ios_t str_; From e047ef3ab5ec68efa15a8e7830465091dc906a1e Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Wed, 29 Sep 2021 22:03:35 -0400 Subject: [PATCH 082/106] try marking roots in mark_enqueued_tasks --- src/gc.c | 1 - src/partr.c | 42 +++++++++++++----------------------------- 2 files changed, 13 insertions(+), 30 deletions(-) diff --git a/src/gc.c b/src/gc.c index 0397167ffba3e..69a197040a836 100644 --- a/src/gc.c +++ b/src/gc.c @@ -2825,7 +2825,6 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) gc_heap_snapshot_record_root(jl_main_module, "main_module"); // tasks - // TODO: add tasks as roots jl_gc_mark_enqueued_tasks(gc_cache, sp); // invisible builtin values diff --git a/src/partr.c b/src/partr.c index 048a841158153..a3f2dd43165f7 100644 --- a/src/partr.c +++ b/src/partr.c @@ -17,9 +17,6 @@ extern "C" { // thread sleep state -// default to DEFAULT_THREAD_SLEEP_THRESHOLD; set via $JULIA_THREAD_SLEEP_THRESHOLD -uint64_t sleep_threshold; - // thread should not be sleeping--it might need to do work. 
static const int16_t not_sleeping = 0; @@ -39,8 +36,6 @@ uint64_t io_wakeup_enter; uint64_t io_wakeup_leave; ); -uv_mutex_t *sleep_locks; -uv_cond_t *wake_signals; JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int tid) JL_NOTSAFEPOINT { @@ -239,25 +234,10 @@ void jl_init_threadinginfra(void) /* initialize the synchronization trees pool and the multiqueue */ multiq_init(); - sleep_threshold = DEFAULT_THREAD_SLEEP_THRESHOLD; - char *cp = getenv(THREAD_SLEEP_THRESHOLD_NAME); - if (cp) { - if (!strncasecmp(cp, "infinite", 8)) - sleep_threshold = UINT64_MAX; - else - sleep_threshold = (uint64_t)strtol(cp, NULL, 10); - } - jl_ptls_t ptls = jl_current_task->ptls; jl_install_thread_signal_handler(ptls); - - int16_t tid; - sleep_locks = (uv_mutex_t*)calloc(jl_n_threads, sizeof(uv_mutex_t)); - wake_signals = (uv_cond_t*)calloc(jl_n_threads, sizeof(uv_cond_t)); - for (tid = 0; tid < jl_n_threads; tid++) { - uv_mutex_init(&sleep_locks[tid]); - uv_cond_init(&wake_signals[tid]); - } + uv_mutex_init(&ptls->sleep_lock); + uv_cond_init(&ptls->wake_signal); } @@ -277,6 +257,10 @@ void jl_threadfun(void *arg) JL_GC_PROMISE_ROOTED(ct); jl_install_thread_signal_handler(ptls); + // set up sleep mechanism for this thread + uv_mutex_init(&ptls->sleep_lock); + uv_cond_init(&ptls->wake_signal); + // wait for all threads jl_gc_state_set(ptls, JL_GC_STATE_SAFE, 0); uv_barrier_wait(targ->barrier); @@ -340,7 +324,7 @@ static int sleep_check_after_threshold(uint64_t *start_cycles) return 0; } uint64_t elapsed_cycles = jl_hrtime() - (*start_cycles); - if (elapsed_cycles >= sleep_threshold) { + if (elapsed_cycles >= DEFAULT_THREAD_SLEEP_THRESHOLD) { *start_cycles = 0; return 1; } @@ -354,9 +338,9 @@ static void wake_thread(int16_t tid) int8_t state = sleeping; jl_atomic_cmpswap(&other->sleep_check_state, &state, not_sleeping); if (state == sleeping) { - uv_mutex_lock(&sleep_locks[tid]); - uv_cond_signal(&wake_signals[tid]); - uv_mutex_unlock(&sleep_locks[tid]); + 
uv_mutex_lock(&other->sleep_lock); + uv_cond_signal(&other->wake_signal); + uv_mutex_unlock(&other->sleep_lock); } } @@ -528,13 +512,13 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q) // the other threads will just wait for on signal to resume JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() ); int8_t gc_state = jl_gc_safe_enter(ptls); - uv_mutex_lock(&sleep_locks[ptls->tid]); + uv_mutex_lock(&ptls->sleep_lock); while (may_sleep(ptls)) { - uv_cond_wait(&wake_signals[ptls->tid], &sleep_locks[ptls->tid]); + uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock); // TODO: help with gc work here, if applicable } assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping); - uv_mutex_unlock(&sleep_locks[ptls->tid]); + uv_mutex_unlock(&ptls->sleep_lock); JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() ); jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint start_cycles = 0; From 188ed05b0d2933bc3c8de9832c72d624d07d225e Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Wed, 29 Sep 2021 23:28:22 -0400 Subject: [PATCH 083/106] add internal edges when scanning the stack TODO: differentiate amongst stack frames? --- src/gc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/gc.c b/src/gc.c index 69a197040a836..0b8cfbaa3935e 100644 --- a/src/gc.c +++ b/src/gc.c @@ -2198,6 +2198,9 @@ JL_EXTENSION NOINLINE void gc_mark_loop(jl_ptls_t ptls, jl_gc_mark_sp_t sp) uint16_t *obj16_begin; uint16_t *obj16_end; + // TODO: don't know if this is safe w/r/t the search order + jl_task_t *latest_task = NULL; + pop: if (sp.pc == sp.pc_start) { // TODO: stealing form another thread @@ -2321,6 +2324,7 @@ stack: { } if (!gc_try_setmark(new_obj, &nptr, &tag, &bits)) continue; + gc_heap_snapshot_record_internal_edge(latest_task, new_obj); i++; if (i < nr) { // Haven't done with this one yet. 
Update the content and push it back @@ -2671,6 +2675,8 @@ mark: { else if (foreign_alloc) objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_task_t)); jl_task_t *ta = (jl_task_t*)new_obj; + latest_task = ta; // TODO: correct?? + gc_heap_snapshot_record_root(ta, "task"); gc_scrub_record_task(ta); if (gc_cblist_task_scanner) { export_gc_state(ptls, &sp); From a540560658bd59a34e69917ceb7ba0c7519feb5b Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 7 Oct 2021 10:37:42 -0400 Subject: [PATCH 084/106] Halfway through draft commit to construct full fieldpath from slot Account for inline-allocated structs in field path. Only partway done. --- src/gc-debug.c | 3 +- src/gc-heap-snapshot.cpp | 67 +++++++++++++++++++++++++++++++++------- src/gc.h | 2 +- 3 files changed, 58 insertions(+), 14 deletions(-) diff --git a/src/gc-debug.c b/src/gc-debug.c index 206e3982599f7..7dc36294b16eb 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1205,7 +1205,7 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT +JL_DLLEXPORT int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT { jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); int nf = (int)jl_datatype_nfields(vt); @@ -1217,7 +1217,6 @@ int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT } return -1; } - int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT { char *slot = (char*)_slot; diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 918be0725fff2..72fa8b155648e 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -10,9 +10,11 @@ #include #include #include +#include using std::vector; using std::string; +using std::pair; using std::unordered_map; using std::unordered_set; @@ -213,7 +215,7 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { self_size = jl_is_array_type(type) ? 
jl_array_nbytes((jl_array_t*)a) : (size_t)jl_datatype_size(type); - + // print full type ios_t str_; ios_mem(&str_, 1024); @@ -247,6 +249,49 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { g_snapshot->nodes.push_back(from_node); } +typedef pair inlineallocd_field_type_t; +vector _fieldpath_for_slot(jl_value_t *obj, jl_value_t *slot) { + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + + vector result; + bool found = _fieldpath_for_slot_helper(result, "", vt, obj, slot); + // jl_datatype_t* final_type; + // if (!found) { + // final_type = vt; + // } else { + // final_type = result.back().first; + // } + // NOTE THE RETURNED VECTOR IS REVERSED + return result; +} + +bool _fieldpath_for_slot_helper( + vector& out, const char *fieldname, jl_datatype_t *objtype, + void *obj, jl_value_t *slot) +{ + int nf = (int)jl_datatype_nfields(objtype); + jl_svec_t *field_names = jl_field_names(objtype); + for (int i = 0; i < nf; i++) { + jl_datatype_t *field_type = (jl_datatype_t*)jl_field_type(objtype, i); + void *fieldaddr = (char*)obj + jl_field_offset(objtype, i); + jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, i); + const char *field_name = jl_symbol_name(name); + if (fieldaddr >= slot) { + out.push_back(inlineallocd_field_type_t(objtype, field_name)); + return true; + } + if (jl_stored_inline((jl_value_t*)field_type)) { + bool found = _fieldpath_for_slot_helper(out, field_name, field_type, fieldaddr, slot); + if (found) { + out.push_back(inlineallocd_field_type_t(field_type, field_name)); + return true; + } + } + } + return false; +} + + void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) { record_node_to_gc_snapshot(root); @@ -285,16 +330,16 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size _record_gc_edge("object", "element", from, to, field_index); return; } - if (field_index < 0 || jl_datatype_nfields(type) <= field_index) { - // TODO: We're getting -1 in some cases - //jl_printf(JL_STDERR, 
"WARNING - incorrect field index (%zu) for type\n", field_index); - //jl_(type); - _record_gc_edge("object", "element", from, to, field_index); - return; - } - jl_svec_t *field_names = jl_field_names(type); - jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, field_index); - const char *field_name = jl_symbol_name(name); + // if (field_index < 0 || jl_datatype_nfields(type) <= field_index) { + // // TODO: We're getting -1 in some cases + // //jl_printf(JL_STDERR, "WARNING - incorrect field index (%zu) for type\n", field_index); + // //jl_(type); + // _record_gc_edge("object", "element", from, to, field_index); + // return; + // } + // jl_svec_t *field_names = jl_field_names(type); + // jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, field_index); + // const char *field_name = jl_symbol_name(name); _record_gc_edge("object", "property", from, to, g_snapshot->names.find_or_create_string_id(field_name)); diff --git a/src/gc.h b/src/gc.h index c404557f3e50d..02320f7644928 100644 --- a/src/gc.h +++ b/src/gc.h @@ -634,7 +634,7 @@ extern int gc_verifying; #endif -int gc_slot_to_fieldidx(void *_obj, void *slot) JL_NOTSAFEPOINT; +JL_DLLEXPORT int gc_slot_to_fieldidx(void *_obj, void *slot) JL_NOTSAFEPOINT; int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT; NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset); From 59b0eda45f15f61b8d6ee33a24bc1edb0cd3d245 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 7 Oct 2021 10:55:27 -0400 Subject: [PATCH 085/106] Start hooking up the field paths --- src/gc-heap-snapshot.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 72fa8b155648e..9d5154d2e6b8b 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -254,19 +254,13 @@ vector _fieldpath_for_slot(jl_value_t *obj, jl_value_ jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); vector result; - bool found = 
_fieldpath_for_slot_helper(result, "", vt, obj, slot); - // jl_datatype_t* final_type; - // if (!found) { - // final_type = vt; - // } else { - // final_type = result.back().first; - // } + bool found = _fieldpath_for_slot_helper(result, vt, obj, slot); // NOTE THE RETURNED VECTOR IS REVERSED return result; } bool _fieldpath_for_slot_helper( - vector& out, const char *fieldname, jl_datatype_t *objtype, + vector& out, jl_datatype_t *objtype, void *obj, jl_value_t *slot) { int nf = (int)jl_datatype_nfields(objtype); @@ -340,6 +334,13 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size // jl_svec_t *field_names = jl_field_names(type); // jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, field_index); // const char *field_name = jl_symbol_name(name); + auto field_paths = _fieldpath_for_slot(from, to); + // Build the new field name by joining the strings, and/or use the struct + field names + // to create a bunch of edges + nodes + // (iterate the vector in reverse - the last element is the first path) + for (auto it = field_paths.rbegin(); it != field_paths.rend(); ++it) { + // ... + } _record_gc_edge("object", "property", from, to, g_snapshot->names.find_or_create_string_id(field_name)); From 152ad15896e35205f1dc2756a5edcb06659ddb85 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Tue, 12 Oct 2021 22:13:37 -0400 Subject: [PATCH 086/106] connect up the code from before (but it's still not working) maybe still need to do the off-by-one jameson mentioned? 
:thinking: --- src/gc-heap-snapshot.cpp | 44 +++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 9d5154d2e6b8b..cfff280f3676e 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -250,14 +250,6 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { } typedef pair inlineallocd_field_type_t; -vector _fieldpath_for_slot(jl_value_t *obj, jl_value_t *slot) { - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - - vector result; - bool found = _fieldpath_for_slot_helper(result, vt, obj, slot); - // NOTE THE RETURNED VECTOR IS REVERSED - return result; -} bool _fieldpath_for_slot_helper( vector& out, jl_datatype_t *objtype, @@ -275,7 +267,7 @@ bool _fieldpath_for_slot_helper( return true; } if (jl_stored_inline((jl_value_t*)field_type)) { - bool found = _fieldpath_for_slot_helper(out, field_name, field_type, fieldaddr, slot); + bool found = _fieldpath_for_slot_helper(out, field_type, fieldaddr, slot); if (found) { out.push_back(inlineallocd_field_type_t(field_type, field_name)); return true; @@ -285,6 +277,28 @@ bool _fieldpath_for_slot_helper( return false; } +vector _fieldpath_for_slot(jl_value_t *obj, jl_value_t *slot) { + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + + vector result; + bool found = _fieldpath_for_slot_helper(result, vt, obj, slot); + // TODO: maybe don't need the return value here actually...? + if (!found) { + // TODO: Debug these failures. Some of them seem really wrong, like with the slot + // being _kilobytes_ past the start of the object for an object with 1 pointer and 1 + // field... 
+ // jl_printf(JL_STDERR, "WARNING: No fieldpath found for obj: %p slot: %p ", (void*)obj, (void*)slot); + // jl_datatype_t* type = (jl_datatype_t*)jl_typeof(obj); + // if (jl_is_datatype(type)) { + // jl_printf(JL_STDERR, "typeof: "); + // jl_static_show(JL_STDERR, (jl_value_t*)type); + // } + // jl_printf(JL_STDERR, "\n"); + } + // NOTE THE RETURNED VECTOR IS REVERSED + return result; +} + void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) { record_node_to_gc_snapshot(root); @@ -338,12 +352,22 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size // Build the new field name by joining the strings, and/or use the struct + field names // to create a bunch of edges + nodes // (iterate the vector in reverse - the last element is the first path) + // TODO: Prefer to create intermediate edges and nodes instead of a combined string path. + if (field_paths.size() > 1) { + jl_printf(JL_STDERR, "count: %lu\n", field_paths.size()); + } + + string path; for (auto it = field_paths.rbegin(); it != field_paths.rend(); ++it) { // ... 
+ path += it->second; + if ( it + 1 != field_paths.rend() ) { + path += "."; + } } _record_gc_edge("object", "property", from, to, - g_snapshot->names.find_or_create_string_id(field_name)); + g_snapshot->names.find_or_create_string_id(path)); } void _gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT { // TODO: probably need to inline this here and make some changes From ddd07ad7349a9c60ae066e4f6a7cf6b055cde91e Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Wed, 13 Oct 2021 12:24:18 -0400 Subject: [PATCH 087/106] Add debug logging for objects in `Main` module --- src/gc-heap-snapshot.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index cfff280f3676e..8f11af1d64791 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -251,21 +251,31 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { typedef pair inlineallocd_field_type_t; +static bool debug_log = false; + bool _fieldpath_for_slot_helper( vector& out, jl_datatype_t *objtype, void *obj, jl_value_t *slot) { int nf = (int)jl_datatype_nfields(objtype); jl_svec_t *field_names = jl_field_names(objtype); + if (debug_log) { + jl_((jl_value_t*)objtype); + jl_printf(JL_STDERR, "obj: %p, slot: %p, nf: %d\n", obj, (void*)slot, nf); + } for (int i = 0; i < nf; i++) { jl_datatype_t *field_type = (jl_datatype_t*)jl_field_type(objtype, i); void *fieldaddr = (char*)obj + jl_field_offset(objtype, i); jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, i); const char *field_name = jl_symbol_name(name); + if (debug_log) { + jl_printf(JL_STDERR, "%d - field_name: %s fieldaddr: %p\n", i, field_name, fieldaddr); + } if (fieldaddr >= slot) { out.push_back(inlineallocd_field_type_t(objtype, field_name)); return true; } + // If the field is an inline-allocated struct if (jl_stored_inline((jl_value_t*)field_type)) { bool found = _fieldpath_for_slot_helper(out, field_type, fieldaddr, 
slot); if (found) { @@ -276,12 +286,24 @@ bool _fieldpath_for_slot_helper( } return false; } +JL_DLLEXPORT void jl_breakpoint(jl_value_t *v) +{ + // put a breakpoint in your debugger here +} + vector _fieldpath_for_slot(jl_value_t *obj, jl_value_t *slot) { jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + if (vt->name->module == jl_main_module) { + // jl_breakpoint(obj); + debug_log = true; + } vector result; bool found = _fieldpath_for_slot_helper(result, vt, obj, slot); + + debug_log = false; + // TODO: maybe don't need the return value here actually...? if (!found) { // TODO: Debug these failures. Some of them seem really wrong, like with the slot From 2da28df176d20ba5ec67f1a27e97547c7da80ea2 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Wed, 13 Oct 2021 12:34:23 -0400 Subject: [PATCH 088/106] Fix silly mistake in slot accounting :) It works now!! ``` - ga::Main.A @302073680 - b.y::Array{Any, 1} @302026704 ``` --- src/gc-heap-snapshot.cpp | 10 +++++----- src/gc-heap-snapshot.h | 6 +++--- src/gc.c | 9 +++------ 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 8f11af1d64791..eae80cf666a34 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -255,7 +255,7 @@ static bool debug_log = false; bool _fieldpath_for_slot_helper( vector& out, jl_datatype_t *objtype, - void *obj, jl_value_t *slot) + void *obj, void *slot) { int nf = (int)jl_datatype_nfields(objtype); jl_svec_t *field_names = jl_field_names(objtype); @@ -292,7 +292,7 @@ JL_DLLEXPORT void jl_breakpoint(jl_value_t *v) } -vector _fieldpath_for_slot(jl_value_t *obj, jl_value_t *slot) { +vector _fieldpath_for_slot(jl_value_t *obj, void *slot) { jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); if (vt->name->module == jl_main_module) { // jl_breakpoint(obj); @@ -352,12 +352,12 @@ void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, cha _record_gc_edge("object", "property", (jl_value_t *)from, 
to, g_snapshot->names.find_or_create_string_id(name)); } -void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT { +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT { jl_datatype_t *type = (jl_datatype_t*)jl_typeof(from); // TODO: It seems like NamedTuples should have field names? Maybe there's another way to get them? if (jl_is_tuple_type(type) || jl_is_namedtuple_type(type)) { // TODO: Maybe not okay to match element and object - _record_gc_edge("object", "element", from, to, field_index); + _record_gc_edge("object", "element", from, to, /* TODO */0); return; } // if (field_index < 0 || jl_datatype_nfields(type) <= field_index) { @@ -370,7 +370,7 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size // jl_svec_t *field_names = jl_field_names(type); // jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, field_index); // const char *field_name = jl_symbol_name(name); - auto field_paths = _fieldpath_for_slot(from, to); + auto field_paths = _fieldpath_for_slot(from, slot); // Build the new field name by joining the strings, and/or use the struct + field names // to create a bunch of edges + nodes // (iterate the vector in reverse - the last element is the first path) diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index cfea57ed6b86c..6b1c6449737ab 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -17,7 +17,7 @@ extern "C" { void _gc_heap_snapshot_record_root(jl_value_t *root, char *name); void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT; void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT; -void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) 
JL_NOTSAFEPOINT; // Used for objects managed by GC, but which aren't exposed in the julia object, so have no // field or index. i.e. they're not reacahable from julia code, but we _will_ hit them in // the GC mark phase (so we can check their type tag to get the size). @@ -44,9 +44,9 @@ static inline void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_val _gc_heap_snapshot_record_module_edge(from, to, name); } } -static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT { +static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT { if (__unlikely(gc_heap_snapshot_enabled)) { - _gc_heap_snapshot_record_object_edge(from, to, field_index); + _gc_heap_snapshot_record_object_edge(from, to, slot); } } static inline void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT { diff --git a/src/gc.c b/src/gc.c index 0b8cfbaa3935e..201775be80c80 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1959,8 +1959,7 @@ STATIC_INLINE int gc_mark_scan_obj8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mark if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); - gc_heap_snapshot_record_object_edge(parent, *slot, - gc_slot_to_fieldidx(parent, slot)); + gc_heap_snapshot_record_object_edge(parent, *slot, slot); } if (!gc_try_setmark(*pnew_obj, &obj8->nptr, ptag, pbits)) continue; @@ -1996,8 +1995,7 @@ STATIC_INLINE int gc_mark_scan_obj16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); // TODO: Should this be *parent? Given the way it's used above? 
- gc_heap_snapshot_record_object_edge(parent, *slot, - gc_slot_to_fieldidx(parent, slot)); + gc_heap_snapshot_record_object_edge(parent, *slot, slot); } if (!gc_try_setmark(*pnew_obj, &obj16->nptr, ptag, pbits)) continue; @@ -2032,8 +2030,7 @@ STATIC_INLINE int gc_mark_scan_obj32(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); - gc_heap_snapshot_record_object_edge(parent, *slot, - gc_slot_to_fieldidx(parent, slot)); + gc_heap_snapshot_record_object_edge(parent, *slot, slot); } if (!gc_try_setmark(*pnew_obj, &obj32->nptr, ptag, pbits)) continue; From 09ddcd4feef3f19f7cd3b5ecdc1695970388b6c5 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 14 Oct 2021 12:41:13 -0400 Subject: [PATCH 089/106] Clean up debug logs --- src/gc-heap-snapshot.cpp | 37 +++++++++---------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index eae80cf666a34..05958107ae016 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -286,17 +286,12 @@ bool _fieldpath_for_slot_helper( } return false; } -JL_DLLEXPORT void jl_breakpoint(jl_value_t *v) -{ - // put a breakpoint in your debugger here -} - vector _fieldpath_for_slot(jl_value_t *obj, void *slot) { jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + // TODO(PR): Remove this debugging code if (vt->name->module == jl_main_module) { - // jl_breakpoint(obj); - debug_log = true; + // debug_log = true; } vector result; @@ -309,13 +304,13 @@ vector _fieldpath_for_slot(jl_value_t *obj, void *slo // TODO: Debug these failures. Some of them seem really wrong, like with the slot // being _kilobytes_ past the start of the object for an object with 1 pointer and 1 // field... 
- // jl_printf(JL_STDERR, "WARNING: No fieldpath found for obj: %p slot: %p ", (void*)obj, (void*)slot); - // jl_datatype_t* type = (jl_datatype_t*)jl_typeof(obj); - // if (jl_is_datatype(type)) { - // jl_printf(JL_STDERR, "typeof: "); - // jl_static_show(JL_STDERR, (jl_value_t*)type); - // } - // jl_printf(JL_STDERR, "\n"); + jl_printf(JL_STDERR, "WARNING: No fieldpath found for obj: %p slot: %p ", (void*)obj, (void*)slot); + jl_datatype_t* type = (jl_datatype_t*)jl_typeof(obj); + if (jl_is_datatype(type)) { + jl_printf(JL_STDERR, "typeof: "); + jl_static_show(JL_STDERR, (jl_value_t*)type); + } + jl_printf(JL_STDERR, "\n"); } // NOTE THE RETURNED VECTOR IS REVERSED return result; @@ -360,25 +355,11 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void _record_gc_edge("object", "element", from, to, /* TODO */0); return; } - // if (field_index < 0 || jl_datatype_nfields(type) <= field_index) { - // // TODO: We're getting -1 in some cases - // //jl_printf(JL_STDERR, "WARNING - incorrect field index (%zu) for type\n", field_index); - // //jl_(type); - // _record_gc_edge("object", "element", from, to, field_index); - // return; - // } - // jl_svec_t *field_names = jl_field_names(type); - // jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, field_index); - // const char *field_name = jl_symbol_name(name); auto field_paths = _fieldpath_for_slot(from, slot); // Build the new field name by joining the strings, and/or use the struct + field names // to create a bunch of edges + nodes // (iterate the vector in reverse - the last element is the first path) // TODO: Prefer to create intermediate edges and nodes instead of a combined string path. - if (field_paths.size() > 1) { - jl_printf(JL_STDERR, "count: %lu\n", field_paths.size()); - } - string path; for (auto it = field_paths.rbegin(); it != field_paths.rend(); ++it) { // ... 
From bd87eeac6f50f2b2d98153d05b6e2f07382e321a Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Tue, 5 Oct 2021 14:50:14 -0400 Subject: [PATCH 090/106] represent stack frames as nodes; only root running tasks --- src/gc-debug.c | 2 +- src/gc-heap-snapshot.cpp | 126 +++++++++++++++++++++++++++++---------- src/gc-heap-snapshot.h | 20 ++++++- src/gc.c | 34 +++++++---- 4 files changed, 140 insertions(+), 42 deletions(-) diff --git a/src/gc-debug.c b/src/gc-debug.c index 7dc36294b16eb..a84ba94353f31 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1208,7 +1208,7 @@ void gc_count_pool(void) JL_DLLEXPORT int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT { jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - int nf = (int)jl_datatype_nfields(vt); + int nf = (int)jl_datatype_nfields(vt); // what happens if you're inlined? lol for (int i = 0; i < nf; i++) { void *fieldaddr = (char*)obj + jl_field_offset(vt, i); if (fieldaddr >= slot) { diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 05958107ae016..c0d4bda508f9a 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -43,11 +43,6 @@ void print_str_escape_json(ios_t *stream, const std::string &s) { ios_printf(stream, "\""); } -struct HeapSnapshot; -void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot); -static inline void _record_gc_edge(const char *node_type, const char *edge_type, - jl_value_t *a, jl_value_t *b, size_t name_or_index); -void _add_internal_root(HeapSnapshot *snapshot); // Edges // "edge_fields": @@ -83,7 +78,6 @@ struct Node { vector edges; // For asserting that we built the edges in the right order }; - struct StringTable { typedef unordered_map MapType; @@ -141,6 +135,13 @@ struct HeapSnapshot { HeapSnapshot *g_snapshot = nullptr; +void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot); +static inline void _record_gc_edge(const char *node_type, const char *edge_type, + jl_value_t *a, 
jl_value_t *b, size_t name_or_index); +void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx); +void _add_internal_root(HeapSnapshot *snapshot); + + JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream) { // Enable snapshotting HeapSnapshot snapshot; @@ -180,11 +181,11 @@ void _add_internal_root(HeapSnapshot *snapshot) { } // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 -void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { +// returns the index of the new node +size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { auto val = g_snapshot->node_ptr_to_index_map.find((void*)a); if (val != g_snapshot->node_ptr_to_index_map.end()) { - return; - //return &g_snapshot->nodes[val->second]; + return val->second; } // Insert a new Node @@ -228,7 +229,8 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { } } - g_snapshot->node_ptr_to_index_map.insert(val, {a, g_snapshot->nodes.size()}); + auto node_idx = g_snapshot->nodes.size(); + g_snapshot->node_ptr_to_index_map.insert(val, {a, node_idx}); Node from_node{ // We pick a default type here, which will be set for the _targets_ of edges. 
@@ -247,6 +249,8 @@ void record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { vector(), }; g_snapshot->nodes.push_back(from_node); + + return node_idx; } typedef pair inlineallocd_field_type_t; @@ -320,20 +324,70 @@ vector _fieldpath_for_slot(jl_value_t *obj, void *slo void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) { record_node_to_gc_snapshot(root); - // TODO: just make record_node_to_gc_snapshot return this - auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; - auto &internal_root = g_snapshot->nodes.front(); - auto edge_type = g_snapshot->edge_types.find_or_create_string_id("internal"); + auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; auto edge_label = g_snapshot->names.find_or_create_string_id(name); - internal_root.edges.push_back(Edge{ - edge_type, - edge_label, - to_node_idx, - }); + _record_gc_just_edge("internal", internal_root, to_node_idx, edge_label); +} - g_snapshot->num_edges++; +// Add a node to the heap snapshot representing a Julia stack frame. +// Each task points at a stack frame, which points at the stack frame of +// the function it's currently calling, forming a linked list. +// Stack frame nodes point at the objects they have as local variables. 
+size_t _record_stack_frame_node(HeapSnapshot *snapshot, jl_gcframe_t *frame) { + auto val = g_snapshot->node_ptr_to_index_map.find((void*)frame); + if (val != g_snapshot->node_ptr_to_index_map.end()) { + return val->second; + } + + Node frame_node{ + snapshot->node_types.find_or_create_string_id("synthetic"), + "(stack frame)", // name + (size_t)frame, // id + 1, // size + + 0, // size_t trace_node_id (unused) + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + + // outgoing edges + vector(), + }; + + auto node_idx = snapshot->nodes.size(); + snapshot->node_ptr_to_index_map.insert(val, {frame, node_idx}); + snapshot->nodes.push_back(frame_node); + + return node_idx; +} + +void _gc_heap_snapshot_record_frame_to_object_edge(jl_gcframe_t *from, jl_value_t *to) JL_NOTSAFEPOINT { + auto from_node_idx = _record_stack_frame_node(g_snapshot, from); + Node &from_node = g_snapshot->nodes[from_node_idx]; + auto to_idx = record_node_to_gc_snapshot(to); + + // TODO: would be cool to get the name of the local var + auto name_idx = g_snapshot->names.find_or_create_string_id("local var"); + _record_gc_just_edge("internal", from_node, to_idx, name_idx); +} + +void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT { + auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)from); + Node &from_node = g_snapshot->nodes[from_node_idx]; + _record_stack_frame_node(g_snapshot, to); + auto to_node_idx = g_snapshot->node_ptr_to_index_map[to]; + + auto name_idx = g_snapshot->names.find_or_create_string_id("stack"); + _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); +} + +void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT { + auto from_node_idx = _record_stack_frame_node(g_snapshot, from); + Node &from_node = g_snapshot->nodes[from_node_idx]; + auto to_node_idx = _record_stack_frame_node(g_snapshot, to); + + auto name_idx = 
g_snapshot->names.find_or_create_string_id("next frame"); + _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); } void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { @@ -342,13 +396,23 @@ void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_ } _record_gc_edge("array", "element", from, to, index); } + void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT { //jl_printf(JL_STDERR, "module: %p binding:%p name:%s\n", from, to, name); _record_gc_edge("object", "property", (jl_value_t *)from, to, g_snapshot->names.find_or_create_string_id(name)); } -void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT { + +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT { jl_datatype_t *type = (jl_datatype_t*)jl_typeof(from); + + if (field_index < 0 || field_index > jl_datatype_nfields(type)) { + // TODO: We're getting -1 in some cases + jl_printf(JL_STDERR, "WARNING - incorrect field index (%d) for type\n", field_index); + jl_(type); + return; + } + // TODO: It seems like NamedTuples should have field names? Maybe there's another way to get them? 
if (jl_is_tuple_type(type) || jl_is_namedtuple_type(type)) { // TODO: Maybe not okay to match element and object @@ -372,11 +436,13 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void _record_gc_edge("object", "property", from, to, g_snapshot->names.find_or_create_string_id(path)); } + void _gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT { // TODO: probably need to inline this here and make some changes _record_gc_edge("object", "internal", from, to, g_snapshot->names.find_or_create_string_id("")); } + void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOTSAFEPOINT { // TODO: probably need to inline this here and make some changes _record_gc_edge("native", "hidden", from, (jl_value_t *)jl_malloc_tag, @@ -387,22 +453,22 @@ void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOT } static inline void _record_gc_edge(const char *node_type, const char *edge_type, - jl_value_t *a, jl_value_t *b, size_t name_or_index) JL_NOTSAFEPOINT + jl_value_t *a, jl_value_t *b, size_t name_or_idx) JL_NOTSAFEPOINT { - record_node_to_gc_snapshot(a); - record_node_to_gc_snapshot(b); - - // Have to look this up because it might not be created for this edge - auto from_node_idx = g_snapshot->node_ptr_to_index_map[a]; + auto from_node_idx = record_node_to_gc_snapshot(a); + auto to_node_idx = record_node_to_gc_snapshot(b); auto &from_node = g_snapshot->nodes[from_node_idx]; - // TODO: can these ever disagree?: from_node.type = g_snapshot->node_types.find_or_create_string_id(node_type); + _record_gc_just_edge(edge_type, from_node, to_node_idx, name_or_idx); +} + +void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) { from_node.edges.push_back(Edge{ g_snapshot->edge_types.find_or_create_string_id(edge_type), - name_or_index, - g_snapshot->node_ptr_to_index_map[b], // to + name_or_idx, // edge label + to_idx, // to }); 
g_snapshot->num_edges += 1; diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 6b1c6449737ab..13c8b26500bee 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -14,7 +14,10 @@ extern "C" { // --------------------------------------------------------------------- // Functions to call from GC when heap snapshot is enabled // --------------------------------------------------------------------- -void _gc_heap_snapshot_record_root(jl_value_t *root, char *name); +void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_frame_to_object_edge(jl_gcframe_t *from, jl_value_t *to) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT; void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT; void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT; void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT; @@ -29,6 +32,21 @@ void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOT extern int gc_heap_snapshot_enabled; +static inline void gc_heap_snapshot_record_frame_to_object_edge(jl_gcframe_t *from, jl_value_t *to) { + if (__unlikely(gc_heap_snapshot_enabled)) { + _gc_heap_snapshot_record_frame_to_object_edge(from, to); + } +} +static inline void gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, jl_gcframe_t *to) { + if (__unlikely(gc_heap_snapshot_enabled)) { + _gc_heap_snapshot_record_task_to_frame_edge(from, to); + } +} +static inline void gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) { + if (__unlikely(gc_heap_snapshot_enabled)) { + _gc_heap_snapshot_record_frame_to_frame_edge(from, to); + } +} static inline 
void gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT { if (__unlikely(gc_heap_snapshot_enabled)) { _gc_heap_snapshot_record_root(root, name); diff --git a/src/gc.c b/src/gc.c index 201775be80c80..c5127aa7ac0db 100644 --- a/src/gc.c +++ b/src/gc.c @@ -2195,9 +2195,6 @@ JL_EXTENSION NOINLINE void gc_mark_loop(jl_ptls_t ptls, jl_gc_mark_sp_t sp) uint16_t *obj16_begin; uint16_t *obj16_end; - // TODO: don't know if this is safe w/r/t the search order - jl_task_t *latest_task = NULL; - pop: if (sp.pc == sp.pc_start) { // TODO: stealing form another thread @@ -2321,7 +2318,7 @@ stack: { } if (!gc_try_setmark(new_obj, &nptr, &tag, &bits)) continue; - gc_heap_snapshot_record_internal_edge(latest_task, new_obj); + gc_heap_snapshot_record_frame_to_object_edge(s, new_obj); i++; if (i < nr) { // Haven't done with this one yet. Update the content and push it back @@ -2339,7 +2336,9 @@ stack: { goto mark; } s = (jl_gcframe_t*)gc_read_stack(&s->prev, offset, lb, ub); + // walk up one stack frame if (s != 0) { + gc_heap_snapshot_record_frame_to_frame_edge(stack->s, s); stack->s = s; i = 0; uintptr_t new_nroots = gc_read_stack(&s->nroots, offset, lb, ub); @@ -2672,8 +2671,6 @@ mark: { else if (foreign_alloc) objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_task_t)); jl_task_t *ta = (jl_task_t*)new_obj; - latest_task = ta; // TODO: correct?? - gc_heap_snapshot_record_root(ta, "task"); gc_scrub_record_task(ta); if (gc_cblist_task_scanner) { export_gc_state(ptls, &sp); @@ -2685,8 +2682,12 @@ mark: { } #ifdef COPY_STACKS void *stkbuf = ta->stkbuf; - if (stkbuf && ta->copy_stack) + if (stkbuf && ta->copy_stack) { gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz); + // TODO: attribute size of stack + // TODO: edge to stack data + // TODO: synthetic node for stack data (how big is it?) 
+ } #endif jl_gcframe_t *s = ta->gcstack; size_t nroots; @@ -2705,6 +2706,8 @@ mark: { #endif if (s) { nroots = gc_read_stack(&s->nroots, offset, lb, ub); + gc_heap_snapshot_record_task_to_frame_edge(ta, s); + assert(nroots <= UINT32_MAX); gc_mark_stackframe_t stackdata = {s, 0, (uint32_t)nroots, offset, lb, ub}; gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(stack), @@ -2808,13 +2811,21 @@ static void jl_gc_queue_thread_local(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp jl_ptls_t ptls2) { gc_mark_queue_obj(gc_cache, sp, jl_atomic_load_relaxed(&ptls2->current_task)); + gc_heap_snapshot_record_root(ptls2->current_task, "current task"); gc_mark_queue_obj(gc_cache, sp, ptls2->root_task); - if (ptls2->next_task) + gc_heap_snapshot_record_root(ptls2->current_task, "root task"); + if (ptls2->next_task) { gc_mark_queue_obj(gc_cache, sp, ptls2->next_task); - if (ptls2->previous_task) // shouldn't be necessary, but no reason not to + gc_heap_snapshot_record_root(ptls2->current_task, "next task"); + } + if (ptls2->previous_task) { // shouldn't be necessary, but no reason not to gc_mark_queue_obj(gc_cache, sp, ptls2->previous_task); - if (ptls2->previous_exception) + gc_heap_snapshot_record_root(ptls2->current_task, "previous task"); + } + if (ptls2->previous_exception) { gc_mark_queue_obj(gc_cache, sp, ptls2->previous_exception); + gc_heap_snapshot_record_root(ptls2->current_task, "previous exception"); + } } void jl_gc_mark_enqueued_tasks(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp); @@ -2828,6 +2839,7 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) gc_heap_snapshot_record_root(jl_main_module, "main_module"); // tasks + // TODO: is this dead code? jl_gc_mark_enqueued_tasks(gc_cache, sp); // invisible builtin values @@ -3064,8 +3076,10 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // 2.1. mark every object in the `last_remsets` and `rem_binding` jl_gc_queue_remset(gc_cache, &sp, ptls2); // 2.2. 
mark every thread local root + // TODO: treat these as roots jl_gc_queue_thread_local(gc_cache, &sp, ptls2); // 2.3. mark any managed objects in the backtrace buffer + // TODO: treat these as roots jl_gc_queue_bt_buf(gc_cache, &sp, ptls2); } From 83afa815c2c4ae2444adaf7607a700deff7115dc Mon Sep 17 00:00:00 2001 From: Pete Vilter <7341+vilterp@users.noreply.github.com> Date: Thu, 7 Oct 2021 12:41:17 -0400 Subject: [PATCH 091/106] tweak comment --- src/gc-heap-snapshot.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index c0d4bda508f9a..b35d28807283a 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -64,7 +64,7 @@ struct Edge { const int k_node_number_of_fields = 7; struct Node { - size_t type; // TODO: point at actual type here? + size_t type; // index into snapshot->node_types string name; size_t id; // This should be a globally-unique counter, but we use the memory address size_t self_size; From ba4ea3f5575c29595dbf6e3ac7e83e50c1b8bb7f Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 14 Oct 2021 12:50:06 -0400 Subject: [PATCH 092/106] Fixups, comments, todos --- src/gc-debug.c | 3 ++- src/gc-heap-snapshot.cpp | 4 +++- src/gc.h | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/gc-debug.c b/src/gc-debug.c index a84ba94353f31..5ba63f2b15d62 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1205,7 +1205,7 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -JL_DLLEXPORT int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT +int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT { jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); int nf = (int)jl_datatype_nfields(vt); // what happens if you're inlined? 
lol @@ -1217,6 +1217,7 @@ JL_DLLEXPORT int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT } return -1; } + int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT { char *slot = (char*)_slot; diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index b35d28807283a..ce625b13024ae 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -416,7 +416,9 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size // TODO: It seems like NamedTuples should have field names? Maybe there's another way to get them? if (jl_is_tuple_type(type) || jl_is_namedtuple_type(type)) { // TODO: Maybe not okay to match element and object - _record_gc_edge("object", "element", from, to, /* TODO */0); + _record_gc_edge("object", "element", from, to, + // TODO: Get the names for tuple elements + g_snapshot->names.find_or_create_string_id("")); return; } auto field_paths = _fieldpath_for_slot(from, slot); diff --git a/src/gc.h b/src/gc.h index 02320f7644928..c404557f3e50d 100644 --- a/src/gc.h +++ b/src/gc.h @@ -634,7 +634,7 @@ extern int gc_verifying; #endif -JL_DLLEXPORT int gc_slot_to_fieldidx(void *_obj, void *slot) JL_NOTSAFEPOINT; +int gc_slot_to_fieldidx(void *_obj, void *slot) JL_NOTSAFEPOINT; int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT; NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset); From 9c04a81e6fa84acb0292d582ef4900e6d019441b Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 14 Oct 2021 14:45:52 -0400 Subject: [PATCH 093/106] fix merge conflict --- src/gc-heap-snapshot.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index ce625b13024ae..75756c0579365 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -295,7 +295,7 @@ vector _fieldpath_for_slot(jl_value_t *obj, void *slo jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); // TODO(PR): Remove this 
debugging code if (vt->name->module == jl_main_module) { - // debug_log = true; + debug_log = true; } vector result; @@ -406,13 +406,6 @@ void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, cha void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT { jl_datatype_t *type = (jl_datatype_t*)jl_typeof(from); - if (field_index < 0 || field_index > jl_datatype_nfields(type)) { - // TODO: We're getting -1 in some cases - jl_printf(JL_STDERR, "WARNING - incorrect field index (%d) for type\n", field_index); - jl_(type); - return; - } - // TODO: It seems like NamedTuples should have field names? Maybe there's another way to get them? if (jl_is_tuple_type(type) || jl_is_namedtuple_type(type)) { // TODO: Maybe not okay to match element and object From d43ca3ab7732e23d28bc1636d4faa2cf0a687267 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 14 Oct 2021 15:06:28 -0400 Subject: [PATCH 094/106] Fix Tuple types in fieldpath_for_slot --- src/gc-heap-snapshot.cpp | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 75756c0579365..e762dd51464d0 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -14,6 +15,7 @@ using std::vector; using std::string; +using std::ostringstream; using std::pair; using std::unordered_map; using std::unordered_set; @@ -253,7 +255,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { return node_idx; } -typedef pair inlineallocd_field_type_t; +typedef pair inlineallocd_field_type_t; static bool debug_log = false; @@ -270,10 +272,19 @@ bool _fieldpath_for_slot_helper( for (int i = 0; i < nf; i++) { jl_datatype_t *field_type = (jl_datatype_t*)jl_field_type(objtype, i); void *fieldaddr = (char*)obj + jl_field_offset(objtype, i); - jl_sym_t *name = 
(jl_sym_t*)jl_svecref(field_names, i); - const char *field_name = jl_symbol_name(name); + ostringstream ss; // NOTE: must have same scope as field_name, below. + string field_name; + // TODO: NamedTuples should maybe have field names? Maybe another way to get them? + if (jl_is_tuple_type(objtype) || jl_is_namedtuple_type(objtype)) { + jl_printf(JL_STDERR, "HERE\n"); + ss << "[" << i << "]"; + field_name = ss.str().c_str(); // See scope comment, above. + } else { + jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, i); + field_name = jl_symbol_name(name); + } if (debug_log) { - jl_printf(JL_STDERR, "%d - field_name: %s fieldaddr: %p\n", i, field_name, fieldaddr); + jl_printf(JL_STDERR, "%d - field_name: %s fieldaddr: %p\n", i, field_name.c_str(), fieldaddr); } if (fieldaddr >= slot) { out.push_back(inlineallocd_field_type_t(objtype, field_name)); @@ -295,7 +306,7 @@ vector _fieldpath_for_slot(jl_value_t *obj, void *slo jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); // TODO(PR): Remove this debugging code if (vt->name->module == jl_main_module) { - debug_log = true; + // debug_log = true; } vector result; @@ -406,14 +417,6 @@ void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, cha void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT { jl_datatype_t *type = (jl_datatype_t*)jl_typeof(from); - // TODO: It seems like NamedTuples should have field names? Maybe there's another way to get them? 
- if (jl_is_tuple_type(type) || jl_is_namedtuple_type(type)) { - // TODO: Maybe not okay to match element and object - _record_gc_edge("object", "element", from, to, - // TODO: Get the names for tuple elements - g_snapshot->names.find_or_create_string_id("")); - return; - } auto field_paths = _fieldpath_for_slot(from, slot); // Build the new field name by joining the strings, and/or use the struct + field names // to create a bunch of edges + nodes From ee74794ab6ac353046903bcbd8f4de214476c366 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 14 Oct 2021 15:09:23 -0400 Subject: [PATCH 095/106] Add TODO about overrunning type printing buffer --- src/gc-heap-snapshot.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index e762dd51464d0..e62b97a747cc5 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -220,6 +220,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { : (size_t)jl_datatype_size(type); // print full type + // TODO: We _definitely_ have types longer than 1024 bytes.... ios_t str_; ios_mem(&str_, 1024); JL_STREAM* str = (JL_STREAM*)&str_; From 0caead051e8531a7247a75bf152cd494ea9a952a Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 14 Oct 2021 15:15:33 -0400 Subject: [PATCH 096/106] remove log --- src/gc-heap-snapshot.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index e62b97a747cc5..4a8b4b6e2734b 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -277,7 +277,6 @@ bool _fieldpath_for_slot_helper( string field_name; // TODO: NamedTuples should maybe have field names? Maybe another way to get them? if (jl_is_tuple_type(objtype) || jl_is_namedtuple_type(objtype)) { - jl_printf(JL_STDERR, "HERE\n"); ss << "[" << i << "]"; field_name = ss.str().c_str(); // See scope comment, above. 
} else { From a1f15019da46a8455b82ea9c1f1c5969b680fd2f Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 22 Oct 2021 16:36:05 -0400 Subject: [PATCH 097/106] Avoid recollect which might have been mucking our results --- src/gc-heap-snapshot.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 4a8b4b6e2734b..ab85c492bb9f1 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -145,6 +145,10 @@ void _add_internal_root(HeapSnapshot *snapshot); JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream) { + // Do a full GC sweep, which will reset all of the mark bits + gc_num.pause = 0; // Prevent a recollect here, which would cause incomplete results below. + jl_gc_collect(JL_GC_FULL); + // Enable snapshotting HeapSnapshot snapshot; g_snapshot = &snapshot; @@ -153,7 +157,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream) { _add_internal_root(&snapshot); // Do a full GC mark (and incremental sweep), which will invoke our callbacks on `g_snapshot` - jl_gc_collect(JL_GC_INCREMENTAL); + jl_gc_collect(JL_GC_FULL); // Disable snapshotting gc_heap_snapshot_enabled = false; From 95b4592e111bce538e3fa719990cef938892a465 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 18 Nov 2021 14:17:31 -0500 Subject: [PATCH 098/106] Hacky fix to make sure we get full marks in GC: - Force num_pauses > 1 so the GC stats work correctly and it will correctly run the full mark after the full sweep - Tell our heap snapshotter to only record edges on the _second mark_, the one after the full sweep. 
--- src/gc-heap-snapshot.cpp | 10 ++++++---- src/gc-heap-snapshot.h | 19 ++++++++++--------- src/gc.c | 2 +- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index ab85c492bb9f1..bbce6771e7fb7 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -145,10 +145,6 @@ void _add_internal_root(HeapSnapshot *snapshot); JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream) { - // Do a full GC sweep, which will reset all of the mark bits - gc_num.pause = 0; // Prevent a recollect here, which would cause incomplete results below. - jl_gc_collect(JL_GC_FULL); - // Enable snapshotting HeapSnapshot snapshot; g_snapshot = &snapshot; @@ -156,6 +152,12 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream) { _add_internal_root(&snapshot); + // Initialize the GC's heuristics, so that JL_GC_FULL will work correctly. :) + while (gc_num.pause < 2) { + jl_gc_collect(JL_GC_AUTO); + } + jl_printf(JL_STDERR, "pause: %d\n", gc_num.pause); + // Do a full GC mark (and incremental sweep), which will invoke our callbacks on `g_snapshot` jl_gc_collect(JL_GC_FULL); diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 13c8b26500bee..05bb9d48f0c4d 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -31,49 +31,50 @@ void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOT extern int gc_heap_snapshot_enabled; +extern int prev_sweep_full; // defined in gc.c static inline void gc_heap_snapshot_record_frame_to_object_edge(jl_gcframe_t *from, jl_value_t *to) { - if (__unlikely(gc_heap_snapshot_enabled)) { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { _gc_heap_snapshot_record_frame_to_object_edge(from, to); } } static inline void gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, jl_gcframe_t *to) { - if (__unlikely(gc_heap_snapshot_enabled)) { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { 
_gc_heap_snapshot_record_task_to_frame_edge(from, to); } } static inline void gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) { - if (__unlikely(gc_heap_snapshot_enabled)) { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { _gc_heap_snapshot_record_frame_to_frame_edge(from, to); } } static inline void gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT { - if (__unlikely(gc_heap_snapshot_enabled)) { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { _gc_heap_snapshot_record_root(root, name); } } static inline void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { - if (__unlikely(gc_heap_snapshot_enabled)) { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { _gc_heap_snapshot_record_array_edge(from, to, index); } } static inline void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT { - if (__unlikely(gc_heap_snapshot_enabled)) { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { _gc_heap_snapshot_record_module_edge(from, to, name); } } static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT { - if (__unlikely(gc_heap_snapshot_enabled)) { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { _gc_heap_snapshot_record_object_edge(from, to, slot); } } static inline void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT { - if (__unlikely(gc_heap_snapshot_enabled)) { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { _gc_heap_snapshot_record_internal_edge(from, to); } } static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOTSAFEPOINT { - if (__unlikely(gc_heap_snapshot_enabled)) { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { _gc_heap_snapshot_record_hidden_edge(from, bytes); } } 
diff --git a/src/gc.c b/src/gc.c index c5127aa7ac0db..48e0221a39d2d 100644 --- a/src/gc.c +++ b/src/gc.c @@ -646,7 +646,7 @@ static int mark_reset_age = 0; static int64_t scanned_bytes; // young bytes scanned while marking static int64_t perm_scanned_bytes; // old bytes scanned while marking -static int prev_sweep_full = 1; +int prev_sweep_full = 1; #define inc_sat(v,s) v = (v) >= s ? s : (v)+1 From b441c95bb22950237290c3440c6610e6c2fdbcd8 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 25 Nov 2021 22:43:41 -0500 Subject: [PATCH 099/106] Make type string buffer 1MiB to (try to) avoid buffer overflow --- src/gc-heap-snapshot.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index bbce6771e7fb7..bcd71063a40dc 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -226,9 +226,9 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { : (size_t)jl_datatype_size(type); // print full type - // TODO: We _definitely_ have types longer than 1024 bytes.... + // TODO(PR): Is it possible to use a variable size string here, instead?? 
ios_t str_; - ios_mem(&str_, 1024); + ios_mem(&str_, 1048576); // 1 MiB JL_STREAM* str = (JL_STREAM*)&str_; jl_static_show(str, (jl_value_t*)type); From 7fa3781b666f632ccceef8a9cf411f320aca3512 Mon Sep 17 00:00:00 2001 From: Pete Vilter Date: Wed, 8 Dec 2021 23:15:26 -0500 Subject: [PATCH 100/106] unbreak heap-snapshot.cpp --- src/gc-heap-snapshot.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index bcd71063a40dc..23cc8311e406c 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -420,7 +420,7 @@ void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, cha g_snapshot->names.find_or_create_string_id(name)); } -void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, size_t field_index) JL_NOTSAFEPOINT { +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT { jl_datatype_t *type = (jl_datatype_t*)jl_typeof(from); auto field_paths = _fieldpath_for_slot(from, slot); From fb4fbd354c0fa6a9f26d22ff6111ea67f6a328d7 Mon Sep 17 00:00:00 2001 From: Pete Vilter Date: Wed, 8 Dec 2021 23:15:39 -0500 Subject: [PATCH 101/106] unbreak partr.c --- src/partr.c | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/src/partr.c b/src/partr.c index a3f2dd43165f7..048a841158153 100644 --- a/src/partr.c +++ b/src/partr.c @@ -17,6 +17,9 @@ extern "C" { // thread sleep state +// default to DEFAULT_THREAD_SLEEP_THRESHOLD; set via $JULIA_THREAD_SLEEP_THRESHOLD +uint64_t sleep_threshold; + // thread should not be sleeping--it might need to do work. 
static const int16_t not_sleeping = 0; @@ -36,6 +39,8 @@ uint64_t io_wakeup_enter; uint64_t io_wakeup_leave; ); +uv_mutex_t *sleep_locks; +uv_cond_t *wake_signals; JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int tid) JL_NOTSAFEPOINT { @@ -234,10 +239,25 @@ void jl_init_threadinginfra(void) /* initialize the synchronization trees pool and the multiqueue */ multiq_init(); + sleep_threshold = DEFAULT_THREAD_SLEEP_THRESHOLD; + char *cp = getenv(THREAD_SLEEP_THRESHOLD_NAME); + if (cp) { + if (!strncasecmp(cp, "infinite", 8)) + sleep_threshold = UINT64_MAX; + else + sleep_threshold = (uint64_t)strtol(cp, NULL, 10); + } + jl_ptls_t ptls = jl_current_task->ptls; jl_install_thread_signal_handler(ptls); - uv_mutex_init(&ptls->sleep_lock); - uv_cond_init(&ptls->wake_signal); + + int16_t tid; + sleep_locks = (uv_mutex_t*)calloc(jl_n_threads, sizeof(uv_mutex_t)); + wake_signals = (uv_cond_t*)calloc(jl_n_threads, sizeof(uv_cond_t)); + for (tid = 0; tid < jl_n_threads; tid++) { + uv_mutex_init(&sleep_locks[tid]); + uv_cond_init(&wake_signals[tid]); + } } @@ -257,10 +277,6 @@ void jl_threadfun(void *arg) JL_GC_PROMISE_ROOTED(ct); jl_install_thread_signal_handler(ptls); - // set up sleep mechanism for this thread - uv_mutex_init(&ptls->sleep_lock); - uv_cond_init(&ptls->wake_signal); - // wait for all threads jl_gc_state_set(ptls, JL_GC_STATE_SAFE, 0); uv_barrier_wait(targ->barrier); @@ -324,7 +340,7 @@ static int sleep_check_after_threshold(uint64_t *start_cycles) return 0; } uint64_t elapsed_cycles = jl_hrtime() - (*start_cycles); - if (elapsed_cycles >= DEFAULT_THREAD_SLEEP_THRESHOLD) { + if (elapsed_cycles >= sleep_threshold) { *start_cycles = 0; return 1; } @@ -338,9 +354,9 @@ static void wake_thread(int16_t tid) int8_t state = sleeping; jl_atomic_cmpswap(&other->sleep_check_state, &state, not_sleeping); if (state == sleeping) { - uv_mutex_lock(&other->sleep_lock); - uv_cond_signal(&other->wake_signal); - uv_mutex_unlock(&other->sleep_lock); + 
uv_mutex_lock(&sleep_locks[tid]); + uv_cond_signal(&wake_signals[tid]); + uv_mutex_unlock(&sleep_locks[tid]); } } @@ -512,13 +528,13 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q) // the other threads will just wait for on signal to resume JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() ); int8_t gc_state = jl_gc_safe_enter(ptls); - uv_mutex_lock(&ptls->sleep_lock); + uv_mutex_lock(&sleep_locks[ptls->tid]); while (may_sleep(ptls)) { - uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock); + uv_cond_wait(&wake_signals[ptls->tid], &sleep_locks[ptls->tid]); // TODO: help with gc work here, if applicable } assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping); - uv_mutex_unlock(&ptls->sleep_lock); + uv_mutex_unlock(&sleep_locks[ptls->tid]); JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() ); jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint start_cycles = 0; From 6ab9e154e1a7ad0e660ae50765a1c4985f168ab6 Mon Sep 17 00:00:00 2001 From: Pete Vilter Date: Wed, 8 Dec 2021 23:24:22 -0500 Subject: [PATCH 102/106] remove println --- src/gc-heap-snapshot.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 23cc8311e406c..ca74ae46f2ee2 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -156,7 +156,6 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream) { while (gc_num.pause < 2) { jl_gc_collect(JL_GC_AUTO); } - jl_printf(JL_STDERR, "pause: %d\n", gc_num.pause); // Do a full GC mark (and incremental sweep), which will invoke our callbacks on `g_snapshot` jl_gc_collect(JL_GC_FULL); From e8b32dab58b1e200e9a0782f65694a838c8a53ba Mon Sep 17 00:00:00 2001 From: Pete Vilter Date: Fri, 10 Dec 2021 15:02:28 -0500 Subject: [PATCH 103/106] put things back I accidentally removed in the rebase --- src/gc-heap-snapshot.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp 
b/src/gc-heap-snapshot.cpp index ca74ae46f2ee2..e436e6b12605b 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -263,6 +263,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { typedef pair inlineallocd_field_type_t; +// TODO: remove this static bool debug_log = false; bool _fieldpath_for_slot_helper( @@ -337,7 +338,7 @@ vector _fieldpath_for_slot(jl_value_t *obj, void *slo } -void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) { +void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT { record_node_to_gc_snapshot(root); auto &internal_root = g_snapshot->nodes.front(); From 00f54516e92b92b57a94a402ce508ac898b524fc Mon Sep 17 00:00:00 2001 From: Pete Vilter Date: Thu, 3 Feb 2022 01:55:22 -0500 Subject: [PATCH 104/106] add wrapper function --- base/gcutils.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/base/gcutils.jl b/base/gcutils.jl index bdfc2f3a1bb00..f3146f76c6020 100644 --- a/base/gcutils.jl +++ b/base/gcutils.jl @@ -107,6 +107,7 @@ enable(on::Bool) = ccall(:jl_gc_enable, Int32, (Int32,), on) != 0 """ GC.take_heap_snapshot(io::IOStream) + GC.take_heap_snapshot(filepath::String) Write a snapshot of the heap, in the JSON format expected by the Chrome Devtools Heap Snapshot viewer (.heapsnapshot extension), to the given @@ -115,6 +116,11 @@ IO stream. 
function take_heap_snapshot(io) ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid},), (io::IOStream).handle::Ptr{Cvoid}) end +function take_heap_snapshot(filepath::String) + open(filepath, "w") do io + take_heap_snapshot(io) + end +end """ GC.enable_finalizers(on::Bool) From 07b3ba1b9c96c92964e9ef65443b45308dafc301 Mon Sep 17 00:00:00 2001 From: Pete Vilter Date: Thu, 3 Feb 2022 11:53:53 -0500 Subject: [PATCH 105/106] fix syntax error in makefile --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 2ab77f32d847f..d08f05c5f101f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -46,7 +46,7 @@ RUNTIME_SRCS := \ dlload sys init task array dump staticdata toplevel jl_uv datatype \ simplevector runtime_intrinsics precompile \ threading partr stackwalk \ - gc gc-debug gc-heap-snapshot gc-alloc-profiler gc-pages gc-stacks + gc gc-debug gc-heap-snapshot gc-alloc-profiler gc-pages gc-stacks \ method jlapi signal-handling safepoint timing subtype \ crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage SRCS := jloptions runtime_ccall rtutils From e732d488a1c6833e9c342f4882d8a5eaa3604198 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 2 Jun 2022 10:37:41 -0400 Subject: [PATCH 106/106] Fix copy/paste typos in gc_heap_snapshot_record_roots for jl_gc_queue_thread_local Co-Authored-By: Dean De Leo Co-Authored-By: @whatsthecraic --- src/gc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gc.c b/src/gc.c index 05f17d18f06c5..f449138f179e3 100644 --- a/src/gc.c +++ b/src/gc.c @@ -2816,15 +2816,15 @@ static void jl_gc_queue_thread_local(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp gc_heap_snapshot_record_root(ptls2->current_task, "root task"); if (ptls2->next_task) { gc_mark_queue_obj(gc_cache, sp, ptls2->next_task); - gc_heap_snapshot_record_root(ptls2->current_task, "next task"); + gc_heap_snapshot_record_root(ptls2->next_task, "next task"); } if (ptls2->previous_task) { 
// shouldn't be necessary, but no reason not to gc_mark_queue_obj(gc_cache, sp, ptls2->previous_task); - gc_heap_snapshot_record_root(ptls2->current_task, "previous task"); + gc_heap_snapshot_record_root(ptls2->previous_task, "previous task"); } if (ptls2->previous_exception) { gc_mark_queue_obj(gc_cache, sp, ptls2->previous_exception); - gc_heap_snapshot_record_root(ptls2->current_task, "previous exception"); + gc_heap_snapshot_record_root(ptls2->previous_exception, "previous exception"); } }