Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Garbage Profiler #2

Closed
wants to merge 30 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
fbd3740
print every time we GC
vilterp Oct 7, 2021
a69c646
print types of stuff being freed
vilterp Oct 8, 2021
0a4c4ba
move to C++; try debugging...
vilterp Oct 8, 2021
48d280a
try avoiding segfaults
vilterp Oct 8, 2021
1ca0ee4
trying to debug
vilterp Oct 8, 2021
684fef5
remove crazy if statements
vilterp Oct 8, 2021
b1381c5
remove outdated comment
vilterp Oct 8, 2021
89c7f64
do pretty printing during sweep phase
vilterp Oct 8, 2021
2443676
first pass at writing all allocations & frees to a stream
vilterp Oct 12, 2021
cad2321
fix name; add newlines
vilterp Oct 12, 2021
7ffa9bd
tweaks
vilterp Oct 12, 2021
086cb2f
track more allocations
vilterp Oct 12, 2021
f4ace59
first hack at garbage profile
vilterp Oct 12, 2021
ca9f4ab
fix logic
vilterp Oct 12, 2021
58cc665
sort before printing
vilterp Oct 13, 2021
c759ff5
first attempt to collect events in memory
vilterp Oct 13, 2021
e3fe4a3
get it to compile
vilterp Oct 13, 2021
92c58ee
debug logging
vilterp Oct 13, 2021
08b2be9
more debugging
vilterp Oct 13, 2021
ccffc2f
try cutting out one of the maps
vilterp Oct 13, 2021
693e45f
why can't I loop over a map
vilterp Oct 13, 2021
ce4dd0b
remove debug logging. I'm an idiot
vilterp Oct 13, 2021
25f7de4
try writing out stats on each gc
vilterp Oct 14, 2021
aa38612
move GC print inside of report_gc_finished
vilterp Oct 14, 2021
320b06b
fixes
vilterp Oct 14, 2021
caa5074
commas with escaping
vilterp Oct 14, 2021
67cb08c
stop sorting
vilterp Oct 14, 2021
5605e4c
split out garbage profiler into its own function
vilterp Oct 14, 2021
a834a43
remove dead code
vilterp Oct 14, 2021
c8db25b
fix call of wrong variant
vilterp Oct 15, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions base/gcutils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,14 @@ function take_heap_snapshot(io)
ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid},), (io::IOStream).handle::Ptr{Cvoid})
end

function start_garbage_profile(io)
ccall(:jl_start_garbage_profile, Cvoid, (Ptr{Cvoid},), io.handle)
end

function stop_garbage_profile()
ccall(:jl_stop_garbage_profile, Cvoid, ())
end

function enable_finalizers()
Base.@inline
ccall(:jl_gc_enable_finalizers_internal, Cvoid, ())
Expand Down
6 changes: 3 additions & 3 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ RUNTIME_SRCS := \
jltypes gf typemap smallintset ast builtins module interpreter symbol \
dlload sys init task array dump staticdata toplevel jl_uv datatype \
simplevector runtime_intrinsics precompile \
threading partr stackwalk gc gc-debug gc-heap-snapshot gc-pages gc-stacks method \
jlapi signal-handling safepoint timing subtype \
threading partr stackwalk gc gc-debug gc-heap-snapshot gc-garbage-profiler \
gc-pages gc-stacks method jlapi signal-handling safepoint timing subtype \
crc32c APInt-C processor ircode opaque_closure
SRCS := jloptions runtime_ccall rtutils

Expand Down Expand Up @@ -255,7 +255,7 @@ $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)
$(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc builtin_proto.h serialize.h)
$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h
$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h
$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h
$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-garbage-profiler.h
$(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h
$(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h
$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h
Expand Down
4 changes: 4 additions & 0 deletions src/datatype.c
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,7 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, const void *data)
assert(jl_is_datatype(dt));
jl_datatype_t *bt = (jl_datatype_t*)dt;
size_t nb = jl_datatype_size(bt);

// some types have special pools to minimize allocations
if (nb == 0) return jl_new_struct_uninit(bt); // returns bt->instance
if (bt == jl_bool_type) return (1 & *(int8_t*)data) ? jl_true : jl_false;
Expand All @@ -802,6 +803,7 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, const void *data)
jl_task_t *ct = jl_current_task;
jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt);
memcpy(jl_assume_aligned(v, sizeof(void*)), data, nb);

return v;
}

Expand Down Expand Up @@ -1285,6 +1287,7 @@ JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args,
}
JL_GC_POP();
}

return jv;
}

Expand Down Expand Up @@ -1332,6 +1335,7 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup)
set_nth_field(type, jv, i, fi, 0);
}
JL_GC_POP();

return jv;
}

Expand Down
152 changes: 152 additions & 0 deletions src/gc-garbage-profiler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#include "gc-garbage-profiler.h"

#include "julia_internal.h"
#include "gc.h"

#include <string>
#include <unordered_map>

using std::string;
using std::unordered_map;

// == utility functions ==

void print_str_escape_csv(ios_t *stream, const std::string &s) {
ios_printf(stream, "\"");
for (auto c = s.cbegin(); c != s.cend(); c++) {
switch (*c) {
case '"': ios_printf(stream, "\"\""); break;
default:
ios_printf(stream, "%c", *c);
}
}
ios_printf(stream, "\"");
}

string _type_as_string(jl_datatype_t *type) {
if ((uintptr_t)type < 4096U) {
return "<corrupt>";
} else if (type == (jl_datatype_t*)jl_buff_tag) {
return "<buffer>";
} else if (type == (jl_datatype_t*)jl_malloc_tag) {
return "<malloc>";
} else if (type == jl_string_type) {
return "<string>";
} else if (type == jl_symbol_type) {
return "<symbol>";
} else if (jl_is_datatype(type)) {
ios_t str_;
ios_mem(&str_, 10024);
JL_STREAM* str = (JL_STREAM*)&str_;

jl_static_show(str, (jl_value_t*)type);

string type_str = string((const char*)str_.buf, str_.size);
ios_close(&str_);

return type_str;
} else {
return "<missing>";
}
}

// == global variables manipulated by callbacks ==
// TODO: wrap these up into a struct

ios_t *garbage_profile_out = nullptr;
int gc_epoch = 0;
// for each type, the index in mem_event where the type
// event appears.
unordered_map<size_t, string> g_type_name_by_address;
unordered_map<size_t, size_t> g_type_address_by_value_address;
unordered_map<size_t, size_t> g_frees_by_type_address;

// == exported interface ==

JL_DLLEXPORT void jl_start_garbage_profile(ios_t *stream) {
garbage_profile_out = stream;
ios_printf(garbage_profile_out, "gc_epoch,type,num_freed\n");
}

JL_DLLEXPORT void jl_stop_garbage_profile() {
// TODO: flush file?
garbage_profile_out = nullptr;
g_type_name_by_address.clear();
g_type_address_by_value_address.clear();
g_frees_by_type_address.clear();
}

// == callbacks called into by the outside ==

void _report_gc_started() {
g_frees_by_type_address.clear();
}

// TODO: figure out how to pass all of these in as a struct
void _report_gc_finished(uint64_t pause, uint64_t freed, uint64_t allocd) {
// TODO: figure out how to put in commas
jl_printf(
JL_STDERR,
"GC: pause %fms. collected %fMB. %lld allocs total\n",
pause/1e6, freed/1e6, allocd
);

// sort frees
for (auto const &pair : g_frees_by_type_address) {
auto type_str = g_type_name_by_address.find(pair.first);
if (type_str != g_type_name_by_address.end()) {
ios_printf(garbage_profile_out, "%d,", gc_epoch);
print_str_escape_csv(garbage_profile_out, type_str->second);
ios_printf(garbage_profile_out, ",%d\n", pair.second);
} else {
jl_printf(JL_STDERR, "couldn't find type %p\n", pair.first);
// TODO: warn about missing type
}
}
gc_epoch++;
}

void register_type_string(jl_datatype_t *type) {
auto id = g_type_name_by_address.find((size_t)type);
if (id != g_type_name_by_address.end()) {
return;
}

string type_str = _type_as_string(type);
g_type_name_by_address[(size_t)type] = type_str;
}

void _record_allocated_value(jl_value_t *val) {
if (garbage_profile_out == nullptr) {
return;
}

auto type = (jl_datatype_t*)jl_typeof(val);
register_type_string(type);

g_type_address_by_value_address[(size_t)val] = (size_t)type;
}

void _record_freed_value(jl_taggedvalue_t *tagged_val) {
if (garbage_profile_out == nullptr) {
return;
}

jl_value_t *val = jl_valueof(tagged_val);

auto value_address = (size_t)val;
auto type_address = g_type_address_by_value_address.find(value_address);
if (type_address == g_type_address_by_value_address.end()) {
return; // TODO: warn
}
auto frees = g_frees_by_type_address.find(type_address->second);

if (frees == g_frees_by_type_address.end()) {
g_frees_by_type_address[type_address->second] = 1;
} else {
g_frees_by_type_address[type_address->second] = frees->second + 1;
}
}

44 changes: 44 additions & 0 deletions src/gc-garbage-profiler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#ifndef JL_GC_GARBAGE_PROFILER_H
#define JL_GC_GARBAGE_PROFILER_H

#include "julia.h"
#include "ios.h"

#ifdef __cplusplus
extern "C" {
#endif

JL_DLLEXPORT void jl_start_garbage_profile(ios_t *stream);
JL_DLLEXPORT void jl_stop_garbage_profile(void);

void _report_gc_started(void);
void _report_gc_finished(uint64_t pause, uint64_t freed, uint64_t allocd);
void _record_allocated_value(jl_value_t *val);
void _record_freed_value(jl_taggedvalue_t *tagged_val);

// ---------------------------------------------------------------------
// functions to call from GC when garbage profiling is enabled
// ---------------------------------------------------------------------

extern ios_t *garbage_profile_out; // TODO: replace w/ bool?

static inline void record_allocated_value(jl_value_t *val) {
if (__unlikely(garbage_profile_out != 0)) {
_record_allocated_value(val);
}
}

static inline void record_freed_value(jl_taggedvalue_t *tagged_val) {
if (__unlikely(garbage_profile_out != 0)) {
_record_freed_value(tagged_val);
}
}

#ifdef __cplusplus
}
#endif


#endif // JL_GC_GARBAGE_PROFILER_H
4 changes: 2 additions & 2 deletions src/gc-heap-snapshot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ void print_str_escape_json(ios_t *stream, const std::string &s) {
ios_printf(stream, "\"");
}


// Edges
// "edge_fields":
// [ "type", "name_or_index", "to_node" ]
Expand Down Expand Up @@ -198,6 +197,8 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT {
string name = "<missing>";
string node_type = "object";

// TODO: dedup with _type_as_string
// need variant that returns size as well
if (a == (jl_value_t*)jl_malloc_tag) {
name = "<malloc>";
} else {
Expand Down Expand Up @@ -412,7 +413,6 @@ void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_
}

void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT {
//jl_printf(JL_STDERR, "module: %p binding:%p name:%s\n", from, to, name);
_record_gc_edge("object", "property", (jl_value_t *)from, to,
g_snapshot->names.find_or_create_string_id(name));
}
Expand Down
1 change: 0 additions & 1 deletion src/gc-heap-snapshot.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
extern "C" {
#endif


// ---------------------------------------------------------------------
// Functions to call from GC when heap snapshot is enabled
// ---------------------------------------------------------------------
Expand Down
6 changes: 6 additions & 0 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,7 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
while ((char*)v <= lim) {
int bits = v->bits.gc;
if (!gc_marked(bits)) {
record_freed_value(v);
*pfl = v;
pfl = &v->next;
pfl_begin = pfl_begin ? pfl_begin : pfl;
Expand Down Expand Up @@ -3055,6 +3056,8 @@ size_t jl_maxrss(void);
// Only one thread should be running in this function
static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
{
_report_gc_started();

combine_thread_gc_counts(&gc_num);

jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache;
Expand Down Expand Up @@ -3243,6 +3246,9 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)

uint64_t gc_end_t = jl_hrtime();
uint64_t pause = gc_end_t - t0;

_report_gc_finished(pause, gc_num.freed, gc_num.allocd);

gc_final_pause_end(t0, gc_end_t);
gc_time_sweep_pause(gc_end_t, actual_allocd, live_bytes,
estimate_freed, sweep_full);
Expand Down
1 change: 1 addition & 0 deletions src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#endif
#include "julia_assert.h"
#include "gc-heap-snapshot.h"
#include "gc-garbage-profiler.h"

#ifdef __cplusplus
extern "C" {
Expand Down
4 changes: 4 additions & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "options.h"
#include "julia_locks.h"
#include "gc-garbage-profiler.h"
#include <uv.h>
#if !defined(_WIN32)
#include <unistd.h>
Expand Down Expand Up @@ -362,6 +363,9 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
v = jl_gc_big_alloc(ptls, allocsz);
}
jl_set_typeof(v, ty);

record_allocated_value(v);

return v;
}

Expand Down