Skip to content

Commit

Permalink
Merge pull request #13099 from JuliaLang/jn/no_copy_stacks
Browse files Browse the repository at this point in the history
no-copy stacks
  • Loading branch information
vchuravy authored Oct 2, 2018
2 parents a2df80e + 8e04328 commit d769ad2
Show file tree
Hide file tree
Showing 26 changed files with 1,275 additions and 680 deletions.
4 changes: 3 additions & 1 deletion base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,9 @@ eval(Core, :(LineInfoNode(mod::Module, method::Symbol, file::Symbol, line::Int,

# Construct a new `Module` by calling the runtime entry point `jl_f_new_module`.
# `name` defaults to `:anonymous` and `std_imports` defaults to `true`; both are
# forwarded unchanged to the C call.
# NOTE(review): `std_imports` presumably controls whether the standard imports
# are added to the new module — confirm against the runtime implementation.
Module(name::Symbol=:anonymous, std_imports::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool), name, std_imports)

# One-argument constructor: create a `Task` for `f` by calling the runtime
# function `jl_new_task`, passing 0 as its second (`Int`) argument.
# `@nospecialize` avoids compiling a separate method instance per type of `f`.
Task(@nospecialize(f)) = ccall(:jl_new_task, Ref{Task}, (Any, Int), f, 0)
# Create a `Task` for `f` by calling the runtime function `jl_new_task`.
# `reserved_stack` is forwarded as the second (`Int`) argument and defaults to 0.
# NOTE(review): presumably a requested stack size in bytes, with 0 meaning
# "use the runtime default" — confirm against `jl_new_task`.
# `@nospecialize` avoids compiling a separate method instance per type of `f`.
function Task(@nospecialize(f), reserved_stack::Int=0)
return ccall(:jl_new_task, Ref{Task}, (Any, Int), f, reserved_stack)
end

# simple convert for use by constructors of types in Core
# note that there is no actual conversion defined here,
Expand Down
8 changes: 4 additions & 4 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ SRCS := \
jltypes gf typemap ast builtins module interpreter symbol \
dlload sys init task array dump staticdata toplevel jl_uv datatype \
simplevector APInt-C runtime_intrinsics runtime_ccall precompile \
threadgroup threading stackwalk gc gc-debug gc-pages method \
threadgroup threading stackwalk gc gc-debug gc-pages gc-stacks method \
jlapi signal-handling safepoint jloptions timing subtype rtutils \
crc32c processor

Expand Down Expand Up @@ -203,14 +203,14 @@ $(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c in
$(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h

# archive library file rules
$(BUILDDIR)/support/libsupport.a: $(SRCDIR)/support/*.h $(SRCDIR)/support/*.c
$(BUILDDIR)/support/libsupport.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
$(MAKE) -C $(SRCDIR)/support BUILDDIR='$(abspath $(BUILDDIR)/support)'

$(BUILDDIR)/support/libsupport-debug.a: $(SRCDIR)/support/*.h $(SRCDIR)/support/*.c
$(BUILDDIR)/support/libsupport-debug.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
$(MAKE) -C $(SRCDIR)/support debug BUILDDIR='$(abspath $(BUILDDIR)/support)'

$(FLISP_EXECUTABLE_release): $(BUILDDIR)/flisp/libflisp.a
$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/,flisp/*.h flisp/*.c) $(BUILDDIR)/support/libsupport.a
$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/flisp/,*.h *.c) $(BUILDDIR)/support/libsupport.a
$(MAKE) -C $(SRCDIR)/flisp BUILDDIR='$(abspath $(BUILDDIR)/flisp)'

$(FLISP_EXECUTABLE_debug): $(BUILDDIR)/flisp/libflisp-debug.a
Expand Down
10 changes: 5 additions & 5 deletions src/gc-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -595,11 +595,11 @@ static void gc_scrub_task(jl_task_t *ta)
#else
jl_task_t *thread_task = ptls2->root_task;
#endif
if (ta == thread_task)
gc_scrub_range(ptls2->stack_lo, ptls2->stack_hi);
if (ta->stkbuf == (void*)(intptr_t)(-1) || !ta->stkbuf)
return;
gc_scrub_range((char*)ta->stkbuf, (char*)ta->stkbuf + ta->ssize);
void *stkbuf = ta->stkbuf;
if (ta == thread_task && ptls->copy_stack)
gc_scrub_range(ptls2->stackbase, ptls2->stacksize);
else if (stkbuf)
gc_scrub_range((char*)stkbuf, (char*)stkbuf + ta->bufsz);
}

void gc_scrub(void)
Expand Down
1 change: 0 additions & 1 deletion src/gc-pages.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#ifndef _OS_WINDOWS_
# include <sys/resource.h>
#endif
#include "julia_assert.h"

#ifdef __cplusplus
extern "C" {
Expand Down
191 changes: 191 additions & 0 deletions src/gc-stacks.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#include "gc.h"
#ifndef _OS_WINDOWS_
# include <sys/resource.h>
#endif

const size_t jl_guard_size = (4096 * 16);

#ifdef _OS_WINDOWS_
#define MAP_FAILED NULL
// Allocate a `bufsz`-byte stack with VirtualAlloc (reserved and committed,
// read/write) and flag its first `jl_guard_size` bytes with PAGE_GUARD.
// Returns the base address, or MAP_FAILED if either call fails; on a
// VirtualProtect failure the allocation is released before returning.
static void *malloc_stack(size_t bufsz)
{
    DWORD prev_protect;
    void *base = VirtualAlloc(NULL, bufsz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
    if (base != NULL) {
        if (VirtualProtect(base, jl_guard_size, PAGE_READWRITE | PAGE_GUARD, &prev_protect))
            return base;
        // could not install the guard region: give the allocation back
        VirtualFree(base, 0, MEM_RELEASE);
    }
    return MAP_FAILED;
}


// Release a stack buffer with VirtualFree(..., 0, MEM_RELEASE).
// `bufsz` is accepted for signature parity with the non-Windows variant
// but is not used here.
static void free_stack(void *stkbuf, size_t bufsz)
{
    (void)bufsz;
    VirtualFree(stkbuf, 0, MEM_RELEASE);
}

#else

// Map `bufsz` bytes of private anonymous read/write memory for use as a stack.
// When neither JL_HAVE_UCONTEXT nor JL_HAVE_SIGALTSTACK is defined, the first
// `jl_guard_size` bytes are additionally made inaccessible (PROT_NONE) so a
// stack overflow can be detected. Returns MAP_FAILED on any failure.
static void *malloc_stack(size_t bufsz)
{
    const int prot = PROT_READ | PROT_WRITE;
    const int flags = MAP_PRIVATE | MAP_ANONYMOUS;
    void *base = mmap(0, bufsz, prot, flags, -1, 0);
#if !defined(JL_HAVE_UCONTEXT) && !defined(JL_HAVE_SIGALTSTACK)
    // set up the guard region; undo the mapping if it cannot be protected
    if (base != MAP_FAILED && mprotect(base, jl_guard_size, PROT_NONE) == -1) {
        munmap(base, bufsz);
        base = MAP_FAILED;
    }
#endif
    return base;
}

static void free_stack(void *stkbuf, size_t bufsz)
{
munmap(stkbuf, bufsz);
}
#endif


// Stack buffer sizes, in bytes, served from the per-thread free-stack pools
// (one pool per entry). The table must stay in ascending order: select_pool
// scans it linearly for the first entry that is large enough. Sizes follow a
// 1x / 1.5x progression from 128 KiB to 24 MiB, and every entry is a multiple
// of 4096 bytes.
const unsigned pool_sizes[] = {
    128 * 1024,
    192 * 1024,
    256 * 1024,
    384 * 1024,
    512 * 1024,
    768 * 1024,
    1024 * 1024,
    1536 * 1024, // 1.5 MiB
    2048 * 1024,
    3 * 1024 * 1024,
    4 * 1024 * 1024,
    6 * 1024 * 1024,
    8 * 1024 * 1024,
    12 * 1024 * 1024,
    16 * 1024 * 1024,
    24 * 1024 * 1024,
};

static_assert(sizeof(pool_sizes) == JL_N_STACK_POOLS * sizeof(pool_sizes[0]), "JL_N_STACK_POOLS size mismatch");

static unsigned select_pool(size_t nb)
{
unsigned pool_id = 0;
while (pool_sizes[pool_id] < nb)
pool_id++;
return pool_id;
}


// Dispose of a stack buffer on behalf of thread state `ptls`.
// A buffer whose size exactly equals one of the pool sizes is pushed onto
// that pool's free list for reuse; any other buffer is returned to the OS
// through free_stack.
static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
{
    const size_t largest = pool_sizes[JL_N_STACK_POOLS - 1];
    if (bufsz <= largest) {
        unsigned id = select_pool(bufsz);
        if (bufsz == pool_sizes[id]) {
            // exact pool size: keep it around for the next allocation
            arraylist_push(&ptls->heap.free_stacks[id], stkbuf);
            return;
        }
    }
    free_stack(stkbuf, bufsz);
}


JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz)
{
_jl_free_stack(jl_get_ptls_states(), stkbuf, bufsz);
}


// Hand a task's stack buffer back to the free-stack pool of `ptls`.
// This only happens when the task's `bufsz` exactly equals a pool size; in
// that case `task->stkbuf` is cleared and the buffer is pushed onto the
// matching pool. For any other size the task is left untouched.
void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
{
    unsigned id;
    size_t bufsz = task->bufsz;
    void *stkbuf = task->stkbuf;
    if (bufsz > pool_sizes[JL_N_STACK_POOLS - 1])
        return;
    id = select_pool(bufsz);
    if (pool_sizes[id] != bufsz)
        return;
    task->stkbuf = NULL;
    arraylist_push(&ptls->heap.free_stacks[id], stkbuf);
}


// Obtain a stack buffer of at least `*bufsz` bytes for the current thread.
//
// Requests no larger than the biggest pool are rounded up to that pool's size
// and served from the pool's free list when it is non-empty; larger requests
// are aligned to `jl_page_size` with LLT_ALIGN. If no pooled buffer is
// available a fresh one is obtained from malloc_stack, and
// `jl_memory_exception` is thrown if that fails.
//
// On return `*bufsz` holds the size actually used. When `owner` is non-NULL
// it is appended to the thread's `live_tasks` list.
JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner)
{
    jl_ptls_t ptls = jl_get_ptls_states();
    size_t want = *bufsz;
    void *stk = NULL;
    if (want > pool_sizes[JL_N_STACK_POOLS - 1]) {
        want = LLT_ALIGN(want, jl_page_size);
    }
    else {
        unsigned id = select_pool(want);
        arraylist_t *cache = &ptls->heap.free_stacks[id];
        want = pool_sizes[id];
        if (cache->len > 0)
            stk = arraylist_pop(cache);
    }
    if (stk == NULL) {
        // TODO: allocate blocks of stacks? but need to mprotect individually anyways
        stk = malloc_stack(want);
        if (stk == MAP_FAILED)
            jl_throw(jl_memory_exception);
    }
    *bufsz = want;
    if (owner)
        arraylist_push(&ptls->heap.live_tasks, owner);
    return stk;
}

// Sweep the `live_tasks` list of every thread.
//
// Tasks whose GC bits are not marked are removed from the list and their
// stack buffer, if they still have one, is passed to _jl_free_stack for that
// thread. The list is compacted in place so the surviving (marked) tasks end
// up at the front, and its length is reduced by the number removed.
void sweep_stack_pools(void)
{
    // TODO: deallocate stacks if we have too many sitting around unused
    // for (stk in halfof(free_stacks))
    //     free_stack(stk, pool_sz);
    // // then sweep the task stacks
    // for (t in live_tasks)
    //     if (!gc-marked(t))
    //         stkbuf = t->stkbuf
    //         bufsz = t->bufsz
    //         if (stkbuf)
    //             push(free_stacks[sz], stkbuf)
    for (int tid = 0; tid < jl_n_threads; tid++) {
        jl_ptls_t ptls2 = jl_all_tls_states[tid];
        arraylist_t *live_tasks = &ptls2->heap.live_tasks;
        const size_t total = live_tasks->len;
        if (total == 0)
            continue;
        void **items = live_tasks->items;
        size_t kept = 0;    // survivors so far; also the slot being examined
        size_t dropped = 0; // dead tasks found so far
        for (;;) {
            jl_task_t *t = (jl_task_t*)items[kept];
            if (!gc_marked(jl_astaggedvalue(t)->bits.gc)) {
                dropped++;
                void *stkbuf = t->stkbuf;
                size_t bufsz = t->bufsz;
                if (stkbuf != NULL) {
                    t->stkbuf = NULL;
                    _jl_free_stack(ptls2, stkbuf, bufsz);
                }
            }
            else {
                kept++;
            }
            if (kept >= total - dropped)
                break;
            // bring the next unexamined entry into the slot under inspection
            void *next = items[kept + dropped];
            items[kept + dropped] = items[kept];
            items[kept] = next;
        }
        live_tasks->len -= dropped;
    }
}
35 changes: 15 additions & 20 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls,

static void sweep_weak_refs(void)
{
for (int i = 0;i < jl_n_threads;i++) {
for (int i = 0; i < jl_n_threads; i++) {
jl_ptls_t ptls2 = jl_all_tls_states[i];
size_t n = 0;
size_t ndel = 0;
Expand All @@ -710,7 +710,8 @@ static void sweep_weak_refs(void)
while (1) {
jl_weakref_t *wr = (jl_weakref_t*)lst[n];
if (gc_marked(jl_astaggedvalue(wr)->bits.gc)) {
// weakref itself is alive
// weakref itself is alive,
// so the user could still re-set it to a new value
if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc))
wr->value = (jl_value_t*)jl_nothing;
n++;
Expand All @@ -722,7 +723,7 @@ static void sweep_weak_refs(void)
break;
void *tmp = lst[n];
lst[n] = lst[n + ndel];
lst[n+ndel] = tmp;
lst[n + ndel] = tmp;
}
ptls2->heap.weak_refs.len -= ndel;
}
Expand Down Expand Up @@ -1026,7 +1027,7 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
int freedall = 1;
int pg_skpd = 1;
if (!pg->has_marked) {
// lazy version: (empty) if the whole page was already unused, free it
// lazy version: (empty) if the whole page was already unused, free it (return it to the pool)
// eager version: (freedall) free page as soon as possible
// the eager one uses less memory.
// FIXME - need to do accounting on a per-thread basis
Expand Down Expand Up @@ -2124,19 +2125,13 @@ mark: {
objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_task_t));
jl_task_t *ta = (jl_task_t*)new_obj;
gc_scrub_record_task(ta);
int stkbuf = (ta->stkbuf != (void*)(intptr_t)-1 && ta->stkbuf != NULL);
void *stkbuf = ta->stkbuf;
int16_t tid = ta->tid;
jl_ptls_t ptls2 = jl_all_tls_states[tid];
if (stkbuf) {
#ifdef COPY_STACKS
gc_setmark_buf_(ptls, ta->stkbuf, bits, ta->bufsz);
#else
// stkbuf isn't owned by julia for the root task
if (ta != ptls2->root_task) {
gc_setmark_buf_(ptls, ta->stkbuf, bits, ta->ssize);
}
if (stkbuf && ta->copy_stack)
gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
#endif
}
jl_gcframe_t *s = NULL;
size_t nroots;
uintptr_t offset = 0;
Expand All @@ -2148,9 +2143,11 @@ mark: {
else if (stkbuf) {
s = ta->gcstack;
#ifdef COPY_STACKS
ub = (uintptr_t)ptls2->stackbase;
lb = ub - ta->ssize;
offset = (uintptr_t)ta->stkbuf - lb;
if (ta->copy_stack) {
ub = (uintptr_t)ptls2->stackbase;
lb = ub - ta->copy_stack;
offset = (uintptr_t)stkbuf - lb;
}
#endif
}
if (s) {
Expand Down Expand Up @@ -2278,10 +2275,6 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, gc_mark_sp_t *sp)
if (jl_all_methods != NULL)
gc_mark_queue_obj(gc_cache, sp, jl_all_methods);

#ifndef COPY_STACKS
gc_mark_queue_obj(gc_cache, sp, jl_unprotect_stack_func);
#endif

// constants
gc_mark_queue_obj(gc_cache, sp, jl_typetype_type);
gc_mark_queue_obj(gc_cache, sp, jl_emptytuple_type);
Expand Down Expand Up @@ -2564,6 +2557,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, int full)
scanned_bytes = 0;
// 5. start sweeping
sweep_weak_refs();
sweep_stack_pools();
gc_sweep_other(ptls, sweep_full);
gc_scrub();
gc_verify_tags();
Expand Down Expand Up @@ -2687,6 +2681,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
p[i].newpages = NULL;
}
arraylist_new(&heap->weak_refs, 0);
arraylist_new(&heap->live_tasks, 0);
heap->mallocarrays = NULL;
heap->mafreelist = NULL;
heap->big_objects = NULL;
Expand Down
1 change: 1 addition & 0 deletions src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,7 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, gc_mark_sp_t *sp);
void gc_mark_queue_finlist(jl_gc_mark_cache_t *gc_cache, gc_mark_sp_t *sp,
arraylist_t *list, size_t start);
void gc_mark_loop(jl_ptls_t ptls, gc_mark_sp_t sp);
void sweep_stack_pools(void);
void gc_debug_init(void);

extern void *gc_mark_label_addrs[_GC_MARK_L_MAX];
Expand Down
Loading

0 comments on commit d769ad2

Please sign in to comment.