-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Page based heap size heuristics #50144
Merged
Merged
Changes from 7 commits
Commits
Show all changes
13 commits
Select commit
Hold shift + click to select a range
5f36833
Implement new GC heuristics.
gbaraldi 2f42cd5
Relax some of the atomics for sweeping
gbaraldi 01d6e39
Apply review suggestions.
gbaraldi b033c0a
Add batching for counted functions
gbaraldi 2ab7f95
Small fixup
gbaraldi 7af549e
Batch other big allocations as well
gbaraldi 4da775f
Readd testenv changes
gbaraldi b4bab03
Let the heap increase a bit if we are thrashing
gbaraldi 2cbba34
Whitespace
gbaraldi df3fcb0
Merge branch 'master' into new-heuristics
gbaraldi e6a18f3
Experiment with not setting a max size by default at all
gbaraldi 15b34a5
Add under pressure callback
gbaraldi 9f3ca7c
Merge branch 'master' into new-heuristics
gbaraldi File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,255 @@ | ||
diff --git a/src/gc.c b/src/gc.c | ||
index c85d1e5455..c82b2b645d 100644 | ||
--- a/src/gc.c | ||
+++ b/src/gc.c | ||
@@ -6,6 +6,8 @@ | ||
#include "julia_gcext.h" | ||
#include "julia_assert.h" | ||
#include <math.h> | ||
+#include <stddef.h> | ||
+#include <stdint.h> | ||
#include <sys/types.h> | ||
#ifdef __GLIBC__ | ||
#include <malloc.h> // for malloc_trim | ||
@@ -1004,8 +1006,14 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) | ||
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); | ||
jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, | ||
jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, allocsz); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, allocsz); | ||
+ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); | ||
+ if (alloc_thresh + allocsz < 128*1024) | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + allocsz); | ||
+ else { | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + allocsz); | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + allocsz); | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); | ||
+ } | ||
#ifdef MEMDEBUG | ||
memset(v, 0xee, allocsz); | ||
#endif | ||
@@ -1051,8 +1059,10 @@ static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT | ||
if (nxt) | ||
nxt->prev = pv; | ||
gc_num.freed += v->sz&~3; | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, v->sz&~3); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(v->sz&~3)); | ||
+ jl_atomic_store_relaxed(&gc_heap_stats.malloc_bytes_freed, | ||
+ jl_atomic_load_relaxed(&gc_heap_stats.malloc_bytes_freed) + (v->sz&~3)); | ||
+ jl_atomic_store_relaxed(&gc_heap_stats.heap_size, | ||
+ jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - (v->sz&~3)); | ||
#ifdef MEMDEBUG | ||
memset(v, 0xbb, v->sz&~3); | ||
#endif | ||
@@ -1112,8 +1122,14 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT | ||
jl_ptls_t ptls = jl_current_task->ptls; | ||
jl_atomic_store_relaxed(&ptls->gc_num.allocd, | ||
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, sz); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, sz); | ||
+ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); | ||
+ if (alloc_thresh + sz < 128*1024) | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + sz); | ||
+ else { | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + sz); | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + sz); | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); | ||
+ } | ||
} | ||
|
||
static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT | ||
@@ -1126,12 +1142,15 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT | ||
jl_ptls_t ptls = gc_all_tls_states[i]; | ||
if (ptls) { | ||
dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval); | ||
- dest->freed += jl_atomic_load_relaxed(&ptls->gc_num.freed); | ||
dest->malloc += jl_atomic_load_relaxed(&ptls->gc_num.malloc); | ||
dest->realloc += jl_atomic_load_relaxed(&ptls->gc_num.realloc); | ||
dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_num.poolalloc); | ||
dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_num.bigalloc); | ||
- dest->freecall += jl_atomic_load_relaxed(&ptls->gc_num.freecall); | ||
+ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); | ||
+ uint64_t free_thresh = jl_atomic_load_relaxed(&ptls->gc_num.free_thresh); | ||
+ jl_atomic_store_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + jl_atomic_load_relaxed(&gc_heap_stats.bytes_mallocd)); | ||
+ jl_atomic_store_relaxed(&gc_heap_stats.malloc_bytes_freed, free_thresh + jl_atomic_load_relaxed(&gc_heap_stats.malloc_bytes_freed)); | ||
+ jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_thresh - free_thresh + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); | ||
} | ||
} | ||
} | ||
@@ -1188,8 +1207,10 @@ static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT | ||
jl_free_aligned(d); | ||
else | ||
free(d); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, jl_array_nbytes(a)); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -jl_array_nbytes(a)); | ||
+ jl_atomic_store_relaxed(&gc_heap_stats.malloc_bytes_freed, | ||
+ jl_atomic_load_relaxed(&gc_heap_stats.malloc_bytes_freed) + jl_array_nbytes(a)); | ||
+ jl_atomic_store_relaxed(&gc_heap_stats.heap_size, | ||
+ jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_array_nbytes(a)); | ||
gc_num.freed += jl_array_nbytes(a); | ||
gc_num.freecall++; | ||
} | ||
@@ -3589,8 +3610,14 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) | ||
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); | ||
jl_atomic_store_relaxed(&ptls->gc_num.malloc, | ||
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, sz); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, sz); | ||
+ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); | ||
+ if (alloc_thresh + sz < 128*1024) | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + sz); | ||
+ else { | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + sz); | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + sz); | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); | ||
+ } | ||
} | ||
return malloc(sz); | ||
} | ||
@@ -3606,8 +3633,14 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) | ||
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); | ||
jl_atomic_store_relaxed(&ptls->gc_num.malloc, | ||
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, nm*sz); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, nm*sz); | ||
+ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); | ||
+ if (alloc_thresh + sz*nm < 128*1024) | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + sz*nm); | ||
+ else { | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + sz*nm); | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + sz*nm); | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); | ||
+ } | ||
} | ||
return calloc(nm, sz); | ||
} | ||
@@ -3619,12 +3652,15 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) | ||
free(p); | ||
if (pgcstack != NULL && ct->world_age) { | ||
jl_ptls_t ptls = ct->ptls; | ||
- jl_atomic_store_relaxed(&ptls->gc_num.freed, | ||
- jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); | ||
- jl_atomic_store_relaxed(&ptls->gc_num.freecall, | ||
- jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, sz); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -sz); | ||
+ uint64_t free_thresh = jl_atomic_load_relaxed(&ptls->gc_num.free_thresh); | ||
+ if (free_thresh + sz < 128*1024) { | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.free_thresh, free_thresh + sz); | ||
+ } | ||
+ else { | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, free_thresh + sz); | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_thresh + sz)); | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.free_thresh, 0); | ||
+ } | ||
} | ||
} | ||
|
||
@@ -3635,17 +3671,28 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size | ||
if (pgcstack != NULL && ct->world_age) { | ||
jl_ptls_t ptls = ct->ptls; | ||
maybe_collect(ptls); | ||
- if (sz < old) | ||
- jl_atomic_store_relaxed(&ptls->gc_num.freed, | ||
- jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz)); | ||
- else | ||
+ if (!(sz < old)) | ||
jl_atomic_store_relaxed(&ptls->gc_num.allocd, | ||
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); | ||
jl_atomic_store_relaxed(&ptls->gc_num.realloc, | ||
jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, sz); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, old); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, sz-old); | ||
+ | ||
+ uint64_t free_thresh = jl_atomic_load_relaxed(&ptls->gc_num.free_thresh); | ||
+ if (free_thresh + old < 128*1024) | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.free_thresh, free_thresh + old); | ||
+ else { | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, free_thresh + old); | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_thresh + old)); | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.free_thresh, 0); | ||
+ } | ||
+ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); | ||
+ if (alloc_thresh + sz < 128*1024) | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + sz); | ||
+ else { | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + sz); | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + sz); | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); | ||
+ } | ||
} | ||
return realloc(p, sz); | ||
} | ||
@@ -3720,8 +3767,14 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) | ||
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); | ||
jl_atomic_store_relaxed(&ptls->gc_num.malloc, | ||
jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, allocsz); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, allocsz); | ||
+ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); | ||
+ if (alloc_thresh + allocsz < 128*1024) | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + allocsz); | ||
+ else { | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + allocsz); | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + allocsz); | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); | ||
+ } | ||
int last_errno = errno; | ||
#ifdef _OS_WINDOWS_ | ||
DWORD last_error = GetLastError(); | ||
@@ -3752,17 +3805,28 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds | ||
ptls->gc_cache.perm_scanned_bytes += allocsz - oldsz; | ||
inc_live_bytes(allocsz - oldsz); | ||
} | ||
- else if (allocsz < oldsz) | ||
- jl_atomic_store_relaxed(&ptls->gc_num.freed, | ||
- jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz)); | ||
- else | ||
+ else if (!(allocsz < oldsz)) | ||
jl_atomic_store_relaxed(&ptls->gc_num.allocd, | ||
jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); | ||
jl_atomic_store_relaxed(&ptls->gc_num.realloc, | ||
jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, allocsz); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, oldsz); | ||
- jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, allocsz-oldsz); | ||
+ | ||
+ uint64_t free_thresh = jl_atomic_load_relaxed(&ptls->gc_num.free_thresh); | ||
+ if (free_thresh + oldsz < 128*1024) | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.free_thresh, free_thresh + oldsz); | ||
+ else { | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, free_thresh + oldsz); | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(free_thresh + oldsz)); | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.free_thresh, 0); | ||
+ } | ||
+ uint64_t alloc_thresh = jl_atomic_load_relaxed(&ptls->gc_num.alloc_thresh); | ||
+ if (alloc_thresh + allocsz < 128*1024) | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, alloc_thresh + allocsz); | ||
+ else { | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, alloc_thresh + allocsz); | ||
+ jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_thresh + allocsz); | ||
+ jl_atomic_store_relaxed(&ptls->gc_num.alloc_thresh, 0); | ||
+ } | ||
int last_errno = errno; | ||
#ifdef _OS_WINDOWS_ | ||
DWORD last_error = GetLastError(); | ||
diff --git a/src/julia_threads.h b/src/julia_threads.h | ||
index f4c235243e..a672a92fb9 100644 | ||
--- a/src/julia_threads.h | ||
+++ b/src/julia_threads.h | ||
@@ -130,12 +130,12 @@ typedef struct { | ||
|
||
typedef struct { | ||
_Atomic(int64_t) allocd; | ||
- _Atomic(int64_t) freed; | ||
_Atomic(uint64_t) malloc; | ||
_Atomic(uint64_t) realloc; | ||
_Atomic(uint64_t) poolalloc; | ||
_Atomic(uint64_t) bigalloc; | ||
- _Atomic(uint64_t) freecall; | ||
+ _Atomic(int64_t) free_thresh; // fields used to batch fetch add operations for the GC | ||
+ _Atomic(uint64_t) alloc_thresh; | ||
} jl_thread_gc_num_t; | ||
|
||
typedef struct { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove?