-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Mark phase prefetching. #73375
Mark phase prefetching. #73375
Changes from 2 commits
a9d0a86
38b4775
9418dec
37f4808
63390f8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2788,6 +2788,8 @@ size_t gc_heap::expand_mechanisms_per_heap[max_expand_mechanisms_count]; | |
|
||
size_t gc_heap::interesting_mechanism_bits_per_heap[max_gc_mechanism_bits_count]; | ||
|
||
mark_queue_t gc_heap::mark_queue; | ||
|
||
#endif // MULTIPLE_HEAPS | ||
|
||
/* end of per heap static initialization */ | ||
|
@@ -23203,24 +23205,19 @@ inline | |
BOOL gc_heap::gc_mark (uint8_t* o, uint8_t* low, uint8_t* high, int condemned_gen) | ||
{ | ||
#ifdef USE_REGIONS | ||
assert (low == 0); | ||
assert (high == 0); | ||
if (is_in_heap_range (o)) | ||
if ((o >= low) && (o < high)) | ||
{ | ||
BOOL already_marked = marked (o); | ||
if (already_marked) | ||
return FALSE; | ||
if (condemned_gen == max_generation) | ||
if (condemned_gen != max_generation && get_region_gen_num (o) > condemned_gen) | ||
{ | ||
set_marked (o); | ||
return TRUE; | ||
return FALSE; | ||
} | ||
int gen = get_region_gen_num (o); | ||
if (gen <= condemned_gen) | ||
BOOL already_marked = marked (o); | ||
if (already_marked) | ||
{ | ||
set_marked (o); | ||
return TRUE; | ||
return FALSE; | ||
} | ||
set_marked (o); | ||
return TRUE; | ||
} | ||
return FALSE; | ||
#else //USE_REGIONS | ||
|
@@ -23543,14 +23540,21 @@ void gc_heap::save_post_plug_info (uint8_t* last_pinned_plug, uint8_t* last_obje | |
} | ||
} | ||
|
||
//#define PREFETCH | ||
#define PREFETCH | ||
#ifdef PREFETCH | ||
__declspec(naked) void __fastcall Prefetch(void* addr) | ||
inline void Prefetch(void* addr) | ||
{ | ||
__asm { | ||
PREFETCHT0 [ECX] | ||
ret | ||
}; | ||
#ifdef TARGET_AMD64 | ||
|
||
#ifndef _MM_HINT_T0 | ||
#define _MM_HINT_T0 1 | ||
#endif | ||
_mm_prefetch((const char*)addr, _MM_HINT_T0); | ||
#elif defined(TARGET_ARM64) && defined(TARGET_WINDOWS) | ||
__prefetch((const char*)addr); | ||
#else | ||
UNREFERENCED_PARAMETER(addr); | ||
#endif | ||
} | ||
#else //PREFETCH | ||
inline void Prefetch (void* addr) | ||
|
@@ -23606,6 +23610,114 @@ BOOL ref_p (uint8_t* r) | |
return (straight_ref_p (r) || partial_object_p (r)); | ||
} | ||
|
||
mark_queue_t::mark_queue_t() : curr_slot_index(0) | ||
{ | ||
for (size_t i = 0; i < slot_count; i++) | ||
{ | ||
slot_table[i] = nullptr; | ||
} | ||
} | ||
|
||
// place an object in the mark queue | ||
// returns a *different* object or nullptr | ||
// if a non-null object is returned, that object is newly marked | ||
// object o *must* be in a condemned generation | ||
FORCEINLINE | ||
uint8_t *mark_queue_t::queue_mark(uint8_t *o) | ||
{ | ||
Prefetch (o); | ||
|
||
// while the prefetch is taking effect, park our object in the queue | ||
// and fetch an object that has been sitting in the queue for a while | ||
// and where (hopefully) the memory is already in the cache | ||
size_t slot_index = curr_slot_index; | ||
uint8_t* old_o = slot_table[slot_index]; | ||
slot_table[slot_index] = o; | ||
|
||
curr_slot_index = (slot_index + 1) % slot_count; | ||
if (old_o == nullptr) | ||
return nullptr; | ||
|
||
// this causes us to access the method table pointer of the old object | ||
BOOL already_marked = marked (old_o); | ||
if (already_marked) | ||
{ | ||
return nullptr; | ||
} | ||
set_marked (old_o); | ||
return old_o; | ||
} | ||
|
||
// place an object in the mark queue | ||
// returns a *different* object or nullptr | ||
// if a non-null object is returned, that object is newly marked | ||
// check first whether the object o is indeed in a condemned generation | ||
FORCEINLINE | ||
uint8_t *mark_queue_t::queue_mark(uint8_t *o, int condemned_gen) | ||
{ | ||
#ifdef USE_REGIONS | ||
if (!is_in_heap_range (o)) | ||
{ | ||
return nullptr; | ||
} | ||
if (condemned_gen != max_generation && gc_heap::get_region_gen_num (o) > condemned_gen) | ||
{ | ||
return nullptr; | ||
} | ||
return queue_mark(o); | ||
#else //USE_REGIONS | ||
assert (condemned_gen == -1); | ||
|
||
#ifdef MULTIPLE_HEAPS | ||
if (o) | ||
{ | ||
gc_heap* hp = gc_heap::heap_of_gc (o); | ||
assert (hp); | ||
if ((o >= hp->gc_low) && (o < hp->gc_high)) | ||
return queue_mark (o); | ||
} | ||
#else //MULTIPLE_HEAPS | ||
if ((o >= gc_heap::gc_low) && (o < gc_heap::gc_high)) | ||
return queue_mark (o); | ||
#endif //MULTIPLE_HEAPS | ||
return nullptr; | ||
#endif //USE_REGIONS | ||
} | ||
|
||
// retrieve a newly marked object from the queue | ||
// returns nullptr if there is no such object | ||
uint8_t* mark_queue_t::drain() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Should this be renamed to something that better implies that it just marks one object and returns it (maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think of this method as draining but just needs to return if there's still objects to mark. it does drain the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. then I would probably do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, the object returned is already marked, so |
||
{ | ||
size_t slot_index = curr_slot_index; | ||
size_t empty_slot_count = 0; | ||
while (empty_slot_count < slot_count) | ||
{ | ||
uint8_t* o = slot_table[slot_index]; | ||
slot_table[slot_index] = nullptr; | ||
slot_index = (slot_index + 1) % slot_count; | ||
if (o != nullptr) | ||
{ | ||
BOOL already_marked = marked (o); | ||
if (!already_marked) | ||
{ | ||
set_marked (o); | ||
curr_slot_index = slot_index; | ||
return o; | ||
} | ||
} | ||
empty_slot_count++; | ||
} | ||
return nullptr; | ||
} | ||
|
||
mark_queue_t::~mark_queue_t() | ||
{ | ||
for (size_t slot_index = 0; slot_index < slot_count; slot_index++) | ||
{ | ||
assert(slot_table[slot_index] == nullptr); | ||
} | ||
} | ||
|
||
void gc_heap::mark_object_simple1 (uint8_t* oo, uint8_t* start THREAD_NUMBER_DCL) | ||
{ | ||
SERVER_SC_MARK_VOLATILE(uint8_t*)* mark_stack_tos = (SERVER_SC_MARK_VOLATILE(uint8_t*)*)mark_stack_array; | ||
|
@@ -23665,9 +23777,8 @@ void gc_heap::mark_object_simple1 (uint8_t* oo, uint8_t* start THREAD_NUMBER_DCL | |
|
||
go_through_object_cl (method_table(oo), oo, s, ppslot, | ||
{ | ||
uint8_t* o = *ppslot; | ||
Prefetch(o); | ||
if (gc_mark (o, gc_low, gc_high, condemned_gen)) | ||
uint8_t* o = mark_queue.queue_mark(*ppslot, condemned_gen); | ||
if (o != nullptr) | ||
{ | ||
if (full_p) | ||
{ | ||
|
@@ -23763,9 +23874,8 @@ void gc_heap::mark_object_simple1 (uint8_t* oo, uint8_t* start THREAD_NUMBER_DCL | |
go_through_object (method_table(oo), oo, s, ppslot, | ||
start, use_start, (oo + s), | ||
{ | ||
uint8_t* o = *ppslot; | ||
Prefetch(o); | ||
if (gc_mark (o, gc_low, gc_high,condemned_gen)) | ||
uint8_t* o = mark_queue.queue_mark(*ppslot, condemned_gen); | ||
if (o != nullptr) | ||
{ | ||
if (full_p) | ||
{ | ||
|
@@ -24204,16 +24314,17 @@ gc_heap::mark_object_simple (uint8_t** po THREAD_NUMBER_DCL) | |
snoop_stat.objects_checked_count++; | ||
#endif //SNOOP_STATS | ||
|
||
if (gc_mark1 (o)) | ||
o = mark_queue.queue_mark (o); | ||
if (o != nullptr) | ||
{ | ||
m_boundary (o); | ||
size_t s = size (o); | ||
add_to_promoted_bytes (o, s, thread); | ||
{ | ||
go_through_object_cl (method_table(o), o, s, poo, | ||
{ | ||
uint8_t* oo = *poo; | ||
if (gc_mark (oo, gc_low, gc_high, condemned_gen)) | ||
uint8_t* oo = mark_queue.queue_mark(*poo, condemned_gen); | ||
if (oo != nullptr) | ||
{ | ||
m_boundary (oo); | ||
add_to_promoted_bytes (oo, thread); | ||
|
@@ -24250,6 +24361,45 @@ void gc_heap::mark_object (uint8_t* o THREAD_NUMBER_DCL) | |
#endif //USE_REGIONS | ||
} | ||
|
||
void gc_heap::drain_mark_queue () | ||
{ | ||
int condemned_gen = | ||
#ifdef USE_REGIONS | ||
settings.condemned_generation; | ||
#else | ||
-1; | ||
#endif //USE_REGIONS | ||
|
||
#ifdef MULTIPLE_HEAPS | ||
THREAD_FROM_HEAP; | ||
#else | ||
const int thread = 0; | ||
#endif //MULTIPLE_HEAPS | ||
|
||
uint8_t* o; | ||
while ((o = mark_queue.drain()) != nullptr) | ||
{ | ||
m_boundary (o); | ||
size_t s = size (o); | ||
add_to_promoted_bytes (o, s, thread); | ||
if (contain_pointers_or_collectible (o)) | ||
{ | ||
go_through_object_cl (method_table(o), o, s, poo, | ||
{ | ||
uint8_t* oo = mark_queue.queue_mark(*poo, condemned_gen); | ||
if (oo != nullptr) | ||
{ | ||
m_boundary (oo); | ||
add_to_promoted_bytes (oo, thread); | ||
if (contain_pointers_or_collectible (oo)) | ||
mark_object_simple1 (oo, oo THREAD_NUMBER_ARG); | ||
} | ||
} | ||
); | ||
} | ||
} | ||
} | ||
|
||
#ifdef BACKGROUND_GC | ||
|
||
#ifdef USE_REGIONS | ||
|
@@ -25426,6 +25576,8 @@ void gc_heap::scan_dependent_handles (int condemned_gen_number, ScanContext *sc, | |
if (GCScan::GcDhUnpromotedHandlesExist(sc)) | ||
s_fUnpromotedHandles = TRUE; | ||
|
||
drain_mark_queue(); | ||
|
||
// Synchronize all the threads so we can read our state variables safely. The shared variable | ||
// s_fScanRequired, indicating whether we should scan the tables or terminate the loop, will be set by | ||
// a single thread inside the join. | ||
|
@@ -25844,6 +25996,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) | |
if ((condemned_gen_number == max_generation) && (num_sizedrefs > 0)) | ||
{ | ||
GCScan::GcScanSizedRefs(GCHeap::Promote, condemned_gen_number, max_generation, &sc); | ||
drain_mark_queue(); | ||
fire_mark_event (ETW::GC_ROOT_SIZEDREF, current_promoted_bytes, last_promoted_bytes); | ||
|
||
#ifdef MULTIPLE_HEAPS | ||
|
@@ -25867,26 +26020,30 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) | |
GCScan::GcScanRoots(GCHeap::Promote, | ||
condemned_gen_number, max_generation, | ||
&sc); | ||
drain_mark_queue(); | ||
fire_mark_event (ETW::GC_ROOT_STACK, current_promoted_bytes, last_promoted_bytes); | ||
|
||
#ifdef BACKGROUND_GC | ||
if (gc_heap::background_running_p()) | ||
{ | ||
scan_background_roots (GCHeap::Promote, heap_number, &sc); | ||
drain_mark_queue(); | ||
fire_mark_event (ETW::GC_ROOT_BGC, current_promoted_bytes, last_promoted_bytes); | ||
} | ||
#endif //BACKGROUND_GC | ||
|
||
#ifdef FEATURE_PREMORTEM_FINALIZATION | ||
dprintf(3, ("Marking finalization data")); | ||
finalize_queue->GcScanRoots(GCHeap::Promote, heap_number, 0); | ||
drain_mark_queue(); | ||
fire_mark_event (ETW::GC_ROOT_FQ, current_promoted_bytes, last_promoted_bytes); | ||
#endif // FEATURE_PREMORTEM_FINALIZATION | ||
|
||
dprintf(3,("Marking handle table")); | ||
GCScan::GcScanHandles(GCHeap::Promote, | ||
condemned_gen_number, max_generation, | ||
&sc); | ||
drain_mark_queue(); | ||
fire_mark_event (ETW::GC_ROOT_HANDLES, current_promoted_bytes, last_promoted_bytes); | ||
|
||
if (!full_p) | ||
|
@@ -25998,6 +26155,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) | |
update_old_card_survived(); | ||
#endif //USE_REGIONS | ||
|
||
drain_mark_queue(); | ||
fire_mark_event (ETW::GC_ROOT_OLDER, current_promoted_bytes, last_promoted_bytes); | ||
} | ||
} | ||
|
@@ -26006,6 +26164,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) | |
if (do_mark_steal_p) | ||
{ | ||
mark_steal(); | ||
drain_mark_queue(); | ||
fire_mark_event (ETW::GC_ROOT_STEAL, current_promoted_bytes, last_promoted_bytes); | ||
} | ||
#endif //MH_SC_MARK | ||
|
@@ -26019,6 +26178,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) | |
// handle table has been fully promoted. | ||
GCScan::GcDhInitialScan(GCHeap::Promote, condemned_gen_number, max_generation, &sc); | ||
scan_dependent_handles(condemned_gen_number, &sc, true); | ||
drain_mark_queue(); | ||
fire_mark_event (ETW::GC_ROOT_DH_HANDLES, current_promoted_bytes, last_promoted_bytes); | ||
|
||
#ifdef MULTIPLE_HEAPS | ||
|
@@ -26101,12 +26261,14 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) | |
#ifdef FEATURE_PREMORTEM_FINALIZATION | ||
dprintf (3, ("Finalize marking")); | ||
finalize_queue->ScanForFinalization (GCHeap::Promote, condemned_gen_number, mark_only_p, __this); | ||
drain_mark_queue(); | ||
fire_mark_event (ETW::GC_ROOT_NEW_FQ, current_promoted_bytes, last_promoted_bytes); | ||
GCToEEInterface::DiagWalkFReachableObjects(__this); | ||
|
||
// Scan dependent handles again to promote any secondaries associated with primaries that were promoted | ||
// for finalization. As before scan_dependent_handles will also process any mark stack overflow. | ||
scan_dependent_handles(condemned_gen_number, &sc, false); | ||
drain_mark_queue(); | ||
fire_mark_event (ETW::GC_ROOT_DH_HANDLES, current_promoted_bytes, last_promoted_bytes); | ||
#endif //FEATURE_PREMORTEM_FINALIZATION | ||
|
||
|
@@ -31575,7 +31737,7 @@ uint8_t* tree_search (uint8_t* tree, uint8_t* old_address) | |
assert (candidate < tree); | ||
candidate = tree; | ||
tree = tree + cn; | ||
Prefetch (tree - 8); | ||
Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left); | ||
continue; | ||
} | ||
else | ||
|
@@ -31586,7 +31748,7 @@ uint8_t* tree_search (uint8_t* tree, uint8_t* old_address) | |
if ((cn = node_left_child (tree)) != 0) | ||
{ | ||
tree = tree + cn; | ||
Prefetch (tree - 8); | ||
Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left); | ||
continue; | ||
} | ||
else | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
__builtin_prefetch
should work on non-Windows. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://clang.llvm.org/docs/LanguageExtensions.html describes the arguments.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ah right, on linux we should use
__builtin_prefetch
. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks Jan - it looks like calling it with the default arguments should be just fine for our purposes.