Skip to content

Commit

Permalink
[Mono] Fix deadlock during gcdump when using interp full AOT fallback. (
Browse files Browse the repository at this point in the history
#89726)

GC thread doing gcdump will dump the EventPipe events after world
has restarted but before releasing GC lock. There was one case
where we logged a bulk type during that face where a type didn't
have its finalizer data initialized and at the same time main thread
running interpreter held loader lock and tried to acquire GC lock,
that triggers a deadlock since the GC thread (still holding the GC lock)
would trigger logic to initialize the finalizer data, but that in turn
requires the GC lock.

Fix delays the fire of GC dump events until after we completed GC.
All GC dump events have been cached into a temp file and will be
written into EventPipe, the only potential issue with this is that
we keep vtable pointers in cache that will be resolved when emitting
EventPipe event, after releasing GC lock, but since we currently
won't unload vtables, that is not an issue, but needs to be addressed
if/when we implement ability to unload vtables. We would then need
to root the vtables while stored in temporary cache.

Commit also enable GC dump test on Mono platforms.
  • Loading branch information
lateralusX authored Aug 3, 2023
1 parent b2e18f8 commit 44b0543
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 6 deletions.
5 changes: 4 additions & 1 deletion src/mono/mono/eventpipe/ep-rt-mono-runtime-provider.c
Original file line number Diff line number Diff line change
Expand Up @@ -4165,6 +4165,9 @@ calculate_live_keywords (
ep_requires_lock_held ();
}

// TODO: If/when we can unload vtables, we would need to temporary
// root the vtable pointers currently stored in buffered gc events.
// Once all events are fired, we can remove root from GC.
static
void
gc_event_callback (
Expand Down Expand Up @@ -4199,7 +4202,6 @@ gc_event_callback (
if (is_gc_heap_dump_enabled (context)) {
EP_ASSERT (context->state == GC_HEAP_DUMP_CONTEXT_STATE_DUMP);
gc_heap_dump_context_build_roots (context);
fire_buffered_gc_events (context);
}
break;
}
Expand Down Expand Up @@ -4289,6 +4291,7 @@ gc_heap_dump_trigger_callback (MonoProfiler *prof)
mono_profiler_set_gc_event_callback (_ep_rt_mono_default_profiler_provider, gc_event_callback);
mono_gc_collect (mono_gc_max_generation ());
mono_profiler_set_gc_event_callback (_ep_rt_mono_default_profiler_provider, NULL);
fire_buffered_gc_events (heap_dump_context);
}

heap_dump_context->state = GC_HEAP_DUMP_CONTEXT_STATE_END;
Expand Down
6 changes: 3 additions & 3 deletions src/tests/issues.targets
Original file line number Diff line number Diff line change
Expand Up @@ -1895,9 +1895,6 @@
<ExcludeList Include="$(XunitTestBinBase)/Regressions/coreclr/22021/consumer/**">
<Issue>needs triage</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/tracing/eventpipe/gcdump/gcdump/**">
<Issue>needs triage</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/Interop/DllImportAttribute/DllImportPath/**">
<Issue>needs triage</Issue>
</ExcludeList>
Expand Down Expand Up @@ -3523,6 +3520,9 @@
<ExcludeList Include="$(XunitTestBinBase)/tracing/eventpipe/simpleruntimeeventvalidation/**">
<Issue>System.Diagnostics.Process is not supported on wasm</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/tracing/eventpipe/gcdump/**">
<Issue>System.Diagnostics.Process is not supported on wasm</Issue>
</ExcludeList>
</ItemGroup>

<ItemGroup Condition="'$(TargetOS)' == 'android'" >
Expand Down
23 changes: 21 additions & 2 deletions src/tests/tracing/eventpipe/gcdump/gcdump.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,16 @@ namespace Tracing.Tests.EventSourceError
// Regression test for https://github.com/dotnet/runtime/issues/38639
public class GCDumpTest
{
private static bool _seenGCStart = false;
private static bool _seenGCStop = false;
private static int _bulkTypeCount = 0;
private static int _bulkNodeCount = 0;
private static int _bulkEdgeCount = 0;
private static int _bulkRootEdgeCount = 0;
private static int _bulkRootStaticVarCount = 0;

private static readonly ulong GC_HeapDump_Keyword = 0x100000UL;
private static ManualResetEvent _gcStopReceived = new ManualResetEvent(false);

public static int Main()
{
Expand All @@ -48,11 +51,17 @@ public static int Main()

private static Action _eventGeneratingAction = () =>
{
// This space intentionally left blank
// Wait up to 10 seconds to receive GCStop event.
_gcStopReceived.WaitOne(10000);
};

private static Func<EventPipeEventSource, Func<int>> _DoesRundownContainMethodEvents = (source) =>
{
source.Clr.GCStart += (GCStartTraceData data) =>
{
_seenGCStart = true;
};

source.Clr.TypeBulkType += (GCBulkTypeTraceData data) =>
{
_bulkTypeCount += data.Count;
Expand All @@ -78,13 +87,21 @@ public static int Main()
_bulkRootStaticVarCount += data.Count;
};

source.Clr.GCStop += (GCEndTraceData data) =>
{
_seenGCStop = true;
_gcStopReceived.Set();
};

return () =>
{
// Hopefully it is low enough to be resilient to changes in the runtime
// and high enough to catch issues. There should be between hundreds and thousands
// for each, but the number is variable and the point of the test is to verify
// that we get any events at all.
if (_bulkTypeCount > 50
if (_seenGCStart
&& _seenGCStop
&& _bulkTypeCount > 50
&& _bulkNodeCount > 50
&& _bulkEdgeCount > 50
&& _bulkRootEdgeCount > 50
Expand All @@ -95,6 +112,8 @@ public static int Main()


Console.WriteLine($"Test failed due to missing GC heap events.");
Console.WriteLine($"_seenGCStart = {_seenGCStart}");
Console.WriteLine($"_seenGCStop = {_seenGCStop}");
Console.WriteLine($"_bulkTypeCount = {_bulkTypeCount}");
Console.WriteLine($"_bulkNodeCount = {_bulkNodeCount}");
Console.WriteLine($"_bulkEdgeCount = {_bulkEdgeCount}");
Expand Down

0 comments on commit 44b0543

Please sign in to comment.