From aaac67d2afed1afbb2fbbeb31502b83f4275ab1c Mon Sep 17 00:00:00 2001 From: Mike McLaughlin Date: Mon, 6 Jan 2020 22:04:39 -0800 Subject: [PATCH] Fix line number issues in SOS The change is to subtract 1 from the IP used to get the source/line info when the stack frame is a leaf node. For hardware exceptions, the IP has already been adjusted (+1) by the runtime so they cancel out. Fixes in SOS the same issues reported in https://github.com/dotnet/coreclr/issues/27765 and https://github.com/dotnet/coreclr/issues/25740 for the runtime's unhandled exception message. Add some more PrintException -lines testing for thrown and hardware exceptions. Gracefully fail with runtime module size == 0. Fix "clrmodules" failure our vendors found. --- src/SOS/SOS.NETCore/SymbolReader.cs | 3 +- src/SOS/SOS.UnitTests/Scripts/DivZero.script | 17 +++++- .../SOS.UnitTests/Scripts/SimpleThrow.script | 14 ++++- src/SOS/Strike/disasm.h | 9 ++- src/SOS/Strike/exts.h | 2 + src/SOS/Strike/runtime.cpp | 12 +++- src/SOS/Strike/strike.cpp | 57 +++++++++++++++++-- src/SOS/Strike/util.cpp | 6 +- src/SOS/Strike/util.h | 2 +- src/Tools/dotnet-dump/Analyzer.cs | 7 ++- 10 files changed, 114 insertions(+), 15 deletions(-) diff --git a/src/SOS/SOS.NETCore/SymbolReader.cs b/src/SOS/SOS.NETCore/SymbolReader.cs index 50231e29ba..1f17a22843 100644 --- a/src/SOS/SOS.NETCore/SymbolReader.cs +++ b/src/SOS/SOS.NETCore/SymbolReader.cs @@ -19,6 +19,7 @@ using System.Reflection.PortableExecutable; using System.Runtime.InteropServices; using System.Threading; +using System.Threading.Tasks; namespace SOS { @@ -309,7 +310,7 @@ public static void LoadNativeSymbols(SymbolFileCallback callback, IntPtr paramet } } } - catch (Exception ex) when (ex is BadInputFormatException || ex is InvalidVirtualAddressException) + catch (Exception ex) when (ex is BadInputFormatException || ex is InvalidVirtualAddressException || ex is TaskCanceledException) { s_tracer.Error("{0}/{1:X16}: {2}", moduleFilePath, address, ex.Message); } diff --git a/src/SOS/SOS.UnitTests/Scripts/DivZero.script b/src/SOS/SOS.UnitTests/Scripts/DivZero.script index 47379e38de..be215a4a99 100644 --- a/src/SOS/SOS.UnitTests/Scripts/DivZero.script +++ b/src/SOS/SOS.UnitTests/Scripts/DivZero.script @@ -10,7 +10,6 @@ LOADSOS # Verifying that PrintException gives us the right exception in the format above. SOSCOMMAND:PrintException - VERIFY:Exception object:\s+\s+ VERIFY:Exception type:\s+System\.DivideByZeroException\s+ VERIFY:Message:\s+(|Attempted to divide by zero\.)\s+ @@ -24,6 +23,22 @@ VERIFY:\s+\s+\s+[Dd]iv[Zz]ero.*!C\.Main(\(.*\))?\+0x\s+ VERIFY:(StackTraceString: \s+)?\s+ VERIFY:HResult:\s+80020012\s+ +SOSCOMMAND:PrintException -lines +VERIFY:Exception object:\s+\s+ +VERIFY:Exception type:\s+System\.DivideByZeroException\s+ +VERIFY:Message:\s+(|Attempted to divide by zero\.)\s+ +VERIFY:InnerException:\s+\s+ +VERIFY:StackTrace \(generated\):\s+ +VERIFY:\s+SP\s+IP\s+\s+Function\s+ +VERIFY:\s+\s+\s+[Dd]iv[Zz]ero.*!C\.DivideByZero(\(.*\))?\+0x\s+ +VERIFY:\[.*[\\|/]Debuggees[\\|/](dotnet.+[\\|/])?DivZero[\\|/]DivZero\.cs @ 12\s*\]\s* +VERIFY:\s+\s+\s+[Dd]iv[Zz]ero.*!C\.F3(\(.*\))?\+0x\s+ +VERIFY:\[.*[\\|/]Debuggees[\\|/](dotnet.+[\\|/])?DivZero[\\|/]DivZero\.cs @ 21\s*\]\s* +VERIFY:\s+\s+\s+[Dd]iv[Zz]ero.*!C\.F2(\(.*\))?\+0x\s+ +VERIFY:\[.*[\\|/]Debuggees[\\|/](dotnet.+[\\|/])?DivZero[\\|/]DivZero\.cs @ 33\s*\]\s* +VERIFY:\s+\s+\s+[Dd]iv[Zz]ero.*!C\.Main(\(.*\))?\+0x\s+ +VERIFY:\[.*[\\|/]Debuggees[\\|/](dotnet.+[\\|/])?DivZero[\\|/]DivZero\.cs @ 54\s*\]\s* + # Verify that Threads (clrthreads) works IFDEF:DOTNETDUMP SOSCOMMAND:clrthreads diff --git a/src/SOS/SOS.UnitTests/Scripts/SimpleThrow.script b/src/SOS/SOS.UnitTests/Scripts/SimpleThrow.script index 278a45f3da..c2bf379c88 100644 --- a/src/SOS/SOS.UnitTests/Scripts/SimpleThrow.script +++ b/src/SOS/SOS.UnitTests/Scripts/SimpleThrow.script @@ -33,6 +33,18 @@ VERIFY:\s+\s+\s+[Ss]imple[Tt]hrow.*!(\$0_)?UserObject\.UseObject VERIFY:\s+\s+\s+[Ss]imple[Tt]hrow.*!(\$0_)?Simple\.Main.*\+0x\s+ VERIFY:HResult:\s+80131509 +SOSCOMMAND:PrintException -lines +VERIFY:Exception object:\s+\s+ +VERIFY:Exception type:\s+System\.InvalidOperationException\s+ +VERIFY:Message:\s+(|Throwing an invalid operation\.\.\.\.)\s+ +VERIFY:InnerException:\s+\s+ +VERIFY:StackTrace\s+\(generated\):\s+ +VERIFY:\s+SP\s+IP\s+Function\s+ +VERIFY:\s+\s+\s+[Ss]imple[Tt]hrow.*!(\$0_)?UserObject\.UseObject.*\+0x\s* +VERIFY:\[.*[\\|/]Debuggees[\\|/](dotnet.+[\\|/])?SimpleThrow[\\|/]UserObject\.cs @ 16\s*\]\s* +VERIFY:\s+\s+\s+[Ss]imple[Tt]hrow.*!(\$0_)?Simple\.Main.*\+0x\s+ +VERIFY:\[.*[\\|/]Debuggees[\\|/](dotnet.+[\\|/])?SimpleThrow[\\|/]SimpleThrow\.cs @ 9\s*\]\s* + # Verify that Threads (clrthreads) works IFDEF:DOTNETDUMP SOSCOMMAND:clrthreads @@ -54,6 +66,6 @@ SOSCOMMAND:ClrStack VERIFY:.*OS Thread Id:\s+0x\s+.* VERIFY:\s+Child\s+SP\s+IP\s+Call Site\s+ VERIFY:\s+\s+\s+(\*\*\* WARNING: Unable to verify checksum for SimpleThrow.exe\s*)?UserObject\.UseObject.*\s+ -VERIFY:[.*[\\|/]Debuggees[\\|/](dotnet.+[\\|/])?SimpleThrow[\\|/]UserObject\.cs @ 18\s*\] +VERIFY:[.*[\\|/]Debuggees[\\|/](dotnet.+[\\|/])?SimpleThrow[\\|/]UserObject\.cs @ 16\s*\] VERIFY:\s+\s+\s+Simple\.Main\(\)\s+ VERIFY:[.*[\\|/]Debuggees[\\|/](dotnet.+[\\|/])?SimpleThrow[\\|/]SimpleThrow\.cs @ 9\s*\] diff --git a/src/SOS/Strike/disasm.h b/src/SOS/Strike/disasm.h index 1eb3920cb0..fb2ee3e640 100644 --- a/src/SOS/Strike/disasm.h +++ b/src/SOS/Strike/disasm.h @@ -125,7 +125,6 @@ eTargetType GetFinalTarget(DWORD_PTR callee, DWORD_PTR* finalMDorIP); #ifndef THUMB_CODE #define THUMB_CODE 1 #endif -#define STACKWALK_CONTROLPC_ADJUST_OFFSET 2 #ifdef SOS_TARGET_X86 @@ -177,6 +176,8 @@ class X86Machine : public IMachine virtual void DumpGCInfo(GCInfoToken gcInfoToken, unsigned methodSize, printfFtn gcPrintf, bool encBytes, bool bPrintHeader) const; + int StackWalkIPAdjustOffset() const { return 1; } + private: X86Machine() {} ~X86Machine() {} @@ -244,6 +245,8 @@ class ARMMachine : public IMachine virtual void DumpGCInfo(GCInfoToken gcInfoToken, unsigned methodSize, printfFtn gcPrintf, bool encBytes, bool bPrintHeader) const; + int StackWalkIPAdjustOffset() const { return 2; } + private: ARMMachine() {} ~ARMMachine() {} @@ -313,6 +316,8 @@ class AMD64Machine : public IMachine virtual void DumpGCInfo(GCInfoToken gcInfoToken, unsigned methodSize, printfFtn gcPrintf, bool encBytes, bool bPrintHeader) const; + int StackWalkIPAdjustOffset() const { return 1; } + private: AMD64Machine() {} ~AMD64Machine() {} @@ -378,6 +383,8 @@ class ARM64Machine : public IMachine virtual void DumpGCInfo(GCInfoToken gcInfoToken, unsigned methodSize, printfFtn gcPrintf, bool encBytes, bool bPrintHeader) const; + int StackWalkIPAdjustOffset() const { return 4; } + private: ARM64Machine() {} ~ARM64Machine() {} diff --git a/src/SOS/Strike/exts.h b/src/SOS/Strike/exts.h index cefec80d91..2d630b87bd 100644 --- a/src/SOS/Strike/exts.h +++ b/src/SOS/Strike/exts.h @@ -390,6 +390,8 @@ class IMachine typedef void (*printfFtn)(const char* fmt, ...); // Dumps the GCInfo virtual void DumpGCInfo(GCInfoToken gcInfoToken, unsigned methodSize, printfFtn gcPrintf, bool encBytes, bool bPrintHeader) const = 0; + // The amount of bytes to adjust the IP for software exception throw instructions (the STACKWALK_CONTROLPC_ADJUST_OFFSET define in the runtime) + virtual int StackWalkIPAdjustOffset() const = 0; protected: IMachine() {} diff --git a/src/SOS/Strike/runtime.cpp b/src/SOS/Strike/runtime.cpp index cafe21bd1c..04d48bfa09 100644 --- a/src/SOS/Strike/runtime.cpp +++ b/src/SOS/Strike/runtime.cpp @@ -85,8 +85,16 @@ HRESULT Runtime::CreateInstance(bool isDesktop, Runtime **ppRuntime) } if (SUCCEEDED(hr)) { - *ppRuntime = new Runtime(isDesktop, moduleIndex, moduleAddress, moduleSize); - OnUnloadTask::Register(CleanupRuntimes); + if (moduleSize > 0) + { + *ppRuntime = new Runtime(isDesktop, moduleIndex, moduleAddress, moduleSize); + OnUnloadTask::Register(CleanupRuntimes); + } + else + { + ExtOut("Runtime (%s) module size == 0\n", runtimeModuleName); + hr = E_INVALIDARG; + } } } return hr; diff --git a/src/SOS/Strike/strike.cpp b/src/SOS/Strike/strike.cpp index 27814e649a..973ec62cb2 100644 --- a/src/SOS/Strike/strike.cpp +++ b/src/SOS/Strike/strike.cpp @@ -2384,13 +2384,15 @@ size_t FormatGeneratedException (DWORD_PTR dataPtr, UINT bytes, __out_ecount_opt(bufferLength) WCHAR *wszBuffer, size_t bufferLength, - BOOL bAsync, + BOOL bAsync, // hardware exception if true BOOL bNestedCase = FALSE, BOOL bLineNumbers = FALSE) { UINT count = bytes / sizeof(StackTraceElement); size_t Length = 0; + _ASSERTE(g_targetMachine != nullptr); + if (wszBuffer && bufferLength > 0) { wszBuffer[0] = L'\0'; @@ -2464,7 +2466,27 @@ size_t FormatGeneratedException (DWORD_PTR dataPtr, WCHAR filename[MAX_LONGPATH] = W(""); ULONG linenum = 0; if (bLineNumbers && - SUCCEEDED(GetLineByOffset(TO_CDADDR(ste.ip), &linenum, filename, _countof(filename)))) + // To get the source line number of the actual code that threw an exception, the IP needs + // to be adjusted in certain cases. + // + // The IP of the leaf node stack frame (i == 0) points to either: + // + // 1) The instruction that caused a hardware exception (div by zero, null ref, etc). + // 2) After the call instruction to an internal runtime function (FCALL like IL_Throw, + // IL_Rethrow, JIT_OverFlow, etc.) that caused a software exception. + // + // It is case 2 that needs to adjust IP because these IL calls don't contain any other instructions + // after and point to next IL instruction and source line after the throw. We distinguish between #1 + // and #2 by the bAsync flag which is set to true for hardware exceptions. + // + // The rest of the stack frame IPs also point to the instruction after the call. The difference + // in this case is that these IL calls consist of more code after the call (at the minimum a nop + // sequence point) that is consider part of the IL instruction. The IP doesn't need to be adjusted + // in these cases. + // + // When the IP needs to be adjusted it is a lot simpler to decrement IP instead of trying to figure + // out the beginning of the instruction. It is enough for GetLineByOffset to return the correct line number. + SUCCEEDED(GetLineByOffset(TO_CDADDR((i == 0 && !bAsync) ? ste.ip - g_targetMachine->StackWalkIPAdjustOffset() : ste.ip), &linenum, filename, _countof(filename)))) { swprintf_s(wszLineBuffer, _countof(wszLineBuffer), W(" %s [%s @ %d]\n"), so.String(), filename, linenum); } @@ -13439,6 +13461,8 @@ class ClrStackImpl public: static void PrintThread(ULONG osID, BOOL bParams, BOOL bLocals, BOOL bSuppressLines, BOOL bGC, BOOL bFull, BOOL bDisplayRegVals) { + _ASSERTE(g_targetMachine != nullptr); + // Symbols variables ULONG symlines = 0; // symlines will be non-zero only if SYMOPT_LOAD_LINES was set in the symbol options if (!bSuppressLines && SUCCEEDED(g_ExtSymbols->GetSymbolOptions(&symlines))) @@ -13481,7 +13505,9 @@ class ClrStackImpl TableOutput out(3, POINTERSIZE_HEX, AlignRight); out.WriteRow("Child SP", "IP", "Call Site"); - + + int frameNumber = 0; + int internalFrames = 0; do { if (IsInterrupt()) @@ -13516,6 +13542,8 @@ class ClrStackImpl // Print the method/Frame info if (SUCCEEDED(frameDataResult) && FrameData.frameAddr) { + internalFrames++; + // Skip the instruction pointer because it doesn't really mean anything for method frames out.WriteColumn(1, bFull ? String("") : NativePtr(ip)); @@ -13534,8 +13562,29 @@ class ClrStackImpl } else { + // To get the source line number of the actual code that threw an exception, the IP needs + // to be adjusted in certain cases. + // + // The IP of leaf node stack frame (frameNumber == 0) points to either: + // + // 1) Currently executing instruction (if you hit a breakpoint or are single stepping through). + // 2) The instruction that caused a hardware exception (div by zero, null ref, etc). + // 3) After the call to an internal runtime function (FCALL like IL_Throw, IL_Rethrow, + // JIT_OverFlow, etc.) that caused a software exception. + // + // It is case 3 that needs to adjust IP because these IL calls don't contain any other instructions + // after and point to next IL instruction and source line after the throw. We distinguish this from + // #1 or #2 by the present of an internal stack frame (usually HelperMethodFrame). + // + // The rest of the stack frame IPs also point to the instruction after the call. The difference + // in this case is that these IL calls consist of more code after the call (at the minimum a nop + // sequence point) that is consider part of the IL instruction. The IP doesn't need to be adjusted + // in these cases. + bool bAdjustIPForLineNumber = frameNumber == 0 && internalFrames > 0; + frameNumber++; + out.WriteColumn(1, InstructionPtr(ip)); - out.WriteColumn(2, MethodNameFromIP(ip, bSuppressLines, bFull, bFull)); + out.WriteColumn(2, MethodNameFromIP(ip, bSuppressLines, bFull, bFull, bAdjustIPForLineNumber)); // Print out gc references. refCount will be zero if bGC is false (or if we // failed to fetch gc reference information). diff --git a/src/SOS/Strike/util.cpp b/src/SOS/Strike/util.cpp index 4bde52925d..6cdae98c51 100644 --- a/src/SOS/Strike/util.cpp +++ b/src/SOS/Strike/util.cpp @@ -5169,7 +5169,7 @@ WString GetFrameFromAddress(TADDR frameAddr, IXCLRDataStackWalk *pStackWalk, BOO return frameOutput; } -WString MethodNameFromIP(CLRDATA_ADDRESS ip, BOOL bSuppressLines, BOOL bAssemblyName, BOOL bDisplacement) +WString MethodNameFromIP(CLRDATA_ADDRESS ip, BOOL bSuppressLines, BOOL bAssemblyName, BOOL bDisplacement, BOOL bAdjustIPForLineNumber) { ULONG linenum; WString methodOutput; @@ -5242,7 +5242,9 @@ WString MethodNameFromIP(CLRDATA_ADDRESS ip, BOOL bSuppressLines, BOOL bAssembly ArrayHolder wszFileName = new WCHAR[MAX_LONGPATH]; if (!bSuppressLines && - SUCCEEDED(GetLineByOffset(TO_CDADDR(ip), &linenum, wszFileName, MAX_LONGPATH))) + // If the IP needs to be adjusted, it is a lot simpler to decrement IP instead of trying to figure out + // the beginning of the instruction. It is enough for GetLineByOffset to return the correct line number. + SUCCEEDED(GetLineByOffset(TO_CDADDR(bAdjustIPForLineNumber ? ip - g_targetMachine->StackWalkIPAdjustOffset() : ip), &linenum, wszFileName, MAX_LONGPATH))) { methodOutput += WString(W(" [")) + wszFileName + W(" @ ") + Decimal(linenum) + W("]"); } diff --git a/src/SOS/Strike/util.h b/src/SOS/Strike/util.h index 92666b8a15..28b6cccda1 100644 --- a/src/SOS/Strike/util.h +++ b/src/SOS/Strike/util.h @@ -2589,7 +2589,7 @@ typedef struct _CROSS_PLATFORM_CONTEXT { WString BuildRegisterOutput(const SOSStackRefData &ref, bool printObj = true); -WString MethodNameFromIP(CLRDATA_ADDRESS methodDesc, BOOL bSuppressLines = FALSE, BOOL bAssemblyName = FALSE, BOOL bDisplacement = FALSE); +WString MethodNameFromIP(CLRDATA_ADDRESS methodDesc, BOOL bSuppressLines = FALSE, BOOL bAssemblyName = FALSE, BOOL bDisplacement = FALSE, BOOL bAdjustIPForLineNumber = FALSE); HRESULT GetGCRefs(ULONG osID, SOSStackRefData **ppRefs, unsigned int *pRefCnt, SOSStackRefError **ppErrors, unsigned int *pErrCount); WString GetFrameFromAddress(TADDR frameAddr, IXCLRDataStackWalk *pStackwalk = NULL, BOOL bAssemblyName = FALSE); diff --git a/src/Tools/dotnet-dump/Analyzer.cs b/src/Tools/dotnet-dump/Analyzer.cs index b48a758102..339901db54 100644 --- a/src/Tools/dotnet-dump/Analyzer.cs +++ b/src/Tools/dotnet-dump/Analyzer.cs @@ -183,7 +183,10 @@ private ClrRuntime CreateRuntime(DataTarget target) string dacFilePath = GetDacFile(clrInfo); try { - runtime = clrInfo.CreateRuntime(dacFilePath); + // Ignore the DAC version mismatch that can happen on Linux because the clrmd ELF dump + // reader returns 0.0.0.0 for the runtime module that the DAC is matched against. This + // will be fixed in clrmd 2.0 but not 1.1. + runtime = clrInfo.CreateRuntime(dacFilePath, ignoreMismatch: RuntimeInformation.IsOSPlatform(OSPlatform.Linux)); } catch (DllNotFoundException ex) { @@ -240,7 +243,7 @@ private string GetDacFile(ClrInfo clrInfo) if (_dacFilePath == null) { - throw new FileNotFoundException("Could not find matching DAC for this runtime: {0}", clrInfo.ModuleInfo.FileName); + throw new FileNotFoundException($"Could not find matching DAC for this runtime: {clrInfo.ModuleInfo.FileName}"); } _isDesktop = clrInfo.Flavor == ClrFlavor.Desktop; }