From a8d26b2a71e01974019947e73d7aa6f97a92e3cb Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Mon, 20 Jul 2020 14:22:51 +0200 Subject: [PATCH 01/36] CoreRT-ARM64: Helper node asm code generation --- .../Target_ARM64/ARM64Emitter.cs | 68 +++++- .../ARM64ReadyToRunGenericHelperNode.cs | 202 +++++++++++++++++- .../Target_ARM64/ARM64ReadyToRunHelperNode.cs | 188 +++++++++++++++- .../Target_ARM64/TargetRegisterMap.cs | 2 + 4 files changed, 449 insertions(+), 11 deletions(-) diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs index ee2a3c7516b..d0c726bf0ce 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs @@ -25,7 +25,7 @@ public void EmitMOV(Register regDst, ref AddrMode memory) public void EmitMOV(Register regDst, Register regSrc) { - throw new NotImplementedException(); + Builder.EmitUInt((uint)(0b1_0_1_01010_000_00000_000000_11111_00000u | ((uint)regSrc << 16) | (uint)regDst)); } public void EmitMOV(Register regDst, ushort imm16) @@ -35,6 +35,17 @@ public void EmitMOV(Register regDst, ushort imm16) Builder.EmitUInt(instruction); } + public void EmitMOV(Register regDst, ISymbolNode symbol) + { + // ADRP regDst, [symbol (21bit ADRP thing)] + Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21); + Builder.EmitUInt(0x90000000u | (byte)regDst); + + // Add regDst, (12bit LDR page offset reloc) + Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A); + Builder.EmitUInt((uint)(0b1_0_0_100010_0_000000000000_00000_00000 | ((byte)regDst << 5) | (byte)regDst)); + } + // ldr regDst, [PC + imm19] public void EmitLDR(Register regDst, short offset) { @@ -54,6 +65,25 @@ public void EmitLDR(Register regDst, Register regAddr) Builder.EmitUInt(instruction); } + public void EmitLDR(Register regDst, Register regSrc, int offset) + { + Debug.Assert(offset >= -255 && offset <= 4095); + if (offset >= 0) + { + Debug.Assert(offset % 8 == 0); + + offset /= 8; + + Builder.EmitUInt((uint)(0b11_1110_0_1_0_1_000000000000_00000_00000u | ((uint)offset << 10) | ((uint)regSrc << 5) | (uint)regDst)); + } + else + { + uint o = (uint)offset & 0x1FF; + + Builder.EmitUInt((uint)(0b11_1110_0_0_010_000000000_1_1_00000_00000u | (o << 12) | ((uint)regSrc << 5) | (uint)regDst)); + } + } + public void EmitLEAQ(Register reg, ISymbolNode symbol, int delta = 0) { throw new NotImplementedException(); @@ -69,12 +99,36 @@ public void EmitCMP(ref AddrMode addrMode, sbyte immediate) throw new NotImplementedException(); } + public void EmitCMP(Register reg, sbyte immediate) + { + if (immediate >= 0) + { + Builder.EmitUInt((uint)(0b1_1_1_100010_0_000000000000_00000_11111u | immediate << 10) | ((uint)reg << 5)); + } + else + { + throw new NotImplementedException(); + } + } + // add reg, immediate public void EmitADD(Register reg, byte immediate) { Builder.EmitInt((int)(0x91 << 24) | (immediate << 10) | ((byte)reg << 5) | (byte) reg); } + public void EmitSUB(Register reg, int immediate) + { + if (immediate >= 0) + { + Builder.EmitUInt((uint)(0b1_1_0_100010_0_000000000000_00000_00000u | immediate << 12) | ((uint)reg << 5) | (uint)reg); + } + else + { + throw new NotImplementedException(); + } + } + public void EmitJMP(ISymbolNode symbol) { if (symbol.RepresentsIndirectionCell) @@ -100,9 +154,14 @@ public void EmitJMP(ISymbolNode 
symbol) } } + public void EmitJMP(Register reg) + { + Builder.EmitUInt((uint)(0b11010110_0_0_0_11111_00000_0_00000_00000u | ((uint)reg << 5))); + } + public void EmitINT3() { - throw new NotImplementedException(); + Builder.EmitUInt(0b11010100_001_1111111111111111_000_0_0); } public void EmitJmpToAddrMode(ref AddrMode addrMode) @@ -112,12 +171,13 @@ public void EmitJmpToAddrMode(ref AddrMode addrMode) public void EmitRET() { - throw new NotImplementedException(); + Builder.EmitUInt(0b11010110_0_1_0_11111_00000_0_11110_00000); } public void EmitRETIfEqual() { - throw new NotImplementedException(); + Builder.EmitUInt(0b01010100_0000000000000000010_0_0001u); + EmitRET(); } private bool InSignedByteRange(int i) diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs index 7e510164a45..427c8d1f64b 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs @@ -15,22 +15,197 @@ partial class ReadyToRunGenericHelperNode { protected Register GetContextRegister(ref /* readonly */ ARM64Emitter encoder) { - throw new NotImplementedException(); - } + if (_id == ReadyToRunHelperId.DelegateCtor) + return encoder.TargetRegister.Arg2; + else + return encoder.TargetRegister.Arg0; } protected void EmitDictionaryLookup(NodeFactory factory, ref ARM64Emitter encoder, Register context, Register result, GenericLookupResult lookup, bool relocsOnly) { - throw new NotImplementedException(); + // INVARIANT: must not trash context register + + // Find the generic dictionary slot + int dictionarySlot = 0; + if (!relocsOnly) + { + // The concrete slot won't be known until we're emitting data - don't ask for it in relocsOnly. + dictionarySlot = factory.GenericDictionaryLayout(_dictionaryOwner).GetSlotForEntry(lookup); + } + + // Load the generic dictionary cell + encoder.EmitLDR(result, context, dictionarySlot * factory.Target.PointerSize); + + switch (lookup.LookupResultReferenceType(factory)) + { + case GenericLookupResultReferenceType.Indirect: + // Do another indirection + encoder.EmitLDR(result, result); + break; + + case GenericLookupResultReferenceType.ConditionalIndirect: + throw new NotImplementedException(); + + default: + break; + } } protected sealed override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder, bool relocsOnly) { - throw new NotImplementedException(); + // First load the generic context into the context register. + EmitLoadGenericContext(factory, ref encoder, relocsOnly); + + Register contextRegister = GetContextRegister(ref encoder); + + switch (_id) + { + case ReadyToRunHelperId.GetNonGCStaticBase: + { + Debug.Assert(contextRegister == encoder.TargetRegister.Arg0); + + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Result, _lookupSignature, relocsOnly); + + MetadataType target = (MetadataType)_target; + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + encoder.EmitRET(); + } + else + { + // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. 
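+                        // The context's "initialized" flag sits one pointer past its start (after the
+                        // cctor method address); a value of 1 means the cctor has already run, so we
+                        // can return the base without calling the helper.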
+ int cctorContextSize = NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target); + encoder.EmitLDR(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg0, ((short)(factory.Target.PointerSize - cctorContextSize))); + encoder.EmitCMP(encoder.TargetRegister.Arg1, 1); + encoder.EmitRETIfEqual(); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); + encoder.EmitSUB(encoder.TargetRegister.Arg0, ((byte)(cctorContextSize))); + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnNonGCStaticBase)); + } + } + break; + + case ReadyToRunHelperId.GetGCStaticBase: + { + Debug.Assert(contextRegister == encoder.TargetRegister.Arg0); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg0); + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Result, _lookupSignature, relocsOnly); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); + + MetadataType target = (MetadataType)_target; + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + encoder.EmitRET(); + } + else + { + // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. + GenericLookupResult nonGcRegionLookup = factory.GenericLookup.TypeNonGCStaticBase(target); + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg2, nonGcRegionLookup, relocsOnly); + + int cctorContextSize = NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target); + encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, ((short)(factory.Target.PointerSize - cctorContextSize))); + encoder.EmitCMP(encoder.TargetRegister.Arg3, 1); + encoder.EmitRETIfEqual(); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); + encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2); + encoder.EmitSUB(encoder.TargetRegister.Arg0, cctorContextSize); + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnGCStaticBase)); + } + } + break; + + case ReadyToRunHelperId.GetThreadStaticBase: + { + Debug.Assert(contextRegister == encoder.TargetRegister.Arg0); + + MetadataType target = (MetadataType)_target; + + // Look up the index cell + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg1, _lookupSignature, relocsOnly); + + ISymbolNode helperEntrypoint; + if (factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + // There is a lazy class constructor. We need the non-GC static base because that's where the + // class constructor context lives. 
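+                        // Resolve the non-GC static base from the dictionary, then back the pointer up
+                        // by the context storage size so the helper receives the cctor context itself.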
+ GenericLookupResult nonGcRegionLookup = factory.GenericLookup.TypeNonGCStaticBase(target); + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2, nonGcRegionLookup, relocsOnly); + int cctorContextSize = NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target); + encoder.EmitSUB(encoder.TargetRegister.Arg2, cctorContextSize); + + helperEntrypoint = factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase); + } + else + { + helperEntrypoint = factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType); + } + + // First arg: address of the TypeManager slot that provides the helper with + // information about module index and the type manager instance (which is used + // for initialization on first access). + encoder.EmitLDR(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg1); + + // Second arg: index of the type in the ThreadStatic section of the modules + encoder.EmitLDR(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg1, factory.Target.PointerSize); + + encoder.EmitJMP(helperEntrypoint); + } + break; + + case ReadyToRunHelperId.DelegateCtor: + { + // This is a weird helper. Codegen populated Arg0 and Arg1 with the values that the constructor + // method expects. Codegen also passed us the generic context in Arg2. + // We now need to load the delegate target method into Arg2 (using a dictionary lookup) + // and the optional 4th parameter, and call the ctor. + + Debug.Assert(contextRegister == encoder.TargetRegister.Arg2); + + var target = (DelegateCreationInfo)_target; + + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, _lookupSignature, relocsOnly); + + if (target.Thunk != null) + { + Debug.Assert(target.Constructor.Method.Signature.Length == 3); + encoder.EmitMOV(encoder.TargetRegister.Arg3, target.Thunk); + } + else + { + Debug.Assert(target.Constructor.Method.Signature.Length == 2); + } + + encoder.EmitJMP(target.Constructor); + } + break; + + // These are all simple: just get the thing from the dictionary and we're done + case ReadyToRunHelperId.TypeHandle: + case ReadyToRunHelperId.MethodHandle: + case ReadyToRunHelperId.FieldHandle: + case ReadyToRunHelperId.MethodDictionary: + case ReadyToRunHelperId.MethodEntry: + case ReadyToRunHelperId.VirtualDispatchCell: + case ReadyToRunHelperId.DefaultConstructor: + case ReadyToRunHelperId.TypeHandleForCasting: + { + EmitDictionaryLookup(factory, ref encoder, contextRegister, encoder.TargetRegister.Result, _lookupSignature, relocsOnly); + encoder.EmitRET(); + } + break; + + default: + throw new NotImplementedException(); + } } protected virtual void EmitLoadGenericContext(NodeFactory factory, ref ARM64Emitter encoder, bool relocsOnly) { - throw new NotImplementedException(); + // Assume generic context is already loaded in the context register. } } @@ -38,7 +213,22 @@ partial class ReadyToRunGenericLookupFromTypeNode { protected override void EmitLoadGenericContext(NodeFactory factory, ref ARM64Emitter encoder, bool relocsOnly) { - throw new NotImplementedException(); + // We start with context register pointing to the EEType + Register contextRegister = GetContextRegister(ref encoder); + + // Locate the VTable slot that points to the dictionary + int vtableSlot = 0; + if (!relocsOnly) + { + // The concrete slot won't be known until we're emitting data - don't ask for it in relocsOnly. 
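+                // The dictionary pointer is published as an extra slot in the EEType's vtable,
+                // so a single indexed load below is enough to fetch it.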
+ vtableSlot = VirtualMethodSlotHelper.GetGenericDictionarySlot(factory, (TypeDesc)_dictionaryOwner); + } + + int pointerSize = factory.Target.PointerSize; + int slotOffset = EETypeNode.GetVTableOffset(pointerSize) + (vtableSlot * pointerSize); + + // Load the dictionary pointer from the VTable + encoder.EmitLDR(contextRegister, contextRegister, slotOffset); } } } diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs index 28d7e50239f..2bfaf8cd40a 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs @@ -16,7 +16,193 @@ public partial class ReadyToRunHelperNode { protected override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder, bool relocsOnly) { - throw new NotImplementedException(); + switch (Id) + { + case ReadyToRunHelperId.VirtualCall: + { + MethodDesc targetMethod = (MethodDesc)Target; + + Debug.Assert(!targetMethod.OwningType.IsInterface); + Debug.Assert(!targetMethod.CanMethodBeInSealedVTable()); + + int pointerSize = factory.Target.PointerSize; + + int slot = 0; + if (!relocsOnly) + { + slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, targetMethod, targetMethod.OwningType); + Debug.Assert(slot != -1); + } + + encoder.EmitLDR(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.Arg0, 0); + encoder.EmitLDR(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.IntraProcedureCallScratch1, + EETypeNode.GetVTableOffset(pointerSize) + (slot * pointerSize)); + encoder.EmitJMP(encoder.TargetRegister.IntraProcedureCallScratch1); + } + break; + + case ReadyToRunHelperId.GetNonGCStaticBase: + { + MetadataType target = (MetadataType)Target; + bool hasLazyStaticConstructor = factory.PreinitializationManager.HasLazyStaticConstructor(target); + encoder.EmitMOV(encoder.TargetRegister.Result, factory.TypeNonGCStaticsSymbol(target)); + + if (!hasLazyStaticConstructor) + { + encoder.EmitRET(); + } + else + { + encoder.EmitINT3(); + // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); + encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, + ((short)(factory.Target.PointerSize - NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)))); + encoder.EmitCMP(encoder.TargetRegister.Arg3, 1); + encoder.EmitRETIfEqual(); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); + encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2); + encoder.EmitSUB(encoder.TargetRegister.Arg0, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnNonGCStaticBase)); + } + } + break; + + case ReadyToRunHelperId.GetThreadStaticBase: + { + MetadataType target = (MetadataType)Target; + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeThreadStaticIndex(target)); + + // First arg: address of the TypeManager slot that provides the helper with + // information about module index and the type manager instance (which is used + // for initialization on first access). 
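+                        // TypeThreadStaticIndex is a two-slot blob: the address of the module's
+                        // TypeManager slot followed by the type's index in the ThreadStatic region;
+                        // the two loads below unpack it into Arg0/Arg1.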
+ encoder.EmitLDR(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2); + + // Second arg: index of the type in the ThreadStatic section of the modules + encoder.EmitLDR(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg2, factory.Target.PointerSize); + + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType)); + } + else + { + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); + encoder.EmitSUB(encoder.TargetRegister.Arg2, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + // TODO: performance optimization - inline the check verifying whether we need to trigger the cctor + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase)); + } + } + break; + + case ReadyToRunHelperId.GetGCStaticBase: + { + MetadataType target = (MetadataType)Target; + + encoder.EmitMOV(encoder.TargetRegister.Result, factory.TypeGCStaticsSymbol(target)); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + encoder.EmitRET(); + } + else + { + encoder.EmitINT3(); + // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); + encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, + ((short)(factory.Target.PointerSize - NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)))); + encoder.EmitCMP(encoder.TargetRegister.Arg3, 1); + encoder.EmitRETIfEqual(); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); + encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2); + encoder.EmitSUB(encoder.TargetRegister.Arg0, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnGCStaticBase)); + } + } + break; + + case ReadyToRunHelperId.DelegateCtor: + { + DelegateCreationInfo target = (DelegateCreationInfo)Target; + + if (target.TargetNeedsVTableLookup) + { + Debug.Assert(!target.TargetMethod.CanMethodBeInSealedVTable()); + + encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg1); + + int slot = 0; + if (!relocsOnly) + slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, target.TargetMethod, target.TargetMethod.OwningType); + + Debug.Assert(slot != -1); + encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, + EETypeNode.GetVTableOffset(factory.Target.PointerSize) + (slot * factory.Target.PointerSize)); + } + else + { + ISymbolNode targetMethodNode = target.GetTargetNode(factory); + encoder.EmitMOV(encoder.TargetRegister.Arg2, target.GetTargetNode(factory)); + } + + if (target.Thunk != null) + { + Debug.Assert(target.Constructor.Method.Signature.Length == 3); + encoder.EmitMOV(encoder.TargetRegister.Arg3, target.Thunk); + } + else + { + Debug.Assert(target.Constructor.Method.Signature.Length == 2); + } + + encoder.EmitJMP(target.Constructor); + } + break; + + case ReadyToRunHelperId.ResolveVirtualFunction: + { + throw new NotImplementedException(); + + /* + *** + NOT TESTED!!! 
+ *** + MethodDesc targetMethod = (MethodDesc)Target; + if (targetMethod.OwningType.IsInterface) + { + encoder.EmitMOV(encoder.TargetRegister.Arg1, factory.InterfaceDispatchCell(targetMethod)); + encoder.EmitJMP(factory.ExternSymbol("RhpResolveInterfaceMethod")); + } + else + { + if (relocsOnly) + break; + + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Arg0); + + Debug.Assert(!targetMethod.CanMethodBeInSealedVTable()); + + int slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, targetMethod, targetMethod.OwningType); + Debug.Assert(slot != -1); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result, + ((short)(EETypeNode.GetVTableOffset(factory.Target.PointerSize) + (slot * factory.Target.PointerSize)))); + encoder.EmitRET(); + } + + break; + */ + } + + default: + throw new NotImplementedException(); + } + + + } } } diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/TargetRegisterMap.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/TargetRegisterMap.cs index ad69fdf9451..5a328f4c650 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/TargetRegisterMap.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/TargetRegisterMap.cs @@ -20,6 +20,7 @@ public struct TargetRegisterMap public readonly Register Arg5; public readonly Register Arg6; public readonly Register Arg7; + public readonly Register IntraProcedureCallScratch1; public readonly Register Result; public TargetRegisterMap(TargetOS os) @@ -32,6 +33,7 @@ public TargetRegisterMap(TargetOS os) Arg5 = Register.X5; Arg6 = Register.X6; Arg7 = Register.X7; + IntraProcedureCallScratch1 = Register.X16; Result = Register.X0; } } From 6c4b87b827415f2667d02dd21b59062a6d42b98d Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Tue, 21 Jul 2020 07:47:05 +0200 Subject: [PATCH 02/36] CoreRT ARM64: Add additional relocation support --- .../DependencyAnalysis/ObjectDataBuilder.cs | 1 + .../Compiler/DependencyAnalysis/Relocation.cs | 44 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectDataBuilder.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectDataBuilder.cs index ffc8db575bc..7273b37119f 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectDataBuilder.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectDataBuilder.cs @@ -302,6 +302,7 @@ public void EmitReloc(ISymbolNode symbol, RelocType relocType, int delta = 0) case RelocType.IMAGE_REL_BASED_THUMB_MOV32: case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12L: + case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A: // Do not vacate space for this kind of relocation, because // the space is embedded in the instruction. 
                    break;
diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Relocation.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Relocation.cs
index fbb9408222c..e0dd43e6d44 100644
--- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Relocation.cs
+++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Relocation.cs
@@ -257,6 +257,45 @@ private static unsafe void PutArm64Rel12(uint* pCode, int imm12)
             Debug.Assert(GetArm64Rel12(pCode) == imm12);
         }
 
+        private static unsafe int GetArm64Rel28(uint* pCode)
+        {
+            uint branchInstr = *pCode;
+
+            // first shift 6 bits left to set the sign bit,
+            // then arithmetic shift right by 4 bits
+            int imm28 = (((int)(branchInstr & 0x03FFFFFF)) << 6) >> 4;
+
+            return imm28;
+        }
+
+        private static bool FitsInArm64Rel28(long imm28)
+        {
+            return (imm28 >= -0x08000000L) && (imm28 < 0x08000000L);
+        }
+
+        private static unsafe void PutArm64Rel28(uint* pCode, long imm28)
+        {
+            // Verify that we got a valid offset
+            Debug.Assert(FitsInArm64Rel28(imm28));
+
+            Debug.Assert((imm28 & 0x3) == 0);    // the low two bits must be zero
+
+            uint branchInstr = *pCode;
+
+            branchInstr &= 0xFC000000;       // keep bits 31-26
+
+            Debug.Assert((branchInstr & 0x7FFFFFFF) == 0x14000000);  // Must be B or BL
+
+            // Assemble the pc-relative delta 'imm28' into the branch instruction
+            branchInstr |= (uint)(((imm28 >> 2) & 0x03FFFFFFU));
+
+            *pCode = branchInstr;          // write the assembled instruction
+
+            Debug.Assert(GetArm64Rel28(pCode) == imm28);
+        }
+
+
+
         public Relocation(RelocType relocType, int offset, ISymbolNode target)
         {
             RelocType = relocType;
@@ -286,6 +325,9 @@ public static unsafe void WriteValue(RelocType relocType, void* location, long v
             case RelocType.IMAGE_REL_BASED_THUMB_BRANCH24:
                 PutThumb2BlRel24((ushort*)location, (uint)value);
                 break;
+            case RelocType.IMAGE_REL_BASED_ARM64_BRANCH26:
+                PutArm64Rel28((uint*)location, value);
+                break;
             case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21:
                 PutArm64Rel21((uint*)location, (int)value);
                 break;
@@ -318,6 +360,8 @@ public static unsafe long ReadValue(RelocType relocType, void* location)
                 return (long)GetThumb2Mov32((ushort*)location);
             case RelocType.IMAGE_REL_BASED_THUMB_BRANCH24:
                 return (long)GetThumb2BlRel24((ushort*)location);
+            case RelocType.IMAGE_REL_BASED_ARM64_BRANCH26:
+                return (long)GetArm64Rel28((uint*)location);
             case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21:
                 return GetArm64Rel21((uint*)location);
             case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A:

From 404f4ed23214c69f6cfad22f26db187e23b13da6 Mon Sep 17 00:00:00 2001
From: RalfKornmannEnvision
Date: Tue, 21 Jul 2020 08:17:32 +0200
Subject: [PATCH 03/36] CoreRT ARM64: Use alternative functions for write Barriers

On ARM64 the JIT uses a special register interface to call the write barrier
functions. But since the regular ones are still used in other places, we need
two different implementations.
The helper nodes need to call the custom variants --- src/ILCompiler.Compiler/src/Compiler/JitHelper.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ILCompiler.Compiler/src/Compiler/JitHelper.cs b/src/ILCompiler.Compiler/src/Compiler/JitHelper.cs index 61a20149364..81a609543fd 100644 --- a/src/ILCompiler.Compiler/src/Compiler/JitHelper.cs +++ b/src/ILCompiler.Compiler/src/Compiler/JitHelper.cs @@ -62,13 +62,13 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, break; case ReadyToRunHelper.WriteBarrier: - mangledName = "RhpAssignRef"; + mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? "RhpAssignRefArm64" : "RhpAssignRef"; break; case ReadyToRunHelper.CheckedWriteBarrier: - mangledName = "RhpCheckedAssignRef"; + mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? "RhpCheckedAssignRefArm64" : "RhpCheckedAssignRef"; break; case ReadyToRunHelper.ByRefWriteBarrier: - mangledName = "RhpByRefAssignRef"; + mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? "RhpByRefAssignRefArm64" : "RhpByRefAssignRef"; break; case ReadyToRunHelper.WriteBarrier_EAX: mangledName = "RhpAssignRefEAX"; From 9d4c4cbbf82f9740bb057e7d448b8ff1a7b0603c Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Mon, 3 Aug 2020 07:28:02 +0200 Subject: [PATCH 04/36] ARM64 code generation --- .../DependencyAnalysis/ObjectWriter.cs | 27 ++++++++++------ .../Target_ARM64/ARM64Emitter.cs | 14 +++++++- .../ARM64ReadyToRunGenericHelperNode.cs | 27 ++++++++++------ .../Target_ARM64/ARM64ReadyToRunHelperNode.cs | 32 +++++++++---------- .../Compiler/VectorOfTFieldLayoutAlgorithm.cs | 11 +++++++ .../src/JitInterface/CorInfoImpl.RyuJit.cs | 2 +- 6 files changed, 76 insertions(+), 37 deletions(-) diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs index f38a31ccf4a..21981bcfe61 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs @@ -857,6 +857,8 @@ public void EmitSymbolDefinition(int currentOffset) { foreach (var name in nodes) { + + _sb.Clear(); AppendExternCPrefix(_sb); name.AppendMangledName(_nodeFactory.NameMangler, _sb); @@ -1043,7 +1045,7 @@ public static void EmitObject(string objectFilePath, IEnumerable // The DWARF CFI unwind is implemented for AMD64 & ARM32 only. 
TargetArchitecture tarch = factory.Target.Architecture; if (!factory.Target.IsWindows && - (tarch == TargetArchitecture.X64 || tarch == TargetArchitecture.ARM)) + (tarch == TargetArchitecture.X64 || tarch == TargetArchitecture.ARM || tarch == TargetArchitecture.ARM64)) objectWriter.BuildCFIMap(factory, node); // Build debug location map @@ -1089,17 +1091,24 @@ public static void EmitObject(string objectFilePath, IEnumerable } int size = objectWriter.EmitSymbolReference(reloc.Target, (int)delta, reloc.RelocType); - // Emit a copy of original Thumb2 instruction that came from RyuJIT - if (reloc.RelocType == RelocType.IMAGE_REL_BASED_THUMB_MOV32 || - reloc.RelocType == RelocType.IMAGE_REL_BASED_THUMB_BRANCH24) + // Emit a copy of original Thumb2/ARM64 instruction that came from RyuJIT + + switch (reloc.RelocType) { - unsafe - { - fixed (void* location = &nodeContents.Data[i]) + case RelocType.IMAGE_REL_BASED_THUMB_MOV32: + case RelocType.IMAGE_REL_BASED_THUMB_BRANCH24: + case RelocType.IMAGE_REL_BASED_ARM64_BRANCH26: + case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: + case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A: + case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12L: + unsafe { - objectWriter.EmitBytes((IntPtr)location, size); + fixed (void* location = &nodeContents.Data[i]) + { + objectWriter.EmitBytes((IntPtr)location, size); + } } - } + break; } // Update nextRelocIndex/Offset diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs index d0c726bf0ce..e7b8ea0e211 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs @@ -121,7 +121,9 @@ public void EmitSUB(Register reg, int immediate) { if (immediate >= 0) { - Builder.EmitUInt((uint)(0b1_1_0_100010_0_000000000000_00000_00000u | immediate << 12) | ((uint)reg << 5) | (uint)reg); + Debug.Assert(immediate % 4 == 0); + + Builder.EmitUInt((uint)(0b1_1_0_100010_0_000000000000_00000_00000u | immediate << 10) | ((uint)reg << 5) | (uint)reg); } else { @@ -164,6 +166,16 @@ public void EmitINT3() Builder.EmitUInt(0b11010100_001_1111111111111111_000_0_0); } + public void EmitINT3(uint id) + { + Builder.EmitUInt(0b11010100_001_1111111111111111_000_0_0); + Builder.EmitUInt((uint)(0b0_00101_00000000000000000000000000u | ((uint)4))); + Builder.EmitUInt(0xdeadc0de); + Builder.EmitUInt(id); + Builder.EmitUInt(0xdeadc0de); + } + + public void EmitJmpToAddrMode(ref AddrMode addrMode) { throw new NotImplementedException(); diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs index 427c8d1f64b..35b5fb64eb6 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs @@ -18,7 +18,8 @@ protected Register GetContextRegister(ref /* readonly */ ARM64Emitter encoder) if (_id == ReadyToRunHelperId.DelegateCtor) return encoder.TargetRegister.Arg2; else - return encoder.TargetRegister.Arg0; } + return encoder.TargetRegister.Arg0; + } protected void EmitDictionaryLookup(NodeFactory factory, ref ARM64Emitter encoder, Register context, Register result, GenericLookupResult 
lookup, bool relocsOnly)
     {
@@ -43,6 +44,10 @@ protected void EmitDictionaryLookup(NodeFactory factory, ref ARM64Emitter encode
                     break;
 
                 case GenericLookupResultReferenceType.ConditionalIndirect:
+                    // Test result, 0x1
+                    // JEQ L1
+                    // mov result, [result-1]
+                    // L1:
                     throw new NotImplementedException();
 
                 default:
@@ -73,13 +78,15 @@ protected sealed override void EmitCode(NodeFactory factory, ref ARM64Emitter en
                     else
                     {
                         // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region.
-                        int cctorContextSize = NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target);
-                        encoder.EmitLDR(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg0, ((short)(factory.Target.PointerSize - cctorContextSize)));
-                        encoder.EmitCMP(encoder.TargetRegister.Arg1, 1);
+                        encoder.EmitMOV(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg0);
+                        //encoder.EmitSUB(encoder.TargetRegister.Arg3, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target));
+                        encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg3, (short)factory.Target.PointerSize);
+                        encoder.EmitCMP(encoder.TargetRegister.Arg2, 1);
                         encoder.EmitRETIfEqual();
 
                         encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result);
-                        encoder.EmitSUB(encoder.TargetRegister.Arg0, ((byte)(cctorContextSize)));
+                        encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg3);
+
                         encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnNonGCStaticBase));
                     }
                 }
@@ -105,14 +112,14 @@ protected sealed override void EmitCode(NodeFactory factory, ref ARM64Emitter en
                         GenericLookupResult nonGcRegionLookup = factory.GenericLookup.TypeNonGCStaticBase(target);
                         EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg2, nonGcRegionLookup, relocsOnly);
 
-                        int cctorContextSize = NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target);
-                        encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, ((short)(factory.Target.PointerSize - cctorContextSize)));
+                        encoder.EmitSUB(encoder.TargetRegister.Arg2, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target));
+                        encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, (short)factory.Target.PointerSize);
                         encoder.EmitCMP(encoder.TargetRegister.Arg3, 1);
                         encoder.EmitRETIfEqual();
 
                         encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result);
                         encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2);
-                        encoder.EmitSUB(encoder.TargetRegister.Arg0, cctorContextSize);
+
                         encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnGCStaticBase));
                     }
                 }
@@ -199,7 +206,9 @@ protected sealed override void EmitCode(NodeFactory factory, ref ARM64Emitter en
                 break;
 
             default:
-                throw new NotImplementedException();
+                encoder.EmitINT3();
+                Console.WriteLine("Missing R2R for {0}", Id.ToString());
+                break;
         }
     }
 
diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs
index 2bfaf8cd40a..ef61ad26435 100644
--- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs
+++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs
@@ -44,6 +44,7 @@ protected override void EmitCode(NodeFactory
factory, ref ARM64Emitter encoder, case ReadyToRunHelperId.GetNonGCStaticBase: { MetadataType target = (MetadataType)Target; + bool hasLazyStaticConstructor = factory.PreinitializationManager.HasLazyStaticConstructor(target); encoder.EmitMOV(encoder.TargetRegister.Result, factory.TypeNonGCStaticsSymbol(target)); @@ -53,17 +54,16 @@ protected override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder, } else { - encoder.EmitINT3(); // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. - encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); - encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, - ((short)(factory.Target.PointerSize - NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)))); - encoder.EmitCMP(encoder.TargetRegister.Arg3, 1); + encoder.EmitMOV(encoder.TargetRegister.Arg3, encoder.TargetRegister.Result); + encoder.EmitSUB(encoder.TargetRegister.Arg3, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg3, (short)factory.Target.PointerSize); + encoder.EmitCMP(encoder.TargetRegister.Arg2, 1); encoder.EmitRETIfEqual(); encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); - encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2); - encoder.EmitSUB(encoder.TargetRegister.Arg0, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg3); + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnNonGCStaticBase)); } } @@ -103,23 +103,23 @@ protected override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder, encoder.EmitMOV(encoder.TargetRegister.Result, factory.TypeGCStaticsSymbol(target)); encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) { encoder.EmitRET(); } else { - encoder.EmitINT3(); // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. 
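+                        // Compute the cctor context address (the non-GC static base minus the context
+                        // storage size) and test its "initialized" flag before taking the slow path.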
encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); - encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, - ((short)(factory.Target.PointerSize - NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)))); + encoder.EmitSUB(encoder.TargetRegister.Arg2, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, (short)factory.Target.PointerSize); encoder.EmitCMP(encoder.TargetRegister.Arg3, 1); encoder.EmitRETIfEqual(); encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2); - encoder.EmitSUB(encoder.TargetRegister.Arg0, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnGCStaticBase)); } } @@ -165,8 +165,9 @@ protected override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder, case ReadyToRunHelperId.ResolveVirtualFunction: { - throw new NotImplementedException(); + encoder.EmitINT3(); + //ARMDebug.EmitHelperNYIAssert(factory, ref encoder, ReadyToRunHelperId.ResolveVirtualFunction); /* *** NOT TESTED!!! @@ -192,17 +193,14 @@ NOT TESTED!!! ((short)(EETypeNode.GetVTableOffset(factory.Target.PointerSize) + (slot * factory.Target.PointerSize)))); encoder.EmitRET(); } - - break; */ } + break; + default: throw new NotImplementedException(); } - - - } } } diff --git a/src/ILCompiler.Compiler/src/Compiler/VectorOfTFieldLayoutAlgorithm.cs b/src/ILCompiler.Compiler/src/Compiler/VectorOfTFieldLayoutAlgorithm.cs index 56cc5f497ae..ccd065dfaca 100644 --- a/src/ILCompiler.Compiler/src/Compiler/VectorOfTFieldLayoutAlgorithm.cs +++ b/src/ILCompiler.Compiler/src/Compiler/VectorOfTFieldLayoutAlgorithm.cs @@ -64,6 +64,17 @@ public override bool ComputeContainsGCPointers(DefType type) public override ValueTypeShapeCharacteristics ComputeValueTypeShapeCharacteristics(DefType type) { + if (type.Context.Target.Architecture == TargetArchitecture.ARM64 && + type.Instantiation[0].IsPrimitiveNumeric) + { + return type.InstanceFieldSize.AsInt switch + { + 8 => ValueTypeShapeCharacteristics.Vector64Aggregate, + 16 => ValueTypeShapeCharacteristics.Vector128Aggregate, + _ => ValueTypeShapeCharacteristics.None + }; + } + return _fallbackAlgorithm.ComputeValueTypeShapeCharacteristics(type); } diff --git a/src/ILCompiler.RyuJit/src/JitInterface/CorInfoImpl.RyuJit.cs b/src/ILCompiler.RyuJit/src/JitInterface/CorInfoImpl.RyuJit.cs index c1cac0a4fd5..a2aca7a75f2 100644 --- a/src/ILCompiler.RyuJit/src/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/ILCompiler.RyuJit/src/JitInterface/CorInfoImpl.RyuJit.cs @@ -671,7 +671,7 @@ private ObjectNode.ObjectData EncodeEHInfo() RelocType rel = (_compilation.NodeFactory.Target.IsWindows) ? 
RelocType.IMAGE_REL_BASED_ABSOLUTE : - RelocType.IMAGE_REL_BASED_REL32; + RelocType.IMAGE_REL_BASED_RELPTR32; if (_compilation.NodeFactory.Target.Abi == TargetAbi.Jit) rel = RelocType.IMAGE_REL_BASED_REL32; From 20021cb2fcef19fec0cd044276af091acc303092 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Mon, 3 Aug 2020 07:29:02 +0200 Subject: [PATCH 05/36] unwind use only the dwarf index option --- src/Native/libunwind/src/AddressSpace.hpp | 40 +++++++++++++++++++---- src/Native/libunwind/src/UnwindCursor.hpp | 11 +++++++ 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/src/Native/libunwind/src/AddressSpace.hpp b/src/Native/libunwind/src/AddressSpace.hpp index fb07c807db9..389be0a5066 100644 --- a/src/Native/libunwind/src/AddressSpace.hpp +++ b/src/Native/libunwind/src/AddressSpace.hpp @@ -117,12 +117,23 @@ namespace libunwind { // __eh_frame_hdr_start = SIZEOF(.eh_frame_hdr) > 0 ? ADDR(.eh_frame_hdr) : 0; // __eh_frame_hdr_end = SIZEOF(.eh_frame_hdr) > 0 ? . : 0; +#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX extern char __eh_frame_start; extern char __eh_frame_end; +#endif #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) -extern char __eh_frame_hdr_start; -extern char __eh_frame_hdr_end; + +#ifndef _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START +#define _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START __eh_frame_hdr_start +#endif + +#ifndef _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_END +#define _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_END __eh_frame_hdr_end +#endif + +extern char _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START; +extern char _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_END; #endif #elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) @@ -161,9 +172,11 @@ struct UnwindInfoSections { uintptr_t dso_base; #endif #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX uintptr_t dwarf_section; uintptr_t dwarf_section_length; #endif +#endif #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) uintptr_t dwarf_index_section; uintptr_t dwarf_index_section_length; @@ -401,18 +414,31 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, } #elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) // Bare metal is statically linked, so no need to ask the dynamic loader +#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX info.dwarf_section_length = (uintptr_t)(&__eh_frame_end - &__eh_frame_start); - info.dwarf_section = (uintptr_t)(&__eh_frame_start); + info.dwarf_section = (uintptr_t)(&__eh_frame_start); _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: section %p length %p", - (void *)info.dwarf_section, (void *)info.dwarf_section_length); + (void *)info.dwarf_section, + (void *)info.dwarf_section_length); +#endif #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) - info.dwarf_index_section = (uintptr_t)(&__eh_frame_hdr_start); - info.dwarf_index_section_length = (uintptr_t)(&__eh_frame_hdr_end - &__eh_frame_hdr_start); + info.dwarf_index_section = + (uintptr_t)(&(_LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START)); + info.dwarf_index_section_length = + (uintptr_t)(&(_LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_END) - + &(_LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START)); _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: index section %p length %p", - (void *)info.dwarf_index_section, (void *)info.dwarf_index_section_length); + (void *)info.dwarf_index_section, + (void *)info.dwarf_index_section_length); #endif + +#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX if (info.dwarf_section_length) return true; +#else + if (info.dwarf_index_section_length > 0) + return true; +#endif 
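+  // (With _LIBUNWIND_USE_ONLY_DWARF_INDEX the .eh_frame bounds symbols are never
+  // referenced, so the index section length above is the sole success criterion.)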
#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) // Bare metal is statically linked, so no need to ask the dynamic loader info.arm_section = (uintptr_t)(&__exidx_start); diff --git a/src/Native/libunwind/src/UnwindCursor.hpp b/src/Native/libunwind/src/UnwindCursor.hpp index ae5cbe7479e..29c3dc7733a 100644 --- a/src/Native/libunwind/src/UnwindCursor.hpp +++ b/src/Native/libunwind/src/UnwindCursor.hpp @@ -1474,6 +1474,8 @@ bool UnwindCursor::getInfoFromDwarfSection(pint_t pc, typename CFI_Parser::CIE_Info cieInfo; bool foundFDE = false; bool foundInCache = false; + +#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX // If compact encoding table gave offset into dwarf section, go directly there if (fdeSectionOffsetHint != 0) { foundFDE = CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, @@ -1481,6 +1483,8 @@ bool UnwindCursor::getInfoFromDwarfSection(pint_t pc, sects.dwarf_section + fdeSectionOffsetHint, &fdeInfo, &cieInfo); } +#endif + #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) if (!foundFDE && (sects.dwarf_index_section != 0)) { foundFDE = EHHeaderParser::findFDE( @@ -1488,6 +1492,8 @@ bool UnwindCursor::getInfoFromDwarfSection(pint_t pc, (uint32_t)sects.dwarf_index_section_length, &fdeInfo, &cieInfo); } #endif + +#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX if (!foundFDE) { // otherwise, search cache of previously found FDEs. pint_t cachedFDE = DwarfFDECache::findFDE(sects.dso_base, pc); @@ -1505,6 +1511,7 @@ bool UnwindCursor::getInfoFromDwarfSection(pint_t pc, (uint32_t)sects.dwarf_section_length, 0, &fdeInfo, &cieInfo); } +#endif if (foundFDE) { typename CFI_Parser::PrologInfo prolog; if (CFI_Parser::parseFDEInstructions(_addressSpace, fdeInfo, cieInfo, pc, @@ -1896,7 +1903,11 @@ void UnwindCursor::setInfoBasedOnIPRegister(bool isReturnAddress) { #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) // If there is dwarf unwind info, look there next. +#if defined(_LIBUNWIND_USE_ONLY_DWARF_INDEX) + if (sects.dwarf_index_section != 0) { +#else if (sects.dwarf_section != 0) { +#endif if (this->getInfoFromDwarfSection(pc, sects)) { // found info in dwarf, done return; From 39c6ceb72d3764e48504ac26ec926e7b648c4752 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Mon, 3 Aug 2020 07:30:08 +0200 Subject: [PATCH 06/36] ARM64 runtime assembler functions --- src/Native/Runtime/arm64/AllocFast.S | 291 +++++++++++++- src/Native/Runtime/arm64/PInvoke.S | 360 ++++++++++++++++- src/Native/Runtime/arm64/StubDispatch.S | 163 +++++++- .../Runtime/arm64/UniversalTransition.S | 157 +++++++- src/Native/Runtime/arm64/WriteBarriers.S | 373 +++++++++++++++++- .../Runtime/unix/unixasmmacrosarm64.inc | 67 ++++ 6 files changed, 1388 insertions(+), 23 deletions(-) diff --git a/src/Native/Runtime/arm64/AllocFast.S b/src/Native/Runtime/arm64/AllocFast.S index 876f2dfbcb8..07e68455950 100644 --- a/src/Native/Runtime/arm64/AllocFast.S +++ b/src/Native/Runtime/arm64/AllocFast.S @@ -1,4 +1,293 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-// TODO: Implement +#include +#include "AsmOffsets.inc" + +// GC type flags +GC_ALLOC_FINALIZE = 1 +GC_ALLOC_ALIGN8_BIAS = 4 +GC_ALLOC_ALIGN8 = 8 + +// +// Rename fields of nested structs +// +OFFSETOF__Thread__m_alloc_context__alloc_ptr = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + + + +// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +// allocation context then automatically fallback to the slow allocation path. +// x0 == EEType + LEAF_ENTRY RhpNewFast, _TEXT + + // x1 = GetThread() + INLINE_GETTHREAD x1 + + // + // x0 contains EEType pointer + // + ldr w2, [x0, #OFFSETOF__EEType__m_uBaseSize] + + // + // x0: EEType pointer + // x1: Thread pointer + // x2: base size + // + + // Load potential new object address into x12. + ldr x12, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add x2, x2, x12 + ldr x13, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp x2, x13 + bhi RhpNewFast_RarePath + + // Update the alloc pointer to account for the allocation. + str x2, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new objects EEType pointer + str x0, [x12, #OFFSETOF__Object__m_pEEType] + + mov x0, x12 + ret + +RhpNewFast_RarePath: + mov x1, #0 + b RhpNewObject + LEAF_END RhpNewFast, _TEXT + +// Allocate non-array object with finalizer. +// x0 == EEType + LEAF_ENTRY RhpNewFinalizable, _TEXT + mov x1, #GC_ALLOC_FINALIZE + b RhpNewObject + LEAF_END RhpNewFinalizable, _TEXT + +// Allocate non-array object. +// x0 == EEType +// x1 == alloc flags + NESTED_ENTRY RhpNewObject, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME x3 + + // x3: transition frame + + // Preserve the EEType in x19 + mov x19, x0 + + ldr w2, [x0, #OFFSETOF__EEType__m_uBaseSize] + + // Call the rest of the allocation helper. + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + bl RhpGcAlloc + + // Set the new objects EEType pointer on success. + cbz x0, NewOutOfMemory + str x19, [x0, #OFFSETOF__Object__m_pEEType] + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + ldr w1, [x19, #OFFSETOF__EEType__m_uBaseSize] + movk x2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movk x2, #(RH_LARGE_OBJECT_SIZE >> 16), lsl #16 + cmp x1, x2 + blo New_SkipPublish + + // x0: object + // x1: already contains object size + bl RhpPublishObject // x0: this function returns the object that was passed-in + +New_SkipPublish: + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +NewOutOfMemory: + // This is the OOM failure path. We are going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov x0, x19 // EEType pointer + mov x1, 0 // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + b RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewObject, _TEXT + +// Allocate a string. 
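+// A string object is laid out as an EEType* followed by a 32-bit length and the
+// UTF-16 characters, hence the allocation size computed below:
+// align(STRING_BASE_SIZE + STRING_COMPONENT_SIZE * count, 8).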
+// x0 == EEType +// x1 == element/character count + LEAF_ENTRY RhNewString, _TEXT + // Make sure computing the overall allocation size wont overflow + // TODO: this should be actually MAX_STRING_LENGTH + mov x2, 0x7FFFFFFF + cmp x1, x2 + bhi StringSizeOverflow + + // Compute overall allocation size (align(base size + (element size * elements), 8)). + mov w2, #STRING_COMPONENT_SIZE + mov x3, #(STRING_BASE_SIZE + 7) + umaddl x2, w1, w2, x3 // x2 = w1 * w2 + x3 + and x2, x2, #-8 + + // x0 == EEType + // x1 == element count + // x2 == string size + + INLINE_GETTHREAD x3 + + // Load potential new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add x2, x2, x12 + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp x2, x12 + bhi RhpNewArrayRare + + // Reload new object address into r12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Update the alloc pointer to account for the allocation. + str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new objects EEType pointer and element count. + str x0, [x12, #OFFSETOF__Object__m_pEEType] + str x1, [x12, #OFFSETOF__Array__m_Length] + + // Return the object allocated in x0. + mov x0, x12 + + ret + +StringSizeOverflow: + // We get here if the length of the final string object can not be represented as an unsigned + // 32-bit value. We are going to tail-call to a managed helper that will throw + // an OOM exception that the caller of this allocator understands. + + // x0 holds EEType pointer already + mov x1, #1 // Indicate that we should throw OverflowException + b RhExceptionHandling_FailedAllocation + LEAF_END RhNewString, _Text + +// Allocate one dimensional, zero based array (SZARRAY). +// x0 == EEType +// x1 == element count + LEAF_ENTRY RhpNewArray, _Text + + // We want to limit the element count to the non-negative 32-bit int range. + // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component + // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst + // case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. + mov x2, #0x7FFFFFFF + cmp x1, x2 + bhi ArraySizeOverflow + + ldrh w2, [x0, #OFFSETOF__EEType__m_usComponentSize] + umull x2, w1, w2 + ldr w3, [x0, #OFFSETOF__EEType__m_uBaseSize] + add x2, x2, x3 + add x2, x2, #7 + and x2, x2, #-8 + + // x0 == EEType + // x1 == element count + // x2 == array size + + INLINE_GETTHREAD x3 + + // Load potential new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add x2, x2, x12 + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp x2, x12 + bhi RhpNewArrayRare + + // Reload new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Update the alloc pointer to account for the allocation. + str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new objects EEType pointer and element count. 
+ str x0, [x12, #OFFSETOF__Object__m_pEEType] + str x1, [x12, #OFFSETOF__Array__m_Length] + + // Return the object allocated in r0. + mov x0, x12 + + ret + +ArraySizeOverflow: + // We get here if the size of the final array object can not be represented as an unsigned + // 32-bit value. We are going to tail-call to a managed helper that will throw + // an overflow exception that the caller of this allocator understands. + + // x0 holds EEType pointer already + mov x1, #1 // Indicate that we should throw OverflowException + b RhExceptionHandling_FailedAllocation + LEAF_END RhpNewArray, _TEXT + +// Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. +// x0 == EEType +// x1 == element count +// x2 == array size + Thread::m_alloc_context::alloc_ptr +// x3 == Thread + NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler + + // Recover array size by subtracting the alloc_ptr from x2. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + sub x2, x2, x12 + + PUSH_COOP_PINVOKE_FRAME x3 + + // Preserve data we will need later into the callee saved registers + mov x19, x0 // Preserve EEType + mov x20, x1 // Preserve element count + mov x21, x2 // Preserve array size + + mov x1, #0 + + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + bl RhpGcAlloc + + // Set the new objects EEType pointer and length on success. + cbz x0, ArrayOutOfMemory + + // Success, set the array type and element count in the new object. + str x19, [x0, #OFFSETOF__Object__m_pEEType] + str x20, [x0, #OFFSETOF__Array__m_Length] + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + movk x2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movk x2, #(RH_LARGE_OBJECT_SIZE >> 16), lsl #16 + cmp x21, x2 + blo NewArray_SkipPublish + + // x0 = newly allocated array. x1 = size + mov x1, x21 + bl RhpPublishObject + +NewArray_SkipPublish: + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +ArrayOutOfMemory: + // This is the OOM failure path. We are going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov x0, x19 // EEType Pointer + mov x1, 0 // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + b RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewArrayRare, _TEXT diff --git a/src/Native/Runtime/arm64/PInvoke.S b/src/Native/Runtime/arm64/PInvoke.S index 876f2dfbcb8..bd2a8551d68 100644 --- a/src/Native/Runtime/arm64/PInvoke.S +++ b/src/Native/Runtime/arm64/PInvoke.S @@ -1,4 +1,362 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-// TODO: Implement + +#include +#include "AsmOffsets.inc" + +.global RhpTrapThreads + +// Note: these must match the defs in PInvokeTransitionFrameFlags defined in rhbinder.h +PTFF_SAVE_X19 = 0x00000001 +PTFF_SAVE_X20 = 0x00000002 +PTFF_SAVE_X21 = 0x00000004 +PTFF_SAVE_X22 = 0x00000008 +PTFF_SAVE_X23 = 0x00000010 +PTFF_SAVE_X24 = 0x00000020 +PTFF_SAVE_X25 = 0x00000040 +PTFF_SAVE_X26 = 0x00000080 +PTFF_SAVE_X27 = 0x00000100 +PTFF_SAVE_X28 = 0x00000200 +PTFF_SAVE_SP = 0x00000400 +PTFF_SAVE_ALL_PRESERVED = 0x000003FF // NOTE: x19-x28 +PTFF_SAVE_X0 = 0x00000800 +PTFF_SAVE_X1 = 0x00001000 +PTFF_SAVE_X2 = 0x00002000 +PTFF_SAVE_X3 = 0x00004000 +PTFF_SAVE_X4 = 0x00008000 +PTFF_SAVE_X5 = 0x00010000 +PTFF_SAVE_X6 = 0x00020000 +PTFF_SAVE_X7 = 0x00040000 +PTFF_SAVE_X8 = 0x00080000 +PTFF_SAVE_X9 = 0x00100000 +PTFF_SAVE_X10 = 0x00200000 +PTFF_SAVE_X11 = 0x00400000 +PTFF_SAVE_X12 = 0x00800000 +PTFF_SAVE_X13 = 0x01000000 +PTFF_SAVE_X14 = 0x02000000 +PTFF_SAVE_X15 = 0x04000000 +PTFF_SAVE_X16 = 0x08000000 +PTFF_SAVE_X17 = 0x10000000 +PTFF_SAVE_X18 = 0x20000000 +PTFF_SAVE_ALL_SCRATCH = 0x3FFFF800 // NOTE: X0-X18 +PTFF_SAVE_FP = 0x40000000 +PTFF_SAVE_LR = 0x80000000 + +// Bit position for the flags above, to be used with tbz / tbnz instructions +PTFF_THREAD_ABORT_BIT = 36 + +// Bit position for the flags above, to be used with tbz/tbnz instructions +TSF_Attached_Bit = 0 +TSF_SuppressGcStress_Bit = 3 +TSF_DoNotTriggerGc_Bit = 4 + +// Bit position for the flags above, to be used with tbz / tbnz instructions +TrapThreadsFlags_AbortInProgress_Bit = 0 +TrapThreadsFlags_TrapThreads_Bit = 1 + +// This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT +STATUS_REDHAWK_THREAD_ABORT = 0x43 + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpWaitForSuspend -- rare path for RhpPInvoke and RhpReversePInvokeReturn +// +// +// INPUT: none +// +// TRASHES: none +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// + NESTED_ENTRY RhpWaitForSuspend, _TEXT, NoHandler + + // FP and LR registers + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0xA0 // Push down stack pointer and store FP and LR + + // Need to save argument registers x0-x7 and the return buffer register x8 + // Also save x9 which may be used for saving indirect call target + stp x0, x1, [sp, #0x10] + stp x2, x3, [sp, #0x20] + stp x4, x5, [sp, #0x30] + stp x6, x7, [sp, #0x40] + stp x8, x9, [sp, #0x50] + + // Save float argument registers as well since they are volatile + stp d0, d1, [sp, #0x60] + stp d2, d3, [sp, #0x70] + stp d4, d5, [sp, #0x80] + stp d6, d7, [sp, #0x90] + + bl RhpWaitForSuspend2 + + // Restore floating point registers + ldp d0, d1, [sp, #0x60] + ldp d2, d3, [sp, #0x70] + ldp d4, d5, [sp, #0x80] + ldp d6, d7, [sp, #0x90] + + // Restore the argument registers + ldp x0, x1, [sp, #0x10] + ldp x2, x3, [sp, #0x20] + ldp x4, x5, [sp, #0x30] + ldp x6, x7, [sp, #0x40] + ldp x8, x9, [sp, #0x50] + + // Restore FP and LR registers, and free the allocated stack block + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0xA0 + EPILOG_RETURN + + NESTED_END RhpWaitForSuspend, _TEXT + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpWaitForGCNoAbort +// +// +// INPUT: x9: transition frame +// +// TRASHES: None +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// + NESTED_ENTRY RhpWaitForGCNoAbort, _TEXT, 
NoHandler + + // FP and LR registers + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x40 // Push down stack pointer and store FP and LR + + // Save the integer return registers, as well as the floating return registers + stp x0, x1, [sp, #0x10] + stp d0, d1, [sp, #0x20] + stp d2, d3, [sp, #0x30] + + ldr x0, [x9, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + ldr w0, [x0, #OFFSETOF__Thread__m_ThreadStateFlags] + tbnz x0, #TSF_DoNotTriggerGc_Bit, Done + + mov x0, x9 // passing transition frame in x0 + bl RhpWaitForGC2 + +Done: + ldp x0, x1, [sp, #0x10] + ldp d0, d1, [sp, #0x20] + ldp d2, d3, [sp, #0x30] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x40 + EPILOG_RETURN + + NESTED_END RhpWaitForGCNoAbort + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpWaitForGC +// +// +// INPUT: x9: transition frame +// +// TRASHES: x0, x1, x10 +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// + NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler + + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x10 + + adrp x10, RhpTrapThreads + add x10, x10, :lo12:RhpTrapThreads + ldr w10, [x10] + tbz x10, #TrapThreadsFlags_TrapThreads_Bit, NoWait + bl RhpWaitForGCNoAbort +NoWait: + tbz x10, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort + ldr x10, [x9, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + tbz x10, #PTFF_THREAD_ABORT_BIT, NoAbort + + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x10 + mov w0, #STATUS_REDHAWK_THREAD_ABORT + mov x1, lr // hijack target address as exception PC + b RhpThrowHwEx + +NoAbort: + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x10 + EPILOG_RETURN + + NESTED_END RhpWaitForGC, _TEXT + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpReversePInvoke +// +// IN: x9: address of reverse pinvoke frame +// 0: save slot for previous M->U transition frame +// 8: save slot for thread pointer to avoid re-calc in epilog sequence +// +// PRESERVES: x0 - x8 -- need to preserve these because the caller assumes they are not trashed +// +// TRASHES: x10, x11 +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// + LEAF_ENTRY RhpReversePInvoke, _TEXT + + INLINE_GETTHREAD x10 // x10 = Thread + str x10, [x9, #8] // save Thread pointer for RhpReversePInvokeReturn + + // x9 = reverse pinvoke frame + // x10 = thread + // x11 = scratch + + ldr w11, [x10, #OFFSETOF__Thread__m_ThreadStateFlags] + tbz x11, #TSF_Attached_Bit, AttachThread + +ThreadAttached: + // + // Check for the correct mode. This is accessible via various odd things that we cannot completely + // prevent such as : + // 1) Registering a reverse pinvoke entrypoint as a vectored exception handler + // 2) Performing a managed delegate invoke on a reverse pinvoke delegate. + // + ldr x11, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + cbz x11, CheckBadTransition + + // Save previous TransitionFrame prior to making the mode transition so that it is always valid + // whenever we might attempt to hijack this thread. + str x11, [x9] + + str xzr, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + dmb ish + + adrp x11, RhpTrapThreads + add x11, x11, :lo12:RhpTrapThreads + ldr w11, [x11] + tbnz x11, #TrapThreadsFlags_TrapThreads_Bit, TrapThread + + ret + +CheckBadTransition: + // Allow 'bad transitions' in when the TSF_DoNotTriggerGc mode is set. 
This allows us to have
+    // [NativeCallable] methods that are called via the "restricted GC callouts" as well as from native,
+    // which is necessary because the methods are CCW vtable methods on interfaces passed to native.
+    ldr w11, [x10, #OFFSETOF__Thread__m_ThreadStateFlags]
+    tbz x11, #TSF_DoNotTriggerGc_Bit, BadTransition
+
+    // zero-out our 'previous transition frame' save slot
+    mov x11, #0
+    str x11, [x9]
+
+    // nothing more to do
+    ret
+
+TrapThread:
+    // put the previous frame back (sets us back to preemptive mode)
+    ldr x11, [x9]
+    str x11, [x10, #OFFSETOF__Thread__m_pTransitionFrame]
+    dmb ish
+
+AttachThread:
+    // passing address of reverse pinvoke frame in x9
+    b RhpReversePInvokeAttachOrTrapThread
+
+BadTransition:
+    mov x0, lr // arg <- return address
+    b RhpReversePInvokeBadTransition
+
+    LEAF_END RhpReversePInvoke, _TEXT
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// RhpReversePInvokeAttachOrTrapThread -- rare path for RhpReversePInvoke
+//
+//
+// INPUT: x9: address of reverse pinvoke frame
+//
+// PRESERVES: x0-x8 -- need to preserve these because the caller assumes they are not trashed
+//
+// TRASHES: none
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread, _TEXT, NoHandler
+
+    // FP and LR registers
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0xA0 // Push down stack pointer and store FP and LR
+
+    // Need to save argument registers x0-x7 and the return buffer register x8 (twice for 16B alignment)
+    stp x0, x1, [sp, #0x10]
+    stp x2, x3, [sp, #0x20]
+    stp x4, x5, [sp, #0x30]
+    stp x6, x7, [sp, #0x40]
+    stp x8, x8, [sp, #0x50]
+
+    // Save float argument registers as well since they are volatile
+    stp d0, d1, [sp, #0x60]
+    stp d2, d3, [sp, #0x70]
+    stp d4, d5, [sp, #0x80]
+    stp d6, d7, [sp, #0x90]
+
+    mov x0, x9 // passing reverse pinvoke frame pointer in x0
+    bl RhpReversePInvokeAttachOrTrapThread2
+
+    // Restore floating point registers
+    ldp d0, d1, [sp, #0x60]
+    ldp d2, d3, [sp, #0x70]
+    ldp d4, d5, [sp, #0x80]
+    ldp d6, d7, [sp, #0x90]
+
+    // Restore the argument registers
+    ldp x0, x1, [sp, #0x10]
+    ldp x2, x3, [sp, #0x20]
+    ldp x4, x5, [sp, #0x30]
+    ldp x6, x7, [sp, #0x40]
+    ldr x8, [sp, #0x50]
+
+    // Restore FP and LR registers, and free the allocated stack block
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0xA0
+    EPILOG_RETURN
+
+    NESTED_END RhpReversePInvokeAttachOrTrapThread, _TEXT
+
+//
+// RhpPInvoke
+//
+// IN: X0: address of pinvoke frame
+//
+// This helper assumes that its callsite is as good a place to start the stackwalk as the actual PInvoke callsite.
+// The code generator must treat the callsite of this helper as GC triggering and generate the GC info for it.
+// Also, the code generator must ensure that there are no live GC references in callee saved registers.
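+//
+// In C terms the fast path below is roughly the following (a sketch, not part of the
+// patch; the fields mirror the OFFSETOF__PInvokeTransitionFrame__ constants used here):
+//
+//   frame->m_FramePointer  = fp;
+//   frame->m_RIP           = lr;            // return address of the PInvoke callsite
+//   frame->m_PreservedRegs = sp;
+//   frame->m_Flags         = PTFF_SAVE_SP;
+//   frame->m_pThread       = GetThread();
+//   frame->m_pThread->m_pTransitionFrame = frame;   // enter preemptive mode
+//   if (RhpTrapThreads != 0)
+//       RhpWaitForSuspend2();                       // rare path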
+// + +NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler + str fp, [x0, #OFFSETOF__PInvokeTransitionFrame__m_FramePointer] + str lr, [x0, #OFFSETOF__PInvokeTransitionFrame__m_RIP] + mov x9, SP + str x9, [x0, #OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs] + mov x9, #PTFF_SAVE_SP + str x9, [x0, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + + // get TLS global variable address + // r0 = GetThread() + INLINE_GETTHREAD x10 + str x10, [x0, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + str x0, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + + adrp x9, RhpTrapThreads + add x9, x9, :lo12:RhpTrapThreads + ldr w9, [x9] + cbnz w9, InvokeRareTrapThread // TrapThreadsFlags_None = 0 + ret + +InvokeRareTrapThread: + b C_FUNC(RhpWaitForSuspend2) +NESTED_END RhpPInvoke, _TEXT + + +LEAF_ENTRY RhpPInvokeReturn, _TEXT + ldr x9, [x0, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + mov x10, 0 + str x10, [x9, #OFFSETOF__Thread__m_pTransitionFrame] + + adrp x9, RhpTrapThreads + add x9, x9, :lo12:RhpTrapThreads + ldr w9, [x9] + cbnz w9, 0f // TrapThreadsFlags_None = 0 + ret +0: + // passing transition frame pointer in x0 + b RhpWaitForGC +LEAF_END RhpPInvokeReturn, _TEXT + diff --git a/src/Native/Runtime/arm64/StubDispatch.S b/src/Native/Runtime/arm64/StubDispatch.S index 01ed602a761..b3f910c67a1 100644 --- a/src/Native/Runtime/arm64/StubDispatch.S +++ b/src/Native/Runtime/arm64/StubDispatch.S @@ -2,5 +2,166 @@ // The .NET Foundation licenses this file to you under the MIT license. #include +#include "AsmOffsets.inc" -// TODO: Implement Arm64 support +#define __tls_array 0 + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + .extern RhpCidResolve + .extern RhpUniversalTransition_DebugStepTailCall + + .macro GET_TLS_DISPATCH_CELL + brk 1 +/* + ldr x9, =_tls_index + ldr w9, [x9] + ldr xip1, [xpr, #__tls_array] + ldr xip1, [xip1, x9, lsl #3] + ldr x9, =SECTIONREL_t_TLS_DispatchCell + ldr x9, [x9] + ldr xip1, [xip1, x9] +*/ + .endm + + .macro SET_TLS_DISPATCH_CELL + // xip1 : Value to be assigned to the TLS variable + brk 1 +/* + ldr x9, =_tls_index + ldr w9, [x9] + ldr x10, [xpr, #__tls_array] + ldr x10, [x10, x9, lsl #3] + ldr x9, =SECTIONREL_t_TLS_DispatchCell + ldr x9, [x9] + str xip1, [x10, x9] +*/ + .endm + +/* TODO +SECTIONREL_t_TLS_DispatchCell + DCD t_TLS_DispatchCell + RELOC 8, t_TLS_DispatchCell ;; SECREL + DCD 0 +*/ + + // Macro that generates code to check a single cache entry. + .macro CHECK_CACHE_ENTRY entry + // Check a single entry in the cache. + // x9 : Cache data structure. Also used for target address jump. + // x10 : Instance EEType* + // x11 : x11 still contains the indirection cell address. do not trash + // x12 : Trashed + ldr x12, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16))] + cmp x10, x12 + bne 0f + ldr x9, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)] + br x9 +0: + .endm + + + LEAF_ENTRY RhpCastableObjectDispatch_CommonStub, _TEXT + // Custom calling convention: + // xip0 has pointer to the current thunks data block + + // store dispatch cell address in thread static + ldr xip1, [xip0] + SET_TLS_DISPATCH_CELL + + // Now load the target address and jump to it. 
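+    // (Thunk data block layout assumed by this stub: [xip0 + 0] holds the dispatch
+    // cell address stashed above, [xip0 + 8] holds the real jump target.)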
+ ldr x9, [xip0, #8] + br x9 + LEAF_END RhpCastableObjectDispatch_CommonStub, _TEXT + + LEAF_ENTRY RhpTailCallTLSDispatchCell, _TEXT + // Load the dispatch cell out of the TLS variable + GET_TLS_DISPATCH_CELL + + // Tail call to the target of the dispatch cell, preserving the cell address in xip1 + ldr x9, [xip1] + br x9 + LEAF_END RhpTailCallTLSDispatchCell, _TEXT + +// +// Macro that generates a stub consuming a cache with the given number of entries. +// + .macro DEFINE_INTERFACE_DISPATCH_STUB entries + + NESTED_ENTRY "RhpInterfaceDispatch\entries", _TEXT, NoHandler + + // x11 currently holds the indirection cell address. We need to get the cache structure instead. + ldr x9, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + // Load the EEType from the object instance in x0. + ldr x10, [x0] + + .global CurrentEntry + .set CurrentEntry, 0 + + .rept \entries + CHECK_CACHE_ENTRY CurrentEntry + .set CurrentEntry, CurrentEntry + 1 + .endr + + // x11 still contains the indirection cell address. + b RhpInterfaceDispatchSlow + + NESTED_END "RhpInterfaceDispatch\entries", _TEXT + + .endm + +// +// Define all the stub routines we currently need. +// + DEFINE_INTERFACE_DISPATCH_STUB 1 + DEFINE_INTERFACE_DISPATCH_STUB 2 + DEFINE_INTERFACE_DISPATCH_STUB 4 + DEFINE_INTERFACE_DISPATCH_STUB 8 + DEFINE_INTERFACE_DISPATCH_STUB 16 + DEFINE_INTERFACE_DISPATCH_STUB 32 + DEFINE_INTERFACE_DISPATCH_STUB 64 + +// +// Initial dispatch on an interface when we dont have a cache yet. +// + LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT + // Just tail call to the cache miss helper. + b RhpInterfaceDispatchSlow + LEAF_END RhpInitialInterfaceDispatch, _TEXT + +// +// Stub dispatch routine for dispatch to a vtable slot +// + LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + // xip1 has the interface dispatch cell address in it. + // load x12 to point to the vtable offset (which is stored in the m_pCache field). + ldr x12, [xip1, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + // Load the EEType from the object instance in x0, and add it to the vtable offset + // to get the address in the vtable of what we want to dereference + ldr x13, [x0] + add x12, x12, x13 + + // Load the target address of the vtable into x12 + ldr x12, [x12] + + br x12 + LEAF_END RhpVTableOffsetDispatch, _TEXT + +// +// Cache miss case, call the runtime to resolve the target and update the cache. +// + LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + // xip1 has the interface dispatch cell address in it. + // Calling convention of the universal thunk is: + // xip0: contains target address for the thunk to call + // xip1: contains parameter of the thunks target + adrp xip0, RhpCidResolve + add xip0, xip0, :lo12:RhpCidResolve + mov xip1, x11 + b RhpUniversalTransition_DebugStepTailCall + LEAF_END RhpInterfaceDispatchSlow, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/Native/Runtime/arm64/UniversalTransition.S b/src/Native/Runtime/arm64/UniversalTransition.S index 876f2dfbcb8..81fd8ca3385 100644 --- a/src/Native/Runtime/arm64/UniversalTransition.S +++ b/src/Native/Runtime/arm64/UniversalTransition.S @@ -1,4 +1,159 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-// TODO: Implement +#include + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + // TODO .extern RhpIntegerTrashValues + // TODO .extern RhpFpTrashValues +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + +// Padding to account for the odd number of saved integer registers +#define ALIGNMENT_PADDING_SIZE (8) + +#define COUNT_ARG_REGISTERS (9) +#define INTEGER_REGISTER_SIZE (8) +#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) + +// Largest return block is 4 doubles +#define RETURN_BLOCK_SIZE (32) + +#define COUNT_FLOAT_ARG_REGISTERS (8) +#define FLOAT_REGISTER_SIZE (8) +#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE) + +#define PUSHED_LR_SIZE (8) +#define PUSHED_FP_SIZE (8) + +// +// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +// +// ALIGNMENT_PADDING_SIZE +// ARGUMENT_REGISTERS_SIZE +// RETURN_BLOCK_SIZE +// FLOAT_ARG_REGISTERS_SIZE +// PUSHED_LR_SIZE +// PUSHED_FP_SIZE +// + +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_LR_SIZE + FLOAT_ARG_REGISTERS_SIZE) + +#define STACK_SIZE (ALIGNMENT_PADDING_SIZE + ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_LR_SIZE + PUSHED_FP_SIZE) + +#define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_LR_SIZE) +#define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE) + +// +// RhpUniversalTransition +// +// At input to this function, x0-8, d0-7 and the stack may contain any number of arguments. +// +// In addition, there are 2 extra arguments passed in the intra-procedure-call scratch register: +// xip0 will contain the managed function that is to be called by this transition function +// xip1 will contain the pointer sized extra argument to the managed function +// +// When invoking the callee: +// +// x0 shall contain a pointer to the TransitionBlock +// x1 shall contain the value that was in xip1 at entry to this function +// +// Frame layout is: +// +// {StackPassedArgs} ChildSP+0C0 CallerSP+000 +// {AlignmentPad (0x8 bytes)} ChildSP+0B8 CallerSP-008 +// {IntArgRegs (x0-x8) (0x48 bytes)} ChildSP+070 CallerSP-050 +// {ReturnBlock (0x20 bytes)} ChildSP+050 CallerSP-070 +// -- The base address of the Return block is the TransitionBlock pointer, the floating point args are +// in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact +// layout of all pieces of the frame that lie at or above the pushed floating point registers. +// {FpArgRegs (d0-d7) (0x40 bytes)} ChildSP+010 CallerSP-0B0 +// {PushedLR} ChildSP+008 CallerSP-0B8 +// {PushedFP} ChildSP+000 CallerSP-0C0 +// +// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +// must be updated as well. +// +// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +// FpArgRegs. +// +// NOTE: The stack walker guarantees that conservative GC reporting will be applied to +// everything between the base of the ReturnBlock and the top of the StackPassedArgs. 
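+//
+// For reference, the constants above combine as follows (a worked check, not part of
+// the original comment):
+//
+//   STACK_SIZE = 8 (pad) + 0x48 (x0-x8) + 0x20 (return block)
+//              + 0x40 (d0-d7) + 8 (lr) + 8 (fp)             = 0xC0
+//   FLOAT_ARG_OFFSET                      = 8 + 8            = 0x10  // ChildSP+010
+//   DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK = 0x10 + 0x40      = 0x50  // ChildSP+050
+//   ARGUMENT_REGISTERS_OFFSET             = 0x50 + 0x20      = 0x70  // ChildSP+070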
+// + + .text + + .macro UNIVERSAL_TRANSITION FunctionName + + NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler + + // FP and LR registers + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-STACK_SIZE // ;; Push down stack pointer and store FP and LR + + // Floating point registers + stp d0, d1, [sp, #(FLOAT_ARG_OFFSET )] + stp d2, d3, [sp, #(FLOAT_ARG_OFFSET + 0x10)] + stp d4, d5, [sp, #(FLOAT_ARG_OFFSET + 0x20)] + stp d6, d7, [sp, #(FLOAT_ARG_OFFSET + 0x30)] + + // Space for return buffer data (0x40 bytes) + + // Save argument registers + stp x0, x1, [sp, #(ARGUMENT_REGISTERS_OFFSET )] + stp x2, x3, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x10)] + stp x4, x5, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x20)] + stp x6, x7, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x30)] + stp x8, xzr, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x40)] + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + // ARM64TODO +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + + add x0, sp, #DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK // First parameter to target function is a pointer to the return block + mov x8, x0 // Arm64 calling convention: Address of return block shall be passed in x8 + mov x1, xip1 // Second parameter to target function + blr xip0 + + // We cannot make the label public as that tricks DIA stackwalker into thinking + // it's the beginning of a method. For this reason we export an auxiliary variable + // holding the address instead. + EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom\FunctionName + + // Move the result (the target address) to x12 so it doesn't get overridden when we restore the + // argument registers. + mov x12, x0 + + // Restore floating point registers + ldp d0, d1, [sp, #(FLOAT_ARG_OFFSET )] + ldp d2, d3, [sp, #(FLOAT_ARG_OFFSET + 0x10)] + ldp d4, d5, [sp, #(FLOAT_ARG_OFFSET + 0x20)] + ldp d6, d7, [sp, #(FLOAT_ARG_OFFSET + 0x30)] + + // Restore the argument registers + ldp x0, x1, [sp, #(ARGUMENT_REGISTERS_OFFSET )] + ldp x2, x3, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x10)] + ldp x4, x5, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x20)] + ldp x6, x7, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x30)] + ldr x8, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x40)] + + // Restore FP and LR registers, and free the allocated stack block + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #STACK_SIZE + + // Tailcall to the target address. + // TODO EPILOG_NOP + br x12 + + NESTED_END Rhp\FunctionName, _TEXT + + .endm + + // To enable proper step-in behavior in the debugger, we need to have two instances + // of the thunk. For the first one, the debugger steps into the call in the function, + // for the other, it steps over it. + UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + diff --git a/src/Native/Runtime/arm64/WriteBarriers.S b/src/Native/Runtime/arm64/WriteBarriers.S index a14d99d7ef4..a1e3c103673 100644 --- a/src/Native/Runtime/arm64/WriteBarriers.S +++ b/src/Native/Runtime/arm64/WriteBarriers.S @@ -1,33 +1,368 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// TODO: Implement Unix write barriers #include -LEAF_ENTRY RhpAssignRef, _TEXT - str x1, [x0] - ret -LEAF_END RhpAssignRef, _TEXT +// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used +// during garbage collections to verify that object references where never written to the heap without using a +// write barrier. 
Note that we are potentially racing to update the shadow heap while other threads are writing +// new references to the real heap. Since this can not be solved perfectly without critical sections around the +// entire update process, we instead update the shadow location and then re-check the real location (as two +// ordered operations) and if there is a disparity we will re-write the shadow location with a special value +// (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC +// time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the +// shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. +#ifdef WRITE_BARRIER_CHECK -LEAF_ENTRY RhpCheckedAssignRef, _TEXT - str x1, [x0] - ret -LEAF_END RhpCheckedAssignRef, _TEXT +// TODO + + SETALIAS g_GCShadow, ?g_GCShadow@@3PEAEEA + SETALIAS g_GCShadowEnd, ?g_GCShadowEnd@@3PEAEEA + EXTERN $g_GCShadow + EXTERN $g_GCShadowEnd + +INVALIDGCVALUE EQU 0xCCCCCCCD + + MACRO + // On entry: + // $destReg: location to be updated + // $refReg: objectref to be stored + // + // On exit: + // x9,x10: trashed + // other registers are preserved + // + UPDATE_GC_SHADOW $destReg, $refReg + + // If g_GCShadow is 0, don't perform the check. + adrp x9, $g_GCShadow + ldr x9, [x9, $g_GCShadow] + cbz x9, %ft1 + + // Save $destReg since we're about to modify it (and we need the original value both within the macro and + // once we exit the macro). + mov x10, $destReg + + // Transform $destReg into the equivalent address in the shadow heap. + adrp x9, g_lowest_address + ldr x9, [x9, g_lowest_address] + subs $destReg, $destReg, x9 + blt %ft0 + + adrp x9, $g_GCShadow + ldr x9, [x9, $g_GCShadow] + add $destReg, $destReg, x9 + + adrp x9, $g_GCShadowEnd + ldr x9, [x9, $g_GCShadowEnd] + cmp $destReg, x9 + bgt %ft0 + + // Update the shadow heap. + str $refReg, [$destReg] + + // The following read must be strongly ordered wrt to the write we have just performed in order to + // prevent race conditions. + dmb ish + + // Now check that the real heap location still contains the value we just wrote into the shadow heap. + mov x9, x10 + ldr x9, [x9] + cmp x9, $refReg + beq %ft0 + + // Someone went and updated the real heap. We need to invalidate the shadow location since we can not + // guarantee whose shadow update won. + MOVL64 x9, INVALIDGCVALUE, 0 + str x9, [$destReg] + +0 + // Restore original $destReg value + mov $destReg, x10 + +1 + MEND + +#else // WRITE_BARRIER_CHECK + + .macro UPDATE_GC_SHADOW destReg, refReg + .endm + +#endif // WRITE_BARRIER_CHECK + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +// name of the register that points to the location to be updated and the name of the register that holds the +// object reference (this should be in upper case as it is used in the definition of the name of the helper). + +// Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for +// some interlocked helpers that need an inline barrier. 
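+// In C terms, the unchecked barrier core below is approximately the following
+// (illustrative only; the globals are the same ones the macro references):
+//
+//   void WriteBarrierCore(Object** dst, Object* ref)
+//   {
+//       UPDATE_GC_SHADOW(dst, ref);                    // debug-only heap verification
+//       if (ref < g_ephemeral_low || ref >= g_ephemeral_high)
+//           return;                                    // non-ephemeral target: no card needed
+//       uint8_t* card = g_card_table + ((uintptr_t)dst >> 11);
+//       if (*card != 0xFF)
+//           *card = 0xFF;                              // mark card only if not already marked
+//   }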
+
+    // On entry:
+    // destReg: location to be updated
+    // refReg: objectref to be stored
+    // trash: register number that can be trashed
+    // trash2: register that can be trashed
+    //
+    // On exit:
+    // destReg: trashed
+    //
+    .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg, trash, trash2
+    // Update the shadow copy of the heap with the same value just written to the real heap. (A no-op unless
+    // we are in a debug build and write barrier checking has been enabled.)
+    UPDATE_GC_SHADOW \destReg, \refReg
+
+    // We can skip the card table write if the reference is to
+    // an object not on the ephemeral segment.
+    adrp x\trash, g_ephemeral_low
+    add x\trash, x\trash, :lo12:g_ephemeral_low
+    ldr x\trash, [x\trash]
+    cmp \refReg, x\trash
+    blt 0f
+
+    adrp x\trash, g_ephemeral_high
+    add x\trash, x\trash, :lo12:g_ephemeral_high
+    ldr x\trash, [x\trash]
+    cmp \refReg, x\trash
+    bge 0f
+
+    // Set this object's card, if it has not already been set.
+
+    adrp x\trash, g_card_table
+    add x\trash, x\trash, :lo12:g_card_table
+    ldr x\trash, [x\trash]
+    add \trash2, x\trash, \destReg, lsr #11
+
+    // Check that this card has not already been written. Avoiding useless writes is a big win on
+    // multi-proc systems since it avoids cache thrashing.
+    ldrb w\trash, [\trash2]
+    cmp x\trash, 0xFF
+    beq 0f
+
+    mov x\trash, 0xFF
+    strb w\trash, [\trash2]
+0:
+    // Exit label
+    .endm
+
+    // On entry:
+    // destReg: location to be updated
+    // refReg: objectref to be stored
+    // trash: register number that can be trashed
+    // trash2: register that can be trashed
+    //
+    // On exit:
+    // destReg: trashed
+    //
+    .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg, trash, trash2
+
+    // The "check" of this checked write barrier - is destReg
+    // within the heap? If not, early out.
+    adrp x\trash, g_lowest_address
+    add x\trash, x\trash, :lo12:g_lowest_address
+    ldr x\trash, [x\trash]
+    cmp \destReg, x\trash
+    blt 0f
+
+    adrp x\trash, g_highest_address
+    add x\trash, x\trash, :lo12:g_highest_address
+    ldr x\trash, [x\trash]
+    cmp \destReg, x\trash
+    bgt 0f
+
+    INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg, \trash, \trash2
+
+0:
+    // Exit label
+    .endm
+
+// RhpCheckedAssignRef(Object** dst, Object* src)
+//
+// Write barrier for writes to objects that may reside
+// on the managed heap.
+//
+// On entry:
+// x0 : the destination address (LHS of the assignment).
+// May not be an object reference (hence the checked).
+// x1 : the object reference (RHS of the assignment).
+// On exit:
+// x1 : trashed
+// x9 : trashed
+    LEAF_ENTRY RhpCheckedAssignRef, _TEXT
+    ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+    ALTERNATE_ENTRY RhpCheckedAssignRefX1
+    ALTERNATE_ENTRY RhpCheckedAssignRefX1AVLocation
+
+    stlr x1, [x0]
+
+    INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x1
+
+    ret
+
+    LEAF_END RhpCheckedAssignRef, _TEXT
+
+// RhpAssignRef(Object** dst, Object* src)
+//
+// Write barrier for writes to objects that are known to
+// reside on the managed heap.
+//
+// On entry:
+// x0 : the destination address (LHS of the assignment).
+// x1 : the object reference (RHS of the assignment).
// On exit: -// x0, x1 are incremented by 8, -// x3: trashed +// x1 : trashed +// x9 : trashed + LEAF_ENTRY RhpAssignRef, _TEXT + ALTERNATE_ENTRY RhpAssignRefAVLocation + ALTERNATE_ENTRY RhpAssignRefX1 + ALTERNATE_ENTRY RhpAssignRefX1AVLocation + + stlr x1, [x0] + + INSERT_UNCHECKED_WRITE_BARRIER_CORE x0, x1, 9, x1 + + ret + + LEAF_END RhpAssignRef, _TEXT + +// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon +// successful updates. + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation +// - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + +// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand) +// +// Interlocked compare exchange on objectref. +// +// On entry: +// x0: pointer to objectref +// x1: exchange value +// x2: comparand +// +// On exit: +// x0: original value of objectref +// x9: trashed +// x10: trashed +// + LEAF_ENTRY RhpCheckedLockCmpXchg + ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation + +CmpXchgRetry: + // Check location value is what we expect. + ldaxr x10, [x0] + cmp x10, x2 + bne CmpXchgNoUpdate + + // Current value matches comparand, attempt to update with the new value. + stlxr w9, x1, [x0] + cbnz w9, CmpXchgRetry + + // We have successfully updated the value of the objectref so now we need a GC write barrier. + // The following barrier code takes the destination in x0 and the value in x1 so the arguments are + // already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x0 + +CmpXchgNoUpdate: + // x10 still contains the original value. + mov x0, x10 + ret lr + + LEAF_END RhpCheckedLockCmpXchg, _TEXT + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation +// - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + +// RhpCheckedXchg(Object** destination, Object* value) +// +// Interlocked exchange on objectref. +// +// On entry: +// x0: pointer to objectref +// x1: exchange value +// +// On exit: +// x0: original value of objectref +// x9: trashed +// x10: trashed +// + LEAF_ENTRY RhpCheckedXchg, _TEXT + ALTERNATE_ENTRY RhpCheckedXchgAVLocation + +ExchangeRetry: + // Read the existing memory location. + ldaxr x10, [x0] + + // Attempt to update with the new value. + stlxr w9, x1, [x0] + cbnz w9, ExchangeRetry + + // We have successfully updated the value of the objectref so now we need a GC write barrier. + // The following barrier code takes the destination in x0 and the value in x1 so the arguments are + // already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x0 + + // x10 still contains the original value. 
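+    // (The ldaxr/stlxr pair above is an ARM64 load-exclusive/store-exclusive loop:
+    // stlxr writes 0 to w9 on success and 1 if the exclusive monitor was lost, in
+    // which case we retry. In C++ terms the whole loop is roughly
+    //     old = std::atomic_exchange(dst, value);
+    // with acquire/release ordering - a sketch for intuition, not part of the patch.)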
+ mov x0, x10 + ret + + LEAF_END RhpCheckedXchg, _TEXT + +LEAF_ENTRY RhpAssignRefArm64, _TEXT + stlr x15, [x14] + + INSERT_UNCHECKED_WRITE_BARRIER_CORE x14, x15, 12, X14 + + ret +LEAF_END RhpAssignRefArm64, _TEXT + +// void JIT_CheckedWriteBarrier(Object** dst, Object* src) +// On entry: +// x14 : the destination address (LHS of the assignment) +// x15 : the object reference (RHS of the assignment) +// +// On exit: +// x12 : trashed +// x14 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract) +// x15 : trashed +// x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP // -LEAF_ENTRY RhpByRefAssignRef, _TEXT - ldr x3, [x1], #8 - str x3, [x0], #8 +LEAF_ENTRY RhpCheckedAssignRefArm64, _TEXT + + stlr x15, [x14] + + INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12, X15 + + add x14, x14, #8 + + ret +LEAF_END RhpCheckedAssignRefArm64, _TEXT + +// void JIT_ByRefWriteBarrier +// On entry: +// x13 : the source address (points to object reference to write) +// x14 : the destination address (object reference written here) +// +// On exit: +// x12 : trashed +// x13 : incremented by 8 +// x14 : incremented by 8 +// x15 : trashed +// x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +// +LEAF_ENTRY RhpByRefAssignRefArm64, _TEXT + ldr x15, [x13] + str x15, [x14] + + INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12, X15 + + add X13, x13, #8 + add x14, x14, #8 + ret -LEAF_END RhpByRefAssignRef, _TEXT +LEAF_END RhpByRefAssignRefArm64, _TEXT diff --git a/src/Native/Runtime/unix/unixasmmacrosarm64.inc b/src/Native/Runtime/unix/unixasmmacrosarm64.inc index d031a77085e..becaa4c3c17 100644 --- a/src/Native/Runtime/unix/unixasmmacrosarm64.inc +++ b/src/Native/Runtime/unix/unixasmmacrosarm64.inc @@ -137,3 +137,70 @@ C_FUNC(\Name): br \reg .endm + +#define xip0 x16 +#define xip1 x17 +#define xpr x18 + +.macro PREPARE_INLINE_GETTHREAD +.global tls_CurrentThread +.endm + +.macro INLINE_GETTHREAD target + mrs \target, tpidr_el0 + add \target, \target, #:tprel_hi12:tls_CurrentThread, lsl #12 + add \target, \target, #:tprel_lo12_nc:tls_CurrentThread +.endm + +.macro EXPORT_POINTER_TO_ADDRESS Name + +1: + + .data + .align 8 +C_FUNC(\Name): + .word 1b + .global C_FUNC(\Name) + .text +.endm + +// Note: these must match the defs in PInvokeTransitionFrameFlags +PTFF_SAVE_SP = 0x00000400 +PTFF_SAVE_ALL_PRESERVED = 0x000003FF // NOTE: x19-x28 + +DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP + +.macro PUSH_COOP_PINVOKE_FRAME trashReg + + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x80 // Push down stack pointer and store FP and LR + + // 0x10 bytes reserved for Thread* and flags + + // Save callee saved registers + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + + // Save the value of SP before stack allocation to the last slot in the frame (slot #15) + add \trashReg, sp, #0x80 + str \trashReg, [sp, #0x70] + + // Record the bitmask of saved registers in the frame (slot #3) + mov \trashReg, #DEFAULT_FRAME_SAVE_FLAGS + str \trashReg, [sp, #0x18] + + mov \trashReg, sp +.endm + +// Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME +.macro POP_COOP_PINVOKE_FRAME + + EPILOG_RESTORE_REG_PAIR x19, x20, #0x20 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x30 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x40 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x50 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x60 + 
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x80 +.endm From 6c9e2354a1b2afa9d378c88f01baccfbc53ef804 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Tue, 4 Aug 2020 08:29:50 +0200 Subject: [PATCH 07/36] ARM64 unix stack walk --- src/Native/Runtime/unix/UnixContext.cpp | 4 +- src/Native/Runtime/unix/UnwindHelpers.cpp | 251 +++++++++++------- .../libunwind/src/DwarfInstructions.hpp | 18 +- 3 files changed, 166 insertions(+), 107 deletions(-) diff --git a/src/Native/Runtime/unix/UnixContext.cpp b/src/Native/Runtime/unix/UnixContext.cpp index 458214bbe56..08b98ef6365 100644 --- a/src/Native/Runtime/unix/UnixContext.cpp +++ b/src/Native/Runtime/unix/UnixContext.cpp @@ -301,7 +301,7 @@ bool GetUnwindProcInfo(PCODE ip, unw_proc_info_t *procInfo) #elif HOST_ARM ((uint32_t*)(unwContext.data))[15] = ip; #elif HOST_ARM64 - ((uint32_t*)(unwContext.data))[32] = ip; + unwContext.data[32] = ip; #elif HOST_WASM ASSERT(false); #elif HOST_X86 @@ -618,7 +618,7 @@ bool FindProcInfo(UIntNative controlPC, UIntNative* startAddress, UIntNative* ls assert((procInfo.start_ip <= controlPC) && (controlPC < procInfo.end_ip)); -#if defined(HOST_ARM) || defined(HOST_ARM64) +#if defined(HOST_ARM) // libunwind fills by reference not by value for ARM *lsda = *((UIntNative *)procInfo.lsda); #else diff --git a/src/Native/Runtime/unix/UnwindHelpers.cpp b/src/Native/Runtime/unix/UnwindHelpers.cpp index ced22cc272c..9ed75c6a1d7 100644 --- a/src/Native/Runtime/unix/UnwindHelpers.cpp +++ b/src/Native/Runtime/unix/UnwindHelpers.cpp @@ -475,229 +475,284 @@ void Registers_arm_rt::setRegister(int num, uint32_t value, uint32_t location) #if defined(TARGET_ARM64) -class Registers_arm64_rt: public libunwind::Registers_arm64 { -public: - Registers_arm64_rt() { abort(); }; - Registers_arm64_rt(const void *registers); +// Shim that implements methods required by libunwind over REGDISPLAY +struct Registers_REGDISPLAY : REGDISPLAY +{ + inline static int getArch() { return libunwind::REGISTERS_ARM64; } + inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } + + bool validRegister(int num) const; + bool validFloatRegister(int num) { return false; }; + bool validVectorRegister(int num) const; - bool validRegister(int num) {abort();}; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value, uint64_t location); - bool validFloatRegister(int num) {abort();}; + double getFloatRegister(int num) {abort();} void setFloatRegister(int num, double value) {abort();} - bool validVectorRegister(int num) const {abort();} - libunwind::v128 getVectorRegister(int num) const {abort();}; - void setVectorRegister(int num, libunwind::v128 value) {abort();}; - void jumpto() { abort();}; + + libunwind::v128 getVectorRegister(int num) const; + void setVectorRegister(int num, libunwind::v128 value); - uint64_t getSP() const { return regs->SP;} - void setSP(uint64_t value, uint64_t location) { regs->SP = value;} - uint64_t getIP() const { return regs->IP;} + uint64_t getSP() const { return SP;} + void setSP(uint64_t value, uint64_t location) { SP = value;} + uint64_t getIP() const { return IP;} void setIP(uint64_t value, uint64_t location) - { regs->IP = value; regs->pIP = (PTR_UIntNative)location; } - void saveVFPAsX() {abort();}; -private: - REGDISPLAY *regs; + { IP = value; pIP = (PTR_UIntNative)location; } }; -inline Registers_arm64_rt::Registers_arm64_rt(const void *registers) { - regs = (REGDISPLAY *)registers; +inline bool Registers_REGDISPLAY::validRegister(int num) const { + if (num == UNW_REG_SP 
|| num == UNW_ARM64_SP) + return true; + + if (num == UNW_ARM64_FP) + return true; + + if (num == UNW_ARM64_LR) + return true; + + if (num == UNW_REG_IP) + return true; + + if (num >= UNW_ARM64_X0 && num <= UNW_ARM64_X28) + return true; + + return false; +} + +bool Registers_REGDISPLAY::validVectorRegister(int num) const +{ + if (num >= UNW_ARM64_D8 && num <= UNW_ARM64_D15) + return true; + + return false; } -inline uint64_t Registers_arm64_rt::getRegister(int regNum) const { +inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { if (regNum == UNW_REG_SP || regNum == UNW_ARM64_SP) - return regs->SP; + return SP; + + if (regNum == UNW_ARM64_FP) + return *pFP; if (regNum == UNW_ARM64_LR) - return *regs->pLR; + return *pLR; if (regNum == UNW_REG_IP) - return regs->IP; + return IP; switch (regNum) { case (UNW_ARM64_X0): - return *regs->pX0; + return *pX0; case (UNW_ARM64_X1): - return *regs->pX1; + return *pX1; case (UNW_ARM64_X2): - return *regs->pX2; + return *pX2; case (UNW_ARM64_X3): - return *regs->pX3; + return *pX3; case (UNW_ARM64_X4): - return *regs->pX4; + return *pX4; case (UNW_ARM64_X5): - return *regs->pX5; + return *pX5; case (UNW_ARM64_X6): - return *regs->pX6; + return *pX6; case (UNW_ARM64_X7): - return *regs->pX7; + return *pX7; case (UNW_ARM64_X8): - return *regs->pX8; + return *pX8; case (UNW_ARM64_X9): - return *regs->pX9; + return *pX9; case (UNW_ARM64_X10): - return *regs->pX10; + return *pX10; case (UNW_ARM64_X11): - return *regs->pX11; + return *pX11; case (UNW_ARM64_X12): - return *regs->pX12; + return *pX12; case (UNW_ARM64_X13): - return *regs->pX13; + return *pX13; case (UNW_ARM64_X14): - return *regs->pX14; + return *pX14; case (UNW_ARM64_X15): - return *regs->pX15; + return *pX15; case (UNW_ARM64_X16): - return *regs->pX16; + return *pX16; case (UNW_ARM64_X17): - return *regs->pX17; + return *pX17; case (UNW_ARM64_X18): - return *regs->pX18; + return *pX18; case (UNW_ARM64_X19): - return *regs->pX19; + return *pX19; case (UNW_ARM64_X20): - return *regs->pX20; + return *pX20; case (UNW_ARM64_X21): - return *regs->pX21; + return *pX21; case (UNW_ARM64_X22): - return *regs->pX22; + return *pX22; case (UNW_ARM64_X23): - return *regs->pX23; + return *pX23; case (UNW_ARM64_X24): - return *regs->pX24; + return *pX24; case (UNW_ARM64_X25): - return *regs->pX25; + return *pX25; case (UNW_ARM64_X26): - return *regs->pX26; + return *pX26; case (UNW_ARM64_X27): - return *regs->pX27; + return *pX27; case (UNW_ARM64_X28): - return *regs->pX28; + return *pX28; } PORTABILITY_ASSERT("unsupported arm64 register"); } -void Registers_arm64_rt::setRegister(int num, uint64_t value, uint64_t location) +void Registers_REGDISPLAY::setRegister(int num, uint64_t value, uint64_t location) { - if (num == UNW_REG_SP || num == UNW_ARM64_SP) { - regs->SP = (UIntNative )value; + SP = (UIntNative )value; + return; + } + + if (num == UNW_ARM64_FP) { + pFP = (PTR_UIntNative)location; return; } if (num == UNW_ARM64_LR) { - regs->pLR = (PTR_UIntNative)location; + pLR = (PTR_UIntNative)location; return; } if (num == UNW_REG_IP) { - regs->IP = value; - /* the location could be NULL, we could try to recover - pointer to value in stack from pLR */ - if ((!location) && (regs->pLR) && (*regs->pLR == value)) - regs->pIP = regs->pLR; - else - regs->pIP = (PTR_UIntNative)location; + IP = value; return; } switch (num) { case (UNW_ARM64_X0): - regs->pX0 = (PTR_UIntNative)location; + pX0 = (PTR_UIntNative)location; break; case (UNW_ARM64_X1): - regs->pX1 = (PTR_UIntNative)location; + pX1 = 
(PTR_UIntNative)location; break; case (UNW_ARM64_X2): - regs->pX2 = (PTR_UIntNative)location; + pX2 = (PTR_UIntNative)location; break; case (UNW_ARM64_X3): - regs->pX3 = (PTR_UIntNative)location; + pX3 = (PTR_UIntNative)location; break; case (UNW_ARM64_X4): - regs->pX4 = (PTR_UIntNative)location; + pX4 = (PTR_UIntNative)location; break; case (UNW_ARM64_X5): - regs->pX5 = (PTR_UIntNative)location; + pX5 = (PTR_UIntNative)location; break; case (UNW_ARM64_X6): - regs->pX6 = (PTR_UIntNative)location; + pX6 = (PTR_UIntNative)location; break; case (UNW_ARM64_X7): - regs->pX7 = (PTR_UIntNative)location; + pX7 = (PTR_UIntNative)location; break; case (UNW_ARM64_X8): - regs->pX8 = (PTR_UIntNative)location; + pX8 = (PTR_UIntNative)location; break; case (UNW_ARM64_X9): - regs->pX9 = (PTR_UIntNative)location; + pX9 = (PTR_UIntNative)location; break; case (UNW_ARM64_X10): - regs->pX10 = (PTR_UIntNative)location; + pX10 = (PTR_UIntNative)location; break; case (UNW_ARM64_X11): - regs->pX11 = (PTR_UIntNative)location; + pX11 = (PTR_UIntNative)location; break; case (UNW_ARM64_X12): - regs->pX12 = (PTR_UIntNative)location; + pX12 = (PTR_UIntNative)location; break; case (UNW_ARM64_X13): - regs->pX13 = (PTR_UIntNative)location; + pX13 = (PTR_UIntNative)location; break; case (UNW_ARM64_X14): - regs->pX14 = (PTR_UIntNative)location; + pX14 = (PTR_UIntNative)location; break; case (UNW_ARM64_X15): - regs->pX15 = (PTR_UIntNative)location; + pX15 = (PTR_UIntNative)location; break; case (UNW_ARM64_X16): - regs->pX16 = (PTR_UIntNative)location; + pX16 = (PTR_UIntNative)location; break; case (UNW_ARM64_X17): - regs->pX17 = (PTR_UIntNative)location; + pX17 = (PTR_UIntNative)location; break; case (UNW_ARM64_X18): - regs->pX18 = (PTR_UIntNative)location; + pX18 = (PTR_UIntNative)location; break; case (UNW_ARM64_X19): - regs->pX19 = (PTR_UIntNative)location; + pX19 = (PTR_UIntNative)location; break; case (UNW_ARM64_X20): - regs->pX20 = (PTR_UIntNative)location; + pX20 = (PTR_UIntNative)location; break; case (UNW_ARM64_X21): - regs->pX21 = (PTR_UIntNative)location; + pX21 = (PTR_UIntNative)location; break; case (UNW_ARM64_X22): - regs->pX22 = (PTR_UIntNative)location; + pX22 = (PTR_UIntNative)location; break; case (UNW_ARM64_X23): - regs->pX23 = (PTR_UIntNative)location; + pX23 = (PTR_UIntNative)location; break; case (UNW_ARM64_X24): - regs->pX24 = (PTR_UIntNative)location; + pX24 = (PTR_UIntNative)location; break; case (UNW_ARM64_X25): - regs->pX25 = (PTR_UIntNative)location; + pX25 = (PTR_UIntNative)location; break; case (UNW_ARM64_X26): - regs->pX26 = (PTR_UIntNative)location; + pX26 = (PTR_UIntNative)location; break; case (UNW_ARM64_X27): - regs->pX27 = (PTR_UIntNative)location; + pX27 = (PTR_UIntNative)location; break; case (UNW_ARM64_X28): - regs->pX28 = (PTR_UIntNative)location; + pX28 = (PTR_UIntNative)location; break; default: PORTABILITY_ASSERT("unsupported arm64 register"); } } +libunwind::v128 Registers_REGDISPLAY::getVectorRegister(int num) const +{ + num -= UNW_ARM64_D8; + + if (num < 0 || num >= sizeof(D) / sizeof(UInt64)) + { + PORTABILITY_ASSERT("unsupported arm64 vector register"); + } + + libunwind::v128 result; + + result.vec[0] = 0; + result.vec[1] = 0; + result.vec[2] = D[num] >> 32; + result.vec[3] = D[num] & 0xFFFFFFFF; + + return result; +} + +void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) +{ + num -= UNW_ARM64_D8; + + if (num < 0 || num >= sizeof(D) / sizeof(UInt64)) + { + PORTABILITY_ASSERT("unsupported arm64 vector register"); + } + + D[num] = (UInt64)value.vec[2] << 
32 | (UInt64)value.vec[3]; +} + #endif // TARGET_ARM64 bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs) @@ -707,7 +762,7 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs #elif defined(TARGET_ARM) libunwind::UnwindCursor uc(_addressSpace, regs); #elif defined(TARGET_ARM64) - libunwind::UnwindCursor uc(_addressSpace, regs); + libunwind::UnwindCursor uc(_addressSpace, regs); #elif defined(HOST_X86) libunwind::UnwindCursor uc(_addressSpace, regs); #else @@ -724,10 +779,7 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs unw_proc_info_t procInfo; uc.getInfo(&procInfo); -#if defined(TARGET_ARM64) - DwarfInstructions dwarfInst; - int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm64_rt*)regs); -#elif defined(TARGET_ARM) +#if defined(TARGET_ARM) DwarfInstructions dwarfInst; int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm_rt*)regs); #else @@ -740,7 +792,12 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs return false; } +#if defined(TARGET_ARM64) + regs->SetAddrOfIP(regs->pLR); +#else regs->pIP = PTR_PCODE(regs->SP - sizeof(TADDR)); +#endif + #elif defined(_LIBUNWIND_ARM_EHABI) uc.setInfoBasedOnIPRegister(true); int stepRet = uc.step(); diff --git a/src/Native/libunwind/src/DwarfInstructions.hpp b/src/Native/libunwind/src/DwarfInstructions.hpp index c5cc6c9d510..f341772824c 100644 --- a/src/Native/libunwind/src/DwarfInstructions.hpp +++ b/src/Native/libunwind/src/DwarfInstructions.hpp @@ -169,6 +169,7 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, // restore registers that DWARF says were saved R newRegisters = registers; pint_t returnAddress = 0; + pint_t returnAddressLocation = 0; const int lastReg = R::lastDwarfRegNum(); assert(static_cast(CFI_Parser::kMaxRegisterNumber) >= lastReg && "register range too large"); @@ -177,7 +178,14 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, for (int i = 0; i <= lastReg; ++i) { if (prolog.savedRegisters[i].location != CFI_Parser::kRegisterUnused) { - if (registers.validFloatRegister(i)) + if (i == (int)cieInfo.returnAddressRegister) { + returnAddress = getSavedRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i], + returnAddressLocation); + + newRegisters.setRegister(i, returnAddress, returnAddressLocation); + } + else if (registers.validFloatRegister(i)) newRegisters.setFloatRegister( i, getSavedFloatRegister(addressSpace, registers, cfa, prolog.savedRegisters[i])); @@ -185,12 +193,6 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, newRegisters.setVectorRegister( i, getSavedVectorRegister(addressSpace, registers, cfa, prolog.savedRegisters[i])); - else if (i == (int)cieInfo.returnAddressRegister) { - pint_t dummyLocation; - returnAddress = getSavedRegister(addressSpace, registers, cfa, - prolog.savedRegisters[i], - dummyLocation); - } else if (registers.validRegister(i)) { pint_t value; pint_t location; @@ -272,7 +274,7 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, // Return address is address after call site instruction, so setting IP to // that does simualates a return. - newRegisters.setIP(returnAddress, 0); + newRegisters.setIP(returnAddress, returnAddressLocation); // Simulate the step by replacing the register set with the new ones. 
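+    // (returnAddressLocation lets the unwinder record not just the return address but
+    // also the stack slot it was restored from; Registers_REGDISPLAY::setIP above uses
+    // that slot to keep REGDISPLAY::pIP pointing at the caller's saved PC.)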
registers = newRegisters; From aba22ac1f32d3aee6a56c1e062ae58d6a14e5827 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Tue, 4 Aug 2020 15:13:43 +0200 Subject: [PATCH 08/36] ARM64 Exception Handling --- src/ILCompiler/src/Program.cs | 4 +- src/Native/Runtime/arm64/ExceptionHandling.S | 616 +++++++++++++++++- src/Native/Runtime/arm64/PInvoke.S | 7 - src/Native/Runtime/arm64/StubDispatch.S | 57 -- .../Runtime/unix/UnixNativeCodeManager.cpp | 2 +- .../Runtime/unix/unixasmmacrosarm64.inc | 36 + 6 files changed, 654 insertions(+), 68 deletions(-) diff --git a/src/ILCompiler/src/Program.cs b/src/ILCompiler/src/Program.cs index 484924561df..0f5f9d08035 100644 --- a/src/ILCompiler/src/Program.cs +++ b/src/ILCompiler/src/Program.cs @@ -778,8 +778,8 @@ private int Run(string[] args) "EETypes", "scanned", "compiled", type => !(type.GetTypeDefinition() is EcmaType)); } - if (scanningFail) - throw new Exception("Scanning failure"); + // if (scanningFail) + // throw new Exception("Scanning failure"); } if (debugInfoProvider is IDisposable) diff --git a/src/Native/Runtime/arm64/ExceptionHandling.S b/src/Native/Runtime/arm64/ExceptionHandling.S index 876f2dfbcb8..7e688cffd94 100644 --- a/src/Native/Runtime/arm64/ExceptionHandling.S +++ b/src/Native/Runtime/arm64/ExceptionHandling.S @@ -1,4 +1,618 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// TODO: Implement +#include +#include "AsmOffsets.inc" + +#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 15)&(~15)) + +#define HARDWARE_EXCEPTION 1 +#define SOFTWARE_EXCEPTION 0 + +.global RhpTrapThreads + +// ----------------------------------------------------------------------------- +// Macro used to create frame of exception throwing helpers (RhpThrowEx, RhpThrowHwEx) + .macro ALLOC_THROW_FRAME exceptionType + + mov x3, sp + + // Setup a PAL_LIMITED_CONTEXT on the stack { + .if \exceptionType == HARDWARE_EXCEPTION + sub sp,sp,#0x50 + stp x3, x1, [sp] // x3 is the SP and x1 is the IP of the fault site + // TODO PROLOG_PUSH_MACHINE_FRAME + .else + PROLOG_STACK_ALLOC 0x50 + stp x3, lr, [sp] // x3 is the SP and lr is the IP of the fault site + .endif + stp d8, d9, [sp, #0x10] + stp d10, d11, [sp, #0x20] + stp d12, d13, [sp, #0x30] + stp d14, d15, [sp, #0x40] + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x70 + stp xzr, xzr, [sp, #0x10] // locations reserved for return value, not used for exception handling + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + // } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + .endm + +// ----------------------------------------------------------------------------- +// Macro used to create frame of funclet calling helpers (RhpCallXXXXFunclet) +// extraStackSize - extra stack space that the user of the macro can use to +// store additional registers + .macro ALLOC_CALL_FUNCLET_FRAME extraStackSize + + // Using below prolog instead of PROLOG_SAVE_REG_PAIR fp,lr, #-60! + // is intentional. Above statement would also emit instruction to save + // sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body + // of method. However, this method needs to be able to change fp before calling funclet. + // This is required to access locals in funclet. + // TODO PROLOG_SAVE_REG_PAIR_NO_FP fp,lr, #-0x60! 
+ PROLOG_SAVE_REG_PAIR_INDEXED fp,lr, #-0x60 + PROLOG_SAVE_REG_PAIR x19, x20, #0x10 + PROLOG_SAVE_REG_PAIR x21, x22, #0x20 + PROLOG_SAVE_REG_PAIR x23, x24, #0x30 + PROLOG_SAVE_REG_PAIR x25, x26, #0x40 + PROLOG_SAVE_REG_PAIR x27, x28, #0x50 + mov fp, sp + + .if \extraStackSize != 0 + PROLOG_STACK_ALLOC \extraStackSize + .endif + .endm + +// ----------------------------------------------------------------------------- +// Macro used to free frame of funclet calling helpers (RhpCallXXXXFunclet) +// extraStackSize - extra stack space that the user of the macro can use to +// store additional registers. +// It needs to match the value passed to the corresponding +// ALLOC_CALL_FUNCLET_FRAME. + .macro FREE_CALL_FUNCLET_FRAME extraStackSize + + .if \extraStackSize != 0 + EPILOG_STACK_FREE \extraStackSize + .endif + + EPILOG_RESTORE_REG_PAIR x19, x20, #0x10 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x20 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x30 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x40 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x50 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x60 + .endm + + +// ----------------------------------------------------------------------------- +// Macro used to restore preserved general purpose and FP registers from REGDISPLAY +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro RESTORE_PRESERVED_REGISTERS regdisplayReg + + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + ldr x19, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + ldr x20, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + ldr x21, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + ldr x22, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + ldr x23, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX24] + ldr x24, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + ldr x25, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + ldr x26, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + ldr x27, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + ldr x28, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + ldr fp, [x12] + // + // load FP preserved regs + // + add x12, \regdisplayReg, #OFFSETOF__REGDISPLAY__D + ldp d8, d9, [x12, #0x00] + ldp d10, d11, [x12, #0x10] + ldp d12, d13, [x12, #0x20] + ldp d14, d15, [x12, #0x30] + .endm + +// ----------------------------------------------------------------------------- +// Macro used to save preserved general purpose and FP registers to REGDISPLAY +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro SAVE_PRESERVED_REGISTERS regdisplayReg + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + str x19, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + str x20, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + str x21, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + str x22, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + str x23, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX24] + str x24, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + str x25, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + str x26, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + str x27, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + str x28, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + str fp, [x12] + // + // store vfp preserved regs + // + add x12, 
\regdisplayReg, #OFFSETOF__REGDISPLAY__D + stp d8, d9, [x12, #0x00] + stp d10, d11, [x12, #0x10] + stp d12, d13, [x12, #0x20] + stp d14, d15, [x12, #0x30] + .endm + + +// ----------------------------------------------------------------------------- +// Macro used to thrash preserved general purpose registers in REGDISPLAY +// to make sure nobody uses them +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro TRASH_PRESERVED_REGISTERS_STORAGE regdisplayReg + +#if 0 // def _DEBUG // @TODO: temporarily removed because trashing the frame pointer breaks the debugger + movz x3, #0xbaad, LSL #48 + movk x3, #0xdeed, LSL #32 + movk x3, #0xbaad, LSL #16 + movk x3, #0xdeed + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX24] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + str x3, [x12] +#endif // _DEBUG + .endm + + + +#define rsp_offsetof_ExInfo 0 +#define rsp_offsetof_Context STACKSIZEOF_ExInfo + +// +// RhpThrowHwEx +// +// INPUT: W0: exception code of fault +// X1: faulting IP +// +// OUTPUT: +// + NESTED_ENTRY RhpThrowHwEx, _TEXT, NoHandler + + ALLOC_THROW_FRAME HARDWARE_EXCEPTION + + // x2 = GetThread() + INLINE_GETTHREAD x2 + + add x1, sp, #rsp_offsetof_ExInfo // x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] // pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] // pExInfo->m_idxCurClause = MaxTryRegionIdx + mov w3, #2 + strb w3, [x1, #OFFSETOF__ExInfo__m_kind] // pExInfo->m_kind = ExKind.HardwareFault + + // link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context // x2 <- PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] // pExInfo->m_pExContext = pContext + + // w0: exception code + // x1: ExInfo* + bl RhThrowHwEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowHwEx2 + + // no return + EMIT_BREAKPOINT + + NESTED_END RhpThrowHwEx, _TEXT + +// +// RhpThrowEx +// +// INPUT: X0: exception object +// +// OUTPUT: +// + + NESTED_ENTRY RhpThrowEx, _TEXT, NoHandler + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + // x2 = GetThread() + INLINE_GETTHREAD x2 + + // There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + // address could have been hijacked when we were in that C# code and we must remove the hijack and + // reflect the correct return address in our exception context record. The other throw helpers don't + // need this because they cannot be tail-called from C#. 
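+
+    // (Background, for readers: hijacking replaces a thread's saved return
+    // address with a runtime stub; the original address and the stack slot it
+    // was taken from are recorded in m_pvHijackedReturnAddress and
+    // m_ppvHijackedReturnAddressLocation, which is exactly what the code
+    // below inspects and repairs.)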
+ + // NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location + // where the tail-calling thread had saved LR, which may not match where we have saved LR. + + ldr x1, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz x1, NotHijacked + + ldr x3, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + + // x0: exception object + // x1: hijacked return address + // x2: pThread + // x3: hijacked return address location + + add x12, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) // re-compute SP at callsite + cmp x3, x12 // if (m_ppvHijackedReturnAddressLocation < SP at callsite) + blo TailCallWasHijacked + + // normal case where a valid return address location is hijacked + str x1, [x3] + b ClearThreadState + +TailCallWasHijacked: + + // Abnormal case where the return address location is now invalid because we ended up here via a tail + // call. In this case, our hijacked return address should be the correct caller of this method. + + // stick the previous return address in LR as well as in the right spots in our PAL_LIMITED_CONTEXT. + mov lr, x1 + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__LR)] + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP)] + +ClearThreadState: + + // clear the Thread's hijack state + str xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str xzr, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + +NotHijacked: + + add x1, sp, #rsp_offsetof_ExInfo // x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] // pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] // pExInfo->m_idxCurClause = MaxTryRegionIdx + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_kind] // pExInfo->m_kind = ExKind.Throw + + // link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context // x2 <- PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] // pExInfo->m_pExContext = pContext + + // x0: exception object + // x1: ExInfo* + bl RhThrowEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowEx2 + + // no return + EMIT_BREAKPOINT + NESTED_END RhpThrowEx, _TEXT + + +// +// void FASTCALL RhpRethrow() +// +// SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +// +// INPUT: +// +// OUTPUT: +// + + NESTED_ENTRY RhpRethrow, _TEXT, NoHandler + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + // x2 = GetThread() + INLINE_GETTHREAD x2 + + add x1, sp, #rsp_offsetof_ExInfo // x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null + strb wzr, [x1, #OFFSETOF__ExInfo__m_kind] // init to a deterministic value (ExKind.None) + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] // pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] // pExInfo->m_idxCurClause = MaxTryRegionIdx + + // link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + mov x0, x3 // x0 <- current ExInfo + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = 
m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context // x2 <- PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] // pExInfo->m_pExContext = pContext + + // x0 contains the currently active ExInfo + // x1 contains the address of the new ExInfo + bl RhRethrow + + EXPORT_POINTER_TO_ADDRESS PointerToRhpRethrow2 + + // no return + EMIT_BREAKPOINT + NESTED_END RhpRethrow, _TEXT + +// +// void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +// ExInfo* pExInfo) +// +// INPUT: X0: exception object +// X1: handler funclet address +// X2: REGDISPLAY* +// X3: ExInfo* +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler + + ALLOC_CALL_FUNCLET_FRAME 0x60 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + stp x0, x2, [sp, #0x40] // x0, x2 & x3 are saved so we have the exception object, REGDISPLAY and + stp x3, xzr, [sp, #0x50] // ExInfo later, xzr makes space for the local "is_not_handling_thread_abort" + +#define rsp_offset_is_not_handling_thread_abort 0x58 +#define rsp_offset_x2 0x48 +#define rsp_offset_x3 0x50 + + // + // clear the DoNotTriggerGc flag, trashes x4-x6 + // + INLINE_GETTHREAD x5 // x5 <- Thread* + + ldr x4, [x5, #OFFSETOF__Thread__m_threadAbortException] + sub x4, x4, x0 + str x4, [sp, #rsp_offset_is_not_handling_thread_abort] // Non-zero if the exception is not ThreadAbortException + + add x12, x5, #OFFSETOF__Thread__m_ThreadStateFlags + +ClearRetry_Catch: + ldxr w4, [x12] + bic w4, w4, #TSF_DoNotTriggerGc + stxr w6, w4, [x12] + cbz w6, ClearSuccess_Catch + b ClearRetry_Catch +ClearSuccess_Catch: + + // + // set preserved regs to the values expected by the funclet + // + RESTORE_PRESERVED_REGISTERS x2 + // + // trash the values at the old homes to make sure nobody uses them + // + TRASH_PRESERVED_REGISTERS_STORAGE x2 + + // + // call the funclet + // + // x0 still contains the exception object + blr x1 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallCatchFunclet2 + + // x0 contains resume IP + + ldr x2, [sp, #rsp_offset_x2] // x2 <- REGDISPLAY* + +// @TODO: add debug-only validation code for ExInfo pop + + INLINE_GETTHREAD x1 // x1 <- Thread* + + // We must unhijack the thread at this point because the section of stack where the hijack is applied + // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
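+    // (INLINE_THREAD_UNHIJACK, added to unixasmmacrosarm64.inc in this change,
+    // stores the original return address back through the recorded location
+    // and clears both hijack fields on the Thread.)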
+    INLINE_THREAD_UNHIJACK x1, x3, x12        // Thread in x1, trashes x3 and x12
+
+    ldr x3, [sp, #rsp_offset_x3]              // x3 <- current ExInfo*
+    ldr x2, [x2, #OFFSETOF__REGDISPLAY__SP]   // x2 <- resume SP value
+
+PopExInfoLoop:
+    ldr x3, [x3, #OFFSETOF__ExInfo__m_pPrevExInfo]  // x3 <- next ExInfo
+    cbz x3, DonePopping                       // if (pExInfo == null) { we're done }
+    cmp x3, x2
+    blt PopExInfoLoop                         // if (pExInfo < resume SP) { keep going }
+
+DonePopping:
+    str x3, [x1, #OFFSETOF__Thread__m_pExInfoStackHead]   // store the new head on the Thread
+
+    adrp x3, RhpTrapThreads
+    add x3, x3, :lo12:RhpTrapThreads
+    ldr w3, [x3]
+    tbz x3, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort
+
+    ldr x3, [sp, #rsp_offset_is_not_handling_thread_abort]
+    cbnz x3, NoAbort
+
+    // It was the ThreadAbortException, so rethrow it
+    // reset SP
+    mov x1, x0        // x1 <- continuation address as exception PC
+    mov w0, #STATUS_REDHAWK_THREAD_ABORT
+    mov sp, x2
+    b RhpThrowHwEx
+
+NoAbort:
+    // reset SP and jump to continuation address
+    mov sp, x2
+    br x0
+
+    NESTED_END RhpCallCatchFunclet, _TEXT
+
+//
+// void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay)
+//
+// INPUT:  X0:  handler funclet address
+//         X1:  REGDISPLAY*
+//
+// OUTPUT:
+//
+
+    NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler
+
+    ALLOC_CALL_FUNCLET_FRAME 0x50
+    stp d8, d9, [sp, #0x00]
+    stp d10, d11, [sp, #0x10]
+    stp d12, d13, [sp, #0x20]
+    stp d14, d15, [sp, #0x30]
+    stp x0, x1, [sp, #0x40]   // x1 is saved so we have the REGDISPLAY later, x0 is just alignment padding
+
+#define rsp_offset_x1 0x48
+
+
+    // We want to suppress hijacking between invocations of subsequent finallys. We do this because we
+    // cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the
+    // method) and then been popped off the stack, leaving behind no trace of its effect.
+    //
+    // So we clear the state before and set it after invocation of the handler.
+ // + + // + // clear the DoNotTriggerGc flag, trashes x2-x4 + // + INLINE_GETTHREAD x2 // x2 <- Thread* + + add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags + +ClearRetry: + ldxr w4, [x12] + bic w4, w4, #TSF_DoNotTriggerGc + stxr w3, w4, [x12] + cbz w3, ClearSuccess + b ClearRetry +ClearSuccess: + + // + // set preserved regs to the values expected by the funclet + // + RESTORE_PRESERVED_REGISTERS x1 + // + // trash the values at the old homes to make sure nobody uses them + // + TRASH_PRESERVED_REGISTERS_STORAGE x1 + + // + // call the funclet + // + blr x0 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFinallyFunclet2 + + ldr x1, [sp, #rsp_offset_x1] // reload REGDISPLAY pointer + + // + // save new values of preserved regs into REGDISPLAY + // + SAVE_PRESERVED_REGISTERS x1 + + // + // set the DoNotTriggerGc flag, trashes x1-x3 + // + INLINE_GETTHREAD x2 // x2 <- Thread* + + add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags +SetRetry: + ldxr w1, [x12] + orr w1, w1, #TSF_DoNotTriggerGc + stxr w3, w1, [x12] + cbz w3, SetSuccess + b SetRetry +SetSuccess: + + ldp d8, d9, [sp, #0x00] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] + + FREE_CALL_FUNCLET_FRAME 0x50 + EPILOG_RETURN + + NESTED_END RhpCallFinallyFunclet, _Text + + +// +// void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +// +// INPUT: X0: exception object +// X1: filter funclet address +// X2: REGDISPLAY* +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler + ALLOC_CALL_FUNCLET_FRAME 0x40 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + + ldr x12, [x2, #OFFSETOF__REGDISPLAY__pFP] + ldr fp, [x12] + + // + // call the funclet + // + // x0 still contains the exception object + blr x1 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFilterFunclet2 + + ldp d8, d9, [sp, #0x00] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] + + FREE_CALL_FUNCLET_FRAME 0x40 + EPILOG_RETURN + + NESTED_END RhpCallFilterFunclet, Text diff --git a/src/Native/Runtime/arm64/PInvoke.S b/src/Native/Runtime/arm64/PInvoke.S index bd2a8551d68..508127601fb 100644 --- a/src/Native/Runtime/arm64/PInvoke.S +++ b/src/Native/Runtime/arm64/PInvoke.S @@ -51,13 +51,6 @@ TSF_Attached_Bit = 0 TSF_SuppressGcStress_Bit = 3 TSF_DoNotTriggerGc_Bit = 4 -// Bit position for the flags above, to be used with tbz / tbnz instructions -TrapThreadsFlags_AbortInProgress_Bit = 0 -TrapThreadsFlags_TrapThreads_Bit = 1 - -// This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT -STATUS_REDHAWK_THREAD_ABORT = 0x43 - ////////////////////////////////////////////////////////////////////////////////////////////////////////////// // // RhpWaitForSuspend -- rare path for RhpPInvoke and RhpReversePInvokeReturn diff --git a/src/Native/Runtime/arm64/StubDispatch.S b/src/Native/Runtime/arm64/StubDispatch.S index b3f910c67a1..25aae897289 100644 --- a/src/Native/Runtime/arm64/StubDispatch.S +++ b/src/Native/Runtime/arm64/StubDispatch.S @@ -11,40 +11,6 @@ .extern RhpCidResolve .extern RhpUniversalTransition_DebugStepTailCall - .macro GET_TLS_DISPATCH_CELL - brk 1 -/* - ldr x9, =_tls_index - ldr w9, [x9] - ldr xip1, [xpr, #__tls_array] - ldr xip1, [xip1, x9, lsl #3] - ldr x9, =SECTIONREL_t_TLS_DispatchCell - ldr x9, [x9] - ldr xip1, [xip1, x9] -*/ - .endm - - .macro SET_TLS_DISPATCH_CELL - // xip1 : Value to be assigned to the TLS variable - brk 1 -/* - ldr x9, =_tls_index - ldr w9, [x9] - 
ldr x10, [xpr, #__tls_array] - ldr x10, [x10, x9, lsl #3] - ldr x9, =SECTIONREL_t_TLS_DispatchCell - ldr x9, [x9] - str xip1, [x10, x9] -*/ - .endm - -/* TODO -SECTIONREL_t_TLS_DispatchCell - DCD t_TLS_DispatchCell - RELOC 8, t_TLS_DispatchCell ;; SECREL - DCD 0 -*/ - // Macro that generates code to check a single cache entry. .macro CHECK_CACHE_ENTRY entry // Check a single entry in the cache. @@ -60,29 +26,6 @@ SECTIONREL_t_TLS_DispatchCell 0: .endm - - LEAF_ENTRY RhpCastableObjectDispatch_CommonStub, _TEXT - // Custom calling convention: - // xip0 has pointer to the current thunks data block - - // store dispatch cell address in thread static - ldr xip1, [xip0] - SET_TLS_DISPATCH_CELL - - // Now load the target address and jump to it. - ldr x9, [xip0, #8] - br x9 - LEAF_END RhpCastableObjectDispatch_CommonStub, _TEXT - - LEAF_ENTRY RhpTailCallTLSDispatchCell, _TEXT - // Load the dispatch cell out of the TLS variable - GET_TLS_DISPATCH_CELL - - // Tail call to the target of the dispatch cell, preserving the cell address in xip1 - ldr x9, [xip1] - br x9 - LEAF_END RhpTailCallTLSDispatchCell, _TEXT - // // Macro that generates a stub consuming a cache with the given number of entries. // diff --git a/src/Native/Runtime/unix/UnixNativeCodeManager.cpp b/src/Native/Runtime/unix/UnixNativeCodeManager.cpp index e03a85e3205..38880aed3bb 100644 --- a/src/Native/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/Native/Runtime/unix/UnixNativeCodeManager.cpp @@ -388,7 +388,7 @@ bool UnixNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pE { // @TODO: CORERT: Compress EHInfo using type table index scheme // https://github.com/dotnet/corert/issues/972 - Int32 typeRelAddr = *((PTR_Int32&)pEnumState->pEHInfo)++; + Int32 typeRelAddr = *((PTR_Int32&)pEnumState->pEHInfo); pEHClauseOut->m_pTargetType = dac_cast(pEnumState->pEHInfo + typeRelAddr); } break; diff --git a/src/Native/Runtime/unix/unixasmmacrosarm64.inc b/src/Native/Runtime/unix/unixasmmacrosarm64.inc index becaa4c3c17..b3d64104dc0 100644 --- a/src/Native/Runtime/unix/unixasmmacrosarm64.inc +++ b/src/Native/Runtime/unix/unixasmmacrosarm64.inc @@ -1,6 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
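+// (AsmOffsets.inc supplies the OFFSETOF__Thread__* constants consumed by the
+// INLINE_THREAD_UNHIJACK macro introduced below.)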
+#include "AsmOffsets.inc"
+
 .macro NESTED_ENTRY Name, Section, Handler
         LEAF_ENTRY \Name, \Section
         .ifnc \Handler, NoHandler
@@ -152,6 +154,21 @@ C_FUNC(\Name):
     add \target, \target, #:tprel_lo12_nc:tls_CurrentThread
 .endm
 
+.macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2
+    //
+    // Thread::Unhijack()
+    //
+    ldr \trashReg1, [\threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
+    cbz \trashReg1, 0f
+
+    ldr \trashReg2, [\threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation]
+    str \trashReg1, [\trashReg2]
+    str xzr, [\threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation]
+    str xzr, [\threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
+0:
+.endm
+
+
 .macro EXPORT_POINTER_TO_ADDRESS Name
 
 1:
@@ -204,3 +221,22 @@ DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP
         EPILOG_RESTORE_REG_PAIR   x27, x28, #0x60
         EPILOG_RESTORE_REG_PAIR_INDEXED  fp, lr, #0x80
 .endm
+
+//
+// CONSTANTS -- INTEGER
+//
+#define TSF_Attached                    0x01
+#define TSF_SuppressGcStress            0x08
+#define TSF_DoNotTriggerGc              0x10
+
+// Bit position for the flags above, to be used with tbz / tbnz instructions
+TrapThreadsFlags_AbortInProgress_Bit = 0
+TrapThreadsFlags_TrapThreads_Bit    = 1
+
+// This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT
+#define STATUS_REDHAWK_THREAD_ABORT 0x43
+
+// These must match the TrapThreadsFlags enum
+#define TrapThreadsFlags_None 0
+#define TrapThreadsFlags_AbortInProgress 1
+#define TrapThreadsFlags_TrapThreads 2
From 5b6bea5e1569ece7b5d945479fb4b03512759051 Mon Sep 17 00:00:00 2001
From: RalfKornmannEnvision
Date: Thu, 6 Aug 2020 15:37:55 +0200
Subject: [PATCH 09/36] ARM64: Fix EH data decoding
---
 src/Native/Runtime/unix/UnixNativeCodeManager.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Native/Runtime/unix/UnixNativeCodeManager.cpp b/src/Native/Runtime/unix/UnixNativeCodeManager.cpp
index 38880aed3bb..a2639f6078c 100644
--- a/src/Native/Runtime/unix/UnixNativeCodeManager.cpp
+++ b/src/Native/Runtime/unix/UnixNativeCodeManager.cpp
@@ -390,6 +390,7 @@ bool UnixNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pE
             // https://github.com/dotnet/corert/issues/972
             Int32 typeRelAddr = *((PTR_Int32&)pEnumState->pEHInfo);
             pEHClauseOut->m_pTargetType = dac_cast<PTR_VOID>(pEnumState->pEHInfo + typeRelAddr);
+            pEnumState->pEHInfo += 4;
         }
         break;
     case EH_CLAUSE_FAULT:
From 86a4b8b621ced359fbbfbee0787b3b6fd41cfa78 Mon Sep 17 00:00:00 2001
From: RalfKornmannEnvision
Date: Fri, 14 Aug 2020 14:10:54 +0200
Subject: [PATCH 10/36] Move native layout signature from read-only to data
 section on non-Windows targets
---
 .../DependencyAnalysis/NativeLayoutSignatureNode.cs | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs
index ba62660b598..1556406c6f9 100644
--- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs
+++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs
@@ -55,7 +55,16 @@ public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
         public int Offset => 0;
         protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler);
 
-        public override ObjectNodeSection Section => ObjectNodeSection.ReadOnlyDataSection;
+        public override ObjectNodeSection Section
+        {
+            get
+            {
+                if (_identity.Context.Target.IsWindows)
+                    
return ObjectNodeSection.ReadOnlyDataSection; + else + return ObjectNodeSection.DataSection; + } + } public override bool IsShareable => false; public override bool StaticDependenciesAreComputed => true; From f98ba02bffe099ea2535c9b28131f37a63d23446 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Fri, 14 Aug 2020 14:11:47 +0200 Subject: [PATCH 11/36] Fix exception handling macro call --- src/Native/Runtime/arm64/ExceptionHandling.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Native/Runtime/arm64/ExceptionHandling.S b/src/Native/Runtime/arm64/ExceptionHandling.S index 7e688cffd94..1dc4c83cbc0 100644 --- a/src/Native/Runtime/arm64/ExceptionHandling.S +++ b/src/Native/Runtime/arm64/ExceptionHandling.S @@ -53,8 +53,7 @@ // sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body // of method. However, this method needs to be able to change fp before calling funclet. // This is required to access locals in funclet. - // TODO PROLOG_SAVE_REG_PAIR_NO_FP fp,lr, #-0x60! - PROLOG_SAVE_REG_PAIR_INDEXED fp,lr, #-0x60 + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp,lr, #-0x60 PROLOG_SAVE_REG_PAIR x19, x20, #0x10 PROLOG_SAVE_REG_PAIR x21, x22, #0x20 PROLOG_SAVE_REG_PAIR x23, x24, #0x30 From f289d85845f23c87ffcfd488fffd7d165c6f58c3 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Fri, 14 Aug 2020 14:13:09 +0200 Subject: [PATCH 12/36] ARM64 thunk helpers --- .../Runtime/arm64/InteropThunksHelpers.S | 58 ++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/src/Native/Runtime/arm64/InteropThunksHelpers.S b/src/Native/Runtime/arm64/InteropThunksHelpers.S index 876f2dfbcb8..8c6e4198bd7 100644 --- a/src/Native/Runtime/arm64/InteropThunksHelpers.S +++ b/src/Native/Runtime/arm64/InteropThunksHelpers.S @@ -1,4 +1,60 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// TODO: Implement +#include + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +POINTER_SIZE = 0x08 + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // + // RhCommonStub + // + // INPUT: xip0: thunk's data block + // + // TRASHES: x9, x10, xip0 + // + LEAF_ENTRY RhCommonStub, _TEXT + // There are arbitrary callers passing arguments with arbitrary signatures. + // Custom calling convention: + // xip0 pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + + INLINE_GET_TLS_VAR x9, tls_thunkData + + // x9 = base address of TLS data + // xip0 = address of context cell in thunk's data + + // store thunk address in thread static + ldr x10, [xip0] + str x10, [x9] + + // Now load the target address and jump to it. 
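+        // (Thunk data block layout assumed by this stub, matching the header
+        // comment above:
+        //   [xip0 + 0]            : context pointer, stashed in TLS above
+        //   [xip0 + POINTER_SIZE] : target pointer, jumped to below)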
+ ldr xip0, [xip0, #POINTER_SIZE] + br xip0 + + LEAF_END RhCommonStub, _TEXT + + // + // IntPtr RhGetCommonStubAddress() + // + LEAF_ENTRY RhGetCommonStubAddress, _TEXT + adrp x0, RhCommonStub + add x0, x0, :lo12:RhCommonStub + ret + LEAF_END RhGetCommonStubAddress, _TEXT + + + // + // IntPtr RhGetCurrentThunkContext() + // + LEAF_ENTRY RhGetCurrentThunkContext, _TEXT + + INLINE_GET_TLS_VAR x0, tls_thunkData + + ldr x0, [x0] + + ret + + LEAF_END RhGetCurrentThunkContext, _TEXT From 986f61af7e9fa5e9d24b2a6c520f21c015d012f1 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Mon, 17 Aug 2020 07:43:56 +0200 Subject: [PATCH 13/36] ARM64 process cfi data for elf --- src/JitInterface/src/CorInfoImpl.cs | 147 ++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/src/JitInterface/src/CorInfoImpl.cs b/src/JitInterface/src/CorInfoImpl.cs index 2be99788f34..0683c14359e 100644 --- a/src/JitInterface/src/CorInfoImpl.cs +++ b/src/JitInterface/src/CorInfoImpl.cs @@ -43,6 +43,13 @@ private enum ImageFileMachine ARM = 0x01c4, ARM64 = 0xaa64, } + private enum CFI_OPCODE + { + CFI_ADJUST_CFA_OFFSET, // Offset is adjusted relative to the current one. + CFI_DEF_CFA_REGISTER, // New register is used to compute CFA + CFI_REL_OFFSET, // Register is saved at offset from the current CFA + CFI_DEF_CFA // Take address from register and add offset to it. + }; internal const string JitLibrary = "clrjitilc"; @@ -2687,9 +2694,146 @@ private void allocUnwindInfo(byte* pHotCode, byte* pColdCode, uint startOffset, blobData[i] = pUnwindBlock[i]; } + var target = _compilation.TypeSystemContext.Target; + + if (target.Architecture == TargetArchitecture.ARM64 && target.OperatingSystem == TargetOS.Linux) + { + blobData = CompressARM64CFI(blobData); + } + _frameInfos[_usedFrameInfos++] = new FrameInfo(flags, (int)startOffset, (int)endOffset, blobData); } + private byte[] CompressARM64CFI(byte[] blobData) + { + if (blobData == null || blobData.Length == 0) + { + return blobData; + } + + Debug.Assert(blobData.Length % 8 == 0); + + short spReg = -1; + + int codeOffset = 0; + short cfaRegister = spReg; + int cfaOffset = 0; + int spOffset = 0; + + int[] registerOffset = new int[96]; + + for (int i = 0; i < registerOffset.Length; i++) + { + registerOffset[i] = int.MinValue; + } + + int offset = 0; + while (offset < blobData.Length) + { + codeOffset = Math.Max(codeOffset, blobData[offset++]); + CFI_OPCODE opcode = (CFI_OPCODE)blobData[offset++]; + short dwarfReg = BitConverter.ToInt16(blobData, offset); + offset += sizeof(short); + int cfiOffset = BitConverter.ToInt32(blobData, offset); + offset += sizeof(int); + + switch (opcode) + { + case CFI_OPCODE.CFI_DEF_CFA_REGISTER: + cfaRegister = dwarfReg; + + if (spOffset != 0) + { + for (int i = 0; i < registerOffset.Length; i++) + { + if (registerOffset[i] != int.MinValue) + { + registerOffset[i] -= spOffset; + } + } + + cfaOffset += spOffset; + spOffset = 0; + } + + break; + + case CFI_OPCODE.CFI_REL_OFFSET: + Debug.Assert(cfaRegister == spReg); + registerOffset[dwarfReg] = cfiOffset; + break; + + case CFI_OPCODE.CFI_ADJUST_CFA_OFFSET: + if (cfaRegister != spReg) + { + cfaOffset += cfiOffset; + } + else + { + spOffset += cfiOffset; + + for (int i = 0; i < registerOffset.Length; i++) + { + if (registerOffset[i] != int.MinValue) + { + registerOffset[i] += cfiOffset; + } + } + } + break; + } + } + + using (MemoryStream cfiStream = new MemoryStream()) + { + int storeOffset = 0; + + using (BinaryWriter cfiWriter = new BinaryWriter(cfiStream)) + { + if (cfaRegister != 
-1) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write(cfaOffset != 0 ? (byte)CFI_OPCODE.CFI_DEF_CFA : (byte)CFI_OPCODE.CFI_DEF_CFA_REGISTER); + cfiWriter.Write(cfaRegister); + cfiWriter.Write(cfaOffset); + storeOffset = cfaOffset; + } + else + { + if (cfaOffset != 0) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write((byte)CFI_OPCODE.CFI_ADJUST_CFA_OFFSET); + cfiWriter.Write((short)-1); + cfiWriter.Write(cfaOffset); + } + + if (spOffset != 0) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write((byte)CFI_OPCODE.CFI_DEF_CFA); + cfiWriter.Write((short)31); + cfiWriter.Write(spOffset); + //storeOffset = -spOffset; + } + } + + for (int i = registerOffset.Length - 1; i >= 0; i--) + { + if (registerOffset[i] != int.MinValue) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write((byte)CFI_OPCODE.CFI_REL_OFFSET); + cfiWriter.Write((short)i); + cfiWriter.Write(registerOffset[i] + storeOffset); + } + } + } + + return cfiStream.ToArray(); + } + } + private void* allocGCInfo(UIntPtr size) { _gcInfo = new byte[(int)size]; @@ -2811,11 +2955,14 @@ private static RelocType GetRelocType(TargetArchitecture targetArchitecture, ush if (targetArchitecture != TargetArchitecture.ARM64) return (RelocType)fRelocType; + const ushort IMAGE_REL_ARM64_BRANCH26 = 3; const ushort IMAGE_REL_ARM64_PAGEBASE_REL21 = 4; const ushort IMAGE_REL_ARM64_PAGEOFFSET_12A = 6; switch (fRelocType) { + case IMAGE_REL_ARM64_BRANCH26: + return RelocType.IMAGE_REL_BASED_ARM64_BRANCH26; case IMAGE_REL_ARM64_PAGEBASE_REL21: return RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21; case IMAGE_REL_ARM64_PAGEOFFSET_12A: From b1481d67181e804f429e0e9d0ae45d247b04d6e8 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Mon, 17 Aug 2020 07:45:32 +0200 Subject: [PATCH 14/36] ARM64 missing assembler macro --- src/JitInterface/src/CorInfoImpl.cs | 3 ++- src/Native/Runtime/unix/unixasmmacrosarm64.inc | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/JitInterface/src/CorInfoImpl.cs b/src/JitInterface/src/CorInfoImpl.cs index 0683c14359e..ce05c5aecfd 100644 --- a/src/JitInterface/src/CorInfoImpl.cs +++ b/src/JitInterface/src/CorInfoImpl.cs @@ -51,7 +51,8 @@ private enum CFI_OPCODE CFI_DEF_CFA // Take address from register and add offset to it. }; - internal const string JitLibrary = "clrjitilc"; + //internal const string JitLibrary = "clrjitilc"; + internal const string JitLibrary = "protononjit"; #if SUPPORT_JIT private const string JitSupportLibrary = "*"; diff --git a/src/Native/Runtime/unix/unixasmmacrosarm64.inc b/src/Native/Runtime/unix/unixasmmacrosarm64.inc index b3d64104dc0..3e3bbdbba86 100644 --- a/src/Native/Runtime/unix/unixasmmacrosarm64.inc +++ b/src/Native/Runtime/unix/unixasmmacrosarm64.inc @@ -71,6 +71,11 @@ C_FUNC(\Name): .endif .endm +.macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ofs + stp \reg1, \reg2, [sp, \ofs]! 
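+    // (The single pre-indexed stp above both allocates the frame and saves the
+    // pair; unlike PROLOG_SAVE_REG_PAIR_INDEXED it deliberately does not
+    // establish fp, so callers such as ALLOC_CALL_FUNCLET_FRAME can repoint fp
+    // before invoking a funclet.)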
+.endm + + .macro EPILOG_RESTORE_REG reg, ofs ldr \reg, [sp, \ofs] .endm @@ -144,14 +149,19 @@ C_FUNC(\Name): #define xip1 x17 #define xpr x18 +.macro INLINE_GET_TLS_VAR target, var + mrs \target, tpidr_el0 + add \target, \target, #:tprel_hi12:\var, lsl #12 + add \target, \target, #:tprel_lo12_nc:\var +.endm + + .macro PREPARE_INLINE_GETTHREAD .global tls_CurrentThread .endm .macro INLINE_GETTHREAD target - mrs \target, tpidr_el0 - add \target, \target, #:tprel_hi12:tls_CurrentThread, lsl #12 - add \target, \target, #:tprel_lo12_nc:tls_CurrentThread + INLINE_GET_TLS_VAR \target, tls_CurrentThread .endm .macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2 From 5f3bb3437c182710306261909d55f0e1143c3e02 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Mon, 17 Aug 2020 08:29:04 +0200 Subject: [PATCH 15/36] ARM64 port more assembler helpers --- src/Native/Runtime/arm64/CallDescrWorker.S | 138 +++++++++- .../arm64/CallingConventionConverterHelpers.S | 59 ++++- src/Native/Runtime/arm64/MiscStubs.S | 239 ++++++++++++++++++ 3 files changed, 434 insertions(+), 2 deletions(-) diff --git a/src/Native/Runtime/arm64/CallDescrWorker.S b/src/Native/Runtime/arm64/CallDescrWorker.S index 876f2dfbcb8..24f9fdf2aab 100644 --- a/src/Native/Runtime/arm64/CallDescrWorker.S +++ b/src/Native/Runtime/arm64/CallDescrWorker.S @@ -1,4 +1,140 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// TODO: Implement +#include +#include "AsmOffsets.inc" + +//----------------------------------------------------------------------------- +// This helper routine enregisters the appropriate arguments and makes the +// actual call. +// +// INPUT: x0: pointer to CallDescrData struct +// +//----------------------------------------------------------------------------- +//void RhCallDescrWorker(CallDescrData * pCallDescrData); + NESTED_ENTRY RhCallDescrWorker, _TEXT, NoHandler + + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-32 + PROLOG_SAVE_REG_PAIR x19, x20, #16 + + // Save the value of SP before we start pushing any arguments + mov x20, sp + + mov x19, x0 // save pCallDescrData in x19 + + ldr w1, [x19, #OFFSETOF__CallDescrData__numStackSlots] + cbz w1, Ldonestack + + // Add frame padding to ensure frame size is a multiple of 16 (a requirement of the OS ABI). + // We push two registers (above) and numStackSlots arguments (below). If this comes to an odd number + // of slots we must pad with another. This simplifies to "if the low bit of numStackSlots is set, + // extend the stack another eight bytes". + ldr x0, [x19, #OFFSETOF__CallDescrData__pSrc] + add x0, x0, x1, lsl #3 // pSrcEnd=pSrc+8*numStackSlots + ands x2, x1, #1 + beq Lstackloop + + // This loop copies numStackSlots words + // from [pSrcEnd-8,pSrcEnd-16,...] to [sp-8,sp-16,...] + + // Pad and store one stack slot as number of slots are odd + ldr x4, [x0,#-8]! + str x4, [sp,#-16]! + subs x1, x1, #1 + beq Ldonestack +Lstackloop: + ldp x2, x4, [x0,#-16]! + stp x2, x4, [sp,#-16]! + subs x1, x1, #2 + bne Lstackloop +Ldonestack: + + // If FP arguments are supplied in registers (x9 != NULL) then initialize all of them from the pointer + // given in x9. 
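+    // (CallDescrData fields consumed by this helper, going by the OFFSETOF__
+    // constants used here: pSrc/numStackSlots for stack arguments,
+    // pFloatArgumentRegisters for d0-d7, pArgumentRegisters for x0-x8,
+    // pTarget for the callee, and fpReturnSize/pReturnBuffer for the return
+    // value.)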
+    ldr x9, [x19, #OFFSETOF__CallDescrData__pFloatArgumentRegisters]
+    cbz x9, LNoFloatingPoint
+    ldp d0, d1, [x9]
+    ldp d2, d3, [x9, #16]
+    ldp d4, d5, [x9, #32]
+    ldp d6, d7, [x9, #48]
+LNoFloatingPoint:
+
+    // Copy [pArgumentRegisters, ..., pArgumentRegisters + 64]
+    // into x0, ..., x7, x8
+
+    ldr x9, [x19, #OFFSETOF__CallDescrData__pArgumentRegisters]
+    ldp x0, x1, [x9]
+    ldp x2, x3, [x9, #16]
+    ldp x4, x5, [x9, #32]
+    ldp x6, x7, [x9, #48]
+    ldr x8, [x9, #64]
+
+    // call pTarget
+    ldr x9, [x19, #OFFSETOF__CallDescrData__pTarget]
+    blr x9
+
+    EXPORT_POINTER_TO_ADDRESS PointerToReturnFromCallDescrThunk
+
+    // Symbol used to identify thunk call to managed function so the special
+    // case unwinder can unwind through this function. Sadly we cannot directly
+    // export this symbol right now because it confuses DIA unwinder to believe
+    // it's the beginning of a new method, therefore we export the address
+    // of an auxiliary variable holding the address instead.
+
+    ldr w3, [x19, #OFFSETOF__CallDescrData__fpReturnSize]
+
+    // Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself
+    ldr x19, [x19, #OFFSETOF__CallDescrData__pReturnBuffer]
+
+    // Int return case
+    cbz w3, LIntReturn
+
+    // Float return case
+    cmp w3, #4
+    beq LFloatOrDoubleReturn
+
+    // Double return case
+    cmp w3, #8
+    bne LCheckHFAReturn
+
+LFloatOrDoubleReturn:
+    str d0, [x19]
+    b LReturnDone
+
+LCheckHFAReturn:
+    cmp w3, #16
+    beq LFloatOrDoubleHFAReturn
+    cmp w3, #32
+    beq LFloatOrDoubleHFAReturn
+    b LNoHFAReturn
+
+LFloatOrDoubleHFAReturn:
+    // Single/Double HFA return case
+    stp d0, d1, [x19, #00]
+    stp d2, d3, [x19, #16]
+    b LReturnDone
+
+LNoHFAReturn:
+
+    EMIT_BREAKPOINT // Unreachable
+
+LIntReturn:
+    // Save return value(s) into retbuf for int
+    stp x0, x1, [x19]
+
+LReturnDone:
+
+#ifdef _DEBUG
+    // Trash the floating point registers to ensure that the HFA return values
+    // won't survive by accident
+    ldp d0, d1, [sp]
+    ldp d2, d3, [sp, #16]
+#endif
+    // Restore the value of SP
+    mov sp, x20
+
+    EPILOG_RESTORE_REG_PAIR x19, x20, #16
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #32
+    EPILOG_RETURN
+
+    NESTED_END RhCallDescrWorker, _TEXT
diff --git a/src/Native/Runtime/arm64/CallingConventionConverterHelpers.S b/src/Native/Runtime/arm64/CallingConventionConverterHelpers.S
index 876f2dfbcb8..de6ff7319ed 100644
--- a/src/Native/Runtime/arm64/CallingConventionConverterHelpers.S
+++ b/src/Native/Runtime/arm64/CallingConventionConverterHelpers.S
@@ -1,4 +1,61 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-// TODO: Implement
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; CallingConventionConverter Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+POINTER_SIZE = 0x08
+
+// Note: The "__jmpstub__" prefix is used to indicate to the debugger
+// that it must step-through this stub when it encounters it while
+// stepping.
+ + + // + // void CallingConventionConverter_ReturnThunk() + // + LEAF_ENTRY CallingConventionConverter_ReturnThunk, _TEXT + ret + LEAF_END CallingConventionConverter_ReturnThunk, _TEXT + + // + // __jmpstub__CallingConventionConverter_CommonCallingStub + // + // struct CallingConventionConverter_CommonCallingStub_PointerData + // { + // void *ManagedCallConverterThunk; + // void *UniversalThunk; + // } + // + // struct CommonCallingStubInputData + // { + // ULONG_PTR CallingConventionId; + // CallingConventionConverter_CommonCallingStub_PointerData *commonData; // Only the ManagedCallConverterThunk field is used + // // However, it is specified just like other platforms, so the behavior of the common + // // calling stub is easier to debug + // } + // + // xip0 - Points at CommonCallingStubInputData + // + // + LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + ldr xip1, [xip0] // put CallingConventionId into xip1 as "parameter" to universal transition thunk + ldr xip0, [xip0, #POINTER_SIZE] // get pointer to CallingConventionConverter_CommonCallingStub_PointerData into xip0 + ldr x12, [xip0, #POINTER_SIZE] // get address of UniversalTransitionThunk (which we'll tailcall to later) + ldr xip0, [xip0] // get address of ManagedCallConverterThunk (target for universal thunk to call) + br x12 + LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + + // + // void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonCallingStub) + // + LEAF_ENTRY CallingConventionConverter_GetStubs, _TEXT + ldr x12, =CallingConventionConverter_ReturnThunk + str x12, [x0] // ARM doesn't need different return thunks. + str x12, [x1] + ldr x12, =__jmpstub__CallingConventionConverter_CommonCallingStub + str x12, [x2] + ret + LEAF_END CallingConventionConverter_GetStubs, _TEXT diff --git a/src/Native/Runtime/arm64/MiscStubs.S b/src/Native/Runtime/arm64/MiscStubs.S index 53616c22696..a4130dc1af6 100644 --- a/src/Native/Runtime/arm64/MiscStubs.S +++ b/src/Native/Runtime/arm64/MiscStubs.S @@ -1,2 +1,241 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. + +#include +#include "AsmOffsets.inc" + + .global memcpy + .global memcpyGCRefs + .global memcpyGCRefsWithWriteBarrier + .global memcpyAnyWithWriteBarrier + .global GetClasslibCCtorCheck + +// +// Checks whether the static class constructor for the type indicated by the context structure has been +// executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +// execute the cctor and update the context to record this fact. +// +// Input: +// x0 : Address of StaticClassConstructionContext structure +// +// Output: +// All volatile registers and the condition codes may be trashed. +// + LEAF_ENTRY RhpCheckCctor, _TEXT + + // Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + // initial state is 0 and the remaining values are reserved for classlib use). This check is + // unsynchronized; if we go down the slow path and call the classlib then it is responsible for + // synchronizing with other threads and re-checking the value. 
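+    // (Managed-side shape this fast path assumes -- a sketch only, with
+    // approximate field names:
+    //
+    //     struct StaticClassConstructionContext
+    //     {
+    //         IntPtr cctorMethodAddress;
+    //         volatile int initialized;   // 1 once the cctor has run
+    //     }
+    //
+    // Only m_initialized is examined here; the slow path defers everything
+    // else to the classlib callback.)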
+    ldr w12, [x0, #OFFSETOF__StaticClassConstructionContext__m_initialized]
+    cmp w12, #1
+    bne RhpCheckCctor__SlowPath
+    ret
+RhpCheckCctor__SlowPath:
+    mov x1, x0
+    b RhpCheckCctor2 // tail-call the check cctor helper that actually has an implementation to call
+                     // the cctor
+
+    LEAF_END RhpCheckCctor, _TEXT
+
+//
+// Checks whether the static class constructor for the type indicated by the context structure has been
+// executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will
+// execute the cctor and update the context to record this fact.
+//
+// Input:
+//  x0 : Value that must be preserved in this register across the cctor check.
+//  x1 : Address of StaticClassConstructionContext structure
+//
+// Output:
+//  All volatile registers other than x0 may be trashed and the condition codes may also be trashed.
+//
+    LEAF_ENTRY RhpCheckCctor2, _TEXT
+
+    // Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the
+    // initial state is 0 and the remaining values are reserved for classlib use). This check is
+    // unsynchronized; if we go down the slow path and call the classlib then it is responsible for
+    // synchronizing with other threads and re-checking the value.
+    ldr w12, [x1, #OFFSETOF__StaticClassConstructionContext__m_initialized]
+    cmp w12, #1
+    bne RhpCheckCctor2__SlowPath
+    ret
+
+    LEAF_END RhpCheckCctor2, _TEXT
+
+//
+// Slow path helper for RhpCheckCctor.
+//
+// Input:
+//  x0 : Value that must be preserved in this register across the cctor check.
+//  x1 : Address of StaticClassConstructionContext structure
+//
+// Output:
+//  All volatile registers other than x0 may be trashed and the condition codes may also be trashed.
+//
+    NESTED_ENTRY RhpCheckCctor2__SlowPath, _TEXT, NoHandler
+
+    // Need to preserve x0, x1 and lr across helper call. fp is also pushed to keep the stack 16 byte aligned.
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x20
+    stp x0, x1, [sp, #0x10]
+
+    // Call a C++ helper to retrieve the address of the classlib callback. The caller's return address is
+    // passed as the argument to the helper; it's an address in the module and is used by the helper to
+    // locate the classlib.
+    mov x0, lr
+    bl GetClasslibCCtorCheck
+
+    // X0 now contains the address of the classlib method to call. The single argument is the context
+    // structure address currently stashed on the stack. Clean up and tail call to the classlib
+    // callback so we're not on the stack should a GC occur (so we don't need to worry about transition
+    // frames).
+    mov x12, x0
+    ldp x0, x1, [sp, #0x10]
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x20
+    // tail-call the class lib cctor check function. This function is required to return its first
+    // argument, so that x0 can be preserved.
+    br x12
+
+    NESTED_END RhpCheckCctor2__SlowPath, _TEXT
+
+
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+//
+// void* RhpCopyMultibyteNoGCRefs(void*, void*, size_t)
+//
+// The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where
+// the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch
+// it to managed code.
+//
+
+    LEAF_ENTRY RhpCopyMultibyteNoGCRefs, _TEXT
+
+    // x0    dest
+    // x1    src
+    // x2    count
+
+    cbz x2, NothingToCopy_NoGCRefs // check for a zero-length copy
+
+    // Now check the dest and src pointers.
If they AV, the EH subsystem will recognize the address of the AV, + // unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + // translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsSrcAVLocation + ldrb wzr, [x1] + + // tail-call to plain-old-memcpy + b memcpy + +NothingToCopy_NoGCRefs: + // dest is already in x0 + ret + + LEAF_END RhpCopyMultibyteNoGCRefs, _TEXT + + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// +// void* RhpCopyMultibyte(void*, void*, size_t) +// +// The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +// the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +// it to managed code. +// + + LEAF_ENTRY RhpCopyMultibyte, _TEXT + + // x0 dest + // x1 src + // x2 count + + // check for a zero-length copy + cbz x2, NothingToCopy_RhpCopyMultibyte + + // Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + // unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + // translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyMultibyteSrcAVLocation + ldrb wzr, [x1] + + // tail-call to the GC-safe memcpy implementation + b memcpyGCRefs + +NothingToCopy_RhpCopyMultibyte: + // dest is already still in x0 + ret + + LEAF_END RhpCopyMultibyte, _TEXT + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// +// void* RhpCopyMultibyteWithWriteBarrier(void*, void*, size_t) +// +// The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +// the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +// it to managed code. +// Runs a card table update via RhpBulkWriteBarrier after the copy +// + + LEAF_ENTRY RhpCopyMultibyteWithWriteBarrier, _TEXT + + // x0 dest + // x1 src + // x2 count + + // check for a zero-length copy + cbz x2, NothingToCopy_RhpCopyMultibyteWithWriteBarrier + + // Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + // unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + // translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierSrcAVLocation + ldrb wzr, [x1] + + // tail-call to the GC-safe memcpy implementation + b memcpyGCRefsWithWriteBarrier + +NothingToCopy_RhpCopyMultibyteWithWriteBarrier: + // dest is already still in x0 + ret + LEAF_END RhpCopyMultibyteWithWriteBarrier, _TEXT + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// +// void* RhpCopyAnyWithWriteBarrier(void*, void*, size_t) +// +// The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +// the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +// it to managed code. 
+// Runs a card table update via RhpBulkWriteBarrier after the copy if it contained GC pointers
+//
+
+    LEAF_ENTRY RhpCopyAnyWithWriteBarrier, _TEXT
+
+    // x0    dest
+    // x1    src
+    // x2    count
+
+    // check for a zero-length copy
+    cbz x2, NothingToCopy_RhpCopyAnyWithWriteBarrier
+
+    // Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV,
+    // unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be
+    // translated to a managed exception as usual.
+    ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierDestAVLocation
+    ldrb wzr, [x0]
+    ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierSrcAVLocation
+    ldrb wzr, [x1]
+
+    // tail-call to the GC-safe memcpy implementation
+    b memcpyAnyWithWriteBarrier
+
+NothingToCopy_RhpCopyAnyWithWriteBarrier:
+    // dest is still in x0
+    ret
+
+    LEAF_END RhpCopyAnyWithWriteBarrier, _TEXT
From 2fe1f65194f898798976681b4b0764e4bdc31e61 Mon Sep 17 00:00:00 2001
From: RalfKornmannEnvision
Date: Mon, 17 Aug 2020 08:49:05 +0200
Subject: [PATCH 16/36] ARM64: Add missing case for the generic helper node
---
 .../Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs
index 35b5fb64eb6..360e4719efb 100644
--- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs
+++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs
@@ -198,6 +198,7 @@ protected sealed override void EmitCode(NodeFactory factory, ref ARM64Emitter en
                 case ReadyToRunHelperId.MethodEntry:
                 case ReadyToRunHelperId.VirtualDispatchCell:
                 case ReadyToRunHelperId.DefaultConstructor:
+                case ReadyToRunHelperId.ObjectAllocator:
                 case ReadyToRunHelperId.TypeHandleForCasting:
                     {
                         EmitDictionaryLookup(factory, ref encoder, contextRegister, encoder.TargetRegister.Result, _lookupSignature, relocsOnly);
From 11817b23164e1a7c5faeb4ddb2a5ed5835236bca Mon Sep 17 00:00:00 2001
From: RalfKornmannEnvision
Date: Tue, 18 Aug 2020 07:18:56 +0200
Subject: [PATCH 17/36] ARM64 intrinsic support
---
 .../ExpectedIsaFeaturesRootProvider.cs        |   3 +-
 .../Compiler/HardwareIntrinsicHelpers.Aot.cs  | 101 ++++++++++++++++--
 src/ILCompiler/src/Program.cs                 |   8 ++
 src/Native/Runtime/PalRedhawk.h               |  21 ++++
 src/Native/Runtime/startup.cpp                |  17 ++-
 5 files changed, 139 insertions(+), 11 deletions(-)

diff --git a/src/ILCompiler.Compiler/src/Compiler/ExpectedIsaFeaturesRootProvider.cs b/src/ILCompiler.Compiler/src/Compiler/ExpectedIsaFeaturesRootProvider.cs
index adca3d89466..90f7b77a4f7 100644
--- a/src/ILCompiler.Compiler/src/Compiler/ExpectedIsaFeaturesRootProvider.cs
+++ b/src/ILCompiler.Compiler/src/Compiler/ExpectedIsaFeaturesRootProvider.cs
@@ -19,7 +19,8 @@ public ExpectedIsaFeaturesRootProvider(InstructionSetSupport isaSupport)
         void ICompilationRootProvider.AddCompilationRoots(IRootingServiceProvider rootProvider)
         {
             if (_isaSupport.Architecture == TargetArchitecture.X64
-                || _isaSupport.Architecture == TargetArchitecture.X86)
+                || _isaSupport.Architecture == TargetArchitecture.X86
+                || _isaSupport.Architecture == TargetArchitecture.ARM64)
             {
                 int isaFlags = HardwareIntrinsicHelpers.GetRuntimeRequiredIsaFlags(_isaSupport);
                 byte[] bytes = BitConverter.GetBytes(isaFlags);
diff --git
a/src/ILCompiler.Compiler/src/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/ILCompiler.Compiler/src/Compiler/HardwareIntrinsicHelpers.Aot.cs index ec35284a865..7f51dd80d6a 100644 --- a/src/ILCompiler.Compiler/src/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ b/src/ILCompiler.Compiler/src/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -56,9 +56,23 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte string id = InstructionSetSupport.GetHardwareIntrinsicId(method.Context.Target.Architecture, method.OwningType); - Debug.Assert(method.Context.Target.Architecture == TargetArchitecture.X64 - || method.Context.Target.Architecture == TargetArchitecture.X86); - int flag = XArchIntrinsicConstants.FromHardwareIntrinsicId(id); + int flag = 0; + + switch (method.Context.Target.Architecture) + { + case TargetArchitecture.X86: + case TargetArchitecture.X64: + flag = XArchIntrinsicConstants.FromHardwareIntrinsicId(id); + break; + + case TargetArchitecture.ARM64: + flag = Arm64IntrinsicConstants.FromHardwareIntrinsicId(id); + break; + + default: + Debug.Fail("Unsupported Architecture"); + break; + } var emit = new ILEmitter(); ILCodeStream codeStream = emit.NewCodeStream(); @@ -75,12 +89,22 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte public static int GetRuntimeRequiredIsaFlags(InstructionSetSupport instructionSetSupport) { - Debug.Assert(instructionSetSupport.Architecture == TargetArchitecture.X64 || - instructionSetSupport.Architecture == TargetArchitecture.X86); - return XArchIntrinsicConstants.FromInstructionSetFlags(instructionSetSupport.SupportedFlags); + switch (instructionSetSupport.Architecture) + { + case TargetArchitecture.X86: + case TargetArchitecture.X64: + return XArchIntrinsicConstants.FromInstructionSetFlags(instructionSetSupport.SupportedFlags); + + case TargetArchitecture.ARM64: + return Arm64IntrinsicConstants.FromInstructionSetFlags(instructionSetSupport.SupportedFlags); + + default: + Debug.Fail("Unsupported Architecture"); + return 0; + } } - // Keep this enumeration in sync with startup.cpp in the native runtime. + // Keep these enumerations in sync with startup.cpp in the native runtime. 
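+        // As a rough illustration (a C#-level sketch only; names are
+        // illustrative, the real body is emitted as IL above), a generated
+        // IsSupported property behaves like:
+        //
+        //     static bool IsSupported => (s_cpuFeatures & flag) != 0;
+        //
+        // where s_cpuFeatures stands for the runtime-populated field passed in
+        // as isSupportedField and flag is the constant resolved below.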
private static class XArchIntrinsicConstants { // SSE and SSE2 are baseline ISAs - they're always available @@ -166,5 +190,68 @@ public static int FromInstructionSetFlags(InstructionSetFlags instructionSets) return result; } } + + private static class Arm64IntrinsicConstants + { + public const int ArmBase = 0x0001; + public const int ArmBase_Arm64 = 0x0002; + public const int AdvSimd = 0x0004; + public const int AdvSimd_Arm64 = 0x0008; + public const int Aes = 0x0010; + public const int Crc32 = 0x0020; + public const int Crc32_Arm64 = 0x0040; + public const int Sha1 = 0x0080; + public const int Sha256 = 0x0100; + public const int Atomics = 0x0200; + public const int Vector64 = 0x0400; + public const int Vector128 = 0x0800; + + public static int FromHardwareIntrinsicId(string id) + { + return id switch + { + "ArmBase" => ArmBase, + "ArmBase_Arm64" => ArmBase_Arm64, + "AdvSimd" => AdvSimd, + "AdvSimd_Arm64" => AdvSimd_Arm64, + "Aes" => Aes, + "Crc32" => Crc32, + "Crc32_Arm64" => Crc32_Arm64, + "Sha1" => Sha1, + "Sha256" => Sha256, + "Atomics" => Atomics, + "Vector64" => Vector64, + "Vector128" => Vector128, + _ => throw new NotSupportedException(), + }; + } + + public static int FromInstructionSetFlags(InstructionSetFlags instructionSets) + { + int result = 0; + + foreach (InstructionSet instructionSet in instructionSets) + { + result |= instructionSet switch + { + InstructionSet.ARM64_ArmBase => ArmBase, + InstructionSet.ARM64_ArmBase_Arm64 => ArmBase_Arm64, + InstructionSet.ARM64_AdvSimd => AdvSimd, + InstructionSet.ARM64_AdvSimd_Arm64 => AdvSimd_Arm64, + InstructionSet.ARM64_Aes => Aes, + InstructionSet.ARM64_Crc32 => Crc32, + InstructionSet.ARM64_Crc32_Arm64 => Crc32_Arm64, + InstructionSet.ARM64_Sha1 => Sha1, + InstructionSet.ARM64_Sha256 => Sha256, + InstructionSet.ARM64_Atomics => Atomics, + InstructionSet.ARM64_Vector64 => Vector64, + InstructionSet.ARM64_Vector128 => Vector128, + _ => throw new NotSupportedException() + }; + } + + return result; + } + } } } diff --git a/src/ILCompiler/src/Program.cs b/src/ILCompiler/src/Program.cs index 0f5f9d08035..09e4f817b8c 100644 --- a/src/ILCompiler/src/Program.cs +++ b/src/ILCompiler/src/Program.cs @@ -403,6 +403,14 @@ private int Run(string[] args) optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi2"); } } + else if (_targetArchitecture == TargetArchitecture.ARM64) + { + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("crc"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha1"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha2"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lse"); + } optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(out var optimisticInstructionSet, out _, (string specifiedInstructionSet, string impliedInstructionSet) => throw new NotSupportedException()); diff --git a/src/Native/Runtime/PalRedhawk.h b/src/Native/Runtime/PalRedhawk.h index 7789c05346b..27e1aff03a1 100644 --- a/src/Native/Runtime/PalRedhawk.h +++ b/src/Native/Runtime/PalRedhawk.h @@ -852,6 +852,27 @@ REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport(); REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvxEnabled(); #endif // defined(HOST_X86) || defined(HOST_AMD64) +#if defined(HOST_ARM64) +// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.Aot.cs +enum ARM64IntrinsicConstants +{ + ARM64IntrinsicConstants_ArmBase = 0x0001, + 
ARM64IntrinsicConstants_ArmBase_Arm64 = 0x0002, + ARM64IntrinsicConstants_AdvSimd = 0x0004, + ARM64IntrinsicConstants_AdvSimd_Arm64 = 0x0008, + ARM64IntrinsicConstants_Aes = 0x0010, + ARM64IntrinsicConstants_Crc32 = 0x0020, + ARM64IntrinsicConstants_Crc32_Arm64 = 0x0040, + ARM64IntrinsicConstants_Sha1 = 0x0080, + ARM64IntrinsicConstants_Sha256 = 0x0100, + ARM64IntrinsicConstants_Atomics = 0x0200, + ARM64IntrinsicConstants_Vector64 = 0x0400, + ARM64IntrinsicConstants_Vector128 = 0x0800 +}; + +REDHAWK_PALIMPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags); +#endif //defined(HOST_ARM64) + #include "PalRedhawkInline.h" #endif // !PAL_REDHAWK_INCLUDED diff --git a/src/Native/Runtime/startup.cpp b/src/Native/Runtime/startup.cpp index 4ed81423af9..a7faeccd263 100644 --- a/src/Native/Runtime/startup.cpp +++ b/src/Native/Runtime/startup.cpp @@ -49,7 +49,7 @@ EXTERN_C bool g_fHasFastFxsave = false; CrstStatic g_CastCacheLock; CrstStatic g_ThunkPoolLock; -#if defined(HOST_X86) || defined(HOST_AMD64) +#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) // This field is inspected from the generated code to determine what intrinsics are available. EXTERN_C int g_cpuFeatures = 0; // This field is defined in the generated code and sets the ISA expectations. @@ -155,7 +155,9 @@ static void CheckForPalFallback() } #ifndef USE_PORTABLE_HELPERS -// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.cs + +#if defined(HOST_X86) || defined(HOST_AMD64) +// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.Aot.cs enum XArchIntrinsicConstants { XArchIntrinsicConstants_Aes = 0x0001, @@ -173,8 +175,12 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Lzcnt = 0x1000, }; +#endif + bool DetectCPUFeatures() { +#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) + #if defined(HOST_X86) || defined(HOST_AMD64) unsigned char buffer[16]; @@ -293,12 +299,17 @@ bool DetectCPUFeatures() g_cpuFeatures |= XArchIntrinsicConstants_Lzcnt; } } +#endif // HOST_X86 || HOST_AMD64 + +#if defined(HOST_ARM64) + PAL_GetCpuCapabilityFlags (&g_cpuFeatures); +#endif if ((g_cpuFeatures & g_requiredCpuFeatures) != g_requiredCpuFeatures) { return false; } -#endif // HOST_X86 || HOST_AMD64 +#endif // HOST_X86 || HOST_AMD64 || HOST_ARM64 return true; } From 08e0d5210084c39146c34abf7d2555fe61e93c47 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Tue, 18 Aug 2020 07:38:29 +0200 Subject: [PATCH 18/36] ARM64 object writer for ELF object files --- src/Native/ObjWriter/cfi.h | 3 +- src/Native/ObjWriter/objwriter.cpp | 67 +++++++++++++++++++++++++----- src/Native/ObjWriter/objwriter.h | 3 ++ 3 files changed, 62 insertions(+), 11 deletions(-) diff --git a/src/Native/ObjWriter/cfi.h b/src/Native/ObjWriter/cfi.h index 4d5a3648207..c161b260adb 100644 --- a/src/Native/ObjWriter/cfi.h +++ b/src/Native/ObjWriter/cfi.h @@ -13,7 +13,8 @@ enum CFI_OPCODE { CFI_ADJUST_CFA_OFFSET, // Offset is adjusted relative to the current one. CFI_DEF_CFA_REGISTER, // New register is used to compute CFA - CFI_REL_OFFSET // Register is saved at offset from the current CFA + CFI_REL_OFFSET, // Register is saved at offset from the current CFA + CFI_DEF_CFA // Take address from register and add offset to it. 
}; struct CFI_CODE diff --git a/src/Native/ObjWriter/objwriter.cpp b/src/Native/ObjWriter/objwriter.cpp index b9c31e62c54..a95a781ab64 100644 --- a/src/Native/ObjWriter/objwriter.cpp +++ b/src/Native/ObjWriter/objwriter.cpp @@ -50,6 +50,7 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/Win64EH.h" #include "llvm/Target/TargetMachine.h" +#include "../../../lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h" using namespace llvm; using namespace llvm::codeview; @@ -309,7 +310,13 @@ void ObjectWriter::SetCodeSectionAttribute(const char *SectionName, } void ObjectWriter::EmitAlignment(int ByteAlignment) { - Streamer->EmitValueToAlignment(ByteAlignment, 0x90 /* Nop */); + int64_t fillValue = 0x90; // x86 nop + + if (TMachine->getTargetTriple().getArch() == llvm::Triple::ArchType::aarch64) { + fillValue = 0; // ARM64: 0x90 is not a valid filler instruction byte; pad with zeroes + } + + Streamer->EmitValueToAlignment(ByteAlignment, fillValue); } void ObjectWriter::EmitBlob(int BlobSize, const char *Blob) { @@ -333,15 +340,28 @@ void ObjectWriter::EmitSymbolDef(const char *SymbolName, bool global) { Streamer->EmitSymbolAttribute(Sym, MCSA_Local); } + Triple TheTriple = TMachine->getTargetTriple(); + // A Thumb2 function symbol should be marked with an appropriate ELF // attribute to make later computation of a relocation address value correct - if (GetTriple().getArch() == Triple::thumb && - GetTriple().getObjectFormat() == Triple::ELF && + + if (TheTriple.getObjectFormat() == Triple::ELF && Streamer->getCurrentSectionOnly()->getKind().isText()) { - Streamer->EmitSymbolAttribute(Sym, MCSA_ELF_TypeFunction); + switch (TheTriple.getArch()) { + case Triple::thumb: + case Triple::aarch64: + Streamer->EmitSymbolAttribute(Sym, MCSA_ELF_TypeFunction); + break; + + default: + break; + } } - Streamer->EmitLabel(Sym); + if (Sym->isUndefined()) + { + Streamer->EmitLabel(Sym); + } } const MCSymbolRefExpr * @@ -353,6 +373,8 @@ ObjectWriter::GetSymbolRefExpr(const char *SymbolName, return MCSymbolRefExpr::create(T, Kind, *OutContext); } + + unsigned ObjectWriter::GetDFSize() { return Streamer->getOrCreateDataFragment()->getContents().size(); } @@ -398,15 +420,16 @@ int ObjectWriter::EmitSymbolRef(const char *SymbolName, case RelocType::IMAGE_REL_BASED_DIR64: Size = 8; break; - case RelocType::IMAGE_REL_BASED_REL32: + case RelocType::IMAGE_REL_BASED_REL32: { Size = 4; - IsPCRel = true; + IsPCRel = true; if (ObjFileInfo->getObjectFileType() == ObjFileInfo->IsELF) { - // PLT is valid only for code symbols, - // but there shouldn't be references to global data symbols - Kind = MCSymbolRefExpr::VK_PLT; + // PLT is valid only for code symbols, + // but there shouldn't be references to global data symbols + Kind = MCSymbolRefExpr::VK_PLT; } break; + } case RelocType::IMAGE_REL_BASED_RELPTR32: Size = 4; IsPCRel = true; @@ -424,6 +447,25 @@ int ObjectWriter::EmitSymbolRef(const char *SymbolName, EmitRelocDirective(GetDFSize(), "R_ARM_THM_JUMP24", TargetExpr); return 4; } + case RelocType::IMAGE_REL_BASED_ARM64_BRANCH26: { + const MCExpr *TargetExpr = GenTargetExpr(SymbolName, Kind, Delta); + EmitRelocDirective(GetDFSize(), "R_AARCH64_JUMP26", TargetExpr); + return 4; + } + case RelocType::IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: { + const MCExpr *TargetExpr = GenTargetExpr(SymbolName, Kind, Delta); + TargetExpr = + AArch64MCExpr::create(TargetExpr, AArch64MCExpr::VK_CALL, *OutContext); + EmitRelocDirective(GetDFSize(), "R_AARCH64_ADR_PREL_LO21", TargetExpr); + return 4; + } + case RelocType::IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A: { + const MCExpr *TargetExpr =
GenTargetExpr(SymbolName, Kind, Delta); + TargetExpr = + AArch64MCExpr::create(TargetExpr, AArch64MCExpr::VK_LO12, *OutContext); + EmitRelocDirective(GetDFSize(), "R_AARCH64_ADD_ABS_LO12_NC", TargetExpr); + return 4; + } } const MCExpr *TargetExpr = GenTargetExpr(SymbolName, Kind, Delta, IsPCRel, Size); @@ -510,6 +552,11 @@ void ObjectWriter::EmitCFICode(int Offset, const char *Blob) { "Unexpected Offset Value for OpDefCfaRegister"); Streamer->EmitCFIDefCfaRegister(CfiCode->DwarfReg); break; + case CFI_DEF_CFA: + assert(CfiCode->Offset != 0 && + "Unexpected Offset Value for OpDefCfa"); + Streamer->EmitCFIDefCfa(CfiCode->DwarfReg, CfiCode->Offset); + break; default: assert(false && "Unrecognized CFI"); break; diff --git a/src/Native/ObjWriter/objwriter.h b/src/Native/ObjWriter/objwriter.h index 925ae5068d1..6a41613a4e3 100644 --- a/src/Native/ObjWriter/objwriter.h +++ b/src/Native/ObjWriter/objwriter.h @@ -53,7 +53,10 @@ enum class RelocType { IMAGE_REL_BASED_DIR64 = 0x0A, IMAGE_REL_BASED_REL32 = 0x10, IMAGE_REL_BASED_THUMB_BRANCH24 = 0x13, + IMAGE_REL_BASED_ARM64_BRANCH26 = 0x15, IMAGE_REL_BASED_RELPTR32 = 0x7C, + IMAGE_REL_BASED_ARM64_PAGEBASE_REL21 = 0x81, + IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A = 0x82, }; class ObjectWriter { From 083ee8c8c932d65739153d0392ddad02205b6822 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Tue, 18 Aug 2020 07:48:45 +0200 Subject: [PATCH 19/36] ARM64 llvm patches for object writer --- src/Native/ObjWriter/llvm.patch | 68 ++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/src/Native/ObjWriter/llvm.patch b/src/Native/ObjWriter/llvm.patch index 67ae23ec956..a3eca0ecbe3 100644 --- a/src/Native/ObjWriter/llvm.patch +++ b/src/Native/ObjWriter/llvm.patch @@ -1,5 +1,5 @@ diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h -index 7c1189e..d1d77c9 100644 +index 7c1189e46ab..d1d77c97311 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -101,6 +101,11 @@ public: @@ -15,7 +15,7 @@ index 7c1189e..d1d77c9 100644 /// can change its size during relaxation. virtual void EmitInstToFragment(const MCInst &Inst, const MCSubtargetInfo &); diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h -index 5390e79..5b258e7 100644 +index 5390e794242..5b258e76787 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -115,6 +115,7 @@ public: @@ -37,7 +37,7 @@ index 5390e79..5b258e7 100644 /// method uses .byte directives instead of .ascii or .asciz for readability. virtual void EmitBinaryData(StringRef Data); diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp -index 174397e..ef7161f 100644 +index 174397e2739..ef7161fb56c 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -122,7 +122,7 @@ void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) { @@ -68,7 +68,7 @@ index 174397e..ef7161f 100644 // We need to create a local symbol to avoid relocations. 
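A note on how the three AArch64 relocation kinds above fit together: R_AARCH64_JUMP26 covers direct B/BL branches, while the ADR_PREL/ADD_ABS pair implements the standard ADRP+ADD sequence for materializing a symbol address. Below is a minimal C# sketch of the page-split arithmetic; SplitAddress is a hypothetical helper for illustration, not part of this patch:

    // ADRP encodes the distance between 4KB pages; ADD supplies the low 12 bits.
    static (long pageDelta, int pageOff) SplitAddress(long pc, long target)
    {
        long pageDelta = (target & ~0xFFFL) - (pc & ~0xFFFL); // IMAGE_REL_BASED_ARM64_PAGEBASE_REL21
        int pageOff = (int)(target & 0xFFFL);                 // IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A
        return (pageDelta, pageOff);
    }

For example, pc = 0x10000ABC and target = 0x10032F10 give pageDelta = 0x32000 and pageOff = 0xF10, which is exactly the split the EmitMOV(Register, ISymbolNode) emitter relies on.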
Frame.Begin = getContext().createTempSymbol(); diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp -index 2bfb9a6..a710098 100644 +index 2bfb9a63eed..a710098e798 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -830,6 +830,7 @@ void MCStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, @@ -79,8 +79,56 @@ index 2bfb9a6..a710098 100644 void MCStreamer::EmitBinaryData(StringRef Data) { EmitBytes(Data); } void MCStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) { visitUsedExpr(*Value); +diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +index 2bd0cbf9f7c..e7643d5f66d 100644 +--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp ++++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +@@ -41,6 +41,15 @@ public: + return AArch64::NumTargetFixupKinds; + } + ++ Optional getFixupKind(StringRef Name) const { ++ return StringSwitch>(Name) ++ .Case("R_AARCH64_JUMP26", (MCFixupKind)AArch64::fixup_aarch64_pcrel_call26) ++ .Case("R_AARCH64_ADR_PREL_LO21",(MCFixupKind)AArch64::fixup_aarch64_pcrel_adrp_imm21) ++ .Case("R_AARCH64_ADD_ABS_LO12_NC", (MCFixupKind)AArch64::fixup_aarch64_add_imm12) ++ .Default(MCAsmBackend::getFixupKind(Name)); ++ } ++ ++ + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { + const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = { + // This table *must* be in the order that the fixup_* kinds are defined +@@ -126,6 +135,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { + case AArch64::fixup_aarch64_pcrel_call26: + case FK_Data_4: + case FK_SecRel_4: ++ case FK_PCRel_4: + return 4; + + case FK_Data_8: +@@ -222,6 +232,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, + case FK_Data_8: + case FK_SecRel_2: + case FK_SecRel_4: ++ case FK_PCRel_4: + return Value; + } + } +diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +index 89c3e5b4c76..ba105365d74 100644 +--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp ++++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +@@ -129,6 +129,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, + case FK_Data_2: + return R_CLS(PREL16); + case FK_Data_4: ++ case FK_PCRel_4: + return R_CLS(PREL32); + case FK_Data_8: + if (IsILP32) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp -index a77df7a..e1aa752 100644 +index a77df7a2598..e1aa7526f9b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -48,6 +48,14 @@ public: @@ -118,7 +166,7 @@ index a77df7a..e1aa752 100644 return 2; case FK_SecRel_4: diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h -index 0237496..01676a0 100644 +index 02374966daf..01676a01683 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -36,6 +36,7 @@ public: @@ -130,7 +178,7 @@ index 0237496..01676a0 100644 bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp -index 59f31be..9b95598 100644 +index 59f31be69d5..9b95598f99f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ 
-103,6 +103,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, @@ -144,7 +192,7 @@ index 59f31be..9b95598 100644 case ARM::fixup_arm_uncondbl: switch (Modifier) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp -index 93f4006..81e4caa 100644 +index 93f4006cee8..108e9c51e13 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -396,6 +396,7 @@ private: @@ -245,7 +293,7 @@ index 93f4006..81e4caa 100644 const SmallVectorImpl &Opcodes) { FlushPendingOffset(); diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp -index 4a94318..f4f5aa1 100644 +index 4a943187ab6..f4f5aa11bf8 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -61,6 +61,7 @@ void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) {} @@ -257,7 +305,7 @@ index 4a94318..f4f5aa1 100644 const SmallVectorImpl &Opcodes) { } diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt -index b654b8c..58d2515 100644 +index b654b8c5cb8..58d25159af8 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -46,6 +46,7 @@ add_llvm_external_project(clang) From b8d01235f586013aeec15b4ea7b23d68e38dcb67 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Tue, 18 Aug 2020 07:52:03 +0200 Subject: [PATCH 20/36] ARM64 include untested helper code with debug break --- .../Target_ARM64/ARM64ReadyToRunHelperNode.cs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs index ef61ad26435..1771968661a 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs @@ -165,13 +165,9 @@ protected override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder, case ReadyToRunHelperId.ResolveVirtualFunction: { + // Not tested encoder.EmitINT3(); - //ARMDebug.EmitHelperNYIAssert(factory, ref encoder, ReadyToRunHelperId.ResolveVirtualFunction); - /* - *** - NOT TESTED!!! - *** MethodDesc targetMethod = (MethodDesc)Target; if (targetMethod.OwningType.IsInterface) { @@ -193,7 +189,6 @@ NOT TESTED!!! 
((short)(EETypeNode.GetVTableOffset(factory.Target.PointerSize) + (slot * factory.Target.PointerSize)))); encoder.EmitRET(); } - */ } break; From e70d94e15d231e99d0319243fa47c83675932288 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Wed, 19 Aug 2020 09:02:12 +0200 Subject: [PATCH 21/36] ARM64 reenable scanning fail exception --- src/ILCompiler/src/Program.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ILCompiler/src/Program.cs b/src/ILCompiler/src/Program.cs index 09e4f817b8c..9638e56eb6d 100644 --- a/src/ILCompiler/src/Program.cs +++ b/src/ILCompiler/src/Program.cs @@ -786,8 +786,8 @@ private int Run(string[] args) "EETypes", "scanned", "compiled", type => !(type.GetTypeDefinition() is EcmaType)); } - // if (scanningFail) - // throw new Exception("Scanning failure"); + if (scanningFail) + throw new Exception("Scanning failure"); } if (debugInfoProvider is IDisposable) From ee068b2ce628419bafa912377fde4d656587805d Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Wed, 19 Aug 2020 13:55:00 +0200 Subject: [PATCH 22/36] Arm64 Support (#2) * CoreRT-ARM64: Helper node asm code generation * CoreRT ARM64: Add additional relocation support * CoreRT ARM64: Use alternative functions for write barriers. On ARM the JIT uses a special register interface to call the write barrier functions. But as the regular ones are still used in other places, we need two different implementations; the helper nodes need to call the custom variants * ARM64 code generation * unwind: use only the dwarf index option * ARM64 runtime assembler functions * ARM64 unix stack walk * ARM64 Exception Handling * ARM64: Fix EH data decoding * Move native layout signature from read-only to data section on non-Windows targets * Fix exception handling macro call * ARM64 thunk helpers * ARM64 process cfi data for elf * ARM64 missing assembler macro * ARM64 port more assembler helpers * ARM64: Add missing case for the generic helper node * ARM64 intrinsic support * ARM64 object writer for ELF object files * ARM64 llvm patches for object writer * ARM64 include untested helper code with debug break * ARM64 reenable scanning fail exception --- .../NativeLayoutSignatureNode.cs | 11 +- .../DependencyAnalysis/ObjectDataBuilder.cs | 1 + .../DependencyAnalysis/ObjectWriter.cs | 27 +- .../Compiler/DependencyAnalysis/Relocation.cs | 44 ++ .../Target_ARM64/ARM64Emitter.cs | 80 ++- .../ARM64ReadyToRunGenericHelperNode.cs | 210 +++++- .../Target_ARM64/ARM64ReadyToRunHelperNode.cs | 181 +++++- .../Target_ARM64/TargetRegisterMap.cs | 2 + .../ExpectedIsaFeaturesRootProvider.cs | 3 +- .../Compiler/HardwareIntrinsicHelpers.Aot.cs | 101 ++- .../src/Compiler/JitHelper.cs | 6 +- .../Compiler/VectorOfTFieldLayoutAlgorithm.cs | 11 + .../src/JitInterface/CorInfoImpl.RyuJit.cs | 2 +- src/ILCompiler/src/Program.cs | 8 + src/JitInterface/src/CorInfoImpl.cs | 150 ++++- src/Native/ObjWriter/cfi.h | 3 +- src/Native/ObjWriter/llvm.patch | 68 +- src/Native/ObjWriter/objwriter.cpp | 67 +- src/Native/ObjWriter/objwriter.h | 3 + src/Native/Runtime/PalRedhawk.h | 21 + src/Native/Runtime/arm64/AllocFast.S | 291 ++++++++- src/Native/Runtime/arm64/CallDescrWorker.S | 138 +++- .../arm64/CallingConventionConverterHelpers.S | 59 +- src/Native/Runtime/arm64/ExceptionHandling.S | 615 +++++++++++++++++- .../Runtime/arm64/InteropThunksHelpers.S | 58 +- src/Native/Runtime/arm64/MiscStubs.S | 239 +++++++ src/Native/Runtime/arm64/PInvoke.S | 353 +++++++++- src/Native/Runtime/arm64/StubDispatch.S | 106 ++-
.../Runtime/arm64/UniversalTransition.S | 157 ++++- src/Native/Runtime/arm64/WriteBarriers.S | 373 ++++++++++- src/Native/Runtime/startup.cpp | 17 +- src/Native/Runtime/unix/UnixContext.cpp | 4 +- .../Runtime/unix/UnixNativeCodeManager.cpp | 3 +- src/Native/Runtime/unix/UnwindHelpers.cpp | 251 ++++--- .../Runtime/unix/unixasmmacrosarm64.inc | 113 ++++ src/Native/libunwind/src/AddressSpace.hpp | 40 +- .../libunwind/src/DwarfInstructions.hpp | 18 +- src/Native/libunwind/src/UnwindCursor.hpp | 11 + 38 files changed, 3646 insertions(+), 199 deletions(-) diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs index ba62660b598..1556406c6f9 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs @@ -55,7 +55,16 @@ public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb) public int Offset => 0; protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler); - public override ObjectNodeSection Section => ObjectNodeSection.ReadOnlyDataSection; + public override ObjectNodeSection Section + { + get + { + if (_identity.Context.Target.IsWindows) + return ObjectNodeSection.ReadOnlyDataSection; + else + return ObjectNodeSection.DataSection; + } + } public override bool IsShareable => false; public override bool StaticDependenciesAreComputed => true; diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectDataBuilder.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectDataBuilder.cs index ffc8db575bc..7273b37119f 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectDataBuilder.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectDataBuilder.cs @@ -302,6 +302,7 @@ public void EmitReloc(ISymbolNode symbol, RelocType relocType, int delta = 0) case RelocType.IMAGE_REL_BASED_THUMB_MOV32: case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12L: + case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A: // Do not vacate space for this kind of relocation, because // the space is embedded in the instruction. break; diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs index f38a31ccf4a..21981bcfe61 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs @@ -857,6 +857,8 @@ public void EmitSymbolDefinition(int currentOffset) { foreach (var name in nodes) { + + _sb.Clear(); AppendExternCPrefix(_sb); name.AppendMangledName(_nodeFactory.NameMangler, _sb); @@ -1043,7 +1045,7 @@ public static void EmitObject(string objectFilePath, IEnumerable // The DWARF CFI unwind is implemented for AMD64 & ARM32 only. 
TargetArchitecture tarch = factory.Target.Architecture; if (!factory.Target.IsWindows && - (tarch == TargetArchitecture.X64 || tarch == TargetArchitecture.ARM)) + (tarch == TargetArchitecture.X64 || tarch == TargetArchitecture.ARM || tarch == TargetArchitecture.ARM64)) objectWriter.BuildCFIMap(factory, node); // Build debug location map @@ -1089,17 +1091,24 @@ public static void EmitObject(string objectFilePath, IEnumerable } int size = objectWriter.EmitSymbolReference(reloc.Target, (int)delta, reloc.RelocType); - // Emit a copy of original Thumb2 instruction that came from RyuJIT - if (reloc.RelocType == RelocType.IMAGE_REL_BASED_THUMB_MOV32 || - reloc.RelocType == RelocType.IMAGE_REL_BASED_THUMB_BRANCH24) + // Emit a copy of original Thumb2/ARM64 instruction that came from RyuJIT + + switch (reloc.RelocType) { - unsafe - { - fixed (void* location = &nodeContents.Data[i]) + case RelocType.IMAGE_REL_BASED_THUMB_MOV32: + case RelocType.IMAGE_REL_BASED_THUMB_BRANCH24: + case RelocType.IMAGE_REL_BASED_ARM64_BRANCH26: + case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: + case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A: + case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12L: + unsafe { - objectWriter.EmitBytes((IntPtr)location, size); + fixed (void* location = &nodeContents.Data[i]) + { + objectWriter.EmitBytes((IntPtr)location, size); + } } - } + break; } // Update nextRelocIndex/Offset diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Relocation.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Relocation.cs index fbb9408222c..e0dd43e6d44 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Relocation.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Relocation.cs @@ -257,6 +257,45 @@ private static unsafe void PutArm64Rel12(uint* pCode, int imm12) Debug.Assert(GetArm64Rel12(pCode) == imm12); } + private static unsafe int GetArm64Rel28(uint* pCode) + { + uint branchInstr = *pCode; + + // first shift 6 bits left to set the sign bit, + // then arithmetic shift right by 4 bits + int imm28 = (((int)(branchInstr & 0x03FFFFFF)) << 6) >> 4; + + return imm28; + } + + private static bool FitsInArm64Rel28(long imm28) + { + return (imm28 >= -0x08000000L) && (imm28 < 0x08000000L); + } + + private static unsafe void PutArm64Rel28(uint* pCode, long imm28) + { + // Verify that we got a valid offset + Debug.Assert(FitsInArm64Rel28(imm28)); + + Debug.Assert((imm28 & 0x3) == 0); // the low two bits must be zero + + uint branchInstr = *pCode; + + branchInstr &= 0xFC000000; // keep bits 31-26 + + Debug.Assert((branchInstr & 0x7FFFFFFF) == 0x14000000); // Must be B or BL + + // Assemble the pc-relative delta 'imm28' into the branch instruction + branchInstr |= (uint)(((imm28 >> 2) & 0x03FFFFFFU)); + + *pCode = branchInstr; // write the assembled instruction + + Debug.Assert(GetArm64Rel28(pCode) == imm28); + } + + + public Relocation(RelocType relocType, int offset, ISymbolNode target) { RelocType = relocType; @@ -286,6 +325,9 @@ public static unsafe void WriteValue(RelocType relocType, void* location, long v case RelocType.IMAGE_REL_BASED_THUMB_BRANCH24: PutThumb2BlRel24((ushort*)location, (uint)value); break; + case RelocType.IMAGE_REL_BASED_ARM64_BRANCH26: + PutArm64Rel28((uint*)location, value); + break; case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: PutArm64Rel21((uint*)location, (int)value); break; @@ -318,6 +360,8 @@ public static unsafe long ReadValue(RelocType relocType, void* location) return (long)GetThumb2Mov32((ushort*)location); 
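The shift pair in GetArm64Rel28 deserves a worked example: imm26 occupies the low 26 bits of a B/BL instruction and is scaled by 4, so shifting left by 6 parks its sign bit at bit 31, and the arithmetic right shift by 4 sign-extends while restoring byte units. A small self-contained check in C#, with an assumed instruction value rather than one taken from the patch:

    uint branchInstr = 0x17FFFFFF; // B .-4 : imm26 is all ones, i.e. -1 instruction
    int imm28 = (((int)(branchInstr & 0x03FFFFFF)) << 6) >> 4;
    // imm28 == -4: one instruction (4 bytes) backwards, matching the hardware's sign extension.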
case RelocType.IMAGE_REL_BASED_THUMB_BRANCH24: return (long)GetThumb2BlRel24((ushort*)location); + case RelocType.IMAGE_REL_BASED_ARM64_BRANCH26: + return (long)GetArm64Rel28((uint*)location); case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: return GetArm64Rel21((uint*)location); case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A: diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs index ee2a3c7516b..e7b8ea0e211 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs @@ -25,7 +25,7 @@ public void EmitMOV(Register regDst, ref AddrMode memory) public void EmitMOV(Register regDst, Register regSrc) { - throw new NotImplementedException(); + Builder.EmitUInt((uint)(0b1_0_1_01010_000_00000_000000_11111_00000u | ((uint)regSrc << 16) | (uint)regDst)); } public void EmitMOV(Register regDst, ushort imm16) @@ -35,6 +35,17 @@ public void EmitMOV(Register regDst, ushort imm16) Builder.EmitUInt(instruction); } + public void EmitMOV(Register regDst, ISymbolNode symbol) + { + // ADRP regDst, [symbol (21bit ADRP thing)] + Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21); + Builder.EmitUInt(0x90000000u | (byte)regDst); + + // Add regDst, (12bit LDR page offset reloc) + Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A); + Builder.EmitUInt((uint)(0b1_0_0_100010_0_000000000000_00000_00000 | ((byte)regDst << 5) | (byte)regDst)); + } + // ldr regDst, [PC + imm19] public void EmitLDR(Register regDst, short offset) { @@ -54,6 +65,25 @@ public void EmitLDR(Register regDst, Register regAddr) Builder.EmitUInt(instruction); } + public void EmitLDR(Register regDst, Register regSrc, int offset) + { + Debug.Assert(offset >= -255 && offset <= 4095); + if (offset >= 0) + { + Debug.Assert(offset % 8 == 0); + + offset /= 8; + + Builder.EmitUInt((uint)(0b11_1110_0_1_0_1_000000000000_00000_00000u | ((uint)offset << 10) | ((uint)regSrc << 5) | (uint)regDst)); + } + else + { + uint o = (uint)offset & 0x1FF; + + Builder.EmitUInt((uint)(0b11_1110_0_0_010_000000000_1_1_00000_00000u | (o << 12) | ((uint)regSrc << 5) | (uint)regDst)); + } + } + public void EmitLEAQ(Register reg, ISymbolNode symbol, int delta = 0) { throw new NotImplementedException(); @@ -69,12 +99,38 @@ public void EmitCMP(ref AddrMode addrMode, sbyte immediate) throw new NotImplementedException(); } + public void EmitCMP(Register reg, sbyte immediate) + { + if (immediate >= 0) + { + Builder.EmitUInt((uint)(0b1_1_1_100010_0_000000000000_00000_11111u | immediate << 10) | ((uint)reg << 5)); + } + else + { + throw new NotImplementedException(); + } + } + // add reg, immediate public void EmitADD(Register reg, byte immediate) { Builder.EmitInt((int)(0x91 << 24) | (immediate << 10) | ((byte)reg << 5) | (byte) reg); } + public void EmitSUB(Register reg, int immediate) + { + if (immediate >= 0) + { + Debug.Assert(immediate % 4 == 0); + + Builder.EmitUInt((uint)(0b1_1_0_100010_0_000000000000_00000_00000u | immediate << 10) | ((uint)reg << 5) | (uint)reg); + } + else + { + throw new NotImplementedException(); + } + } + public void EmitJMP(ISymbolNode symbol) { if (symbol.RepresentsIndirectionCell) @@ -100,11 +156,26 @@ public void EmitJMP(ISymbolNode symbol) } } + public void EmitJMP(Register reg) + { + 
Builder.EmitUInt((uint)(0b11010110_0_0_0_11111_00000_0_00000_00000u | ((uint)reg << 5))); + } + public void EmitINT3() { - throw new NotImplementedException(); + Builder.EmitUInt(0b11010100_001_1111111111111111_000_0_0); + } + + public void EmitINT3(uint id) + { + Builder.EmitUInt(0b11010100_001_1111111111111111_000_0_0); + Builder.EmitUInt((uint)(0b0_00101_00000000000000000000000000u | ((uint)4))); + Builder.EmitUInt(0xdeadc0de); + Builder.EmitUInt(id); + Builder.EmitUInt(0xdeadc0de); } + public void EmitJmpToAddrMode(ref AddrMode addrMode) { throw new NotImplementedException(); @@ -112,12 +183,13 @@ public void EmitJmpToAddrMode(ref AddrMode addrMode) public void EmitRET() { - throw new NotImplementedException(); + Builder.EmitUInt(0b11010110_0_1_0_11111_00000_0_11110_00000); } public void EmitRETIfEqual() { - throw new NotImplementedException(); + Builder.EmitUInt(0b01010100_0000000000000000010_0_0001u); + EmitRET(); } private bool InSignedByteRange(int i) diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs index 7e510164a45..360e4719efb 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs @@ -15,22 +15,207 @@ partial class ReadyToRunGenericHelperNode { protected Register GetContextRegister(ref /* readonly */ ARM64Emitter encoder) { - throw new NotImplementedException(); + if (_id == ReadyToRunHelperId.DelegateCtor) + return encoder.TargetRegister.Arg2; + else + return encoder.TargetRegister.Arg0; } protected void EmitDictionaryLookup(NodeFactory factory, ref ARM64Emitter encoder, Register context, Register result, GenericLookupResult lookup, bool relocsOnly) { - throw new NotImplementedException(); + // INVARIANT: must not trash context register + + // Find the generic dictionary slot + int dictionarySlot = 0; + if (!relocsOnly) + { + // The concrete slot won't be known until we're emitting data - don't ask for it in relocsOnly. + dictionarySlot = factory.GenericDictionaryLayout(_dictionaryOwner).GetSlotForEntry(lookup); + } + + // Load the generic dictionary cell + encoder.EmitLDR(result, context, dictionarySlot * factory.Target.PointerSize); + + switch (lookup.LookupResultReferenceType(factory)) + { + case GenericLookupResultReferenceType.Indirect: + // Do another indirection + encoder.EmitLDR(result, result); + break; + + case GenericLookupResultReferenceType.ConditionalIndirect: + // Test result, 0x1 + // JEQ L1 + // mov result, [result-1] + // L1: + throw new NotImplementedException(); + + default: + break; + } } protected sealed override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder, bool relocsOnly) { - throw new NotImplementedException(); + // First load the generic context into the context register. 
+ EmitLoadGenericContext(factory, ref encoder, relocsOnly); + + Register contextRegister = GetContextRegister(ref encoder); + + switch (_id) + { + case ReadyToRunHelperId.GetNonGCStaticBase: + { + Debug.Assert(contextRegister == encoder.TargetRegister.Arg0); + + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Result, _lookupSignature, relocsOnly); + + MetadataType target = (MetadataType)_target; + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + encoder.EmitRET(); + } + else + { + // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. + encoder.EmitMOV(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg0); + //encoder.EmitSUB(encoder.TargetRegister.Arg3, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg3, (short)factory.Target.PointerSize); + encoder.EmitCMP(encoder.TargetRegister.Arg2, 1); + encoder.EmitRETIfEqual(); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); + encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg3); + + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnNonGCStaticBase)); + } + } + break; + + case ReadyToRunHelperId.GetGCStaticBase: + { + Debug.Assert(contextRegister == encoder.TargetRegister.Arg0); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg0); + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Result, _lookupSignature, relocsOnly); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); + + MetadataType target = (MetadataType)_target; + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + encoder.EmitRET(); + } + else + { + // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. + GenericLookupResult nonGcRegionLookup = factory.GenericLookup.TypeNonGCStaticBase(target); + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg2, nonGcRegionLookup, relocsOnly); + + encoder.EmitSUB(encoder.TargetRegister.Arg2, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, (short)factory.Target.PointerSize); + encoder.EmitCMP(encoder.TargetRegister.Arg3, 1); + encoder.EmitRETIfEqual(); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); + encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2); + + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnGCStaticBase)); + } + } + break; + + case ReadyToRunHelperId.GetThreadStaticBase: + { + Debug.Assert(contextRegister == encoder.TargetRegister.Arg0); + + MetadataType target = (MetadataType)_target; + + // Look up the index cell + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg1, _lookupSignature, relocsOnly); + + ISymbolNode helperEntrypoint; + if (factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + // There is a lazy class constructor. We need the non-GC static base because that's where the + // class constructor context lives. 
+ GenericLookupResult nonGcRegionLookup = factory.GenericLookup.TypeNonGCStaticBase(target); + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2, nonGcRegionLookup, relocsOnly); + int cctorContextSize = NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target); + encoder.EmitSUB(encoder.TargetRegister.Arg2, cctorContextSize); + + helperEntrypoint = factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase); + } + else + { + helperEntrypoint = factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType); + } + + // First arg: address of the TypeManager slot that provides the helper with + // information about module index and the type manager instance (which is used + // for initialization on first access). + encoder.EmitLDR(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg1); + + // Second arg: index of the type in the ThreadStatic section of the modules + encoder.EmitLDR(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg1, factory.Target.PointerSize); + + encoder.EmitJMP(helperEntrypoint); + } + break; + + case ReadyToRunHelperId.DelegateCtor: + { + // This is a weird helper. Codegen populated Arg0 and Arg1 with the values that the constructor + // method expects. Codegen also passed us the generic context in Arg2. + // We now need to load the delegate target method into Arg2 (using a dictionary lookup) + // and the optional 4th parameter, and call the ctor. + + Debug.Assert(contextRegister == encoder.TargetRegister.Arg2); + + var target = (DelegateCreationInfo)_target; + + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, _lookupSignature, relocsOnly); + + if (target.Thunk != null) + { + Debug.Assert(target.Constructor.Method.Signature.Length == 3); + encoder.EmitMOV(encoder.TargetRegister.Arg3, target.Thunk); + } + else + { + Debug.Assert(target.Constructor.Method.Signature.Length == 2); + } + + encoder.EmitJMP(target.Constructor); + } + break; + + // These are all simple: just get the thing from the dictionary and we're done + case ReadyToRunHelperId.TypeHandle: + case ReadyToRunHelperId.MethodHandle: + case ReadyToRunHelperId.FieldHandle: + case ReadyToRunHelperId.MethodDictionary: + case ReadyToRunHelperId.MethodEntry: + case ReadyToRunHelperId.VirtualDispatchCell: + case ReadyToRunHelperId.DefaultConstructor: + case ReadyToRunHelperId.ObjectAllocator: + case ReadyToRunHelperId.TypeHandleForCasting: + { + EmitDictionaryLookup(factory, ref encoder, contextRegister, encoder.TargetRegister.Result, _lookupSignature, relocsOnly); + encoder.EmitRET(); + } + break; + + default: + encoder.EmitINT3(); + Console.WriteLine("Missing R2R for {0}", Id.ToString()); + break; + } } protected virtual void EmitLoadGenericContext(NodeFactory factory, ref ARM64Emitter encoder, bool relocsOnly) { - throw new NotImplementedException(); + // Assume generic context is already loaded in the context register.
} } @@ -38,7 +223,22 @@ partial class ReadyToRunGenericLookupFromTypeNode { protected override void EmitLoadGenericContext(NodeFactory factory, ref ARM64Emitter encoder, bool relocsOnly) { - throw new NotImplementedException(); + // We start with context register pointing to the EEType + Register contextRegister = GetContextRegister(ref encoder); + + // Locate the VTable slot that points to the dictionary + int vtableSlot = 0; + if (!relocsOnly) + { + // The concrete slot won't be known until we're emitting data - don't ask for it in relocsOnly. + vtableSlot = VirtualMethodSlotHelper.GetGenericDictionarySlot(factory, (TypeDesc)_dictionaryOwner); + } + + int pointerSize = factory.Target.PointerSize; + int slotOffset = EETypeNode.GetVTableOffset(pointerSize) + (vtableSlot * pointerSize); + + // Load the dictionary pointer from the VTable + encoder.EmitLDR(contextRegister, contextRegister, slotOffset); } } } diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs index 28d7e50239f..1771968661a 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs @@ -16,7 +16,186 @@ public partial class ReadyToRunHelperNode { protected override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder, bool relocsOnly) { - throw new NotImplementedException(); + switch (Id) + { + case ReadyToRunHelperId.VirtualCall: + { + MethodDesc targetMethod = (MethodDesc)Target; + + Debug.Assert(!targetMethod.OwningType.IsInterface); + Debug.Assert(!targetMethod.CanMethodBeInSealedVTable()); + + int pointerSize = factory.Target.PointerSize; + + int slot = 0; + if (!relocsOnly) + { + slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, targetMethod, targetMethod.OwningType); + Debug.Assert(slot != -1); + } + + encoder.EmitLDR(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.Arg0, 0); + encoder.EmitLDR(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.IntraProcedureCallScratch1, + EETypeNode.GetVTableOffset(pointerSize) + (slot * pointerSize)); + encoder.EmitJMP(encoder.TargetRegister.IntraProcedureCallScratch1); + } + break; + + case ReadyToRunHelperId.GetNonGCStaticBase: + { + MetadataType target = (MetadataType)Target; + + bool hasLazyStaticConstructor = factory.PreinitializationManager.HasLazyStaticConstructor(target); + encoder.EmitMOV(encoder.TargetRegister.Result, factory.TypeNonGCStaticsSymbol(target)); + + if (!hasLazyStaticConstructor) + { + encoder.EmitRET(); + } + else + { + // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. 
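The generic-lookup stub above and the VirtualCall stub compute vtable slot addresses with the same arithmetic, so one worked instance may help; the 24-byte header below is an illustrative assumption, the real value comes from EETypeNode.GetVTableOffset:

    int pointerSize = 8;                               // 64-bit target
    int vtableStart = 24;                              // EETypeNode.GetVTableOffset(pointerSize), assumed
    int slot = 2;                                      // resolved by VirtualMethodSlotHelper
    int slotOffset = vtableStart + slot * pointerSize; // == 40; EmitLDR scales it by 8 into the LDR immediate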
+ encoder.EmitMOV(encoder.TargetRegister.Arg3, encoder.TargetRegister.Result); + encoder.EmitSUB(encoder.TargetRegister.Arg3, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg3, (short)factory.Target.PointerSize); + encoder.EmitCMP(encoder.TargetRegister.Arg2, 1); + encoder.EmitRETIfEqual(); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); + encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg3); + + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnNonGCStaticBase)); + } + } + break; + + case ReadyToRunHelperId.GetThreadStaticBase: + { + MetadataType target = (MetadataType)Target; + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeThreadStaticIndex(target)); + + // First arg: address of the TypeManager slot that provides the helper with + // information about module index and the type manager instance (which is used + // for initialization on first access). + encoder.EmitLDR(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2); + + // Second arg: index of the type in the ThreadStatic section of the modules + encoder.EmitLDR(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg2, factory.Target.PointerSize); + + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType)); + } + else + { + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); + encoder.EmitSUB(encoder.TargetRegister.Arg2, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + // TODO: performance optimization - inline the check verifying whether we need to trigger the cctor + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase)); + } + } + break; + + case ReadyToRunHelperId.GetGCStaticBase: + { + MetadataType target = (MetadataType)Target; + + encoder.EmitMOV(encoder.TargetRegister.Result, factory.TypeGCStaticsSymbol(target)); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); + + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + encoder.EmitRET(); + } + else + { + // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. 
+ encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); + encoder.EmitSUB(encoder.TargetRegister.Arg2, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, (short)factory.Target.PointerSize); + encoder.EmitCMP(encoder.TargetRegister.Arg3, 1); + encoder.EmitRETIfEqual(); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); + encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2); + + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnGCStaticBase)); + } + } + break; + + case ReadyToRunHelperId.DelegateCtor: + { + DelegateCreationInfo target = (DelegateCreationInfo)Target; + + if (target.TargetNeedsVTableLookup) + { + Debug.Assert(!target.TargetMethod.CanMethodBeInSealedVTable()); + + encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg1); + + int slot = 0; + if (!relocsOnly) + slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, target.TargetMethod, target.TargetMethod.OwningType); + + Debug.Assert(slot != -1); + encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, + EETypeNode.GetVTableOffset(factory.Target.PointerSize) + (slot * factory.Target.PointerSize)); + } + else + { + ISymbolNode targetMethodNode = target.GetTargetNode(factory); + encoder.EmitMOV(encoder.TargetRegister.Arg2, target.GetTargetNode(factory)); + } + + if (target.Thunk != null) + { + Debug.Assert(target.Constructor.Method.Signature.Length == 3); + encoder.EmitMOV(encoder.TargetRegister.Arg3, target.Thunk); + } + else + { + Debug.Assert(target.Constructor.Method.Signature.Length == 2); + } + + encoder.EmitJMP(target.Constructor); + } + break; + + case ReadyToRunHelperId.ResolveVirtualFunction: + { + // Not tested + encoder.EmitINT3(); + + MethodDesc targetMethod = (MethodDesc)Target; + if (targetMethod.OwningType.IsInterface) + { + encoder.EmitMOV(encoder.TargetRegister.Arg1, factory.InterfaceDispatchCell(targetMethod)); + encoder.EmitJMP(factory.ExternSymbol("RhpResolveInterfaceMethod")); + } + else + { + if (relocsOnly) + break; + + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Arg0); + + Debug.Assert(!targetMethod.CanMethodBeInSealedVTable()); + + int slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, targetMethod, targetMethod.OwningType); + Debug.Assert(slot != -1); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result, + ((short)(EETypeNode.GetVTableOffset(factory.Target.PointerSize) + (slot * factory.Target.PointerSize)))); + encoder.EmitRET(); + } + } + break; + + + default: + throw new NotImplementedException(); + } } } } diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/TargetRegisterMap.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/TargetRegisterMap.cs index ad69fdf9451..5a328f4c650 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/TargetRegisterMap.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/TargetRegisterMap.cs @@ -20,6 +20,7 @@ public struct TargetRegisterMap public readonly Register Arg5; public readonly Register Arg6; public readonly Register Arg7; + public readonly Register IntraProcedureCallScratch1; public readonly Register Result; public TargetRegisterMap(TargetOS os) @@ -32,6 +33,7 @@ public TargetRegisterMap(TargetOS os) Arg5 = Register.X5; Arg6 = 
Register.X6; Arg7 = Register.X7; + IntraProcedureCallScratch1 = Register.X16; Result = Register.X0; } } diff --git a/src/ILCompiler.Compiler/src/Compiler/ExpectedIsaFeaturesRootProvider.cs b/src/ILCompiler.Compiler/src/Compiler/ExpectedIsaFeaturesRootProvider.cs index adca3d89466..90f7b77a4f7 100644 --- a/src/ILCompiler.Compiler/src/Compiler/ExpectedIsaFeaturesRootProvider.cs +++ b/src/ILCompiler.Compiler/src/Compiler/ExpectedIsaFeaturesRootProvider.cs @@ -19,7 +19,8 @@ public ExpectedIsaFeaturesRootProvider(InstructionSetSupport isaSupport) void ICompilationRootProvider.AddCompilationRoots(IRootingServiceProvider rootProvider) { if (_isaSupport.Architecture == TargetArchitecture.X64 - || _isaSupport.Architecture == TargetArchitecture.X86) + || _isaSupport.Architecture == TargetArchitecture.X86 + || _isaSupport.Architecture == TargetArchitecture.ARM64) { int isaFlags = HardwareIntrinsicHelpers.GetRuntimeRequiredIsaFlags(_isaSupport); byte[] bytes = BitConverter.GetBytes(isaFlags); diff --git a/src/ILCompiler.Compiler/src/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/ILCompiler.Compiler/src/Compiler/HardwareIntrinsicHelpers.Aot.cs index ec35284a865..7f51dd80d6a 100644 --- a/src/ILCompiler.Compiler/src/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ b/src/ILCompiler.Compiler/src/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -56,9 +56,23 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte string id = InstructionSetSupport.GetHardwareIntrinsicId(method.Context.Target.Architecture, method.OwningType); - Debug.Assert(method.Context.Target.Architecture == TargetArchitecture.X64 - || method.Context.Target.Architecture == TargetArchitecture.X86); - int flag = XArchIntrinsicConstants.FromHardwareIntrinsicId(id); + int flag = 0; + + switch (method.Context.Target.Architecture) + { + case TargetArchitecture.X86: + case TargetArchitecture.X64: + flag = XArchIntrinsicConstants.FromHardwareIntrinsicId(id); + break; + + case TargetArchitecture.ARM64: + flag = Arm64IntrinsicConstants.FromHardwareIntrinsicId(id); + break; + + default: + Debug.Fail("Unsupported Architecture"); + break; + } var emit = new ILEmitter(); ILCodeStream codeStream = emit.NewCodeStream(); @@ -75,12 +89,22 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte public static int GetRuntimeRequiredIsaFlags(InstructionSetSupport instructionSetSupport) { - Debug.Assert(instructionSetSupport.Architecture == TargetArchitecture.X64 || - instructionSetSupport.Architecture == TargetArchitecture.X86); - return XArchIntrinsicConstants.FromInstructionSetFlags(instructionSetSupport.SupportedFlags); + switch (instructionSetSupport.Architecture) + { + case TargetArchitecture.X86: + case TargetArchitecture.X64: + return XArchIntrinsicConstants.FromInstructionSetFlags(instructionSetSupport.SupportedFlags); + + case TargetArchitecture.ARM64: + return Arm64IntrinsicConstants.FromInstructionSetFlags(instructionSetSupport.SupportedFlags); + + default: + Debug.Fail("Unsupported Architecture"); + return 0; + } } - // Keep this enumeration in sync with startup.cpp in the native runtime. + // Keep these enumerations in sync with startup.cpp in the native runtime. 
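In effect, the IL that EmitIsSupportedIL produces for an ARM64 IsSupported property is a flag test against the runtime's CPU feature word. Expressed as C# (a sketch: the field here stands in for g_cpuFeatures, and the method shape is illustrative; only the constant comes from the tables below):

    static int s_cpuFeatures; // populated at startup, mirrors g_cpuFeatures
    static bool AdvSimd_IsSupported()
    {
        const int AdvSimd = 0x0004; // Arm64IntrinsicConstants.AdvSimd
        return (s_cpuFeatures & AdvSimd) != 0;
    }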
private static class XArchIntrinsicConstants { // SSE and SSE2 are baseline ISAs - they're always available @@ -166,5 +190,68 @@ public static int FromInstructionSetFlags(InstructionSetFlags instructionSets) return result; } } + + private static class Arm64IntrinsicConstants + { + public const int ArmBase = 0x0001; + public const int ArmBase_Arm64 = 0x0002; + public const int AdvSimd = 0x0004; + public const int AdvSimd_Arm64 = 0x0008; + public const int Aes = 0x0010; + public const int Crc32 = 0x0020; + public const int Crc32_Arm64 = 0x0040; + public const int Sha1 = 0x0080; + public const int Sha256 = 0x0100; + public const int Atomics = 0x0200; + public const int Vector64 = 0x0400; + public const int Vector128 = 0x0800; + + public static int FromHardwareIntrinsicId(string id) + { + return id switch + { + "ArmBase" => ArmBase, + "ArmBase_Arm64" => ArmBase_Arm64, + "AdvSimd" => AdvSimd, + "AdvSimd_Arm64" => AdvSimd_Arm64, + "Aes" => Aes, + "Crc32" => Crc32, + "Crc32_Arm64" => Crc32_Arm64, + "Sha1" => Sha1, + "Sha256" => Sha256, + "Atomics" => Atomics, + "Vector64" => Vector64, + "Vector128" => Vector128, + _ => throw new NotSupportedException(), + }; + } + + public static int FromInstructionSetFlags(InstructionSetFlags instructionSets) + { + int result = 0; + + foreach (InstructionSet instructionSet in instructionSets) + { + result |= instructionSet switch + { + InstructionSet.ARM64_ArmBase => ArmBase, + InstructionSet.ARM64_ArmBase_Arm64 => ArmBase_Arm64, + InstructionSet.ARM64_AdvSimd => AdvSimd, + InstructionSet.ARM64_AdvSimd_Arm64 => AdvSimd_Arm64, + InstructionSet.ARM64_Aes => Aes, + InstructionSet.ARM64_Crc32 => Crc32, + InstructionSet.ARM64_Crc32_Arm64 => Crc32_Arm64, + InstructionSet.ARM64_Sha1 => Sha1, + InstructionSet.ARM64_Sha256 => Sha256, + InstructionSet.ARM64_Atomics => Atomics, + InstructionSet.ARM64_Vector64 => Vector64, + InstructionSet.ARM64_Vector128 => Vector128, + _ => throw new NotSupportedException() + }; + } + + return result; + } + } } } diff --git a/src/ILCompiler.Compiler/src/Compiler/JitHelper.cs b/src/ILCompiler.Compiler/src/Compiler/JitHelper.cs index 61a20149364..81a609543fd 100644 --- a/src/ILCompiler.Compiler/src/Compiler/JitHelper.cs +++ b/src/ILCompiler.Compiler/src/Compiler/JitHelper.cs @@ -62,13 +62,13 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, break; case ReadyToRunHelper.WriteBarrier: - mangledName = "RhpAssignRef"; + mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? "RhpAssignRefArm64" : "RhpAssignRef"; break; case ReadyToRunHelper.CheckedWriteBarrier: - mangledName = "RhpCheckedAssignRef"; + mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? "RhpCheckedAssignRefArm64" : "RhpCheckedAssignRef"; break; case ReadyToRunHelper.ByRefWriteBarrier: - mangledName = "RhpByRefAssignRef"; + mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? 
"RhpByRefAssignRefArm64" : "RhpByRefAssignRef"; break; case ReadyToRunHelper.WriteBarrier_EAX: mangledName = "RhpAssignRefEAX"; diff --git a/src/ILCompiler.Compiler/src/Compiler/VectorOfTFieldLayoutAlgorithm.cs b/src/ILCompiler.Compiler/src/Compiler/VectorOfTFieldLayoutAlgorithm.cs index 56cc5f497ae..ccd065dfaca 100644 --- a/src/ILCompiler.Compiler/src/Compiler/VectorOfTFieldLayoutAlgorithm.cs +++ b/src/ILCompiler.Compiler/src/Compiler/VectorOfTFieldLayoutAlgorithm.cs @@ -64,6 +64,17 @@ public override bool ComputeContainsGCPointers(DefType type) public override ValueTypeShapeCharacteristics ComputeValueTypeShapeCharacteristics(DefType type) { + if (type.Context.Target.Architecture == TargetArchitecture.ARM64 && + type.Instantiation[0].IsPrimitiveNumeric) + { + return type.InstanceFieldSize.AsInt switch + { + 8 => ValueTypeShapeCharacteristics.Vector64Aggregate, + 16 => ValueTypeShapeCharacteristics.Vector128Aggregate, + _ => ValueTypeShapeCharacteristics.None + }; + } + return _fallbackAlgorithm.ComputeValueTypeShapeCharacteristics(type); } diff --git a/src/ILCompiler.RyuJit/src/JitInterface/CorInfoImpl.RyuJit.cs b/src/ILCompiler.RyuJit/src/JitInterface/CorInfoImpl.RyuJit.cs index c1cac0a4fd5..a2aca7a75f2 100644 --- a/src/ILCompiler.RyuJit/src/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/ILCompiler.RyuJit/src/JitInterface/CorInfoImpl.RyuJit.cs @@ -671,7 +671,7 @@ private ObjectNode.ObjectData EncodeEHInfo() RelocType rel = (_compilation.NodeFactory.Target.IsWindows) ? RelocType.IMAGE_REL_BASED_ABSOLUTE : - RelocType.IMAGE_REL_BASED_REL32; + RelocType.IMAGE_REL_BASED_RELPTR32; if (_compilation.NodeFactory.Target.Abi == TargetAbi.Jit) rel = RelocType.IMAGE_REL_BASED_REL32; diff --git a/src/ILCompiler/src/Program.cs b/src/ILCompiler/src/Program.cs index 484924561df..9638e56eb6d 100644 --- a/src/ILCompiler/src/Program.cs +++ b/src/ILCompiler/src/Program.cs @@ -403,6 +403,14 @@ private int Run(string[] args) optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi2"); } } + else if (_targetArchitecture == TargetArchitecture.ARM64) + { + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("crc"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha1"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha2"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lse"); + } optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(out var optimisticInstructionSet, out _, (string specifiedInstructionSet, string impliedInstructionSet) => throw new NotSupportedException()); diff --git a/src/JitInterface/src/CorInfoImpl.cs b/src/JitInterface/src/CorInfoImpl.cs index 2be99788f34..ce05c5aecfd 100644 --- a/src/JitInterface/src/CorInfoImpl.cs +++ b/src/JitInterface/src/CorInfoImpl.cs @@ -43,8 +43,16 @@ private enum ImageFileMachine ARM = 0x01c4, ARM64 = 0xaa64, } + private enum CFI_OPCODE + { + CFI_ADJUST_CFA_OFFSET, // Offset is adjusted relative to the current one. + CFI_DEF_CFA_REGISTER, // New register is used to compute CFA + CFI_REL_OFFSET, // Register is saved at offset from the current CFA + CFI_DEF_CFA // Take address from register and add offset to it. 
+ }; - internal const string JitLibrary = "clrjitilc"; + //internal const string JitLibrary = "clrjitilc"; + internal const string JitLibrary = "protononjit"; #if SUPPORT_JIT private const string JitSupportLibrary = "*"; @@ -2687,9 +2695,146 @@ private void allocUnwindInfo(byte* pHotCode, byte* pColdCode, uint startOffset, blobData[i] = pUnwindBlock[i]; } + var target = _compilation.TypeSystemContext.Target; + + if (target.Architecture == TargetArchitecture.ARM64 && target.OperatingSystem == TargetOS.Linux) + { + blobData = CompressARM64CFI(blobData); + } + _frameInfos[_usedFrameInfos++] = new FrameInfo(flags, (int)startOffset, (int)endOffset, blobData); } + private byte[] CompressARM64CFI(byte[] blobData) + { + if (blobData == null || blobData.Length == 0) + { + return blobData; + } + + Debug.Assert(blobData.Length % 8 == 0); + + short spReg = -1; + + int codeOffset = 0; + short cfaRegister = spReg; + int cfaOffset = 0; + int spOffset = 0; + + int[] registerOffset = new int[96]; + + for (int i = 0; i < registerOffset.Length; i++) + { + registerOffset[i] = int.MinValue; + } + + int offset = 0; + while (offset < blobData.Length) + { + codeOffset = Math.Max(codeOffset, blobData[offset++]); + CFI_OPCODE opcode = (CFI_OPCODE)blobData[offset++]; + short dwarfReg = BitConverter.ToInt16(blobData, offset); + offset += sizeof(short); + int cfiOffset = BitConverter.ToInt32(blobData, offset); + offset += sizeof(int); + + switch (opcode) + { + case CFI_OPCODE.CFI_DEF_CFA_REGISTER: + cfaRegister = dwarfReg; + + if (spOffset != 0) + { + for (int i = 0; i < registerOffset.Length; i++) + { + if (registerOffset[i] != int.MinValue) + { + registerOffset[i] -= spOffset; + } + } + + cfaOffset += spOffset; + spOffset = 0; + } + + break; + + case CFI_OPCODE.CFI_REL_OFFSET: + Debug.Assert(cfaRegister == spReg); + registerOffset[dwarfReg] = cfiOffset; + break; + + case CFI_OPCODE.CFI_ADJUST_CFA_OFFSET: + if (cfaRegister != spReg) + { + cfaOffset += cfiOffset; + } + else + { + spOffset += cfiOffset; + + for (int i = 0; i < registerOffset.Length; i++) + { + if (registerOffset[i] != int.MinValue) + { + registerOffset[i] += cfiOffset; + } + } + } + break; + } + } + + using (MemoryStream cfiStream = new MemoryStream()) + { + int storeOffset = 0; + + using (BinaryWriter cfiWriter = new BinaryWriter(cfiStream)) + { + if (cfaRegister != -1) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write(cfaOffset != 0 ? 
(byte)CFI_OPCODE.CFI_DEF_CFA : (byte)CFI_OPCODE.CFI_DEF_CFA_REGISTER); + cfiWriter.Write(cfaRegister); + cfiWriter.Write(cfaOffset); + storeOffset = cfaOffset; + } + else + { + if (cfaOffset != 0) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write((byte)CFI_OPCODE.CFI_ADJUST_CFA_OFFSET); + cfiWriter.Write((short)-1); + cfiWriter.Write(cfaOffset); + } + + if (spOffset != 0) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write((byte)CFI_OPCODE.CFI_DEF_CFA); + cfiWriter.Write((short)31); + cfiWriter.Write(spOffset); + //storeOffset = -spOffset; + } + } + + for (int i = registerOffset.Length - 1; i >= 0; i--) + { + if (registerOffset[i] != int.MinValue) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write((byte)CFI_OPCODE.CFI_REL_OFFSET); + cfiWriter.Write((short)i); + cfiWriter.Write(registerOffset[i] + storeOffset); + } + } + } + + return cfiStream.ToArray(); + } + } + private void* allocGCInfo(UIntPtr size) { _gcInfo = new byte[(int)size]; @@ -2811,11 +2956,14 @@ private static RelocType GetRelocType(TargetArchitecture targetArchitecture, ush if (targetArchitecture != TargetArchitecture.ARM64) return (RelocType)fRelocType; + const ushort IMAGE_REL_ARM64_BRANCH26 = 3; const ushort IMAGE_REL_ARM64_PAGEBASE_REL21 = 4; const ushort IMAGE_REL_ARM64_PAGEOFFSET_12A = 6; switch (fRelocType) { + case IMAGE_REL_ARM64_BRANCH26: + return RelocType.IMAGE_REL_BASED_ARM64_BRANCH26; case IMAGE_REL_ARM64_PAGEBASE_REL21: return RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21; case IMAGE_REL_ARM64_PAGEOFFSET_12A: diff --git a/src/Native/ObjWriter/cfi.h b/src/Native/ObjWriter/cfi.h index 4d5a3648207..c161b260adb 100644 --- a/src/Native/ObjWriter/cfi.h +++ b/src/Native/ObjWriter/cfi.h @@ -13,7 +13,8 @@ enum CFI_OPCODE { CFI_ADJUST_CFA_OFFSET, // Offset is adjusted relative to the current one. CFI_DEF_CFA_REGISTER, // New register is used to compute CFA - CFI_REL_OFFSET // Register is saved at offset from the current CFA + CFI_REL_OFFSET, // Register is saved at offset from the current CFA + CFI_DEF_CFA // Take address from register and add offset to it. }; struct CFI_CODE diff --git a/src/Native/ObjWriter/llvm.patch b/src/Native/ObjWriter/llvm.patch index 67ae23ec956..a3eca0ecbe3 100644 --- a/src/Native/ObjWriter/llvm.patch +++ b/src/Native/ObjWriter/llvm.patch @@ -1,5 +1,5 @@ diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h -index 7c1189e..d1d77c9 100644 +index 7c1189e46ab..d1d77c97311 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -101,6 +101,11 @@ public: @@ -15,7 +15,7 @@ index 7c1189e..d1d77c9 100644 /// can change its size during relaxation. virtual void EmitInstToFragment(const MCInst &Inst, const MCSubtargetInfo &); diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h -index 5390e79..5b258e7 100644 +index 5390e794242..5b258e76787 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -115,6 +115,7 @@ public: @@ -37,7 +37,7 @@ index 5390e79..5b258e7 100644 /// method uses .byte directives instead of .ascii or .asciz for readability. 
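Back to CompressARM64CFI for one clarifying detail: every record it reads is exactly 8 bytes, a code-offset byte, an opcode byte, a 16-bit DWARF register number, and a 32-bit offset, which is what the blobData.Length % 8 == 0 assert enforces. A hedged writer-side sketch of that layout (the helper name is invented; only the byte order is taken from the parsing loop):

    static byte[] CfiRecord(byte codeOffset, byte opcode, short dwarfReg, int offset)
    {
        var ms = new System.IO.MemoryStream();
        using (var w = new System.IO.BinaryWriter(ms))
        {
            w.Write(codeOffset); // folded into one offset via Math.Max during parsing
            w.Write(opcode);     // (byte)CFI_OPCODE
            w.Write(dwarfReg);   // read back with BitConverter.ToInt16
            w.Write(offset);     // read back with BitConverter.ToInt32
        }
        return ms.ToArray();     // always 8 bytes per record
    }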
virtual void EmitBinaryData(StringRef Data);
 diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
-index 174397e..ef7161f 100644
+index 174397e2739..ef7161fb56c 100644
 --- a/lib/MC/MCObjectStreamer.cpp
 +++ b/lib/MC/MCObjectStreamer.cpp
 @@ -122,7 +122,7 @@ void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) {
@@ -68,7 +68,7 @@ index 174397e..ef7161f 100644
  // We need to create a local symbol to avoid relocations.
  Frame.Begin = getContext().createTempSymbol();
 diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
-index 2bfb9a6..a710098 100644
+index 2bfb9a63eed..a710098e798 100644
 --- a/lib/MC/MCStreamer.cpp
 +++ b/lib/MC/MCStreamer.cpp
 @@ -830,6 +830,7 @@ void MCStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
@@ -79,8 +79,56 @@ index 2bfb9a6..a710098 100644
  void MCStreamer::EmitBinaryData(StringRef Data) { EmitBytes(Data); }
  void MCStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) {
  visitUsedExpr(*Value);
+diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+index 2bd0cbf9f7c..e7643d5f66d 100644
+--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
++++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+@@ -41,6 +41,15 @@ public:
+   return AArch64::NumTargetFixupKinds;
+ }
+
++  Optional<MCFixupKind> getFixupKind(StringRef Name) const {
++    return StringSwitch<Optional<MCFixupKind>>(Name)
++      .Case("R_AARCH64_JUMP26", (MCFixupKind)AArch64::fixup_aarch64_pcrel_call26)
++      .Case("R_AARCH64_ADR_PREL_LO21", (MCFixupKind)AArch64::fixup_aarch64_pcrel_adrp_imm21)
++      .Case("R_AARCH64_ADD_ABS_LO12_NC", (MCFixupKind)AArch64::fixup_aarch64_add_imm12)
++      .Default(MCAsmBackend::getFixupKind(Name));
++  }
++
++
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
+ const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = {
+ // This table *must* be in the order that the fixup_* kinds are defined
+@@ -126,6 +135,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
+ case AArch64::fixup_aarch64_pcrel_call26:
+ case FK_Data_4:
+ case FK_SecRel_4:
++ case FK_PCRel_4:
+ return 4;
+
+ case FK_Data_8:
+@@ -222,6 +232,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+ case FK_Data_8:
+ case FK_SecRel_2:
+ case FK_SecRel_4:
++ case FK_PCRel_4:
+ return Value;
+ }
+ }
+diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+index 89c3e5b4c76..ba105365d74 100644
+--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
++++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+@@ -129,6 +129,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
+ case FK_Data_2:
+ return R_CLS(PREL16);
+ case FK_Data_4:
++ case FK_PCRel_4:
+ return R_CLS(PREL32);
+ case FK_Data_8:
+ if (IsILP32) {
 diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
-index a77df7a..e1aa752 100644
+index a77df7a2598..e1aa7526f9b 100644
 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
 +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
 @@ -48,6 +48,14 @@ public:
@@ -118,7 +166,7 @@ index a77df7a..e1aa752 100644
  return 2;
  case FK_SecRel_4:
 diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
-index 0237496..01676a0 100644
+index 02374966daf..01676a01683 100644
 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
 +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
 @@ -36,6 +36,7 @@ public:

 bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
 diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
-index 59f31be..9b95598 100644
+index 59f31be69d5..9b95598f99f 100644
 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
 +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
 @@ -103,6 +103,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
  case ARM::fixup_arm_uncondbl:
  switch (Modifier) {
 diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
-index 93f4006..81e4caa 100644
+index 93f4006cee8..108e9c51e13 100644
 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
 +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
 @@ -396,6 +396,7 @@ private:
@@ -245,7 +293,7 @@ index 93f4006..81e4caa 100644
  const SmallVectorImpl<uint8_t> &Opcodes) {
  FlushPendingOffset();
 diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
-index 4a94318..f4f5aa1 100644
+index 4a943187ab6..f4f5aa11bf8 100644
 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
 +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
 @@ -61,6 +61,7 @@ void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) {}
@@ -257,7 +305,7 @@ index 4a94318..f4f5aa1 100644
  const SmallVectorImpl<uint8_t> &Opcodes) {
  }
 diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
-index b654b8c..58d2515 100644
+index b654b8c5cb8..58d25159af8 100644
 --- a/tools/CMakeLists.txt
 +++ b/tools/CMakeLists.txt
 @@ -46,6 +46,7 @@ add_llvm_external_project(clang)
diff --git a/src/Native/ObjWriter/objwriter.cpp b/src/Native/ObjWriter/objwriter.cpp
index b9c31e62c54..a95a781ab64 100644
--- a/src/Native/ObjWriter/objwriter.cpp
+++ b/src/Native/ObjWriter/objwriter.cpp
@@ -50,6 +50,7 @@
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/Win64EH.h"
 #include "llvm/Target/TargetMachine.h"
+#include "../../../lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
@@ -309,7 +310,13 @@ void ObjectWriter::SetCodeSectionAttribute(const char *SectionName,
 }
 
 void ObjectWriter::EmitAlignment(int ByteAlignment) {
-  Streamer->EmitValueToAlignment(ByteAlignment, 0x90 /* Nop */);
+  int64_t fillValue = 0x90; // x86 nop
+
+  if (TMachine->getTargetTriple().getArch() == llvm::Triple::ArchType::aarch64) {
+    fillValue = 0; // 0x90 is not a valid AArch64 instruction encoding; pad with zeros instead
+  }
+
+  Streamer->EmitValueToAlignment(ByteAlignment, fillValue);
 }
 
 void ObjectWriter::EmitBlob(int BlobSize, const char *Blob) {
@@ -333,15 +340,28 @@ void ObjectWriter::EmitSymbolDef(const char *SymbolName, bool global) {
     Streamer->EmitSymbolAttribute(Sym, MCSA_Local);
   }
 
+  Triple TheTriple = TMachine->getTargetTriple();
+
   // A Thumb2 function symbol should be marked with an appropriate ELF
   // attribute to make later computation of a relocation address value correct
-  if (GetTriple().getArch() == Triple::thumb &&
-      GetTriple().getObjectFormat() == Triple::ELF &&
+
+  if (TheTriple.getObjectFormat() == Triple::ELF &&
       Streamer->getCurrentSectionOnly()->getKind().isText()) {
-    Streamer->EmitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
+    switch (TheTriple.getArch()) {
+    case Triple::thumb:
+    case Triple::aarch64:
+      Streamer->EmitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
+      break;
+
+    default:
+      break;
+    }
   }
 
-  Streamer->EmitLabel(Sym);
+  if (Sym->isUndefined())
+  {
Streamer->EmitLabel(Sym); + } } const MCSymbolRefExpr * @@ -353,6 +373,8 @@ ObjectWriter::GetSymbolRefExpr(const char *SymbolName, return MCSymbolRefExpr::create(T, Kind, *OutContext); } + + unsigned ObjectWriter::GetDFSize() { return Streamer->getOrCreateDataFragment()->getContents().size(); } @@ -398,15 +420,16 @@ int ObjectWriter::EmitSymbolRef(const char *SymbolName, case RelocType::IMAGE_REL_BASED_DIR64: Size = 8; break; - case RelocType::IMAGE_REL_BASED_REL32: + case RelocType::IMAGE_REL_BASED_REL32: { Size = 4; - IsPCRel = true; + IsPCRel = true; if (ObjFileInfo->getObjectFileType() == ObjFileInfo->IsELF) { - // PLT is valid only for code symbols, - // but there shouldn't be references to global data symbols - Kind = MCSymbolRefExpr::VK_PLT; + // PLT is valid only for code symbols, + // but there shouldn't be references to global data symbols + Kind = MCSymbolRefExpr::VK_PLT; } break; + } case RelocType::IMAGE_REL_BASED_RELPTR32: Size = 4; IsPCRel = true; @@ -424,6 +447,25 @@ int ObjectWriter::EmitSymbolRef(const char *SymbolName, EmitRelocDirective(GetDFSize(), "R_ARM_THM_JUMP24", TargetExpr); return 4; } + case RelocType::IMAGE_REL_BASED_ARM64_BRANCH26: { + const MCExpr *TargetExpr = GenTargetExpr(SymbolName, Kind, Delta); + EmitRelocDirective(GetDFSize(), "R_AARCH64_JUMP26", TargetExpr); + return 4; + } + case RelocType::IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: { + const MCExpr *TargetExpr = GenTargetExpr(SymbolName, Kind, Delta); + TargetExpr = + AArch64MCExpr::create(TargetExpr, AArch64MCExpr::VK_CALL, *OutContext); + EmitRelocDirective(GetDFSize(), "R_AARCH64_ADR_PREL_LO21", TargetExpr); + return 4; + } + case RelocType::IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A: { + const MCExpr *TargetExpr = GenTargetExpr(SymbolName, Kind, Delta); + TargetExpr = + AArch64MCExpr::create(TargetExpr, AArch64MCExpr::VK_LO12, *OutContext); + EmitRelocDirective(GetDFSize(), "R_AARCH64_ADD_ABS_LO12_NC", TargetExpr); + return 4; + } } const MCExpr *TargetExpr = GenTargetExpr(SymbolName, Kind, Delta, IsPCRel, Size); @@ -510,6 +552,11 @@ void ObjectWriter::EmitCFICode(int Offset, const char *Blob) { "Unexpected Offset Value for OpDefCfaRegister"); Streamer->EmitCFIDefCfaRegister(CfiCode->DwarfReg); break; + case CFI_DEF_CFA: + assert(CfiCode->Offset != 0 && + "Unexpected Offset Value for OpDefCfa"); + Streamer->EmitCFIDefCfa(CfiCode->DwarfReg, CfiCode->Offset); + break; default: assert(false && "Unrecognized CFI"); break; diff --git a/src/Native/ObjWriter/objwriter.h b/src/Native/ObjWriter/objwriter.h index 925ae5068d1..6a41613a4e3 100644 --- a/src/Native/ObjWriter/objwriter.h +++ b/src/Native/ObjWriter/objwriter.h @@ -53,7 +53,10 @@ enum class RelocType { IMAGE_REL_BASED_DIR64 = 0x0A, IMAGE_REL_BASED_REL32 = 0x10, IMAGE_REL_BASED_THUMB_BRANCH24 = 0x13, + IMAGE_REL_BASED_ARM64_BRANCH26 = 0x15, IMAGE_REL_BASED_RELPTR32 = 0x7C, + IMAGE_REL_BASED_ARM64_PAGEBASE_REL21 = 0x81, + IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A = 0x82, }; class ObjectWriter { diff --git a/src/Native/Runtime/PalRedhawk.h b/src/Native/Runtime/PalRedhawk.h index 7789c05346b..27e1aff03a1 100644 --- a/src/Native/Runtime/PalRedhawk.h +++ b/src/Native/Runtime/PalRedhawk.h @@ -852,6 +852,27 @@ REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport(); REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvxEnabled(); #endif // defined(HOST_X86) || defined(HOST_AMD64) +#if defined(HOST_ARM64) +// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.Aot.cs +enum ARM64IntrinsicConstants +{ + ARM64IntrinsicConstants_ArmBase = 0x0001, 
+  ARM64IntrinsicConstants_ArmBase_Arm64 = 0x0002,
+  ARM64IntrinsicConstants_AdvSimd = 0x0004,
+  ARM64IntrinsicConstants_AdvSimd_Arm64 = 0x0008,
+  ARM64IntrinsicConstants_Aes = 0x0010,
+  ARM64IntrinsicConstants_Crc32 = 0x0020,
+  ARM64IntrinsicConstants_Crc32_Arm64 = 0x0040,
+  ARM64IntrinsicConstants_Sha1 = 0x0080,
+  ARM64IntrinsicConstants_Sha256 = 0x0100,
+  ARM64IntrinsicConstants_Atomics = 0x0200,
+  ARM64IntrinsicConstants_Vector64 = 0x0400,
+  ARM64IntrinsicConstants_Vector128 = 0x0800
+};
+
+REDHAWK_PALIMPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags);
+#endif //defined(HOST_ARM64)
+
 #include "PalRedhawkInline.h"
 
 #endif // !PAL_REDHAWK_INCLUDED
diff --git a/src/Native/Runtime/arm64/AllocFast.S b/src/Native/Runtime/arm64/AllocFast.S
index 876f2dfbcb8..07e68455950 100644
--- a/src/Native/Runtime/arm64/AllocFast.S
+++ b/src/Native/Runtime/arm64/AllocFast.S
@@ -1,4 +1,293 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-// TODO: Implement
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+// GC type flags
+GC_ALLOC_FINALIZE = 1
+GC_ALLOC_ALIGN8_BIAS = 4
+GC_ALLOC_ALIGN8 = 8
+
+//
+// Rename fields of nested structs
+//
+OFFSETOF__Thread__m_alloc_context__alloc_ptr = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr
+OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit
+
+
+
+// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's
+// allocation context then automatically fall back to the slow allocation path.
+// x0 == EEType
+    LEAF_ENTRY RhpNewFast, _TEXT
+
+        // x1 = GetThread()
+        INLINE_GETTHREAD x1
+
+        //
+        // x0 contains EEType pointer
+        //
+        ldr w2, [x0, #OFFSETOF__EEType__m_uBaseSize]
+
+        //
+        // x0: EEType pointer
+        // x1: Thread pointer
+        // x2: base size
+        //
+
+        // Load potential new object address into x12.
+        ldr x12, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
+
+        // Determine whether the end of the object would lie outside of the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        add x2, x2, x12
+        ldr x13, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_limit]
+        cmp x2, x13
+        bhi RhpNewFast_RarePath
+
+        // Update the alloc pointer to account for the allocation.
+        str x2, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
+
+        // Set the new object's EEType pointer
+        str x0, [x12, #OFFSETOF__Object__m_pEEType]
+
+        mov x0, x12
+        ret
+
+RhpNewFast_RarePath:
+        mov x1, #0
+        b RhpNewObject
+    LEAF_END RhpNewFast, _TEXT
+
+// Allocate non-array object with finalizer.
+// x0 == EEType
+    LEAF_ENTRY RhpNewFinalizable, _TEXT
+        mov x1, #GC_ALLOC_FINALIZE
+        b RhpNewObject
+    LEAF_END RhpNewFinalizable, _TEXT
+
+// Allocate non-array object.
+// x0 == EEType
+// x1 == alloc flags
+    NESTED_ENTRY RhpNewObject, _TEXT, NoHandler
+
+        PUSH_COOP_PINVOKE_FRAME x3
+
+        // x3: transition frame
+
+        // Preserve the EEType in x19
+        mov x19, x0
+
+        ldr w2, [x0, #OFFSETOF__EEType__m_uBaseSize]
+
+        // Call the rest of the allocation helper.
+        // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame)
+        bl RhpGcAlloc
+
+        // Set the new object's EEType pointer on success.
+        cbz x0, NewOutOfMemory
+        str x19, [x0, #OFFSETOF__Object__m_pEEType]
+
+        // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC
+        ldr w1, [x19, #OFFSETOF__EEType__m_uBaseSize]
+        mov x2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF)
+        movk x2, #(RH_LARGE_OBJECT_SIZE >> 16), lsl #16
+        cmp x1, x2
+        blo New_SkipPublish
+
+        // x0: object
+        // x1: already contains object size
+        bl RhpPublishObject // x0: this function returns the object that was passed-in
+
+New_SkipPublish:
+
+        POP_COOP_PINVOKE_FRAME
+        EPILOG_RETURN
+
+NewOutOfMemory:
+        // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
+        // an out of memory exception that the caller of this allocator understands.
+
+        mov x0, x19 // EEType pointer
+        mov x1, 0   // Indicate that we should throw OOM.
+
+        POP_COOP_PINVOKE_FRAME
+        b RhExceptionHandling_FailedAllocation
+
+    NESTED_END RhpNewObject, _TEXT
+
+// Allocate a string.
+// x0 == EEType
+// x1 == element/character count
+    LEAF_ENTRY RhNewString, _TEXT
+        // Make sure computing the overall allocation size won't overflow
+        // TODO: this should actually be MAX_STRING_LENGTH
+        mov x2, 0x7FFFFFFF
+        cmp x1, x2
+        bhi StringSizeOverflow
+
+        // Compute overall allocation size (align(base size + (element size * elements), 8)).
+        mov w2, #STRING_COMPONENT_SIZE
+        mov x3, #(STRING_BASE_SIZE + 7)
+        umaddl x2, w1, w2, x3 // x2 = w1 * w2 + x3
+        and x2, x2, #-8
+
+        // x0 == EEType
+        // x1 == element count
+        // x2 == string size
+
+        INLINE_GETTHREAD x3
+
+        // Load potential new object address into x12.
+        ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
+
+        // Determine whether the end of the object would lie outside of the current allocation context. If so,
+        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
+        add x2, x2, x12
+        ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit]
+        cmp x2, x12
+        bhi RhpNewArrayRare
+
+        // Reload new object address into x12.
+        ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
+
+        // Update the alloc pointer to account for the allocation.
+        str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
+
+        // Set the new object's EEType pointer and element count.
+        str x0, [x12, #OFFSETOF__Object__m_pEEType]
+        str x1, [x12, #OFFSETOF__Array__m_Length]
+
+        // Return the object allocated in x0.
+        mov x0, x12
+
+        ret
+
+StringSizeOverflow:
+        // We get here if the length of the final string object cannot be represented as an unsigned
+        // 32-bit value. We are going to tail-call to a managed helper that will throw
+        // an OOM exception that the caller of this allocator understands.
+
+        // x0 holds EEType pointer already
+        mov x1, #1 // Indicate that we should throw OverflowException
+        b RhExceptionHandling_FailedAllocation
+    LEAF_END RhNewString, _TEXT
+
+// Allocate one dimensional, zero based array (SZARRAY).
+// x0 == EEType
+// x1 == element count
+    LEAF_ENTRY RhpNewArray, _TEXT
+
+        // We want to limit the element count to the non-negative 32-bit int range.
+        // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component
+        // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst
+        // case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits.
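+        // (As a quick check of that bound: 0x7FFFFFFF * 0xFFFF + 0xFFFF is on the order of 2^47,
+        //  comfortably within the 64-bit registers used below.)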
+ mov x2, #0x7FFFFFFF + cmp x1, x2 + bhi ArraySizeOverflow + + ldrh w2, [x0, #OFFSETOF__EEType__m_usComponentSize] + umull x2, w1, w2 + ldr w3, [x0, #OFFSETOF__EEType__m_uBaseSize] + add x2, x2, x3 + add x2, x2, #7 + and x2, x2, #-8 + + // x0 == EEType + // x1 == element count + // x2 == array size + + INLINE_GETTHREAD x3 + + // Load potential new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add x2, x2, x12 + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp x2, x12 + bhi RhpNewArrayRare + + // Reload new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Update the alloc pointer to account for the allocation. + str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new objects EEType pointer and element count. + str x0, [x12, #OFFSETOF__Object__m_pEEType] + str x1, [x12, #OFFSETOF__Array__m_Length] + + // Return the object allocated in r0. + mov x0, x12 + + ret + +ArraySizeOverflow: + // We get here if the size of the final array object can not be represented as an unsigned + // 32-bit value. We are going to tail-call to a managed helper that will throw + // an overflow exception that the caller of this allocator understands. + + // x0 holds EEType pointer already + mov x1, #1 // Indicate that we should throw OverflowException + b RhExceptionHandling_FailedAllocation + LEAF_END RhpNewArray, _TEXT + +// Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. +// x0 == EEType +// x1 == element count +// x2 == array size + Thread::m_alloc_context::alloc_ptr +// x3 == Thread + NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler + + // Recover array size by subtracting the alloc_ptr from x2. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + sub x2, x2, x12 + + PUSH_COOP_PINVOKE_FRAME x3 + + // Preserve data we will need later into the callee saved registers + mov x19, x0 // Preserve EEType + mov x20, x1 // Preserve element count + mov x21, x2 // Preserve array size + + mov x1, #0 + + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + bl RhpGcAlloc + + // Set the new objects EEType pointer and length on success. + cbz x0, ArrayOutOfMemory + + // Success, set the array type and element count in the new object. + str x19, [x0, #OFFSETOF__Object__m_pEEType] + str x20, [x0, #OFFSETOF__Array__m_Length] + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + movk x2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movk x2, #(RH_LARGE_OBJECT_SIZE >> 16), lsl #16 + cmp x21, x2 + blo NewArray_SkipPublish + + // x0 = newly allocated array. x1 = size + mov x1, x21 + bl RhpPublishObject + +NewArray_SkipPublish: + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +ArrayOutOfMemory: + // This is the OOM failure path. We are going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov x0, x19 // EEType Pointer + mov x1, 0 // Indicate that we should throw OOM. 
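+        // (Note on the failure protocol used throughout this file: every overflow and OOM path
+        //  tail-calls RhExceptionHandling_FailedAllocation with the EEType in x0 and a selector
+        //  in x1, where 0 requests an OutOfMemory exception and 1 an Overflow exception.)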
+
+        POP_COOP_PINVOKE_FRAME
+        b RhExceptionHandling_FailedAllocation
+
+    NESTED_END RhpNewArrayRare, _TEXT
diff --git a/src/Native/Runtime/arm64/CallDescrWorker.S b/src/Native/Runtime/arm64/CallDescrWorker.S
index 876f2dfbcb8..24f9fdf2aab 100644
--- a/src/Native/Runtime/arm64/CallDescrWorker.S
+++ b/src/Native/Runtime/arm64/CallDescrWorker.S
@@ -1,4 +1,140 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-// TODO: Implement
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+//-----------------------------------------------------------------------------
+// This helper routine enregisters the appropriate arguments and makes the
+// actual call.
+//
+// INPUT: x0: pointer to CallDescrData struct
+//
+//-----------------------------------------------------------------------------
+//void RhCallDescrWorker(CallDescrData * pCallDescrData);
+    NESTED_ENTRY RhCallDescrWorker, _TEXT, NoHandler
+
+        PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-32
+        PROLOG_SAVE_REG_PAIR x19, x20, #16
+
+        // Save the value of SP before we start pushing any arguments
+        mov x20, sp
+
+        mov x19, x0 // save pCallDescrData in x19
+
+        ldr w1, [x19, #OFFSETOF__CallDescrData__numStackSlots]
+        cbz w1, Ldonestack
+
+        // Add frame padding to ensure frame size is a multiple of 16 (a requirement of the OS ABI).
+        // We push two registers (above) and numStackSlots arguments (below). If this comes to an odd number
+        // of slots we must pad with another. This simplifies to "if the low bit of numStackSlots is set,
+        // extend the stack another eight bytes".
+        ldr x0, [x19, #OFFSETOF__CallDescrData__pSrc]
+        add x0, x0, x1, lsl #3 // pSrcEnd=pSrc+8*numStackSlots
+        ands x2, x1, #1
+        beq Lstackloop
+
+        // This loop copies numStackSlots words
+        // from [pSrcEnd-8,pSrcEnd-16,...] to [sp-8,sp-16,...]
+
+        // Pad and store one stack slot as the number of slots is odd
+        ldr x4, [x0,#-8]!
+        str x4, [sp,#-16]!
+        subs x1, x1, #1
+        beq Ldonestack
+Lstackloop:
+        ldp x2, x4, [x0,#-16]!
+        stp x2, x4, [sp,#-16]!
+        subs x1, x1, #2
+        bne Lstackloop
+Ldonestack:
+
+        // If FP arguments are supplied in registers (x9 != NULL) then initialize all of them from the pointer
+        // given in x9.
+        ldr x9, [x19, #OFFSETOF__CallDescrData__pFloatArgumentRegisters]
+        cbz x9, LNoFloatingPoint
+        ldp d0, d1, [x9]
+        ldp d2, d3, [x9, #16]
+        ldp d4, d5, [x9, #32]
+        ldp d6, d7, [x9, #48]
+LNoFloatingPoint:
+
+        // Copy [pArgumentRegisters, ..., pArgumentRegisters + 64]
+        // into x0, ..., x7, x8
+
+        ldr x9, [x19, #OFFSETOF__CallDescrData__pArgumentRegisters]
+        ldp x0, x1, [x9]
+        ldp x2, x3, [x9, #16]
+        ldp x4, x5, [x9, #32]
+        ldp x6, x7, [x9, #48]
+        ldr x8, [x9, #64]
+
+        // call pTarget
+        ldr x9, [x19, #OFFSETOF__CallDescrData__pTarget]
+        blr x9
+
+        EXPORT_POINTER_TO_ADDRESS PointerToReturnFromCallDescrThunk
+
+        // Symbol used to identify thunk call to managed function so the special
+        // case unwinder can unwind through this function. Sadly we cannot directly
+        // export this symbol right now because it confuses the DIA unwinder into believing
+        // it's the beginning of a new method; therefore we export the address
+        // of an auxiliary variable holding the address instead.
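+
+        // A sketch of the dispatch on fpReturnSize that follows (values as used by this
+        // helper's callers): 0 selects the integer return path (x0/x1 stored to the return
+        // buffer), 4 and 8 select the float/double path (d0), and 16/32 select the HFA path
+        // (d0-d3).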
+
+        ldr w3, [x19, #OFFSETOF__CallDescrData__fpReturnSize]
+
+        // Unlike desktop, returnValue is a pointer to a return buffer, not the buffer itself
+        ldr x19, [x19, #OFFSETOF__CallDescrData__pReturnBuffer]
+
+        // Int return case
+        cbz w3, LIntReturn
+
+        // Float return case
+        cmp w3, #4
+        beq LFloatOrDoubleReturn
+
+        // Double return case
+        cmp w3, #8
+        bne LCheckHFAReturn
+
+LFloatOrDoubleReturn:
+        str d0, [x19]
+        b LReturnDone
+
+LCheckHFAReturn:
+        cmp w3, #16
+        beq LFloatOrDoubleHFAReturn
+        cmp w3, #32
+        beq LFloatOrDoubleHFAReturn
+        b LNoHFAReturn
+
+LFloatOrDoubleHFAReturn:
+        // Single/double HFA return case
+        stp d0, d1, [x19, #00]
+        stp d2, d3, [x19, #16]
+        b LReturnDone
+
+LNoHFAReturn:
+
+        EMIT_BREAKPOINT // Unreachable
+
+LIntReturn:
+        // Save return value(s) into retbuf for int
+        stp x0, x1, [x19]
+
+LReturnDone:
+
+#ifdef _DEBUG
+        // Trash the floating point registers to ensure that the HFA return values
+        // won't survive by accident
+        ldp d0, d1, [sp]
+        ldp d2, d3, [sp, #16]
+#endif
+        // Restore the value of SP
+        mov sp, x20
+
+        EPILOG_RESTORE_REG_PAIR x19, x20, #16
+        EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #32
+        EPILOG_RETURN
+
+    NESTED_END RhCallDescrWorker
diff --git a/src/Native/Runtime/arm64/CallingConventionConverterHelpers.S b/src/Native/Runtime/arm64/CallingConventionConverterHelpers.S
index 876f2dfbcb8..de6ff7319ed 100644
--- a/src/Native/Runtime/arm64/CallingConventionConverterHelpers.S
+++ b/src/Native/Runtime/arm64/CallingConventionConverterHelpers.S
@@ -1,4 +1,61 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-// TODO: Implement
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; CallingConventionConverter Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+POINTER_SIZE = 0x08
+
+// Note: The "__jmpstub__" prefix is used to indicate to debugger
+// that it must step-through this stub when it encounters it while
+// stepping.
+ + + // + // void CallingConventionConverter_ReturnThunk() + // + LEAF_ENTRY CallingConventionConverter_ReturnThunk, _TEXT + ret + LEAF_END CallingConventionConverter_ReturnThunk, _TEXT + + // + // __jmpstub__CallingConventionConverter_CommonCallingStub + // + // struct CallingConventionConverter_CommonCallingStub_PointerData + // { + // void *ManagedCallConverterThunk; + // void *UniversalThunk; + // } + // + // struct CommonCallingStubInputData + // { + // ULONG_PTR CallingConventionId; + // CallingConventionConverter_CommonCallingStub_PointerData *commonData; // Only the ManagedCallConverterThunk field is used + // // However, it is specified just like other platforms, so the behavior of the common + // // calling stub is easier to debug + // } + // + // xip0 - Points at CommonCallingStubInputData + // + // + LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + ldr xip1, [xip0] // put CallingConventionId into xip1 as "parameter" to universal transition thunk + ldr xip0, [xip0, #POINTER_SIZE] // get pointer to CallingConventionConverter_CommonCallingStub_PointerData into xip0 + ldr x12, [xip0, #POINTER_SIZE] // get address of UniversalTransitionThunk (which we'll tailcall to later) + ldr xip0, [xip0] // get address of ManagedCallConverterThunk (target for universal thunk to call) + br x12 + LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + + // + // void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonCallingStub) + // + LEAF_ENTRY CallingConventionConverter_GetStubs, _TEXT + ldr x12, =CallingConventionConverter_ReturnThunk + str x12, [x0] // ARM doesn't need different return thunks. + str x12, [x1] + ldr x12, =__jmpstub__CallingConventionConverter_CommonCallingStub + str x12, [x2] + ret + LEAF_END CallingConventionConverter_GetStubs, _TEXT diff --git a/src/Native/Runtime/arm64/ExceptionHandling.S b/src/Native/Runtime/arm64/ExceptionHandling.S index 876f2dfbcb8..1dc4c83cbc0 100644 --- a/src/Native/Runtime/arm64/ExceptionHandling.S +++ b/src/Native/Runtime/arm64/ExceptionHandling.S @@ -1,4 +1,617 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-// TODO: Implement
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 15)&(~15))
+
+#define HARDWARE_EXCEPTION 1
+#define SOFTWARE_EXCEPTION 0
+
+.global RhpTrapThreads
+
+// -----------------------------------------------------------------------------
+// Macro used to create frame of exception throwing helpers (RhpThrowEx, RhpThrowHwEx)
+    .macro ALLOC_THROW_FRAME exceptionType
+
+        mov x3, sp
+
+        // Set up a PAL_LIMITED_CONTEXT on the stack {
+        .if \exceptionType == HARDWARE_EXCEPTION
+            sub sp,sp,#0x50
+            stp x3, x1, [sp] // x3 is the SP and x1 is the IP of the fault site
+            // TODO PROLOG_PUSH_MACHINE_FRAME
+        .else
+            PROLOG_STACK_ALLOC 0x50
+            stp x3, lr, [sp] // x3 is the SP and lr is the IP of the fault site
+        .endif
+        stp d8, d9, [sp, #0x10]
+        stp d10, d11, [sp, #0x20]
+        stp d12, d13, [sp, #0x30]
+        stp d14, d15, [sp, #0x40]
+        PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x70
+        stp xzr, xzr, [sp, #0x10] // locations reserved for return value, not used for exception handling
+        PROLOG_SAVE_REG_PAIR x19, x20, #0x20
+        PROLOG_SAVE_REG_PAIR x21, x22, #0x30
+        PROLOG_SAVE_REG_PAIR x23, x24, #0x40
+        PROLOG_SAVE_REG_PAIR x25, x26, #0x50
+        PROLOG_SAVE_REG_PAIR x27, x28, #0x60
+        // } end PAL_LIMITED_CONTEXT
+
+        PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo
+    .endm
+
+// -----------------------------------------------------------------------------
+// Macro used to create frame of funclet calling helpers (RhpCallXXXXFunclet)
+// extraStackSize - extra stack space that the user of the macro can use to
+//                  store additional registers
+    .macro ALLOC_CALL_FUNCLET_FRAME extraStackSize
+
+        // Using the below prolog instead of PROLOG_SAVE_REG_PAIR fp,lr, #-60!
+        // is intentional. The latter would also emit an instruction to save
+        // sp in fp. If sp is saved in fp in the prolog then fp is not expected to change in the
+        // body of the method. However, this method needs to be able to change fp before calling the
+        // funclet. This is required to access locals in the funclet.
+        PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp,lr, #-0x60
+        PROLOG_SAVE_REG_PAIR x19, x20, #0x10
+        PROLOG_SAVE_REG_PAIR x21, x22, #0x20
+        PROLOG_SAVE_REG_PAIR x23, x24, #0x30
+        PROLOG_SAVE_REG_PAIR x25, x26, #0x40
+        PROLOG_SAVE_REG_PAIR x27, x28, #0x50
+        mov fp, sp
+
+        .if \extraStackSize != 0
+            PROLOG_STACK_ALLOC \extraStackSize
+        .endif
+    .endm
+
+// -----------------------------------------------------------------------------
+// Macro used to free frame of funclet calling helpers (RhpCallXXXXFunclet)
+// extraStackSize - extra stack space that the user of the macro can use to
+//                  store additional registers.
+//                  It needs to match the value passed to the corresponding
+//                  ALLOC_CALL_FUNCLET_FRAME.
+ .macro FREE_CALL_FUNCLET_FRAME extraStackSize + + .if \extraStackSize != 0 + EPILOG_STACK_FREE \extraStackSize + .endif + + EPILOG_RESTORE_REG_PAIR x19, x20, #0x10 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x20 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x30 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x40 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x50 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x60 + .endm + + +// ----------------------------------------------------------------------------- +// Macro used to restore preserved general purpose and FP registers from REGDISPLAY +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro RESTORE_PRESERVED_REGISTERS regdisplayReg + + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + ldr x19, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + ldr x20, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + ldr x21, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + ldr x22, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + ldr x23, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX24] + ldr x24, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + ldr x25, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + ldr x26, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + ldr x27, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + ldr x28, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + ldr fp, [x12] + // + // load FP preserved regs + // + add x12, \regdisplayReg, #OFFSETOF__REGDISPLAY__D + ldp d8, d9, [x12, #0x00] + ldp d10, d11, [x12, #0x10] + ldp d12, d13, [x12, #0x20] + ldp d14, d15, [x12, #0x30] + .endm + +// ----------------------------------------------------------------------------- +// Macro used to save preserved general purpose and FP registers to REGDISPLAY +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro SAVE_PRESERVED_REGISTERS regdisplayReg + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + str x19, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + str x20, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + str x21, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + str x22, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + str x23, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX24] + str x24, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + str x25, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + str x26, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + str x27, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + str x28, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + str fp, [x12] + // + // store vfp preserved regs + // + add x12, \regdisplayReg, #OFFSETOF__REGDISPLAY__D + stp d8, d9, [x12, #0x00] + stp d10, d11, [x12, #0x10] + stp d12, d13, [x12, #0x20] + stp d14, d15, [x12, #0x30] + .endm + + +// ----------------------------------------------------------------------------- +// Macro used to thrash preserved general purpose registers in REGDISPLAY +// to make sure nobody uses them +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro TRASH_PRESERVED_REGISTERS_STORAGE regdisplayReg + +#if 0 // def _DEBUG // @TODO: temporarily removed because trashing the frame pointer breaks the debugger + movz x3, #0xbaad, LSL #48 + movk x3, #0xdeed, LSL #32 + movk x3, #0xbaad, LSL #16 + movk x3, #0xdeed + ldr x12, 
[\regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX24] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + str x3, [x12] +#endif // _DEBUG + .endm + + + +#define rsp_offsetof_ExInfo 0 +#define rsp_offsetof_Context STACKSIZEOF_ExInfo + +// +// RhpThrowHwEx +// +// INPUT: W0: exception code of fault +// X1: faulting IP +// +// OUTPUT: +// + NESTED_ENTRY RhpThrowHwEx, _TEXT, NoHandler + + ALLOC_THROW_FRAME HARDWARE_EXCEPTION + + // x2 = GetThread() + INLINE_GETTHREAD x2 + + add x1, sp, #rsp_offsetof_ExInfo // x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] // pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] // pExInfo->m_idxCurClause = MaxTryRegionIdx + mov w3, #2 + strb w3, [x1, #OFFSETOF__ExInfo__m_kind] // pExInfo->m_kind = ExKind.HardwareFault + + // link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context // x2 <- PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] // pExInfo->m_pExContext = pContext + + // w0: exception code + // x1: ExInfo* + bl RhThrowHwEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowHwEx2 + + // no return + EMIT_BREAKPOINT + + NESTED_END RhpThrowHwEx, _TEXT + +// +// RhpThrowEx +// +// INPUT: X0: exception object +// +// OUTPUT: +// + + NESTED_ENTRY RhpThrowEx, _TEXT, NoHandler + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + // x2 = GetThread() + INLINE_GETTHREAD x2 + + // There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + // address could have been hijacked when we were in that C# code and we must remove the hijack and + // reflect the correct return address in our exception context record. The other throw helpers don't + // need this because they cannot be tail-called from C#. + + // NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location + // where the tail-calling thread had saved LR, which may not match where we have saved LR. 
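+    //
+    // Roughly, in C-like pseudocode (field names as used below; a sketch of the logic, not
+    // exact code):
+    //
+    //   void* ra = pThread->m_pvHijackedReturnAddress;
+    //   if (ra != NULL) {
+    //       void** loc = pThread->m_ppvHijackedReturnAddressLocation;
+    //       if (loc >= callsiteSP) *loc = ra;   // normal case: restore the hijacked slot
+    //       else lr = ip = ra;                  // tail call: slot is gone, fix LR/IP instead
+    //       pThread->m_ppvHijackedReturnAddressLocation = NULL;
+    //       pThread->m_pvHijackedReturnAddress = NULL;
+    //   }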
+ + ldr x1, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz x1, NotHijacked + + ldr x3, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + + // x0: exception object + // x1: hijacked return address + // x2: pThread + // x3: hijacked return address location + + add x12, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) // re-compute SP at callsite + cmp x3, x12 // if (m_ppvHijackedReturnAddressLocation < SP at callsite) + blo TailCallWasHijacked + + // normal case where a valid return address location is hijacked + str x1, [x3] + b ClearThreadState + +TailCallWasHijacked: + + // Abnormal case where the return address location is now invalid because we ended up here via a tail + // call. In this case, our hijacked return address should be the correct caller of this method. + + // stick the previous return address in LR as well as in the right spots in our PAL_LIMITED_CONTEXT. + mov lr, x1 + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__LR)] + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP)] + +ClearThreadState: + + // clear the Thread's hijack state + str xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str xzr, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + +NotHijacked: + + add x1, sp, #rsp_offsetof_ExInfo // x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] // pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] // pExInfo->m_idxCurClause = MaxTryRegionIdx + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_kind] // pExInfo->m_kind = ExKind.Throw + + // link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context // x2 <- PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] // pExInfo->m_pExContext = pContext + + // x0: exception object + // x1: ExInfo* + bl RhThrowEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowEx2 + + // no return + EMIT_BREAKPOINT + NESTED_END RhpThrowEx, _TEXT + + +// +// void FASTCALL RhpRethrow() +// +// SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +// +// INPUT: +// +// OUTPUT: +// + + NESTED_ENTRY RhpRethrow, _TEXT, NoHandler + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + // x2 = GetThread() + INLINE_GETTHREAD x2 + + add x1, sp, #rsp_offsetof_ExInfo // x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null + strb wzr, [x1, #OFFSETOF__ExInfo__m_kind] // init to a deterministic value (ExKind.None) + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] // pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] // pExInfo->m_idxCurClause = MaxTryRegionIdx + + // link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + mov x0, x3 // x0 <- current ExInfo + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context // x2 <- 
PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] // pExInfo->m_pExContext = pContext + + // x0 contains the currently active ExInfo + // x1 contains the address of the new ExInfo + bl RhRethrow + + EXPORT_POINTER_TO_ADDRESS PointerToRhpRethrow2 + + // no return + EMIT_BREAKPOINT + NESTED_END RhpRethrow, _TEXT + +// +// void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +// ExInfo* pExInfo) +// +// INPUT: X0: exception object +// X1: handler funclet address +// X2: REGDISPLAY* +// X3: ExInfo* +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler + + ALLOC_CALL_FUNCLET_FRAME 0x60 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + stp x0, x2, [sp, #0x40] // x0, x2 & x3 are saved so we have the exception object, REGDISPLAY and + stp x3, xzr, [sp, #0x50] // ExInfo later, xzr makes space for the local "is_not_handling_thread_abort" + +#define rsp_offset_is_not_handling_thread_abort 0x58 +#define rsp_offset_x2 0x48 +#define rsp_offset_x3 0x50 + + // + // clear the DoNotTriggerGc flag, trashes x4-x6 + // + INLINE_GETTHREAD x5 // x5 <- Thread* + + ldr x4, [x5, #OFFSETOF__Thread__m_threadAbortException] + sub x4, x4, x0 + str x4, [sp, #rsp_offset_is_not_handling_thread_abort] // Non-zero if the exception is not ThreadAbortException + + add x12, x5, #OFFSETOF__Thread__m_ThreadStateFlags + +ClearRetry_Catch: + ldxr w4, [x12] + bic w4, w4, #TSF_DoNotTriggerGc + stxr w6, w4, [x12] + cbz w6, ClearSuccess_Catch + b ClearRetry_Catch +ClearSuccess_Catch: + + // + // set preserved regs to the values expected by the funclet + // + RESTORE_PRESERVED_REGISTERS x2 + // + // trash the values at the old homes to make sure nobody uses them + // + TRASH_PRESERVED_REGISTERS_STORAGE x2 + + // + // call the funclet + // + // x0 still contains the exception object + blr x1 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallCatchFunclet2 + + // x0 contains resume IP + + ldr x2, [sp, #rsp_offset_x2] // x2 <- REGDISPLAY* + +// @TODO: add debug-only validation code for ExInfo pop + + INLINE_GETTHREAD x1 // x1 <- Thread* + + // We must unhijack the thread at this point because the section of stack where the hijack is applied + // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
+        INLINE_THREAD_UNHIJACK x1, x3, x12 // Thread in x1, trashes x3 and x12
+
+        ldr x3, [sp, #rsp_offset_x3]            // x3 <- current ExInfo*
+        ldr x2, [x2, #OFFSETOF__REGDISPLAY__SP] // x2 <- resume SP value
+
+PopExInfoLoop:
+        ldr x3, [x3, #OFFSETOF__ExInfo__m_pPrevExInfo] // x3 <- next ExInfo
+        cbz x3, DonePopping // if (pExInfo == null) { we're done }
+        cmp x3, x2
+        blt PopExInfoLoop // if (pExInfo < resume SP) { keep going }
+
+DonePopping:
+        str x3, [x1, #OFFSETOF__Thread__m_pExInfoStackHead] // store the new head on the Thread
+
+        adrp x3, RhpTrapThreads
+        add x3, x3, :lo12:RhpTrapThreads
+        ldr w3, [x3]
+        tbz x3, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort
+
+        ldr x3, [sp, #rsp_offset_is_not_handling_thread_abort]
+        cbnz x3, NoAbort
+
+        // It was the ThreadAbortException, so rethrow it
+        // reset SP
+        mov x1, x0 // x1 <- continuation address as exception PC
+        mov w0, #STATUS_REDHAWK_THREAD_ABORT
+        mov sp, x2
+        b RhpThrowHwEx
+
+NoAbort:
+        // reset SP and jump to continuation address
+        mov sp, x2
+        br x0
+
+    NESTED_END RhpCallCatchFunclet, _TEXT
+
+//
+// void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay)
+//
+// INPUT:  X0:  handler funclet address
+//         X1:  REGDISPLAY*
+//
+// OUTPUT:
+//
+
+    NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler
+
+        ALLOC_CALL_FUNCLET_FRAME 0x50
+        stp d8, d9, [sp, #0x00]
+        stp d10, d11, [sp, #0x10]
+        stp d12, d13, [sp, #0x20]
+        stp d14, d15, [sp, #0x30]
+        stp x0, x1, [sp, #0x40] // x1 is saved so we have the REGDISPLAY later, x0 is just alignment padding
+
+#define rsp_offset_x1 0x48
+
+
+        // We want to suppress hijacking between invocations of subsequent finallys. We do this because we
+        // cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the
+        // method) and then been popped off the stack, leaving behind no trace of its effect.
+        //
+        // So we clear the state before and set it after invocation of the handler.
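+        //
+        // Each flag update below is a standard load-exclusive/store-exclusive retry loop; as a
+        // pseudocode sketch of the clear side:
+        //
+        //   do { flags = load_exclusive(&pThread->m_ThreadStateFlags); }
+        //   while (!store_exclusive(&pThread->m_ThreadStateFlags, flags & ~TSF_DoNotTriggerGc));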
+        //
+
+        //
+        // clear the DoNotTriggerGc flag, trashes x2-x4
+        INLINE_GETTHREAD x2 // x2 <- Thread*
+
+        add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags
+
+ClearRetry:
+        ldxr w4, [x12]
+        bic w4, w4, #TSF_DoNotTriggerGc
+        stxr w3, w4, [x12]
+        cbz w3, ClearSuccess
+        b ClearRetry
+ClearSuccess:
+
+        //
+        // set preserved regs to the values expected by the funclet
+        //
+        RESTORE_PRESERVED_REGISTERS x1
+        //
+        // trash the values at the old homes to make sure nobody uses them
+        //
+        TRASH_PRESERVED_REGISTERS_STORAGE x1
+
+        //
+        // call the funclet
+        //
+        blr x0
+
+        EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFinallyFunclet2
+
+        ldr x1, [sp, #rsp_offset_x1] // reload REGDISPLAY pointer
+
+        //
+        // save new values of preserved regs into REGDISPLAY
+        //
+        SAVE_PRESERVED_REGISTERS x1
+
+        //
+        // set the DoNotTriggerGc flag, trashes x1-x3
+        //
+        INLINE_GETTHREAD x2 // x2 <- Thread*
+
+        add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags
+SetRetry:
+        ldxr w1, [x12]
+        orr w1, w1, #TSF_DoNotTriggerGc
+        stxr w3, w1, [x12]
+        cbz w3, SetSuccess
+        b SetRetry
+SetSuccess:
+
+        ldp d8, d9, [sp, #0x00]
+        ldp d10, d11, [sp, #0x10]
+        ldp d12, d13, [sp, #0x20]
+        ldp d14, d15, [sp, #0x30]
+
+        FREE_CALL_FUNCLET_FRAME 0x50
+        EPILOG_RETURN
+
+    NESTED_END RhpCallFinallyFunclet, _TEXT
+
+
+//
+// void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay)
+//
+// INPUT:  X0:  exception object
+//         X1:  filter funclet address
+//         X2:  REGDISPLAY*
+//
+// OUTPUT:
+//
+
+    NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler
+        ALLOC_CALL_FUNCLET_FRAME 0x40
+        stp d8, d9, [sp, #0x00]
+        stp d10, d11, [sp, #0x10]
+        stp d12, d13, [sp, #0x20]
+        stp d14, d15, [sp, #0x30]
+
+        ldr x12, [x2, #OFFSETOF__REGDISPLAY__pFP]
+        ldr fp, [x12]
+
+        //
+        // call the funclet
+        //
+        // x0 still contains the exception object
+        blr x1
+
+        EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFilterFunclet2
+
+        ldp d8, d9, [sp, #0x00]
+        ldp d10, d11, [sp, #0x10]
+        ldp d12, d13, [sp, #0x20]
+        ldp d14, d15, [sp, #0x30]
+
+        FREE_CALL_FUNCLET_FRAME 0x40
+        EPILOG_RETURN
+
+    NESTED_END RhpCallFilterFunclet, _TEXT
diff --git a/src/Native/Runtime/arm64/InteropThunksHelpers.S b/src/Native/Runtime/arm64/InteropThunksHelpers.S
index 876f2dfbcb8..8c6e4198bd7 100644
--- a/src/Native/Runtime/arm64/InteropThunksHelpers.S
+++ b/src/Native/Runtime/arm64/InteropThunksHelpers.S
@@ -1,4 +1,60 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-// TODO: Implement
+#include <unixasmmacros.inc>
+
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+POINTER_SIZE = 0x08
+
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+    //
+    // RhCommonStub
+    //
+    // INPUT: xip0: thunk's data block
+    //
+    // TRASHES: x9, x10, xip0
+    //
+    LEAF_ENTRY RhCommonStub, _TEXT
+        // There are arbitrary callers passing arguments with arbitrary signatures.
+        // Custom calling convention:
+        //      xip0 pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers)
+
+        INLINE_GET_TLS_VAR x9, tls_thunkData
+
+        // x9   = base address of TLS data
+        // xip0 = address of context cell in thunk's data
+
+        // store thunk address in thread static
+        ldr x10, [xip0]
+        str x10, [x9]
+
+        // Now load the target address and jump to it.
+        ldr xip0, [xip0, #POINTER_SIZE]
+        br xip0
+
+    LEAF_END RhCommonStub, _TEXT
+
+    //
+    // IntPtr RhGetCommonStubAddress()
+    //
+    LEAF_ENTRY RhGetCommonStubAddress, _TEXT
+        adrp x0, RhCommonStub
+        add x0, x0, :lo12:RhCommonStub
+        ret
+    LEAF_END RhGetCommonStubAddress, _TEXT
+
+
+    //
+    // IntPtr RhGetCurrentThunkContext()
+    //
+    LEAF_ENTRY RhGetCurrentThunkContext, _TEXT
+
+        INLINE_GET_TLS_VAR x0, tls_thunkData
+
+        ldr x0, [x0]
+
+        ret
+
+    LEAF_END RhGetCurrentThunkContext, _TEXT
diff --git a/src/Native/Runtime/arm64/MiscStubs.S b/src/Native/Runtime/arm64/MiscStubs.S
index 53616c22696..a4130dc1af6 100644
--- a/src/Native/Runtime/arm64/MiscStubs.S
+++ b/src/Native/Runtime/arm64/MiscStubs.S
@@ -1,2 +1,241 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+    .global memcpy
+    .global memcpyGCRefs
+    .global memcpyGCRefsWithWriteBarrier
+    .global memcpyAnyWithWriteBarrier
+    .global GetClasslibCCtorCheck
+
+//
+// Checks whether the static class constructor for the type indicated by the context structure has been
+// executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will
+// execute the cctor and update the context to record this fact.
+//
+// Input:
+//  x0 : Address of StaticClassConstructionContext structure
+//
+// Output:
+//  All volatile registers and the condition codes may be trashed.
+//
+    LEAF_ENTRY RhpCheckCctor, _TEXT
+
+        // Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the
+        // initial state is 0 and the remaining values are reserved for classlib use). This check is
+        // unsynchronized; if we go down the slow path and call the classlib then it is responsible for
+        // synchronizing with other threads and re-checking the value.
+        ldr w12, [x0, #OFFSETOF__StaticClassConstructionContext__m_initialized]
+        cmp w12, #1
+        bne RhpCheckCctor__SlowPath
+        ret
+RhpCheckCctor__SlowPath:
+        mov x1, x0
+        b RhpCheckCctor2 // tail-call the check cctor helper that actually has an implementation to call
+                         // the cctor
+
+    LEAF_END RhpCheckCctor, _TEXT
+
+//
+// Checks whether the static class constructor for the type indicated by the context structure has been
+// executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will
+// execute the cctor and update the context to record this fact.
+//
+// Input:
+//  x0 : Value that must be preserved in this register across the cctor check.
+//  x1 : Address of StaticClassConstructionContext structure
+//
+// Output:
+//  All volatile registers other than x0 may be trashed and the condition codes may also be trashed.
+//
+    LEAF_ENTRY RhpCheckCctor2, _TEXT
+
+        // Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the
+        // initial state is 0 and the remaining values are reserved for classlib use). This check is
+        // unsynchronized; if we go down the slow path and call the classlib then it is responsible for
+        // synchronizing with other threads and re-checking the value.
+        ldr w12, [x1, #OFFSETOF__StaticClassConstructionContext__m_initialized]
+        cmp w12, #1
+        bne RhpCheckCctor2__SlowPath
+        ret
+
+    LEAF_END RhpCheckCctor2, _TEXT
+
+//
+// Slow path helper for RhpCheckCctor.
+//
+// Input:
+//  x0 : Value that must be preserved in this register across the cctor check.
+//  x1 : Address of StaticClassConstructionContext structure
+//
+// Output:
+//  All volatile registers other than x0 may be trashed and the condition codes may also be trashed.
+//
+    NESTED_ENTRY RhpCheckCctor2__SlowPath, _TEXT, NoHandler
+
+        // Need to preserve x0, x1 and lr across helper call. fp is also pushed to keep the stack 16 byte aligned.
+        PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x20
+        stp x0, x1, [sp, #0x10]
+
+        // Call a C++ helper to retrieve the address of the classlib callback. The caller's return address is
+        // passed as the argument to the helper; it's an address in the module and is used by the helper to
+        // locate the classlib.
+        mov x0, lr
+        bl GetClasslibCCtorCheck
+
+        // X0 now contains the address of the classlib method to call. The single argument is the context
+        // structure address currently stashed on the stack. Clean up and tail call to the classlib
+        // callback so we're not on the stack should a GC occur (so we don't need to worry about transition
+        // frames).
+        mov x12, x0
+        ldp x0, x1, [sp, #0x10]
+        EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x20
+        // tail-call the class lib cctor check function. This function is required to return its first
+        // argument, so that x0 can be preserved.
+        br x12
+
+    NESTED_END RhpCheckCctor2__SlowPath, _TEXT
+
+
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+//
+// void* RhpCopyMultibyteNoGCRefs(void*, void*, size_t)
+//
+// The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where
+// the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch
+// it to managed code.
+//
+
+    LEAF_ENTRY RhpCopyMultibyteNoGCRefs, _TEXT
+
+        // x0    dest
+        // x1    src
+        // x2    count
+
+        cbz x2, NothingToCopy_NoGCRefs // check for a zero-length copy
+
+        // Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV,
+        // unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be
+        // translated to a managed exception as usual.
+    ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsDestAVLocation
+        ldrb wzr, [x0]
+    ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsSrcAVLocation
+        ldrb wzr, [x1]
+
+        // tail-call to plain-old-memcpy
+        b memcpy
+
+NothingToCopy_NoGCRefs:
+        // dest is already in x0
+        ret
+
+    LEAF_END RhpCopyMultibyteNoGCRefs, _TEXT
+
+
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+//
+// void* RhpCopyMultibyte(void*, void*, size_t)
+//
+// The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where
+// the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch
+// it to managed code.
+//
+
+    LEAF_ENTRY RhpCopyMultibyte, _TEXT
+
+        // x0    dest
+        // x1    src
+        // x2    count
+
+        // check for a zero-length copy
+        cbz x2, NothingToCopy_RhpCopyMultibyte
+
+        // Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV,
+        // unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be
+        // translated to a managed exception as usual.
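+        // (The probes below are single-byte loads into wzr: their only effect is to fault at a
+        //  known ALTERNATE_ENTRY label if dest or src is bad, instead of somewhere inside memcpy.)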
+    ALTERNATE_ENTRY RhpCopyMultibyteDestAVLocation
+        ldrb wzr, [x0]
+    ALTERNATE_ENTRY RhpCopyMultibyteSrcAVLocation
+        ldrb wzr, [x1]
+
+        // tail-call to the GC-safe memcpy implementation
+        b memcpyGCRefs
+
+NothingToCopy_RhpCopyMultibyte:
+        // dest is still in x0
+        ret
+
+    LEAF_END RhpCopyMultibyte, _TEXT
+
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+//
+// void* RhpCopyMultibyteWithWriteBarrier(void*, void*, size_t)
+//
+// The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where
+// the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch
+// it to managed code.
+// Runs a card table update via RhpBulkWriteBarrier after the copy
+//
+
+    LEAF_ENTRY RhpCopyMultibyteWithWriteBarrier, _TEXT
+
+        // x0    dest
+        // x1    src
+        // x2    count
+
+        // check for a zero-length copy
+        cbz x2, NothingToCopy_RhpCopyMultibyteWithWriteBarrier
+
+        // Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV,
+        // unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be
+        // translated to a managed exception as usual.
+    ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierDestAVLocation
+        ldrb wzr, [x0]
+    ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierSrcAVLocation
+        ldrb wzr, [x1]
+
+        // tail-call to the GC-safe memcpy implementation
+        b memcpyGCRefsWithWriteBarrier
+
+NothingToCopy_RhpCopyMultibyteWithWriteBarrier:
+        // dest is still in x0
+        ret
+    LEAF_END RhpCopyMultibyteWithWriteBarrier, _TEXT
+
+//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+//
+// void* RhpCopyAnyWithWriteBarrier(void*, void*, size_t)
+//
+// The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where
+// the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch
+// it to managed code.
+// Runs a card table update via RhpBulkWriteBarrier after the copy if it contained GC pointers
+//
+
+    LEAF_ENTRY RhpCopyAnyWithWriteBarrier, _TEXT
+
+        // x0    dest
+        // x1    src
+        // x2    count
+
+        // check for a zero-length copy
+        cbz x2, NothingToCopy_RhpCopyAnyWithWriteBarrier
+
+        // Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV,
+        // unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be
+        // translated to a managed exception as usual.
+    ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierDestAVLocation
+        ldrb wzr, [x0]
+    ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierSrcAVLocation
+        ldrb wzr, [x1]
+
+        // tail-call to the GC-safe memcpy implementation
+        b memcpyAnyWithWriteBarrier
+
+NothingToCopy_RhpCopyAnyWithWriteBarrier:
+        // dest is still in x0
+        ret
+
+    LEAF_END RhpCopyAnyWithWriteBarrier, _TEXT
diff --git a/src/Native/Runtime/arm64/PInvoke.S b/src/Native/Runtime/arm64/PInvoke.S
index 876f2dfbcb8..508127601fb 100644
--- a/src/Native/Runtime/arm64/PInvoke.S
+++ b/src/Native/Runtime/arm64/PInvoke.S
@@ -1,4 +1,355 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
-// TODO: Implement
+
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+.global RhpTrapThreads
+
+// Note: these must match the defs in PInvokeTransitionFrameFlags defined in rhbinder.h
+PTFF_SAVE_X19 = 0x00000001
+PTFF_SAVE_X20 = 0x00000002
+PTFF_SAVE_X21 = 0x00000004
+PTFF_SAVE_X22 = 0x00000008
+PTFF_SAVE_X23 = 0x00000010
+PTFF_SAVE_X24 = 0x00000020
+PTFF_SAVE_X25 = 0x00000040
+PTFF_SAVE_X26 = 0x00000080
+PTFF_SAVE_X27 = 0x00000100
+PTFF_SAVE_X28 = 0x00000200
+PTFF_SAVE_SP = 0x00000400
+PTFF_SAVE_ALL_PRESERVED = 0x000003FF // NOTE: x19-x28
+PTFF_SAVE_X0 = 0x00000800
+PTFF_SAVE_X1 = 0x00001000
+PTFF_SAVE_X2 = 0x00002000
+PTFF_SAVE_X3 = 0x00004000
+PTFF_SAVE_X4 = 0x00008000
+PTFF_SAVE_X5 = 0x00010000
+PTFF_SAVE_X6 = 0x00020000
+PTFF_SAVE_X7 = 0x00040000
+PTFF_SAVE_X8 = 0x00080000
+PTFF_SAVE_X9 = 0x00100000
+PTFF_SAVE_X10 = 0x00200000
+PTFF_SAVE_X11 = 0x00400000
+PTFF_SAVE_X12 = 0x00800000
+PTFF_SAVE_X13 = 0x01000000
+PTFF_SAVE_X14 = 0x02000000
+PTFF_SAVE_X15 = 0x04000000
+PTFF_SAVE_X16 = 0x08000000
+PTFF_SAVE_X17 = 0x10000000
+PTFF_SAVE_X18 = 0x20000000
+PTFF_SAVE_ALL_SCRATCH = 0x3FFFF800 // NOTE: X0-X18
+PTFF_SAVE_FP = 0x40000000
+PTFF_SAVE_LR = 0x80000000
+
+// Bit position for the flags above, to be used with tbz / tbnz instructions
+PTFF_THREAD_ABORT_BIT = 36
+
+// Bit positions for the Thread state flags, to be used with tbz/tbnz instructions
+TSF_Attached_Bit = 0
+TSF_SuppressGcStress_Bit = 3
+TSF_DoNotTriggerGc_Bit = 4
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// RhpWaitForSuspend -- rare path for RhpPInvoke and RhpReversePInvokeReturn
+//
+//
+// INPUT: none
+//
+// TRASHES: none
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    NESTED_ENTRY RhpWaitForSuspend, _TEXT, NoHandler
+
+    // FP and LR registers
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0xA0      // Push down stack pointer and store FP and LR
+
+    // Need to save argument registers x0-x7 and the return buffer register x8
+    // Also save x9 which may be used for saving indirect call target
+    stp x0, x1, [sp, #0x10]
+    stp x2, x3, [sp, #0x20]
+    stp x4, x5, [sp, #0x30]
+    stp x6, x7, [sp, #0x40]
+    stp x8, x9, [sp, #0x50]
+
+    // Save float argument registers as well since they are volatile
+    stp d0, d1, [sp, #0x60]
+    stp d2, d3, [sp, #0x70]
+    stp d4, d5, [sp, #0x80]
+    stp d6, d7, [sp, #0x90]
+
+    bl RhpWaitForSuspend2
+
+    // Restore floating point registers
+    ldp d0, d1, [sp, #0x60]
+    ldp d2, d3, [sp, #0x70]
+    ldp d4, d5, [sp, #0x80]
+    ldp d6, d7, [sp, #0x90]
+
+    // Restore the argument registers
+    ldp x0, x1, [sp, #0x10]
+    ldp x2, x3, [sp, #0x20]
+    ldp x4, x5, [sp, #0x30]
+    ldp x6, x7, [sp, #0x40]
+    ldp x8, x9, [sp, #0x50]
+
+    // Restore FP and LR registers, and free the allocated stack block
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0xA0
+    EPILOG_RETURN
+
+    NESTED_END RhpWaitForSuspend, _TEXT
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// RhpWaitForGCNoAbort
+//
+//
+// INPUT: x9: transition frame
+//
+// TRASHES: none
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    NESTED_ENTRY RhpWaitForGCNoAbort, _TEXT, NoHandler
+
+    // FP and LR registers
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x40      // Push down stack pointer and store FP and LR
+
+    // Save the integer return registers, as well as the floating return registers
+    stp x0, x1, [sp, #0x10]
+    stp d0, d1, [sp, #0x20]
+    stp d2, d3, [sp, #0x30]
+
+    ldr x0, [x9, #OFFSETOF__PInvokeTransitionFrame__m_pThread]
+    ldr w0, [x0, #OFFSETOF__Thread__m_ThreadStateFlags]
+    tbnz x0, #TSF_DoNotTriggerGc_Bit, Done
+
+    mov x0, x9      // passing transition frame in x0
+    bl RhpWaitForGC2
+
+Done:
+    ldp x0, x1, [sp, #0x10]
+    ldp d0, d1, [sp, #0x20]
+    ldp d2, d3, [sp, #0x30]
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x40
+    EPILOG_RETURN
+
+    NESTED_END RhpWaitForGCNoAbort, _TEXT
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// RhpWaitForGC
+//
+//
+// INPUT: x9: transition frame
+//
+// TRASHES: x0, x1, x10
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler
+
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x10
+
+    adrp x10, RhpTrapThreads
+    add x10, x10, :lo12:RhpTrapThreads
+    ldr w10, [x10]
+    tbz x10, #TrapThreadsFlags_TrapThreads_Bit, NoWait
+    bl RhpWaitForGCNoAbort
+NoWait:
+    tbz x10, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort
+    ldr x10, [x9, #OFFSETOF__PInvokeTransitionFrame__m_Flags]
+    tbz x10, #PTFF_THREAD_ABORT_BIT, NoAbort
+
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x10
+    mov w0, #STATUS_REDHAWK_THREAD_ABORT
+    mov x1, lr      // hijack target address as exception PC
+    b RhpThrowHwEx
+
+NoAbort:
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x10
+    EPILOG_RETURN
+
+    NESTED_END RhpWaitForGC, _TEXT
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// RhpReversePInvoke
+//
+// IN: x9: address of reverse pinvoke frame
+//     0: save slot for previous M->U transition frame
+//     8: save slot for thread pointer to avoid re-calc in epilog sequence
+//
+// PRESERVES: x0 - x8 -- need to preserve these because the caller assumes they are not trashed
+//
+// TRASHES: x10, x11
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    LEAF_ENTRY RhpReversePInvoke, _TEXT
+
+    INLINE_GETTHREAD x10      // x10 = Thread
+    str x10, [x9, #8]         // save Thread pointer for RhpReversePInvokeReturn
+
+    // x9  = reverse pinvoke frame
+    // x10 = thread
+    // x11 = scratch
+
+    ldr w11, [x10, #OFFSETOF__Thread__m_ThreadStateFlags]
+    tbz x11, #TSF_Attached_Bit, AttachThread
+
+ThreadAttached:
+    //
+    // Check for the correct mode. This is accessible via various odd things that we cannot completely
+    // prevent, such as:
+    // 1) Registering a reverse pinvoke entrypoint as a vectored exception handler
+    // 2) Performing a managed delegate invoke on a reverse pinvoke delegate.
+    //
+    ldr x11, [x10, #OFFSETOF__Thread__m_pTransitionFrame]
+    cbz x11, CheckBadTransition
+
+    // Save previous TransitionFrame prior to making the mode transition so that it is always valid
+    // whenever we might attempt to hijack this thread.
+    str x11, [x9]
+
+    str xzr, [x10, #OFFSETOF__Thread__m_pTransitionFrame]
+    dmb ish
+
+    adrp x11, RhpTrapThreads
+    add x11, x11, :lo12:RhpTrapThreads
+    ldr w11, [x11]
+    tbnz x11, #TrapThreadsFlags_TrapThreads_Bit, TrapThread
+
+    ret
+
+CheckBadTransition:
+    // Allow 'bad transitions' when the TSF_DoNotTriggerGc mode is set. This allows us to have
+    // [NativeCallable] methods that are called via the "restricted GC callouts" as well as from native,
+    // which is necessary because the methods are CCW vtable methods on interfaces passed to native.
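+    // In C terms the check below is roughly the following (a sketch only; the flag layout follows the
+    // TSF_* constants at the top of this file, and the frame field name is illustrative):
+    //
+    //   if ((pThread->m_ThreadStateFlags & TSF_DoNotTriggerGc) == 0)
+    //       RhpReversePInvokeBadTransition(returnAddress);
+    //   else
+    //       pFrame->m_savedTransitionFrame = NULL;   // allow the transition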
+    ldr w11, [x10, #OFFSETOF__Thread__m_ThreadStateFlags]
+    tbz x11, #TSF_DoNotTriggerGc_Bit, BadTransition
+
+    // zero-out our 'previous transition frame' save slot
+    mov x11, #0
+    str x11, [x9]
+
+    // nothing more to do
+    ret
+
+TrapThread:
+    // put the previous frame back (sets us back to preemptive mode)
+    ldr x11, [x9]
+    str x11, [x10, #OFFSETOF__Thread__m_pTransitionFrame]
+    dmb ish
+
+AttachThread:
+    // passing address of reverse pinvoke frame in x9
+    b RhpReversePInvokeAttachOrTrapThread
+
+BadTransition:
+    mov x0, lr      // arg <- return address
+    b RhpReversePInvokeBadTransition
+
+    LEAF_END RhpReversePInvoke, _TEXT
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// RhpReversePInvokeAttachOrTrapThread -- rare path for RhpPInvoke
+//
+//
+// INPUT: x9: address of reverse pinvoke frame
+//
+// PRESERVES: x0-x8 -- need to preserve these because the caller assumes they are not trashed
+//
+// TRASHES: none
+//
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread, _TEXT, NoHandler
+
+    // FP and LR registers
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0xA0      // Push down stack pointer and store FP and LR
+
+    // Need to save argument registers x0-x7 and the return buffer register x8 (twice for 16B alignment)
+    stp x0, x1, [sp, #0x10]
+    stp x2, x3, [sp, #0x20]
+    stp x4, x5, [sp, #0x30]
+    stp x6, x7, [sp, #0x40]
+    stp x8, x8, [sp, #0x50]
+
+    // Save float argument registers as well since they are volatile
+    stp d0, d1, [sp, #0x60]
+    stp d2, d3, [sp, #0x70]
+    stp d4, d5, [sp, #0x80]
+    stp d6, d7, [sp, #0x90]
+
+    mov x0, x9      // passing reverse pinvoke frame pointer in x0
+    bl RhpReversePInvokeAttachOrTrapThread2
+
+    // Restore floating point registers
+    ldp d0, d1, [sp, #0x60]
+    ldp d2, d3, [sp, #0x70]
+    ldp d4, d5, [sp, #0x80]
+    ldp d6, d7, [sp, #0x90]
+
+    // Restore the argument registers
+    ldp x0, x1, [sp, #0x10]
+    ldp x2, x3, [sp, #0x20]
+    ldp x4, x5, [sp, #0x30]
+    ldp x6, x7, [sp, #0x40]
+    ldr x8, [sp, #0x50]
+
+    // Restore FP and LR registers, and free the allocated stack block
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0xA0
+    EPILOG_RETURN
+
+    NESTED_END RhpReversePInvokeAttachOrTrapThread, _TEXT
+
+//
+// RhpPInvoke
+//
+// IN: X0: address of pinvoke frame
+//
+// This helper assumes that its callsite is as good a place to start the stackwalk as the actual PInvoke callsite.
+// The code generator must treat the callsite of this helper as GC triggering and generate the GC info for it.
+// Also, the code generator must ensure that there are no live GC references in callee saved registers.
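+// In outline, the fast path below performs (a sketch; field names follow the OFFSETOF__ constants
+// from AsmOffsets.inc):
+//
+//   frame->m_FramePointer  = fp;
+//   frame->m_RIP           = lr;
+//   frame->m_PreservedRegs = sp;
+//   frame->m_Flags         = PTFF_SAVE_SP;
+//   frame->m_pThread       = GetThread();
+//   frame->m_pThread->m_pTransitionFrame = frame;
+//   if (RhpTrapThreads != TrapThreadsFlags_None) wait for the suspend to complete;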
+//
+
+NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler
+    str fp, [x0, #OFFSETOF__PInvokeTransitionFrame__m_FramePointer]
+    str lr, [x0, #OFFSETOF__PInvokeTransitionFrame__m_RIP]
+    mov x9, sp
+    str x9, [x0, #OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs]
+    mov x9, #PTFF_SAVE_SP
+    str x9, [x0, #OFFSETOF__PInvokeTransitionFrame__m_Flags]
+
+    // get TLS global variable address
+    // x10 = GetThread()
+    INLINE_GETTHREAD x10
+    str x10, [x0, #OFFSETOF__PInvokeTransitionFrame__m_pThread]
+    str x0, [x10, #OFFSETOF__Thread__m_pTransitionFrame]
+
+    adrp x9, RhpTrapThreads
+    add x9, x9, :lo12:RhpTrapThreads
+    ldr w9, [x9]
+    cbnz w9, InvokeRareTrapThread      // TrapThreadsFlags_None = 0
+    ret
+
+InvokeRareTrapThread:
+    b C_FUNC(RhpWaitForSuspend2)
+NESTED_END RhpPInvoke, _TEXT
+
+
+LEAF_ENTRY RhpPInvokeReturn, _TEXT
+    ldr x9, [x0, #OFFSETOF__PInvokeTransitionFrame__m_pThread]
+    mov x10, 0
+    str x10, [x9, #OFFSETOF__Thread__m_pTransitionFrame]
+
+    adrp x9, RhpTrapThreads
+    add x9, x9, :lo12:RhpTrapThreads
+    ldr w9, [x9]
+    cbnz w9, 0f      // TrapThreadsFlags_None = 0
+    ret
+0:
+    // passing transition frame pointer in x0
+    b RhpWaitForGC
+LEAF_END RhpPInvokeReturn, _TEXT
+
diff --git a/src/Native/Runtime/arm64/StubDispatch.S b/src/Native/Runtime/arm64/StubDispatch.S
index 01ed602a761..25aae897289 100644
--- a/src/Native/Runtime/arm64/StubDispatch.S
+++ b/src/Native/Runtime/arm64/StubDispatch.S
@@ -2,5 +2,109 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 #include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
 
-// TODO: Implement Arm64 support
+#define __tls_array 0
+
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+
+    .extern RhpCidResolve
+    .extern RhpUniversalTransition_DebugStepTailCall
+
+    // Macro that generates code to check a single cache entry.
+    .macro CHECK_CACHE_ENTRY entry
+    // Check a single entry in the cache.
+    //  x9  : Cache data structure. Also used for target address jump.
+    //  x10 : Instance EEType*
+    //  x11 : Still contains the indirection cell address. Do not trash.
+    //  x12 : Trashed
+    ldr x12, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16))]
+    cmp x10, x12
+    bne 0f
+    ldr x9, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)]
+    br x9
+0:
+    .endm
+
+//
+// Macro that generates a stub consuming a cache with the given number of entries.
+//
+    .macro DEFINE_INTERFACE_DISPATCH_STUB entries
+
+    NESTED_ENTRY "RhpInterfaceDispatch\entries", _TEXT, NoHandler
+
+    // x11 currently holds the indirection cell address. We need to get the cache structure instead.
+    ldr x9, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache]
+
+    // Load the EEType from the object instance in x0.
+    ldr x10, [x0]
+
+    .global CurrentEntry
+    .set CurrentEntry, 0
+
+    .rept \entries
+        CHECK_CACHE_ENTRY CurrentEntry
+        .set CurrentEntry, CurrentEntry + 1
+    .endr
+
+    // x11 still contains the indirection cell address.
+    b RhpInterfaceDispatchSlow
+
+    NESTED_END "RhpInterfaceDispatch\entries", _TEXT
+
+    .endm
+
+//
+// Define all the stub routines we currently need.
+//
+    DEFINE_INTERFACE_DISPATCH_STUB 1
+    DEFINE_INTERFACE_DISPATCH_STUB 2
+    DEFINE_INTERFACE_DISPATCH_STUB 4
+    DEFINE_INTERFACE_DISPATCH_STUB 8
+    DEFINE_INTERFACE_DISPATCH_STUB 16
+    DEFINE_INTERFACE_DISPATCH_STUB 32
+    DEFINE_INTERFACE_DISPATCH_STUB 64
+
+//
+// Initial dispatch on an interface when we don't have a cache yet.
+//
+    LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT
+    // Just tail call to the cache miss helper.
+    b RhpInterfaceDispatchSlow
+    LEAF_END RhpInitialInterfaceDispatch, _TEXT
+
+//
+// Stub dispatch routine for dispatch to a vtable slot
+//
+    LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT
+    // xip1 has the interface dispatch cell address in it.
+    // Load x12 to point to the vtable offset (which is stored in the m_pCache field).
+    ldr x12, [xip1, #OFFSETOF__InterfaceDispatchCell__m_pCache]
+
+    // Load the EEType from the object instance in x0, and add it to the vtable offset
+    // to get the address in the vtable of what we want to dereference
+    ldr x13, [x0]
+    add x12, x12, x13
+
+    // Load the target address of the vtable into x12
+    ldr x12, [x12]
+
+    br x12
+    LEAF_END RhpVTableOffsetDispatch, _TEXT
+
+//
+// Cache miss case, call the runtime to resolve the target and update the cache.
+//
+    LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT
+    ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch
+    // x11 has the interface dispatch cell address in it.
+    // Calling convention of the universal thunk is:
+    //  xip0: contains target address for the thunk to call
+    //  xip1: contains parameter of the thunk's target
+    adrp xip0, RhpCidResolve
+    add xip0, xip0, :lo12:RhpCidResolve
+    mov xip1, x11
+    b RhpUniversalTransition_DebugStepTailCall
+    LEAF_END RhpInterfaceDispatchSlow, _TEXT
+
+#endif // FEATURE_CACHED_INTERFACE_DISPATCH
diff --git a/src/Native/Runtime/arm64/UniversalTransition.S b/src/Native/Runtime/arm64/UniversalTransition.S
index 876f2dfbcb8..81fd8ca3385 100644
--- a/src/Native/Runtime/arm64/UniversalTransition.S
+++ b/src/Native/Runtime/arm64/UniversalTransition.S
@@ -1,4 +1,159 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-// TODO: Implement
+#include <unixasmmacros.inc>
+
+#ifdef _DEBUG
+#define TRASH_SAVED_ARGUMENT_REGISTERS
+#endif
+
+#ifdef TRASH_SAVED_ARGUMENT_REGISTERS
+    // TODO .extern RhpIntegerTrashValues
+    // TODO .extern RhpFpTrashValues
+#endif // TRASH_SAVED_ARGUMENT_REGISTERS
+
+// Padding to account for the odd number of saved integer registers
+#define ALIGNMENT_PADDING_SIZE (8)
+
+#define COUNT_ARG_REGISTERS (9)
+#define INTEGER_REGISTER_SIZE (8)
+#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE)
+
+// Largest return block is 4 doubles
+#define RETURN_BLOCK_SIZE (32)
+
+#define COUNT_FLOAT_ARG_REGISTERS (8)
+#define FLOAT_REGISTER_SIZE (8)
+#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE)
+
+#define PUSHED_LR_SIZE (8)
+#define PUSHED_FP_SIZE (8)
+
+//
+// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions:
+//
+//      ALIGNMENT_PADDING_SIZE
+//      ARGUMENT_REGISTERS_SIZE
+//      RETURN_BLOCK_SIZE
+//      FLOAT_ARG_REGISTERS_SIZE
+//      PUSHED_LR_SIZE
+//      PUSHED_FP_SIZE
+//
+
+#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_LR_SIZE + FLOAT_ARG_REGISTERS_SIZE)
+
+#define STACK_SIZE (ALIGNMENT_PADDING_SIZE + ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_LR_SIZE + PUSHED_FP_SIZE)
+
+#define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_LR_SIZE)
+#define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE)
+
+//
+// RhpUniversalTransition
+//
+// At input to this function, x0-8, d0-7 and the stack may contain any number of arguments.
+//
+// In addition, there are 2 extra arguments passed in the intra-procedure-call scratch registers:
+//  xip0 will contain the managed function that is to be called by this transition function
+//  xip1 will contain the pointer-sized extra argument to the managed function
+//
+// When invoking the callee:
+//
+//  x0 shall contain a pointer to the TransitionBlock
+//  x1 shall contain the value that was in xip1 at entry to this function
+//
+// Frame layout is:
+//
+//  {StackPassedArgs}                           ChildSP+0C0     CallerSP+000
+//  {AlignmentPad (0x8 bytes)}                  ChildSP+0B8     CallerSP-008
+//  {IntArgRegs (x0-x8) (0x48 bytes)}           ChildSP+070     CallerSP-050
+//  {ReturnBlock (0x20 bytes)}                  ChildSP+050     CallerSP-070
+//   -- The base address of the Return block is the TransitionBlock pointer, the floating point args are
+//      in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact
+//      layout of all pieces of the frame that lie at or above the pushed floating point registers.
+//  {FpArgRegs (d0-d7) (0x40 bytes)}            ChildSP+010     CallerSP-0B0
+//  {PushedLR}                                  ChildSP+008     CallerSP-0B8
+//  {PushedFP}                                  ChildSP+000     CallerSP-0C0
+//
+// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure
+// must be updated as well.
+//
+// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has
+// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed
+// FpArgRegs.
+//
+// NOTE: The stack walker guarantees that conservative GC reporting will be applied to
+// everything between the base of the ReturnBlock and the top of the StackPassedArgs.
+//
+
+    .text
+
+    .macro UNIVERSAL_TRANSITION FunctionName
+
+    NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler
+
+    // FP and LR registers
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-STACK_SIZE      // Push down stack pointer and store FP and LR
+
+    // Floating point registers
+    stp d0, d1, [sp, #(FLOAT_ARG_OFFSET)]
+    stp d2, d3, [sp, #(FLOAT_ARG_OFFSET + 0x10)]
+    stp d4, d5, [sp, #(FLOAT_ARG_OFFSET + 0x20)]
+    stp d6, d7, [sp, #(FLOAT_ARG_OFFSET + 0x30)]
+
+    // Space for return buffer data (0x20 bytes)
+
+    // Save argument registers
+    stp x0, x1, [sp, #(ARGUMENT_REGISTERS_OFFSET)]
+    stp x2, x3, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x10)]
+    stp x4, x5, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x20)]
+    stp x6, x7, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x30)]
+    stp x8, xzr, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x40)]
+
+#ifdef TRASH_SAVED_ARGUMENT_REGISTERS
+    // ARM64TODO
+#endif // TRASH_SAVED_ARGUMENT_REGISTERS
+
+    add x0, sp, #DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK      // First parameter to target function is a pointer to the return block
+    mov x8, x0      // Arm64 calling convention: Address of return block shall be passed in x8
+    mov x1, xip1    // Second parameter to target function
+    blr xip0
+
+    // We cannot make the label public as that tricks DIA stackwalker into thinking
+    // it's the beginning of a method. For this reason we export an auxiliary variable
+    // holding the address instead.
+    EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom\FunctionName
+
+    // Move the result (the target address) to x12 so it doesn't get overwritten when we restore the
+    // argument registers.
+    mov x12, x0
+
+    // Restore floating point registers
+    ldp d0, d1, [sp, #(FLOAT_ARG_OFFSET)]
+    ldp d2, d3, [sp, #(FLOAT_ARG_OFFSET + 0x10)]
+    ldp d4, d5, [sp, #(FLOAT_ARG_OFFSET + 0x20)]
+    ldp d6, d7, [sp, #(FLOAT_ARG_OFFSET + 0x30)]
+
+    // Restore the argument registers
+    ldp x0, x1, [sp, #(ARGUMENT_REGISTERS_OFFSET)]
+    ldp x2, x3, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x10)]
+    ldp x4, x5, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x20)]
+    ldp x6, x7, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x30)]
+    ldr x8, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x40)]
+
+    // Restore FP and LR registers, and free the allocated stack block
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #STACK_SIZE
+
+    // Tailcall to the target address.
+    // TODO EPILOG_NOP
+    br x12
+
+    NESTED_END Rhp\FunctionName, _TEXT
+
+    .endm
+
+    // To enable proper step-in behavior in the debugger, we need to have two instances
+    // of the thunk. For the first one, the debugger steps into the call in the function,
+    // for the other, it steps over it.
+    UNIVERSAL_TRANSITION UniversalTransition
+    UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall
+
diff --git a/src/Native/Runtime/arm64/WriteBarriers.S b/src/Native/Runtime/arm64/WriteBarriers.S
index a14d99d7ef4..a1e3c103673 100644
--- a/src/Native/Runtime/arm64/WriteBarriers.S
+++ b/src/Native/Runtime/arm64/WriteBarriers.S
@@ -1,33 +1,368 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-// TODO: Implement Unix write barriers
 #include <unixasmmacros.inc>
 
-LEAF_ENTRY RhpAssignRef, _TEXT
-    str x1, [x0]
-    ret
-LEAF_END RhpAssignRef, _TEXT
+// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used
+// during garbage collections to verify that object references were never written to the heap without using a
+// write barrier. Note that we are potentially racing to update the shadow heap while other threads are writing
+// new references to the real heap. Since this cannot be solved perfectly without critical sections around the
+// entire update process, we instead update the shadow location and then re-check the real location (as two
+// ordered operations) and if there is a disparity we will re-write the shadow location with a special value
+// (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC
+// time and these write barrier operations are atomic with respect to GCs this is sufficient to guarantee that
+// the shadow heap contains only valid copies of real heap values or INVALIDGCVALUE.
+#ifdef WRITE_BARRIER_CHECK
 
-LEAF_ENTRY RhpCheckedAssignRef, _TEXT
-    str x1, [x0]
-    ret
-LEAF_END RhpCheckedAssignRef, _TEXT
+// TODO
+
+    SETALIAS g_GCShadow, ?g_GCShadow@@3PEAEEA
+    SETALIAS g_GCShadowEnd, ?g_GCShadowEnd@@3PEAEEA
+    EXTERN $g_GCShadow
+    EXTERN $g_GCShadowEnd
+
+INVALIDGCVALUE EQU 0xCCCCCCCD
+
+    MACRO
+    // On entry:
+    //  $destReg: location to be updated
+    //  $refReg: objectref to be stored
+    //
+    // On exit:
+    //  x9,x10: trashed
+    //  other registers are preserved
+    //
+    UPDATE_GC_SHADOW $destReg, $refReg
+
+    // If g_GCShadow is 0, don't perform the check.
+    adrp x9, $g_GCShadow
+    ldr x9, [x9, $g_GCShadow]
+    cbz x9, %ft1
+
+    // Save $destReg since we're about to modify it (and we need the original value both within the macro and
+    // once we exit the macro).
+    mov x10, $destReg
+
+    // Transform $destReg into the equivalent address in the shadow heap.
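+    // That is: shadow = g_GCShadow + ($destReg - g_lowest_address), with a bail-out below
+    // g_lowest_address and above g_GCShadowEnd.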
+    adrp x9, g_lowest_address
+    ldr x9, [x9, g_lowest_address]
+    subs $destReg, $destReg, x9
+    blt %ft0
+
+    adrp x9, $g_GCShadow
+    ldr x9, [x9, $g_GCShadow]
+    add $destReg, $destReg, x9
+
+    adrp x9, $g_GCShadowEnd
+    ldr x9, [x9, $g_GCShadowEnd]
+    cmp $destReg, x9
+    bgt %ft0
+
+    // Update the shadow heap.
+    str $refReg, [$destReg]
+
+    // The following read must be strongly ordered with respect to the write we have just performed in order to
+    // prevent race conditions.
+    dmb ish
+
+    // Now check that the real heap location still contains the value we just wrote into the shadow heap.
+    mov x9, x10
+    ldr x9, [x9]
+    cmp x9, $refReg
+    beq %ft0
+
+    // Someone went and updated the real heap. We need to invalidate the shadow location since we cannot
+    // guarantee whose shadow update won.
+    MOVL64 x9, INVALIDGCVALUE, 0
+    str x9, [$destReg]
+
+0
+    // Restore original $destReg value
+    mov $destReg, x10
+
+1
+    MEND
+
+#else // WRITE_BARRIER_CHECK
+
+    .macro UPDATE_GC_SHADOW destReg, refReg
+    .endm
+
+#endif // WRITE_BARRIER_CHECK
+
+// There are several different helpers used depending on which register holds the object reference. Since all
+// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the
+// name of the register that points to the location to be updated and the name of the register that holds the
+// object reference (this should be in upper case as it is used in the definition of the name of the helper).
+
+// Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for
+// some interlocked helpers that need an inline barrier.
+
+    // On entry:
+    //   destReg: location to be updated
+    //   refReg:  objectref to be stored
+    //   trash:   number of a register that can be trashed (used as x\trash / w\trash)
+    //   trash2:  register that can be trashed
+    //
+    // On exit:
+    //   destReg: trashed
+    //
+    .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg, trash, trash2
+    // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+    // we are in a debug build and write barrier checking has been enabled).
+    UPDATE_GC_SHADOW \destReg, \refReg
+
+    // We can skip the card table write if the reference is to
+    // an object not on the ephemeral segment.
+    adrp x\trash, g_ephemeral_low
+    add x\trash, x\trash, :lo12:g_ephemeral_low
+    ldr x\trash, [x\trash]
+    cmp \refReg, x\trash
+    blt 0f
+
+    adrp x\trash, g_ephemeral_high
+    add x\trash, x\trash, :lo12:g_ephemeral_high
+    ldr x\trash, [x\trash]
+    cmp \refReg, x\trash
+    bge 0f
+
+    // Set this object's card, if it has not already been set.
+
+    adrp x\trash, g_card_table
+    add x\trash, x\trash, :lo12:g_card_table
+    ldr x\trash, [x\trash]
+    add \trash2, x\trash, \destReg, lsr #11
+
+    // Check that this card has not already been written. Avoiding useless writes is a big win on
+    // multi-proc systems since it avoids cache thrashing.
+    ldrb w\trash, [\trash2]
+    cmp x\trash, 0xFF
+    beq 0f
+
+    mov x\trash, 0xFF
+    strb w\trash, [\trash2]
+0:
+    // Exit label
+    .endm
+
+    // On entry:
+    //   destReg: location to be updated
+    //   refReg:  objectref to be stored
+    //   trash:   number of a register that can be trashed (used as x\trash)
+    //   trash2:  register that can be trashed
+    //
+    // On exit:
+    //   destReg: trashed
+    //
+    .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg, trash, trash2
+
+    // The "check" of this checked write barrier: is destReg
+    // within the heap? If not, early out.
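+    // That is: proceed only when g_lowest_address <= destReg <= g_highest_address.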
+    adrp x\trash, g_lowest_address
+    add x\trash, x\trash, :lo12:g_lowest_address
+    ldr x\trash, [x\trash]
+    cmp \destReg, x\trash
+    blt 0f
+
+    adrp x\trash, g_highest_address
+    add x\trash, x\trash, :lo12:g_highest_address
+    ldr x\trash, [x\trash]
+    cmp \destReg, x\trash
+    bgt 0f
+
+    INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg, \trash, \trash2
+
+0:
+    // Exit label
+    .endm
+
+// RhpCheckedAssignRef(Object** dst, Object* src)
 //
-// RhpByRefAssignRef simulates movs instruction for object references.
+// Write barrier for writes to objects that may reside
+// on the managed heap.
 //
 // On entry:
-// x0: address of ref-field (assigned to)
-// x1: address of the data (source)
-// x3: be trashed
+//  x0 : the destination address (LHS of the assignment).
+//       May not be an object reference (hence the checked).
+//  x1 : the object reference (RHS of the assignment).
+// On exit:
+//  x1 : trashed
+//  x9 : trashed
+    LEAF_ENTRY RhpCheckedAssignRef, _TEXT
+    ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+    ALTERNATE_ENTRY RhpCheckedAssignRefX1
+    ALTERNATE_ENTRY RhpCheckedAssignRefX1AVLocation
+
+    stlr x1, [x0]
+
+    INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x1
+
+    ret
+
+    LEAF_END RhpCheckedAssignRef, _TEXT
+
+// RhpAssignRef(Object** dst, Object* src)
 //
+// Write barrier for writes to objects that are known to
+// reside on the managed heap.
+//
+// On entry:
+//  x0 : the destination address (LHS of the assignment).
+//  x1 : the object reference (RHS of the assignment).
 // On exit:
-// x0, x1 are incremented by 8,
-// x3: trashed
+//  x1 : trashed
+//  x9 : trashed
+    LEAF_ENTRY RhpAssignRef, _TEXT
+    ALTERNATE_ENTRY RhpAssignRefAVLocation
+    ALTERNATE_ENTRY RhpAssignRefX1
+    ALTERNATE_ENTRY RhpAssignRefX1AVLocation
+
+    stlr x1, [x0]
+
+    INSERT_UNCHECKED_WRITE_BARRIER_CORE x0, x1, 9, x1
+
+    ret
+
+    LEAF_END RhpAssignRef, _TEXT
+
+// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon
+// successful updates.
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to a passed-in null pointer will happen at RhpCheckedLockCmpXchgAVLocation
+// - Function "UnwindWriteBarrierToCaller" assumes no registers were pushed and LR contains the return address
+
+// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand)
+//
+// Interlocked compare exchange on objectref.
+//
+// On entry:
+//  x0: pointer to objectref
+//  x1: exchange value
+//  x2: comparand
+//
+// On exit:
+//  x0: original value of objectref
+//  x9: trashed
+//  x10: trashed
+//
+    LEAF_ENTRY RhpCheckedLockCmpXchg, _TEXT
+    ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation
+
+CmpXchgRetry:
+    // Check location value is what we expect.
+    ldaxr x10, [x0]
+    cmp x10, x2
+    bne CmpXchgNoUpdate
+
+    // Current value matches comparand, attempt to update with the new value.
+    stlxr w9, x1, [x0]
+    cbnz w9, CmpXchgRetry
+
+    // We have successfully updated the value of the objectref so now we need a GC write barrier.
+    // The following barrier code takes the destination in x0 and the value in x1 so the arguments are
+    // already correctly set up.
+
+    INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x0
+
+CmpXchgNoUpdate:
+    // x10 still contains the original value.
+    mov x0, x10
+    ret lr
+
+    LEAF_END RhpCheckedLockCmpXchg, _TEXT
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to a passed-in null pointer will happen at RhpCheckedXchgAVLocation
+// - Function "UnwindWriteBarrierToCaller" assumes no registers were pushed and LR contains the return address
+
+// RhpCheckedXchg(Object** destination, Object* value)
+//
+// Interlocked exchange on objectref.
+//
+// On entry:
+//  x0: pointer to objectref
+//  x1: exchange value
+//
+// On exit:
+//  x0: original value of objectref
+//  x9: trashed
+//  x10: trashed
+//
+    LEAF_ENTRY RhpCheckedXchg, _TEXT
+    ALTERNATE_ENTRY RhpCheckedXchgAVLocation
+
+ExchangeRetry:
+    // Read the existing memory location.
+    ldaxr x10, [x0]
+
+    // Attempt to update with the new value.
+    stlxr w9, x1, [x0]
+    cbnz w9, ExchangeRetry
+
+    // We have successfully updated the value of the objectref so now we need a GC write barrier.
+    // The following barrier code takes the destination in x0 and the value in x1 so the arguments are
+    // already correctly set up.
+
+    INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x0
+
+    // x10 still contains the original value.
+    mov x0, x10
+    ret
+
+    LEAF_END RhpCheckedXchg, _TEXT
+
+LEAF_ENTRY RhpAssignRefArm64, _TEXT
+    stlr x15, [x14]
+
+    INSERT_UNCHECKED_WRITE_BARRIER_CORE x14, x15, 12, x14
+
+    ret
+LEAF_END RhpAssignRefArm64, _TEXT
+
+// void JIT_CheckedWriteBarrier(Object** dst, Object* src)
+// On entry:
+//   x14 : the destination address (LHS of the assignment)
+//   x15 : the object reference (RHS of the assignment)
+//
+// On exit:
+//   x12 : trashed
+//   x14 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract)
+//   x15 : trashed
+//   x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
 //
-LEAF_ENTRY RhpByRefAssignRef, _TEXT
-    ldr x3, [x1], #8
-    str x3, [x0], #8
+LEAF_ENTRY RhpCheckedAssignRefArm64, _TEXT
+
+    stlr x15, [x14]
+
+    INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12, x15
+
+    add x14, x14, #8
+
+    ret
+LEAF_END RhpCheckedAssignRefArm64, _TEXT
+
+// void JIT_ByRefWriteBarrier
+// On entry:
+//   x13 : the source address (points to object reference to write)
+//   x14 : the destination address (object reference written here)
+//
+// On exit:
+//   x12 : trashed
+//   x13 : incremented by 8
+//   x14 : incremented by 8
+//   x15 : trashed
+//   x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+//
+LEAF_ENTRY RhpByRefAssignRefArm64, _TEXT
+    ldr x15, [x13]
+    str x15, [x14]
+
+    INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12, x15
+
+    add x13, x13, #8
+    add x14, x14, #8
+
     ret
-LEAF_END RhpByRefAssignRef, _TEXT
+LEAF_END RhpByRefAssignRefArm64, _TEXT
diff --git a/src/Native/Runtime/startup.cpp b/src/Native/Runtime/startup.cpp
index 4ed81423af9..a7faeccd263 100644
--- a/src/Native/Runtime/startup.cpp
+++ b/src/Native/Runtime/startup.cpp
@@ -49,7 +49,7 @@ EXTERN_C bool g_fHasFastFxsave = false;
 CrstStatic g_CastCacheLock;
 CrstStatic g_ThunkPoolLock;
 
-#if defined(HOST_X86) || defined(HOST_AMD64)
+#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64)
 // This field is inspected from the generated code to determine what intrinsics are available.
 EXTERN_C int g_cpuFeatures = 0;
 // This field is defined in the generated code and sets the ISA expectations.
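For context, the ARM64 path added in the next hunk delegates CPU feature detection to the PAL via
PAL_GetCpuCapabilityFlags. A minimal sketch of such a routine on Linux might look like the following
(illustrative only; the real implementation lives in the PAL, and the flag bit values are defined by
the compiler's HardwareIntrinsicHelpers, not by the placeholder constants used here):

    #include <sys/auxv.h>
    #include <asm/hwcap.h>

    // Sketch: translate Linux hwcaps into the g_cpuFeatures bitmask.
    // The 0x1/0x2 bit assignments below are placeholders, not the real values.
    void PAL_GetCpuCapabilityFlags(int* flags)
    {
        *flags = 0;
        unsigned long hwcap = getauxval(AT_HWCAP);
        if (hwcap & HWCAP_AES)
            *flags |= 0x1;
        if (hwcap & HWCAP_ATOMICS)
            *flags |= 0x2;
    }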
@@ -155,7 +155,9 @@ static void CheckForPalFallback()
 }
 
 #ifndef USE_PORTABLE_HELPERS
-// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.cs
+
+#if defined(HOST_X86) || defined(HOST_AMD64)
+// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.Aot.cs
 enum XArchIntrinsicConstants
 {
     XArchIntrinsicConstants_Aes = 0x0001,
@@ -173,8 +175,12 @@ enum XArchIntrinsicConstants
     XArchIntrinsicConstants_Lzcnt = 0x1000,
 };
 
+#endif
+
 bool DetectCPUFeatures()
 {
+#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64)
+
 #if defined(HOST_X86) || defined(HOST_AMD64)
     unsigned char buffer[16];
 
@@ -293,12 +299,17 @@ bool DetectCPUFeatures()
             g_cpuFeatures |= XArchIntrinsicConstants_Lzcnt;
         }
     }
+#endif // HOST_X86 || HOST_AMD64
+
+#if defined(HOST_ARM64)
+    PAL_GetCpuCapabilityFlags(&g_cpuFeatures);
+#endif
 
     if ((g_cpuFeatures & g_requiredCpuFeatures) != g_requiredCpuFeatures)
     {
         return false;
     }
-#endif // HOST_X86 || HOST_AMD64
+#endif // HOST_X86 || HOST_AMD64 || HOST_ARM64
 
     return true;
 }
diff --git a/src/Native/Runtime/unix/UnixContext.cpp b/src/Native/Runtime/unix/UnixContext.cpp
index 458214bbe56..08b98ef6365 100644
--- a/src/Native/Runtime/unix/UnixContext.cpp
+++ b/src/Native/Runtime/unix/UnixContext.cpp
@@ -301,7 +301,7 @@ bool GetUnwindProcInfo(PCODE ip, unw_proc_info_t *procInfo)
 #elif HOST_ARM
     ((uint32_t*)(unwContext.data))[15] = ip;
 #elif HOST_ARM64
-    ((uint32_t*)(unwContext.data))[32] = ip;
+    unwContext.data[32] = ip;
 #elif HOST_WASM
     ASSERT(false);
 #elif HOST_X86
@@ -618,7 +618,7 @@ bool FindProcInfo(UIntNative controlPC, UIntNative* startAddress, UIntNative* ls
 
     assert((procInfo.start_ip <= controlPC) && (controlPC < procInfo.end_ip));
 
-#if defined(HOST_ARM) || defined(HOST_ARM64)
+#if defined(HOST_ARM)
     // libunwind fills by reference not by value for ARM
     *lsda = *((UIntNative *)procInfo.lsda);
 #else
diff --git a/src/Native/Runtime/unix/UnixNativeCodeManager.cpp b/src/Native/Runtime/unix/UnixNativeCodeManager.cpp
index e03a85e3205..a2639f6078c 100644
--- a/src/Native/Runtime/unix/UnixNativeCodeManager.cpp
+++ b/src/Native/Runtime/unix/UnixNativeCodeManager.cpp
@@ -388,8 +388,9 @@ bool UnixNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pE
         {
             // @TODO: CORERT: Compress EHInfo using type table index scheme
             // https://github.com/dotnet/corert/issues/972
-            Int32 typeRelAddr = *((PTR_Int32&)pEnumState->pEHInfo)++;
+            Int32 typeRelAddr = *((PTR_Int32&)pEnumState->pEHInfo);
             pEHClauseOut->m_pTargetType = dac_cast<PTR_VOID>(pEnumState->pEHInfo + typeRelAddr);
+            pEnumState->pEHInfo += 4;
         }
         break;
     case EH_CLAUSE_FAULT:
diff --git a/src/Native/Runtime/unix/UnwindHelpers.cpp b/src/Native/Runtime/unix/UnwindHelpers.cpp
index ced22cc272c..9ed75c6a1d7 100644
--- a/src/Native/Runtime/unix/UnwindHelpers.cpp
+++ b/src/Native/Runtime/unix/UnwindHelpers.cpp
@@ -475,229 +475,284 @@ void Registers_arm_rt::setRegister(int num, uint32_t value, uint32_t location)
 
 #if defined(TARGET_ARM64)
 
-class Registers_arm64_rt: public libunwind::Registers_arm64 {
-public:
-    Registers_arm64_rt() { abort(); };
-    Registers_arm64_rt(const void *registers);
+// Shim that implements methods required by libunwind over REGDISPLAY
+struct Registers_REGDISPLAY : REGDISPLAY
+{
+    inline static int getArch() { return libunwind::REGISTERS_ARM64; }
+    inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; }
+
+    bool validRegister(int num) const;
+    bool validFloatRegister(int num) { return false; };
+    bool validVectorRegister(int num) const;
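+    // Note: REGDISPLAY tracks only x0-x28, SP, FP, LR, IP and the callee-saved d8-d15,
+    // so the accessors below reject everything else.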
- bool validRegister(int num) {abort();}; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value, uint64_t location); - bool validFloatRegister(int num) {abort();}; + double getFloatRegister(int num) {abort();} void setFloatRegister(int num, double value) {abort();} - bool validVectorRegister(int num) const {abort();} - libunwind::v128 getVectorRegister(int num) const {abort();}; - void setVectorRegister(int num, libunwind::v128 value) {abort();}; - void jumpto() { abort();}; + + libunwind::v128 getVectorRegister(int num) const; + void setVectorRegister(int num, libunwind::v128 value); - uint64_t getSP() const { return regs->SP;} - void setSP(uint64_t value, uint64_t location) { regs->SP = value;} - uint64_t getIP() const { return regs->IP;} + uint64_t getSP() const { return SP;} + void setSP(uint64_t value, uint64_t location) { SP = value;} + uint64_t getIP() const { return IP;} void setIP(uint64_t value, uint64_t location) - { regs->IP = value; regs->pIP = (PTR_UIntNative)location; } - void saveVFPAsX() {abort();}; -private: - REGDISPLAY *regs; + { IP = value; pIP = (PTR_UIntNative)location; } }; -inline Registers_arm64_rt::Registers_arm64_rt(const void *registers) { - regs = (REGDISPLAY *)registers; +inline bool Registers_REGDISPLAY::validRegister(int num) const { + if (num == UNW_REG_SP || num == UNW_ARM64_SP) + return true; + + if (num == UNW_ARM64_FP) + return true; + + if (num == UNW_ARM64_LR) + return true; + + if (num == UNW_REG_IP) + return true; + + if (num >= UNW_ARM64_X0 && num <= UNW_ARM64_X28) + return true; + + return false; +} + +bool Registers_REGDISPLAY::validVectorRegister(int num) const +{ + if (num >= UNW_ARM64_D8 && num <= UNW_ARM64_D15) + return true; + + return false; } -inline uint64_t Registers_arm64_rt::getRegister(int regNum) const { +inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { if (regNum == UNW_REG_SP || regNum == UNW_ARM64_SP) - return regs->SP; + return SP; + + if (regNum == UNW_ARM64_FP) + return *pFP; if (regNum == UNW_ARM64_LR) - return *regs->pLR; + return *pLR; if (regNum == UNW_REG_IP) - return regs->IP; + return IP; switch (regNum) { case (UNW_ARM64_X0): - return *regs->pX0; + return *pX0; case (UNW_ARM64_X1): - return *regs->pX1; + return *pX1; case (UNW_ARM64_X2): - return *regs->pX2; + return *pX2; case (UNW_ARM64_X3): - return *regs->pX3; + return *pX3; case (UNW_ARM64_X4): - return *regs->pX4; + return *pX4; case (UNW_ARM64_X5): - return *regs->pX5; + return *pX5; case (UNW_ARM64_X6): - return *regs->pX6; + return *pX6; case (UNW_ARM64_X7): - return *regs->pX7; + return *pX7; case (UNW_ARM64_X8): - return *regs->pX8; + return *pX8; case (UNW_ARM64_X9): - return *regs->pX9; + return *pX9; case (UNW_ARM64_X10): - return *regs->pX10; + return *pX10; case (UNW_ARM64_X11): - return *regs->pX11; + return *pX11; case (UNW_ARM64_X12): - return *regs->pX12; + return *pX12; case (UNW_ARM64_X13): - return *regs->pX13; + return *pX13; case (UNW_ARM64_X14): - return *regs->pX14; + return *pX14; case (UNW_ARM64_X15): - return *regs->pX15; + return *pX15; case (UNW_ARM64_X16): - return *regs->pX16; + return *pX16; case (UNW_ARM64_X17): - return *regs->pX17; + return *pX17; case (UNW_ARM64_X18): - return *regs->pX18; + return *pX18; case (UNW_ARM64_X19): - return *regs->pX19; + return *pX19; case (UNW_ARM64_X20): - return *regs->pX20; + return *pX20; case (UNW_ARM64_X21): - return *regs->pX21; + return *pX21; case (UNW_ARM64_X22): - return *regs->pX22; + return *pX22; case (UNW_ARM64_X23): - return *regs->pX23; + 
return *pX23; case (UNW_ARM64_X24): - return *regs->pX24; + return *pX24; case (UNW_ARM64_X25): - return *regs->pX25; + return *pX25; case (UNW_ARM64_X26): - return *regs->pX26; + return *pX26; case (UNW_ARM64_X27): - return *regs->pX27; + return *pX27; case (UNW_ARM64_X28): - return *regs->pX28; + return *pX28; } PORTABILITY_ASSERT("unsupported arm64 register"); } -void Registers_arm64_rt::setRegister(int num, uint64_t value, uint64_t location) +void Registers_REGDISPLAY::setRegister(int num, uint64_t value, uint64_t location) { - if (num == UNW_REG_SP || num == UNW_ARM64_SP) { - regs->SP = (UIntNative )value; + SP = (UIntNative )value; + return; + } + + if (num == UNW_ARM64_FP) { + pFP = (PTR_UIntNative)location; return; } if (num == UNW_ARM64_LR) { - regs->pLR = (PTR_UIntNative)location; + pLR = (PTR_UIntNative)location; return; } if (num == UNW_REG_IP) { - regs->IP = value; - /* the location could be NULL, we could try to recover - pointer to value in stack from pLR */ - if ((!location) && (regs->pLR) && (*regs->pLR == value)) - regs->pIP = regs->pLR; - else - regs->pIP = (PTR_UIntNative)location; + IP = value; return; } switch (num) { case (UNW_ARM64_X0): - regs->pX0 = (PTR_UIntNative)location; + pX0 = (PTR_UIntNative)location; break; case (UNW_ARM64_X1): - regs->pX1 = (PTR_UIntNative)location; + pX1 = (PTR_UIntNative)location; break; case (UNW_ARM64_X2): - regs->pX2 = (PTR_UIntNative)location; + pX2 = (PTR_UIntNative)location; break; case (UNW_ARM64_X3): - regs->pX3 = (PTR_UIntNative)location; + pX3 = (PTR_UIntNative)location; break; case (UNW_ARM64_X4): - regs->pX4 = (PTR_UIntNative)location; + pX4 = (PTR_UIntNative)location; break; case (UNW_ARM64_X5): - regs->pX5 = (PTR_UIntNative)location; + pX5 = (PTR_UIntNative)location; break; case (UNW_ARM64_X6): - regs->pX6 = (PTR_UIntNative)location; + pX6 = (PTR_UIntNative)location; break; case (UNW_ARM64_X7): - regs->pX7 = (PTR_UIntNative)location; + pX7 = (PTR_UIntNative)location; break; case (UNW_ARM64_X8): - regs->pX8 = (PTR_UIntNative)location; + pX8 = (PTR_UIntNative)location; break; case (UNW_ARM64_X9): - regs->pX9 = (PTR_UIntNative)location; + pX9 = (PTR_UIntNative)location; break; case (UNW_ARM64_X10): - regs->pX10 = (PTR_UIntNative)location; + pX10 = (PTR_UIntNative)location; break; case (UNW_ARM64_X11): - regs->pX11 = (PTR_UIntNative)location; + pX11 = (PTR_UIntNative)location; break; case (UNW_ARM64_X12): - regs->pX12 = (PTR_UIntNative)location; + pX12 = (PTR_UIntNative)location; break; case (UNW_ARM64_X13): - regs->pX13 = (PTR_UIntNative)location; + pX13 = (PTR_UIntNative)location; break; case (UNW_ARM64_X14): - regs->pX14 = (PTR_UIntNative)location; + pX14 = (PTR_UIntNative)location; break; case (UNW_ARM64_X15): - regs->pX15 = (PTR_UIntNative)location; + pX15 = (PTR_UIntNative)location; break; case (UNW_ARM64_X16): - regs->pX16 = (PTR_UIntNative)location; + pX16 = (PTR_UIntNative)location; break; case (UNW_ARM64_X17): - regs->pX17 = (PTR_UIntNative)location; + pX17 = (PTR_UIntNative)location; break; case (UNW_ARM64_X18): - regs->pX18 = (PTR_UIntNative)location; + pX18 = (PTR_UIntNative)location; break; case (UNW_ARM64_X19): - regs->pX19 = (PTR_UIntNative)location; + pX19 = (PTR_UIntNative)location; break; case (UNW_ARM64_X20): - regs->pX20 = (PTR_UIntNative)location; + pX20 = (PTR_UIntNative)location; break; case (UNW_ARM64_X21): - regs->pX21 = (PTR_UIntNative)location; + pX21 = (PTR_UIntNative)location; break; case (UNW_ARM64_X22): - regs->pX22 = (PTR_UIntNative)location; + pX22 = (PTR_UIntNative)location; break; case 
(UNW_ARM64_X23):
-        regs->pX23 = (PTR_UIntNative)location;
+        pX23 = (PTR_UIntNative)location;
         break;
     case (UNW_ARM64_X24):
-        regs->pX24 = (PTR_UIntNative)location;
+        pX24 = (PTR_UIntNative)location;
         break;
     case (UNW_ARM64_X25):
-        regs->pX25 = (PTR_UIntNative)location;
+        pX25 = (PTR_UIntNative)location;
         break;
     case (UNW_ARM64_X26):
-        regs->pX26 = (PTR_UIntNative)location;
+        pX26 = (PTR_UIntNative)location;
        break;
     case (UNW_ARM64_X27):
-        regs->pX27 = (PTR_UIntNative)location;
+        pX27 = (PTR_UIntNative)location;
        break;
     case (UNW_ARM64_X28):
-        regs->pX28 = (PTR_UIntNative)location;
+        pX28 = (PTR_UIntNative)location;
        break;
     default:
         PORTABILITY_ASSERT("unsupported arm64 register");
     }
 }
 
+libunwind::v128 Registers_REGDISPLAY::getVectorRegister(int num) const
+{
+    num -= UNW_ARM64_D8;
+
+    if (num < 0 || num >= sizeof(D) / sizeof(UInt64))
+    {
+        PORTABILITY_ASSERT("unsupported arm64 vector register");
+    }
+
+    libunwind::v128 result;
+
+    result.vec[0] = 0;
+    result.vec[1] = 0;
+    result.vec[2] = D[num] >> 32;
+    result.vec[3] = D[num] & 0xFFFFFFFF;
+
+    return result;
+}
+
+void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value)
+{
+    num -= UNW_ARM64_D8;
+
+    if (num < 0 || num >= sizeof(D) / sizeof(UInt64))
+    {
+        PORTABILITY_ASSERT("unsupported arm64 vector register");
+    }
+
+    D[num] = (UInt64)value.vec[2] << 32 | (UInt64)value.vec[3];
+}
+
 #endif // TARGET_ARM64
 
 bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs)
@@ -707,7 +762,7 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs
 #elif defined(TARGET_ARM)
     libunwind::UnwindCursor<libunwind::LocalAddressSpace, Registers_arm_rt> uc(_addressSpace, regs);
 #elif defined(TARGET_ARM64)
-    libunwind::UnwindCursor<libunwind::LocalAddressSpace, Registers_arm64_rt> uc(_addressSpace, regs);
+    libunwind::UnwindCursor<libunwind::LocalAddressSpace, Registers_REGDISPLAY> uc(_addressSpace, regs);
 #elif defined(HOST_X86)
     libunwind::UnwindCursor<libunwind::LocalAddressSpace, libunwind::Registers_x86> uc(_addressSpace, regs);
 #else
@@ -724,10 +779,7 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs
     unw_proc_info_t procInfo;
     uc.getInfo(&procInfo);
 
-#if defined(TARGET_ARM64)
-    DwarfInstructions<libunwind::LocalAddressSpace, Registers_arm64_rt> dwarfInst;
-    int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm64_rt*)regs);
-#elif defined(TARGET_ARM)
+#if defined(TARGET_ARM)
     DwarfInstructions<libunwind::LocalAddressSpace, Registers_arm_rt> dwarfInst;
     int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm_rt*)regs);
 #else
@@ -740,7 +792,12 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs
         return false;
     }
 
+#if defined(TARGET_ARM64)
+    regs->SetAddrOfIP(regs->pLR);
+#else
     regs->pIP = PTR_PCODE(regs->SP - sizeof(TADDR));
+#endif
+
 #elif defined(_LIBUNWIND_ARM_EHABI)
     uc.setInfoBasedOnIPRegister(true);
     int stepRet = uc.step();
diff --git a/src/Native/Runtime/unix/unixasmmacrosarm64.inc b/src/Native/Runtime/unix/unixasmmacrosarm64.inc
index d031a77085e..3e3bbdbba86 100644
--- a/src/Native/Runtime/unix/unixasmmacrosarm64.inc
+++ b/src/Native/Runtime/unix/unixasmmacrosarm64.inc
@@ -1,6 +1,8 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+#include "AsmOffsets.inc"
+
 .macro NESTED_ENTRY Name, Section, Handler
     LEAF_ENTRY \Name, \Section
     .ifnc \Handler, NoHandler
@@ -69,6 +71,11 @@ C_FUNC(\Name):
     .endif
 .endm
 
+.macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ofs
+    stp \reg1, \reg2, [sp, \ofs]!
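+    // The pre-indexed form above adjusts sp by \ofs (negative for a push) before storing the pair.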
+.endm
+
+
 .macro EPILOG_RESTORE_REG reg, ofs
     ldr \reg, [sp, \ofs]
 .endm
@@ -137,3 +144,109 @@ C_FUNC(\Name):
     br \reg
 .endm
 
+
+#define xip0 x16
+#define xip1 x17
+#define xpr x18
+
+.macro INLINE_GET_TLS_VAR target, var
+    mrs \target, tpidr_el0
+    add \target, \target, #:tprel_hi12:\var, lsl #12
+    add \target, \target, #:tprel_lo12_nc:\var
+.endm
+
+
+.macro PREPARE_INLINE_GETTHREAD
+.global tls_CurrentThread
+.endm
+
+.macro INLINE_GETTHREAD target
+    INLINE_GET_TLS_VAR \target, tls_CurrentThread
+.endm
+
+.macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2
+    //
+    // Thread::Unhijack()
+    //
+    ldr \trashReg1, [\threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
+    cbz \trashReg1, 0f
+
+    ldr \trashReg2, [\threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation]
+    str \trashReg1, [\trashReg2]
+    str xzr, [\threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation]
+    str xzr, [\threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
+0:
+.endm
+
+
+.macro EXPORT_POINTER_TO_ADDRESS Name
+
+1:
+
+    .data
+    .align 8
+C_FUNC(\Name):
+    .quad 1b
+    .global C_FUNC(\Name)
+    .text
+.endm
+
+// Note: these must match the defs in PInvokeTransitionFrameFlags
+PTFF_SAVE_SP = 0x00000400
+PTFF_SAVE_ALL_PRESERVED = 0x000003FF // NOTE: x19-x28
+
+DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP
+
+.macro PUSH_COOP_PINVOKE_FRAME trashReg
+
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x80      // Push down stack pointer and store FP and LR
+
+    // 0x10 bytes reserved for Thread* and flags
+
+    // Save callee saved registers
+    PROLOG_SAVE_REG_PAIR x19, x20, #0x20
+    PROLOG_SAVE_REG_PAIR x21, x22, #0x30
+    PROLOG_SAVE_REG_PAIR x23, x24, #0x40
+    PROLOG_SAVE_REG_PAIR x25, x26, #0x50
+    PROLOG_SAVE_REG_PAIR x27, x28, #0x60
+
+    // Save the value of SP before stack allocation to the last slot in the frame (slot #15)
+    add \trashReg, sp, #0x80
+    str \trashReg, [sp, #0x70]
+
+    // Record the bitmask of saved registers in the frame (slot #3)
+    mov \trashReg, #DEFAULT_FRAME_SAVE_FLAGS
+    str \trashReg, [sp, #0x18]
+
+    mov \trashReg, sp
+.endm
+
+// Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME
+.macro POP_COOP_PINVOKE_FRAME
+
+    EPILOG_RESTORE_REG_PAIR x19, x20, #0x20
+    EPILOG_RESTORE_REG_PAIR x21, x22, #0x30
+    EPILOG_RESTORE_REG_PAIR x23, x24, #0x40
+    EPILOG_RESTORE_REG_PAIR x25, x26, #0x50
+    EPILOG_RESTORE_REG_PAIR x27, x28, #0x60
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x80
+.endm
+
+//
+// CONSTANTS -- INTEGER
+//
+#define TSF_Attached            0x01
+#define TSF_SuppressGcStress    0x08
+#define TSF_DoNotTriggerGc      0x10
+
+// Bit position for the flags above, to be used with tbz / tbnz instructions
+TrapThreadsFlags_AbortInProgress_Bit = 0
+TrapThreadsFlags_TrapThreads_Bit = 1
+
+// This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT
+#define STATUS_REDHAWK_THREAD_ABORT 0x43
+
+// These must match the TrapThreadsFlags enum
+#define TrapThreadsFlags_None 0
+#define TrapThreadsFlags_AbortInProgress 1
+#define TrapThreadsFlags_TrapThreads 2
diff --git a/src/Native/libunwind/src/AddressSpace.hpp b/src/Native/libunwind/src/AddressSpace.hpp
index fb07c807db9..389be0a5066 100644
--- a/src/Native/libunwind/src/AddressSpace.hpp
+++ b/src/Native/libunwind/src/AddressSpace.hpp
@@ -117,12 +117,23 @@ namespace libunwind {
 //    __eh_frame_hdr_start = SIZEOF(.eh_frame_hdr) > 0 ? ADDR(.eh_frame_hdr) : 0;
 //    __eh_frame_hdr_end = SIZEOF(.eh_frame_hdr) > 0 ? . : 0;
 
+#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX
 extern char __eh_frame_start;
 extern char __eh_frame_end;
+#endif
 
 #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX)
-extern char __eh_frame_hdr_start;
-extern char __eh_frame_hdr_end;
+
+#ifndef _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START
+#define _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START __eh_frame_hdr_start
+#endif
+
+#ifndef _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_END
+#define _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_END __eh_frame_hdr_end
+#endif
+
+extern char _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START;
+extern char _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_END;
 #endif
 
 #elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL)
@@ -161,9 +172,11 @@ struct UnwindInfoSections {
   uintptr_t       dso_base;
 #endif
 #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
+#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX
   uintptr_t       dwarf_section;
   uintptr_t       dwarf_section_length;
 #endif
+#endif
 #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX)
   uintptr_t       dwarf_index_section;
   uintptr_t       dwarf_index_section_length;
@@ -401,18 +414,31 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr,
   }
 #elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL)
   // Bare metal is statically linked, so no need to ask the dynamic loader
+#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX
   info.dwarf_section_length = (uintptr_t)(&__eh_frame_end - &__eh_frame_start);
-  info.dwarf_section =        (uintptr_t)(&__eh_frame_start);
+  info.dwarf_section = (uintptr_t)(&__eh_frame_start);
   _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: section %p length %p",
-                             (void *)info.dwarf_section, (void *)info.dwarf_section_length);
+                             (void *)info.dwarf_section,
+                             (void *)info.dwarf_section_length);
+#endif
 #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX)
-  info.dwarf_index_section =        (uintptr_t)(&__eh_frame_hdr_start);
-  info.dwarf_index_section_length = (uintptr_t)(&__eh_frame_hdr_end - &__eh_frame_hdr_start);
+  info.dwarf_index_section =
+      (uintptr_t)(&(_LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START));
+  info.dwarf_index_section_length =
+      (uintptr_t)(&(_LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_END) -
+                  &(_LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START));
   _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: index section %p length %p",
-                             (void *)info.dwarf_index_section, (void *)info.dwarf_index_section_length);
+                             (void *)info.dwarf_index_section,
+                             (void *)info.dwarf_index_section_length);
 #endif
+
+#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX
   if (info.dwarf_section_length)
     return true;
+#else
+  if (info.dwarf_index_section_length > 0)
+    return true;
+#endif
 #elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL)
   // Bare metal is statically linked, so no need to ask the dynamic loader
   info.arm_section = (uintptr_t)(&__exidx_start);
diff --git a/src/Native/libunwind/src/DwarfInstructions.hpp b/src/Native/libunwind/src/DwarfInstructions.hpp
index c5cc6c9d510..f341772824c 100644
--- a/src/Native/libunwind/src/DwarfInstructions.hpp
+++ b/src/Native/libunwind/src/DwarfInstructions.hpp
@@ -169,6 +169,7 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
   // restore registers that DWARF says were saved
   R newRegisters = registers;
   pint_t returnAddress = 0;
+  pint_t returnAddressLocation = 0;
  const int lastReg = R::lastDwarfRegNum();
  assert(static_cast<int>(CFI_Parser<A>::kMaxRegisterNumber) >= lastReg &&
         "register range too large");
@@ -177,7 +178,14 @@
  for (int i = 0; i <= lastReg; ++i) {
    if (prolog.savedRegisters[i].location !=
        CFI_Parser<A>::kRegisterUnused) {
-      if (registers.validFloatRegister(i))
+      if (i == (int)cieInfo.returnAddressRegister) {
+        returnAddress = getSavedRegister(addressSpace, registers, cfa,
+                                         prolog.savedRegisters[i],
+                                         returnAddressLocation);
+
+        newRegisters.setRegister(i, returnAddress, returnAddressLocation);
+      }
+      else if (registers.validFloatRegister(i))
         newRegisters.setFloatRegister(
             i, getSavedFloatRegister(addressSpace, registers, cfa,
                                      prolog.savedRegisters[i]));
@@ -185,12 +193,6 @@
         newRegisters.setVectorRegister(
             i, getSavedVectorRegister(addressSpace, registers, cfa,
                                       prolog.savedRegisters[i]));
-      else if (i == (int)cieInfo.returnAddressRegister) {
-        pint_t dummyLocation;
-        returnAddress = getSavedRegister(addressSpace, registers, cfa,
-                                         prolog.savedRegisters[i],
-                                         dummyLocation);
-      }
       else if (registers.validRegister(i)) {
         pint_t value;
         pint_t location;
@@ -272,7 +274,7 @@
 
     // Return address is address after call site instruction, so setting IP to
     // that simulates a return.
-    newRegisters.setIP(returnAddress, 0);
+    newRegisters.setIP(returnAddress, returnAddressLocation);
 
     // Simulate the step by replacing the register set with the new ones.
     registers = newRegisters;
diff --git a/src/Native/libunwind/src/UnwindCursor.hpp b/src/Native/libunwind/src/UnwindCursor.hpp
index ae5cbe7479e..29c3dc7733a 100644
--- a/src/Native/libunwind/src/UnwindCursor.hpp
+++ b/src/Native/libunwind/src/UnwindCursor.hpp
@@ -1474,6 +1474,8 @@ bool UnwindCursor<A, R>::getInfoFromDwarfSection(pint_t pc,
   typename CFI_Parser<A>::CIE_Info cieInfo;
   bool foundFDE = false;
   bool foundInCache = false;
+
+#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX
   // If compact encoding table gave offset into dwarf section, go directly there
   if (fdeSectionOffsetHint != 0) {
     foundFDE = CFI_Parser<A>::findFDE(_addressSpace, pc, sects.dwarf_section,
@@ -1481,6 +1483,8 @@ bool UnwindCursor<A, R>::getInfoFromDwarfSection(pint_t pc,
                                       sects.dwarf_section + fdeSectionOffsetHint,
                                       &fdeInfo, &cieInfo);
   }
+#endif
+
 #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX)
   if (!foundFDE && (sects.dwarf_index_section != 0)) {
     foundFDE = EHHeaderParser<A>::findFDE(
@@ -1488,6 +1492,8 @@ bool UnwindCursor<A, R>::getInfoFromDwarfSection(pint_t pc,
         (uint32_t)sects.dwarf_index_section_length, &fdeInfo, &cieInfo);
   }
 #endif
+
+#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX
   if (!foundFDE) {
     // otherwise, search cache of previously found FDEs.
     pint_t cachedFDE = DwarfFDECache<A>::findFDE(sects.dso_base, pc);
@@ -1505,6 +1511,7 @@ bool UnwindCursor<A, R>::getInfoFromDwarfSection(pint_t pc,
                                       (uint32_t)sects.dwarf_section_length,
                                       0, &fdeInfo, &cieInfo);
   }
+#endif
   if (foundFDE) {
     typename CFI_Parser<A>::PrologInfo prolog;
     if (CFI_Parser<A>::parseFDEInstructions(_addressSpace, fdeInfo, cieInfo, pc,
@@ -1896,7 +1903,11 @@ void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) {
 #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
   // If there is dwarf unwind info, look there next.
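+  // When only the DWARF index is available, key the "is there DWARF info" test off the
+  // index section rather than the (absent) .eh_frame section.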
+#if defined(_LIBUNWIND_USE_ONLY_DWARF_INDEX) + if (sects.dwarf_index_section != 0) { +#else if (sects.dwarf_section != 0) { +#endif if (this->getInfoFromDwarfSection(pc, sects)) { // found info in dwarf, done return; From d5766e0724af13f40fe8f28304db996172470dca Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Thu, 20 Aug 2020 07:49:07 +0200 Subject: [PATCH 23/36] ARM64 formatting --- .../src/Compiler/DependencyAnalysis/ObjectWriter.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs index 21981bcfe61..2a31cd0975c 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs @@ -857,8 +857,6 @@ public void EmitSymbolDefinition(int currentOffset) { foreach (var name in nodes) { - - _sb.Clear(); AppendExternCPrefix(_sb); name.AppendMangledName(_nodeFactory.NameMangler, _sb); From c0afb01191ef9384f85bcb36579d8b9ed2da1958 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Thu, 20 Aug 2020 07:51:23 +0200 Subject: [PATCH 24/36] ARM64: revert using alternative JIT --- src/JitInterface/src/CorInfoImpl.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/JitInterface/src/CorInfoImpl.cs b/src/JitInterface/src/CorInfoImpl.cs index ce05c5aecfd..0683c14359e 100644 --- a/src/JitInterface/src/CorInfoImpl.cs +++ b/src/JitInterface/src/CorInfoImpl.cs @@ -51,8 +51,7 @@ private enum CFI_OPCODE CFI_DEF_CFA // Take address from register and add offset to it. }; - //internal const string JitLibrary = "clrjitilc"; - internal const string JitLibrary = "protononjit"; + internal const string JitLibrary = "clrjitilc"; #if SUPPORT_JIT private const string JitSupportLibrary = "*"; From 2bf04e11bef47405266d4f99f9213c6a998e039d Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Thu, 20 Aug 2020 07:59:20 +0200 Subject: [PATCH 25/36] Arm64 (#5) * CoreRT-ARM64: Helper node asm code generation * CoreRT ARM64: Add additional relocation support * CoreRT ARM64: Use alternative functions for write barriers On ARM the JIT uses a special register interface to call the write barrier functions. But as the regular ones are still used in other places, we need two different implementations.
The helper nodes need to call the custom variants * ARM64 code generation * unwind use only the dwarf index option (see the configuration sketch below) * ARM64 runtime assembler functions * ARM64 unix stack walk * ARM64 Exception Handling * ARM64: Fix EH data decoding * Move native layout signature from read-only to data section on non-Windows targets * Fix exception handling macro call * ARM64 thunk helpers * ARM64 process cfi data for elf * ARM64 missing assembler macro * ARM64 port more assembler helpers * ARM64: Add missing case for the generic helper node * ARM64 intrinsic support * ARM64 object writer for ELF object files * ARM64 llvm patches for object writer * ARM64 include untested helper code with debug break * ARM64 reenable scanning fail exception --- .../src/Compiler/DependencyAnalysis/ObjectWriter.cs | 2 -- src/JitInterface/src/CorInfoImpl.cs | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs index 21981bcfe61..2a31cd0975c 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs @@ -857,8 +857,6 @@ public void EmitSymbolDefinition(int currentOffset) { foreach (var name in nodes) { - - _sb.Clear(); AppendExternCPrefix(_sb); name.AppendMangledName(_nodeFactory.NameMangler, _sb); diff --git a/src/JitInterface/src/CorInfoImpl.cs b/src/JitInterface/src/CorInfoImpl.cs index ffef463bfaf..cb822538581 100644 --- a/src/JitInterface/src/CorInfoImpl.cs +++ b/src/JitInterface/src/CorInfoImpl.cs @@ -51,8 +51,7 @@ private enum CFI_OPCODE CFI_DEF_CFA // Take address from register and add offset to it. }; - //internal const string JitLibrary = "clrjitilc"; - internal const string JitLibrary = "protononjit"; + internal const string JitLibrary = "clrjitilc"; #if SUPPORT_JIT private const string JitSupportLibrary = "*"; From c68cea102a07107a1dd3570b35fcc51ec5248a5a Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Thu, 20 Aug 2020 09:40:44 +0200 Subject: [PATCH 26/36] Revert "ARM64 unix stack walk" This reverts commit 6c9e2354a1b2afa9d378c88f01baccfbc53ef804.
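As context for the "unwind use only the dwarf index option" item above: with the macros this series adds to libunwind's configuration, a baremetal target that ships only an .eh_frame_hdr-style index table could opt in roughly as follows. This is a minimal sketch, not part of the patches themselves; the my_* section symbols are hypothetical placeholders for linker-script names, and when the overrides are omitted the defaults __eh_frame_hdr_start/__eh_frame_hdr_end apply.

    /* Hypothetical baremetal configuration sketch (the my_* symbol names
       are placeholders, not defined by these patches). */
    #define _LIBUNWIND_IS_BAREMETAL 1

    /* Skip the .eh_frame scan entirely; unwind via the index only. */
    #define _LIBUNWIND_USE_ONLY_DWARF_INDEX 1

    /* Optional: point the index at custom linker-script symbols instead
       of the default __eh_frame_hdr_start/__eh_frame_hdr_end pair. */
    #define _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START my_eh_frame_hdr_start
    #define _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_END   my_eh_frame_hdr_end

With _LIBUNWIND_USE_ONLY_DWARF_INDEX defined, findUnwindSections reports success based on dwarf_index_section_length alone, as the LocalAddressSpace::findUnwindSections hunk earlier in the series shows.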
--- src/Native/Runtime/unix/UnixContext.cpp | 4 +- src/Native/Runtime/unix/UnwindHelpers.cpp | 251 +++++++----------- .../libunwind/src/DwarfInstructions.hpp | 18 +- 3 files changed, 107 insertions(+), 166 deletions(-) diff --git a/src/Native/Runtime/unix/UnixContext.cpp b/src/Native/Runtime/unix/UnixContext.cpp index 08b98ef6365..458214bbe56 100644 --- a/src/Native/Runtime/unix/UnixContext.cpp +++ b/src/Native/Runtime/unix/UnixContext.cpp @@ -301,7 +301,7 @@ bool GetUnwindProcInfo(PCODE ip, unw_proc_info_t *procInfo) #elif HOST_ARM ((uint32_t*)(unwContext.data))[15] = ip; #elif HOST_ARM64 - unwContext.data[32] = ip; + ((uint32_t*)(unwContext.data))[32] = ip; #elif HOST_WASM ASSERT(false); #elif HOST_X86 @@ -618,7 +618,7 @@ bool FindProcInfo(UIntNative controlPC, UIntNative* startAddress, UIntNative* ls assert((procInfo.start_ip <= controlPC) && (controlPC < procInfo.end_ip)); -#if defined(HOST_ARM) +#if defined(HOST_ARM) || defined(HOST_ARM64) // libunwind fills by reference not by value for ARM *lsda = *((UIntNative *)procInfo.lsda); #else diff --git a/src/Native/Runtime/unix/UnwindHelpers.cpp b/src/Native/Runtime/unix/UnwindHelpers.cpp index 9ed75c6a1d7..ced22cc272c 100644 --- a/src/Native/Runtime/unix/UnwindHelpers.cpp +++ b/src/Native/Runtime/unix/UnwindHelpers.cpp @@ -475,284 +475,229 @@ void Registers_arm_rt::setRegister(int num, uint32_t value, uint32_t location) #if defined(TARGET_ARM64) -// Shim that implements methods required by libunwind over REGDISPLAY -struct Registers_REGDISPLAY : REGDISPLAY -{ - inline static int getArch() { return libunwind::REGISTERS_ARM64; } - inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } - - bool validRegister(int num) const; - bool validFloatRegister(int num) { return false; }; - bool validVectorRegister(int num) const; +class Registers_arm64_rt: public libunwind::Registers_arm64 { +public: + Registers_arm64_rt() { abort(); }; + Registers_arm64_rt(const void *registers); + bool validRegister(int num) {abort();}; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value, uint64_t location); - + bool validFloatRegister(int num) {abort();}; double getFloatRegister(int num) {abort();} void setFloatRegister(int num, double value) {abort();} - - libunwind::v128 getVectorRegister(int num) const; - void setVectorRegister(int num, libunwind::v128 value); + bool validVectorRegister(int num) const {abort();} + libunwind::v128 getVectorRegister(int num) const {abort();}; + void setVectorRegister(int num, libunwind::v128 value) {abort();}; + void jumpto() { abort();}; - uint64_t getSP() const { return SP;} - void setSP(uint64_t value, uint64_t location) { SP = value;} - uint64_t getIP() const { return IP;} + uint64_t getSP() const { return regs->SP;} + void setSP(uint64_t value, uint64_t location) { regs->SP = value;} + uint64_t getIP() const { return regs->IP;} void setIP(uint64_t value, uint64_t location) - { IP = value; pIP = (PTR_UIntNative)location; } + { regs->IP = value; regs->pIP = (PTR_UIntNative)location; } + void saveVFPAsX() {abort();}; +private: + REGDISPLAY *regs; }; -inline bool Registers_REGDISPLAY::validRegister(int num) const { - if (num == UNW_REG_SP || num == UNW_ARM64_SP) - return true; - - if (num == UNW_ARM64_FP) - return true; - - if (num == UNW_ARM64_LR) - return true; - - if (num == UNW_REG_IP) - return true; - - if (num >= UNW_ARM64_X0 && num <= UNW_ARM64_X28) - return true; - - return false; -} - -bool Registers_REGDISPLAY::validVectorRegister(int num) const -{ - if (num >= 
UNW_ARM64_D8 && num <= UNW_ARM64_D15) - return true; - - return false; +inline Registers_arm64_rt::Registers_arm64_rt(const void *registers) { + regs = (REGDISPLAY *)registers; } -inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { +inline uint64_t Registers_arm64_rt::getRegister(int regNum) const { if (regNum == UNW_REG_SP || regNum == UNW_ARM64_SP) - return SP; - - if (regNum == UNW_ARM64_FP) - return *pFP; + return regs->SP; if (regNum == UNW_ARM64_LR) - return *pLR; + return *regs->pLR; if (regNum == UNW_REG_IP) - return IP; + return regs->IP; switch (regNum) { case (UNW_ARM64_X0): - return *pX0; + return *regs->pX0; case (UNW_ARM64_X1): - return *pX1; + return *regs->pX1; case (UNW_ARM64_X2): - return *pX2; + return *regs->pX2; case (UNW_ARM64_X3): - return *pX3; + return *regs->pX3; case (UNW_ARM64_X4): - return *pX4; + return *regs->pX4; case (UNW_ARM64_X5): - return *pX5; + return *regs->pX5; case (UNW_ARM64_X6): - return *pX6; + return *regs->pX6; case (UNW_ARM64_X7): - return *pX7; + return *regs->pX7; case (UNW_ARM64_X8): - return *pX8; + return *regs->pX8; case (UNW_ARM64_X9): - return *pX9; + return *regs->pX9; case (UNW_ARM64_X10): - return *pX10; + return *regs->pX10; case (UNW_ARM64_X11): - return *pX11; + return *regs->pX11; case (UNW_ARM64_X12): - return *pX12; + return *regs->pX12; case (UNW_ARM64_X13): - return *pX13; + return *regs->pX13; case (UNW_ARM64_X14): - return *pX14; + return *regs->pX14; case (UNW_ARM64_X15): - return *pX15; + return *regs->pX15; case (UNW_ARM64_X16): - return *pX16; + return *regs->pX16; case (UNW_ARM64_X17): - return *pX17; + return *regs->pX17; case (UNW_ARM64_X18): - return *pX18; + return *regs->pX18; case (UNW_ARM64_X19): - return *pX19; + return *regs->pX19; case (UNW_ARM64_X20): - return *pX20; + return *regs->pX20; case (UNW_ARM64_X21): - return *pX21; + return *regs->pX21; case (UNW_ARM64_X22): - return *pX22; + return *regs->pX22; case (UNW_ARM64_X23): - return *pX23; + return *regs->pX23; case (UNW_ARM64_X24): - return *pX24; + return *regs->pX24; case (UNW_ARM64_X25): - return *pX25; + return *regs->pX25; case (UNW_ARM64_X26): - return *pX26; + return *regs->pX26; case (UNW_ARM64_X27): - return *pX27; + return *regs->pX27; case (UNW_ARM64_X28): - return *pX28; + return *regs->pX28; } PORTABILITY_ASSERT("unsupported arm64 register"); } -void Registers_REGDISPLAY::setRegister(int num, uint64_t value, uint64_t location) +void Registers_arm64_rt::setRegister(int num, uint64_t value, uint64_t location) { - if (num == UNW_REG_SP || num == UNW_ARM64_SP) { - SP = (UIntNative )value; - return; - } - if (num == UNW_ARM64_FP) { - pFP = (PTR_UIntNative)location; + if (num == UNW_REG_SP || num == UNW_ARM64_SP) { + regs->SP = (UIntNative )value; return; } if (num == UNW_ARM64_LR) { - pLR = (PTR_UIntNative)location; + regs->pLR = (PTR_UIntNative)location; return; } if (num == UNW_REG_IP) { - IP = value; + regs->IP = value; + /* the location could be NULL, we could try to recover + pointer to value in stack from pLR */ + if ((!location) && (regs->pLR) && (*regs->pLR == value)) + regs->pIP = regs->pLR; + else + regs->pIP = (PTR_UIntNative)location; return; } switch (num) { case (UNW_ARM64_X0): - pX0 = (PTR_UIntNative)location; + regs->pX0 = (PTR_UIntNative)location; break; case (UNW_ARM64_X1): - pX1 = (PTR_UIntNative)location; + regs->pX1 = (PTR_UIntNative)location; break; case (UNW_ARM64_X2): - pX2 = (PTR_UIntNative)location; + regs->pX2 = (PTR_UIntNative)location; break; case (UNW_ARM64_X3): - pX3 = (PTR_UIntNative)location; + 
regs->pX3 = (PTR_UIntNative)location; break; case (UNW_ARM64_X4): - pX4 = (PTR_UIntNative)location; + regs->pX4 = (PTR_UIntNative)location; break; case (UNW_ARM64_X5): - pX5 = (PTR_UIntNative)location; + regs->pX5 = (PTR_UIntNative)location; break; case (UNW_ARM64_X6): - pX6 = (PTR_UIntNative)location; + regs->pX6 = (PTR_UIntNative)location; break; case (UNW_ARM64_X7): - pX7 = (PTR_UIntNative)location; + regs->pX7 = (PTR_UIntNative)location; break; case (UNW_ARM64_X8): - pX8 = (PTR_UIntNative)location; + regs->pX8 = (PTR_UIntNative)location; break; case (UNW_ARM64_X9): - pX9 = (PTR_UIntNative)location; + regs->pX9 = (PTR_UIntNative)location; break; case (UNW_ARM64_X10): - pX10 = (PTR_UIntNative)location; + regs->pX10 = (PTR_UIntNative)location; break; case (UNW_ARM64_X11): - pX11 = (PTR_UIntNative)location; + regs->pX11 = (PTR_UIntNative)location; break; case (UNW_ARM64_X12): - pX12 = (PTR_UIntNative)location; + regs->pX12 = (PTR_UIntNative)location; break; case (UNW_ARM64_X13): - pX13 = (PTR_UIntNative)location; + regs->pX13 = (PTR_UIntNative)location; break; case (UNW_ARM64_X14): - pX14 = (PTR_UIntNative)location; + regs->pX14 = (PTR_UIntNative)location; break; case (UNW_ARM64_X15): - pX15 = (PTR_UIntNative)location; + regs->pX15 = (PTR_UIntNative)location; break; case (UNW_ARM64_X16): - pX16 = (PTR_UIntNative)location; + regs->pX16 = (PTR_UIntNative)location; break; case (UNW_ARM64_X17): - pX17 = (PTR_UIntNative)location; + regs->pX17 = (PTR_UIntNative)location; break; case (UNW_ARM64_X18): - pX18 = (PTR_UIntNative)location; + regs->pX18 = (PTR_UIntNative)location; break; case (UNW_ARM64_X19): - pX19 = (PTR_UIntNative)location; + regs->pX19 = (PTR_UIntNative)location; break; case (UNW_ARM64_X20): - pX20 = (PTR_UIntNative)location; + regs->pX20 = (PTR_UIntNative)location; break; case (UNW_ARM64_X21): - pX21 = (PTR_UIntNative)location; + regs->pX21 = (PTR_UIntNative)location; break; case (UNW_ARM64_X22): - pX22 = (PTR_UIntNative)location; + regs->pX22 = (PTR_UIntNative)location; break; case (UNW_ARM64_X23): - pX23 = (PTR_UIntNative)location; + regs->pX23 = (PTR_UIntNative)location; break; case (UNW_ARM64_X24): - pX24 = (PTR_UIntNative)location; + regs->pX24 = (PTR_UIntNative)location; break; case (UNW_ARM64_X25): - pX25 = (PTR_UIntNative)location; + regs->pX25 = (PTR_UIntNative)location; break; case (UNW_ARM64_X26): - pX26 = (PTR_UIntNative)location; + regs->pX26 = (PTR_UIntNative)location; break; case (UNW_ARM64_X27): - pX27 = (PTR_UIntNative)location; + regs->pX27 = (PTR_UIntNative)location; break; case (UNW_ARM64_X28): - pX28 = (PTR_UIntNative)location; + regs->pX28 = (PTR_UIntNative)location; break; default: PORTABILITY_ASSERT("unsupported arm64 register"); } } -libunwind::v128 Registers_REGDISPLAY::getVectorRegister(int num) const -{ - num -= UNW_ARM64_D8; - - if (num < 0 || num >= sizeof(D) / sizeof(UInt64)) - { - PORTABILITY_ASSERT("unsupported arm64 vector register"); - } - - libunwind::v128 result; - - result.vec[0] = 0; - result.vec[1] = 0; - result.vec[2] = D[num] >> 32; - result.vec[3] = D[num] & 0xFFFFFFFF; - - return result; -} - -void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) -{ - num -= UNW_ARM64_D8; - - if (num < 0 || num >= sizeof(D) / sizeof(UInt64)) - { - PORTABILITY_ASSERT("unsupported arm64 vector register"); - } - - D[num] = (UInt64)value.vec[2] << 32 | (UInt64)value.vec[3]; -} - #endif // TARGET_ARM64 bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs) @@ -762,7 +707,7 @@ bool DoTheStep(uintptr_t pc, 
UnwindInfoSections uwInfoSections, REGDISPLAY *regs #elif defined(TARGET_ARM) libunwind::UnwindCursor uc(_addressSpace, regs); #elif defined(TARGET_ARM64) - libunwind::UnwindCursor uc(_addressSpace, regs); + libunwind::UnwindCursor uc(_addressSpace, regs); #elif defined(HOST_X86) libunwind::UnwindCursor uc(_addressSpace, regs); #else @@ -779,7 +724,10 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs unw_proc_info_t procInfo; uc.getInfo(&procInfo); -#if defined(TARGET_ARM) +#if defined(TARGET_ARM64) + DwarfInstructions dwarfInst; + int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm64_rt*)regs); +#elif defined(TARGET_ARM) DwarfInstructions dwarfInst; int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm_rt*)regs); #else @@ -792,12 +740,7 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs return false; } -#if defined(TARGET_ARM64) - regs->SetAddrOfIP(regs->pLR); -#else regs->pIP = PTR_PCODE(regs->SP - sizeof(TADDR)); -#endif - #elif defined(_LIBUNWIND_ARM_EHABI) uc.setInfoBasedOnIPRegister(true); int stepRet = uc.step(); diff --git a/src/Native/libunwind/src/DwarfInstructions.hpp b/src/Native/libunwind/src/DwarfInstructions.hpp index f341772824c..c5cc6c9d510 100644 --- a/src/Native/libunwind/src/DwarfInstructions.hpp +++ b/src/Native/libunwind/src/DwarfInstructions.hpp @@ -169,7 +169,6 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, // restore registers that DWARF says were saved R newRegisters = registers; pint_t returnAddress = 0; - pint_t returnAddressLocation = 0; const int lastReg = R::lastDwarfRegNum(); assert(static_cast(CFI_Parser::kMaxRegisterNumber) >= lastReg && "register range too large"); @@ -178,14 +177,7 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, for (int i = 0; i <= lastReg; ++i) { if (prolog.savedRegisters[i].location != CFI_Parser::kRegisterUnused) { - if (i == (int)cieInfo.returnAddressRegister) { - returnAddress = getSavedRegister(addressSpace, registers, cfa, - prolog.savedRegisters[i], - returnAddressLocation); - - newRegisters.setRegister(i, returnAddress, returnAddressLocation); - } - else if (registers.validFloatRegister(i)) + if (registers.validFloatRegister(i)) newRegisters.setFloatRegister( i, getSavedFloatRegister(addressSpace, registers, cfa, prolog.savedRegisters[i])); @@ -193,6 +185,12 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, newRegisters.setVectorRegister( i, getSavedVectorRegister(addressSpace, registers, cfa, prolog.savedRegisters[i])); + else if (i == (int)cieInfo.returnAddressRegister) { + pint_t dummyLocation; + returnAddress = getSavedRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i], + dummyLocation); + } else if (registers.validRegister(i)) { pint_t value; pint_t location; @@ -274,7 +272,7 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, // Return address is address after call site instruction, so setting IP to // that does simualates a return. - newRegisters.setIP(returnAddress, returnAddressLocation); + newRegisters.setIP(returnAddress, 0); // Simulate the step by replacing the register set with the new ones. 
registers = newRegisters; From bbecadca71460dff9a91330d0275a6eb172eb11c Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Thu, 20 Aug 2020 09:44:53 +0200 Subject: [PATCH 27/36] Arm64 (#6) * CoreRT-ARM64: Helper node asm code generation * CoreRT ARM64: Add additional relocation support * CoreRT ARM64: Use alternative functions for write barriers On ARM the JIT uses a special register interface to call the write barrier functions. But as the regular ones are still used in other places, we need two different implementations. The helper nodes need to call the custom variants * ARM64 code generation * unwind use only the dwarf index option * ARM64 runtime assembler functions * ARM64 unix stack walk * ARM64 Exception Handling * ARM64: Fix EH data decoding * Move native layout signature from read-only to data section on non-Windows targets * Fix exception handling macro call * ARM64 thunk helpers * ARM64 process cfi data for elf * ARM64 missing assembler macro * ARM64 port more assembler helpers * ARM64: Add missing case for the generic helper node * ARM64 intrinsic support * ARM64 object writer for ELF object files * ARM64 llvm patches for object writer * ARM64 include untested helper code with debug break * ARM64 reenable scanning fail exception * ARM64 formatting * ARM64: revert using alternative JIT * Revert "ARM64 unix stack walk" This reverts commit 6c9e2354a1b2afa9d378c88f01baccfbc53ef804. --- src/Native/Runtime/unix/UnixContext.cpp | 4 +- src/Native/Runtime/unix/UnwindHelpers.cpp | 251 +++++++----------- .../libunwind/src/DwarfInstructions.hpp | 18 +- 3 files changed, 107 insertions(+), 166 deletions(-) diff --git a/src/Native/Runtime/unix/UnixContext.cpp b/src/Native/Runtime/unix/UnixContext.cpp index 08b98ef6365..458214bbe56 100644 --- a/src/Native/Runtime/unix/UnixContext.cpp +++ b/src/Native/Runtime/unix/UnixContext.cpp @@ -301,7 +301,7 @@ bool GetUnwindProcInfo(PCODE ip, unw_proc_info_t *procInfo) #elif HOST_ARM ((uint32_t*)(unwContext.data))[15] = ip; #elif HOST_ARM64 - unwContext.data[32] = ip; + ((uint32_t*)(unwContext.data))[32] = ip; #elif HOST_WASM ASSERT(false); #elif HOST_X86 @@ -618,7 +618,7 @@ bool FindProcInfo(UIntNative controlPC, UIntNative* startAddress, UIntNative* ls assert((procInfo.start_ip <= controlPC) && (controlPC < procInfo.end_ip)); -#if defined(HOST_ARM) +#if defined(HOST_ARM) || defined(HOST_ARM64) // libunwind fills by reference not by value for ARM *lsda = *((UIntNative *)procInfo.lsda); #else diff --git a/src/Native/Runtime/unix/UnwindHelpers.cpp b/src/Native/Runtime/unix/UnwindHelpers.cpp index 9ed75c6a1d7..ced22cc272c 100644 --- a/src/Native/Runtime/unix/UnwindHelpers.cpp +++ b/src/Native/Runtime/unix/UnwindHelpers.cpp @@ -475,284 +475,229 @@ void Registers_arm_rt::setRegister(int num, uint32_t value, uint32_t location) #if defined(TARGET_ARM64) -// Shim that implements methods required by libunwind over REGDISPLAY -struct Registers_REGDISPLAY : REGDISPLAY -{ - inline static int getArch() { return libunwind::REGISTERS_ARM64; } - inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } - - bool validRegister(int num) const; - bool validFloatRegister(int num) { return false; }; - bool validVectorRegister(int num) const; +class Registers_arm64_rt: public libunwind::Registers_arm64 { +public: + Registers_arm64_rt() { abort(); }; + Registers_arm64_rt(const void *registers); + bool validRegister(int num) {abort();}; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value, uint64_t location); - + bool 
validFloatRegister(int num) {abort();}; double getFloatRegister(int num) {abort();} void setFloatRegister(int num, double value) {abort();} - - libunwind::v128 getVectorRegister(int num) const; - void setVectorRegister(int num, libunwind::v128 value); + bool validVectorRegister(int num) const {abort();} + libunwind::v128 getVectorRegister(int num) const {abort();}; + void setVectorRegister(int num, libunwind::v128 value) {abort();}; + void jumpto() { abort();}; - uint64_t getSP() const { return SP;} - void setSP(uint64_t value, uint64_t location) { SP = value;} - uint64_t getIP() const { return IP;} + uint64_t getSP() const { return regs->SP;} + void setSP(uint64_t value, uint64_t location) { regs->SP = value;} + uint64_t getIP() const { return regs->IP;} void setIP(uint64_t value, uint64_t location) - { IP = value; pIP = (PTR_UIntNative)location; } + { regs->IP = value; regs->pIP = (PTR_UIntNative)location; } + void saveVFPAsX() {abort();}; +private: + REGDISPLAY *regs; }; -inline bool Registers_REGDISPLAY::validRegister(int num) const { - if (num == UNW_REG_SP || num == UNW_ARM64_SP) - return true; - - if (num == UNW_ARM64_FP) - return true; - - if (num == UNW_ARM64_LR) - return true; - - if (num == UNW_REG_IP) - return true; - - if (num >= UNW_ARM64_X0 && num <= UNW_ARM64_X28) - return true; - - return false; -} - -bool Registers_REGDISPLAY::validVectorRegister(int num) const -{ - if (num >= UNW_ARM64_D8 && num <= UNW_ARM64_D15) - return true; - - return false; +inline Registers_arm64_rt::Registers_arm64_rt(const void *registers) { + regs = (REGDISPLAY *)registers; } -inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { +inline uint64_t Registers_arm64_rt::getRegister(int regNum) const { if (regNum == UNW_REG_SP || regNum == UNW_ARM64_SP) - return SP; - - if (regNum == UNW_ARM64_FP) - return *pFP; + return regs->SP; if (regNum == UNW_ARM64_LR) - return *pLR; + return *regs->pLR; if (regNum == UNW_REG_IP) - return IP; + return regs->IP; switch (regNum) { case (UNW_ARM64_X0): - return *pX0; + return *regs->pX0; case (UNW_ARM64_X1): - return *pX1; + return *regs->pX1; case (UNW_ARM64_X2): - return *pX2; + return *regs->pX2; case (UNW_ARM64_X3): - return *pX3; + return *regs->pX3; case (UNW_ARM64_X4): - return *pX4; + return *regs->pX4; case (UNW_ARM64_X5): - return *pX5; + return *regs->pX5; case (UNW_ARM64_X6): - return *pX6; + return *regs->pX6; case (UNW_ARM64_X7): - return *pX7; + return *regs->pX7; case (UNW_ARM64_X8): - return *pX8; + return *regs->pX8; case (UNW_ARM64_X9): - return *pX9; + return *regs->pX9; case (UNW_ARM64_X10): - return *pX10; + return *regs->pX10; case (UNW_ARM64_X11): - return *pX11; + return *regs->pX11; case (UNW_ARM64_X12): - return *pX12; + return *regs->pX12; case (UNW_ARM64_X13): - return *pX13; + return *regs->pX13; case (UNW_ARM64_X14): - return *pX14; + return *regs->pX14; case (UNW_ARM64_X15): - return *pX15; + return *regs->pX15; case (UNW_ARM64_X16): - return *pX16; + return *regs->pX16; case (UNW_ARM64_X17): - return *pX17; + return *regs->pX17; case (UNW_ARM64_X18): - return *pX18; + return *regs->pX18; case (UNW_ARM64_X19): - return *pX19; + return *regs->pX19; case (UNW_ARM64_X20): - return *pX20; + return *regs->pX20; case (UNW_ARM64_X21): - return *pX21; + return *regs->pX21; case (UNW_ARM64_X22): - return *pX22; + return *regs->pX22; case (UNW_ARM64_X23): - return *pX23; + return *regs->pX23; case (UNW_ARM64_X24): - return *pX24; + return *regs->pX24; case (UNW_ARM64_X25): - return *pX25; + return *regs->pX25; case 
(UNW_ARM64_X26): - return *pX26; + return *regs->pX26; case (UNW_ARM64_X27): - return *pX27; + return *regs->pX27; case (UNW_ARM64_X28): - return *pX28; + return *regs->pX28; } PORTABILITY_ASSERT("unsupported arm64 register"); } -void Registers_REGDISPLAY::setRegister(int num, uint64_t value, uint64_t location) +void Registers_arm64_rt::setRegister(int num, uint64_t value, uint64_t location) { - if (num == UNW_REG_SP || num == UNW_ARM64_SP) { - SP = (UIntNative )value; - return; - } - if (num == UNW_ARM64_FP) { - pFP = (PTR_UIntNative)location; + if (num == UNW_REG_SP || num == UNW_ARM64_SP) { + regs->SP = (UIntNative )value; return; } if (num == UNW_ARM64_LR) { - pLR = (PTR_UIntNative)location; + regs->pLR = (PTR_UIntNative)location; return; } if (num == UNW_REG_IP) { - IP = value; + regs->IP = value; + /* the location could be NULL, we could try to recover + pointer to value in stack from pLR */ + if ((!location) && (regs->pLR) && (*regs->pLR == value)) + regs->pIP = regs->pLR; + else + regs->pIP = (PTR_UIntNative)location; return; } switch (num) { case (UNW_ARM64_X0): - pX0 = (PTR_UIntNative)location; + regs->pX0 = (PTR_UIntNative)location; break; case (UNW_ARM64_X1): - pX1 = (PTR_UIntNative)location; + regs->pX1 = (PTR_UIntNative)location; break; case (UNW_ARM64_X2): - pX2 = (PTR_UIntNative)location; + regs->pX2 = (PTR_UIntNative)location; break; case (UNW_ARM64_X3): - pX3 = (PTR_UIntNative)location; + regs->pX3 = (PTR_UIntNative)location; break; case (UNW_ARM64_X4): - pX4 = (PTR_UIntNative)location; + regs->pX4 = (PTR_UIntNative)location; break; case (UNW_ARM64_X5): - pX5 = (PTR_UIntNative)location; + regs->pX5 = (PTR_UIntNative)location; break; case (UNW_ARM64_X6): - pX6 = (PTR_UIntNative)location; + regs->pX6 = (PTR_UIntNative)location; break; case (UNW_ARM64_X7): - pX7 = (PTR_UIntNative)location; + regs->pX7 = (PTR_UIntNative)location; break; case (UNW_ARM64_X8): - pX8 = (PTR_UIntNative)location; + regs->pX8 = (PTR_UIntNative)location; break; case (UNW_ARM64_X9): - pX9 = (PTR_UIntNative)location; + regs->pX9 = (PTR_UIntNative)location; break; case (UNW_ARM64_X10): - pX10 = (PTR_UIntNative)location; + regs->pX10 = (PTR_UIntNative)location; break; case (UNW_ARM64_X11): - pX11 = (PTR_UIntNative)location; + regs->pX11 = (PTR_UIntNative)location; break; case (UNW_ARM64_X12): - pX12 = (PTR_UIntNative)location; + regs->pX12 = (PTR_UIntNative)location; break; case (UNW_ARM64_X13): - pX13 = (PTR_UIntNative)location; + regs->pX13 = (PTR_UIntNative)location; break; case (UNW_ARM64_X14): - pX14 = (PTR_UIntNative)location; + regs->pX14 = (PTR_UIntNative)location; break; case (UNW_ARM64_X15): - pX15 = (PTR_UIntNative)location; + regs->pX15 = (PTR_UIntNative)location; break; case (UNW_ARM64_X16): - pX16 = (PTR_UIntNative)location; + regs->pX16 = (PTR_UIntNative)location; break; case (UNW_ARM64_X17): - pX17 = (PTR_UIntNative)location; + regs->pX17 = (PTR_UIntNative)location; break; case (UNW_ARM64_X18): - pX18 = (PTR_UIntNative)location; + regs->pX18 = (PTR_UIntNative)location; break; case (UNW_ARM64_X19): - pX19 = (PTR_UIntNative)location; + regs->pX19 = (PTR_UIntNative)location; break; case (UNW_ARM64_X20): - pX20 = (PTR_UIntNative)location; + regs->pX20 = (PTR_UIntNative)location; break; case (UNW_ARM64_X21): - pX21 = (PTR_UIntNative)location; + regs->pX21 = (PTR_UIntNative)location; break; case (UNW_ARM64_X22): - pX22 = (PTR_UIntNative)location; + regs->pX22 = (PTR_UIntNative)location; break; case (UNW_ARM64_X23): - pX23 = (PTR_UIntNative)location; + regs->pX23 = (PTR_UIntNative)location; 
break; case (UNW_ARM64_X24): - pX24 = (PTR_UIntNative)location; + regs->pX24 = (PTR_UIntNative)location; break; case (UNW_ARM64_X25): - pX25 = (PTR_UIntNative)location; + regs->pX25 = (PTR_UIntNative)location; break; case (UNW_ARM64_X26): - pX26 = (PTR_UIntNative)location; + regs->pX26 = (PTR_UIntNative)location; break; case (UNW_ARM64_X27): - pX27 = (PTR_UIntNative)location; + regs->pX27 = (PTR_UIntNative)location; break; case (UNW_ARM64_X28): - pX28 = (PTR_UIntNative)location; + regs->pX28 = (PTR_UIntNative)location; break; default: PORTABILITY_ASSERT("unsupported arm64 register"); } } -libunwind::v128 Registers_REGDISPLAY::getVectorRegister(int num) const -{ - num -= UNW_ARM64_D8; - - if (num < 0 || num >= sizeof(D) / sizeof(UInt64)) - { - PORTABILITY_ASSERT("unsupported arm64 vector register"); - } - - libunwind::v128 result; - - result.vec[0] = 0; - result.vec[1] = 0; - result.vec[2] = D[num] >> 32; - result.vec[3] = D[num] & 0xFFFFFFFF; - - return result; -} - -void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) -{ - num -= UNW_ARM64_D8; - - if (num < 0 || num >= sizeof(D) / sizeof(UInt64)) - { - PORTABILITY_ASSERT("unsupported arm64 vector register"); - } - - D[num] = (UInt64)value.vec[2] << 32 | (UInt64)value.vec[3]; -} - #endif // TARGET_ARM64 bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs) @@ -762,7 +707,7 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs #elif defined(TARGET_ARM) libunwind::UnwindCursor uc(_addressSpace, regs); #elif defined(TARGET_ARM64) - libunwind::UnwindCursor uc(_addressSpace, regs); + libunwind::UnwindCursor uc(_addressSpace, regs); #elif defined(HOST_X86) libunwind::UnwindCursor uc(_addressSpace, regs); #else @@ -779,7 +724,10 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs unw_proc_info_t procInfo; uc.getInfo(&procInfo); -#if defined(TARGET_ARM) +#if defined(TARGET_ARM64) + DwarfInstructions dwarfInst; + int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm64_rt*)regs); +#elif defined(TARGET_ARM) DwarfInstructions dwarfInst; int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm_rt*)regs); #else @@ -792,12 +740,7 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs return false; } -#if defined(TARGET_ARM64) - regs->SetAddrOfIP(regs->pLR); -#else regs->pIP = PTR_PCODE(regs->SP - sizeof(TADDR)); -#endif - #elif defined(_LIBUNWIND_ARM_EHABI) uc.setInfoBasedOnIPRegister(true); int stepRet = uc.step(); diff --git a/src/Native/libunwind/src/DwarfInstructions.hpp b/src/Native/libunwind/src/DwarfInstructions.hpp index f341772824c..c5cc6c9d510 100644 --- a/src/Native/libunwind/src/DwarfInstructions.hpp +++ b/src/Native/libunwind/src/DwarfInstructions.hpp @@ -169,7 +169,6 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, // restore registers that DWARF says were saved R newRegisters = registers; pint_t returnAddress = 0; - pint_t returnAddressLocation = 0; const int lastReg = R::lastDwarfRegNum(); assert(static_cast(CFI_Parser::kMaxRegisterNumber) >= lastReg && "register range too large"); @@ -178,14 +177,7 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, for (int i = 0; i <= lastReg; ++i) { if (prolog.savedRegisters[i].location != CFI_Parser::kRegisterUnused) { - if (i == (int)cieInfo.returnAddressRegister) { - returnAddress = getSavedRegister(addressSpace, registers, cfa, - 
prolog.savedRegisters[i], - returnAddressLocation); - - newRegisters.setRegister(i, returnAddress, returnAddressLocation); - } - else if (registers.validFloatRegister(i)) + if (registers.validFloatRegister(i)) newRegisters.setFloatRegister( i, getSavedFloatRegister(addressSpace, registers, cfa, prolog.savedRegisters[i])); @@ -193,6 +185,12 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, newRegisters.setVectorRegister( i, getSavedVectorRegister(addressSpace, registers, cfa, prolog.savedRegisters[i])); + else if (i == (int)cieInfo.returnAddressRegister) { + pint_t dummyLocation; + returnAddress = getSavedRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i], + dummyLocation); + } else if (registers.validRegister(i)) { pint_t value; pint_t location; @@ -274,7 +272,7 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, // Return address is address after call site instruction, so setting IP to // that does simualates a return. - newRegisters.setIP(returnAddress, returnAddressLocation); + newRegisters.setIP(returnAddress, 0); // Simulate the step by replacing the register set with the new ones. registers = newRegisters; From 37ce54090ea6c0eb9a8497923a8179ab98fca764 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Thu, 20 Aug 2020 10:44:03 +0200 Subject: [PATCH 28/36] Revert "Revert "ARM64 unix stack walk"" This reverts commit c68cea102a07107a1dd3570b35fcc51ec5248a5a. --- src/Native/Runtime/unix/UnixContext.cpp | 4 +- src/Native/Runtime/unix/UnwindHelpers.cpp | 251 +++++++++++------- .../libunwind/src/DwarfInstructions.hpp | 18 +- 3 files changed, 166 insertions(+), 107 deletions(-) diff --git a/src/Native/Runtime/unix/UnixContext.cpp b/src/Native/Runtime/unix/UnixContext.cpp index 458214bbe56..08b98ef6365 100644 --- a/src/Native/Runtime/unix/UnixContext.cpp +++ b/src/Native/Runtime/unix/UnixContext.cpp @@ -301,7 +301,7 @@ bool GetUnwindProcInfo(PCODE ip, unw_proc_info_t *procInfo) #elif HOST_ARM ((uint32_t*)(unwContext.data))[15] = ip; #elif HOST_ARM64 - ((uint32_t*)(unwContext.data))[32] = ip; + unwContext.data[32] = ip; #elif HOST_WASM ASSERT(false); #elif HOST_X86 @@ -618,7 +618,7 @@ bool FindProcInfo(UIntNative controlPC, UIntNative* startAddress, UIntNative* ls assert((procInfo.start_ip <= controlPC) && (controlPC < procInfo.end_ip)); -#if defined(HOST_ARM) || defined(HOST_ARM64) +#if defined(HOST_ARM) // libunwind fills by reference not by value for ARM *lsda = *((UIntNative *)procInfo.lsda); #else diff --git a/src/Native/Runtime/unix/UnwindHelpers.cpp b/src/Native/Runtime/unix/UnwindHelpers.cpp index ced22cc272c..9ed75c6a1d7 100644 --- a/src/Native/Runtime/unix/UnwindHelpers.cpp +++ b/src/Native/Runtime/unix/UnwindHelpers.cpp @@ -475,229 +475,284 @@ void Registers_arm_rt::setRegister(int num, uint32_t value, uint32_t location) #if defined(TARGET_ARM64) -class Registers_arm64_rt: public libunwind::Registers_arm64 { -public: - Registers_arm64_rt() { abort(); }; - Registers_arm64_rt(const void *registers); +// Shim that implements methods required by libunwind over REGDISPLAY +struct Registers_REGDISPLAY : REGDISPLAY +{ + inline static int getArch() { return libunwind::REGISTERS_ARM64; } + inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } + + bool validRegister(int num) const; + bool validFloatRegister(int num) { return false; }; + bool validVectorRegister(int num) const; - bool validRegister(int num) {abort();}; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value, uint64_t 
location); - bool validFloatRegister(int num) {abort();}; + double getFloatRegister(int num) {abort();} void setFloatRegister(int num, double value) {abort();} - bool validVectorRegister(int num) const {abort();} - libunwind::v128 getVectorRegister(int num) const {abort();}; - void setVectorRegister(int num, libunwind::v128 value) {abort();}; - void jumpto() { abort();}; + + libunwind::v128 getVectorRegister(int num) const; + void setVectorRegister(int num, libunwind::v128 value); - uint64_t getSP() const { return regs->SP;} - void setSP(uint64_t value, uint64_t location) { regs->SP = value;} - uint64_t getIP() const { return regs->IP;} + uint64_t getSP() const { return SP;} + void setSP(uint64_t value, uint64_t location) { SP = value;} + uint64_t getIP() const { return IP;} void setIP(uint64_t value, uint64_t location) - { regs->IP = value; regs->pIP = (PTR_UIntNative)location; } - void saveVFPAsX() {abort();}; -private: - REGDISPLAY *regs; + { IP = value; pIP = (PTR_UIntNative)location; } }; -inline Registers_arm64_rt::Registers_arm64_rt(const void *registers) { - regs = (REGDISPLAY *)registers; +inline bool Registers_REGDISPLAY::validRegister(int num) const { + if (num == UNW_REG_SP || num == UNW_ARM64_SP) + return true; + + if (num == UNW_ARM64_FP) + return true; + + if (num == UNW_ARM64_LR) + return true; + + if (num == UNW_REG_IP) + return true; + + if (num >= UNW_ARM64_X0 && num <= UNW_ARM64_X28) + return true; + + return false; +} + +bool Registers_REGDISPLAY::validVectorRegister(int num) const +{ + if (num >= UNW_ARM64_D8 && num <= UNW_ARM64_D15) + return true; + + return false; } -inline uint64_t Registers_arm64_rt::getRegister(int regNum) const { +inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { if (regNum == UNW_REG_SP || regNum == UNW_ARM64_SP) - return regs->SP; + return SP; + + if (regNum == UNW_ARM64_FP) + return *pFP; if (regNum == UNW_ARM64_LR) - return *regs->pLR; + return *pLR; if (regNum == UNW_REG_IP) - return regs->IP; + return IP; switch (regNum) { case (UNW_ARM64_X0): - return *regs->pX0; + return *pX0; case (UNW_ARM64_X1): - return *regs->pX1; + return *pX1; case (UNW_ARM64_X2): - return *regs->pX2; + return *pX2; case (UNW_ARM64_X3): - return *regs->pX3; + return *pX3; case (UNW_ARM64_X4): - return *regs->pX4; + return *pX4; case (UNW_ARM64_X5): - return *regs->pX5; + return *pX5; case (UNW_ARM64_X6): - return *regs->pX6; + return *pX6; case (UNW_ARM64_X7): - return *regs->pX7; + return *pX7; case (UNW_ARM64_X8): - return *regs->pX8; + return *pX8; case (UNW_ARM64_X9): - return *regs->pX9; + return *pX9; case (UNW_ARM64_X10): - return *regs->pX10; + return *pX10; case (UNW_ARM64_X11): - return *regs->pX11; + return *pX11; case (UNW_ARM64_X12): - return *regs->pX12; + return *pX12; case (UNW_ARM64_X13): - return *regs->pX13; + return *pX13; case (UNW_ARM64_X14): - return *regs->pX14; + return *pX14; case (UNW_ARM64_X15): - return *regs->pX15; + return *pX15; case (UNW_ARM64_X16): - return *regs->pX16; + return *pX16; case (UNW_ARM64_X17): - return *regs->pX17; + return *pX17; case (UNW_ARM64_X18): - return *regs->pX18; + return *pX18; case (UNW_ARM64_X19): - return *regs->pX19; + return *pX19; case (UNW_ARM64_X20): - return *regs->pX20; + return *pX20; case (UNW_ARM64_X21): - return *regs->pX21; + return *pX21; case (UNW_ARM64_X22): - return *regs->pX22; + return *pX22; case (UNW_ARM64_X23): - return *regs->pX23; + return *pX23; case (UNW_ARM64_X24): - return *regs->pX24; + return *pX24; case (UNW_ARM64_X25): - return *regs->pX25; + return 
*pX25; case (UNW_ARM64_X26): - return *regs->pX26; + return *pX26; case (UNW_ARM64_X27): - return *regs->pX27; + return *pX27; case (UNW_ARM64_X28): - return *regs->pX28; + return *pX28; } PORTABILITY_ASSERT("unsupported arm64 register"); } -void Registers_arm64_rt::setRegister(int num, uint64_t value, uint64_t location) +void Registers_REGDISPLAY::setRegister(int num, uint64_t value, uint64_t location) { - if (num == UNW_REG_SP || num == UNW_ARM64_SP) { - regs->SP = (UIntNative )value; + SP = (UIntNative )value; + return; + } + + if (num == UNW_ARM64_FP) { + pFP = (PTR_UIntNative)location; return; } if (num == UNW_ARM64_LR) { - regs->pLR = (PTR_UIntNative)location; + pLR = (PTR_UIntNative)location; return; } if (num == UNW_REG_IP) { - regs->IP = value; - /* the location could be NULL, we could try to recover - pointer to value in stack from pLR */ - if ((!location) && (regs->pLR) && (*regs->pLR == value)) - regs->pIP = regs->pLR; - else - regs->pIP = (PTR_UIntNative)location; + IP = value; return; } switch (num) { case (UNW_ARM64_X0): - regs->pX0 = (PTR_UIntNative)location; + pX0 = (PTR_UIntNative)location; break; case (UNW_ARM64_X1): - regs->pX1 = (PTR_UIntNative)location; + pX1 = (PTR_UIntNative)location; break; case (UNW_ARM64_X2): - regs->pX2 = (PTR_UIntNative)location; + pX2 = (PTR_UIntNative)location; break; case (UNW_ARM64_X3): - regs->pX3 = (PTR_UIntNative)location; + pX3 = (PTR_UIntNative)location; break; case (UNW_ARM64_X4): - regs->pX4 = (PTR_UIntNative)location; + pX4 = (PTR_UIntNative)location; break; case (UNW_ARM64_X5): - regs->pX5 = (PTR_UIntNative)location; + pX5 = (PTR_UIntNative)location; break; case (UNW_ARM64_X6): - regs->pX6 = (PTR_UIntNative)location; + pX6 = (PTR_UIntNative)location; break; case (UNW_ARM64_X7): - regs->pX7 = (PTR_UIntNative)location; + pX7 = (PTR_UIntNative)location; break; case (UNW_ARM64_X8): - regs->pX8 = (PTR_UIntNative)location; + pX8 = (PTR_UIntNative)location; break; case (UNW_ARM64_X9): - regs->pX9 = (PTR_UIntNative)location; + pX9 = (PTR_UIntNative)location; break; case (UNW_ARM64_X10): - regs->pX10 = (PTR_UIntNative)location; + pX10 = (PTR_UIntNative)location; break; case (UNW_ARM64_X11): - regs->pX11 = (PTR_UIntNative)location; + pX11 = (PTR_UIntNative)location; break; case (UNW_ARM64_X12): - regs->pX12 = (PTR_UIntNative)location; + pX12 = (PTR_UIntNative)location; break; case (UNW_ARM64_X13): - regs->pX13 = (PTR_UIntNative)location; + pX13 = (PTR_UIntNative)location; break; case (UNW_ARM64_X14): - regs->pX14 = (PTR_UIntNative)location; + pX14 = (PTR_UIntNative)location; break; case (UNW_ARM64_X15): - regs->pX15 = (PTR_UIntNative)location; + pX15 = (PTR_UIntNative)location; break; case (UNW_ARM64_X16): - regs->pX16 = (PTR_UIntNative)location; + pX16 = (PTR_UIntNative)location; break; case (UNW_ARM64_X17): - regs->pX17 = (PTR_UIntNative)location; + pX17 = (PTR_UIntNative)location; break; case (UNW_ARM64_X18): - regs->pX18 = (PTR_UIntNative)location; + pX18 = (PTR_UIntNative)location; break; case (UNW_ARM64_X19): - regs->pX19 = (PTR_UIntNative)location; + pX19 = (PTR_UIntNative)location; break; case (UNW_ARM64_X20): - regs->pX20 = (PTR_UIntNative)location; + pX20 = (PTR_UIntNative)location; break; case (UNW_ARM64_X21): - regs->pX21 = (PTR_UIntNative)location; + pX21 = (PTR_UIntNative)location; break; case (UNW_ARM64_X22): - regs->pX22 = (PTR_UIntNative)location; + pX22 = (PTR_UIntNative)location; break; case (UNW_ARM64_X23): - regs->pX23 = (PTR_UIntNative)location; + pX23 = (PTR_UIntNative)location; break; case (UNW_ARM64_X24): - 
regs->pX24 = (PTR_UIntNative)location; + pX24 = (PTR_UIntNative)location; break; case (UNW_ARM64_X25): - regs->pX25 = (PTR_UIntNative)location; + pX25 = (PTR_UIntNative)location; break; case (UNW_ARM64_X26): - regs->pX26 = (PTR_UIntNative)location; + pX26 = (PTR_UIntNative)location; break; case (UNW_ARM64_X27): - regs->pX27 = (PTR_UIntNative)location; + pX27 = (PTR_UIntNative)location; break; case (UNW_ARM64_X28): - regs->pX28 = (PTR_UIntNative)location; + pX28 = (PTR_UIntNative)location; break; default: PORTABILITY_ASSERT("unsupported arm64 register"); } } +libunwind::v128 Registers_REGDISPLAY::getVectorRegister(int num) const +{ + num -= UNW_ARM64_D8; + + if (num < 0 || num >= sizeof(D) / sizeof(UInt64)) + { + PORTABILITY_ASSERT("unsupported arm64 vector register"); + } + + libunwind::v128 result; + + result.vec[0] = 0; + result.vec[1] = 0; + result.vec[2] = D[num] >> 32; + result.vec[3] = D[num] & 0xFFFFFFFF; + + return result; +} + +void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) +{ + num -= UNW_ARM64_D8; + + if (num < 0 || num >= sizeof(D) / sizeof(UInt64)) + { + PORTABILITY_ASSERT("unsupported arm64 vector register"); + } + + D[num] = (UInt64)value.vec[2] << 32 | (UInt64)value.vec[3]; +} + #endif // TARGET_ARM64 bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs) @@ -707,7 +762,7 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs #elif defined(TARGET_ARM) libunwind::UnwindCursor uc(_addressSpace, regs); #elif defined(TARGET_ARM64) - libunwind::UnwindCursor uc(_addressSpace, regs); + libunwind::UnwindCursor uc(_addressSpace, regs); #elif defined(HOST_X86) libunwind::UnwindCursor uc(_addressSpace, regs); #else @@ -724,10 +779,7 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs unw_proc_info_t procInfo; uc.getInfo(&procInfo); -#if defined(TARGET_ARM64) - DwarfInstructions dwarfInst; - int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm64_rt*)regs); -#elif defined(TARGET_ARM) +#if defined(TARGET_ARM) DwarfInstructions dwarfInst; int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm_rt*)regs); #else @@ -740,7 +792,12 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs return false; } +#if defined(TARGET_ARM64) + regs->SetAddrOfIP(regs->pLR); +#else regs->pIP = PTR_PCODE(regs->SP - sizeof(TADDR)); +#endif + #elif defined(_LIBUNWIND_ARM_EHABI) uc.setInfoBasedOnIPRegister(true); int stepRet = uc.step(); diff --git a/src/Native/libunwind/src/DwarfInstructions.hpp b/src/Native/libunwind/src/DwarfInstructions.hpp index c5cc6c9d510..f341772824c 100644 --- a/src/Native/libunwind/src/DwarfInstructions.hpp +++ b/src/Native/libunwind/src/DwarfInstructions.hpp @@ -169,6 +169,7 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, // restore registers that DWARF says were saved R newRegisters = registers; pint_t returnAddress = 0; + pint_t returnAddressLocation = 0; const int lastReg = R::lastDwarfRegNum(); assert(static_cast(CFI_Parser::kMaxRegisterNumber) >= lastReg && "register range too large"); @@ -177,7 +178,14 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, for (int i = 0; i <= lastReg; ++i) { if (prolog.savedRegisters[i].location != CFI_Parser::kRegisterUnused) { - if (registers.validFloatRegister(i)) + if (i == (int)cieInfo.returnAddressRegister) { + returnAddress = getSavedRegister(addressSpace, registers, cfa, + 
prolog.savedRegisters[i], + returnAddressLocation); + + newRegisters.setRegister(i, returnAddress, returnAddressLocation); + } + else if (registers.validFloatRegister(i)) newRegisters.setFloatRegister( i, getSavedFloatRegister(addressSpace, registers, cfa, prolog.savedRegisters[i])); @@ -185,12 +193,6 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, newRegisters.setVectorRegister( i, getSavedVectorRegister(addressSpace, registers, cfa, prolog.savedRegisters[i])); - else if (i == (int)cieInfo.returnAddressRegister) { - pint_t dummyLocation; - returnAddress = getSavedRegister(addressSpace, registers, cfa, - prolog.savedRegisters[i], - dummyLocation); - } else if (registers.validRegister(i)) { pint_t value; pint_t location; @@ -272,7 +274,7 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, // Return address is address after call site instruction, so setting IP to // that does simualates a return. - newRegisters.setIP(returnAddress, 0); + newRegisters.setIP(returnAddress, returnAddressLocation); // Simulate the step by replacing the register set with the new ones. registers = newRegisters; From d1cf628642186ce56a161fd65177957233ea7695 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Thu, 20 Aug 2020 11:00:22 +0200 Subject: [PATCH 29/36] Revert "Revert "Revert "ARM64 unix stack walk""" This reverts commit 37ce54090ea6c0eb9a8497923a8179ab98fca764. --- src/Native/Runtime/unix/UnixContext.cpp | 4 +- src/Native/Runtime/unix/UnwindHelpers.cpp | 251 +++++++----------- .../libunwind/src/DwarfInstructions.hpp | 18 +- 3 files changed, 107 insertions(+), 166 deletions(-) diff --git a/src/Native/Runtime/unix/UnixContext.cpp b/src/Native/Runtime/unix/UnixContext.cpp index 08b98ef6365..458214bbe56 100644 --- a/src/Native/Runtime/unix/UnixContext.cpp +++ b/src/Native/Runtime/unix/UnixContext.cpp @@ -301,7 +301,7 @@ bool GetUnwindProcInfo(PCODE ip, unw_proc_info_t *procInfo) #elif HOST_ARM ((uint32_t*)(unwContext.data))[15] = ip; #elif HOST_ARM64 - unwContext.data[32] = ip; + ((uint32_t*)(unwContext.data))[32] = ip; #elif HOST_WASM ASSERT(false); #elif HOST_X86 @@ -618,7 +618,7 @@ bool FindProcInfo(UIntNative controlPC, UIntNative* startAddress, UIntNative* ls assert((procInfo.start_ip <= controlPC) && (controlPC < procInfo.end_ip)); -#if defined(HOST_ARM) +#if defined(HOST_ARM) || defined(HOST_ARM64) // libunwind fills by reference not by value for ARM *lsda = *((UIntNative *)procInfo.lsda); #else diff --git a/src/Native/Runtime/unix/UnwindHelpers.cpp b/src/Native/Runtime/unix/UnwindHelpers.cpp index 9ed75c6a1d7..ced22cc272c 100644 --- a/src/Native/Runtime/unix/UnwindHelpers.cpp +++ b/src/Native/Runtime/unix/UnwindHelpers.cpp @@ -475,284 +475,229 @@ void Registers_arm_rt::setRegister(int num, uint32_t value, uint32_t location) #if defined(TARGET_ARM64) -// Shim that implements methods required by libunwind over REGDISPLAY -struct Registers_REGDISPLAY : REGDISPLAY -{ - inline static int getArch() { return libunwind::REGISTERS_ARM64; } - inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } - - bool validRegister(int num) const; - bool validFloatRegister(int num) { return false; }; - bool validVectorRegister(int num) const; +class Registers_arm64_rt: public libunwind::Registers_arm64 { +public: + Registers_arm64_rt() { abort(); }; + Registers_arm64_rt(const void *registers); + bool validRegister(int num) {abort();}; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value, uint64_t location); - + bool 
validFloatRegister(int num) {abort();}; double getFloatRegister(int num) {abort();} void setFloatRegister(int num, double value) {abort();} - - libunwind::v128 getVectorRegister(int num) const; - void setVectorRegister(int num, libunwind::v128 value); + bool validVectorRegister(int num) const {abort();} + libunwind::v128 getVectorRegister(int num) const {abort();}; + void setVectorRegister(int num, libunwind::v128 value) {abort();}; + void jumpto() { abort();}; - uint64_t getSP() const { return SP;} - void setSP(uint64_t value, uint64_t location) { SP = value;} - uint64_t getIP() const { return IP;} + uint64_t getSP() const { return regs->SP;} + void setSP(uint64_t value, uint64_t location) { regs->SP = value;} + uint64_t getIP() const { return regs->IP;} void setIP(uint64_t value, uint64_t location) - { IP = value; pIP = (PTR_UIntNative)location; } + { regs->IP = value; regs->pIP = (PTR_UIntNative)location; } + void saveVFPAsX() {abort();}; +private: + REGDISPLAY *regs; }; -inline bool Registers_REGDISPLAY::validRegister(int num) const { - if (num == UNW_REG_SP || num == UNW_ARM64_SP) - return true; - - if (num == UNW_ARM64_FP) - return true; - - if (num == UNW_ARM64_LR) - return true; - - if (num == UNW_REG_IP) - return true; - - if (num >= UNW_ARM64_X0 && num <= UNW_ARM64_X28) - return true; - - return false; -} - -bool Registers_REGDISPLAY::validVectorRegister(int num) const -{ - if (num >= UNW_ARM64_D8 && num <= UNW_ARM64_D15) - return true; - - return false; +inline Registers_arm64_rt::Registers_arm64_rt(const void *registers) { + regs = (REGDISPLAY *)registers; } -inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { +inline uint64_t Registers_arm64_rt::getRegister(int regNum) const { if (regNum == UNW_REG_SP || regNum == UNW_ARM64_SP) - return SP; - - if (regNum == UNW_ARM64_FP) - return *pFP; + return regs->SP; if (regNum == UNW_ARM64_LR) - return *pLR; + return *regs->pLR; if (regNum == UNW_REG_IP) - return IP; + return regs->IP; switch (regNum) { case (UNW_ARM64_X0): - return *pX0; + return *regs->pX0; case (UNW_ARM64_X1): - return *pX1; + return *regs->pX1; case (UNW_ARM64_X2): - return *pX2; + return *regs->pX2; case (UNW_ARM64_X3): - return *pX3; + return *regs->pX3; case (UNW_ARM64_X4): - return *pX4; + return *regs->pX4; case (UNW_ARM64_X5): - return *pX5; + return *regs->pX5; case (UNW_ARM64_X6): - return *pX6; + return *regs->pX6; case (UNW_ARM64_X7): - return *pX7; + return *regs->pX7; case (UNW_ARM64_X8): - return *pX8; + return *regs->pX8; case (UNW_ARM64_X9): - return *pX9; + return *regs->pX9; case (UNW_ARM64_X10): - return *pX10; + return *regs->pX10; case (UNW_ARM64_X11): - return *pX11; + return *regs->pX11; case (UNW_ARM64_X12): - return *pX12; + return *regs->pX12; case (UNW_ARM64_X13): - return *pX13; + return *regs->pX13; case (UNW_ARM64_X14): - return *pX14; + return *regs->pX14; case (UNW_ARM64_X15): - return *pX15; + return *regs->pX15; case (UNW_ARM64_X16): - return *pX16; + return *regs->pX16; case (UNW_ARM64_X17): - return *pX17; + return *regs->pX17; case (UNW_ARM64_X18): - return *pX18; + return *regs->pX18; case (UNW_ARM64_X19): - return *pX19; + return *regs->pX19; case (UNW_ARM64_X20): - return *pX20; + return *regs->pX20; case (UNW_ARM64_X21): - return *pX21; + return *regs->pX21; case (UNW_ARM64_X22): - return *pX22; + return *regs->pX22; case (UNW_ARM64_X23): - return *pX23; + return *regs->pX23; case (UNW_ARM64_X24): - return *pX24; + return *regs->pX24; case (UNW_ARM64_X25): - return *pX25; + return *regs->pX25; case 
(UNW_ARM64_X26): - return *pX26; + return *regs->pX26; case (UNW_ARM64_X27): - return *pX27; + return *regs->pX27; case (UNW_ARM64_X28): - return *pX28; + return *regs->pX28; } PORTABILITY_ASSERT("unsupported arm64 register"); } -void Registers_REGDISPLAY::setRegister(int num, uint64_t value, uint64_t location) +void Registers_arm64_rt::setRegister(int num, uint64_t value, uint64_t location) { - if (num == UNW_REG_SP || num == UNW_ARM64_SP) { - SP = (UIntNative )value; - return; - } - if (num == UNW_ARM64_FP) { - pFP = (PTR_UIntNative)location; + if (num == UNW_REG_SP || num == UNW_ARM64_SP) { + regs->SP = (UIntNative )value; return; } if (num == UNW_ARM64_LR) { - pLR = (PTR_UIntNative)location; + regs->pLR = (PTR_UIntNative)location; return; } if (num == UNW_REG_IP) { - IP = value; + regs->IP = value; + /* the location could be NULL, we could try to recover + pointer to value in stack from pLR */ + if ((!location) && (regs->pLR) && (*regs->pLR == value)) + regs->pIP = regs->pLR; + else + regs->pIP = (PTR_UIntNative)location; return; } switch (num) { case (UNW_ARM64_X0): - pX0 = (PTR_UIntNative)location; + regs->pX0 = (PTR_UIntNative)location; break; case (UNW_ARM64_X1): - pX1 = (PTR_UIntNative)location; + regs->pX1 = (PTR_UIntNative)location; break; case (UNW_ARM64_X2): - pX2 = (PTR_UIntNative)location; + regs->pX2 = (PTR_UIntNative)location; break; case (UNW_ARM64_X3): - pX3 = (PTR_UIntNative)location; + regs->pX3 = (PTR_UIntNative)location; break; case (UNW_ARM64_X4): - pX4 = (PTR_UIntNative)location; + regs->pX4 = (PTR_UIntNative)location; break; case (UNW_ARM64_X5): - pX5 = (PTR_UIntNative)location; + regs->pX5 = (PTR_UIntNative)location; break; case (UNW_ARM64_X6): - pX6 = (PTR_UIntNative)location; + regs->pX6 = (PTR_UIntNative)location; break; case (UNW_ARM64_X7): - pX7 = (PTR_UIntNative)location; + regs->pX7 = (PTR_UIntNative)location; break; case (UNW_ARM64_X8): - pX8 = (PTR_UIntNative)location; + regs->pX8 = (PTR_UIntNative)location; break; case (UNW_ARM64_X9): - pX9 = (PTR_UIntNative)location; + regs->pX9 = (PTR_UIntNative)location; break; case (UNW_ARM64_X10): - pX10 = (PTR_UIntNative)location; + regs->pX10 = (PTR_UIntNative)location; break; case (UNW_ARM64_X11): - pX11 = (PTR_UIntNative)location; + regs->pX11 = (PTR_UIntNative)location; break; case (UNW_ARM64_X12): - pX12 = (PTR_UIntNative)location; + regs->pX12 = (PTR_UIntNative)location; break; case (UNW_ARM64_X13): - pX13 = (PTR_UIntNative)location; + regs->pX13 = (PTR_UIntNative)location; break; case (UNW_ARM64_X14): - pX14 = (PTR_UIntNative)location; + regs->pX14 = (PTR_UIntNative)location; break; case (UNW_ARM64_X15): - pX15 = (PTR_UIntNative)location; + regs->pX15 = (PTR_UIntNative)location; break; case (UNW_ARM64_X16): - pX16 = (PTR_UIntNative)location; + regs->pX16 = (PTR_UIntNative)location; break; case (UNW_ARM64_X17): - pX17 = (PTR_UIntNative)location; + regs->pX17 = (PTR_UIntNative)location; break; case (UNW_ARM64_X18): - pX18 = (PTR_UIntNative)location; + regs->pX18 = (PTR_UIntNative)location; break; case (UNW_ARM64_X19): - pX19 = (PTR_UIntNative)location; + regs->pX19 = (PTR_UIntNative)location; break; case (UNW_ARM64_X20): - pX20 = (PTR_UIntNative)location; + regs->pX20 = (PTR_UIntNative)location; break; case (UNW_ARM64_X21): - pX21 = (PTR_UIntNative)location; + regs->pX21 = (PTR_UIntNative)location; break; case (UNW_ARM64_X22): - pX22 = (PTR_UIntNative)location; + regs->pX22 = (PTR_UIntNative)location; break; case (UNW_ARM64_X23): - pX23 = (PTR_UIntNative)location; + regs->pX23 = (PTR_UIntNative)location; 
         break;
     case (UNW_ARM64_X24):
-        pX24 = (PTR_UIntNative)location;
+        regs->pX24 = (PTR_UIntNative)location;
         break;
     case (UNW_ARM64_X25):
-        pX25 = (PTR_UIntNative)location;
+        regs->pX25 = (PTR_UIntNative)location;
         break;
     case (UNW_ARM64_X26):
-        pX26 = (PTR_UIntNative)location;
+        regs->pX26 = (PTR_UIntNative)location;
         break;
     case (UNW_ARM64_X27):
-        pX27 = (PTR_UIntNative)location;
+        regs->pX27 = (PTR_UIntNative)location;
         break;
     case (UNW_ARM64_X28):
-        pX28 = (PTR_UIntNative)location;
+        regs->pX28 = (PTR_UIntNative)location;
         break;
     default:
         PORTABILITY_ASSERT("unsupported arm64 register");
     }
 }
 
-libunwind::v128 Registers_REGDISPLAY::getVectorRegister(int num) const
-{
-    num -= UNW_ARM64_D8;
-
-    if (num < 0 || num >= sizeof(D) / sizeof(UInt64))
-    {
-        PORTABILITY_ASSERT("unsupported arm64 vector register");
-    }
-
-    libunwind::v128 result;
-
-    result.vec[0] = 0;
-    result.vec[1] = 0;
-    result.vec[2] = D[num] >> 32;
-    result.vec[3] = D[num] & 0xFFFFFFFF;
-
-    return result;
-}
-
-void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value)
-{
-    num -= UNW_ARM64_D8;
-
-    if (num < 0 || num >= sizeof(D) / sizeof(UInt64))
-    {
-        PORTABILITY_ASSERT("unsupported arm64 vector register");
-    }
-
-    D[num] = (UInt64)value.vec[2] << 32 | (UInt64)value.vec[3];
-}
-
 #endif // TARGET_ARM64
 
 bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs)
@@ -762,7 +707,7 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs
 #elif defined(TARGET_ARM)
     libunwind::UnwindCursor<LocalAddressSpace, Registers_arm_rt> uc(_addressSpace, regs);
 #elif defined(TARGET_ARM64)
-    libunwind::UnwindCursor<LocalAddressSpace, Registers_REGDISPLAY> uc(_addressSpace, regs);
+    libunwind::UnwindCursor<LocalAddressSpace, Registers_arm64_rt> uc(_addressSpace, regs);
 #elif defined(HOST_X86)
     libunwind::UnwindCursor<LocalAddressSpace, Registers_x86> uc(_addressSpace, regs);
 #else
@@ -779,7 +724,10 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs
     unw_proc_info_t procInfo;
     uc.getInfo(&procInfo);
 
-#if defined(TARGET_ARM)
+#if defined(TARGET_ARM64)
+    DwarfInstructions<LocalAddressSpace, Registers_arm64_rt> dwarfInst;
+    int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm64_rt*)regs);
+#elif defined(TARGET_ARM)
     DwarfInstructions<LocalAddressSpace, Registers_arm_rt> dwarfInst;
     int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm_rt*)regs);
 #else
@@ -792,12 +740,7 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs
         return false;
     }
 
-#if defined(TARGET_ARM64)
-    regs->SetAddrOfIP(regs->pLR);
-#else
     regs->pIP = PTR_PCODE(regs->SP - sizeof(TADDR));
-#endif
-
 #elif defined(_LIBUNWIND_ARM_EHABI)
     uc.setInfoBasedOnIPRegister(true);
     int stepRet = uc.step();
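An aside on the shape of the change above: Registers_arm64_rt is a thin adapter that gives libunwind's DWARF engine a view over the runtime's REGDISPLAY, and setRegister() deliberately records locations (stack addresses) rather than values. A rough C sketch of the idea follows; the names are abbreviated and the layout is assumed from the accessors used above, so this is illustrative, not the real Regdisplay.h definition:

    #include <stdint.h>

    typedef uintptr_t *PTR_UIntNative_sk;  /* assumed stand-in for PTR_UIntNative */

    struct REGDISPLAY_sk {
        PTR_UIntNative_sk pX0;   /* ... pX1 through pX28: addresses of the stack  */
                                 /* slots where the unwound frame saved x0..x28   */
        uintptr_t SP;            /* SP and IP are plain values, not locations     */
        uintptr_t IP;
        PTR_UIntNative_sk pIP;   /* where the return address itself lives         */
        PTR_UIntNative_sk pLR;
    };

Keeping locations instead of values is what lets a later consumer, for example the GC during a stack walk, rewrite a saved callee-saved register in place after relocating the object it points at.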
diff --git a/src/Native/libunwind/src/DwarfInstructions.hpp b/src/Native/libunwind/src/DwarfInstructions.hpp
index f341772824c..c5cc6c9d510 100644
--- a/src/Native/libunwind/src/DwarfInstructions.hpp
+++ b/src/Native/libunwind/src/DwarfInstructions.hpp
@@ -169,7 +169,6 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
       // restore registers that DWARF says were saved
       R newRegisters = registers;
       pint_t returnAddress = 0;
-      pint_t returnAddressLocation = 0;
       const int lastReg = R::lastDwarfRegNum();
       assert(static_cast<int>(CFI_Parser<A>::kMaxRegisterNumber) >= lastReg &&
              "register range too large");
@@ -178,14 +177,7 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
       for (int i = 0; i <= lastReg; ++i) {
         if (prolog.savedRegisters[i].location !=
             CFI_Parser<A>::kRegisterUnused) {
-          if (i == (int)cieInfo.returnAddressRegister) {
-            returnAddress = getSavedRegister(addressSpace, registers, cfa,
-                                             prolog.savedRegisters[i],
-                                             returnAddressLocation);
-
-            newRegisters.setRegister(i, returnAddress, returnAddressLocation);
-          }
-          else if (registers.validFloatRegister(i))
+          if (registers.validFloatRegister(i))
             newRegisters.setFloatRegister(
                 i, getSavedFloatRegister(addressSpace, registers, cfa,
                                          prolog.savedRegisters[i]));
@@ -193,6 +185,12 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
             newRegisters.setVectorRegister(
                 i, getSavedVectorRegister(addressSpace, registers, cfa,
                                           prolog.savedRegisters[i]));
+          else if (i == (int)cieInfo.returnAddressRegister) {
+            pint_t dummyLocation;
+            returnAddress = getSavedRegister(addressSpace, registers, cfa,
+                                             prolog.savedRegisters[i],
+                                             dummyLocation);
+          }
           else if (registers.validRegister(i)) {
             pint_t value;
             pint_t location;
@@ -274,7 +272,7 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
 
       // Return address is address after call site instruction, so setting IP to
       // that does simualates a return.
-      newRegisters.setIP(returnAddress, returnAddressLocation);
+      newRegisters.setIP(returnAddress, 0);
 
       // Simulate the step by replacing the register set with the new ones.
       registers = newRegisters;

From be9d9e62bac9e6cf54b06e444bbd34ea02bdac53 Mon Sep 17 00:00:00 2001
From: RalfKornmannEnvision
Date: Thu, 20 Aug 2020 11:39:59 +0200
Subject: [PATCH 30/36] ARM64 review reworks

---
 .../src/Compiler/DependencyAnalysis/ObjectWriter.cs   |  2 +-
 .../DependencyAnalysis/Target_ARM64/ARM64Emitter.cs   | 10 ----------
 .../Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs  |  6 ++----
 3 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs
index 2a31cd0975c..5bf1ba8ed01 100644
--- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs
+++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs
@@ -1040,7 +1040,7 @@ public static void EmitObject(string objectFilePath, IEnumerable
                     // Build symbol definition map.
                     objectWriter.BuildSymbolDefinitionMap(node, nodeContents.DefinedSymbols);
 
-                    // The DWARF CFI unwind is implemented for AMD64 & ARM32 only.
+                    // The DWARF CFI unwind is only implemented for some architectures.
                     TargetArchitecture tarch = factory.Target.Architecture;
                     if (!factory.Target.IsWindows &&
                         (tarch == TargetArchitecture.X64 || tarch == TargetArchitecture.ARM || tarch == TargetArchitecture.ARM64))

diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs
index e7b8ea0e211..a91e32529d4 100644
--- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs
+++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs
@@ -166,16 +166,6 @@ public void EmitINT3()
             Builder.EmitUInt(0b11010100_001_1111111111111111_000_0_0);
         }
 
-        public void EmitINT3(uint id)
-        {
-            Builder.EmitUInt(0b11010100_001_1111111111111111_000_0_0);
-            Builder.EmitUInt((uint)(0b0_00101_00000000000000000000000000u | ((uint)4)));
-            Builder.EmitUInt(0xdeadc0de);
-            Builder.EmitUInt(id);
-            Builder.EmitUInt(0xdeadc0de);
-        }
-
-
         public void EmitJmpToAddrMode(ref AddrMode addrMode)
         {
             throw new NotImplementedException();

diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs
index 360e4719efb..9c29b36e5be 100644
--- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs
+++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs
@@ -79,7 +79,7 @@ protected sealed override void EmitCode(NodeFactory factory, ref ARM64Emitter en
                     {
                         // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region.
                         encoder.EmitMOV(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg0);
-                        //encoder.EmitSUB(encoder.TargetRegister.Arg3, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target));
+                        encoder.EmitSUB(encoder.TargetRegister.Arg3, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target));
                         encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg3, (short)factory.Target.PointerSize);
                         encoder.EmitCMP(encoder.TargetRegister.Arg2, 1);
                         encoder.EmitRETIfEqual();
@@ -207,9 +207,7 @@ protected sealed override void EmitCode(NodeFactory factory, ref ARM64Emitter en
                     break;
 
                 default:
-                    encoder.EmitINT3();
-                    Console.WriteLine("Misiing R2R for {0}", Id.ToString());
-                    break;
+                    throw new NotImplementedException();
             }
         }

From bc488c3b97b8129365af64c71e0826ba39e2304d Mon Sep 17 00:00:00 2001
From: RalfKornmannEnvision
Date: Thu, 20 Aug 2020 11:42:20 +0200
Subject: [PATCH 31/36] Arm64 (#7)

* CoreRT-ARM64: Helper node asm code generation

* CoreRT ARM64: Add additional relocation support

* CoreRT ARM64: Use alternative functions for write barriers

  On ARM the JIT uses a special register interface to call the write barrier
  functions, but since the regular ones are still used in other places we
  need two different implementations.
  The helper nodes need to call the custom variants.

* ARM64 code generation

* unwind: use only the dwarf index option

* ARM64 runtime assembler functions

* ARM64 unix stack walk

* ARM64 Exception Handling

* ARM64: Fix EH data decoding

* Move native layout signature from read-only to data section on non-Windows targets

* Fix exception handling macro call

* ARM64 thunk helpers

* ARM64 process cfi data for elf

* ARM64 missing assembler macro

* ARM64 port more assembler helpers

* ARM64: Add missing case for the generic helper node

* ARM64 intrinsic support

* ARM64 object writer for ELF object files

* ARM64 llvm patches for object writer

* ARM64 include untested helper code with debug break

* ARM64 reenable scanning fail exception

* ARM64 formatting

* ARM64: revert using alternative JIT

* Revert "ARM64 unix stack walk"

* ARM64 review reworks
---
 .../src/Compiler/DependencyAnalysis/ObjectWriter.cs   |  2 +-
 .../DependencyAnalysis/Target_ARM64/ARM64Emitter.cs   | 10 ----------
 .../Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs  |  6 ++----
 3 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs
index 2a31cd0975c..5bf1ba8ed01 100644
--- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs
+++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs
@@ -1040,7 +1040,7 @@ public static void EmitObject(string objectFilePath, IEnumerable
                     // Build symbol definition map.
                     objectWriter.BuildSymbolDefinitionMap(node, nodeContents.DefinedSymbols);
 
-                    // The DWARF CFI unwind is implemented for AMD64 & ARM32 only.
+                    // The DWARF CFI unwind is only implemented for some architectures.
                     TargetArchitecture tarch = factory.Target.Architecture;
                     if (!factory.Target.IsWindows &&
                         (tarch == TargetArchitecture.X64 || tarch == TargetArchitecture.ARM || tarch == TargetArchitecture.ARM64))

diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs
index e7b8ea0e211..a91e32529d4 100644
--- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs
+++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs
@@ -166,16 +166,6 @@ public void EmitINT3()
             Builder.EmitUInt(0b11010100_001_1111111111111111_000_0_0);
         }
 
-        public void EmitINT3(uint id)
-        {
-            Builder.EmitUInt(0b11010100_001_1111111111111111_000_0_0);
-            Builder.EmitUInt((uint)(0b0_00101_00000000000000000000000000u | ((uint)4)));
-            Builder.EmitUInt(0xdeadc0de);
-            Builder.EmitUInt(id);
-            Builder.EmitUInt(0xdeadc0de);
-        }
-
-
         public void EmitJmpToAddrMode(ref AddrMode addrMode)
         {
             throw new NotImplementedException();

diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs
index 360e4719efb..9c29b36e5be 100644
--- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs
+++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs
@@ -79,7 +79,7 @@ protected sealed override void EmitCode(NodeFactory factory, ref ARM64Emitter en
                     {
                         // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region.
                         encoder.EmitMOV(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg0);
-                        //encoder.EmitSUB(encoder.TargetRegister.Arg3, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target));
+                        encoder.EmitSUB(encoder.TargetRegister.Arg3, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target));
                         encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg3, (short)factory.Target.PointerSize);
                         encoder.EmitCMP(encoder.TargetRegister.Arg2, 1);
                         encoder.EmitRETIfEqual();
@@ -207,9 +207,7 @@ protected sealed override void EmitCode(NodeFactory factory, ref ARM64Emitter en
                     break;
 
                 default:
-                    encoder.EmitINT3();
-                    Console.WriteLine("Misiing R2R for {0}", Id.ToString());
-                    break;
+                    throw new NotImplementedException();
             }
         }

From d9d12f7e8efba87f982f4d61a67666707c440df7 Mon Sep 17 00:00:00 2001
From: RalfKornmannEnvision
Date: Mon, 24 Aug 2020 09:45:42 +0200
Subject: [PATCH 32/36] ARM64 Fix wrong function call for invoke return

---
 src/Native/Runtime/arm64/PInvoke.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Native/Runtime/arm64/PInvoke.S b/src/Native/Runtime/arm64/PInvoke.S
index 508127601fb..8b8dd2100a8 100644
--- a/src/Native/Runtime/arm64/PInvoke.S
+++ b/src/Native/Runtime/arm64/PInvoke.S
@@ -350,6 +350,6 @@ LEAF_ENTRY RhpPInvokeReturn, _TEXT
        ret
 0:
        // passing transition frame pointer in x0
-       b RhpWaitForGC
+       b C_FUNC(RhpWaitForGC2)
 LEAF_END RhpPInvokeReturn, _TEXT

From 2bc790b3d0558d280fda80e03cc2ebe7fbe19230 Mon Sep 17 00:00:00 2001
From: RalfKornmannEnvision
Date: Tue, 25 Aug 2020 08:38:38 +0200
Subject: [PATCH 33/36] ARM64: Ensure module fixup node data is aligned

Data is updated with a compare-exchange operation. Therefore it needs to be
aligned, or it will trigger a data exception on ARM64 CPUs.

---
 .../src/Compiler/DependencyAnalysis/PInvokeModuleFixupNode.cs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/PInvokeModuleFixupNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/PInvokeModuleFixupNode.cs
index 82374f8faf4..198649fe616 100644
--- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/PInvokeModuleFixupNode.cs
+++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/PInvokeModuleFixupNode.cs
@@ -41,6 +41,8 @@ public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
         public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false)
         {
             ObjectDataBuilder builder = new ObjectDataBuilder(factory, relocsOnly);
+            builder.RequireInitialPointerAlignment();
+
             builder.AddSymbol(this);
 
             ISymbolNode nameSymbol = factory.ConstantUtf8String(_pInvokeModuleData.ModuleName);
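To see the failure mode this patch guards against in isolation, here is a minimal sketch; it is not part of the patch series, and it assumes GCC or Clang atomic builtins on a Linux/ARM64 machine. The exclusive load/store pair behind a compare-exchange (LDXR/STXR, or CASAL with LSE) requires a naturally aligned address, so a CAS on a misaligned pointer takes an alignment fault (delivered as SIGBUS) instead of succeeding:

    #include <stdint.h>
    #include <stdlib.h>

    int main(void)
    {
        uint64_t *buf = malloc(16);   /* suitably aligned for uint64_t */

        uint64_t expected = 0;
        /* Aligned compare-exchange: works everywhere. */
        __atomic_compare_exchange_n(buf, &expected, 1, 0,
                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);

        /* Misaligned target: undefined behavior in C, and on ARM64 the
           exclusive access typically faults (SIGBUS) rather than working. */
        uint64_t *misaligned = (uint64_t *)((char *)buf + 4);
        expected = 0;
        __atomic_compare_exchange_n(misaligned, &expected, 1, 0,
                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);

        free(buf);
        return 0;
    }

RequireInitialPointerAlignment() sidesteps this by guaranteeing that the fixup cell starts on a pointer-aligned boundary.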
From 65f6dff90552f876e4a90fa2d01bd9fc2a5d49bf Mon Sep 17 00:00:00 2001
From: RalfKornmannEnvision
Date: Thu, 27 Aug 2020 08:30:33 +0200
Subject: [PATCH 34/36] Arm64: review reworks (#10)

---
 src/JitInterface/src/CorInfoImpl.cs           |  5 +-
 src/Native/ObjWriter/objwriter.cpp            |  7 +--
 src/Native/Runtime/arm64/AllocFast.S          |  2 -
 src/Native/Runtime/arm64/AsmMacros.h          |  9 ++++
 src/Native/Runtime/arm64/ExceptionHandling.S  |  4 +-
 src/Native/Runtime/arm64/Interlocked.S        |  3 ++
 src/Native/Runtime/arm64/PInvoke.S            | 49 +++----------
 src/Native/Runtime/arm64/StubDispatch.S       |  2 -
 src/Native/Runtime/arm64/WriteBarriers.S      | 36 +++++++-------
 src/Native/Runtime/arm64/WriteBarriers.asm    |  2 +
 .../Runtime/unix/unixasmmacrosarm64.inc       | 20 ++++++--
 src/Native/libunwind/src/UnwindCursor.hpp     |  2 +-
 12 files changed, 67 insertions(+), 74 deletions(-)

diff --git a/src/JitInterface/src/CorInfoImpl.cs b/src/JitInterface/src/CorInfoImpl.cs
index cb822538581..705947f5022 100644
--- a/src/JitInterface/src/CorInfoImpl.cs
+++ b/src/JitInterface/src/CorInfoImpl.cs
@@ -2708,6 +2708,10 @@ private void allocUnwindInfo(byte* pHotCode, byte* pColdCode, uint startOffset,
             _frameInfos[_usedFrameInfos++] = new FrameInfo(flags, (int)startOffset, (int)endOffset, blobData);
         }
 
+        // Get the CFI data in the same shape as clang/LLVM generated one. This improves the compatibility with libunwind and other unwind solutions
+        // - Combine in one single block for the whole prolog instead of one CFI block per assembler instruction
+        // - Store CFA definition first
+        // - Store all used registers in ascending order
         private byte[] CompressARM64CFI(byte[] blobData)
         {
             if (blobData == null || blobData.Length == 0)
@@ -2818,7 +2822,6 @@ private byte[] CompressARM64CFI(byte[] blobData)
                         cfiWriter.Write((byte)CFI_OPCODE.CFI_DEF_CFA);
                         cfiWriter.Write((short)31);
                         cfiWriter.Write(spOffset);
-                        //storeOffset = -spOffset;
                     }
                 }

diff --git a/src/Native/ObjWriter/objwriter.cpp b/src/Native/ObjWriter/objwriter.cpp
index a95a781ab64..03875f595f9 100644
--- a/src/Native/ObjWriter/objwriter.cpp
+++ b/src/Native/ObjWriter/objwriter.cpp
@@ -310,10 +310,11 @@ void ObjectWriter::SetCodeSectionAttribute(const char *SectionName,
 }
 
 void ObjectWriter::EmitAlignment(int ByteAlignment) {
-  int64_t fillValue = 0x90; //x86 nop
+  int64_t fillValue = 0;
 
-  if (TMachine->getTargetTriple().getArch() == llvm::Triple::ArchType::aarch64) {
-    fillValue = 0; // ARM64 bad
+  if (TMachine->getTargetTriple().getArch() == llvm::Triple::ArchType::x86 ||
+      TMachine->getTargetTriple().getArch() == llvm::Triple::ArchType::x86_64) {
+    fillValue = 0x90; // x86 nop
   }
 
   Streamer->EmitValueToAlignment(ByteAlignment, fillValue);

diff --git a/src/Native/Runtime/arm64/AllocFast.S b/src/Native/Runtime/arm64/AllocFast.S
index 07e68455950..82fdeec45d8 100644
--- a/src/Native/Runtime/arm64/AllocFast.S
+++ b/src/Native/Runtime/arm64/AllocFast.S
@@ -6,8 +6,6 @@
 
 // GC type flags
 GC_ALLOC_FINALIZE = 1
-GC_ALLOC_ALIGN8_BIAS = 4
-GC_ALLOC_ALIGN8 = 8
 
 //
 // Rename fields of nested structs

diff --git a/src/Native/Runtime/arm64/AsmMacros.h b/src/Native/Runtime/arm64/AsmMacros.h
index 950d8befc6a..fc4e84503ac 100644
--- a/src/Native/Runtime/arm64/AsmMacros.h
+++ b/src/Native/Runtime/arm64/AsmMacros.h
@@ -247,6 +247,15 @@ __SECTIONREL_tls_CurrentThread SETS "$__SECTIONREL_tls_CurrentThread":CC:"_"
 
     MEND
 
+;; -----------------------------------------------------------------------------
+;;
+;; Macro to add a memory barrier. Equal to __sync_synchronize().
+;;
+
+    MACRO
+        ArmInterlockedOperationBarrier
+        dmb ish
+    MEND
+
 ;; -----------------------------------------------------------------------------
 ;;
 ;; Macro used from unmanaged helpers called from managed code where the helper does not transition immediately

diff --git a/src/Native/Runtime/arm64/ExceptionHandling.S b/src/Native/Runtime/arm64/ExceptionHandling.S
index 1dc4c83cbc0..660820cde15 100644
--- a/src/Native/Runtime/arm64/ExceptionHandling.S
+++ b/src/Native/Runtime/arm64/ExceptionHandling.S
@@ -465,8 +465,8 @@ PopExInfoLoop:
 DonePopping:
        str x3, [x1, #OFFSETOF__Thread__m_pExInfoStackHead] // store the new head on the Thread
 
-       adrp x3, RhpTrapThreads
-       add x3, x3, :lo12:RhpTrapThreads
+       PREPARE_EXTERNAL_VAR RhpTrapThreads, x3
+       ldr w3, [x3]
 
        tbz x3, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort

diff --git a/src/Native/Runtime/arm64/Interlocked.S b/src/Native/Runtime/arm64/Interlocked.S
index 755b5fd3d30..a1cd55d79be 100644
--- a/src/Native/Runtime/arm64/Interlocked.S
+++ b/src/Native/Runtime/arm64/Interlocked.S
@@ -18,7 +18,9 @@ ALTERNATE_ENTRY RhpLockCmpXchg32AVLocation
        bne 2f // if (w0 != w2) goto exit
        stlxr w9, w1, [x8] // if (w0 == w2) { try *x8 = w1 and goto loop if failed or goto exit }
        cbnz w9, 1b
+
 2: // exit
+       ArmInterlockedOperationBarrier
        ret
 LEAF_END RhpLockCmpXchg32, _TEXT
 
@@ -38,5 +40,6 @@ ALTERNATE_ENTRY RhpLockCmpXchg64AVLocation
        stlxr w9, x1, [x8] // if (x0 == x2) { try *x8 = x1 and goto loop if failed or goto exit }
        cbnz w9, 1b
 2: // exit
+       ArmInterlockedOperationBarrier
        ret
 LEAF_END RhpLockCmpXchg64, _TEXT
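The AsmMacros.h comment above pins down what ArmInterlockedOperationBarrier means: a full memory barrier, equal to __sync_synchronize(). As a sketch of that equivalence (assuming GCC or Clang targeting AArch64):

    /* Both GCC and Clang lower the legacy __sync_synchronize() builtin on
       AArch64 to exactly the instruction the macro emits:

           dmb ish    // data memory barrier, inner-shareable domain */
    void full_barrier(void)
    {
        __sync_synchronize();
    }

Appending the barrier after the ldaxr/stlxr loops, as the Interlocked.S hunks above do, presumably keeps these helpers at the full-fence strength that callers of interlocked operations expect, rather than only the acquire/release ordering of the exclusive pair itself.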
diff --git a/src/Native/Runtime/arm64/PInvoke.S b/src/Native/Runtime/arm64/PInvoke.S
index 8b8dd2100a8..d3a2accd33d 100644
--- a/src/Native/Runtime/arm64/PInvoke.S
+++ b/src/Native/Runtime/arm64/PInvoke.S
@@ -8,40 +8,7 @@
 .global RhpTrapThreads
 
 // Note: these must match the defs in PInvokeTransitionFrameFlags defined in rhbinder.h
-PTFF_SAVE_X19 = 0x00000001
-PTFF_SAVE_X20 = 0x00000002
-PTFF_SAVE_X21 = 0x00000004
-PTFF_SAVE_X22 = 0x00000008
-PTFF_SAVE_X23 = 0x00000010
-PTFF_SAVE_X24 = 0x00000020
-PTFF_SAVE_X25 = 0x00000040
-PTFF_SAVE_X26 = 0x00000080
-PTFF_SAVE_X27 = 0x00000100
-PTFF_SAVE_X28 = 0x00000200
 PTFF_SAVE_SP = 0x00000400
-PTFF_SAVE_ALL_PRESERVED = 0x000003FF // NOTE: x19-x28
-PTFF_SAVE_X0 = 0x00000800
-PTFF_SAVE_X1 = 0x00001000
-PTFF_SAVE_X2 = 0x00002000
-PTFF_SAVE_X3 = 0x00004000
-PTFF_SAVE_X4 = 0x00008000
-PTFF_SAVE_X5 = 0x00010000
-PTFF_SAVE_X6 = 0x00020000
-PTFF_SAVE_X7 = 0x00040000
-PTFF_SAVE_X8 = 0x00080000
-PTFF_SAVE_X9 = 0x00100000
-PTFF_SAVE_X10 = 0x00200000
-PTFF_SAVE_X11 = 0x00400000
-PTFF_SAVE_X12 = 0x00800000
-PTFF_SAVE_X13 = 0x01000000
-PTFF_SAVE_X14 = 0x02000000
-PTFF_SAVE_X15 = 0x04000000
-PTFF_SAVE_X16 = 0x08000000
-PTFF_SAVE_X17 = 0x10000000
-PTFF_SAVE_X18 = 0x20000000
-PTFF_SAVE_ALL_SCRATCH = 0x3FFFF800 // NOTE: X0-X18
-PTFF_SAVE_FP = 0x40000000
-PTFF_SAVE_LR = 0x80000000
 
 // Bit position for the flags above, to be used with tbz / tbnz instructions
 PTFF_THREAD_ABORT_BIT = 36
@@ -151,8 +118,8 @@
 
        PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x10
 
-       adrp x10, RhpTrapThreads
-       add x10, x10, :lo12:RhpTrapThreads
+       PREPARE_EXTERNAL_VAR RhpTrapThreads, x10
+       ldr w10, [x10]
 
        tbz x10, #TrapThreadsFlags_TrapThreads_Bit, NoWait
        bl RhpWaitForGCNoAbort
@@ -214,8 +181,8 @@ ThreadAttached:
        str xzr, [x10, #OFFSETOF__Thread__m_pTransitionFrame]
        dmb ish
 
-       adrp x11, RhpTrapThreads
-       add x11, x11, :lo12:RhpTrapThreads
+       PREPARE_EXTERNAL_VAR RhpTrapThreads, x11
+       ldr w11, [x11]
 
        tbnz x11, #TrapThreadsFlags_TrapThreads_Bit, TrapThread
@@ -327,8 +294,8 @@ NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler
        str x10, [x0, #OFFSETOF__PInvokeTransitionFrame__m_pThread]
        str x0, [x10, #OFFSETOF__Thread__m_pTransitionFrame]
 
-       adrp x9, RhpTrapThreads
-       add x9, x9, :lo12:RhpTrapThreads
+       PREPARE_EXTERNAL_VAR RhpTrapThreads, x9
+       ldr w9, [x9]
        cbnz w9, InvokeRareTrapThread // TrapThreadsFlags_None = 0
        ret
@@ -343,8 +310,8 @@ LEAF_ENTRY RhpPInvokeReturn, _TEXT
        mov x10, 0
        str x10, [x9, #OFFSETOF__Thread__m_pTransitionFrame]
 
-       adrp x9, RhpTrapThreads
-       add x9, x9, :lo12:RhpTrapThreads
+       PREPARE_EXTERNAL_VAR RhpTrapThreads, x9
+       ldr w9, [x9]
        cbnz w9, 0f // TrapThreadsFlags_None = 0
        ret

diff --git a/src/Native/Runtime/arm64/StubDispatch.S b/src/Native/Runtime/arm64/StubDispatch.S
index 25aae897289..dd820f1fc81 100644
--- a/src/Native/Runtime/arm64/StubDispatch.S
+++ b/src/Native/Runtime/arm64/StubDispatch.S
@@ -4,8 +4,6 @@
 #include <unixasmmacros.inc>
 #include "AsmOffsets.inc"
 
-#define __tls_array 0
-
 #ifdef FEATURE_CACHED_INTERFACE_DISPATCH
 
 .extern RhpCidResolve
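The WriteBarriers.S changes below retarget the standard entry points onto the x14 (destination) / x15 (value) register interface that the patch 31 commit message mentions for JIT-generated calls. For orientation, here is a conceptual sketch of what a "checked" GC write barrier does; this is hand-written C, not the runtime's code, and the g_* globals and the 2KB card size are illustrative assumptions modeled on CoreCLR:

    #include <stdint.h>

    extern uint8_t *g_lowest_address, *g_highest_address;  /* GC heap bounds    */
    extern uint8_t *g_ephemeral_low, *g_ephemeral_high;    /* young generations */
    extern uint8_t *g_card_table;

    void checked_assign_ref(uint8_t **dst, uint8_t *ref)
    {
        *dst = ref;                       /* the stlr in the helper below */

        /* "Checked" means dst is not known to be inside the GC heap. */
        if ((uint8_t *)dst < g_lowest_address || (uint8_t *)dst >= g_highest_address)
            return;

        /* Record old-to-young references so a partial GC can find them. */
        if (ref >= g_ephemeral_low && ref < g_ephemeral_high)
            g_card_table[(uintptr_t)dst >> 11] = 0xFF;
    }

The unchecked variant skips the bounds test because the compiler has already proven the destination is a GC heap location.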
diff --git a/src/Native/Runtime/arm64/WriteBarriers.S b/src/Native/Runtime/arm64/WriteBarriers.S
index a1e3c103673..15f6a12cb7b 100644
--- a/src/Native/Runtime/arm64/WriteBarriers.S
+++ b/src/Native/Runtime/arm64/WriteBarriers.S
@@ -189,18 +189,14 @@ INVALIDGCVALUE EQU 0xCCCCCCCD
 // On exit:
 //   x1 : trashed
 //   x9 : trashed
-    LEAF_ENTRY RhpCheckedAssignRef, _TEXT
-    ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+LEAF_ENTRY RhpCheckedAssignRef, _TEXT
     ALTERNATE_ENTRY RhpCheckedAssignRefX1
-    ALTERNATE_ENTRY RhpCheckedAssignRefX1AVLocation
-
-    stlr x1, [x0]
-
-    INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x1
-
-    ret
+    mov x14, x0 ; x14 = dst
+    mov x15, x1 ; x15 = val
+    b RhpCheckedAssignRefArm64
 
-    LEAF_END RhpCheckedAssignRef, _TEXT
+LEAF_END RhpCheckedAssignRef, _TEXT
 
 // RhpAssignRef(Object** dst, Object* src)
 //
 // On exit:
 //   x1 : trashed
 //   x9 : trashed
-    LEAF_ENTRY RhpAssignRef, _TEXT
-    ALTERNATE_ENTRY RhpAssignRefAVLocation
+LEAF_ENTRY RhpAssignRef, _TEXT
     ALTERNATE_ENTRY RhpAssignRefX1
-    ALTERNATE_ENTRY RhpAssignRefX1AVLocation
-
-    stlr x1, [x0]
-
-    INSERT_UNCHECKED_WRITE_BARRIER_CORE x0, x1, 9, x1
-
-    ret
+    mov x14, x0 ; x14 = dst
+    mov x15, x1 ; x15 = val
+    b RhpAssignRefArm64
 
-    LEAF_END RhpAssignRef, _TEXT
+LEAF_END RhpAssignRef, _TEXT
 
 // Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon
 // successful updates.
@@ -260,7 +252,7 @@ CmpXchgRetry:
     stlxr w9, x1, [x0]
     cbnz w9, CmpXchgRetry
 
-    // We have successfully updated the value of the objectref so now we need a GC write barrier.
+    // We have successfully updated the value of the objectref so now we need a GC write barrier. 
     // The following barrier code takes the destination in x0 and the value in x1 so the arguments are
     // already correctly set up.
@@ -269,6 +261,7 @@ CmpXchgRetry:
 CmpXchgNoUpdate:
     // x10 still contains the original value.
     mov x0, x10
+    ArmInterlockedOperationBarrier
    ret lr
 
 LEAF_END RhpCheckedLockCmpXchg, _TEXT
@@ -309,6 +302,7 @@ ExchangeRetry:
 
     // x10 still contains the original value.
     mov x0, x10
+    ArmInterlockedOperationBarrier
     ret
 
 LEAF_END RhpCheckedXchg, _TEXT
 
 LEAF_ENTRY RhpAssignRefArm64, _TEXT
+    ALTERNATE_ENTRY RhpAssignRefAVLocation
+    ALTERNATE_ENTRY RhpAssignRefX1AVLocation
 
     stlr x15, [x14]
 
     INSERT_UNCHECKED_WRITE_BARRIER_CORE x14, x15, 12, X14
@@ -333,6 +329,8 @@ LEAF_END RhpAssignRefArm64, _TEXT
 //   x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
 //
 LEAF_ENTRY RhpCheckedAssignRefArm64, _TEXT
+    ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation
+    ALTERNATE_ENTRY RhpCheckedAssignRefX1AVLocation
 
     stlr x15, [x14]

diff --git a/src/Native/Runtime/arm64/WriteBarriers.asm b/src/Native/Runtime/arm64/WriteBarriers.asm
index 204c79d00c4..0e331cb8442 100644
--- a/src/Native/Runtime/arm64/WriteBarriers.asm
+++ b/src/Native/Runtime/arm64/WriteBarriers.asm
@@ -271,6 +271,7 @@ CmpXchgRetry
 CmpXchgNoUpdate
     ;; x10 still contains the original value.
     mov x0, x10
+    ArmInterlockedOperationBarrier
     ret lr
 
 LEAF_END RhpCheckedLockCmpXchg
@@ -311,6 +312,7 @@ ExchangeRetry
     ;; x10 still contains the original value.
     mov x0, x10
+    ArmInterlockedOperationBarrier
     ret
 
 LEAF_END RhpCheckedXchg

diff --git a/src/Native/Runtime/unix/unixasmmacrosarm64.inc b/src/Native/Runtime/unix/unixasmmacrosarm64.inc
index 3e3bbdbba86..52f3e0e6f6c 100644
--- a/src/Native/Runtime/unix/unixasmmacrosarm64.inc
+++ b/src/Native/Runtime/unix/unixasmmacrosarm64.inc
@@ -24,6 +24,11 @@ C_FUNC(\Name):
 C_FUNC(\Name):
 .endm
 
+.macro LABELED_RETURN_ADDRESS Name
+        .global C_FUNC(\Name)
+C_FUNC(\Name):
+.endm
+
 .macro LEAF_ENTRY Name, Section
         .global C_FUNC(\Name)
         .type \Name, %function
@@ -37,8 +42,9 @@ C_FUNC(\Name):
 .endm
 
 .macro PREPARE_EXTERNAL_VAR Name, HelperReg
-        ldr \HelperReg, [pc, #C_FUNC(\Name)@GOTPCREL]
-.endm
+        adrp \HelperReg, C_FUNC(\Name)
+        add \HelperReg, \HelperReg, :lo12:C_FUNC(\Name)
+.endm
 
.macro PROLOG_STACK_ALLOC Size
        sub sp, sp, \Size
@@ -152,7 +158,7 @@ C_FUNC(\Name):
 .macro INLINE_GET_TLS_VAR target, var
        mrs \target, tpidr_el0
        add \target, \target, #:tprel_hi12:\var, lsl #12
-        add \target, \target, #:tprel_lo12_nc:\var
+        add \target, \target, #:tprel_lo12_nc:\var
 .endm
 
@@ -164,6 +170,10 @@ C_FUNC(\Name):
        INLINE_GET_TLS_VAR \target, tls_CurrentThread
 .endm
 
+.macro ArmInterlockedOperationBarrier
+        dmb ish
+.endm
+
 .macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2
        //
        // Thread::Unhijack()
@@ -232,12 +242,16 @@ DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP
        EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x80
 .endm
 
+// Bit position for the flags above, to be used with tbz / tbnz instructions
+PTFF_THREAD_ABORT_BIT = 36
+
 //
 // CONSTANTS -- INTEGER
 //
 #define TSF_Attached 0x01
 #define TSF_SuppressGcStress 0x08
 #define TSF_DoNotTriggerGc 0x10
+#define TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC 0x18
 
 // Bit position for the flags above, to be used with tbz / tbnz instructions
 TrapThreadsFlags_AbortInProgress_Bit = 0
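The PREPARE_EXTERNAL_VAR rewrite above matters because the old [pc, #symbol@GOTPCREL] form is not a valid AArch64 addressing mode; materializing an external symbol's address takes the usual two-instruction ADRP + :lo12: sequence. The same pattern falls out of the C compiler. A sketch, assuming a small-code-model, non-PIC AArch64 build (the wrapper function is illustrative):

    #include <stdint.h>

    extern uint32_t RhpTrapThreads;   /* flag word exported by the runtime */

    uint32_t read_trap_flags(void)
    {
        return RhpTrapThreads;
        /* Typical AArch64 codegen:
               adrp x8, RhpTrapThreads              // 4KB page containing the symbol
               ldr  w0, [x8, :lo12:RhpTrapThreads]  // plus the low 12 offset bits
               ret                                                                 */
    }

This also explains the paired "+ ldr" lines in the ExceptionHandling.S and PInvoke.S hunks earlier in this patch: PREPARE_EXTERNAL_VAR now yields only the address, so the flag value must be loaded explicitly.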
diff --git a/src/Native/libunwind/src/UnwindCursor.hpp b/src/Native/libunwind/src/UnwindCursor.hpp
index 29c3dc7733a..3a6c5131c93 100644
--- a/src/Native/libunwind/src/UnwindCursor.hpp
+++ b/src/Native/libunwind/src/UnwindCursor.hpp
@@ -1904,7 +1904,7 @@ void UnwindCursor<A, R>::setInfoBasedOnIPRegister(bool isReturnAddress) {
 #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
   // If there is dwarf unwind info, look there next.
 #if defined(_LIBUNWIND_USE_ONLY_DWARF_INDEX)
-  if (sects.dwarf_index_section != 0) {
+  if (sects.dwarf_index_section != 0) {
 #else
   if (sects.dwarf_section != 0) {
 #endif

From ca7d15a7ec6509901f3510851bd6f2ccf2cb187f Mon Sep 17 00:00:00 2001
From: RalfKornmannEnvision
Date: Thu, 27 Aug 2020 16:14:17 +0200
Subject: [PATCH 35/36] Arm64 (#11)

* ARM64: remove left over code

---
 src/Native/ObjWriter/objwriter.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/Native/ObjWriter/objwriter.cpp b/src/Native/ObjWriter/objwriter.cpp
index 03875f595f9..bd00b7bd1a3 100644
--- a/src/Native/ObjWriter/objwriter.cpp
+++ b/src/Native/ObjWriter/objwriter.cpp
@@ -359,10 +359,7 @@ void ObjectWriter::EmitSymbolDef(const char *SymbolName, bool global) {
     }
   }
 
-  if (Sym->isUndefined())
-  {
-    Streamer->EmitLabel(Sym);
-  }
+  Streamer->EmitLabel(Sym);
 }
 
 const MCSymbolRefExpr *

From a83496ee4a4bba307235068d67b7a131f38daca9 Mon Sep 17 00:00:00 2001
From: Jan Kotas
Date: Thu, 27 Aug 2020 08:07:51 -0700
Subject: [PATCH 36/36] Update src/Native/Runtime/arm64/WriteBarriers.S

---
 src/Native/Runtime/arm64/WriteBarriers.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Native/Runtime/arm64/WriteBarriers.S b/src/Native/Runtime/arm64/WriteBarriers.S
index 15f6a12cb7b..ec09e7524c0 100644
--- a/src/Native/Runtime/arm64/WriteBarriers.S
+++ b/src/Native/Runtime/arm64/WriteBarriers.S
@@ -252,7 +252,7 @@ CmpXchgRetry:
     stlxr w9, x1, [x0]
     cbnz w9, CmpXchgRetry
 
-    // We have successfully updated the value of the objectref so now we need a GC write barrier. 
+    // We have successfully updated the value of the objectref so now we need a GC write barrier.
     // The following barrier code takes the destination in x0 and the value in x1 so the arguments are
     // already correctly set up.