From f1341643325d819db52f3fc6e9df185586f9324a Mon Sep 17 00:00:00 2001
From: Samir Jindel
Date: Thu, 19 Dec 2019 16:41:24 +0000
Subject: [PATCH] [vm] Enable multiple entry-points on ARM64.

Addresses https://github.com/dart-lang/sdk/issues/34162

Change-Id: I7126f8c9b470041aaa260255293327f67d64d1bc
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/128063
Commit-Queue: Samir Jindel
Reviewed-by: Martin Kustermann
---
 runtime/tests/vm/vm.status                    |  2 +-
 .../vm/compiler/assembler/assembler_arm64.cc  | 25 +++----
 .../vm/compiler/assembler/assembler_arm64.h   | 20 +++---
 .../backend/flow_graph_compiler_arm64.cc      | 27 ++++----
 runtime/vm/compiler/backend/il_arm64.cc       |  3 +-
 .../vm/compiler/stub_code_compiler_arm64.cc   | 66 +++++++++++++++----
 runtime/vm/object.cc                          |  5 +-
 7 files changed, 99 insertions(+), 49 deletions(-)

diff --git a/runtime/tests/vm/vm.status b/runtime/tests/vm/vm.status
index 5aa43f5e297a..2fe73c99f2ac 100644
--- a/runtime/tests/vm/vm.status
+++ b/runtime/tests/vm/vm.status
@@ -225,7 +225,7 @@ cc/Service_Profile: SkipByDesign
 [ $compiler == dartkb ]
 dart/generic_field_invocation_test: SkipByDesign # KBC interpreters do not support --no_lazy_dispatchers
-[ $builder_tag == bytecode_interpreter || $hot_reload || $hot_reload_rollback || $arch != arm && $arch != simarm && $arch != x64 || $compiler != dartk && $compiler != dartkp && $compiler != dartkb ]
+[ $builder_tag == bytecode_interpreter || $hot_reload || $hot_reload_rollback || $arch != arm && $arch != simarm && $arch != x64 && $arch != simarm64 && $arch != arm64 || $compiler != dartk && $compiler != dartkp && $compiler != dartkb ]
 dart/entrypoints/*: SkipByDesign # These tests are for compiler optimizations and very sensitive to when functions are optimized, so they are disabled on hotreload and optcounter bots.
[ $builder_tag == crossword || $builder_tag == crossword_ast || $compiler != dartkp || $system != linux && $system != macos && $system != windows ] diff --git a/runtime/vm/compiler/assembler/assembler_arm64.cc b/runtime/vm/compiler/assembler/assembler_arm64.cc index 86991ee5b887..721a93141ca1 100644 --- a/runtime/vm/compiler/assembler/assembler_arm64.cc +++ b/runtime/vm/compiler/assembler/assembler_arm64.cc @@ -619,16 +619,14 @@ void Assembler::Branch(const Code& target, br(TMP); } -void Assembler::BranchPatchable(const Code& code) { - Branch(code, PP, ObjectPoolBuilderEntry::kPatchable); -} - void Assembler::BranchLink(const Code& target, - ObjectPoolBuilderEntry::Patchability patchable) { + ObjectPoolBuilderEntry::Patchability patchable, + CodeEntryKind entry_kind) { const int32_t offset = target::ObjectPool::element_offset( object_pool_builder().FindObject(ToObject(target), patchable)); LoadWordFromPoolOffset(CODE_REG, offset); - ldr(TMP, FieldAddress(CODE_REG, target::Code::entry_point_offset())); + ldr(TMP, + FieldAddress(CODE_REG, target::Code::entry_point_offset(entry_kind))); blr(TMP); } @@ -638,11 +636,13 @@ void Assembler::BranchLinkToRuntime() { } void Assembler::BranchLinkWithEquivalence(const Code& target, - const Object& equivalence) { + const Object& equivalence, + CodeEntryKind entry_kind) { const int32_t offset = target::ObjectPool::element_offset( object_pool_builder().FindObject(ToObject(target), equivalence)); LoadWordFromPoolOffset(CODE_REG, offset); - ldr(TMP, FieldAddress(CODE_REG, target::Code::entry_point_offset())); + ldr(TMP, + FieldAddress(CODE_REG, target::Code::entry_point_offset(entry_kind))); blr(TMP); } @@ -1514,10 +1514,10 @@ void Assembler::LeaveStubFrame() { // R0 receiver, R5 ICData entries array // Preserve R4 (ARGS_DESC_REG), not required today, but maybe later. void Assembler::MonomorphicCheckedEntryJIT() { - ASSERT(has_single_entry_point_); has_single_entry_point_ = false; const bool saved_use_far_branches = use_far_branches(); set_use_far_branches(false); + const intptr_t start = CodeSize(); Label immediate, miss; Bind(&miss); @@ -1525,7 +1525,8 @@ void Assembler::MonomorphicCheckedEntryJIT() { br(IP0); Comment("MonomorphicCheckedEntry"); - ASSERT(CodeSize() == target::Instructions::kMonomorphicEntryOffsetJIT); + ASSERT(CodeSize() - start == + target::Instructions::kMonomorphicEntryOffsetJIT); const intptr_t cid_offset = target::Array::element_offset(0); const intptr_t count_offset = target::Array::element_offset(1); @@ -1541,7 +1542,8 @@ void Assembler::MonomorphicCheckedEntryJIT() { LoadImmediate(R4, 0); // GC-safe for OptimizeInvokedFunction. // Fall through to unchecked entry. - ASSERT(CodeSize() == target::Instructions::kPolymorphicEntryOffsetJIT); + ASSERT(CodeSize() - start == + target::Instructions::kPolymorphicEntryOffsetJIT); set_use_far_branches(saved_use_far_branches); } @@ -1549,7 +1551,6 @@ void Assembler::MonomorphicCheckedEntryJIT() { // R0 receiver, R5 guarded cid as Smi. // Preserve R4 (ARGS_DESC_REG), not required today, but maybe later. 
void Assembler::MonomorphicCheckedEntryAOT() { - ASSERT(has_single_entry_point_); has_single_entry_point_ = false; bool saved_use_far_branches = use_far_branches(); set_use_far_branches(false); diff --git a/runtime/vm/compiler/assembler/assembler_arm64.h b/runtime/vm/compiler/assembler/assembler_arm64.h index 33212a3ea3ad..0493e0809ac8 100644 --- a/runtime/vm/compiler/assembler/assembler_arm64.h +++ b/runtime/vm/compiler/assembler/assembler_arm64.h @@ -1371,14 +1371,15 @@ class Assembler : public AssemblerBase { Register pp, ObjectPoolBuilderEntry::Patchability patchable = ObjectPoolBuilderEntry::kNotPatchable); - void BranchPatchable(const Code& code); void BranchLink(const Code& code, ObjectPoolBuilderEntry::Patchability patchable = - ObjectPoolBuilderEntry::kNotPatchable); + ObjectPoolBuilderEntry::kNotPatchable, + CodeEntryKind entry_kind = CodeEntryKind::kNormal); - void BranchLinkPatchable(const Code& code) { - BranchLink(code, ObjectPoolBuilderEntry::kPatchable); + void BranchLinkPatchable(const Code& code, + CodeEntryKind entry_kind = CodeEntryKind::kNormal) { + BranchLink(code, ObjectPoolBuilderEntry::kPatchable, entry_kind); } void BranchLinkToRuntime(); @@ -1388,7 +1389,10 @@ class Assembler : public AssemblerBase { // Emit a call that shares its object pool entries with other calls // that have the same equivalence marker. - void BranchLinkWithEquivalence(const Code& code, const Object& equivalence); + void BranchLinkWithEquivalence( + const Code& code, + const Object& equivalence, + CodeEntryKind entry_kind = CodeEntryKind::kNormal); void AddImmediate(Register dest, int64_t imm) { AddImmediate(dest, dest, imm); @@ -1675,9 +1679,9 @@ class Assembler : public AssemblerBase { // Returns object data offset for address calculation; for heap objects also // accounts for the tag. static int32_t HeapDataOffset(bool is_external, intptr_t cid) { - return is_external ? - 0 : - (target::Instance::DataOffsetFor(cid) - kHeapObjectTag); + return is_external + ? 0 + : (target::Instance::DataOffsetFor(cid) - kHeapObjectTag); } static int32_t EncodeImm26BranchOffset(int64_t imm, int32_t instr) { diff --git a/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc b/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc index 3d23b399c81a..c490e72e1caa 100644 --- a/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc +++ b/runtime/vm/compiler/backend/flow_graph_compiler_arm64.cc @@ -967,8 +967,7 @@ void FlowGraphCompiler::GenerateDartCall(intptr_t deopt_id, RawPcDescriptors::Kind kind, LocationSummary* locs, Code::EntryKind entry_kind) { - // TODO(sjindel/entrypoints): Support multiple entrypoints on ARM64. - __ BranchLinkPatchable(stub); + __ BranchLinkPatchable(stub, entry_kind); EmitCallsiteMetadata(token_pos, deopt_id, kind, locs); } @@ -978,7 +977,6 @@ void FlowGraphCompiler::GenerateStaticDartCall(intptr_t deopt_id, LocationSummary* locs, const Function& target, Code::EntryKind entry_kind) { - // TODO(sjindel/entrypoints): Support multiple entrypoints on ARM64. if (FLAG_precompiled_mode && FLAG_use_bare_instructions) { AddPcRelativeCallTarget(target, entry_kind); __ GenerateUnRelocatedPcRelativeCall(); @@ -990,7 +988,7 @@ void FlowGraphCompiler::GenerateStaticDartCall(intptr_t deopt_id, // instead. 
ASSERT(is_optimizing()); const auto& stub = StubCode::CallStaticFunction(); - __ BranchLinkWithEquivalence(stub, target); + __ BranchLinkWithEquivalence(stub, target, entry_kind); EmitCallsiteMetadata(token_pos, deopt_id, kind, locs); AddStaticCallTarget(target, entry_kind); } @@ -1026,7 +1024,6 @@ void FlowGraphCompiler::EmitOptimizedInstanceCall(const Code& stub, TokenPosition token_pos, LocationSummary* locs, Code::EntryKind entry_kind) { - // TODO(sjindel/entrypoints): Support multiple entrypoints on ARM64. ASSERT(Array::Handle(zone(), ic_data.arguments_descriptor()).Length() > 0); // Each ICData propagated from unoptimized to optimized code contains the // function that corresponds to the Dart function of that IC call. Due @@ -1038,7 +1035,8 @@ void FlowGraphCompiler::EmitOptimizedInstanceCall(const Code& stub, __ LoadObject(R6, parsed_function().function()); __ LoadFromOffset(R0, SP, (ic_data.CountWithoutTypeArgs() - 1) * kWordSize); __ LoadUniqueObject(R5, ic_data); - GenerateDartCall(deopt_id, token_pos, stub, RawPcDescriptors::kIcCall, locs); + GenerateDartCall(deopt_id, token_pos, stub, RawPcDescriptors::kIcCall, locs, + entry_kind); __ Drop(ic_data.CountWithTypeArgs()); } @@ -1122,7 +1120,6 @@ void FlowGraphCompiler::EmitInstanceCallAOT(const ICData& ic_data, TokenPosition token_pos, LocationSummary* locs, Code::EntryKind entry_kind) { - // TODO(34162): Support multiple entry-points on ARM64. ASSERT(ic_data.NumArgsTested() == 1); const Code& initial_stub = StubCode::UnlinkedCall(); const UnlinkedCall& data = @@ -1147,9 +1144,13 @@ void FlowGraphCompiler::EmitInstanceCallAOT(const ICData& ic_data, } else { __ LoadDoubleWordFromPoolOffset(R5, CODE_REG, ObjectPool::element_offset(data_index)); - __ ldr(LR, compiler::FieldAddress( - CODE_REG, - Code::entry_point_offset(Code::EntryKind::kMonomorphic))); + const intptr_t entry_point_offset = + entry_kind == Code::EntryKind::kNormal + ? compiler::target::Code::entry_point_offset( + Code::EntryKind::kMonomorphic) + : compiler::target::Code::entry_point_offset( + Code::EntryKind::kMonomorphicUnchecked); + __ ldr(LR, compiler::FieldAddress(CODE_REG, entry_point_offset)); } __ blr(LR); @@ -1164,12 +1165,11 @@ void FlowGraphCompiler::EmitUnoptimizedStaticCall(intptr_t count_with_type_args, LocationSummary* locs, const ICData& ic_data, Code::EntryKind entry_kind) { - // TODO(34162): Support multiple entry-points on ARM64. const Code& stub = StubCode::UnoptimizedStaticCallEntry(ic_data.NumArgsTested()); __ LoadObject(R5, ic_data); GenerateDartCall(deopt_id, token_pos, stub, - RawPcDescriptors::kUnoptStaticCall, locs); + RawPcDescriptors::kUnoptStaticCall, locs, entry_kind); __ Drop(count_with_type_args); } @@ -1181,7 +1181,6 @@ void FlowGraphCompiler::EmitOptimizedStaticCall( TokenPosition token_pos, LocationSummary* locs, Code::EntryKind entry_kind) { - // TODO(sjindel/entrypoints): Support multiple entrypoints on ARM64. ASSERT(!function.IsClosureFunction()); if (function.HasOptionalParameters() || function.IsGeneric()) { __ LoadObject(R4, arguments_descriptor); @@ -1193,7 +1192,7 @@ void FlowGraphCompiler::EmitOptimizedStaticCall( // Do not use the code from the function, but let the code be patched so that // we can record the outgoing edges to other code. 
GenerateStaticDartCall(deopt_id, token_pos, RawPcDescriptors::kOther, locs, - function); + function, entry_kind); __ Drop(count_with_type_args); } diff --git a/runtime/vm/compiler/backend/il_arm64.cc b/runtime/vm/compiler/backend/il_arm64.cc index 0b26d323cb07..12c0551fc8db 100644 --- a/runtime/vm/compiler/backend/il_arm64.cc +++ b/runtime/vm/compiler/backend/il_arm64.cc @@ -265,7 +265,8 @@ void ClosureCallInstr::EmitNativeCode(FlowGraphCompiler* compiler) { // R0: Function. ASSERT(locs()->in(0).reg() == R0); __ LoadFieldFromOffset(CODE_REG, R0, Function::code_offset()); - __ LoadFieldFromOffset(R2, R0, Function::entry_point_offset()); + __ LoadFieldFromOffset( + R2, CODE_REG, compiler::target::Code::entry_point_offset(entry_kind())); // R2: instructions. // R5: Smi 0 (no IC data; the lazy-compile stub expects a GC-safe value). diff --git a/runtime/vm/compiler/stub_code_compiler_arm64.cc b/runtime/vm/compiler/stub_code_compiler_arm64.cc index c1f1975be7bd..114e43b78c7d 100644 --- a/runtime/vm/compiler/stub_code_compiler_arm64.cc +++ b/runtime/vm/compiler/stub_code_compiler_arm64.cc @@ -1370,7 +1370,6 @@ void StubCodeCompiler::GenerateInvokeDartCodeStub(Assembler* assembler) { __ Pop(R4); __ StoreToOffset(R4, THR, target::Thread::vm_tag_offset()); - #if defined(TARGET_OS_FUCHSIA) __ mov(R3, THR); #endif @@ -2107,6 +2106,20 @@ static void EmitFastSmiOp(Assembler* assembler, __ ret(); } +// Saves the offset of the target entry-point (from the Function) into R8. +// +// Must be the first code generated, since any code before will be skipped in +// the unchecked entry-point. +static void GenerateRecordEntryPoint(Assembler* assembler) { + Label done; + __ LoadImmediate(R8, target::Function::entry_point_offset() - kHeapObjectTag); + __ b(&done); + __ BindUncheckedEntryPoint(); + __ LoadImmediate( + R8, target::Function::unchecked_entry_point_offset() - kHeapObjectTag); + __ Bind(&done); +} + // Generate inline cache check for 'num_args'. // R0: receiver (if instance call) // R5: ICData @@ -2126,6 +2139,17 @@ void StubCodeCompiler::GenerateNArgsCheckInlineCacheStub( Optimized optimized, CallType type, Exactness exactness) { + const bool save_entry_point = kind == Token::kILLEGAL; + if (save_entry_point) { + GenerateRecordEntryPoint(assembler); + } + + if (optimized == kOptimized) { + GenerateOptimizedUsageCounterIncrement(assembler); + } else { + GenerateUsageCounterIncrement(assembler, /*scratch=*/R6); + } + ASSERT(exactness == kIgnoreExactness); // Unimplemented. ASSERT(num_args == 1 || num_args == 2); #if defined(DEBUG) @@ -2259,6 +2283,10 @@ void StubCodeCompiler::GenerateNArgsCheckInlineCacheStub( // setup space on stack for result (target code object). __ Push(R4); // Preserve arguments descriptor array. __ Push(R5); // Preserve IC Data. + if (save_entry_point) { + __ SmiTag(R8); + __ Push(R8); + } // Setup space on stack for the result (target code object). __ Push(ZR); // Push call arguments. @@ -2274,6 +2302,10 @@ void StubCodeCompiler::GenerateNArgsCheckInlineCacheStub( // Pop returned function object into R0. // Restore arguments descriptor array and IC data array. __ Pop(R0); // Pop returned function object into R0. + if (save_entry_point) { + __ Pop(R8); + __ SmiUntag(R8); + } __ Pop(R5); // Restore IC Data. __ Pop(R4); // Restore arguments descriptor array. __ RestoreCodePointer(); @@ -2305,19 +2337,32 @@ void StubCodeCompiler::GenerateNArgsCheckInlineCacheStub( __ Bind(&call_target_function); // R0: target function. 
__ LoadFieldFromOffset(CODE_REG, R0, target::Function::code_offset()); - __ LoadFieldFromOffset(R2, R0, target::Function::entry_point_offset()); + if (save_entry_point) { + __ add(R2, R0, Operand(R8)); + __ ldr(R2, Address(R2, 0)); + } else { + __ LoadFieldFromOffset(R2, R0, target::Function::entry_point_offset()); + } __ br(R2); #if !defined(PRODUCT) - if (!optimized) { + if (optimized == kUnoptimized) { __ Bind(&stepping); __ EnterStubFrame(); if (type == kInstanceCall) { __ Push(R0); // Preserve receiver. } + if (save_entry_point) { + __ SmiTag(R8); + __ Push(R8); + } __ Push(R5); // Preserve IC data. __ CallRuntime(kSingleStepHandlerRuntimeEntry, 0); __ Pop(R5); + if (save_entry_point) { + __ Pop(R8); + __ SmiUntag(R8); + } if (type == kInstanceCall) { __ Pop(R0); } @@ -2333,7 +2378,6 @@ void StubCodeCompiler::GenerateNArgsCheckInlineCacheStub( // LR: return address void StubCodeCompiler::GenerateOneArgCheckInlineCacheStub( Assembler* assembler) { - GenerateUsageCounterIncrement(assembler, /* scratch */ R6); GenerateNArgsCheckInlineCacheStub( assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL, kUnoptimized, kInstanceCall, kIgnoreExactness); @@ -2352,7 +2396,6 @@ void StubCodeCompiler::GenerateOneArgCheckInlineCacheWithExactnessCheckStub( // LR: return address void StubCodeCompiler::GenerateTwoArgsCheckInlineCacheStub( Assembler* assembler) { - GenerateUsageCounterIncrement(assembler, /* scratch */ R6); GenerateNArgsCheckInlineCacheStub( assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL, kUnoptimized, kInstanceCall, kIgnoreExactness); @@ -2362,7 +2405,6 @@ void StubCodeCompiler::GenerateTwoArgsCheckInlineCacheStub( // R5: ICData // LR: return address void StubCodeCompiler::GenerateSmiAddInlineCacheStub(Assembler* assembler) { - GenerateUsageCounterIncrement(assembler, /* scratch */ R6); GenerateNArgsCheckInlineCacheStub( assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kADD, kUnoptimized, kInstanceCall, kIgnoreExactness); @@ -2372,7 +2414,6 @@ void StubCodeCompiler::GenerateSmiAddInlineCacheStub(Assembler* assembler) { // R5: ICData // LR: return address void StubCodeCompiler::GenerateSmiLessInlineCacheStub(Assembler* assembler) { - GenerateUsageCounterIncrement(assembler, /* scratch */ R6); GenerateNArgsCheckInlineCacheStub( assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kLT, kUnoptimized, kInstanceCall, kIgnoreExactness); @@ -2382,7 +2423,6 @@ void StubCodeCompiler::GenerateSmiLessInlineCacheStub(Assembler* assembler) { // R5: ICData // LR: return address void StubCodeCompiler::GenerateSmiEqualInlineCacheStub(Assembler* assembler) { - GenerateUsageCounterIncrement(assembler, /* scratch */ R6); GenerateNArgsCheckInlineCacheStub( assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kEQ, kUnoptimized, kInstanceCall, kIgnoreExactness); @@ -2394,7 +2434,6 @@ void StubCodeCompiler::GenerateSmiEqualInlineCacheStub(Assembler* assembler) { // LR: return address void StubCodeCompiler::GenerateOneArgOptimizedCheckInlineCacheStub( Assembler* assembler) { - GenerateOptimizedUsageCounterIncrement(assembler); GenerateNArgsCheckInlineCacheStub( assembler, 1, kInlineCacheMissHandlerOneArgRuntimeEntry, Token::kILLEGAL, kOptimized, kInstanceCall, kIgnoreExactness); @@ -2416,7 +2455,6 @@ void StubCodeCompiler:: // LR: return address void StubCodeCompiler::GenerateTwoArgsOptimizedCheckInlineCacheStub( Assembler* assembler) { - GenerateOptimizedUsageCounterIncrement(assembler); GenerateNArgsCheckInlineCacheStub( 
assembler, 2, kInlineCacheMissHandlerTwoArgsRuntimeEntry, Token::kILLEGAL, kOptimized, kInstanceCall, kIgnoreExactness); @@ -2426,6 +2464,7 @@ void StubCodeCompiler::GenerateTwoArgsOptimizedCheckInlineCacheStub( // LR: return address void StubCodeCompiler::GenerateZeroArgsUnoptimizedStaticCallStub( Assembler* assembler) { + GenerateRecordEntryPoint(assembler); GenerateUsageCounterIncrement(assembler, /* scratch */ R6); #if defined(DEBUG) { @@ -2478,14 +2517,19 @@ void StubCodeCompiler::GenerateZeroArgsUnoptimizedStaticCallStub( // Get function and call it, if possible. __ LoadFromOffset(R0, R6, target_offset); __ LoadFieldFromOffset(CODE_REG, R0, target::Function::code_offset()); - __ LoadFieldFromOffset(R2, R0, target::Function::entry_point_offset()); + __ add(R2, R0, Operand(R8)); + __ ldr(R2, Address(R2, 0)); __ br(R2); #if !defined(PRODUCT) __ Bind(&stepping); __ EnterStubFrame(); __ Push(R5); // Preserve IC data. + __ SmiTag(R8); + __ Push(R8); __ CallRuntime(kSingleStepHandlerRuntimeEntry, 0); + __ Pop(R8); + __ SmiUntag(R8); __ Pop(R5); __ RestoreCodePointer(); __ LeaveStubFrame(); diff --git a/runtime/vm/object.cc b/runtime/vm/object.cc index de393225b4a0..b4965482cb5a 100644 --- a/runtime/vm/object.cc +++ b/runtime/vm/object.cc @@ -8426,8 +8426,9 @@ RawCode* Function::EnsureHasCode() const { } bool Function::MayHaveUncheckedEntryPoint(Isolate* I) const { -// TODO(#34162): Support the other architectures. -#if defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_ARM) +// TODO(#34162): Support IA32. +#if defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_ARM) || \ + defined(TARGET_ARCH_ARM64) return FLAG_enable_multiple_entrypoints && (NeedsArgumentTypeChecks(I) || IsImplicitClosureFunction()); #else
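The stub changes above all revolve around one idea: a target can be entered either through its normal entry point (which performs argument type checks) or through an unchecked entry point that skips them, and the IC stubs must remember which of the two Function entry-point fields to load once the target has actually been resolved. The standalone C++ sketch below is not part of the commit; it is a simplified model of that mechanism under assumed names (FakeFunction, EntryKind, RecordEntryPoint, and LoadTargetEntry are hypothetical and do not mirror the real VM object layout).

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Call sites select an entry by kind; the "unchecked" variants skip argument
// type checks when the compiler has already proven the arguments well-typed.
enum class EntryKind { kNormal, kUnchecked, kMonomorphic, kMonomorphicUnchecked };

// Simplified stand-in for a function object with two entry addresses,
// analogous to Function::entry_point_offset() and
// Function::unchecked_entry_point_offset() in the patch.
struct FakeFunction {
  std::uintptr_t entry_point_;
  std::uintptr_t unchecked_entry_point_;
};

// Analogous to GenerateRecordEntryPoint: the stub records *which field offset*
// to load through (kept in R8 on ARM64), because the concrete target function
// is only known later, after the IC lookup.
std::size_t RecordEntryPoint(EntryKind kind) {
  const bool unchecked = kind == EntryKind::kUnchecked ||
                         kind == EntryKind::kMonomorphicUnchecked;
  return unchecked ? offsetof(FakeFunction, unchecked_entry_point_)
                   : offsetof(FakeFunction, entry_point_);
}

// Analogous to the "add R2, R0, Operand(R8); ldr R2, Address(R2, 0); br R2"
// sequence: load the entry address stored at the recorded offset inside the
// resolved target function.
std::uintptr_t LoadTargetEntry(const FakeFunction& target,
                               std::size_t entry_offset) {
  return *reinterpret_cast<const std::uintptr_t*>(
      reinterpret_cast<const unsigned char*>(&target) + entry_offset);
}

int main() {
  const FakeFunction target{0x1000, 0x1010};
  const std::size_t offset = RecordEntryPoint(EntryKind::kUnchecked);
  std::printf("branching to %#llx\n",
              static_cast<unsigned long long>(LoadTargetEntry(target, offset)));
  return 0;
}

In the patch the same recorded offset is reused both on the fast path and after the miss handler returns, which is why the stubs preserve R8 across the runtime calls, Smi-tagging it before pushing it so the GC only ever sees tagged values on the stack.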