Skip to content

Commit

Permalink
8325821: [REDO] use "dmb.ishst+dmb.ishld" for release barrier
Browse files Browse the repository at this point in the history
  • Loading branch information
kuaiwei committed May 17, 2024
1 parent 7c750fd commit b71a1b3
Show file tree
Hide file tree
Showing 9 changed files with 343 additions and 13 deletions.
7 changes: 4 additions & 3 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -7780,7 +7780,7 @@ instruct membar_acquire() %{
ins_cost(VOLATILE_REF_COST);

format %{ "membar_acquire\n\t"
"dmb ish" %}
"dmb ishld" %}

ins_encode %{
__ block_comment("membar_acquire");
Expand Down Expand Up @@ -7834,11 +7834,12 @@ instruct membar_release() %{
ins_cost(VOLATILE_REF_COST);

format %{ "membar_release\n\t"
"dmb ish" %}
"dmb ishst\n\tdmb ishld" %}

ins_encode %{
__ block_comment("membar_release");
__ membar(Assembler::LoadStore|Assembler::StoreStore);
__ membar(Assembler::StoreStore);
__ membar(Assembler::LoadStore);
%}
ins_pipe(pipe_serial);
%}
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/cpu/aarch64/globals_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ define_pd_global(intx, InlineSmallCode, 1000);
range(1, 99) \
product(ccstr, UseBranchProtection, "none", \
"Branch Protection to use: none, standard, pac-ret") \
product(bool, AlwaysMergeDMB, true, DIAGNOSTIC, \
"Always merge DMB instructions in code emission") \

// end of ARCH_FLAGS

Expand Down
31 changes: 24 additions & 7 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2350,14 +2350,31 @@ void MacroAssembler::membar(Membar_mask_bits order_constraint) {
address last = code()->last_insn();
if (last != nullptr && nativeInstruction_at(last)->is_Membar() && prev == last) {
NativeMembar *bar = NativeMembar_at(prev);
// We are merging two memory barrier instructions. On AArch64 we
// can do this simply by ORing them together.
bar->set_kind(bar->get_kind() | order_constraint);
BLOCK_COMMENT("merged membar");
} else {
code()->set_last_insn(pc());
dmb(Assembler::barrier(order_constraint));
// Don't promote DMB ST|DMB LD to DMB (a full barrier) because
// doing so would introduce a StoreLoad which the caller did not
// intend
if (AlwaysMergeDMB || bar->get_kind() == order_constraint
|| bar->get_kind() == AnyAny
|| order_constraint == AnyAny) {
// We are merging two memory barrier instructions. On AArch64 we
// can do this simply by ORing them together.
bar->set_kind(bar->get_kind() | order_constraint);
BLOCK_COMMENT("merged membar");
return;
} else if (!AlwaysMergeDMB){
// A special case like "DMB ST; DMB LD; DMB ST": the last DMB can be skipped.
// We need to check the last two instructions.
address prev2 = prev - NativeMembar::instruction_size;
if (last != code()->last_label() && nativeInstruction_at(prev2)->is_Membar()) {
NativeMembar *bar2 = NativeMembar_at(prev2);
assert(bar2->get_kind() == order_constraint, "it should be merged before");
BLOCK_COMMENT("merged membar");
return;
}
}
}
code()->set_last_insn(pc());
dmb(Assembler::barrier(order_constraint));
}

bool MacroAssembler::try_merge_ldst(Register rt, const Address &adr, size_t size_in_bytes, bool is_store) {
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ class MacroAssembler: public Assembler {
// Bind label L at the current code position.
// Beyond the plain Assembler::bind, this also resets the merge-tracking
// state: instruction merging (e.g. of adjacent DMB barriers or of
// ld/st pairs into ldp/stp) must never cross a label, because a label
// is a potential branch target and thus starts a new basic block.
void bind(Label& L) {
Assembler::bind(L);
code()->clear_last_insn();    // forbid merging with instructions before the label
code()->set_last_label(pc()); // record the start of the current basic block
}

void membar(Membar_mask_bits order_constraint);
Expand Down
5 changes: 4 additions & 1 deletion src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
Expand Down Expand Up @@ -230,6 +230,9 @@ void VM_Version::initialize() {
if (FLAG_IS_DEFAULT(OnSpinWaitInstCount)) {
FLAG_SET_DEFAULT(OnSpinWaitInstCount, 1);
}
if (FLAG_IS_DEFAULT(AlwaysMergeDMB)) {
FLAG_SET_DEFAULT(AlwaysMergeDMB, false);
}
}

if (_cpu == CPU_ARM) {
Expand Down
15 changes: 14 additions & 1 deletion src/hotspot/share/asm/codeBuffer.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -930,6 +930,10 @@ void CodeBuffer::expand(CodeSection* which_cs, csize_t amount) {
// Move all the code and relocations to the new blob:
relocate_code_to(&cb);

// Some internal addresses (_last_insn, _last_label) are used during code
// emission; adjust them when the buffer is expanded.
adjust_internal_address(insts_begin(), cb.insts_begin());

// Copy the temporary code buffer into the current code buffer.
// Basically, do {*this = cb}, except for some control information.
this->take_over_code_from(&cb);
Expand All @@ -951,6 +955,15 @@ void CodeBuffer::expand(CodeSection* which_cs, csize_t amount) {
#endif //PRODUCT
}

// Relocate the cached code-emission addresses (_last_insn, _last_label)
// after the underlying instruction storage has moved from 'from' to 'to'
// (used when the buffer is expanded). Null entries mean "no cached
// position" and are left untouched.
void CodeBuffer::adjust_internal_address(address from, address to) {
  const auto delta = to - from;
  if (_last_insn != nullptr) {
    _last_insn += delta;
  }
  if (_last_label != nullptr) {
    _last_label += delta;
  }
}

void CodeBuffer::take_over_code_from(CodeBuffer* cb) {
// Must already have disposed of the old blob somehow.
assert(blob() == nullptr, "must be empty");
Expand Down
8 changes: 8 additions & 0 deletions src/hotspot/share/asm/codeBuffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ class CodeBuffer: public StackObj DEBUG_ONLY(COMMA private Scrubber) {
Arena* _overflow_arena;

address _last_insn; // used to merge consecutive memory barriers, loads or stores.
address _last_label; // record last bind label address, it's also the start of current bb.

SharedStubToInterpRequests* _shared_stub_to_interp_requests; // used to collect requests for shared iterpreter stubs
SharedTrampolineRequests* _shared_trampoline_requests; // used to collect requests for shared trampolines
Expand All @@ -460,6 +461,7 @@ class CodeBuffer: public StackObj DEBUG_ONLY(COMMA private Scrubber) {
_oop_recorder = nullptr;
_overflow_arena = nullptr;
_last_insn = nullptr;
_last_label = nullptr;
_main_code_size = 0;
_finalize_stubs = false;
_shared_stub_to_interp_requests = nullptr;
Expand Down Expand Up @@ -514,6 +516,9 @@ class CodeBuffer: public StackObj DEBUG_ONLY(COMMA private Scrubber) {
// moves code sections to new buffer (assumes relocs are already in there)
void relocate_code_to(CodeBuffer* cb) const;

// Adjust internal addresses (_last_insn, _last_label) during expansion.
void adjust_internal_address(address from, address to);

// set up a model of the final layout of my contents
void compute_final_layout(CodeBuffer* dest) const;

Expand Down Expand Up @@ -686,6 +691,9 @@ class CodeBuffer: public StackObj DEBUG_ONLY(COMMA private Scrubber) {
// Trackers for the most recently emitted mergeable instruction.
void set_last_insn(address a) { _last_insn = a; }
void clear_last_insn() { set_last_insn(nullptr); }

// Accessors for the position of the most recently bound label, i.e. the
// start of the current basic block; merging never reaches past this point.
address last_label() const { return _last_label; }
void set_last_label(address a) { _last_label = a; }

#ifndef PRODUCT
AsmRemarks &asm_remarks() { return _asm_remarks; }
DbgStrings &dbg_strings() { return _dbg_strings; }
Expand Down
202 changes: 201 additions & 1 deletion test/hotspot/gtest/aarch64/test_assembler_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
Expand Down Expand Up @@ -28,6 +28,7 @@

#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.hpp"
#include "compiler/disassembler.hpp"
#include "memory/resourceArea.hpp"
#include "unittest.hpp"
Expand Down Expand Up @@ -81,4 +82,203 @@ TEST_VM(AssemblerAArch64, validate) {
BufferBlob::free(b);
}

// Disassemble the instructions in [start, end) and print them to stdout,
// preceded by an "Insns:" header. Debug aid for the merge tests below.
static void asm_dump(address start, address end) {
  ResourceMark mark;
  stringStream out;
  out.print_cr("Insns:");
  Disassembler::decode(start, end, &out);
  printf("%s\n", out.as_string());
}

// Verifies that MacroAssembler::membar merges consecutive DMB barriers.
// The expected encodings differ depending on the AlwaysMergeDMB flag:
// with it set, mixed-type barriers are promoted (ORed) into a full
// dmb.ish; without it, a StoreStore+LoadStore pair is kept as two
// separate barriers to avoid introducing an unintended StoreLoad.
TEST_VM(AssemblerAArch64, merge_dmb) {
BufferBlob* b = BufferBlob::create("aarch64Test", 400);
CodeBuffer code(b);
MacroAssembler _masm(&code);

{
// merge with same type: repeated identical barriers collapse into one
__ membar(Assembler::Membar_mask_bits::StoreStore);
__ membar(Assembler::Membar_mask_bits::StoreStore);
__ membar(Assembler::Membar_mask_bits::StoreStore);
__ nop();
__ membar(Assembler::Membar_mask_bits::LoadStore);
__ membar(Assembler::Membar_mask_bits::LoadStore);
__ membar(Assembler::Membar_mask_bits::LoadStore);
__ membar(Assembler::Membar_mask_bits::LoadStore);
__ nop();
// merge with high rank: AnyAny (full barrier) subsumes the others
__ membar(Assembler::Membar_mask_bits::LoadStore);
__ membar(Assembler::Membar_mask_bits::LoadStore);
__ membar(Assembler::Membar_mask_bits::AnyAny);
__ membar(Assembler::Membar_mask_bits::StoreStore);
__ membar(Assembler::Membar_mask_bits::StoreStore);
__ nop();
// merge with different type: behavior depends on AlwaysMergeDMB (see above)
__ membar(Assembler::Membar_mask_bits::LoadStore);
__ membar(Assembler::Membar_mask_bits::StoreStore);
__ membar(Assembler::Membar_mask_bits::LoadStore);
__ membar(Assembler::Membar_mask_bits::StoreStore);
}
asm_dump(code.insts()->start(), code.insts()->end());
// Expected encodings when AlwaysMergeDMB is true (mixed barriers promoted to dmb.ish)
static const unsigned int insns1[] = {
0xd5033abf, // dmb.ishst
0xd503201f, // nop
0xd50339bf, // dmb.ishld
0xd503201f, // nop
0xd5033bbf, // dmb.ish
0xd503201f, // nop
0xd5033bbf, // dmb.ish
};
// Expected encodings when AlwaysMergeDMB is false (mixed barriers kept separate)
static const unsigned int insns2[] = {
0xd5033abf, // dmb.ishst
0xd503201f, // nop
0xd50339bf, // dmb.ishld
0xd503201f, // nop
0xd5033bbf, // dmb.ish
0xd503201f, // nop
0xd50339bf, // dmb.ishld
0xd5033abf, // dmb.ishst
};
if (AlwaysMergeDMB) {
EXPECT_EQ(code.insts()->size(), (CodeSection::csize_t)(sizeof insns1));
asm_check((const unsigned int *)code.insts()->start(), insns1, sizeof insns1 / sizeof insns1[0]);
} else {
EXPECT_EQ(code.insts()->size(), (CodeSection::csize_t)(sizeof insns2));
asm_check((const unsigned int *)code.insts()->start(), insns2, sizeof insns2 / sizeof insns2[0]);
}

BufferBlob::free(b);
}

// Verifies that DMB merging does not cross a bound label: a label is a
// potential branch target, so the two StoreStore barriers around it must
// both be emitted instead of being merged into one.
TEST_VM(AssemblerAArch64, merge_dmb_block_by_label) {
BufferBlob* b = BufferBlob::create("aarch64Test", 400);
CodeBuffer code(b);
MacroAssembler _masm(&code);

{
Label l;
// merge can not cross the label
__ membar(Assembler::Membar_mask_bits::StoreStore);
__ bind(l);
__ membar(Assembler::Membar_mask_bits::StoreStore);
}
asm_dump(code.insts()->start(), code.insts()->end());
// both barriers survive — no merge across the label
static const unsigned int insns[] = {
0xd5033abf, // dmb.ishst
0xd5033abf, // dmb.ishst
};
EXPECT_EQ(code.insts()->size(), (CodeSection::csize_t)(sizeof insns));
asm_check((const unsigned int *)code.insts()->start(), insns, sizeof insns / sizeof insns[0]);

BufferBlob::free(b);
}

// Verifies that DMB merging still works after the CodeBuffer expands:
// expansion moves the instruction storage, and adjust_internal_address
// must relocate the cached _last_insn so the second barrier is still
// recognized as mergeable with the first.
TEST_VM(AssemblerAArch64, merge_dmb_after_expand) {
ResourceMark rm;
BufferBlob* b = BufferBlob::create("aarch64Test", 400);
CodeBuffer code(b);
code.set_blob(b);
MacroAssembler _masm(&code);

{
__ membar(Assembler::Membar_mask_bits::StoreStore);
// force a buffer expansion between the two barriers
code.insts()->maybe_expand_to_ensure_remaining(50000);
__ membar(Assembler::Membar_mask_bits::StoreStore);
}
asm_dump(code.insts()->start(), code.insts()->end());
// a single barrier: the merge succeeded despite the expansion
static const unsigned int insns[] = {
0xd5033abf, // dmb.ishst
};
EXPECT_EQ(code.insts()->size(), (CodeSection::csize_t)(sizeof insns));
asm_check((const unsigned int *)code.insts()->start(), insns, sizeof insns / sizeof insns[0]);
// NOTE(review): unlike the non-expanding tests, b is not freed here —
// presumably expansion/set_blob transfers blob ownership to the
// CodeBuffer; confirm there is no leak.
}

// Verifies that adjacent loads/stores to consecutive stack slots are
// merged into ldp/stp, and that merging is refused for mismatched
// operand sizes, non-adjacent offsets, or across a bound label.
TEST_VM(AssemblerAArch64, merge_ldst) {
BufferBlob* b = BufferBlob::create("aarch64Test", 400);
CodeBuffer code(b);
MacroAssembler _masm(&code);

{
Label l;
// merge ld/st into ldp/stp (64-bit and 32-bit, both directions)
__ ldr(r0, Address(sp, 8));
__ ldr(r1, Address(sp, 0));
__ nop();
__ str(r0, Address(sp, 0));
__ str(r1, Address(sp, 8));
__ nop();
__ ldrw(r0, Address(sp, 0));
__ ldrw(r1, Address(sp, 4));
__ nop();
__ strw(r0, Address(sp, 4));
__ strw(r1, Address(sp, 0));
__ nop();
// can not merge: mixed sizes, non-adjacent offsets, or label in between
__ ldrw(r0, Address(sp, 4));
__ ldr(r1, Address(sp, 8));
__ nop();
__ ldrw(r0, Address(sp, 0));
__ ldrw(r1, Address(sp, 8));
__ nop();
__ str(r0, Address(sp, 0));
__ bind(l); // block by label
__ str(r1, Address(sp, 8));
__ nop();
}
asm_dump(code.insts()->start(), code.insts()->end());
static const unsigned int insns1[] = {
0xa94003e1, // ldp x1, x0, [sp]
0xd503201f, // nop
0xa90007e0, // stp x0, x1, [sp]
0xd503201f, // nop
0x294007e0, // ldp w0, w1, [sp]
0xd503201f, // nop
0x290003e1, // stp w1, w0, [sp]
0xd503201f, // nop
0xb94007e0, // ldr w0, [sp, 4]
0xf94007e1, // ldr x1, [sp, 8]
0xd503201f, // nop
0xb94003e0, // ldr w0, [sp]
0xb9400be1, // ldr w1, [sp, 8]
0xd503201f, // nop
0xf90003e0, // str x0, [sp]
0xf90007e1, // str x1, [sp, 8]
0xd503201f, // nop
};
EXPECT_EQ(code.insts()->size(), (CodeSection::csize_t)(sizeof insns1));
asm_check((const unsigned int *)code.insts()->start(), insns1, sizeof insns1 / sizeof insns1[0]);

BufferBlob::free(b);
}

// Verifies that ld/st merging into ldp/stp still works after the
// CodeBuffer expands between the two candidate instructions — the cached
// _last_insn must be relocated along with the moved code.
TEST_VM(AssemblerAArch64, merge_ldst_after_expand) {
ResourceMark rm;
BufferBlob* b = BufferBlob::create("aarch64Test", 400);
CodeBuffer code(b);
code.set_blob(b);
MacroAssembler _masm(&code);

{
__ ldr(r0, Address(sp, 8));
// force expansions between mergeable instructions
code.insts()->maybe_expand_to_ensure_remaining(10000);
__ ldr(r1, Address(sp, 0));
__ nop();
__ str(r0, Address(sp, 0));
code.insts()->maybe_expand_to_ensure_remaining(100000);
__ str(r1, Address(sp, 8));
__ nop();
}
asm_dump(code.insts()->start(), code.insts()->end());
// merged pairs survive both expansions
static const unsigned int insns[] = {
0xa94003e1, // ldp x1, x0, [sp]
0xd503201f, // nop
0xa90007e0, // stp x0, x1, [sp]
0xd503201f, // nop
};
EXPECT_EQ(code.insts()->size(), (CodeSection::csize_t)(sizeof insns));
asm_check((const unsigned int *)code.insts()->start(), insns, sizeof insns / sizeof insns[0]);
// NOTE(review): b is not freed here — presumably expansion/set_blob
// transfers blob ownership to the CodeBuffer; confirm there is no leak.
}

#endif // AARCH64
Loading

0 comments on commit b71a1b3

Please sign in to comment.