Skip to content

Commit

Permalink
Merge store with reverse byte order input value
Browse files Browse the repository at this point in the history
  • Loading branch information
kuaiwei committed Jan 9, 2025
1 parent de02503 commit f388c6d
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 48 deletions.
85 changes: 63 additions & 22 deletions src/hotspot/share/opto/memnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2790,22 +2790,24 @@ class MergePrimitiveStores : public StackObj {
private:
PhaseGVN* const _phase;
StoreNode* const _store;
enum DataOrder { Unknown, Forward, Reverse};
DataOrder _value_order;

NOT_PRODUCT( const CHeapBitMap &_trace_tags; )

public:
MergePrimitiveStores(PhaseGVN* phase, StoreNode* store) :
_phase(phase), _store(store)
_phase(phase), _store(store), _value_order(DataOrder::Unknown)
NOT_PRODUCT( COMMA _trace_tags(Compile::current()->directive()->trace_merge_stores_tags()) )
{}

StoreNode* run();

private:
bool is_compatible_store(const StoreNode* other_store) const;
bool is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store) const;
bool is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) const;
static bool is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out);
bool is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store);
bool is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size);
static bool is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out, PhaseGVN* phase);
enum CFGStatus { SuccessNoRangeCheck, SuccessWithRangeCheck, Failure };
static CFGStatus cfg_status_for_pair(const StoreNode* use_store, const StoreNode* def_store);

Expand Down Expand Up @@ -2841,14 +2843,14 @@ class MergePrimitiveStores : public StackObj {
#endif
};

Status find_adjacent_use_store(const StoreNode* def_store) const;
Status find_adjacent_def_store(const StoreNode* use_store) const;
Status find_adjacent_use_store(const StoreNode* def_store);
Status find_adjacent_def_store(const StoreNode* use_store);
Status find_use_store(const StoreNode* def_store) const;
Status find_def_store(const StoreNode* use_store) const;
Status find_use_store_unidirectional(const StoreNode* def_store) const;
Status find_def_store_unidirectional(const StoreNode* use_store) const;

void collect_merge_list(Node_List& merge_list) const;
void collect_merge_list(Node_List& merge_list);
Node* make_merged_input_value(const Node_List& merge_list);
StoreNode* make_merged_store(const Node_List& merge_list, Node* merged_input_value);

Expand All @@ -2859,23 +2861,23 @@ class MergePrimitiveStores : public StackObj {
}

bool is_trace_basic() const {
return is_trace(TraceMergeStores::Tag::BASIC);
return is_trace(TraceMergeStores::Tag::BASIC) || UseNewCode2;
}

bool is_trace_pointer() const {
return is_trace(TraceMergeStores::Tag::POINTER);
return is_trace(TraceMergeStores::Tag::POINTER) || UseNewCode2;
}

bool is_trace_aliasing() const {
return is_trace(TraceMergeStores::Tag::ALIASING);
return is_trace(TraceMergeStores::Tag::ALIASING) || UseNewCode2;
}

bool is_trace_adjacency() const {
return is_trace(TraceMergeStores::Tag::ADJACENCY);
return is_trace(TraceMergeStores::Tag::ADJACENCY) || UseNewCode2;
}

bool is_trace_success() const {
return is_trace(TraceMergeStores::Tag::SUCCESS);
return is_trace(TraceMergeStores::Tag::SUCCESS) || UseNewCode2;
}
#endif

Expand Down Expand Up @@ -2933,7 +2935,7 @@ bool MergePrimitiveStores::is_compatible_store(const StoreNode* other_store) con
return true;
}

bool MergePrimitiveStores::is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store) const {
bool MergePrimitiveStores::is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store) {
if (!is_adjacent_input_pair(def_store->in(MemNode::ValueIn),
use_store->in(MemNode::ValueIn),
def_store->memory_size())) {
Expand All @@ -2951,7 +2953,7 @@ bool MergePrimitiveStores::is_adjacent_pair(const StoreNode* use_store, const St
return pointer_def.is_adjacent_to_and_before(pointer_use);
}

bool MergePrimitiveStores::is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) const {
bool MergePrimitiveStores::is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) {
// Pattern: [n1 = ConI, n2 = ConI]
if (n1->Opcode() == Op_ConI) {
return n2->Opcode() == Op_ConI;
Expand All @@ -2965,7 +2967,7 @@ bool MergePrimitiveStores::is_adjacent_input_pair(const Node* n1, const Node* n2
#endif // !VM_LITTLE_ENDIAN
Node const* base_n2;
jint shift_n2;
if (!is_con_RShift(n2, base_n2, shift_n2)) {
if (!is_con_RShift(n2, base_n2, shift_n2, _phase)) {
return false;
}
if (n1->Opcode() == Op_ConvL2I) {
Expand All @@ -2978,22 +2980,41 @@ bool MergePrimitiveStores::is_adjacent_input_pair(const Node* n1, const Node* n2
// n1 = base = base >> 0
base_n1 = n1;
shift_n1 = 0;
} else if (!is_con_RShift(n1, base_n1, shift_n1)) {
} else if (!is_con_RShift(n1, base_n1, shift_n1, _phase)) {
return false;
}
int bits_per_store = memory_size * 8;
if (base_n1 != base_n2 ||
shift_n1 + bits_per_store != shift_n2 ||
abs(shift_n1 - shift_n2) != bits_per_store ||
shift_n1 % bits_per_store != 0) {
return false;
}

// initialize value_order once
if (_value_order == DataOrder::Unknown) {
if (shift_n1 < shift_n2) {
_value_order = DataOrder::Forward;
} else if (memory_size == 1 &&
Matcher::match_rule_supported(Op_ReverseBytesI) &&
Matcher::match_rule_supported(Op_ReverseBytesL)) {
_value_order = DataOrder::Reverse; // only support reverse bytes
} else {
return false;
}
}

if ((_value_order == DataOrder::Forward && shift_n1 > shift_n2) ||
(_value_order == DataOrder::Reverse && shift_n1 < shift_n2)) {
// wrong order
return false;
}

// both load from same value with correct shift
return true;
}

// Detect pattern: n = base_out >> shift_out
bool MergePrimitiveStores::is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out) {
bool MergePrimitiveStores::is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out, PhaseGVN* phase) {
assert(n != nullptr, "precondition");

int opc = n->Opcode();
Expand All @@ -3012,6 +3033,13 @@ bool MergePrimitiveStores::is_con_RShift(const Node* n, Node const*& base_out, j
// The shift must be positive:
return shift_out >= 0;
}

if (phase->type(n)->isa_int() != nullptr ||
phase->type(n)->isa_long() != nullptr) {
base_out = n;
shift_out = 0;
return true;
}
return false;
}

Expand Down Expand Up @@ -3061,7 +3089,7 @@ MergePrimitiveStores::CFGStatus MergePrimitiveStores::cfg_status_for_pair(const
return CFGStatus::SuccessWithRangeCheck;
}

MergePrimitiveStores::Status MergePrimitiveStores::find_adjacent_use_store(const StoreNode* def_store) const {
MergePrimitiveStores::Status MergePrimitiveStores::find_adjacent_use_store(const StoreNode* def_store) {
Status status_use = find_use_store(def_store);
StoreNode* use_store = status_use.found_store();
if (use_store != nullptr && !is_adjacent_pair(use_store, def_store)) {
Expand All @@ -3070,7 +3098,7 @@ MergePrimitiveStores::Status MergePrimitiveStores::find_adjacent_use_store(const
return status_use;
}

MergePrimitiveStores::Status MergePrimitiveStores::find_adjacent_def_store(const StoreNode* use_store) const {
MergePrimitiveStores::Status MergePrimitiveStores::find_adjacent_def_store(const StoreNode* use_store) {
Status status_def = find_def_store(use_store);
StoreNode* def_store = status_def.found_store();
if (def_store != nullptr && !is_adjacent_pair(use_store, def_store)) {
Expand Down Expand Up @@ -3135,7 +3163,7 @@ MergePrimitiveStores::Status MergePrimitiveStores::find_def_store_unidirectional
return Status::make(def_store, cfg_status_for_pair(use_store, def_store));
}

void MergePrimitiveStores::collect_merge_list(Node_List& merge_list) const {
void MergePrimitiveStores::collect_merge_list(Node_List& merge_list) {
// The merged store can be at most 8 bytes.
const uint merge_list_max_size = 8 / _store->memory_size();
assert(merge_list_max_size >= 2 &&
Expand Down Expand Up @@ -3198,16 +3226,20 @@ Node* MergePrimitiveStores::make_merged_input_value(const Node_List& merge_list)
// | |
// _store first
//
assert(_value_order != DataOrder::Unknown, "sanity");
Node* hi = _store->in(MemNode::ValueIn);
Node* lo = first->in(MemNode::ValueIn);
#ifndef VM_LITTLE_ENDIAN
// `_store` and `first` are swapped in the diagram above
swap(hi, lo);
#endif // !VM_LITTLE_ENDIAN
if (_value_order == DataOrder::Reverse) {
swap(hi, lo);
}
Node const* hi_base;
jint hi_shift;
merged_input_value = lo;
bool is_true = is_con_RShift(hi, hi_base, hi_shift);
bool is_true = is_con_RShift(hi, hi_base, hi_shift, _phase);
assert(is_true, "must detect con RShift");
if (merged_input_value != hi_base && merged_input_value->Opcode() == Op_ConvL2I) {
// look through
Expand All @@ -3233,6 +3265,15 @@ Node* MergePrimitiveStores::make_merged_input_value(const Node_List& merge_list)
(_phase->type(merged_input_value)->isa_long() != nullptr && new_memory_size == 8),
"merged_input_value is either int or long, and new_memory_size is small enough");

if (_value_order == DataOrder::Reverse) {
if (new_memory_size == 8) {
merged_input_value = _phase->transform(new ReverseBytesLNode(nullptr, merged_input_value));
} else if (new_memory_size == 4) {
merged_input_value = _phase->transform(new ReverseBytesINode(nullptr, merged_input_value));
} else {
return nullptr;
}
}
return merged_input_value;
}

Expand Down
54 changes: 28 additions & 26 deletions test/hotspot/jtreg/compiler/c2/TestMergeStores.java
Original file line number Diff line number Diff line change
Expand Up @@ -797,11 +797,10 @@ static Object[] test2RBE(byte[] a, int offset, long v) {
}

@Test
@IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8",
IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"},
applyIfPlatform = {"little-endian", "true"})
@IR(counts = { IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1",
IRNode.REVERSE_BYTES_L, "1"},
applyIf = {"UseUnalignedAccesses", "true"},
applyIfPlatformOr = {"little-endian", "true", "x64", "true", "aarch64", "true"})
@IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"},
applyIf = {"UseUnalignedAccesses", "true"},
applyIfPlatform = {"big-endian", "true"})
Expand All @@ -828,11 +827,10 @@ static Object[] test2bBE(byte[] a, int offset, long v) {
}

@Test
@IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8",
IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"},
applyIfPlatform = {"little-endian", "true"})
@IR(counts = { IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1",
IRNode.REVERSE_BYTES_L, "1"},
applyIf = {"UseUnalignedAccesses", "true"},
applyIfPlatformOr = {"little-endian", "true", "x64", "true", "aarch64", "true"})
@IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"},
applyIf = {"UseUnalignedAccesses", "true"},
applyIfPlatform = {"big-endian", "true"})
Expand Down Expand Up @@ -907,11 +905,10 @@ static Object[] test3RBE(byte[] a, int offset, long v) {
}

@Test
@IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8",
IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"},
applyIfPlatform = {"little-endian", "true"})
@IR(counts = {IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2",
IRNode.REVERSE_BYTES_I, "1"},
applyIf = {"UseUnalignedAccesses", "true"},
applyIfPlatformOr = {"little-endian", "true", "x64", "true", "aarch64", "true"})
@IR(counts = {IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2"},
applyIf = {"UseUnalignedAccesses", "true"},
applyIfPlatform = {"big-endian", "true"})
Expand Down Expand Up @@ -1006,12 +1003,13 @@ static Object[] test4RBE(byte[] a, int offset, long v1, int v2, short v3, byte v
}

@Test
@IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "12",
@IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8",
IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // Stores of constants can be merged
IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1",
IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"},
IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2",
IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.REVERSE_BYTES_I, "1"},
applyIf = {"UseUnalignedAccesses", "true"},
applyIfPlatform = {"little-endian", "true"})
applyIfPlatformOr = {"x64", "true", "aarch64", "true"})
@IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "4", // 3 (+ 1 for uncommon trap)
IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "3",
IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2",
Expand Down Expand Up @@ -1897,11 +1895,13 @@ static Object[] test500RBE(byte[] a, int offset, long v) {
}

@Test
@IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8",
@IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // for RangeCheck trap
IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"},
applyIfPlatform = {"little-endian", "true"})
IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // expect merged
IRNode.REVERSE_BYTES_L, "1"},
applyIf = {"UseUnalignedAccesses", "true"},
applyIfPlatformOr = {"little-endian", "true", "x64", "true", "aarch64", "true"})
@IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // for RangeCheck trap
IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
Expand Down Expand Up @@ -2168,11 +2168,13 @@ static Object[] test800RBE(byte[] a, int offset, long v) {
}

@Test
@IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "6",
@IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2",
IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"},
applyIfPlatform = {"little-endian", "true"})
IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1",
IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.REVERSE_BYTES_I, "1"},
applyIf = {"UseUnalignedAccesses", "true"},
applyIfPlatformOr = {"little-endian", "true", "x64", "true", "aarch64", "true"})
@IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "2",
IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1",
Expand Down
10 changes: 10 additions & 0 deletions test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -1455,6 +1455,16 @@ public class IRNode {
vectorNode(REPLICATE_D, "Replicate", TYPE_DOUBLE);
}

public static final String REVERSE_BYTES_I = PREFIX + "REVERSE_BYTES_I" + POSTFIX;
static {
beforeMatchingNameRegex(REVERSE_BYTES_I, "ReverseBytesI");
}

public static final String REVERSE_BYTES_L = PREFIX + "REVERSE_BYTES_L" + POSTFIX;
static {
beforeMatchingNameRegex(REVERSE_BYTES_L, "ReverseBytesL");
}

public static final String REVERSE_BYTES_VB = VECTOR_PREFIX + "REVERSE_BYTES_VB" + POSTFIX;
static {
vectorNode(REVERSE_BYTES_VB, "ReverseBytesV", TYPE_BYTE);
Expand Down

0 comments on commit f388c6d

Please sign in to comment.