Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#3044 AArch64 SVE codec: Add SVE2 SPLICE variant #6517

Merged
merged 2 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions core/ir/aarch64/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1598,6 +1598,11 @@ decode_single_sized(reg_id_t min_reg, reg_id_t max_reg, uint pos_start, uint bit
reg_id = reg_id + min_reg - max_reg - 1;

*opnd = opnd_create_reg_element_vector(reg_id, size);

if (offset > 0) {
opnd->aux.flags |= DR_OPND_IMPLICIT;
}

return true;
}

Expand Down Expand Up @@ -1635,6 +1640,11 @@ encode_sized_base(uint pos_start, uint size_start, uint min_size, uint max_size,
if (size < min_size)
return false;

/* DR_OPND_IMPLICIT should be set if using an offset */
if ((offset > 0) != ((opnd.aux.flags & DR_OPND_IMPLICIT) ? true : false)) {
return false;
}

uint reg_number;
if (!is_vreg(&vec_size, &reg_number, opnd))
return false;
Expand Down Expand Up @@ -7321,6 +7331,19 @@ encode_opnd_z_size_bhsd_5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint
return encode_sized_z(5, 22, BYTE_REG, DOUBLE_REG, 0, 0, opnd, enc_out);
}

/*
 * Decoder for the z_size_bhsd_5p1 operand: an SVE Z register whose element size
 * (byte to doubleword) is selected by the instruction's size field.
 * The work is delegated to decode_sized_z(); the opcode argument is unused here.
 * NOTE(review): the trailing 1 is presumably the register-number offset, i.e. the
 * operand is the register following the one encoded at bit 5 -- confirm against
 * decode_sized_z().
 */
static inline bool
decode_opnd_z_size_bhsd_5p1(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    const uint reg_field_pos = 5;   /* Lowest bit of the register field. */
    const uint size_field_pos = 22; /* Lowest bit of the size field. */
    const uint size_adjust = 0;
    const uint reg_adjust = 1;

    const bool decoded =
        decode_sized_z(reg_field_pos, size_field_pos, BYTE_REG, DOUBLE_REG, size_adjust,
                       reg_adjust, enc, pc, opnd);
    return decoded;
}

/*
 * Encoder for the z_size_bhsd_5p1 operand: an SVE Z register whose element size
 * (byte to doubleword) is selected by the instruction's size field.
 * The work is delegated to encode_sized_z(); enc, opcode and pc are unused here.
 * NOTE(review): the trailing 1 is presumably the register-number offset, i.e. the
 * operand must be the register following the one encoded at bit 5 -- confirm
 * against encode_sized_z().
 */
static inline bool
encode_opnd_z_size_bhsd_5p1(uint enc, int opcode, byte *pc, opnd_t opnd,
                            OUT uint *enc_out)
{
    const uint reg_field_pos = 5;   /* Lowest bit of the register field. */
    const uint size_field_pos = 22; /* Lowest bit of the size field. */
    const uint size_adjust = 0;
    const uint reg_adjust = 1;

    const bool encoded =
        encode_sized_z(reg_field_pos, size_field_pos, BYTE_REG, DOUBLE_REG, size_adjust,
                       reg_adjust, opnd, enc_out);
    return encoded;
}

static inline bool
decode_opnd_z_size_bhs_5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
Expand Down
1 change: 1 addition & 0 deletions core/ir/aarch64/codec_sve2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@
01000101xx0xxxxx011101xxxxxxxxxx n 1104 SVE2 smullt z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16
01000100111xxxxx1100x1xxxxxxxxxx n 1104 SVE2 smullt z_d_0 : z_s_5 z4_s_16 i2_index_11
01000100101xxxxx1100x1xxxxxxxxxx n 1104 SVE2 smullt z_s_0 : z_msz_bhsd_5 z3_msz_bhsd_16 i3_index_11
00000101xx101101100xxxxxxxxxxxxx n 882 SVE2 splice z_size_bhsd_0 : p10_lo z_size_bhsd_5 z_size_bhsd_5p1
01000100xx001000101xxxxxxxxxxxxx n 402 SVE2 sqabs z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_5
01000101xx00000111011xxxxxxxxxxx n 1168 SVE2 sqcadd z_size_bhsd_0 : z_size_bhsd_0 z_size_bhsd_5 imm1_ew_10
01000100xx0xxxxx011000xxxxxxxxxx n 1105 SVE2 sqdmlalb z_size_hsd_0 : z_size_hsd_0 z_sizep1_bhs_5 z_sizep1_bhs_16
Expand Down
16 changes: 15 additions & 1 deletion core/ir/aarch64/instr_create_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -8782,7 +8782,7 @@
* \param Pv The first source predicate register, P (Predicate).
* \param Zm The third source vector register, Z (Scalable).
*/
#define INSTR_CREATE_splice_sve(dc, Zdn, Pv, Zm) \
#define INSTR_CREATE_splice_sve_des(dc, Zdn, Pv, Zm) \
instr_create_1dst_3src(dc, OP_splice, Zdn, Pv, Zdn, Zm)

/**
Expand Down Expand Up @@ -18183,4 +18183,18 @@
#define INSTR_CREATE_mul_sve_idx(dc, Zd, Zn, Zm, index) \
instr_create_1dst_3src(dc, OP_mul, Zd, Zn, Zm, index)

/**
* Creates a SPLICE instruction.
*
* This macro is used to encode the forms:
\verbatim
SPLICE <Zd>.<Ts>, <Pv>, { <Zn1>.<Ts>, <Zn2>.<Ts> }
\endverbatim
*
* Only the first register of the source list, Zn1, is passed in (as \p Zn).
* The second list register, Zn2, is not a parameter: the macro creates it
* from \p Zn with opnd_create_increment_reg(Zn, 1).
*
* \note \p Zn appears twice in the expansion, so it should be an expression
* without side effects.
*
* \param dc The void * dcontext used to allocate memory for the #instr_t.
* \param Zd The destination vector register. Can be Z.b, Z.h, Z.s or Z.d.
* \param Pv The first source predicate register, P (Predicate).
* \param Zn The second source vector register, i.e. Zn1, the first register
*           of the source list. Can be Z.b, Z.h, Z.s or Z.d.
*/
#define INSTR_CREATE_splice_sve_con(dc, Zd, Pv, Zn) \
instr_create_1dst_3src(dc, OP_splice, Zd, Pv, Zn, opnd_create_increment_reg(Zn, 1))
#endif /* DR_IR_MACROS_AARCH64_H */
1 change: 1 addition & 0 deletions core/ir/aarch64/opnd_defs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@
--------xx------------xxxxx----- p_size_bhsd_5 # sve predicate vector reg, elsz depending on size
--------xx------------xxxxx----- p_size_hsd_5 # sve predicate vector reg, elsz depending on size
--------xx------------xxxxx----- z_size_bhsd_5 # sve vector reg, elsz depending on size
--------xx------------xxxxx----- z_size_bhsd_5p1 # sve vector reg number plus 1, elsz depending on size
--------xx------------xxxxx----- z_size_bhs_5 # sve vector reg, elsz depending on size
--------xx------------xxxxx----- z_size_bh_5 # sve vector reg, elsz depending on size
--------xx------------xxxxx----- z_sizep1_bhs_5 # sve vector reg, elsz depending on size, plus 1
Expand Down
6 changes: 6 additions & 0 deletions core/ir/opnd_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1788,6 +1788,12 @@ typedef enum _dr_opnd_flags_t {
* This is used by RISCV64 for immediates display format.
*/
DR_OPND_IMM_PRINT_DECIMAL = 0x1000,

/**
* The register number is not present in the instruction encoding; it is
* calculated from another register operand.
*/
DR_OPND_IMPLICIT = 0x2000,
} dr_opnd_flags_t;

#ifdef DR_FAST_IR
Expand Down
1 change: 1 addition & 0 deletions core/ir/opnd_shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -1496,6 +1496,7 @@ opnd_create_increment_reg(opnd_t opnd, uint increment)
opnd.value.reg_and_element_size.element_size;
inc_opnd.size = opnd.size; /* indicates full size of reg */
inc_opnd.aux.flags = opnd.aux.flags;
inc_opnd.aux.flags |= DR_OPND_IMPLICIT;
return inc_opnd;
}

Expand Down
66 changes: 66 additions & 0 deletions suite/tests/api/dis-a64-sve2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4108,6 +4108,72 @@ c51edfff : ldnt1w z31.d, p7/Z, [z31.d, x30] : ldnt1w (%z31.d,%x30)[16by
45dd779b : smullt z27.d, z28.s, z29.s : smullt %z28.s %z29.s -> %z27.d
45df77ff : smullt z31.d, z31.s, z31.s : smullt %z31.s %z31.s -> %z31.d

# SPLICE <Zd>.<T>, <Pv>, { <Zn1>.<T>, <Zn2>.<T> } (SPLICE-Z.P.ZZ-Con)
052d8000 : splice z0.b, p0, {z0.b, z1.b} : splice %p0 %z0.b %z1.b -> %z0.b
052d8482 : splice z2.b, p1, {z4.b, z5.b} : splice %p1 %z4.b %z5.b -> %z2.b
052d88c4 : splice z4.b, p2, {z6.b, z7.b} : splice %p2 %z6.b %z7.b -> %z4.b
052d8906 : splice z6.b, p2, {z8.b, z9.b} : splice %p2 %z8.b %z9.b -> %z6.b
052d8d48 : splice z8.b, p3, {z10.b, z11.b} : splice %p3 %z10.b %z11.b -> %z8.b
052d8d8a : splice z10.b, p3, {z12.b, z13.b} : splice %p3 %z12.b %z13.b -> %z10.b
052d91cc : splice z12.b, p4, {z14.b, z15.b} : splice %p4 %z14.b %z15.b -> %z12.b
052d920e : splice z14.b, p4, {z16.b, z17.b} : splice %p4 %z16.b %z17.b -> %z14.b
052d9650 : splice z16.b, p5, {z18.b, z19.b} : splice %p5 %z18.b %z19.b -> %z16.b
052d9671 : splice z17.b, p5, {z19.b, z20.b} : splice %p5 %z19.b %z20.b -> %z17.b
052d96b3 : splice z19.b, p5, {z21.b, z22.b} : splice %p5 %z21.b %z22.b -> %z19.b
052d9af5 : splice z21.b, p6, {z23.b, z24.b} : splice %p6 %z23.b %z24.b -> %z21.b
052d9b37 : splice z23.b, p6, {z25.b, z26.b} : splice %p6 %z25.b %z26.b -> %z23.b
052d9f79 : splice z25.b, p7, {z27.b, z28.b} : splice %p7 %z27.b %z28.b -> %z25.b
052d9fbb : splice z27.b, p7, {z29.b, z30.b} : splice %p7 %z29.b %z30.b -> %z27.b
052d9fff : splice z31.b, p7, {z31.b, z0.b} : splice %p7 %z31.b %z0.b -> %z31.b
056d8000 : splice z0.h, p0, {z0.h, z1.h} : splice %p0 %z0.h %z1.h -> %z0.h
056d8482 : splice z2.h, p1, {z4.h, z5.h} : splice %p1 %z4.h %z5.h -> %z2.h
056d88c4 : splice z4.h, p2, {z6.h, z7.h} : splice %p2 %z6.h %z7.h -> %z4.h
056d8906 : splice z6.h, p2, {z8.h, z9.h} : splice %p2 %z8.h %z9.h -> %z6.h
056d8d48 : splice z8.h, p3, {z10.h, z11.h} : splice %p3 %z10.h %z11.h -> %z8.h
056d8d8a : splice z10.h, p3, {z12.h, z13.h} : splice %p3 %z12.h %z13.h -> %z10.h
056d91cc : splice z12.h, p4, {z14.h, z15.h} : splice %p4 %z14.h %z15.h -> %z12.h
056d920e : splice z14.h, p4, {z16.h, z17.h} : splice %p4 %z16.h %z17.h -> %z14.h
056d9650 : splice z16.h, p5, {z18.h, z19.h} : splice %p5 %z18.h %z19.h -> %z16.h
056d9671 : splice z17.h, p5, {z19.h, z20.h} : splice %p5 %z19.h %z20.h -> %z17.h
056d96b3 : splice z19.h, p5, {z21.h, z22.h} : splice %p5 %z21.h %z22.h -> %z19.h
056d9af5 : splice z21.h, p6, {z23.h, z24.h} : splice %p6 %z23.h %z24.h -> %z21.h
056d9b37 : splice z23.h, p6, {z25.h, z26.h} : splice %p6 %z25.h %z26.h -> %z23.h
056d9f79 : splice z25.h, p7, {z27.h, z28.h} : splice %p7 %z27.h %z28.h -> %z25.h
056d9fbb : splice z27.h, p7, {z29.h, z30.h} : splice %p7 %z29.h %z30.h -> %z27.h
056d9fff : splice z31.h, p7, {z31.h, z0.h} : splice %p7 %z31.h %z0.h -> %z31.h
05ad8000 : splice z0.s, p0, {z0.s, z1.s} : splice %p0 %z0.s %z1.s -> %z0.s
05ad8482 : splice z2.s, p1, {z4.s, z5.s} : splice %p1 %z4.s %z5.s -> %z2.s
05ad88c4 : splice z4.s, p2, {z6.s, z7.s} : splice %p2 %z6.s %z7.s -> %z4.s
05ad8906 : splice z6.s, p2, {z8.s, z9.s} : splice %p2 %z8.s %z9.s -> %z6.s
05ad8d48 : splice z8.s, p3, {z10.s, z11.s} : splice %p3 %z10.s %z11.s -> %z8.s
05ad8d8a : splice z10.s, p3, {z12.s, z13.s} : splice %p3 %z12.s %z13.s -> %z10.s
05ad91cc : splice z12.s, p4, {z14.s, z15.s} : splice %p4 %z14.s %z15.s -> %z12.s
05ad920e : splice z14.s, p4, {z16.s, z17.s} : splice %p4 %z16.s %z17.s -> %z14.s
05ad9650 : splice z16.s, p5, {z18.s, z19.s} : splice %p5 %z18.s %z19.s -> %z16.s
05ad9671 : splice z17.s, p5, {z19.s, z20.s} : splice %p5 %z19.s %z20.s -> %z17.s
05ad96b3 : splice z19.s, p5, {z21.s, z22.s} : splice %p5 %z21.s %z22.s -> %z19.s
05ad9af5 : splice z21.s, p6, {z23.s, z24.s} : splice %p6 %z23.s %z24.s -> %z21.s
05ad9b37 : splice z23.s, p6, {z25.s, z26.s} : splice %p6 %z25.s %z26.s -> %z23.s
05ad9f79 : splice z25.s, p7, {z27.s, z28.s} : splice %p7 %z27.s %z28.s -> %z25.s
05ad9fbb : splice z27.s, p7, {z29.s, z30.s} : splice %p7 %z29.s %z30.s -> %z27.s
05ad9fff : splice z31.s, p7, {z31.s, z0.s} : splice %p7 %z31.s %z0.s -> %z31.s
05ed8000 : splice z0.d, p0, {z0.d, z1.d} : splice %p0 %z0.d %z1.d -> %z0.d
05ed8482 : splice z2.d, p1, {z4.d, z5.d} : splice %p1 %z4.d %z5.d -> %z2.d
05ed88c4 : splice z4.d, p2, {z6.d, z7.d} : splice %p2 %z6.d %z7.d -> %z4.d
05ed8906 : splice z6.d, p2, {z8.d, z9.d} : splice %p2 %z8.d %z9.d -> %z6.d
05ed8d48 : splice z8.d, p3, {z10.d, z11.d} : splice %p3 %z10.d %z11.d -> %z8.d
05ed8d8a : splice z10.d, p3, {z12.d, z13.d} : splice %p3 %z12.d %z13.d -> %z10.d
05ed91cc : splice z12.d, p4, {z14.d, z15.d} : splice %p4 %z14.d %z15.d -> %z12.d
05ed920e : splice z14.d, p4, {z16.d, z17.d} : splice %p4 %z16.d %z17.d -> %z14.d
05ed9650 : splice z16.d, p5, {z18.d, z19.d} : splice %p5 %z18.d %z19.d -> %z16.d
05ed9671 : splice z17.d, p5, {z19.d, z20.d} : splice %p5 %z19.d %z20.d -> %z17.d
05ed96b3 : splice z19.d, p5, {z21.d, z22.d} : splice %p5 %z21.d %z22.d -> %z19.d
05ed9af5 : splice z21.d, p6, {z23.d, z24.d} : splice %p6 %z23.d %z24.d -> %z21.d
05ed9b37 : splice z23.d, p6, {z25.d, z26.d} : splice %p6 %z25.d %z26.d -> %z23.d
05ed9f79 : splice z25.d, p7, {z27.d, z28.d} : splice %p7 %z27.d %z28.d -> %z25.d
05ed9fbb : splice z27.d, p7, {z29.d, z30.d} : splice %p7 %z29.d %z30.d -> %z27.d
05ed9fff : splice z31.d, p7, {z31.d, z0.d} : splice %p7 %z31.d %z0.d -> %z31.d

# SQABS <Zd>.<T>, <Pg>/M, <Zn>.<T> (SQABS-Z.P.Z-_)
4408a000 : sqabs z0.b, p0/M, z0.b : sqabs %p0/m %z0.b -> %z0.b
4408a482 : sqabs z2.b, p1/M, z4.b : sqabs %p1/m %z4.b -> %z2.b
Expand Down
12 changes: 6 additions & 6 deletions suite/tests/api/ir_aarch64_sve.c
Original file line number Diff line number Diff line change
Expand Up @@ -8088,15 +8088,15 @@ TEST_INSTR(ext_sve)
opnd_create_immed_uint(imm8_0_0[i], OPSZ_1));
}

TEST_INSTR(splice_sve)
TEST_INSTR(splice_sve_des)
{
/* Testing SPLICE <Zdn>.<Ts>, <Pv>, <Zdn>.<Ts>, <Zm>.<Ts> */
const char *const expected_0_0[6] = {
"splice %p0 %z0.b %z0.b -> %z0.b", "splice %p2 %z5.b %z7.b -> %z5.b",
"splice %p3 %z10.b %z12.b -> %z10.b", "splice %p5 %z16.b %z18.b -> %z16.b",
"splice %p6 %z21.b %z23.b -> %z21.b", "splice %p7 %z31.b %z31.b -> %z31.b",
};
TEST_LOOP(splice, splice_sve, 6, expected_0_0[i],
TEST_LOOP(splice, splice_sve_des, 6, expected_0_0[i],
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1),
opnd_create_reg(Pn_half_six_offset_0[i]),
opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_1));
Expand All @@ -8106,7 +8106,7 @@ TEST_INSTR(splice_sve)
"splice %p3 %z10.h %z12.h -> %z10.h", "splice %p5 %z16.h %z18.h -> %z16.h",
"splice %p6 %z21.h %z23.h -> %z21.h", "splice %p7 %z31.h %z31.h -> %z31.h",
};
TEST_LOOP(splice, splice_sve, 6, expected_0_1[i],
TEST_LOOP(splice, splice_sve_des, 6, expected_0_1[i],
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2),
opnd_create_reg(Pn_half_six_offset_0[i]),
opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_2));
Expand All @@ -8116,7 +8116,7 @@ TEST_INSTR(splice_sve)
"splice %p3 %z10.s %z12.s -> %z10.s", "splice %p5 %z16.s %z18.s -> %z16.s",
"splice %p6 %z21.s %z23.s -> %z21.s", "splice %p7 %z31.s %z31.s -> %z31.s",
};
TEST_LOOP(splice, splice_sve, 6, expected_0_2[i],
TEST_LOOP(splice, splice_sve_des, 6, expected_0_2[i],
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4),
opnd_create_reg(Pn_half_six_offset_0[i]),
opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_4));
Expand All @@ -8126,7 +8126,7 @@ TEST_INSTR(splice_sve)
"splice %p3 %z10.d %z12.d -> %z10.d", "splice %p5 %z16.d %z18.d -> %z16.d",
"splice %p6 %z21.d %z23.d -> %z21.d", "splice %p7 %z31.d %z31.d -> %z31.d",
};
TEST_LOOP(splice, splice_sve, 6, expected_0_3[i],
TEST_LOOP(splice, splice_sve_des, 6, expected_0_3[i],
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8),
opnd_create_reg(Pn_half_six_offset_0[i]),
opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_8));
Expand Down Expand Up @@ -20779,7 +20779,7 @@ main(int argc, char *argv[])
RUN_INSTR_TEST(insr_sve_simd_fp);

RUN_INSTR_TEST(ext_sve);
RUN_INSTR_TEST(splice_sve);
RUN_INSTR_TEST(splice_sve_des);

RUN_INSTR_TEST(rev_sve_pred);
RUN_INSTR_TEST(rev_sve);
Expand Down
46 changes: 46 additions & 0 deletions suite/tests/api/ir_aarch64_sve2.c
Original file line number Diff line number Diff line change
Expand Up @@ -8710,6 +8710,50 @@ TEST_INSTR(mul_sve_idx)
opnd_create_immed_uint(i2_0_0[i], OPSZ_2b));
}

TEST_INSTR(splice_sve_con)
{

/* Testing SPLICE <Zd>.<Ts>, <Pv>, { <Zn1>.<Ts>, <Zn2>.<Ts> } */
/* Each TEST_LOOP below supplies three operands: a Z register, a predicate
 * register and a second Z register. The expected strings list four operands;
 * the extra source is the register following the second Z register (z0 -> z1,
 * z7 -> z8, ...), which is what INSTR_CREATE_splice_sve_con builds itself.
 * The final case in every group starts the list at z31 and expects the
 * following register to wrap around to z0.
 * The same six register combinations are repeated for each element size.
 */

/* Byte elements (OPSZ_1, ".b"). */
const char *const expected_0_0[6] = {
"splice %p0 %z0.b %z1.b -> %z0.b", "splice %p2 %z7.b %z8.b -> %z5.b",
"splice %p3 %z12.b %z13.b -> %z10.b", "splice %p5 %z18.b %z19.b -> %z16.b",
"splice %p6 %z23.b %z24.b -> %z21.b", "splice %p7 %z31.b %z0.b -> %z31.b",
};
TEST_LOOP(splice, splice_sve_con, 6, expected_0_0[i],
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1),
opnd_create_reg(Pn_half_six_offset_0[i]),
opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_1));

/* Halfword elements (OPSZ_2, ".h"). */
const char *const expected_0_1[6] = {
"splice %p0 %z0.h %z1.h -> %z0.h", "splice %p2 %z7.h %z8.h -> %z5.h",
"splice %p3 %z12.h %z13.h -> %z10.h", "splice %p5 %z18.h %z19.h -> %z16.h",
"splice %p6 %z23.h %z24.h -> %z21.h", "splice %p7 %z31.h %z0.h -> %z31.h",
};
TEST_LOOP(splice, splice_sve_con, 6, expected_0_1[i],
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2),
opnd_create_reg(Pn_half_six_offset_0[i]),
opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_2));

/* Word elements (OPSZ_4, ".s"). */
const char *const expected_0_2[6] = {
"splice %p0 %z0.s %z1.s -> %z0.s", "splice %p2 %z7.s %z8.s -> %z5.s",
"splice %p3 %z12.s %z13.s -> %z10.s", "splice %p5 %z18.s %z19.s -> %z16.s",
"splice %p6 %z23.s %z24.s -> %z21.s", "splice %p7 %z31.s %z0.s -> %z31.s",
};
TEST_LOOP(splice, splice_sve_con, 6, expected_0_2[i],
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4),
opnd_create_reg(Pn_half_six_offset_0[i]),
opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_4));

/* Doubleword elements (OPSZ_8, ".d"). */
const char *const expected_0_3[6] = {
"splice %p0 %z0.d %z1.d -> %z0.d", "splice %p2 %z7.d %z8.d -> %z5.d",
"splice %p3 %z12.d %z13.d -> %z10.d", "splice %p5 %z18.d %z19.d -> %z16.d",
"splice %p6 %z23.d %z24.d -> %z21.d", "splice %p7 %z31.d %z0.d -> %z31.d",
};
TEST_LOOP(splice, splice_sve_con, 6, expected_0_3[i],
opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8),
opnd_create_reg(Pn_half_six_offset_0[i]),
opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_8));
}
int
main(int argc, char *argv[])
{
Expand Down Expand Up @@ -8966,6 +9010,8 @@ main(int argc, char *argv[])
RUN_INSTR_TEST(mul_sve_vector);
RUN_INSTR_TEST(mul_sve_idx);

RUN_INSTR_TEST(splice_sve_con);

print("All SVE2 tests complete.\n");
#ifndef STANDALONE_DECODER
dr_standalone_exit();
Expand Down
Loading