diff --git a/core/ir/aarch64/codec.c b/core/ir/aarch64/codec.c index 9f53c8c668b..5678f963ef0 100644 --- a/core/ir/aarch64/codec.c +++ b/core/ir/aarch64/codec.c @@ -988,6 +988,27 @@ get_vector_element_reg_offset(opnd_t opnd) } } +static inline opnd_size_t +get_opnd_size_from_offset(aarch64_reg_offset offset) +{ + switch (offset) { + case BYTE_REG: return OPSZ_1; + case HALF_REG: return OPSZ_2; + case SINGLE_REG: return OPSZ_4; + case DOUBLE_REG: return OPSZ_8; + case QUAD_REG: return OPSZ_16; + default: ASSERT_NOT_REACHED(); return OPSZ_NA; + } +} + +static inline uint +get_elements_in_sve_vector(aarch64_reg_offset element_size) +{ + const uint element_length = + opnd_size_in_bits(get_opnd_size_from_offset(element_size)); + return opnd_size_in_bits(OPSZ_SVE_VL) / element_length; +} + /******************************************************************************* * Pairs of functions for decoding and encoding a generalised type of operand. */ @@ -6042,6 +6063,73 @@ encode_opnd_imm2_tsz_index(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint return true; } +static inline bool +dtype_is_signed(uint dtype) +{ + /* No need for an ASSERT_NOT_REACHED as all possible values of dtype are used in the + * instructions. */ + switch (dtype) { + case 0b1110: + case 0b1101: + case 0b1100: + case 0b1001: + case 0b1000: + case 0b0100: return true; + default: return false; + } +} + +/* svemem_gpr: GPR offset and base reg for SVE ld/st */ + +static inline bool +decode_opnd_svemem_gpr_5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) +{ + uint dtype = extract_uint(enc, 21, 4); + if (dtype_is_signed(dtype)) + dtype = ~dtype; + + const aarch64_reg_offset insz = BITS(dtype, 3, 2); + const aarch64_reg_offset elsz = BITS(dtype, 1, 0); + + const uint elements = get_elements_in_sve_vector(elsz); + const opnd_size_t mem_transfer = opnd_size_from_bytes((1 << insz) * elements); + const opnd_size_t insz_opsz = get_opnd_size_from_offset(insz); + + const reg_id_t rn = 
decode_reg(extract_uint(enc, 5, 5), true, true); + const reg_id_t rm = decode_reg(extract_uint(enc, 16, 5), true, false); + + /* The byte load type does not use offset scaling, so set to zero in those cases */ + *opnd = opnd_create_base_disp_shift_aarch64(rn, rm, DR_EXTEND_UXTX, insz != BYTE_REG, + 0, 0, mem_transfer, + opnd_size_to_shift_amount(insz_opsz)); + return true; +} + +static inline bool +encode_opnd_svemem_gpr_5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) +{ + uint dtype = extract_uint(enc, 21, 4); + if (dtype_is_signed(dtype)) + dtype = ~dtype; + + const aarch64_reg_offset insz = BITS(dtype, 3, 2); + const aarch64_reg_offset elsz = BITS(dtype, 1, 0); + + const uint elements = get_elements_in_sve_vector(elsz); + const opnd_size_t mem_transfer = opnd_size_from_bytes((1 << insz) * elements); + + IF_RETURN_FALSE(!opnd_is_base_disp(opnd) || (opnd_get_size(opnd) != mem_transfer) || + (opnd_get_disp(opnd) != 0)) + + uint rn, rm; + bool is_x; + IF_RETURN_FALSE(!encode_reg(&rn, &is_x, opnd_get_base(opnd), true) || !is_x) + IF_RETURN_FALSE(!encode_reg(&rm, &is_x, opnd_get_index(opnd), false) || !is_x) + + *enc_out = (rm << 16) | (rn << 5); + return true; +} + /* mem0p: as mem0, but a pair of registers, so double size */ static inline bool diff --git a/core/ir/aarch64/codec_sve.txt b/core/ir/aarch64/codec_sve.txt index 783b9461b5c..82f306566a6 100644 --- a/core/ir/aarch64/codec_sve.txt +++ b/core/ir/aarch64/codec_sve.txt @@ -296,6 +296,22 @@ 1000010011xxxxxx100xxxxxxxxxxxxx n 913 SVE ld1rsw z_d_0 : svememx6_s_5 p10_zer_lo 1000010101xxxxxx110xxxxxxxxxxxxx n 914 SVE ld1rw z_s_0 : svememx6_s_5 p10_zer_lo 1000010101xxxxxx111xxxxxxxxxxxxx n 914 SVE ld1rw z_d_0 : svememx6_s_5 p10_zer_lo +10100100001xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_h_0 : svemem_gpr_5 p10_zer_lo +10100100010xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_s_0 : svemem_gpr_5 p10_zer_lo +10100100011xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_d_0 : svemem_gpr_5 p10_zer_lo 
+10100100000xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_b_0 : svemem_gpr_5 p10_zer_lo +10100101111xxxxx011xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_5 p10_zer_lo +10100100101xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_h_0 : svemem_gpr_5 p10_zer_lo +10100100110xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_gpr_5 p10_zer_lo +10100100111xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_5 p10_zer_lo +10100101110xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_h_0 : svemem_gpr_5 p10_zer_lo +10100101101xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_s_0 : svemem_gpr_5 p10_zer_lo +10100101100xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_gpr_5 p10_zer_lo +10100101001xxxxx011xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_gpr_5 p10_zer_lo +10100101000xxxxx011xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_5 p10_zer_lo +10100100100xxxxx011xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_5 p10_zer_lo +10100101010xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_gpr_5 p10_zer_lo +10100101011xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_5 p10_zer_lo 1000010110xxxxxx000xxxxxxxx0xxxx n 227 SVE ldr p0 : svemem_gpr_simm9_vl 1000010110xxxxxx010xxxxxxxxxxxxx n 227 SVE ldr z0 : svemem_gpr_simm9_vl 00000100xx000011100xxxxxxxxxxxxx n 902 SVE lsl z_tszl8_bhsd_0 : p10_mrg_lo z_tszl8_bhsd_0 tszl8_imm3_5 diff --git a/core/ir/aarch64/instr_create_api.h b/core/ir/aarch64/instr_create_api.h index b781b6a47dd..5be28a0342f 100644 --- a/core/ir/aarch64/instr_create_api.h +++ b/core/ir/aarch64/instr_create_api.h @@ -10616,4 +10616,139 @@ */ #define INSTR_CREATE_rdvl(dc, Rd, simm) instr_create_1dst_1src(dc, OP_rdvl, Rd, simm) +/** + * Creates a LDFF1B instruction. + * + * This macro is used to encode the forms: + * \verbatim + * LDFF1B { .H }, /Z, [{, }] + * LDFF1B { .S }, /Z, [{, }] + * LDFF1B { .D }, /Z, [{, }] + * LDFF1B { .B }, /Z, [{, }] + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. 
+ * \param Zt The destination vector register, Z (Scalable). + * \param Pg The governing predicate register, P (Predicate). + * \param Rn The first source base register with a register offset, + * constructed with the function: + * opnd_create_base_disp_aarch64(Rn, Rm, + * DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) + */ +#define INSTR_CREATE_ldff1b_sve_pred(dc, Zt, Pg, Rn) \ + instr_create_1dst_2src(dc, OP_ldff1b, Zt, Rn, Pg) + +/** + * Creates a LDFF1D instruction. + * + * This macro is used to encode the forms: + * \verbatim + * LDFF1D { .D }, /Z, [{, , LSL #3}] + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Zt The destination vector register, Z (Scalable). + * \param Pg The governing predicate register, P (Predicate). + * \param Rn The first source base register with a register offset, + * constructed with the function: + * opnd_create_base_disp_shift_aarch64(Rn, Rm, + * DR_EXTEND_UXTX, 1, 0, 0, OPSZ_32, 3) + */ +#define INSTR_CREATE_ldff1d_sve_pred(dc, Zt, Pg, Rn) \ + instr_create_1dst_2src(dc, OP_ldff1d, Zt, Rn, Pg) + +/** + * Creates a LDFF1H instruction. + * + * This macro is used to encode the forms: + * \verbatim + * LDFF1H { .H }, /Z, [{, , LSL #1}] + * LDFF1H { .S }, /Z, [{, , LSL #1}] + * LDFF1H { .D }, /Z, [{, , LSL #1}] + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Zt The destination vector register, Z (Scalable). + * \param Pg The governing predicate register, P (Predicate). + * \param Rn The first source base register with a register offset, + * constructed with the function: + * opnd_create_base_disp_shift_aarch64(Rn, Rm, + * DR_EXTEND_UXTX, 1, 0, 0, OPSZ_32, 1) + */ +#define INSTR_CREATE_ldff1h_sve_pred(dc, Zt, Pg, Rn) \ + instr_create_1dst_2src(dc, OP_ldff1h, Zt, Rn, Pg) + +/** + * Creates a LDFF1SB instruction. 
+ * + * This macro is used to encode the forms: + * \verbatim + * LDFF1SB { .H }, /Z, [{, }] + * LDFF1SB { .S }, /Z, [{, }] + * LDFF1SB { .D }, /Z, [{, }] + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Zt The destination vector register, Z (Scalable). + * \param Pg The governing predicate register, P (Predicate). + * \param Rn The first source base register with a register offset, + * constructed with the function: + * opnd_create_base_disp_aarch64(Rn, Rm, + * DR_EXTEND_UXTX, false, 0, 0, OPSZ_1) + */ +#define INSTR_CREATE_ldff1sb_sve_pred(dc, Zt, Pg, Rn) \ + instr_create_1dst_2src(dc, OP_ldff1sb, Zt, Rn, Pg) + +/** + * Creates a LDFF1SH instruction. + * + * This macro is used to encode the forms: + * \verbatim + * LDFF1SH { .S }, /Z, [{, , LSL #1}] + * LDFF1SH { .D }, /Z, [{, , LSL #1}] + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Zt The destination vector register, Z (Scalable). + * \param Pg The governing predicate register, P (Predicate). + * \param Rn The first source base register with a register offset, + * constructed with the function: + * opnd_create_base_disp_shift_aarch64(Rn, Rm, + * DR_EXTEND_UXTX, 1, 0, 0, OPSZ_16, 1) + */ +#define INSTR_CREATE_ldff1sh_sve_pred(dc, Zt, Pg, Rn) \ + instr_create_1dst_2src(dc, OP_ldff1sh, Zt, Rn, Pg) + +/** + * Creates a LDFF1SW instruction. + * + * This macro is used to encode the forms: + * \verbatim + * LDFF1SW { .D }, /Z, [{, , LSL #2}] + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Zt The destination vector register, Z (Scalable). + * \param Pg The governing predicate register, P (Predicate). 
+ * \param Rn The first source base register with a register offset, + * constructed with the function: + * opnd_create_base_disp_shift_aarch64(Rn, Rm, + * DR_EXTEND_UXTX, 1, 0, 0, OPSZ_16, 2) + */ +#define INSTR_CREATE_ldff1sw_sve_pred(dc, Zt, Pg, Rn) \ + instr_create_1dst_2src(dc, OP_ldff1sw, Zt, Rn, Pg) + +/** + * Creates a LDFF1W instruction. + * + * This macro is used to encode the forms: + * \verbatim + * LDFF1W { .S }, /Z, [{, , LSL #2}] + * LDFF1W { .D }, /Z, [{, , LSL #2}] + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Zt The destination vector register, Z (Scalable). + * \param Pg The governing predicate register, P (Predicate). + * \param Rn The first source base register with a register offset, + * constructed with the function: + * opnd_create_base_disp_shift_aarch64(Rn, Rm, + * DR_EXTEND_UXTX, 1, 0, 0, OPSZ_32, 2) + */ +#define INSTR_CREATE_ldff1w_sve_pred(dc, Zt, Pg, Rn) \ + instr_create_1dst_2src(dc, OP_ldff1w, Zt, Rn, Pg) + #endif /* DR_IR_MACROS_AARCH64_H */ diff --git a/core/ir/aarch64/opnd_defs.txt b/core/ir/aarch64/opnd_defs.txt index b9c7cb3cf20..9f4e4316e96 100644 --- a/core/ir/aarch64/opnd_defs.txt +++ b/core/ir/aarch64/opnd_defs.txt @@ -286,6 +286,7 @@ --------xx-xxxxx---------------- z_size_bhsd_16 # sve vector reg, elsz depending on size --------xx-xxxxx---------------- z_size_hsd_16 # sve vector reg, elsz depending on size --------xx-xxxxx---------------- imm2_tsz_index # Index encoded in imm2:tsz +-------????xxxxx------xxxxx----- svemem_gpr_5 # GPR offset and base reg for SVE ld/st -?--------------------xxxxx----- mem0p # gets size from 30; no offset, pair -?---------xxxxx????------------ x16imm # computes immed from 30 and 15:12 -x------------------------------ index3 # index of D subreg in Q: 0-1 diff --git a/core/ir/opnd_api.h b/core/ir/opnd_api.h index 0381f6fa784..842e865bc81 100644 --- a/core/ir/opnd_api.h +++ b/core/ir/opnd_api.h @@ -1854,8 +1854,10 @@ struct _opnd_t { byte 
/*bool*/ pre_index : 1; /* Access this using opnd_get_index_extend and opnd_set_index_extend. */ byte /*dr_extend_type_t*/ extend_type : 3; - /* Shift register offset left by amount implied by size of memory operand: */ + /* Enable shift register offset left */ byte /*bool*/ scaled : 1; + /* Shift offset amount */ + byte /*uint*/ scaled_value : 3; # elif defined(ARM) byte /*dr_shift_type_t*/ shift_type : 3; byte shift_amount_minus_1 : 5; /* 1..31 so we store (val - 1) */ @@ -2225,6 +2227,13 @@ opnd_create_base_disp_arm(reg_id_t base_reg, reg_id_t index_reg, #endif #ifdef AARCH64 +DR_API +/** + * Returns the left shift amount from \p size. + */ +uint +opnd_size_to_shift_amount(opnd_size_t size); + DR_API /** * Returns a memory reference operand that refers to either a base @@ -2234,13 +2243,28 @@ DR_API * Or a base register plus an optionally extended and shifted index register: * - [base_reg, index_reg, extend_type, shift_amount] * - * The shift_amount is zero or, if \p scaled, a value determined by the - * size of the operand. + * If \p scaled is enabled, \p shift determines the shift amount. * * The resulting operand has data size \p size (must be an OPSZ_ constant). * Both \p base_reg and \p index_reg must be DR_REG_ constants. * Either \p index_reg must be #DR_REG_NULL or disp must be 0. * + * TODO i#3044: WARNING this function may change during SVE development of + * DynamoRIO. The function will be considered stable when this warning has been + * removed. + * + * \note AArch64-only. + */ +opnd_t +opnd_create_base_disp_shift_aarch64(reg_id_t base_reg, reg_id_t index_reg, + dr_extend_type_t extend_type, bool scaled, int disp, + dr_opnd_flags_t flags, opnd_size_t size, uint shift); + +DR_API +/** + * Same as opnd_create_base_disp_shift_aarch64 but if \p scaled is true then the extend + * amount is calculated from the operand size (otherwise it is zero). + * * \note AArch64-only. 
*/ opnd_t @@ -2836,8 +2860,19 @@ DR_API /** * Assumes \p opnd is a base+disp memory reference. * Sets the index register to be extended by \p extend and optionally \p scaled. - * Returns whether successful. If the offset is scaled the amount it is shifted - * by is determined by the size of the memory operand. + * Returns whether successful. If \p scaled is zero, the offset is not scaled. + * \note AArch64-only. + */ +bool +opnd_set_index_extend_value(opnd_t *opnd, dr_extend_type_t extend, bool scaled, + uint scaled_value); + +DR_API +/** + * Assumes \p opnd is a base+disp memory reference. + * Sets the index register to be extended by \p extend and optionally \p scaled. + * Returns whether successful. If \p scaled is zero, the offset is not scaled; otherwise + * is calculated from the operand size. * \note AArch64-only. */ bool diff --git a/core/ir/opnd_shared.c b/core/ir/opnd_shared.c index 56c5ba8d99f..51f005015c9 100644 --- a/core/ir/opnd_shared.c +++ b/core/ir/opnd_shared.c @@ -757,9 +757,9 @@ opnd_create_base_disp_arm(reg_id_t base_reg, reg_id_t index_reg, #ifdef AARCH64 opnd_t -opnd_create_base_disp_aarch64(reg_id_t base_reg, reg_id_t index_reg, - dr_extend_type_t extend_type, bool scaled, int disp, - dr_opnd_flags_t flags, opnd_size_t size) +opnd_create_base_disp_shift_aarch64(reg_id_t base_reg, reg_id_t index_reg, + dr_extend_type_t extend_type, bool scaled, int disp, + dr_opnd_flags_t flags, opnd_size_t size, uint shift) { opnd_t opnd; opnd.kind = BASE_DISP_kind; @@ -781,10 +781,20 @@ opnd_create_base_disp_aarch64(reg_id_t base_reg, reg_id_t index_reg, opnd.value.base_disp.pre_index = false; opnd_set_disp_helper(&opnd, disp); opnd.aux.flags = flags; - if (!opnd_set_index_extend(&opnd, extend_type, scaled)) + if (!opnd_set_index_extend_value(&opnd, extend_type, scaled, shift)) CLIENT_ASSERT(false, "opnd_create_base_disp_aarch64: invalid extend type"); return opnd; } + +opnd_t +opnd_create_base_disp_aarch64(reg_id_t base_reg, reg_id_t index_reg, + 
dr_extend_type_t extend_type, bool scaled, int disp, + dr_opnd_flags_t flags, opnd_size_t size) +{ + const uint shift = scaled ? opnd_size_to_shift_amount(size) : 0; + return opnd_create_base_disp_shift_aarch64(base_reg, index_reg, extend_type, scaled, + disp, flags, size, shift); +} #endif #undef opnd_get_base @@ -890,8 +900,8 @@ opnd_set_index_shift(opnd_t *opnd, dr_shift_type_t shift, uint amount) #endif /* ARM */ #ifdef AARCH64 -static uint -opnd_size_to_extend_amount(opnd_size_t size) +uint +opnd_size_to_shift_amount(opnd_size_t size) { switch (size) { default: @@ -903,6 +913,8 @@ opnd_size_to_extend_amount(opnd_size_t size) case OPSZ_0: /* fall-through */ case OPSZ_8: return 3; case OPSZ_16: return 4; + case OPSZ_32: return 5; + case OPSZ_64: return 6; } } @@ -918,7 +930,7 @@ opnd_get_index_extend(opnd_t opnd, OUT bool *scaled, OUT uint *amount) extend = opnd.value.base_disp.extend_type; scaled_out = opnd.value.base_disp.scaled; if (scaled_out) - amount_out = opnd_size_to_extend_amount(opnd_get_size(opnd)); + amount_out = opnd.value.base_disp.scaled_value; } if (scaled != NULL) *scaled = scaled_out; @@ -928,7 +940,8 @@ opnd_get_index_extend(opnd_t opnd, OUT bool *scaled, OUT uint *amount) } bool -opnd_set_index_extend(opnd_t *opnd, dr_extend_type_t extend, bool scaled) +opnd_set_index_extend_value(opnd_t *opnd, dr_extend_type_t extend, bool scaled, + uint scaled_value) { if (!opnd_is_base_disp(*opnd)) { CLIENT_ASSERT(false, "opnd_set_index_shift called on invalid opnd type"); @@ -938,10 +951,22 @@ opnd_set_index_extend(opnd_t *opnd, dr_extend_type_t extend, bool scaled) CLIENT_ASSERT(false, "opnd index extend: invalid extend type"); return false; } + if (scaled_value > 7) { + CLIENT_ASSERT(false, "opnd index extend: invalid scaled value"); + return false; + } opnd->value.base_disp.extend_type = extend; opnd->value.base_disp.scaled = scaled; + opnd->value.base_disp.scaled_value = scaled_value; return true; } + +bool +opnd_set_index_extend(opnd_t *opnd, 
dr_extend_type_t extend, bool scaled) +{ + const uint value = scaled ? opnd_size_to_shift_amount(opnd_get_size(*opnd)) : 0; + return opnd_set_index_extend_value(opnd, extend, scaled, value); +} #endif /* AARCH64 */ bool diff --git a/suite/tests/api/dis-a64-sve.txt b/suite/tests/api/dis-a64-sve.txt index 9d7f4540eae..bb14b3aab3d 100644 --- a/suite/tests/api/dis-a64-sve.txt +++ b/suite/tests/api/dis-a64-sve.txt @@ -10296,6 +10296,294 @@ 8577ff9b : ld1rw z27.d, p7/Z, [x28, #220] : ld1rw +0xdc(%x28)[4byte] %p7/z -> %z27.d 857fffff : ld1rw z31.d, p7/Z, [sp, #252] : ld1rw +0xfc(%sp)[4byte] %p7/z -> %z31.d +# LDFF1B { .H }, /Z, [{, }] (LDFF1B-Z.P.BR-U16) +a4206000 : ldff1b z0.h, p0/Z, [x0, x0] : ldff1b (%x0,%x0)[16byte] %p0/z -> %z0.h +a4256482 : ldff1b z2.h, p1/Z, [x4, x5] : ldff1b (%x4,%x5)[16byte] %p1/z -> %z2.h +a42768c4 : ldff1b z4.h, p2/Z, [x6, x7] : ldff1b (%x6,%x7)[16byte] %p2/z -> %z4.h +a4296906 : ldff1b z6.h, p2/Z, [x8, x9] : ldff1b (%x8,%x9)[16byte] %p2/z -> %z6.h +a42b6d48 : ldff1b z8.h, p3/Z, [x10, x11] : ldff1b (%x10,%x11)[16byte] %p3/z -> %z8.h +a42c6d6a : ldff1b z10.h, p3/Z, [x11, x12] : ldff1b (%x11,%x12)[16byte] %p3/z -> %z10.h +a42e71ac : ldff1b z12.h, p4/Z, [x13, x14] : ldff1b (%x13,%x14)[16byte] %p4/z -> %z12.h +a43071ee : ldff1b z14.h, p4/Z, [x15, x16] : ldff1b (%x15,%x16)[16byte] %p4/z -> %z14.h +a4327630 : ldff1b z16.h, p5/Z, [x17, x18] : ldff1b (%x17,%x18)[16byte] %p5/z -> %z16.h +a4347671 : ldff1b z17.h, p5/Z, [x19, x20] : ldff1b (%x19,%x20)[16byte] %p5/z -> %z17.h +a43676b3 : ldff1b z19.h, p5/Z, [x21, x22] : ldff1b (%x21,%x22)[16byte] %p5/z -> %z19.h +a4387af5 : ldff1b z21.h, p6/Z, [x23, x24] : ldff1b (%x23,%x24)[16byte] %p6/z -> %z21.h +a4397b17 : ldff1b z23.h, p6/Z, [x24, x25] : ldff1b (%x24,%x25)[16byte] %p6/z -> %z23.h +a43b7f59 : ldff1b z25.h, p7/Z, [x26, x27] : ldff1b (%x26,%x27)[16byte] %p7/z -> %z25.h +a43d7f9b : ldff1b z27.h, p7/Z, [x28, x29] : ldff1b (%x28,%x29)[16byte] %p7/z -> %z27.h +a43e7fff : ldff1b z31.h, p7/Z, [sp, x30] : 
ldff1b (%sp,%x30)[16byte] %p7/z -> %z31.h + +# LDFF1B { .S }, /Z, [{, }] (LDFF1B-Z.P.BR-U32) +a4406000 : ldff1b z0.s, p0/Z, [x0, x0] : ldff1b (%x0,%x0)[8byte] %p0/z -> %z0.s +a4456482 : ldff1b z2.s, p1/Z, [x4, x5] : ldff1b (%x4,%x5)[8byte] %p1/z -> %z2.s +a44768c4 : ldff1b z4.s, p2/Z, [x6, x7] : ldff1b (%x6,%x7)[8byte] %p2/z -> %z4.s +a4496906 : ldff1b z6.s, p2/Z, [x8, x9] : ldff1b (%x8,%x9)[8byte] %p2/z -> %z6.s +a44b6d48 : ldff1b z8.s, p3/Z, [x10, x11] : ldff1b (%x10,%x11)[8byte] %p3/z -> %z8.s +a44c6d6a : ldff1b z10.s, p3/Z, [x11, x12] : ldff1b (%x11,%x12)[8byte] %p3/z -> %z10.s +a44e71ac : ldff1b z12.s, p4/Z, [x13, x14] : ldff1b (%x13,%x14)[8byte] %p4/z -> %z12.s +a45071ee : ldff1b z14.s, p4/Z, [x15, x16] : ldff1b (%x15,%x16)[8byte] %p4/z -> %z14.s +a4527630 : ldff1b z16.s, p5/Z, [x17, x18] : ldff1b (%x17,%x18)[8byte] %p5/z -> %z16.s +a4547671 : ldff1b z17.s, p5/Z, [x19, x20] : ldff1b (%x19,%x20)[8byte] %p5/z -> %z17.s +a45676b3 : ldff1b z19.s, p5/Z, [x21, x22] : ldff1b (%x21,%x22)[8byte] %p5/z -> %z19.s +a4587af5 : ldff1b z21.s, p6/Z, [x23, x24] : ldff1b (%x23,%x24)[8byte] %p6/z -> %z21.s +a4597b17 : ldff1b z23.s, p6/Z, [x24, x25] : ldff1b (%x24,%x25)[8byte] %p6/z -> %z23.s +a45b7f59 : ldff1b z25.s, p7/Z, [x26, x27] : ldff1b (%x26,%x27)[8byte] %p7/z -> %z25.s +a45d7f9b : ldff1b z27.s, p7/Z, [x28, x29] : ldff1b (%x28,%x29)[8byte] %p7/z -> %z27.s +a45e7fff : ldff1b z31.s, p7/Z, [sp, x30] : ldff1b (%sp,%x30)[8byte] %p7/z -> %z31.s + +# LDFF1B { .D }, /Z, [{, }] (LDFF1B-Z.P.BR-U64) +a4606000 : ldff1b z0.d, p0/Z, [x0, x0] : ldff1b (%x0,%x0)[4byte] %p0/z -> %z0.d +a4656482 : ldff1b z2.d, p1/Z, [x4, x5] : ldff1b (%x4,%x5)[4byte] %p1/z -> %z2.d +a46768c4 : ldff1b z4.d, p2/Z, [x6, x7] : ldff1b (%x6,%x7)[4byte] %p2/z -> %z4.d +a4696906 : ldff1b z6.d, p2/Z, [x8, x9] : ldff1b (%x8,%x9)[4byte] %p2/z -> %z6.d +a46b6d48 : ldff1b z8.d, p3/Z, [x10, x11] : ldff1b (%x10,%x11)[4byte] %p3/z -> %z8.d +a46c6d6a : ldff1b z10.d, p3/Z, [x11, x12] : ldff1b (%x11,%x12)[4byte] %p3/z -> 
%z10.d +a46e71ac : ldff1b z12.d, p4/Z, [x13, x14] : ldff1b (%x13,%x14)[4byte] %p4/z -> %z12.d +a47071ee : ldff1b z14.d, p4/Z, [x15, x16] : ldff1b (%x15,%x16)[4byte] %p4/z -> %z14.d +a4727630 : ldff1b z16.d, p5/Z, [x17, x18] : ldff1b (%x17,%x18)[4byte] %p5/z -> %z16.d +a4747671 : ldff1b z17.d, p5/Z, [x19, x20] : ldff1b (%x19,%x20)[4byte] %p5/z -> %z17.d +a47676b3 : ldff1b z19.d, p5/Z, [x21, x22] : ldff1b (%x21,%x22)[4byte] %p5/z -> %z19.d +a4787af5 : ldff1b z21.d, p6/Z, [x23, x24] : ldff1b (%x23,%x24)[4byte] %p6/z -> %z21.d +a4797b17 : ldff1b z23.d, p6/Z, [x24, x25] : ldff1b (%x24,%x25)[4byte] %p6/z -> %z23.d +a47b7f59 : ldff1b z25.d, p7/Z, [x26, x27] : ldff1b (%x26,%x27)[4byte] %p7/z -> %z25.d +a47d7f9b : ldff1b z27.d, p7/Z, [x28, x29] : ldff1b (%x28,%x29)[4byte] %p7/z -> %z27.d +a47e7fff : ldff1b z31.d, p7/Z, [sp, x30] : ldff1b (%sp,%x30)[4byte] %p7/z -> %z31.d + +# LDFF1B { .B }, /Z, [{, }] (LDFF1B-Z.P.BR-U8) +a4006000 : ldff1b z0.b, p0/Z, [x0, x0] : ldff1b (%x0,%x0)[32byte] %p0/z -> %z0.b +a4056482 : ldff1b z2.b, p1/Z, [x4, x5] : ldff1b (%x4,%x5)[32byte] %p1/z -> %z2.b +a40768c4 : ldff1b z4.b, p2/Z, [x6, x7] : ldff1b (%x6,%x7)[32byte] %p2/z -> %z4.b +a4096906 : ldff1b z6.b, p2/Z, [x8, x9] : ldff1b (%x8,%x9)[32byte] %p2/z -> %z6.b +a40b6d48 : ldff1b z8.b, p3/Z, [x10, x11] : ldff1b (%x10,%x11)[32byte] %p3/z -> %z8.b +a40c6d6a : ldff1b z10.b, p3/Z, [x11, x12] : ldff1b (%x11,%x12)[32byte] %p3/z -> %z10.b +a40e71ac : ldff1b z12.b, p4/Z, [x13, x14] : ldff1b (%x13,%x14)[32byte] %p4/z -> %z12.b +a41071ee : ldff1b z14.b, p4/Z, [x15, x16] : ldff1b (%x15,%x16)[32byte] %p4/z -> %z14.b +a4127630 : ldff1b z16.b, p5/Z, [x17, x18] : ldff1b (%x17,%x18)[32byte] %p5/z -> %z16.b +a4147671 : ldff1b z17.b, p5/Z, [x19, x20] : ldff1b (%x19,%x20)[32byte] %p5/z -> %z17.b +a41676b3 : ldff1b z19.b, p5/Z, [x21, x22] : ldff1b (%x21,%x22)[32byte] %p5/z -> %z19.b +a4187af5 : ldff1b z21.b, p6/Z, [x23, x24] : ldff1b (%x23,%x24)[32byte] %p6/z -> %z21.b +a4197b17 : ldff1b z23.b, p6/Z, [x24, x25] : 
ldff1b (%x24,%x25)[32byte] %p6/z -> %z23.b +a41b7f59 : ldff1b z25.b, p7/Z, [x26, x27] : ldff1b (%x26,%x27)[32byte] %p7/z -> %z25.b +a41d7f9b : ldff1b z27.b, p7/Z, [x28, x29] : ldff1b (%x28,%x29)[32byte] %p7/z -> %z27.b +a41e7fff : ldff1b z31.b, p7/Z, [sp, x30] : ldff1b (%sp,%x30)[32byte] %p7/z -> %z31.b + +# LDFF1D { .D }, /Z, [{, , LSL #3}] (LDFF1D-Z.P.BR-U64) +a5e06000 : ldff1d z0.d, p0/Z, [x0, x0, LSL #3] : ldff1d (%x0,%x0,lsl #3)[32byte] %p0/z -> %z0.d +a5e56482 : ldff1d z2.d, p1/Z, [x4, x5, LSL #3] : ldff1d (%x4,%x5,lsl #3)[32byte] %p1/z -> %z2.d +a5e768c4 : ldff1d z4.d, p2/Z, [x6, x7, LSL #3] : ldff1d (%x6,%x7,lsl #3)[32byte] %p2/z -> %z4.d +a5e96906 : ldff1d z6.d, p2/Z, [x8, x9, LSL #3] : ldff1d (%x8,%x9,lsl #3)[32byte] %p2/z -> %z6.d +a5eb6d48 : ldff1d z8.d, p3/Z, [x10, x11, LSL #3] : ldff1d (%x10,%x11,lsl #3)[32byte] %p3/z -> %z8.d +a5ec6d6a : ldff1d z10.d, p3/Z, [x11, x12, LSL #3] : ldff1d (%x11,%x12,lsl #3)[32byte] %p3/z -> %z10.d +a5ee71ac : ldff1d z12.d, p4/Z, [x13, x14, LSL #3] : ldff1d (%x13,%x14,lsl #3)[32byte] %p4/z -> %z12.d +a5f071ee : ldff1d z14.d, p4/Z, [x15, x16, LSL #3] : ldff1d (%x15,%x16,lsl #3)[32byte] %p4/z -> %z14.d +a5f27630 : ldff1d z16.d, p5/Z, [x17, x18, LSL #3] : ldff1d (%x17,%x18,lsl #3)[32byte] %p5/z -> %z16.d +a5f47671 : ldff1d z17.d, p5/Z, [x19, x20, LSL #3] : ldff1d (%x19,%x20,lsl #3)[32byte] %p5/z -> %z17.d +a5f676b3 : ldff1d z19.d, p5/Z, [x21, x22, LSL #3] : ldff1d (%x21,%x22,lsl #3)[32byte] %p5/z -> %z19.d +a5f87af5 : ldff1d z21.d, p6/Z, [x23, x24, LSL #3] : ldff1d (%x23,%x24,lsl #3)[32byte] %p6/z -> %z21.d +a5f97b17 : ldff1d z23.d, p6/Z, [x24, x25, LSL #3] : ldff1d (%x24,%x25,lsl #3)[32byte] %p6/z -> %z23.d +a5fb7f59 : ldff1d z25.d, p7/Z, [x26, x27, LSL #3] : ldff1d (%x26,%x27,lsl #3)[32byte] %p7/z -> %z25.d +a5fd7f9b : ldff1d z27.d, p7/Z, [x28, x29, LSL #3] : ldff1d (%x28,%x29,lsl #3)[32byte] %p7/z -> %z27.d +a5fe7fff : ldff1d z31.d, p7/Z, [sp, x30, LSL #3] : ldff1d (%sp,%x30,lsl #3)[32byte] %p7/z -> %z31.d + +# LDFF1H { 
.H }, /Z, [{, , LSL #1}] (LDFF1H-Z.P.BR-U16) +a4a06000 : ldff1h z0.h, p0/Z, [x0, x0, LSL #1] : ldff1h (%x0,%x0,lsl #1)[32byte] %p0/z -> %z0.h +a4a56482 : ldff1h z2.h, p1/Z, [x4, x5, LSL #1] : ldff1h (%x4,%x5,lsl #1)[32byte] %p1/z -> %z2.h +a4a768c4 : ldff1h z4.h, p2/Z, [x6, x7, LSL #1] : ldff1h (%x6,%x7,lsl #1)[32byte] %p2/z -> %z4.h +a4a96906 : ldff1h z6.h, p2/Z, [x8, x9, LSL #1] : ldff1h (%x8,%x9,lsl #1)[32byte] %p2/z -> %z6.h +a4ab6d48 : ldff1h z8.h, p3/Z, [x10, x11, LSL #1] : ldff1h (%x10,%x11,lsl #1)[32byte] %p3/z -> %z8.h +a4ac6d6a : ldff1h z10.h, p3/Z, [x11, x12, LSL #1] : ldff1h (%x11,%x12,lsl #1)[32byte] %p3/z -> %z10.h +a4ae71ac : ldff1h z12.h, p4/Z, [x13, x14, LSL #1] : ldff1h (%x13,%x14,lsl #1)[32byte] %p4/z -> %z12.h +a4b071ee : ldff1h z14.h, p4/Z, [x15, x16, LSL #1] : ldff1h (%x15,%x16,lsl #1)[32byte] %p4/z -> %z14.h +a4b27630 : ldff1h z16.h, p5/Z, [x17, x18, LSL #1] : ldff1h (%x17,%x18,lsl #1)[32byte] %p5/z -> %z16.h +a4b47671 : ldff1h z17.h, p5/Z, [x19, x20, LSL #1] : ldff1h (%x19,%x20,lsl #1)[32byte] %p5/z -> %z17.h +a4b676b3 : ldff1h z19.h, p5/Z, [x21, x22, LSL #1] : ldff1h (%x21,%x22,lsl #1)[32byte] %p5/z -> %z19.h +a4b87af5 : ldff1h z21.h, p6/Z, [x23, x24, LSL #1] : ldff1h (%x23,%x24,lsl #1)[32byte] %p6/z -> %z21.h +a4b97b17 : ldff1h z23.h, p6/Z, [x24, x25, LSL #1] : ldff1h (%x24,%x25,lsl #1)[32byte] %p6/z -> %z23.h +a4bb7f59 : ldff1h z25.h, p7/Z, [x26, x27, LSL #1] : ldff1h (%x26,%x27,lsl #1)[32byte] %p7/z -> %z25.h +a4bd7f9b : ldff1h z27.h, p7/Z, [x28, x29, LSL #1] : ldff1h (%x28,%x29,lsl #1)[32byte] %p7/z -> %z27.h +a4be7fff : ldff1h z31.h, p7/Z, [sp, x30, LSL #1] : ldff1h (%sp,%x30,lsl #1)[32byte] %p7/z -> %z31.h + +# LDFF1H { .S }, /Z, [{, , LSL #1}] (LDFF1H-Z.P.BR-U32) +a4c06000 : ldff1h z0.s, p0/Z, [x0, x0, LSL #1] : ldff1h (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.s +a4c56482 : ldff1h z2.s, p1/Z, [x4, x5, LSL #1] : ldff1h (%x4,%x5,lsl #1)[16byte] %p1/z -> %z2.s +a4c768c4 : ldff1h z4.s, p2/Z, [x6, x7, LSL #1] : ldff1h (%x6,%x7,lsl #1)[16byte] 
%p2/z -> %z4.s +a4c96906 : ldff1h z6.s, p2/Z, [x8, x9, LSL #1] : ldff1h (%x8,%x9,lsl #1)[16byte] %p2/z -> %z6.s +a4cb6d48 : ldff1h z8.s, p3/Z, [x10, x11, LSL #1] : ldff1h (%x10,%x11,lsl #1)[16byte] %p3/z -> %z8.s +a4cc6d6a : ldff1h z10.s, p3/Z, [x11, x12, LSL #1] : ldff1h (%x11,%x12,lsl #1)[16byte] %p3/z -> %z10.s +a4ce71ac : ldff1h z12.s, p4/Z, [x13, x14, LSL #1] : ldff1h (%x13,%x14,lsl #1)[16byte] %p4/z -> %z12.s +a4d071ee : ldff1h z14.s, p4/Z, [x15, x16, LSL #1] : ldff1h (%x15,%x16,lsl #1)[16byte] %p4/z -> %z14.s +a4d27630 : ldff1h z16.s, p5/Z, [x17, x18, LSL #1] : ldff1h (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.s +a4d47671 : ldff1h z17.s, p5/Z, [x19, x20, LSL #1] : ldff1h (%x19,%x20,lsl #1)[16byte] %p5/z -> %z17.s +a4d676b3 : ldff1h z19.s, p5/Z, [x21, x22, LSL #1] : ldff1h (%x21,%x22,lsl #1)[16byte] %p5/z -> %z19.s +a4d87af5 : ldff1h z21.s, p6/Z, [x23, x24, LSL #1] : ldff1h (%x23,%x24,lsl #1)[16byte] %p6/z -> %z21.s +a4d97b17 : ldff1h z23.s, p6/Z, [x24, x25, LSL #1] : ldff1h (%x24,%x25,lsl #1)[16byte] %p6/z -> %z23.s +a4db7f59 : ldff1h z25.s, p7/Z, [x26, x27, LSL #1] : ldff1h (%x26,%x27,lsl #1)[16byte] %p7/z -> %z25.s +a4dd7f9b : ldff1h z27.s, p7/Z, [x28, x29, LSL #1] : ldff1h (%x28,%x29,lsl #1)[16byte] %p7/z -> %z27.s +a4de7fff : ldff1h z31.s, p7/Z, [sp, x30, LSL #1] : ldff1h (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.s + +# LDFF1H { .D }, /Z, [{, , LSL #1}] (LDFF1H-Z.P.BR-U64) +a4e06000 : ldff1h z0.d, p0/Z, [x0, x0, LSL #1] : ldff1h (%x0,%x0,lsl #1)[8byte] %p0/z -> %z0.d +a4e56482 : ldff1h z2.d, p1/Z, [x4, x5, LSL #1] : ldff1h (%x4,%x5,lsl #1)[8byte] %p1/z -> %z2.d +a4e768c4 : ldff1h z4.d, p2/Z, [x6, x7, LSL #1] : ldff1h (%x6,%x7,lsl #1)[8byte] %p2/z -> %z4.d +a4e96906 : ldff1h z6.d, p2/Z, [x8, x9, LSL #1] : ldff1h (%x8,%x9,lsl #1)[8byte] %p2/z -> %z6.d +a4eb6d48 : ldff1h z8.d, p3/Z, [x10, x11, LSL #1] : ldff1h (%x10,%x11,lsl #1)[8byte] %p3/z -> %z8.d +a4ec6d6a : ldff1h z10.d, p3/Z, [x11, x12, LSL #1] : ldff1h (%x11,%x12,lsl #1)[8byte] %p3/z -> %z10.d +a4ee71ac 
: ldff1h z12.d, p4/Z, [x13, x14, LSL #1] : ldff1h (%x13,%x14,lsl #1)[8byte] %p4/z -> %z12.d +a4f071ee : ldff1h z14.d, p4/Z, [x15, x16, LSL #1] : ldff1h (%x15,%x16,lsl #1)[8byte] %p4/z -> %z14.d +a4f27630 : ldff1h z16.d, p5/Z, [x17, x18, LSL #1] : ldff1h (%x17,%x18,lsl #1)[8byte] %p5/z -> %z16.d +a4f47671 : ldff1h z17.d, p5/Z, [x19, x20, LSL #1] : ldff1h (%x19,%x20,lsl #1)[8byte] %p5/z -> %z17.d +a4f676b3 : ldff1h z19.d, p5/Z, [x21, x22, LSL #1] : ldff1h (%x21,%x22,lsl #1)[8byte] %p5/z -> %z19.d +a4f87af5 : ldff1h z21.d, p6/Z, [x23, x24, LSL #1] : ldff1h (%x23,%x24,lsl #1)[8byte] %p6/z -> %z21.d +a4f97b17 : ldff1h z23.d, p6/Z, [x24, x25, LSL #1] : ldff1h (%x24,%x25,lsl #1)[8byte] %p6/z -> %z23.d +a4fb7f59 : ldff1h z25.d, p7/Z, [x26, x27, LSL #1] : ldff1h (%x26,%x27,lsl #1)[8byte] %p7/z -> %z25.d +a4fd7f9b : ldff1h z27.d, p7/Z, [x28, x29, LSL #1] : ldff1h (%x28,%x29,lsl #1)[8byte] %p7/z -> %z27.d +a4fe7fff : ldff1h z31.d, p7/Z, [sp, x30, LSL #1] : ldff1h (%sp,%x30,lsl #1)[8byte] %p7/z -> %z31.d + +# LDFF1SB { .H }, /Z, [{, }] (LDFF1SB-Z.P.BR-S16) +a5c06000 : ldff1sb z0.h, p0/Z, [x0, x0] : ldff1sb (%x0,%x0)[16byte] %p0/z -> %z0.h +a5c56482 : ldff1sb z2.h, p1/Z, [x4, x5] : ldff1sb (%x4,%x5)[16byte] %p1/z -> %z2.h +a5c768c4 : ldff1sb z4.h, p2/Z, [x6, x7] : ldff1sb (%x6,%x7)[16byte] %p2/z -> %z4.h +a5c96906 : ldff1sb z6.h, p2/Z, [x8, x9] : ldff1sb (%x8,%x9)[16byte] %p2/z -> %z6.h +a5cb6d48 : ldff1sb z8.h, p3/Z, [x10, x11] : ldff1sb (%x10,%x11)[16byte] %p3/z -> %z8.h +a5cc6d6a : ldff1sb z10.h, p3/Z, [x11, x12] : ldff1sb (%x11,%x12)[16byte] %p3/z -> %z10.h +a5ce71ac : ldff1sb z12.h, p4/Z, [x13, x14] : ldff1sb (%x13,%x14)[16byte] %p4/z -> %z12.h +a5d071ee : ldff1sb z14.h, p4/Z, [x15, x16] : ldff1sb (%x15,%x16)[16byte] %p4/z -> %z14.h +a5d27630 : ldff1sb z16.h, p5/Z, [x17, x18] : ldff1sb (%x17,%x18)[16byte] %p5/z -> %z16.h +a5d47671 : ldff1sb z17.h, p5/Z, [x19, x20] : ldff1sb (%x19,%x20)[16byte] %p5/z -> %z17.h +a5d676b3 : ldff1sb z19.h, p5/Z, [x21, x22] : ldff1sb 
(%x21,%x22)[16byte] %p5/z -> %z19.h +a5d87af5 : ldff1sb z21.h, p6/Z, [x23, x24] : ldff1sb (%x23,%x24)[16byte] %p6/z -> %z21.h +a5d97b17 : ldff1sb z23.h, p6/Z, [x24, x25] : ldff1sb (%x24,%x25)[16byte] %p6/z -> %z23.h +a5db7f59 : ldff1sb z25.h, p7/Z, [x26, x27] : ldff1sb (%x26,%x27)[16byte] %p7/z -> %z25.h +a5dd7f9b : ldff1sb z27.h, p7/Z, [x28, x29] : ldff1sb (%x28,%x29)[16byte] %p7/z -> %z27.h +a5de7fff : ldff1sb z31.h, p7/Z, [sp, x30] : ldff1sb (%sp,%x30)[16byte] %p7/z -> %z31.h + +# LDFF1SB { .S }, /Z, [{, }] (LDFF1SB-Z.P.BR-S32) +a5a06000 : ldff1sb z0.s, p0/Z, [x0, x0] : ldff1sb (%x0,%x0)[8byte] %p0/z -> %z0.s +a5a56482 : ldff1sb z2.s, p1/Z, [x4, x5] : ldff1sb (%x4,%x5)[8byte] %p1/z -> %z2.s +a5a768c4 : ldff1sb z4.s, p2/Z, [x6, x7] : ldff1sb (%x6,%x7)[8byte] %p2/z -> %z4.s +a5a96906 : ldff1sb z6.s, p2/Z, [x8, x9] : ldff1sb (%x8,%x9)[8byte] %p2/z -> %z6.s +a5ab6d48 : ldff1sb z8.s, p3/Z, [x10, x11] : ldff1sb (%x10,%x11)[8byte] %p3/z -> %z8.s +a5ac6d6a : ldff1sb z10.s, p3/Z, [x11, x12] : ldff1sb (%x11,%x12)[8byte] %p3/z -> %z10.s +a5ae71ac : ldff1sb z12.s, p4/Z, [x13, x14] : ldff1sb (%x13,%x14)[8byte] %p4/z -> %z12.s +a5b071ee : ldff1sb z14.s, p4/Z, [x15, x16] : ldff1sb (%x15,%x16)[8byte] %p4/z -> %z14.s +a5b27630 : ldff1sb z16.s, p5/Z, [x17, x18] : ldff1sb (%x17,%x18)[8byte] %p5/z -> %z16.s +a5b47671 : ldff1sb z17.s, p5/Z, [x19, x20] : ldff1sb (%x19,%x20)[8byte] %p5/z -> %z17.s +a5b676b3 : ldff1sb z19.s, p5/Z, [x21, x22] : ldff1sb (%x21,%x22)[8byte] %p5/z -> %z19.s +a5b87af5 : ldff1sb z21.s, p6/Z, [x23, x24] : ldff1sb (%x23,%x24)[8byte] %p6/z -> %z21.s +a5b97b17 : ldff1sb z23.s, p6/Z, [x24, x25] : ldff1sb (%x24,%x25)[8byte] %p6/z -> %z23.s +a5bb7f59 : ldff1sb z25.s, p7/Z, [x26, x27] : ldff1sb (%x26,%x27)[8byte] %p7/z -> %z25.s +a5bd7f9b : ldff1sb z27.s, p7/Z, [x28, x29] : ldff1sb (%x28,%x29)[8byte] %p7/z -> %z27.s +a5be7fff : ldff1sb z31.s, p7/Z, [sp, x30] : ldff1sb (%sp,%x30)[8byte] %p7/z -> %z31.s + +# LDFF1SB { .D }, /Z, [{, }] (LDFF1SB-Z.P.BR-S64) +a5806000 : 
ldff1sb z0.d, p0/Z, [x0, x0] : ldff1sb (%x0,%x0)[4byte] %p0/z -> %z0.d +a5856482 : ldff1sb z2.d, p1/Z, [x4, x5] : ldff1sb (%x4,%x5)[4byte] %p1/z -> %z2.d +a58768c4 : ldff1sb z4.d, p2/Z, [x6, x7] : ldff1sb (%x6,%x7)[4byte] %p2/z -> %z4.d +a5896906 : ldff1sb z6.d, p2/Z, [x8, x9] : ldff1sb (%x8,%x9)[4byte] %p2/z -> %z6.d +a58b6d48 : ldff1sb z8.d, p3/Z, [x10, x11] : ldff1sb (%x10,%x11)[4byte] %p3/z -> %z8.d +a58c6d6a : ldff1sb z10.d, p3/Z, [x11, x12] : ldff1sb (%x11,%x12)[4byte] %p3/z -> %z10.d +a58e71ac : ldff1sb z12.d, p4/Z, [x13, x14] : ldff1sb (%x13,%x14)[4byte] %p4/z -> %z12.d +a59071ee : ldff1sb z14.d, p4/Z, [x15, x16] : ldff1sb (%x15,%x16)[4byte] %p4/z -> %z14.d +a5927630 : ldff1sb z16.d, p5/Z, [x17, x18] : ldff1sb (%x17,%x18)[4byte] %p5/z -> %z16.d +a5947671 : ldff1sb z17.d, p5/Z, [x19, x20] : ldff1sb (%x19,%x20)[4byte] %p5/z -> %z17.d +a59676b3 : ldff1sb z19.d, p5/Z, [x21, x22] : ldff1sb (%x21,%x22)[4byte] %p5/z -> %z19.d +a5987af5 : ldff1sb z21.d, p6/Z, [x23, x24] : ldff1sb (%x23,%x24)[4byte] %p6/z -> %z21.d +a5997b17 : ldff1sb z23.d, p6/Z, [x24, x25] : ldff1sb (%x24,%x25)[4byte] %p6/z -> %z23.d +a59b7f59 : ldff1sb z25.d, p7/Z, [x26, x27] : ldff1sb (%x26,%x27)[4byte] %p7/z -> %z25.d +a59d7f9b : ldff1sb z27.d, p7/Z, [x28, x29] : ldff1sb (%x28,%x29)[4byte] %p7/z -> %z27.d +a59e7fff : ldff1sb z31.d, p7/Z, [sp, x30] : ldff1sb (%sp,%x30)[4byte] %p7/z -> %z31.d + +# LDFF1SH { .S }, /Z, [{, , LSL #1}] (LDFF1SH-Z.P.BR-S32) +a5206000 : ldff1sh z0.s, p0/Z, [x0, x0, LSL #1] : ldff1sh (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.s +a5256482 : ldff1sh z2.s, p1/Z, [x4, x5, LSL #1] : ldff1sh (%x4,%x5,lsl #1)[16byte] %p1/z -> %z2.s +a52768c4 : ldff1sh z4.s, p2/Z, [x6, x7, LSL #1] : ldff1sh (%x6,%x7,lsl #1)[16byte] %p2/z -> %z4.s +a5296906 : ldff1sh z6.s, p2/Z, [x8, x9, LSL #1] : ldff1sh (%x8,%x9,lsl #1)[16byte] %p2/z -> %z6.s +a52b6d48 : ldff1sh z8.s, p3/Z, [x10, x11, LSL #1] : ldff1sh (%x10,%x11,lsl #1)[16byte] %p3/z -> %z8.s +a52c6d6a : ldff1sh z10.s, p3/Z, [x11, x12, LSL #1] : 
ldff1sh (%x11,%x12,lsl #1)[16byte] %p3/z -> %z10.s +a52e71ac : ldff1sh z12.s, p4/Z, [x13, x14, LSL #1] : ldff1sh (%x13,%x14,lsl #1)[16byte] %p4/z -> %z12.s +a53071ee : ldff1sh z14.s, p4/Z, [x15, x16, LSL #1] : ldff1sh (%x15,%x16,lsl #1)[16byte] %p4/z -> %z14.s +a5327630 : ldff1sh z16.s, p5/Z, [x17, x18, LSL #1] : ldff1sh (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.s +a5347671 : ldff1sh z17.s, p5/Z, [x19, x20, LSL #1] : ldff1sh (%x19,%x20,lsl #1)[16byte] %p5/z -> %z17.s +a53676b3 : ldff1sh z19.s, p5/Z, [x21, x22, LSL #1] : ldff1sh (%x21,%x22,lsl #1)[16byte] %p5/z -> %z19.s +a5387af5 : ldff1sh z21.s, p6/Z, [x23, x24, LSL #1] : ldff1sh (%x23,%x24,lsl #1)[16byte] %p6/z -> %z21.s +a5397b17 : ldff1sh z23.s, p6/Z, [x24, x25, LSL #1] : ldff1sh (%x24,%x25,lsl #1)[16byte] %p6/z -> %z23.s +a53b7f59 : ldff1sh z25.s, p7/Z, [x26, x27, LSL #1] : ldff1sh (%x26,%x27,lsl #1)[16byte] %p7/z -> %z25.s +a53d7f9b : ldff1sh z27.s, p7/Z, [x28, x29, LSL #1] : ldff1sh (%x28,%x29,lsl #1)[16byte] %p7/z -> %z27.s +a53e7fff : ldff1sh z31.s, p7/Z, [sp, x30, LSL #1] : ldff1sh (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.s + +# LDFF1SH { .D }, /Z, [{, , LSL #1}] (LDFF1SH-Z.P.BR-S64) +a5006000 : ldff1sh z0.d, p0/Z, [x0, x0, LSL #1] : ldff1sh (%x0,%x0,lsl #1)[8byte] %p0/z -> %z0.d +a5056482 : ldff1sh z2.d, p1/Z, [x4, x5, LSL #1] : ldff1sh (%x4,%x5,lsl #1)[8byte] %p1/z -> %z2.d +a50768c4 : ldff1sh z4.d, p2/Z, [x6, x7, LSL #1] : ldff1sh (%x6,%x7,lsl #1)[8byte] %p2/z -> %z4.d +a5096906 : ldff1sh z6.d, p2/Z, [x8, x9, LSL #1] : ldff1sh (%x8,%x9,lsl #1)[8byte] %p2/z -> %z6.d +a50b6d48 : ldff1sh z8.d, p3/Z, [x10, x11, LSL #1] : ldff1sh (%x10,%x11,lsl #1)[8byte] %p3/z -> %z8.d +a50c6d6a : ldff1sh z10.d, p3/Z, [x11, x12, LSL #1] : ldff1sh (%x11,%x12,lsl #1)[8byte] %p3/z -> %z10.d +a50e71ac : ldff1sh z12.d, p4/Z, [x13, x14, LSL #1] : ldff1sh (%x13,%x14,lsl #1)[8byte] %p4/z -> %z12.d +a51071ee : ldff1sh z14.d, p4/Z, [x15, x16, LSL #1] : ldff1sh (%x15,%x16,lsl #1)[8byte] %p4/z -> %z14.d +a5127630 : ldff1sh z16.d, p5/Z, 
[x17, x18, LSL #1] : ldff1sh (%x17,%x18,lsl #1)[8byte] %p5/z -> %z16.d +a5147671 : ldff1sh z17.d, p5/Z, [x19, x20, LSL #1] : ldff1sh (%x19,%x20,lsl #1)[8byte] %p5/z -> %z17.d +a51676b3 : ldff1sh z19.d, p5/Z, [x21, x22, LSL #1] : ldff1sh (%x21,%x22,lsl #1)[8byte] %p5/z -> %z19.d +a5187af5 : ldff1sh z21.d, p6/Z, [x23, x24, LSL #1] : ldff1sh (%x23,%x24,lsl #1)[8byte] %p6/z -> %z21.d +a5197b17 : ldff1sh z23.d, p6/Z, [x24, x25, LSL #1] : ldff1sh (%x24,%x25,lsl #1)[8byte] %p6/z -> %z23.d +a51b7f59 : ldff1sh z25.d, p7/Z, [x26, x27, LSL #1] : ldff1sh (%x26,%x27,lsl #1)[8byte] %p7/z -> %z25.d +a51d7f9b : ldff1sh z27.d, p7/Z, [x28, x29, LSL #1] : ldff1sh (%x28,%x29,lsl #1)[8byte] %p7/z -> %z27.d +a51e7fff : ldff1sh z31.d, p7/Z, [sp, x30, LSL #1] : ldff1sh (%sp,%x30,lsl #1)[8byte] %p7/z -> %z31.d + +# LDFF1SW { .D }, /Z, [{, , LSL #2}] (LDFF1SW-Z.P.BR-S64) +a4806000 : ldff1sw z0.d, p0/Z, [x0, x0, LSL #2] : ldff1sw (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.d +a4856482 : ldff1sw z2.d, p1/Z, [x4, x5, LSL #2] : ldff1sw (%x4,%x5,lsl #2)[16byte] %p1/z -> %z2.d +a48768c4 : ldff1sw z4.d, p2/Z, [x6, x7, LSL #2] : ldff1sw (%x6,%x7,lsl #2)[16byte] %p2/z -> %z4.d +a4896906 : ldff1sw z6.d, p2/Z, [x8, x9, LSL #2] : ldff1sw (%x8,%x9,lsl #2)[16byte] %p2/z -> %z6.d +a48b6d48 : ldff1sw z8.d, p3/Z, [x10, x11, LSL #2] : ldff1sw (%x10,%x11,lsl #2)[16byte] %p3/z -> %z8.d +a48c6d6a : ldff1sw z10.d, p3/Z, [x11, x12, LSL #2] : ldff1sw (%x11,%x12,lsl #2)[16byte] %p3/z -> %z10.d +a48e71ac : ldff1sw z12.d, p4/Z, [x13, x14, LSL #2] : ldff1sw (%x13,%x14,lsl #2)[16byte] %p4/z -> %z12.d +a49071ee : ldff1sw z14.d, p4/Z, [x15, x16, LSL #2] : ldff1sw (%x15,%x16,lsl #2)[16byte] %p4/z -> %z14.d +a4927630 : ldff1sw z16.d, p5/Z, [x17, x18, LSL #2] : ldff1sw (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.d +a4947671 : ldff1sw z17.d, p5/Z, [x19, x20, LSL #2] : ldff1sw (%x19,%x20,lsl #2)[16byte] %p5/z -> %z17.d +a49676b3 : ldff1sw z19.d, p5/Z, [x21, x22, LSL #2] : ldff1sw (%x21,%x22,lsl #2)[16byte] %p5/z -> %z19.d +a4987af5 : 
ldff1sw z21.d, p6/Z, [x23, x24, LSL #2] : ldff1sw (%x23,%x24,lsl #2)[16byte] %p6/z -> %z21.d +a4997b17 : ldff1sw z23.d, p6/Z, [x24, x25, LSL #2] : ldff1sw (%x24,%x25,lsl #2)[16byte] %p6/z -> %z23.d +a49b7f59 : ldff1sw z25.d, p7/Z, [x26, x27, LSL #2] : ldff1sw (%x26,%x27,lsl #2)[16byte] %p7/z -> %z25.d +a49d7f9b : ldff1sw z27.d, p7/Z, [x28, x29, LSL #2] : ldff1sw (%x28,%x29,lsl #2)[16byte] %p7/z -> %z27.d +a49e7fff : ldff1sw z31.d, p7/Z, [sp, x30, LSL #2] : ldff1sw (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.d + +# LDFF1W { .S }, /Z, [{, , LSL #2}] (LDFF1W-Z.P.BR-U32) +a5406000 : ldff1w z0.s, p0/Z, [x0, x0, LSL #2] : ldff1w (%x0,%x0,lsl #2)[32byte] %p0/z -> %z0.s +a5456482 : ldff1w z2.s, p1/Z, [x4, x5, LSL #2] : ldff1w (%x4,%x5,lsl #2)[32byte] %p1/z -> %z2.s +a54768c4 : ldff1w z4.s, p2/Z, [x6, x7, LSL #2] : ldff1w (%x6,%x7,lsl #2)[32byte] %p2/z -> %z4.s +a5496906 : ldff1w z6.s, p2/Z, [x8, x9, LSL #2] : ldff1w (%x8,%x9,lsl #2)[32byte] %p2/z -> %z6.s +a54b6d48 : ldff1w z8.s, p3/Z, [x10, x11, LSL #2] : ldff1w (%x10,%x11,lsl #2)[32byte] %p3/z -> %z8.s +a54c6d6a : ldff1w z10.s, p3/Z, [x11, x12, LSL #2] : ldff1w (%x11,%x12,lsl #2)[32byte] %p3/z -> %z10.s +a54e71ac : ldff1w z12.s, p4/Z, [x13, x14, LSL #2] : ldff1w (%x13,%x14,lsl #2)[32byte] %p4/z -> %z12.s +a55071ee : ldff1w z14.s, p4/Z, [x15, x16, LSL #2] : ldff1w (%x15,%x16,lsl #2)[32byte] %p4/z -> %z14.s +a5527630 : ldff1w z16.s, p5/Z, [x17, x18, LSL #2] : ldff1w (%x17,%x18,lsl #2)[32byte] %p5/z -> %z16.s +a5547671 : ldff1w z17.s, p5/Z, [x19, x20, LSL #2] : ldff1w (%x19,%x20,lsl #2)[32byte] %p5/z -> %z17.s +a55676b3 : ldff1w z19.s, p5/Z, [x21, x22, LSL #2] : ldff1w (%x21,%x22,lsl #2)[32byte] %p5/z -> %z19.s +a5587af5 : ldff1w z21.s, p6/Z, [x23, x24, LSL #2] : ldff1w (%x23,%x24,lsl #2)[32byte] %p6/z -> %z21.s +a5597b17 : ldff1w z23.s, p6/Z, [x24, x25, LSL #2] : ldff1w (%x24,%x25,lsl #2)[32byte] %p6/z -> %z23.s +a55b7f59 : ldff1w z25.s, p7/Z, [x26, x27, LSL #2] : ldff1w (%x26,%x27,lsl #2)[32byte] %p7/z -> %z25.s +a55d7f9b : 
ldff1w z27.s, p7/Z, [x28, x29, LSL #2] : ldff1w (%x28,%x29,lsl #2)[32byte] %p7/z -> %z27.s +a55e7fff : ldff1w z31.s, p7/Z, [sp, x30, LSL #2] : ldff1w (%sp,%x30,lsl #2)[32byte] %p7/z -> %z31.s + +# LDFF1W { .D }, /Z, [{, , LSL #2}] (LDFF1W-Z.P.BR-U64) +a5606000 : ldff1w z0.d, p0/Z, [x0, x0, LSL #2] : ldff1w (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.d +a5656482 : ldff1w z2.d, p1/Z, [x4, x5, LSL #2] : ldff1w (%x4,%x5,lsl #2)[16byte] %p1/z -> %z2.d +a56768c4 : ldff1w z4.d, p2/Z, [x6, x7, LSL #2] : ldff1w (%x6,%x7,lsl #2)[16byte] %p2/z -> %z4.d +a5696906 : ldff1w z6.d, p2/Z, [x8, x9, LSL #2] : ldff1w (%x8,%x9,lsl #2)[16byte] %p2/z -> %z6.d +a56b6d48 : ldff1w z8.d, p3/Z, [x10, x11, LSL #2] : ldff1w (%x10,%x11,lsl #2)[16byte] %p3/z -> %z8.d +a56c6d6a : ldff1w z10.d, p3/Z, [x11, x12, LSL #2] : ldff1w (%x11,%x12,lsl #2)[16byte] %p3/z -> %z10.d +a56e71ac : ldff1w z12.d, p4/Z, [x13, x14, LSL #2] : ldff1w (%x13,%x14,lsl #2)[16byte] %p4/z -> %z12.d +a57071ee : ldff1w z14.d, p4/Z, [x15, x16, LSL #2] : ldff1w (%x15,%x16,lsl #2)[16byte] %p4/z -> %z14.d +a5727630 : ldff1w z16.d, p5/Z, [x17, x18, LSL #2] : ldff1w (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.d +a5747671 : ldff1w z17.d, p5/Z, [x19, x20, LSL #2] : ldff1w (%x19,%x20,lsl #2)[16byte] %p5/z -> %z17.d +a57676b3 : ldff1w z19.d, p5/Z, [x21, x22, LSL #2] : ldff1w (%x21,%x22,lsl #2)[16byte] %p5/z -> %z19.d +a5787af5 : ldff1w z21.d, p6/Z, [x23, x24, LSL #2] : ldff1w (%x23,%x24,lsl #2)[16byte] %p6/z -> %z21.d +a5797b17 : ldff1w z23.d, p6/Z, [x24, x25, LSL #2] : ldff1w (%x24,%x25,lsl #2)[16byte] %p6/z -> %z23.d +a57b7f59 : ldff1w z25.d, p7/Z, [x26, x27, LSL #2] : ldff1w (%x26,%x27,lsl #2)[16byte] %p7/z -> %z25.d +a57d7f9b : ldff1w z27.d, p7/Z, [x28, x29, LSL #2] : ldff1w (%x28,%x29,lsl #2)[16byte] %p7/z -> %z27.d +a57e7fff : ldff1w z31.d, p7/Z, [sp, x30, LSL #2] : ldff1w (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.d + # LDR , [{, #, MUL VL}] 858043c0 : ldr z0, [x30] : ldr (%x30)[32byte] -> %z0 858057a1 : ldr z1, [x29, #5, mul vl] : ldr 
+0x05(%x29)[32byte] -> %z1 diff --git a/suite/tests/api/ir_aarch64_sve.c b/suite/tests/api/ir_aarch64_sve.c index 57fbb91b1da..f97ede81972 100644 --- a/suite/tests/api/ir_aarch64_sve.c +++ b/suite/tests/api/ir_aarch64_sve.c @@ -13692,6 +13692,276 @@ TEST_INSTR(rdvl) opnd_create_immed_int(imm6_0_0[i], OPSZ_6b)); } +TEST_INSTR(ldff1b_sve_pred) +{ + /* Testing LDFF1B { .H }, /Z, [{, }] */ + const char *const expected_0_0[6] = { + "ldff1b (%x0,%x0)[16byte] %p0/z -> %z0.h", + "ldff1b (%x7,%x8)[16byte] %p2/z -> %z5.h", + "ldff1b (%x12,%x13)[16byte] %p3/z -> %z10.h", + "ldff1b (%x17,%x18)[16byte] %p5/z -> %z16.h", + "ldff1b (%x22,%x23)[16byte] %p6/z -> %z21.h", + "ldff1b (%sp,%x30)[16byte] %p7/z -> %z31.h", + }; + TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_16)); + + /* Testing LDFF1B { .S }, /Z, [{, }] */ + const char *const expected_1_0[6] = { + "ldff1b (%x0,%x0)[8byte] %p0/z -> %z0.s", + "ldff1b (%x7,%x8)[8byte] %p2/z -> %z5.s", + "ldff1b (%x12,%x13)[8byte] %p3/z -> %z10.s", + "ldff1b (%x17,%x18)[8byte] %p5/z -> %z16.s", + "ldff1b (%x22,%x23)[8byte] %p6/z -> %z21.s", + "ldff1b (%sp,%x30)[8byte] %p7/z -> %z31.s", + }; + TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_8)); + + /* Testing LDFF1B { .D }, /Z, [{, }] */ + const char *const expected_2_0[6] = { + "ldff1b (%x0,%x0)[4byte] %p0/z -> %z0.d", + "ldff1b (%x7,%x8)[4byte] %p2/z -> %z5.d", + "ldff1b (%x12,%x13)[4byte] %p3/z -> %z10.d", + "ldff1b (%x17,%x18)[4byte] %p5/z -> %z16.d", + "ldff1b (%x22,%x23)[4byte] %p6/z -> %z21.d", + "ldff1b 
(%sp,%x30)[4byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4)); + + /* Testing LDFF1B { .B }, /Z, [{, }] */ + const char *const expected_3_0[6] = { + "ldff1b (%x0,%x0)[32byte] %p0/z -> %z0.b", + "ldff1b (%x7,%x8)[32byte] %p2/z -> %z5.b", + "ldff1b (%x12,%x13)[32byte] %p3/z -> %z10.b", + "ldff1b (%x17,%x18)[32byte] %p5/z -> %z16.b", + "ldff1b (%x22,%x23)[32byte] %p6/z -> %z21.b", + "ldff1b (%sp,%x30)[32byte] %p7/z -> %z31.b", + }; + TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_3_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_32)); +} + +TEST_INSTR(ldff1d_sve_pred) +{ + /* Testing LDFF1D { .D }, /Z, [{, , LSL #3}] */ + const char *const expected_0_0[6] = { + "ldff1d (%x0,%x0,lsl #3)[32byte] %p0/z -> %z0.d", + "ldff1d (%x7,%x8,lsl #3)[32byte] %p2/z -> %z5.d", + "ldff1d (%x12,%x13,lsl #3)[32byte] %p3/z -> %z10.d", + "ldff1d (%x17,%x18,lsl #3)[32byte] %p5/z -> %z16.d", + "ldff1d (%x22,%x23,lsl #3)[32byte] %p6/z -> %z21.d", + "ldff1d (%sp,%x30,lsl #3)[32byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1d, ldff1d_sve_pred, 6, expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], + Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, + 0, 0, OPSZ_32, 3)); +} + +TEST_INSTR(ldff1h_sve_pred) +{ + /* Testing LDFF1H { .H }, /Z, [{, , LSL #1}] */ + const char *const expected_0_0[6] = { + "ldff1h (%x0,%x0,lsl #1)[32byte] %p0/z -> %z0.h", + "ldff1h (%x7,%x8,lsl #1)[32byte] %p2/z -> %z5.h", + "ldff1h 
(%x12,%x13,lsl #1)[32byte] %p3/z -> %z10.h", + "ldff1h (%x17,%x18,lsl #1)[32byte] %p5/z -> %z16.h", + "ldff1h (%x22,%x23,lsl #1)[32byte] %p6/z -> %z21.h", + "ldff1h (%sp,%x30,lsl #1)[32byte] %p7/z -> %z31.h", + }; + TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], + Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, + 0, 0, OPSZ_32, 1)); + + /* Testing LDFF1H { .S }, /Z, [{, , LSL #1}] */ + const char *const expected_1_0[6] = { + "ldff1h (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.s", + "ldff1h (%x7,%x8,lsl #1)[16byte] %p2/z -> %z5.s", + "ldff1h (%x12,%x13,lsl #1)[16byte] %p3/z -> %z10.s", + "ldff1h (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.s", + "ldff1h (%x22,%x23,lsl #1)[16byte] %p6/z -> %z21.s", + "ldff1h (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.s", + }; + TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], + Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, + 0, 0, OPSZ_16, 1)); + + /* Testing LDFF1H { .D }, /Z, [{, , LSL #1}] */ + const char *const expected_2_0[6] = { + "ldff1h (%x0,%x0,lsl #1)[8byte] %p0/z -> %z0.d", + "ldff1h (%x7,%x8,lsl #1)[8byte] %p2/z -> %z5.d", + "ldff1h (%x12,%x13,lsl #1)[8byte] %p3/z -> %z10.d", + "ldff1h (%x17,%x18,lsl #1)[8byte] %p5/z -> %z16.d", + "ldff1h (%x22,%x23,lsl #1)[8byte] %p6/z -> %z21.d", + "ldff1h (%sp,%x30,lsl #1)[8byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], + Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, + 0, 0, OPSZ_8, 1)); +} + +TEST_INSTR(ldff1sb_sve_pred) +{ + 
/* Testing LDFF1SB { .H }, /Z, [{, }] */ + const char *const expected_0_0[6] = { + "ldff1sb (%x0,%x0)[16byte] %p0/z -> %z0.h", + "ldff1sb (%x7,%x8)[16byte] %p2/z -> %z5.h", + "ldff1sb (%x12,%x13)[16byte] %p3/z -> %z10.h", + "ldff1sb (%x17,%x18)[16byte] %p5/z -> %z16.h", + "ldff1sb (%x22,%x23)[16byte] %p6/z -> %z21.h", + "ldff1sb (%sp,%x30)[16byte] %p7/z -> %z31.h", + }; + TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_16)); + + /* Testing LDFF1SB { .S }, /Z, [{, }] */ + const char *const expected_1_0[6] = { + "ldff1sb (%x0,%x0)[8byte] %p0/z -> %z0.s", + "ldff1sb (%x7,%x8)[8byte] %p2/z -> %z5.s", + "ldff1sb (%x12,%x13)[8byte] %p3/z -> %z10.s", + "ldff1sb (%x17,%x18)[8byte] %p5/z -> %z16.s", + "ldff1sb (%x22,%x23)[8byte] %p6/z -> %z21.s", + "ldff1sb (%sp,%x30)[8byte] %p7/z -> %z31.s", + }; + TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_8)); + + /* Testing LDFF1SB { .D }, /Z, [{, }] */ + const char *const expected_2_0[6] = { + "ldff1sb (%x0,%x0)[4byte] %p0/z -> %z0.d", + "ldff1sb (%x7,%x8)[4byte] %p2/z -> %z5.d", + "ldff1sb (%x12,%x13)[4byte] %p3/z -> %z10.d", + "ldff1sb (%x17,%x18)[4byte] %p5/z -> %z16.d", + "ldff1sb (%x22,%x23)[4byte] %p6/z -> %z21.d", + "ldff1sb (%sp,%x30)[4byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4)); 
+} + +TEST_INSTR(ldff1sh_sve_pred) +{ + /* Testing LDFF1SH { .S }, /Z, [{, , LSL #1}] */ + const char *const expected_0_0[6] = { + "ldff1sh (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.s", + "ldff1sh (%x7,%x8,lsl #1)[16byte] %p2/z -> %z5.s", + "ldff1sh (%x12,%x13,lsl #1)[16byte] %p3/z -> %z10.s", + "ldff1sh (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.s", + "ldff1sh (%x22,%x23,lsl #1)[16byte] %p6/z -> %z21.s", + "ldff1sh (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.s", + }; + TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], + Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, + 0, 0, OPSZ_16, 1)); + + /* Testing LDFF1SH { .D }, /Z, [{, , LSL #1}] */ + const char *const expected_1_0[6] = { + "ldff1sh (%x0,%x0,lsl #1)[8byte] %p0/z -> %z0.d", + "ldff1sh (%x7,%x8,lsl #1)[8byte] %p2/z -> %z5.d", + "ldff1sh (%x12,%x13,lsl #1)[8byte] %p3/z -> %z10.d", + "ldff1sh (%x17,%x18,lsl #1)[8byte] %p5/z -> %z16.d", + "ldff1sh (%x22,%x23,lsl #1)[8byte] %p6/z -> %z21.d", + "ldff1sh (%sp,%x30,lsl #1)[8byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], + Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, + 0, 0, OPSZ_8, 1)); +} + +TEST_INSTR(ldff1sw_sve_pred) +{ + /* Testing LDFF1SW { .D }, /Z, [{, , LSL #2}] */ + const char *const expected_0_0[6] = { + "ldff1sw (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.d", + "ldff1sw (%x7,%x8,lsl #2)[16byte] %p2/z -> %z5.d", + "ldff1sw (%x12,%x13,lsl #2)[16byte] %p3/z -> %z10.d", + "ldff1sw (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.d", + "ldff1sw (%x22,%x23,lsl #2)[16byte] %p6/z -> %z21.d", + "ldff1sw (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1sw, ldff1sw_sve_pred, 6, 
expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], + Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, + 0, 0, OPSZ_16, 2)); +} + +TEST_INSTR(ldff1w_sve_pred) +{ + /* Testing LDFF1W { .S }, /Z, [{, , LSL #2}] */ + const char *const expected_0_0[6] = { + "ldff1w (%x0,%x0,lsl #2)[32byte] %p0/z -> %z0.s", + "ldff1w (%x7,%x8,lsl #2)[32byte] %p2/z -> %z5.s", + "ldff1w (%x12,%x13,lsl #2)[32byte] %p3/z -> %z10.s", + "ldff1w (%x17,%x18,lsl #2)[32byte] %p5/z -> %z16.s", + "ldff1w (%x22,%x23,lsl #2)[32byte] %p6/z -> %z21.s", + "ldff1w (%sp,%x30,lsl #2)[32byte] %p7/z -> %z31.s", + }; + TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], + Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, + 0, 0, OPSZ_32, 2)); + + /* Testing LDFF1W { .D }, /Z, [{, , LSL #2}] */ + const char *const expected_1_0[6] = { + "ldff1w (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.d", + "ldff1w (%x7,%x8,lsl #2)[16byte] %p2/z -> %z5.d", + "ldff1w (%x12,%x13,lsl #2)[16byte] %p3/z -> %z10.d", + "ldff1w (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.d", + "ldff1w (%x22,%x23,lsl #2)[16byte] %p6/z -> %z21.d", + "ldff1w (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], + Xn_six_offset_3[i], DR_EXTEND_UXTX, 1, + 0, 0, OPSZ_16, 2)); +} + int main(int argc, char *argv[]) { @@ -14099,6 +14369,14 @@ main(int argc, char *argv[]) RUN_INSTR_TEST(fmul_sve_vector); RUN_INSTR_TEST(fmul_sve_idx); + RUN_INSTR_TEST(ldff1b_sve_pred); + RUN_INSTR_TEST(ldff1d_sve_pred); + 
RUN_INSTR_TEST(ldff1h_sve_pred); + RUN_INSTR_TEST(ldff1sb_sve_pred); + RUN_INSTR_TEST(ldff1sh_sve_pred); + RUN_INSTR_TEST(ldff1sw_sve_pred); + RUN_INSTR_TEST(ldff1w_sve_pred); + print("All sve tests complete.\n"); #ifndef STANDALONE_DECODER dr_standalone_exit();