diff --git a/core/ir/aarch64/codec.c b/core/ir/aarch64/codec.c index f4a93898f49..afd34d7b3c7 100644 --- a/core/ir/aarch64/codec.c +++ b/core/ir/aarch64/codec.c @@ -550,7 +550,7 @@ encode_reg(OUT uint *num, OUT bool *is_x, reg_id_t reg, bool is_sp) } /* Decode SIMD/FP register. */ -static inline opnd_t +static inline reg_id_t decode_vreg(aarch64_reg_offset scale, uint n) { reg_id_t reg = DR_REG_NULL; @@ -564,19 +564,15 @@ decode_vreg(aarch64_reg_offset scale, uint n) case Z_REG: reg = DR_REG_Z0 + n; break; default: ASSERT_NOT_REACHED(); } - return opnd_create_reg(reg); + return reg; } /* Encode SIMD/FP register. */ static inline bool -encode_vreg(INOUT opnd_size_t *x, OUT uint *r, opnd_t opnd) +encode_vreg(INOUT opnd_size_t *x, OUT uint *r, reg_id_t reg) { - reg_id_t reg; opnd_size_t sz; uint n; - if (!opnd_is_reg(opnd)) - return false; - reg = opnd_get_reg(opnd); if ((uint)(reg - DR_REG_B0) < 32) { n = reg - DR_REG_B0; sz = OPSZ_1; @@ -608,6 +604,12 @@ encode_vreg(INOUT opnd_size_t *x, OUT uint *r, opnd_t opnd) return true; } +static inline bool +is_vreg(INOUT opnd_size_t *x, OUT uint *r, opnd_t opnd) +{ + return opnd_is_reg(opnd) && encode_vreg(x, r, opnd_get_reg(opnd)); +} + static opnd_t create_base_imm(uint enc, int disp, int bytes) { @@ -1330,7 +1332,7 @@ encode_opnd_memreg_size(opnd_size_t size, opnd_t opnd, OUT uint *enc_out) static bool decode_opnd_q0p(int add, uint enc, OUT opnd_t *opnd) { - *opnd = decode_vreg(4, (extract_uint(enc, 0, 5) + add) % 32); + *opnd = opnd_create_reg(decode_vreg(4, (extract_uint(enc, 0, 5) + add) % 32)); return true; } @@ -1339,7 +1341,7 @@ encode_opnd_q0p(int add, opnd_t opnd, OUT uint *enc_out) { opnd_size_t size = OPSZ_NA; uint r; - if (!encode_vreg(&size, &r, opnd) || size != OPSZ_16) + if (!is_vreg(&size, &r, opnd) || size != OPSZ_16) return false; *enc_out = (r - add) % 32; return true; @@ -1371,7 +1373,7 @@ encode_opnd_rn(bool is_sp, int pos, int sz_bit, opnd_t opnd, OUT uint *enc_out) static bool 
decode_opnd_vector_reg(int pos, int scale, uint enc, OUT opnd_t *opnd) { - *opnd = decode_vreg(scale, extract_uint(enc, pos, 5)); + *opnd = opnd_create_reg(decode_vreg(scale, extract_uint(enc, pos, 5))); return true; } @@ -1385,7 +1387,7 @@ encode_opnd_vector_reg(int pos, int scale, opnd_t opnd, OUT uint *enc_out) else requested_size = opnd_size_from_bytes(1 << scale); uint r; - if (!encode_vreg(&size, &r, opnd) || size != requested_size) + if (!is_vreg(&size, &r, opnd) || size != requested_size) return false; *enc_out = r << pos; return true; @@ -1530,7 +1532,7 @@ encode_opnd_float_reg(int pos, opnd_t opnd, OUT uint *enc_out) opnd_size_t size = OPSZ_NA; - if (!encode_vreg(&size, &num, opnd)) + if (!is_vreg(&size, &num, opnd)) return false; if (!size_to_ftype(size, &type)) return false; @@ -1611,7 +1613,7 @@ encode_sized_base(uint pos_start, uint size_start, uint min_size, uint max_size, return false; uint reg_number; - if (!encode_vreg(&vec_size, &reg_number, opnd)) + if (!is_vreg(&vec_size, &reg_number, opnd)) return false; if (offset > 0) { @@ -1681,7 +1683,7 @@ encode_sized_z_tb(uint pos_start, uint min_size, uint max_size, opnd_t opnd, opnd_size_t vec_size = OPSZ_SCALABLE; uint reg_number; - if (!encode_vreg(&vec_size, &reg_number, opnd)) + if (!is_vreg(&vec_size, &reg_number, opnd)) return false; *enc_out |= (reg_number << pos_start); @@ -3798,7 +3800,7 @@ encode_opnd_z4_d_16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_o static inline bool decode_opnd_q4_16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) { - *opnd = decode_vreg(QUAD_REG, extract_uint(enc, 16, 4)); + *opnd = opnd_create_reg(decode_vreg(QUAD_REG, extract_uint(enc, 16, 4))); return true; } @@ -3807,7 +3809,7 @@ encode_opnd_q4_16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out { opnd_size_t size = OPSZ_NA; uint r; - IF_RETURN_FALSE(!encode_vreg(&size, &r, opnd)) + IF_RETURN_FALSE(!is_vreg(&size, &r, opnd)) IF_RETURN_FALSE(size != OPSZ_16) IF_RETURN_FALSE(r > 15) @@ -3936,7 
+3938,8 @@ decode_z_tsz_bhsdq_base(uint enc, uint pos, OUT opnd_t *opnd) if (size == OPSZ_NA) return false; - *opnd = opnd_create_reg_element_vector(DR_REG_Z0 + extract_uint(enc, pos, 5), size); + *opnd = opnd_create_reg_element_vector(decode_vreg(Z_REG, extract_uint(enc, pos, 5)), + size); return true; } @@ -4494,6 +4497,82 @@ encode_opnd_imm8_10(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_o return true; } +static inline bool +decode_svemem_gpr_vec(uint enc, aarch64_reg_offset element_size, dr_extend_type_t mod, + aarch64_reg_offset memory_access_size, bool scaled, + bool is_prefetch, OUT opnd_t *opnd) +{ + ASSERT(memory_access_size <= DOUBLE_REG); + + const reg_id_t xn = + decode_reg(extract_uint(enc, 5, 5), /*is_x=*/true, /*is_sp=*/true); + + const reg_id_t zm = decode_vreg(Z_REG, extract_uint(enc, 16, 5)); + ASSERT(reg_is_z(zm)); + + const uint num_elements = get_elements_in_sve_vector(element_size); + const opnd_size_t mem_size = is_prefetch + ? OPSZ_0 + : opnd_size_from_bytes((1 << memory_access_size) * num_elements); + + *opnd = opnd_create_vector_base_disp_aarch64( + xn, zm, get_opnd_size_from_offset(element_size), mod, scaled, 0, 0, mem_size, + scaled ? 
memory_access_size : 0); + + return true; +} + +static inline bool +encode_svemem_gpr_vec(uint enc, aarch64_reg_offset element_size, aarch64_reg_offset msz, + bool scaled, opnd_t opnd, OUT uint *enc_out) +{ + ASSERT(msz <= DOUBLE_REG); + + if (!opnd_is_base_disp(opnd) || opnd_get_index(opnd) == DR_REG_NULL || + get_vector_element_reg_offset(opnd) != element_size) + return false; + + bool opnd_is_scaled; + uint scale; + opnd_get_index_extend(opnd, &opnd_is_scaled, &scale); + if (scaled != opnd_is_scaled || (scaled && (scale != msz))) + return false; + + bool base_is_x; + uint xn; + if (!encode_reg(&xn, &base_is_x, opnd_get_base(opnd), /*is_sp=*/true) || !base_is_x) + return false; + + uint zm; + opnd_size_t zm_size = OPSZ_SCALABLE; + if (!encode_vreg(&zm_size, &zm, opnd_get_index(opnd))) + return false; + + *enc_out |= (zm << 16) | (xn << 5); + + return true; +} + +/* SVE prefetch memory address (64-bit offset) [, .D{, }] */ +static inline bool +decode_opnd_sveprf_gpr_vec64(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) +{ + const aarch64_reg_offset msz = BITS(enc, 14, 13); + + return decode_svemem_gpr_vec(enc, DOUBLE_REG, DR_EXTEND_UXTX, msz, msz > 0, true, + opnd); +} + +static inline bool +encode_opnd_sveprf_gpr_vec64(uint enc, int opcode, byte *pc, opnd_t opnd, + OUT uint *enc_out) +{ + const uint msz = BITS(enc, 14, 13); + + return opnd_get_index_extend(opnd, NULL, NULL) == DR_EXTEND_UXTX && + encode_svemem_gpr_vec(enc, DOUBLE_REG, msz, msz > 0, opnd, enc_out); +} + /* imm7: 7-bit immediate from bits 14-20 */ static inline bool @@ -4836,7 +4915,8 @@ encode_opnd_svemem_gpr_simm6_vl(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) { const opnd_size_t mem_transfer = op_is_prefetch(opcode) ? 
OPSZ_0 : OPSZ_SVE_VL; - if (!opnd_is_base_disp(opnd) || opnd_get_size(opnd) != mem_transfer) + if (!opnd_is_base_disp(opnd) || opnd_get_index(opnd) != DR_REG_NULL || + opnd_get_size(opnd) != mem_transfer) return false; uint imm6; @@ -5385,8 +5465,10 @@ encode_opnd_z_sz_sd(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_o { IF_RETURN_FALSE(!opnd_is_reg(opnd)) - const uint reg_number = (uint)(opnd_get_reg(opnd) - DR_REG_Z0); - IF_RETURN_FALSE(!(reg_number < 32)) + uint reg_number; + opnd_size_t reg_size = OPSZ_SCALABLE; + if (!is_vreg(&reg_size, &reg_number, opnd)) + return false; uint sz = 0; switch (opnd_get_vector_element_size(opnd)) { @@ -5820,8 +5902,8 @@ decode_opnd_svemem_vec_vec_idx(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) const uint msz = extract_uint(enc, 10, 2); - const reg_id_t zn = DR_REG_Z0 + extract_uint(enc, 5, 5); - const reg_id_t zm = DR_REG_Z0 + extract_uint(enc, 16, 5); + const reg_id_t zn = decode_vreg(Z_REG, extract_uint(enc, 5, 5)); + const reg_id_t zm = decode_vreg(Z_REG, extract_uint(enc, 16, 5)); /* This operand is used for SVE ADR instructions which don't transfer any memory. * If this operand ends up being used for other instructions in the future we will @@ -5844,8 +5926,12 @@ encode_opnd_svemem_vec_vec_idx(uint enc, int opcode, byte *pc, opnd_t opnd, if (!opnd_is_base_disp(opnd)) return false; - const uint zn = (uint)(opnd_get_base(opnd) - DR_REG_Z0); - const uint zm = (uint)(opnd_get_index(opnd) - DR_REG_Z0); + uint zm; + uint zn; + opnd_size_t reg_size = OPSZ_SCALABLE; + if (!encode_vreg(&reg_size, &zn, opnd_get_base(opnd)) || + !encode_vreg(&reg_size, &zm, opnd_get_index(opnd))) + return false; opnd_size_t element_size; dr_extend_type_t extend_type; @@ -6596,7 +6682,7 @@ decode_svemem_vec_imm5(uint enc, aarch64_reg_offset element_size, bool is_prefet ? 
OPSZ_0 : opnd_size_from_bytes(scale * get_elements_in_sve_vector(element_size)); - const reg_id_t zn = DR_REG_Z0 + extract_uint(enc, 5, 5); + const reg_id_t zn = decode_vreg(Z_REG, extract_uint(enc, 5, 5)); ASSERT(reg_is_z(zn)); const int imm5 = (int)(extract_uint(enc, 16, 5) << msz); @@ -6623,10 +6709,10 @@ encode_svemem_vec_imm5(uint enc, aarch64_reg_offset element_size, bool is_prefet get_vector_element_reg_offset(opnd) != element_size) return false; - const reg_id_t zd = opnd_get_base(opnd); - if (!reg_is_z(zd)) + uint reg_number; + opnd_size_t reg_size = OPSZ_SCALABLE; + if (!encode_vreg(&reg_size, &reg_number, opnd_get_base(opnd))) return false; - const uint reg_number = zd - DR_REG_Z0; const aarch64_reg_offset msz = BITS(enc, 24, 23); const uint scale = 1 << msz; @@ -6675,6 +6761,28 @@ encode_opnd_svemem_vec_d_imm5(uint enc, int opcode, byte *pc, opnd_t opnd, return encode_svemem_vec_imm5(enc, DOUBLE_REG, op_is_prefetch(opcode), opnd, enc_out); } +/* SVE memory address (64-bit offset) [, .D{, }] */ +static inline bool +decode_opnd_svemem_gpr_vec64(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) +{ + const aarch64_reg_offset msz = BITS(enc, 24, 23); + const bool scaled = BITS(enc, 21, 21) != 0; + + return decode_svemem_gpr_vec(enc, DOUBLE_REG, DR_EXTEND_UXTX, msz, scaled, false, + opnd); +} + +static inline bool +encode_opnd_svemem_gpr_vec64(uint enc, int opcode, byte *pc, opnd_t opnd, + OUT uint *enc_out) +{ + const uint msz = BITS(enc, 24, 23); + const bool scaled = BITS(enc, 21, 21) != 0; + + return opnd_get_index_extend(opnd, NULL, NULL) == DR_EXTEND_UXTX && + encode_svemem_gpr_vec(enc, DOUBLE_REG, msz, scaled, opnd, enc_out); +} + static inline bool dtype_is_signed(uint dtype) { @@ -7746,9 +7854,9 @@ encode_opnds_fccm(byte *pc, instr_t *instr, uint enc, decode_info_t *di) uint rn, rm; uint ftype; - if (!encode_vreg(&rn_size, &rn, instr_get_src(instr, 0))) + if (!is_vreg(&rn_size, &rn, instr_get_src(instr, 0))) return ENCFAIL; - if (!encode_vreg(&rm_size, &rm, instr_get_src(instr, 1))) + if (!is_vreg(&rm_size, 
&rm, instr_get_src(instr, 1))) + if (!is_vreg(&rm_size, &rm, instr_get_src(instr, 1))) return ENCFAIL; if (rn_size != rm_size) return ENCFAIL; @@ -7830,11 +7938,11 @@ encode_opnds_fcsel(byte *pc, instr_t *instr, uint enc, decode_info_t *di) uint rn, rm, rd; uint ftype; - if (!encode_vreg(&rn_size, &rn, instr_get_src(instr, 0))) + if (!is_vreg(&rn_size, &rn, instr_get_src(instr, 0))) return ENCFAIL; - if (!encode_vreg(&rm_size, &rm, instr_get_src(instr, 1))) + if (!is_vreg(&rm_size, &rm, instr_get_src(instr, 1))) return ENCFAIL; - if (!encode_vreg(&rd_size, &rd, instr_get_dst(instr, 0))) + if (!is_vreg(&rd_size, &rd, instr_get_dst(instr, 0))) return ENCFAIL; if ((rn_size != rm_size || rn_size != rd_size)) return ENCFAIL; diff --git a/core/ir/aarch64/codec_sve.txt b/core/ir/aarch64/codec_sve.txt index cc701be7908..494417bbe47 100644 --- a/core/ir/aarch64/codec_sve.txt +++ b/core/ir/aarch64/codec_sve.txt @@ -303,9 +303,14 @@ 10000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_vec_s_imm5 p10_zer_lo 11000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_vec_d_imm5 p10_zer_lo 10000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_vec_s_imm5 p10_zer_lo +11000100010xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_gpr_vec64 p10_zer_lo 11000101101xxxxx110xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_vec_d_imm5 p10_zer_lo +11000101111xxxxx110xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_gpr_vec64 p10_zer_lo +11000101110xxxxx110xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_gpr_vec64 p10_zer_lo 10000100101xxxxx110xxxxxxxxxxxxx n 976 SVE ld1h z_s_0 : svemem_vec_s_imm5 p10_zer_lo 11000100101xxxxx110xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_vec_d_imm5 p10_zer_lo +11000100111xxxxx110xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_vec64 p10_zer_lo +11000100110xxxxx110xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_gpr_vec64 p10_zer_lo 1000010001xxxxxx101xxxxxxxxxxxxx n 908 SVE ld1rb z_h_0 : svememx6_b_5 p10_zer_lo 1000010001xxxxxx110xxxxxxxxxxxxx n 908 SVE 
ld1rb z_s_0 : svememx6_b_5 p10_zer_lo 1000010001xxxxxx111xxxxxxxxxxxxx n 908 SVE ld1rb z_d_0 : svememx6_b_5 p10_zer_lo @@ -329,11 +334,18 @@ 10100101100xxxxx010xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_gpr_shf p10_zer_lo 10000100001xxxxx100xxxxxxxxxxxxx n 949 SVE ld1sb z_s_0 : svemem_vec_s_imm5 p10_zer_lo 11000100001xxxxx100xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_vec_d_imm5 p10_zer_lo +11000100010xxxxx100xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_gpr_vec64 p10_zer_lo 10000100101xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_s_0 : svemem_vec_s_imm5 p10_zer_lo 11000100101xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_vec_d_imm5 p10_zer_lo +11000100111xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_vec64 p10_zer_lo +11000100110xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_gpr_vec64 p10_zer_lo 11000101001xxxxx100xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_vec_d_imm5 p10_zer_lo +11000101011xxxxx100xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_vec64 p10_zer_lo +11000101010xxxxx100xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_gpr_vec64 p10_zer_lo 10000101001xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_s_0 : svemem_vec_s_imm5 p10_zer_lo 11000101001xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_vec_d_imm5 p10_zer_lo +11000101011xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_vec64 p10_zer_lo +11000101010xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_gpr_vec64 p10_zer_lo 10100100001xxxxx110xxxxxxxxxxxxx n 967 SVE ld2b z_b_0 z_msz_bhsd_0p1 : svemem_gprs_bhsdx p10_zer_lo 10100100010xxxxx110xxxxxxxxxxxxx n 968 SVE ld3b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gprs_bhsdx p10_zer_lo 10100100011xxxxx110xxxxxxxxxxxxx n 969 SVE ld4b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gprs_bhsdx p10_zer_lo @@ -343,28 +355,40 @@ 10100100000xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_b_0 : svemem_gpr_shf p10_zer_lo 10000100001xxxxx111xxxxxxxxxxxxx n 937 SVE ldff1b z_s_0 : svemem_vec_s_imm5 p10_zer_lo 11000100001xxxxx111xxxxxxxxxxxxx n 
937 SVE ldff1b z_d_0 : svemem_vec_d_imm5 p10_zer_lo +11000100010xxxxx111xxxxxxxxxxxxx n 937 SVE ldff1b z_d_0 : svemem_gpr_vec64 p10_zer_lo 10100101111xxxxx011xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_shf p10_zer_lo 11000101101xxxxx111xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_vec_d_imm5 p10_zer_lo +11000101111xxxxx111xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_vec64 p10_zer_lo +11000101110xxxxx111xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_vec64 p10_zer_lo 10100100101xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_h_0 : svemem_gpr_shf p10_zer_lo 10100100110xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_gpr_shf p10_zer_lo 10100100111xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_shf p10_zer_lo 10000100101xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_vec_s_imm5 p10_zer_lo 11000100101xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_vec_d_imm5 p10_zer_lo +11000100111xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_vec64 p10_zer_lo +11000100110xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_vec64 p10_zer_lo 10100101110xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_h_0 : svemem_gpr_shf p10_zer_lo 10100101101xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_s_0 : svemem_gpr_shf p10_zer_lo 10100101100xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_gpr_shf p10_zer_lo 10000100001xxxxx101xxxxxxxxxxxxx n 940 SVE ldff1sb z_s_0 : svemem_vec_s_imm5 p10_zer_lo 11000100001xxxxx101xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_vec_d_imm5 p10_zer_lo +11000100010xxxxx101xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_gpr_vec64 p10_zer_lo 10100101001xxxxx011xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_gpr_shf p10_zer_lo 10100101000xxxxx011xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_shf p10_zer_lo 10000100101xxxxx101xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_vec_s_imm5 p10_zer_lo 11000100101xxxxx101xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_vec_d_imm5 p10_zer_lo +11000100111xxxxx101xxxxxxxxxxxxx n 941 SVE 
ldff1sh z_d_0 : svemem_gpr_vec64 p10_zer_lo +11000100110xxxxx101xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_vec64 p10_zer_lo 10100100100xxxxx011xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_shf p10_zer_lo 11000101001xxxxx101xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_vec_d_imm5 p10_zer_lo +11000101011xxxxx101xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_vec64 p10_zer_lo +11000101010xxxxx101xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_vec64 p10_zer_lo 10100101010xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_gpr_shf p10_zer_lo 10100101011xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_shf p10_zer_lo 10000101001xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_vec_s_imm5 p10_zer_lo 11000101001xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_vec_d_imm5 p10_zer_lo +11000101011xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_vec64 p10_zer_lo +11000101010xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_vec64 p10_zer_lo 10100100000xxxxx110xxxxxxxxxxxxx n 950 SVE ldnt1b z_b_0 : svemem_gprs_b1 p10_zer_lo 1000010110xxxxxx000xxxxxxxx0xxxx n 227 SVE ldr p0 : svemem_gpr_simm9_vl 1000010110xxxxxx010xxxxxxxxxxxxx n 227 SVE ldr z0 : svemem_gpr_simm9_vl @@ -412,15 +436,19 @@ 1000010111xxxxxx000xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo svemem_gpr_simm6_vl 10000100000xxxxx111xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo svemem_vec_s_imm5 11000100000xxxxx111xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo svemem_vec_d_imm5 +11000100011xxxxx100xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo sveprf_gpr_vec64 1000010111xxxxxx011xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo svemem_gpr_simm6_vl 10000101100xxxxx111xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo svemem_vec_s_imm5 11000101100xxxxx111xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo svemem_vec_d_imm5 +11000100011xxxxx111xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo sveprf_gpr_vec64 1000010111xxxxxx001xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo svemem_gpr_simm6_vl 
10000100100xxxxx111xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo svemem_vec_s_imm5 11000100100xxxxx111xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo svemem_vec_d_imm5 +11000100011xxxxx101xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo sveprf_gpr_vec64 1000010111xxxxxx010xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo svemem_gpr_simm6_vl 10000101000xxxxx111xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo svemem_vec_s_imm5 11000101000xxxxx111xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo svemem_vec_d_imm5 +11000100011xxxxx110xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo sveprf_gpr_vec64 001001010101000011xxxx0xxxx00000 w 786 SVE ptest : p10 p_b_5 00100101xx011000111000xxxxx0xxxx n 897 SVE ptrue p_size_bhsd_0 : pred_constr 00100101xx011001111000xxxxx0xxxx w 898 SVE ptrues p_size_bhsd_0 : pred_constr @@ -505,11 +533,18 @@ 11100100011xxxxx010xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_shf : z_d_0 p10_lo 11100100011xxxxx101xxxxxxxxxxxxx n 951 SVE st1b svemem_vec_s_imm5 : z_s_0 p10_lo 11100100010xxxxx101xxxxxxxxxxxxx n 951 SVE st1b svemem_vec_d_imm5 : z_d_0 p10_lo +11100100000xxxxx101xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_vec64 : z_d_0 p10_lo 11100101110xxxxx101xxxxxxxxxxxxx n 981 SVE st1d svemem_vec_d_imm5 : z_d_0 p10_lo +11100101101xxxxx101xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_vec64 : z_d_0 p10_lo +11100101100xxxxx101xxxxxxxxxxxxx n 981 SVE st1d svemem_gpr_vec64 : z_d_0 p10_lo 11100100111xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_vec_s_imm5 : z_s_0 p10_lo 11100100110xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_vec_d_imm5 : z_d_0 p10_lo +11100100101xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec64 : z_d_0 p10_lo +11100100100xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_gpr_vec64 : z_d_0 p10_lo 11100101011xxxxx101xxxxxxxxxxxxx n 982 SVE st1w svemem_vec_s_imm5 : z_s_0 p10_lo 11100101010xxxxx101xxxxxxxxxxxxx n 982 SVE st1w svemem_vec_d_imm5 : z_d_0 p10_lo +11100101001xxxxx101xxxxxxxxxxxxx n 982 SVE st1w svemem_gpr_vec64 : z_d_0 p10_lo +11100101000xxxxx101xxxxxxxxxxxxx n 982 SVE st1w 
svemem_gpr_vec64 : z_d_0 p10_lo 11100100001xxxxx011xxxxxxxxxxxxx n 970 SVE st2b svemem_gprs_bhsdx : z_b_0 z_msz_bhsd_0p1 p10_lo 11100100010xxxxx011xxxxxxxxxxxxx n 971 SVE st3b svemem_gprs_bhsdx : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo 11100100011xxxxx011xxxxxxxxxxxxx n 972 SVE st4b svemem_gprs_bhsdx : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo diff --git a/core/ir/aarch64/instr_create_api.h b/core/ir/aarch64/instr_create_api.h index d19e525cd5e..4690b749826 100644 --- a/core/ir/aarch64/instr_create_api.h +++ b/core/ir/aarch64/instr_create_api.h @@ -10953,6 +10953,7 @@ * LDFF1B { .B }, /Z, [{, }] * LDFF1B { .S }, /Z, [.S{, #}] * LDFF1B { .D }, /Z, [.D{, #}] + * LDFF1B { .D }, /Z, [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The destination vector register, Z (Scalable). @@ -10967,6 +10968,9 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) */ #define INSTR_CREATE_ldff1b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldff1b, Zt, Rn, Pg) @@ -10978,6 +10981,8 @@ * \verbatim * LDFF1D { .D }, /Z, [{, , LSL #3}] * LDFF1D { .D }, /Z, [.D{, #}] + * LDFF1D { .D }, /Z, [, .D, LSL #3] + * LDFF1D { .D }, /Z, [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The destination vector register, Z (Scalable). 
@@ -10990,6 +10995,12 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * For the [\, \.D, LSL #3] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 3) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) */ #define INSTR_CREATE_ldff1d_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldff1d, Zt, Rn, Pg) @@ -11004,6 +11015,8 @@ * LDFF1H { .D }, /Z, [{, , LSL #1}] * LDFF1H { .S }, /Z, [.S{, #}] * LDFF1H { .D }, /Z, [.D{, #}] + * LDFF1H { .D }, /Z, [, .D, LSL #1] + * LDFF1H { .D }, /Z, [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The destination vector register, Z (Scalable). @@ -11019,6 +11032,12 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * For the [\, \.D, LSL #1] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) */ #define INSTR_CREATE_ldff1h_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldff1h, Zt, Rn, Pg) @@ -11033,6 +11052,7 @@ * LDFF1SB { .D }, /Z, [{, }] * LDFF1SB { .S }, /Z, [.S{, #}] * LDFF1SB { .D }, /Z, [.D{, #}] + * LDFF1SB { .D }, /Z, [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The destination vector register, Z (Scalable). 
@@ -11047,6 +11067,9 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) */ #define INSTR_CREATE_ldff1sb_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldff1sb, Zt, Rn, Pg) @@ -11060,6 +11083,8 @@ * LDFF1SH { .D }, /Z, [{, , LSL #1}] * LDFF1SH { .S }, /Z, [.S{, #}] * LDFF1SH { .D }, /Z, [.D{, #}] + * LDFF1SH { .D }, /Z, [, .D, LSL #1] + * LDFF1SH { .D }, /Z, [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The destination vector register, Z (Scalable). @@ -11075,6 +11100,12 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * For the [\, \.D, LSL #1] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) */ #define INSTR_CREATE_ldff1sh_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldff1sh, Zt, Rn, Pg) @@ -11086,6 +11117,8 @@ * \verbatim * LDFF1SW { .D }, /Z, [{, , LSL #2}] * LDFF1SW { .D }, /Z, [.D{, #}] + * LDFF1SW { .D }, /Z, [, .D, LSL #2] + * LDFF1SW { .D }, /Z, [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The destination vector register, Z (Scalable). 
@@ -11098,6 +11131,12 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * For the [\, \.D, LSL #2] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 2) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) */ #define INSTR_CREATE_ldff1sw_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ldff1sw, Zt, Rn, Pg) @@ -11193,6 +11232,7 @@ * LD1B { .B }, /Z, [, ] * LD1B { .D }, /Z, [.D{, #}] * LD1B { .S }, /Z, [.S{, #}] + * LD1B { .D }, /Z, [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The destination vector register, Z (Scalable). @@ -11208,6 +11248,9 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) */ #define INSTR_CREATE_ld1b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ld1b, Zt, Rn, Pg) @@ -11258,6 +11301,7 @@ * LD1SB { .D }, /Z, [, ] * LD1SB { .S }, /Z, [.S{, #}] * LD1SB { .D }, /Z, [.D{, #}] + * LD1SB { .D }, /Z, [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The destination vector register, Z (Scalable). 
@@ -11273,6 +11317,9 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) */ #define INSTR_CREATE_ld1sb_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ld1sb, Zt, Rn, Pg) @@ -11303,6 +11350,7 @@ * ST1B { . }, , [, ] * ST1B { .S }, , [.S{, #}] * ST1B { .D }, , [.D{, #}] + * ST1B { .D }, , [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The first source vector register, Z (Scalable). @@ -11318,6 +11366,9 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 64), 0) */ #define INSTR_CREATE_st1b_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_st1b, Rn, Zt, Pg) @@ -11563,6 +11614,7 @@ * PRFB , , [{, #, MUL VL}] * PRFB , , [.D{, #}] * PRFB , , [.S{, #}] + * PRFB , , [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param prfop The prefetch operation. 
@@ -11577,6 +11629,9 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, OPSZ_0, 0) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, OPSZ_0, 0) */ #define INSTR_CREATE_prfb_sve_pred(dc, prfop, Pg, Rn) \ instr_create_0dst_3src(dc, OP_prfb, prfop, Pg, Rn) @@ -11589,6 +11644,7 @@ * PRFD , , [{, #, MUL VL}] * PRFD , , [.D{, #}] * PRFD , , [.S{, #}] + * PRFD , , [, .D, LSL #3] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param prfop The prefetch operation. @@ -11603,6 +11659,9 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, OPSZ_0, 0) + * For the [\, \.D, LSL #3] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, OPSZ_0, 3) */ #define INSTR_CREATE_prfd_sve_pred(dc, prfop, Pg, Rn) \ instr_create_0dst_3src(dc, OP_prfd, prfop, Pg, Rn) @@ -11615,6 +11674,7 @@ * PRFH , , [{, #, MUL VL}] * PRFH , , [.D{, #}] * PRFH , , [.S{, #}] + * PRFH , , [, .D, LSL #1] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param prfop The prefetch operation. @@ -11629,6 +11689,9 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, OPSZ_0, 0) + * For the [\, \.D, LSL #1] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, OPSZ_0, 1) */ #define INSTR_CREATE_prfh_sve_pred(dc, prfop, Pg, Rn) \ instr_create_0dst_3src(dc, OP_prfh, prfop, Pg, Rn) @@ -11641,6 +11704,7 @@ * PRFW , , [{, #, MUL VL}] * PRFW , , [.D{, #}] * PRFW , , [.S{, #}] + * PRFW , , [, .D, LSL #2] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param prfop The prefetch operation. 
@@ -11655,6 +11719,9 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, OPSZ_0, 0) + * For the [\, \.D, LSL #2] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, OPSZ_0, 2) */ #define INSTR_CREATE_prfw_sve_pred(dc, prfop, Pg, Rn) \ instr_create_0dst_3src(dc, OP_prfw, prfop, Pg, Rn) @@ -11802,6 +11869,8 @@ * \verbatim * LD1H { .S }, /Z, [.S{, #}] * LD1H { .D }, /Z, [.D{, #}] + * LD1H { .D }, /Z, [, .D, LSL #1] + * LD1H { .D }, /Z, [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The destination vector register, Z (Scalable). @@ -11814,6 +11883,12 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * For the [\, \.D, LSL #1] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) */ #define INSTR_CREATE_ld1h_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_ld1h, Zt, Zn, Pg) @@ -11825,6 +11900,8 @@ * \verbatim * LD1SH { .S }, /Z, [.S{, #}] * LD1SH { .D }, /Z, [.D{, #}] + * LD1SH { .D }, /Z, [, .D, LSL #1] + * LD1SH { .D }, /Z, [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The destination vector register, Z (Scalable). 
@@ -11837,6 +11914,12 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * For the [\, \.D, LSL #1] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) */ #define INSTR_CREATE_ld1sh_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_ld1sh, Zt, Zn, Pg) @@ -11848,6 +11931,8 @@ * \verbatim * LD1W { .S }, /Z, [.S{, #}] * LD1W { .D }, /Z, [.D{, #}] + * LD1W { .D }, /Z, [, .D, LSL #2] + * LD1W { .D }, /Z, [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The destination vector register, Z (Scalable). @@ -11860,6 +11945,12 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * For the [\, \.D, LSL #2] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 2) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) */ #define INSTR_CREATE_ld1w_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_ld1w, Zt, Zn, Pg) @@ -11870,6 +11961,8 @@ * This macro is used to encode the forms: * \verbatim * LD1D { .D }, /Z, [.D{, #}] + * LD1D { .D }, /Z, [, .D, LSL #3] + * LD1D { .D }, /Z, [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The destination vector register, Z (Scalable).
@@ -11879,6 +11972,12 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) + * For the [\, \.D, LSL #3] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 3) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) */ #define INSTR_CREATE_ld1d_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_ld1d, Zt, Zn, Pg) @@ -11909,6 +12008,8 @@ * \verbatim * ST1H { .S }, , [.S{, #}] * ST1H { .D }, , [.D{, #}] + * ST1H { .D }, , [, .D, LSL #1] + * ST1H { .D }, , [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The first source vector register, Z (Scalable). @@ -11921,6 +12022,12 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) + * For the [\, \.D, LSL #1] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 1) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 32), 0) */ #define INSTR_CREATE_st1h_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_st1h, Zn, Zt, Pg) @@ -11932,6 +12039,8 @@ * \verbatim * ST1W { .S }, , [.S{, #}] * ST1W { .D }, , [.D{, #}] + * ST1W { .D }, , [, .D, LSL #2] + * ST1W { .D }, , [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The first source vector register, Z (Scalable).
@@ -11944,6 +12053,12 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) + * For the [\, \.D, LSL #2] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 2) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 16), 0) */ #define INSTR_CREATE_st1w_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_st1w, Zn, Zt, Pg) @@ -11954,6 +12069,8 @@ * This macro is used to encode the forms: * \verbatim * ST1D { .D }, , [.D{, #}] + * ST1D { .D }, , [, .D, LSL #3] + * ST1D { .D }, , [, .D] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. * \param Zt The first source vector register, Z (Scalable). @@ -11963,6 +12080,12 @@ * For the [\.D{, #\}] variant: * opnd_create_vector_base_disp_aarch64(Zn, DR_REG_NULL, OPSZ_8, * 0, 0, imm5, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) + * For the [\, \.D, LSL #3] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * true, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 3) + * For the [\, \.D] variant: + * opnd_create_vector_base_disp_aarch64(Xn, Zm, OPSZ_8, DR_EXTEND_UXTX, + * 0, 0, 0, opnd_size_from_bytes(dr_get_sve_vl() / 8), 0) */ #define INSTR_CREATE_st1d_sve_pred(dc, Zt, Pg, Zn) \ instr_create_1dst_2src(dc, OP_st1d, Zn, Zt, Pg) diff --git a/core/ir/aarch64/opnd_defs.txt b/core/ir/aarch64/opnd_defs.txt index be66e909e5f..f7f6c140a44 100644 --- a/core/ir/aarch64/opnd_defs.txt +++ b/core/ir/aarch64/opnd_defs.txt @@ -213,6 +213,7 @@ -----------xxxxx---------------- s16 # S register -----------xxxxx------xxxxx----- svemem_gprs_b1 # memory reg from Rm and Rn fields transferring 1 bytes per element -----------xxxxx---xxx---------- imm8_10 # 8 bit imm at pos 10, split across
20:16 and 12:10 +-----------xxxxx-??---xxxxx----- sveprf_gpr_vec64 # SVE prefetch memory address (64-bit offset) [, .D{, }] -----------xxxxxxx-------------- imm7 # 7 bit immediate from 14-20 -----------xxxxxxxxx------------ mem9off # immed offset for mem9/mem9post -----------xxxxxxxxx--xxxxx----- mem9q # size is 16 bytes @@ -307,6 +308,7 @@ --------xx-xxxxx---------------- imm2_tsz_index # Index encoded in imm2:tsz -------??--xxxxx------xxxxx----- svemem_vec_s_imm5 # SVE memory address [.S{, #}] -------??--xxxxx------xxxxx----- svemem_vec_d_imm5 # SVE memory address [.D{, #}] +-------??-?xxxxx------xxxxx----- svemem_gpr_vec64 # SVE memory address (64-bit offset) [, .D{, }] -------????xxxxx------xxxxx----- svemem_gpr_shf # GPR offset and base reg for SVE ld/st, with optional shift -------????xxxxx------xxxxx----- svemem_gprs_bhsdx # memory reg from Rm and Rn fields transferring x bytes per element -------xx------------------xxxxx z_msz_bhsd_0p1 # z register with element size determined by msz, plus 1 diff --git a/suite/tests/api/dis-a64-sve.txt b/suite/tests/api/dis-a64-sve.txt index 61675ac5bc3..5e18a897ba7 100644 --- a/suite/tests/api/dis-a64-sve.txt +++ b/suite/tests/api/dis-a64-sve.txt @@ -10658,6 +10658,24 @@ c43fdfff : ld1b z31.d, p7/Z, [z31.d, #31] : ld1b +0x1f(%z31.d)[4byt 843bdfbb : ld1b z27.s, p7/Z, [z29.s, #27] : ld1b +0x1b(%z29.s)[8byte] %p7/z -> %z27.s 843fdfff : ld1b z31.s, p7/Z, [z31.s, #31] : ld1b +0x1f(%z31.s)[8byte] %p7/z -> %z31.s +# LD1B { .D }, /Z, [, .D] (LD1B-Z.P.BZ-D.64.unscaled) +c440c000 : ld1b z0.d, p0/Z, [x0, z0.d] : ld1b (%x0,%z0.d)[4byte] %p0/z -> %z0.d +c445c482 : ld1b z2.d, p1/Z, [x4, z5.d] : ld1b (%x4,%z5.d)[4byte] %p1/z -> %z2.d +c447c8c4 : ld1b z4.d, p2/Z, [x6, z7.d] : ld1b (%x6,%z7.d)[4byte] %p2/z -> %z4.d +c449c906 : ld1b z6.d, p2/Z, [x8, z9.d] : ld1b (%x8,%z9.d)[4byte] %p2/z -> %z6.d +c44bcd48 : ld1b z8.d, p3/Z, [x10, z11.d] : ld1b (%x10,%z11.d)[4byte] %p3/z -> %z8.d +c44dcd6a : ld1b z10.d, p3/Z, [x11, z13.d] : ld1b 
(%x11,%z13.d)[4byte] %p3/z -> %z10.d +c44fd1ac : ld1b z12.d, p4/Z, [x13, z15.d] : ld1b (%x13,%z15.d)[4byte] %p4/z -> %z12.d +c451d1ee : ld1b z14.d, p4/Z, [x15, z17.d] : ld1b (%x15,%z17.d)[4byte] %p4/z -> %z14.d +c453d630 : ld1b z16.d, p5/Z, [x17, z19.d] : ld1b (%x17,%z19.d)[4byte] %p5/z -> %z16.d +c454d671 : ld1b z17.d, p5/Z, [x19, z20.d] : ld1b (%x19,%z20.d)[4byte] %p5/z -> %z17.d +c456d6b3 : ld1b z19.d, p5/Z, [x21, z22.d] : ld1b (%x21,%z22.d)[4byte] %p5/z -> %z19.d +c458daf5 : ld1b z21.d, p6/Z, [x23, z24.d] : ld1b (%x23,%z24.d)[4byte] %p6/z -> %z21.d +c45adb17 : ld1b z23.d, p6/Z, [x24, z26.d] : ld1b (%x24,%z26.d)[4byte] %p6/z -> %z23.d +c45cdf59 : ld1b z25.d, p7/Z, [x26, z28.d] : ld1b (%x26,%z28.d)[4byte] %p7/z -> %z25.d +c45edf9b : ld1b z27.d, p7/Z, [x28, z30.d] : ld1b (%x28,%z30.d)[4byte] %p7/z -> %z27.d +c45fdfff : ld1b z31.d, p7/Z, [sp, z31.d] : ld1b (%sp,%z31.d)[4byte] %p7/z -> %z31.d + # LD1D { .D }, /Z, [.D{, #}] (LD1D-Z.P.AI-D) c5a0c000 : ld1d z0.d, p0/Z, [z0.d, #0] : ld1d (%z0.d)[32byte] %p0/z -> %z0.d c5a2c482 : ld1d z2.d, p1/Z, [z4.d, #16] : ld1d +0x10(%z4.d)[32byte] %p1/z -> %z2.d @@ -10676,6 +10694,42 @@ c5b9df79 : ld1d z25.d, p7/Z, [z27.d, #200] : ld1d +0xc8(%z27.d)[32by c5bbdfbb : ld1d z27.d, p7/Z, [z29.d, #216] : ld1d +0xd8(%z29.d)[32byte] %p7/z -> %z27.d c5bfdfff : ld1d z31.d, p7/Z, [z31.d, #248] : ld1d +0xf8(%z31.d)[32byte] %p7/z -> %z31.d +# LD1D { .D }, /Z, [, .D, LSL #3] (LD1D-Z.P.BZ-D.64.scaled) +c5e0c000 : ld1d z0.d, p0/Z, [x0, z0.d, LSL #3] : ld1d (%x0,%z0.d,lsl #3)[32byte] %p0/z -> %z0.d +c5e5c482 : ld1d z2.d, p1/Z, [x4, z5.d, LSL #3] : ld1d (%x4,%z5.d,lsl #3)[32byte] %p1/z -> %z2.d +c5e7c8c4 : ld1d z4.d, p2/Z, [x6, z7.d, LSL #3] : ld1d (%x6,%z7.d,lsl #3)[32byte] %p2/z -> %z4.d +c5e9c906 : ld1d z6.d, p2/Z, [x8, z9.d, LSL #3] : ld1d (%x8,%z9.d,lsl #3)[32byte] %p2/z -> %z6.d +c5ebcd48 : ld1d z8.d, p3/Z, [x10, z11.d, LSL #3] : ld1d (%x10,%z11.d,lsl #3)[32byte] %p3/z -> %z8.d +c5edcd6a : ld1d z10.d, p3/Z, [x11, z13.d, LSL #3] : ld1d 
(%x11,%z13.d,lsl #3)[32byte] %p3/z -> %z10.d +c5efd1ac : ld1d z12.d, p4/Z, [x13, z15.d, LSL #3] : ld1d (%x13,%z15.d,lsl #3)[32byte] %p4/z -> %z12.d +c5f1d1ee : ld1d z14.d, p4/Z, [x15, z17.d, LSL #3] : ld1d (%x15,%z17.d,lsl #3)[32byte] %p4/z -> %z14.d +c5f3d630 : ld1d z16.d, p5/Z, [x17, z19.d, LSL #3] : ld1d (%x17,%z19.d,lsl #3)[32byte] %p5/z -> %z16.d +c5f4d671 : ld1d z17.d, p5/Z, [x19, z20.d, LSL #3] : ld1d (%x19,%z20.d,lsl #3)[32byte] %p5/z -> %z17.d +c5f6d6b3 : ld1d z19.d, p5/Z, [x21, z22.d, LSL #3] : ld1d (%x21,%z22.d,lsl #3)[32byte] %p5/z -> %z19.d +c5f8daf5 : ld1d z21.d, p6/Z, [x23, z24.d, LSL #3] : ld1d (%x23,%z24.d,lsl #3)[32byte] %p6/z -> %z21.d +c5fadb17 : ld1d z23.d, p6/Z, [x24, z26.d, LSL #3] : ld1d (%x24,%z26.d,lsl #3)[32byte] %p6/z -> %z23.d +c5fcdf59 : ld1d z25.d, p7/Z, [x26, z28.d, LSL #3] : ld1d (%x26,%z28.d,lsl #3)[32byte] %p7/z -> %z25.d +c5fedf9b : ld1d z27.d, p7/Z, [x28, z30.d, LSL #3] : ld1d (%x28,%z30.d,lsl #3)[32byte] %p7/z -> %z27.d +c5ffdfff : ld1d z31.d, p7/Z, [sp, z31.d, LSL #3] : ld1d (%sp,%z31.d,lsl #3)[32byte] %p7/z -> %z31.d + +# LD1D { .D }, /Z, [, .D] (LD1D-Z.P.BZ-D.64.unscaled) +c5c0c000 : ld1d z0.d, p0/Z, [x0, z0.d] : ld1d (%x0,%z0.d)[32byte] %p0/z -> %z0.d +c5c5c482 : ld1d z2.d, p1/Z, [x4, z5.d] : ld1d (%x4,%z5.d)[32byte] %p1/z -> %z2.d +c5c7c8c4 : ld1d z4.d, p2/Z, [x6, z7.d] : ld1d (%x6,%z7.d)[32byte] %p2/z -> %z4.d +c5c9c906 : ld1d z6.d, p2/Z, [x8, z9.d] : ld1d (%x8,%z9.d)[32byte] %p2/z -> %z6.d +c5cbcd48 : ld1d z8.d, p3/Z, [x10, z11.d] : ld1d (%x10,%z11.d)[32byte] %p3/z -> %z8.d +c5cdcd6a : ld1d z10.d, p3/Z, [x11, z13.d] : ld1d (%x11,%z13.d)[32byte] %p3/z -> %z10.d +c5cfd1ac : ld1d z12.d, p4/Z, [x13, z15.d] : ld1d (%x13,%z15.d)[32byte] %p4/z -> %z12.d +c5d1d1ee : ld1d z14.d, p4/Z, [x15, z17.d] : ld1d (%x15,%z17.d)[32byte] %p4/z -> %z14.d +c5d3d630 : ld1d z16.d, p5/Z, [x17, z19.d] : ld1d (%x17,%z19.d)[32byte] %p5/z -> %z16.d +c5d4d671 : ld1d z17.d, p5/Z, [x19, z20.d] : ld1d (%x19,%z20.d)[32byte] %p5/z -> %z17.d +c5d6d6b3 : 
ld1d z19.d, p5/Z, [x21, z22.d] : ld1d (%x21,%z22.d)[32byte] %p5/z -> %z19.d +c5d8daf5 : ld1d z21.d, p6/Z, [x23, z24.d] : ld1d (%x23,%z24.d)[32byte] %p6/z -> %z21.d +c5dadb17 : ld1d z23.d, p6/Z, [x24, z26.d] : ld1d (%x24,%z26.d)[32byte] %p6/z -> %z23.d +c5dcdf59 : ld1d z25.d, p7/Z, [x26, z28.d] : ld1d (%x26,%z28.d)[32byte] %p7/z -> %z25.d +c5dedf9b : ld1d z27.d, p7/Z, [x28, z30.d] : ld1d (%x28,%z30.d)[32byte] %p7/z -> %z27.d +c5dfdfff : ld1d z31.d, p7/Z, [sp, z31.d] : ld1d (%sp,%z31.d)[32byte] %p7/z -> %z31.d + # LD1H { .S }, /Z, [.S{, #}] (LD1H-Z.P.AI-S) 84a0c000 : ld1h z0.s, p0/Z, [z0.s, #0] : ld1h (%z0.s)[16byte] %p0/z -> %z0.s 84a2c482 : ld1h z2.s, p1/Z, [z4.s, #4] : ld1h +0x04(%z4.s)[16byte] %p1/z -> %z2.s @@ -10712,6 +10766,42 @@ c4b9df79 : ld1h z25.d, p7/Z, [z27.d, #50] : ld1h +0x32(%z27.d)[8byt c4bbdfbb : ld1h z27.d, p7/Z, [z29.d, #54] : ld1h +0x36(%z29.d)[8byte] %p7/z -> %z27.d c4bfdfff : ld1h z31.d, p7/Z, [z31.d, #62] : ld1h +0x3e(%z31.d)[8byte] %p7/z -> %z31.d +# LD1H { .D }, /Z, [, .D, LSL #1] (LD1H-Z.P.BZ-D.64.scaled) +c4e0c000 : ld1h z0.d, p0/Z, [x0, z0.d, LSL #1] : ld1h (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d +c4e5c482 : ld1h z2.d, p1/Z, [x4, z5.d, LSL #1] : ld1h (%x4,%z5.d,lsl #1)[8byte] %p1/z -> %z2.d +c4e7c8c4 : ld1h z4.d, p2/Z, [x6, z7.d, LSL #1] : ld1h (%x6,%z7.d,lsl #1)[8byte] %p2/z -> %z4.d +c4e9c906 : ld1h z6.d, p2/Z, [x8, z9.d, LSL #1] : ld1h (%x8,%z9.d,lsl #1)[8byte] %p2/z -> %z6.d +c4ebcd48 : ld1h z8.d, p3/Z, [x10, z11.d, LSL #1] : ld1h (%x10,%z11.d,lsl #1)[8byte] %p3/z -> %z8.d +c4edcd6a : ld1h z10.d, p3/Z, [x11, z13.d, LSL #1] : ld1h (%x11,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d +c4efd1ac : ld1h z12.d, p4/Z, [x13, z15.d, LSL #1] : ld1h (%x13,%z15.d,lsl #1)[8byte] %p4/z -> %z12.d +c4f1d1ee : ld1h z14.d, p4/Z, [x15, z17.d, LSL #1] : ld1h (%x15,%z17.d,lsl #1)[8byte] %p4/z -> %z14.d +c4f3d630 : ld1h z16.d, p5/Z, [x17, z19.d, LSL #1] : ld1h (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d +c4f4d671 : ld1h z17.d, p5/Z, [x19, z20.d, LSL #1] : ld1h 
(%x19,%z20.d,lsl #1)[8byte] %p5/z -> %z17.d +c4f6d6b3 : ld1h z19.d, p5/Z, [x21, z22.d, LSL #1] : ld1h (%x21,%z22.d,lsl #1)[8byte] %p5/z -> %z19.d +c4f8daf5 : ld1h z21.d, p6/Z, [x23, z24.d, LSL #1] : ld1h (%x23,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d +c4fadb17 : ld1h z23.d, p6/Z, [x24, z26.d, LSL #1] : ld1h (%x24,%z26.d,lsl #1)[8byte] %p6/z -> %z23.d +c4fcdf59 : ld1h z25.d, p7/Z, [x26, z28.d, LSL #1] : ld1h (%x26,%z28.d,lsl #1)[8byte] %p7/z -> %z25.d +c4fedf9b : ld1h z27.d, p7/Z, [x28, z30.d, LSL #1] : ld1h (%x28,%z30.d,lsl #1)[8byte] %p7/z -> %z27.d +c4ffdfff : ld1h z31.d, p7/Z, [sp, z31.d, LSL #1] : ld1h (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d + +# LD1H { .D }, /Z, [, .D] (LD1H-Z.P.BZ-D.64.unscaled) +c4c0c000 : ld1h z0.d, p0/Z, [x0, z0.d] : ld1h (%x0,%z0.d)[8byte] %p0/z -> %z0.d +c4c5c482 : ld1h z2.d, p1/Z, [x4, z5.d] : ld1h (%x4,%z5.d)[8byte] %p1/z -> %z2.d +c4c7c8c4 : ld1h z4.d, p2/Z, [x6, z7.d] : ld1h (%x6,%z7.d)[8byte] %p2/z -> %z4.d +c4c9c906 : ld1h z6.d, p2/Z, [x8, z9.d] : ld1h (%x8,%z9.d)[8byte] %p2/z -> %z6.d +c4cbcd48 : ld1h z8.d, p3/Z, [x10, z11.d] : ld1h (%x10,%z11.d)[8byte] %p3/z -> %z8.d +c4cdcd6a : ld1h z10.d, p3/Z, [x11, z13.d] : ld1h (%x11,%z13.d)[8byte] %p3/z -> %z10.d +c4cfd1ac : ld1h z12.d, p4/Z, [x13, z15.d] : ld1h (%x13,%z15.d)[8byte] %p4/z -> %z12.d +c4d1d1ee : ld1h z14.d, p4/Z, [x15, z17.d] : ld1h (%x15,%z17.d)[8byte] %p4/z -> %z14.d +c4d3d630 : ld1h z16.d, p5/Z, [x17, z19.d] : ld1h (%x17,%z19.d)[8byte] %p5/z -> %z16.d +c4d4d671 : ld1h z17.d, p5/Z, [x19, z20.d] : ld1h (%x19,%z20.d)[8byte] %p5/z -> %z17.d +c4d6d6b3 : ld1h z19.d, p5/Z, [x21, z22.d] : ld1h (%x21,%z22.d)[8byte] %p5/z -> %z19.d +c4d8daf5 : ld1h z21.d, p6/Z, [x23, z24.d] : ld1h (%x23,%z24.d)[8byte] %p6/z -> %z21.d +c4dadb17 : ld1h z23.d, p6/Z, [x24, z26.d] : ld1h (%x24,%z26.d)[8byte] %p6/z -> %z23.d +c4dcdf59 : ld1h z25.d, p7/Z, [x26, z28.d] : ld1h (%x26,%z28.d)[8byte] %p7/z -> %z25.d +c4dedf9b : ld1h z27.d, p7/Z, [x28, z30.d] : ld1h (%x28,%z30.d)[8byte] %p7/z -> %z27.d +c4dfdfff 
: ld1h z31.d, p7/Z, [sp, z31.d] : ld1h (%sp,%z31.d)[8byte] %p7/z -> %z31.d + # LD1RB { .H }, /Z, [{, #}] (LD1RB-Z.P.BI-U16) 8440a000 : ld1rb z0.h, p0/Z, [x0, #0] : ld1rb (%x0)[1byte] %p0/z -> %z0.h 8444a482 : ld1rb z2.h, p1/Z, [x4, #4] : ld1rb +0x04(%x4)[1byte] %p1/z -> %z2.h @@ -11180,6 +11270,24 @@ c4399f79 : ld1sb z25.d, p7/Z, [z27.d, #25] : ld1sb +0x19(%z27.d)[4byt c43b9fbb : ld1sb z27.d, p7/Z, [z29.d, #27] : ld1sb +0x1b(%z29.d)[4byte] %p7/z -> %z27.d c43f9fff : ld1sb z31.d, p7/Z, [z31.d, #31] : ld1sb +0x1f(%z31.d)[4byte] %p7/z -> %z31.d +# LD1SB { .D }, /Z, [, .D] (LD1SB-Z.P.BZ-D.64.unscaled) +c4408000 : ld1sb z0.d, p0/Z, [x0, z0.d] : ld1sb (%x0,%z0.d)[4byte] %p0/z -> %z0.d +c4458482 : ld1sb z2.d, p1/Z, [x4, z5.d] : ld1sb (%x4,%z5.d)[4byte] %p1/z -> %z2.d +c44788c4 : ld1sb z4.d, p2/Z, [x6, z7.d] : ld1sb (%x6,%z7.d)[4byte] %p2/z -> %z4.d +c4498906 : ld1sb z6.d, p2/Z, [x8, z9.d] : ld1sb (%x8,%z9.d)[4byte] %p2/z -> %z6.d +c44b8d48 : ld1sb z8.d, p3/Z, [x10, z11.d] : ld1sb (%x10,%z11.d)[4byte] %p3/z -> %z8.d +c44d8d6a : ld1sb z10.d, p3/Z, [x11, z13.d] : ld1sb (%x11,%z13.d)[4byte] %p3/z -> %z10.d +c44f91ac : ld1sb z12.d, p4/Z, [x13, z15.d] : ld1sb (%x13,%z15.d)[4byte] %p4/z -> %z12.d +c45191ee : ld1sb z14.d, p4/Z, [x15, z17.d] : ld1sb (%x15,%z17.d)[4byte] %p4/z -> %z14.d +c4539630 : ld1sb z16.d, p5/Z, [x17, z19.d] : ld1sb (%x17,%z19.d)[4byte] %p5/z -> %z16.d +c4549671 : ld1sb z17.d, p5/Z, [x19, z20.d] : ld1sb (%x19,%z20.d)[4byte] %p5/z -> %z17.d +c45696b3 : ld1sb z19.d, p5/Z, [x21, z22.d] : ld1sb (%x21,%z22.d)[4byte] %p5/z -> %z19.d +c4589af5 : ld1sb z21.d, p6/Z, [x23, z24.d] : ld1sb (%x23,%z24.d)[4byte] %p6/z -> %z21.d +c45a9b17 : ld1sb z23.d, p6/Z, [x24, z26.d] : ld1sb (%x24,%z26.d)[4byte] %p6/z -> %z23.d +c45c9f59 : ld1sb z25.d, p7/Z, [x26, z28.d] : ld1sb (%x26,%z28.d)[4byte] %p7/z -> %z25.d +c45e9f9b : ld1sb z27.d, p7/Z, [x28, z30.d] : ld1sb (%x28,%z30.d)[4byte] %p7/z -> %z27.d +c45f9fff : ld1sb z31.d, p7/Z, [sp, z31.d] : ld1sb (%sp,%z31.d)[4byte] %p7/z -> 
%z31.d + # LD1SH { .S }, /Z, [.S{, #}] (LD1SH-Z.P.AI-S) 84a08000 : ld1sh z0.s, p0/Z, [z0.s, #0] : ld1sh (%z0.s)[16byte] %p0/z -> %z0.s 84a28482 : ld1sh z2.s, p1/Z, [z4.s, #4] : ld1sh +0x04(%z4.s)[16byte] %p1/z -> %z2.s @@ -11216,6 +11324,42 @@ c4b99f79 : ld1sh z25.d, p7/Z, [z27.d, #50] : ld1sh +0x32(%z27.d)[8byt c4bb9fbb : ld1sh z27.d, p7/Z, [z29.d, #54] : ld1sh +0x36(%z29.d)[8byte] %p7/z -> %z27.d c4bf9fff : ld1sh z31.d, p7/Z, [z31.d, #62] : ld1sh +0x3e(%z31.d)[8byte] %p7/z -> %z31.d +# LD1SH { .D }, /Z, [, .D, LSL #1] (LD1SH-Z.P.BZ-D.64.scaled) +c4e08000 : ld1sh z0.d, p0/Z, [x0, z0.d, LSL #1] : ld1sh (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d +c4e58482 : ld1sh z2.d, p1/Z, [x4, z5.d, LSL #1] : ld1sh (%x4,%z5.d,lsl #1)[8byte] %p1/z -> %z2.d +c4e788c4 : ld1sh z4.d, p2/Z, [x6, z7.d, LSL #1] : ld1sh (%x6,%z7.d,lsl #1)[8byte] %p2/z -> %z4.d +c4e98906 : ld1sh z6.d, p2/Z, [x8, z9.d, LSL #1] : ld1sh (%x8,%z9.d,lsl #1)[8byte] %p2/z -> %z6.d +c4eb8d48 : ld1sh z8.d, p3/Z, [x10, z11.d, LSL #1] : ld1sh (%x10,%z11.d,lsl #1)[8byte] %p3/z -> %z8.d +c4ed8d6a : ld1sh z10.d, p3/Z, [x11, z13.d, LSL #1] : ld1sh (%x11,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d +c4ef91ac : ld1sh z12.d, p4/Z, [x13, z15.d, LSL #1] : ld1sh (%x13,%z15.d,lsl #1)[8byte] %p4/z -> %z12.d +c4f191ee : ld1sh z14.d, p4/Z, [x15, z17.d, LSL #1] : ld1sh (%x15,%z17.d,lsl #1)[8byte] %p4/z -> %z14.d +c4f39630 : ld1sh z16.d, p5/Z, [x17, z19.d, LSL #1] : ld1sh (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d +c4f49671 : ld1sh z17.d, p5/Z, [x19, z20.d, LSL #1] : ld1sh (%x19,%z20.d,lsl #1)[8byte] %p5/z -> %z17.d +c4f696b3 : ld1sh z19.d, p5/Z, [x21, z22.d, LSL #1] : ld1sh (%x21,%z22.d,lsl #1)[8byte] %p5/z -> %z19.d +c4f89af5 : ld1sh z21.d, p6/Z, [x23, z24.d, LSL #1] : ld1sh (%x23,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d +c4fa9b17 : ld1sh z23.d, p6/Z, [x24, z26.d, LSL #1] : ld1sh (%x24,%z26.d,lsl #1)[8byte] %p6/z -> %z23.d +c4fc9f59 : ld1sh z25.d, p7/Z, [x26, z28.d, LSL #1] : ld1sh (%x26,%z28.d,lsl #1)[8byte] %p7/z -> %z25.d +c4fe9f9b : 
ld1sh z27.d, p7/Z, [x28, z30.d, LSL #1] : ld1sh (%x28,%z30.d,lsl #1)[8byte] %p7/z -> %z27.d +c4ff9fff : ld1sh z31.d, p7/Z, [sp, z31.d, LSL #1] : ld1sh (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d + +# LD1SH { .D }, /Z, [, .D] (LD1SH-Z.P.BZ-D.64.unscaled) +c4c08000 : ld1sh z0.d, p0/Z, [x0, z0.d] : ld1sh (%x0,%z0.d)[8byte] %p0/z -> %z0.d +c4c58482 : ld1sh z2.d, p1/Z, [x4, z5.d] : ld1sh (%x4,%z5.d)[8byte] %p1/z -> %z2.d +c4c788c4 : ld1sh z4.d, p2/Z, [x6, z7.d] : ld1sh (%x6,%z7.d)[8byte] %p2/z -> %z4.d +c4c98906 : ld1sh z6.d, p2/Z, [x8, z9.d] : ld1sh (%x8,%z9.d)[8byte] %p2/z -> %z6.d +c4cb8d48 : ld1sh z8.d, p3/Z, [x10, z11.d] : ld1sh (%x10,%z11.d)[8byte] %p3/z -> %z8.d +c4cd8d6a : ld1sh z10.d, p3/Z, [x11, z13.d] : ld1sh (%x11,%z13.d)[8byte] %p3/z -> %z10.d +c4cf91ac : ld1sh z12.d, p4/Z, [x13, z15.d] : ld1sh (%x13,%z15.d)[8byte] %p4/z -> %z12.d +c4d191ee : ld1sh z14.d, p4/Z, [x15, z17.d] : ld1sh (%x15,%z17.d)[8byte] %p4/z -> %z14.d +c4d39630 : ld1sh z16.d, p5/Z, [x17, z19.d] : ld1sh (%x17,%z19.d)[8byte] %p5/z -> %z16.d +c4d49671 : ld1sh z17.d, p5/Z, [x19, z20.d] : ld1sh (%x19,%z20.d)[8byte] %p5/z -> %z17.d +c4d696b3 : ld1sh z19.d, p5/Z, [x21, z22.d] : ld1sh (%x21,%z22.d)[8byte] %p5/z -> %z19.d +c4d89af5 : ld1sh z21.d, p6/Z, [x23, z24.d] : ld1sh (%x23,%z24.d)[8byte] %p6/z -> %z21.d +c4da9b17 : ld1sh z23.d, p6/Z, [x24, z26.d] : ld1sh (%x24,%z26.d)[8byte] %p6/z -> %z23.d +c4dc9f59 : ld1sh z25.d, p7/Z, [x26, z28.d] : ld1sh (%x26,%z28.d)[8byte] %p7/z -> %z25.d +c4de9f9b : ld1sh z27.d, p7/Z, [x28, z30.d] : ld1sh (%x28,%z30.d)[8byte] %p7/z -> %z27.d +c4df9fff : ld1sh z31.d, p7/Z, [sp, z31.d] : ld1sh (%sp,%z31.d)[8byte] %p7/z -> %z31.d + # LD1SW { .D }, /Z, [.D{, #}] (LD1SW-Z.P.AI-D) c5208000 : ld1sw z0.d, p0/Z, [z0.d, #0] : ld1sw (%z0.d)[16byte] %p0/z -> %z0.d c5228482 : ld1sw z2.d, p1/Z, [z4.d, #8] : ld1sw +0x08(%z4.d)[16byte] %p1/z -> %z2.d @@ -11234,6 +11378,42 @@ c5399f79 : ld1sw z25.d, p7/Z, [z27.d, #100] : ld1sw +0x64(%z27.d)[16by c53b9fbb : ld1sw z27.d, p7/Z, [z29.d, 
#108] : ld1sw +0x6c(%z29.d)[16byte] %p7/z -> %z27.d c53f9fff : ld1sw z31.d, p7/Z, [z31.d, #124] : ld1sw +0x7c(%z31.d)[16byte] %p7/z -> %z31.d +# LD1SW { .D }, /Z, [, .D, LSL #2] (LD1SW-Z.P.BZ-D.64.scaled) +c5608000 : ld1sw z0.d, p0/Z, [x0, z0.d, LSL #2] : ld1sw (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d +c5658482 : ld1sw z2.d, p1/Z, [x4, z5.d, LSL #2] : ld1sw (%x4,%z5.d,lsl #2)[16byte] %p1/z -> %z2.d +c56788c4 : ld1sw z4.d, p2/Z, [x6, z7.d, LSL #2] : ld1sw (%x6,%z7.d,lsl #2)[16byte] %p2/z -> %z4.d +c5698906 : ld1sw z6.d, p2/Z, [x8, z9.d, LSL #2] : ld1sw (%x8,%z9.d,lsl #2)[16byte] %p2/z -> %z6.d +c56b8d48 : ld1sw z8.d, p3/Z, [x10, z11.d, LSL #2] : ld1sw (%x10,%z11.d,lsl #2)[16byte] %p3/z -> %z8.d +c56d8d6a : ld1sw z10.d, p3/Z, [x11, z13.d, LSL #2] : ld1sw (%x11,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d +c56f91ac : ld1sw z12.d, p4/Z, [x13, z15.d, LSL #2] : ld1sw (%x13,%z15.d,lsl #2)[16byte] %p4/z -> %z12.d +c57191ee : ld1sw z14.d, p4/Z, [x15, z17.d, LSL #2] : ld1sw (%x15,%z17.d,lsl #2)[16byte] %p4/z -> %z14.d +c5739630 : ld1sw z16.d, p5/Z, [x17, z19.d, LSL #2] : ld1sw (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d +c5749671 : ld1sw z17.d, p5/Z, [x19, z20.d, LSL #2] : ld1sw (%x19,%z20.d,lsl #2)[16byte] %p5/z -> %z17.d +c57696b3 : ld1sw z19.d, p5/Z, [x21, z22.d, LSL #2] : ld1sw (%x21,%z22.d,lsl #2)[16byte] %p5/z -> %z19.d +c5789af5 : ld1sw z21.d, p6/Z, [x23, z24.d, LSL #2] : ld1sw (%x23,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d +c57a9b17 : ld1sw z23.d, p6/Z, [x24, z26.d, LSL #2] : ld1sw (%x24,%z26.d,lsl #2)[16byte] %p6/z -> %z23.d +c57c9f59 : ld1sw z25.d, p7/Z, [x26, z28.d, LSL #2] : ld1sw (%x26,%z28.d,lsl #2)[16byte] %p7/z -> %z25.d +c57e9f9b : ld1sw z27.d, p7/Z, [x28, z30.d, LSL #2] : ld1sw (%x28,%z30.d,lsl #2)[16byte] %p7/z -> %z27.d +c57f9fff : ld1sw z31.d, p7/Z, [sp, z31.d, LSL #2] : ld1sw (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d + +# LD1SW { .D }, /Z, [, .D] (LD1SW-Z.P.BZ-D.64.unscaled) +c5408000 : ld1sw z0.d, p0/Z, [x0, z0.d] : ld1sw (%x0,%z0.d)[16byte] %p0/z -> %z0.d 
+c5458482 : ld1sw z2.d, p1/Z, [x4, z5.d] : ld1sw (%x4,%z5.d)[16byte] %p1/z -> %z2.d +c54788c4 : ld1sw z4.d, p2/Z, [x6, z7.d] : ld1sw (%x6,%z7.d)[16byte] %p2/z -> %z4.d +c5498906 : ld1sw z6.d, p2/Z, [x8, z9.d] : ld1sw (%x8,%z9.d)[16byte] %p2/z -> %z6.d +c54b8d48 : ld1sw z8.d, p3/Z, [x10, z11.d] : ld1sw (%x10,%z11.d)[16byte] %p3/z -> %z8.d +c54d8d6a : ld1sw z10.d, p3/Z, [x11, z13.d] : ld1sw (%x11,%z13.d)[16byte] %p3/z -> %z10.d +c54f91ac : ld1sw z12.d, p4/Z, [x13, z15.d] : ld1sw (%x13,%z15.d)[16byte] %p4/z -> %z12.d +c55191ee : ld1sw z14.d, p4/Z, [x15, z17.d] : ld1sw (%x15,%z17.d)[16byte] %p4/z -> %z14.d +c5539630 : ld1sw z16.d, p5/Z, [x17, z19.d] : ld1sw (%x17,%z19.d)[16byte] %p5/z -> %z16.d +c5549671 : ld1sw z17.d, p5/Z, [x19, z20.d] : ld1sw (%x19,%z20.d)[16byte] %p5/z -> %z17.d +c55696b3 : ld1sw z19.d, p5/Z, [x21, z22.d] : ld1sw (%x21,%z22.d)[16byte] %p5/z -> %z19.d +c5589af5 : ld1sw z21.d, p6/Z, [x23, z24.d] : ld1sw (%x23,%z24.d)[16byte] %p6/z -> %z21.d +c55a9b17 : ld1sw z23.d, p6/Z, [x24, z26.d] : ld1sw (%x24,%z26.d)[16byte] %p6/z -> %z23.d +c55c9f59 : ld1sw z25.d, p7/Z, [x26, z28.d] : ld1sw (%x26,%z28.d)[16byte] %p7/z -> %z25.d +c55e9f9b : ld1sw z27.d, p7/Z, [x28, z30.d] : ld1sw (%x28,%z30.d)[16byte] %p7/z -> %z27.d +c55f9fff : ld1sw z31.d, p7/Z, [sp, z31.d] : ld1sw (%sp,%z31.d)[16byte] %p7/z -> %z31.d + # LD1W { .S }, /Z, [.S{, #}] (LD1W-Z.P.AI-S) 8520c000 : ld1w z0.s, p0/Z, [z0.s, #0] : ld1w (%z0.s)[32byte] %p0/z -> %z0.s 8522c482 : ld1w z2.s, p1/Z, [z4.s, #8] : ld1w +0x08(%z4.s)[32byte] %p1/z -> %z2.s @@ -11270,6 +11450,42 @@ c539df79 : ld1w z25.d, p7/Z, [z27.d, #100] : ld1w +0x64(%z27.d)[16by c53bdfbb : ld1w z27.d, p7/Z, [z29.d, #108] : ld1w +0x6c(%z29.d)[16byte] %p7/z -> %z27.d c53fdfff : ld1w z31.d, p7/Z, [z31.d, #124] : ld1w +0x7c(%z31.d)[16byte] %p7/z -> %z31.d +# LD1W { .D }, /Z, [, .D, LSL #2] (LD1W-Z.P.BZ-D.64.scaled) +c560c000 : ld1w z0.d, p0/Z, [x0, z0.d, LSL #2] : ld1w (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d +c565c482 : ld1w z2.d, p1/Z, [x4, 
z5.d, LSL #2] : ld1w (%x4,%z5.d,lsl #2)[16byte] %p1/z -> %z2.d +c567c8c4 : ld1w z4.d, p2/Z, [x6, z7.d, LSL #2] : ld1w (%x6,%z7.d,lsl #2)[16byte] %p2/z -> %z4.d +c569c906 : ld1w z6.d, p2/Z, [x8, z9.d, LSL #2] : ld1w (%x8,%z9.d,lsl #2)[16byte] %p2/z -> %z6.d +c56bcd48 : ld1w z8.d, p3/Z, [x10, z11.d, LSL #2] : ld1w (%x10,%z11.d,lsl #2)[16byte] %p3/z -> %z8.d +c56dcd6a : ld1w z10.d, p3/Z, [x11, z13.d, LSL #2] : ld1w (%x11,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d +c56fd1ac : ld1w z12.d, p4/Z, [x13, z15.d, LSL #2] : ld1w (%x13,%z15.d,lsl #2)[16byte] %p4/z -> %z12.d +c571d1ee : ld1w z14.d, p4/Z, [x15, z17.d, LSL #2] : ld1w (%x15,%z17.d,lsl #2)[16byte] %p4/z -> %z14.d +c573d630 : ld1w z16.d, p5/Z, [x17, z19.d, LSL #2] : ld1w (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d +c574d671 : ld1w z17.d, p5/Z, [x19, z20.d, LSL #2] : ld1w (%x19,%z20.d,lsl #2)[16byte] %p5/z -> %z17.d +c576d6b3 : ld1w z19.d, p5/Z, [x21, z22.d, LSL #2] : ld1w (%x21,%z22.d,lsl #2)[16byte] %p5/z -> %z19.d +c578daf5 : ld1w z21.d, p6/Z, [x23, z24.d, LSL #2] : ld1w (%x23,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d +c57adb17 : ld1w z23.d, p6/Z, [x24, z26.d, LSL #2] : ld1w (%x24,%z26.d,lsl #2)[16byte] %p6/z -> %z23.d +c57cdf59 : ld1w z25.d, p7/Z, [x26, z28.d, LSL #2] : ld1w (%x26,%z28.d,lsl #2)[16byte] %p7/z -> %z25.d +c57edf9b : ld1w z27.d, p7/Z, [x28, z30.d, LSL #2] : ld1w (%x28,%z30.d,lsl #2)[16byte] %p7/z -> %z27.d +c57fdfff : ld1w z31.d, p7/Z, [sp, z31.d, LSL #2] : ld1w (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d + +# LD1W { .D }, /Z, [, .D] (LD1W-Z.P.BZ-D.64.unscaled) +c540c000 : ld1w z0.d, p0/Z, [x0, z0.d] : ld1w (%x0,%z0.d)[16byte] %p0/z -> %z0.d +c545c482 : ld1w z2.d, p1/Z, [x4, z5.d] : ld1w (%x4,%z5.d)[16byte] %p1/z -> %z2.d +c547c8c4 : ld1w z4.d, p2/Z, [x6, z7.d] : ld1w (%x6,%z7.d)[16byte] %p2/z -> %z4.d +c549c906 : ld1w z6.d, p2/Z, [x8, z9.d] : ld1w (%x8,%z9.d)[16byte] %p2/z -> %z6.d +c54bcd48 : ld1w z8.d, p3/Z, [x10, z11.d] : ld1w (%x10,%z11.d)[16byte] %p3/z -> %z8.d +c54dcd6a : ld1w z10.d, p3/Z, [x11, 
z13.d] : ld1w (%x11,%z13.d)[16byte] %p3/z -> %z10.d +c54fd1ac : ld1w z12.d, p4/Z, [x13, z15.d] : ld1w (%x13,%z15.d)[16byte] %p4/z -> %z12.d +c551d1ee : ld1w z14.d, p4/Z, [x15, z17.d] : ld1w (%x15,%z17.d)[16byte] %p4/z -> %z14.d +c553d630 : ld1w z16.d, p5/Z, [x17, z19.d] : ld1w (%x17,%z19.d)[16byte] %p5/z -> %z16.d +c554d671 : ld1w z17.d, p5/Z, [x19, z20.d] : ld1w (%x19,%z20.d)[16byte] %p5/z -> %z17.d +c556d6b3 : ld1w z19.d, p5/Z, [x21, z22.d] : ld1w (%x21,%z22.d)[16byte] %p5/z -> %z19.d +c558daf5 : ld1w z21.d, p6/Z, [x23, z24.d] : ld1w (%x23,%z24.d)[16byte] %p6/z -> %z21.d +c55adb17 : ld1w z23.d, p6/Z, [x24, z26.d] : ld1w (%x24,%z26.d)[16byte] %p6/z -> %z23.d +c55cdf59 : ld1w z25.d, p7/Z, [x26, z28.d] : ld1w (%x26,%z28.d)[16byte] %p7/z -> %z25.d +c55edf9b : ld1w z27.d, p7/Z, [x28, z30.d] : ld1w (%x28,%z30.d)[16byte] %p7/z -> %z27.d +c55fdfff : ld1w z31.d, p7/Z, [sp, z31.d] : ld1w (%sp,%z31.d)[16byte] %p7/z -> %z31.d + # LDFF1B { .H }, /Z, [{, }] (LDFF1B-Z.P.BR-U16) a4206000 : ldff1b z0.h, p0/Z, [x0, x0] : ldff1b (%x0,%x0)[16byte] %p0/z -> %z0.h a4256482 : ldff1b z2.h, p1/Z, [x4, x5] : ldff1b (%x4,%x5)[16byte] %p1/z -> %z2.h @@ -11414,6 +11630,42 @@ c5b9ff79 : ldff1d z25.d, p7/Z, [z27.d, #200] : ldff1d +0xc8(%z27.d)[32by c5bbffbb : ldff1d z27.d, p7/Z, [z29.d, #216] : ldff1d +0xd8(%z29.d)[32byte] %p7/z -> %z27.d c5bfffff : ldff1d z31.d, p7/Z, [z31.d, #248] : ldff1d +0xf8(%z31.d)[32byte] %p7/z -> %z31.d +# LDFF1D { .D }, /Z, [, .D, LSL #3] (LDFF1D-Z.P.BZ-D.64.scaled) +c5e0e000 : ldff1d z0.d, p0/Z, [x0, z0.d, LSL #3] : ldff1d (%x0,%z0.d,lsl #3)[32byte] %p0/z -> %z0.d +c5e5e482 : ldff1d z2.d, p1/Z, [x4, z5.d, LSL #3] : ldff1d (%x4,%z5.d,lsl #3)[32byte] %p1/z -> %z2.d +c5e7e8c4 : ldff1d z4.d, p2/Z, [x6, z7.d, LSL #3] : ldff1d (%x6,%z7.d,lsl #3)[32byte] %p2/z -> %z4.d +c5e9e906 : ldff1d z6.d, p2/Z, [x8, z9.d, LSL #3] : ldff1d (%x8,%z9.d,lsl #3)[32byte] %p2/z -> %z6.d +c5ebed48 : ldff1d z8.d, p3/Z, [x10, z11.d, LSL #3] : ldff1d (%x10,%z11.d,lsl #3)[32byte] %p3/z -> %z8.d 
+c5eded6a : ldff1d z10.d, p3/Z, [x11, z13.d, LSL #3] : ldff1d (%x11,%z13.d,lsl #3)[32byte] %p3/z -> %z10.d +c5eff1ac : ldff1d z12.d, p4/Z, [x13, z15.d, LSL #3] : ldff1d (%x13,%z15.d,lsl #3)[32byte] %p4/z -> %z12.d +c5f1f1ee : ldff1d z14.d, p4/Z, [x15, z17.d, LSL #3] : ldff1d (%x15,%z17.d,lsl #3)[32byte] %p4/z -> %z14.d +c5f3f630 : ldff1d z16.d, p5/Z, [x17, z19.d, LSL #3] : ldff1d (%x17,%z19.d,lsl #3)[32byte] %p5/z -> %z16.d +c5f4f671 : ldff1d z17.d, p5/Z, [x19, z20.d, LSL #3] : ldff1d (%x19,%z20.d,lsl #3)[32byte] %p5/z -> %z17.d +c5f6f6b3 : ldff1d z19.d, p5/Z, [x21, z22.d, LSL #3] : ldff1d (%x21,%z22.d,lsl #3)[32byte] %p5/z -> %z19.d +c5f8faf5 : ldff1d z21.d, p6/Z, [x23, z24.d, LSL #3] : ldff1d (%x23,%z24.d,lsl #3)[32byte] %p6/z -> %z21.d +c5fafb17 : ldff1d z23.d, p6/Z, [x24, z26.d, LSL #3] : ldff1d (%x24,%z26.d,lsl #3)[32byte] %p6/z -> %z23.d +c5fcff59 : ldff1d z25.d, p7/Z, [x26, z28.d, LSL #3] : ldff1d (%x26,%z28.d,lsl #3)[32byte] %p7/z -> %z25.d +c5feff9b : ldff1d z27.d, p7/Z, [x28, z30.d, LSL #3] : ldff1d (%x28,%z30.d,lsl #3)[32byte] %p7/z -> %z27.d +c5ffffff : ldff1d z31.d, p7/Z, [sp, z31.d, LSL #3] : ldff1d (%sp,%z31.d,lsl #3)[32byte] %p7/z -> %z31.d + +# LDFF1D { .D }, /Z, [, .D] (LDFF1D-Z.P.BZ-D.64.unscaled) +c5c0e000 : ldff1d z0.d, p0/Z, [x0, z0.d] : ldff1d (%x0,%z0.d)[32byte] %p0/z -> %z0.d +c5c5e482 : ldff1d z2.d, p1/Z, [x4, z5.d] : ldff1d (%x4,%z5.d)[32byte] %p1/z -> %z2.d +c5c7e8c4 : ldff1d z4.d, p2/Z, [x6, z7.d] : ldff1d (%x6,%z7.d)[32byte] %p2/z -> %z4.d +c5c9e906 : ldff1d z6.d, p2/Z, [x8, z9.d] : ldff1d (%x8,%z9.d)[32byte] %p2/z -> %z6.d +c5cbed48 : ldff1d z8.d, p3/Z, [x10, z11.d] : ldff1d (%x10,%z11.d)[32byte] %p3/z -> %z8.d +c5cded6a : ldff1d z10.d, p3/Z, [x11, z13.d] : ldff1d (%x11,%z13.d)[32byte] %p3/z -> %z10.d +c5cff1ac : ldff1d z12.d, p4/Z, [x13, z15.d] : ldff1d (%x13,%z15.d)[32byte] %p4/z -> %z12.d +c5d1f1ee : ldff1d z14.d, p4/Z, [x15, z17.d] : ldff1d (%x15,%z17.d)[32byte] %p4/z -> %z14.d +c5d3f630 : ldff1d z16.d, p5/Z, [x17, z19.d] : ldff1d 
(%x17,%z19.d)[32byte] %p5/z -> %z16.d +c5d4f671 : ldff1d z17.d, p5/Z, [x19, z20.d] : ldff1d (%x19,%z20.d)[32byte] %p5/z -> %z17.d +c5d6f6b3 : ldff1d z19.d, p5/Z, [x21, z22.d] : ldff1d (%x21,%z22.d)[32byte] %p5/z -> %z19.d +c5d8faf5 : ldff1d z21.d, p6/Z, [x23, z24.d] : ldff1d (%x23,%z24.d)[32byte] %p6/z -> %z21.d +c5dafb17 : ldff1d z23.d, p6/Z, [x24, z26.d] : ldff1d (%x24,%z26.d)[32byte] %p6/z -> %z23.d +c5dcff59 : ldff1d z25.d, p7/Z, [x26, z28.d] : ldff1d (%x26,%z28.d)[32byte] %p7/z -> %z25.d +c5deff9b : ldff1d z27.d, p7/Z, [x28, z30.d] : ldff1d (%x28,%z30.d)[32byte] %p7/z -> %z27.d +c5dfffff : ldff1d z31.d, p7/Z, [sp, z31.d] : ldff1d (%sp,%z31.d)[32byte] %p7/z -> %z31.d + # LDFF1H { .H }, /Z, [{, , LSL #1}] (LDFF1H-Z.P.BR-U16) a4a06000 : ldff1h z0.h, p0/Z, [x0, x0, LSL #1] : ldff1h (%x0,%x0,lsl #1)[32byte] %p0/z -> %z0.h a4a56482 : ldff1h z2.h, p1/Z, [x4, x5, LSL #1] : ldff1h (%x4,%x5,lsl #1)[32byte] %p1/z -> %z2.h @@ -11504,6 +11756,42 @@ c4b9ff79 : ldff1h z25.d, p7/Z, [z27.d, #50] : ldff1h +0x32(%z27.d)[8byt c4bbffbb : ldff1h z27.d, p7/Z, [z29.d, #54] : ldff1h +0x36(%z29.d)[8byte] %p7/z -> %z27.d c4bfffff : ldff1h z31.d, p7/Z, [z31.d, #62] : ldff1h +0x3e(%z31.d)[8byte] %p7/z -> %z31.d +# LDFF1H { .D }, /Z, [, .D, LSL #1] (LDFF1H-Z.P.BZ-D.64.scaled) +c4e0e000 : ldff1h z0.d, p0/Z, [x0, z0.d, LSL #1] : ldff1h (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d +c4e5e482 : ldff1h z2.d, p1/Z, [x4, z5.d, LSL #1] : ldff1h (%x4,%z5.d,lsl #1)[8byte] %p1/z -> %z2.d +c4e7e8c4 : ldff1h z4.d, p2/Z, [x6, z7.d, LSL #1] : ldff1h (%x6,%z7.d,lsl #1)[8byte] %p2/z -> %z4.d +c4e9e906 : ldff1h z6.d, p2/Z, [x8, z9.d, LSL #1] : ldff1h (%x8,%z9.d,lsl #1)[8byte] %p2/z -> %z6.d +c4ebed48 : ldff1h z8.d, p3/Z, [x10, z11.d, LSL #1] : ldff1h (%x10,%z11.d,lsl #1)[8byte] %p3/z -> %z8.d +c4eded6a : ldff1h z10.d, p3/Z, [x11, z13.d, LSL #1] : ldff1h (%x11,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d +c4eff1ac : ldff1h z12.d, p4/Z, [x13, z15.d, LSL #1] : ldff1h (%x13,%z15.d,lsl #1)[8byte] %p4/z -> %z12.d +c4f1f1ee : 
ldff1h z14.d, p4/Z, [x15, z17.d, LSL #1] : ldff1h (%x15,%z17.d,lsl #1)[8byte] %p4/z -> %z14.d +c4f3f630 : ldff1h z16.d, p5/Z, [x17, z19.d, LSL #1] : ldff1h (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d +c4f4f671 : ldff1h z17.d, p5/Z, [x19, z20.d, LSL #1] : ldff1h (%x19,%z20.d,lsl #1)[8byte] %p5/z -> %z17.d +c4f6f6b3 : ldff1h z19.d, p5/Z, [x21, z22.d, LSL #1] : ldff1h (%x21,%z22.d,lsl #1)[8byte] %p5/z -> %z19.d +c4f8faf5 : ldff1h z21.d, p6/Z, [x23, z24.d, LSL #1] : ldff1h (%x23,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d +c4fafb17 : ldff1h z23.d, p6/Z, [x24, z26.d, LSL #1] : ldff1h (%x24,%z26.d,lsl #1)[8byte] %p6/z -> %z23.d +c4fcff59 : ldff1h z25.d, p7/Z, [x26, z28.d, LSL #1] : ldff1h (%x26,%z28.d,lsl #1)[8byte] %p7/z -> %z25.d +c4feff9b : ldff1h z27.d, p7/Z, [x28, z30.d, LSL #1] : ldff1h (%x28,%z30.d,lsl #1)[8byte] %p7/z -> %z27.d +c4ffffff : ldff1h z31.d, p7/Z, [sp, z31.d, LSL #1] : ldff1h (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d + +# LDFF1H { .D }, /Z, [, .D] (LDFF1H-Z.P.BZ-D.64.unscaled) +c4c0e000 : ldff1h z0.d, p0/Z, [x0, z0.d] : ldff1h (%x0,%z0.d)[8byte] %p0/z -> %z0.d +c4c5e482 : ldff1h z2.d, p1/Z, [x4, z5.d] : ldff1h (%x4,%z5.d)[8byte] %p1/z -> %z2.d +c4c7e8c4 : ldff1h z4.d, p2/Z, [x6, z7.d] : ldff1h (%x6,%z7.d)[8byte] %p2/z -> %z4.d +c4c9e906 : ldff1h z6.d, p2/Z, [x8, z9.d] : ldff1h (%x8,%z9.d)[8byte] %p2/z -> %z6.d +c4cbed48 : ldff1h z8.d, p3/Z, [x10, z11.d] : ldff1h (%x10,%z11.d)[8byte] %p3/z -> %z8.d +c4cded6a : ldff1h z10.d, p3/Z, [x11, z13.d] : ldff1h (%x11,%z13.d)[8byte] %p3/z -> %z10.d +c4cff1ac : ldff1h z12.d, p4/Z, [x13, z15.d] : ldff1h (%x13,%z15.d)[8byte] %p4/z -> %z12.d +c4d1f1ee : ldff1h z14.d, p4/Z, [x15, z17.d] : ldff1h (%x15,%z17.d)[8byte] %p4/z -> %z14.d +c4d3f630 : ldff1h z16.d, p5/Z, [x17, z19.d] : ldff1h (%x17,%z19.d)[8byte] %p5/z -> %z16.d +c4d4f671 : ldff1h z17.d, p5/Z, [x19, z20.d] : ldff1h (%x19,%z20.d)[8byte] %p5/z -> %z17.d +c4d6f6b3 : ldff1h z19.d, p5/Z, [x21, z22.d] : ldff1h (%x21,%z22.d)[8byte] %p5/z -> %z19.d +c4d8faf5 : ldff1h 
z21.d, p6/Z, [x23, z24.d] : ldff1h (%x23,%z24.d)[8byte] %p6/z -> %z21.d +c4dafb17 : ldff1h z23.d, p6/Z, [x24, z26.d] : ldff1h (%x24,%z26.d)[8byte] %p6/z -> %z23.d +c4dcff59 : ldff1h z25.d, p7/Z, [x26, z28.d] : ldff1h (%x26,%z28.d)[8byte] %p7/z -> %z25.d +c4deff9b : ldff1h z27.d, p7/Z, [x28, z30.d] : ldff1h (%x28,%z30.d)[8byte] %p7/z -> %z27.d +c4dfffff : ldff1h z31.d, p7/Z, [sp, z31.d] : ldff1h (%sp,%z31.d)[8byte] %p7/z -> %z31.d + # LDFF1SB { .H }, /Z, [{, }] (LDFF1SB-Z.P.BR-S16) a5c06000 : ldff1sb z0.h, p0/Z, [x0, x0] : ldff1sb (%x0,%x0)[16byte] %p0/z -> %z0.h a5c56482 : ldff1sb z2.h, p1/Z, [x4, x5] : ldff1sb (%x4,%x5)[16byte] %p1/z -> %z2.h @@ -11594,6 +11882,24 @@ c439bf79 : ldff1sb z25.d, p7/Z, [z27.d, #25] : ldff1sb +0x19(%z27.d)[4by c43bbfbb : ldff1sb z27.d, p7/Z, [z29.d, #27] : ldff1sb +0x1b(%z29.d)[4byte] %p7/z -> %z27.d c43fbfff : ldff1sb z31.d, p7/Z, [z31.d, #31] : ldff1sb +0x1f(%z31.d)[4byte] %p7/z -> %z31.d +# LDFF1SB { .D }, /Z, [, .D] (LDFF1SB-Z.P.BZ-D.64.unscaled) +c440a000 : ldff1sb z0.d, p0/Z, [x0, z0.d] : ldff1sb (%x0,%z0.d)[4byte] %p0/z -> %z0.d +c445a482 : ldff1sb z2.d, p1/Z, [x4, z5.d] : ldff1sb (%x4,%z5.d)[4byte] %p1/z -> %z2.d +c447a8c4 : ldff1sb z4.d, p2/Z, [x6, z7.d] : ldff1sb (%x6,%z7.d)[4byte] %p2/z -> %z4.d +c449a906 : ldff1sb z6.d, p2/Z, [x8, z9.d] : ldff1sb (%x8,%z9.d)[4byte] %p2/z -> %z6.d +c44bad48 : ldff1sb z8.d, p3/Z, [x10, z11.d] : ldff1sb (%x10,%z11.d)[4byte] %p3/z -> %z8.d +c44dad6a : ldff1sb z10.d, p3/Z, [x11, z13.d] : ldff1sb (%x11,%z13.d)[4byte] %p3/z -> %z10.d +c44fb1ac : ldff1sb z12.d, p4/Z, [x13, z15.d] : ldff1sb (%x13,%z15.d)[4byte] %p4/z -> %z12.d +c451b1ee : ldff1sb z14.d, p4/Z, [x15, z17.d] : ldff1sb (%x15,%z17.d)[4byte] %p4/z -> %z14.d +c453b630 : ldff1sb z16.d, p5/Z, [x17, z19.d] : ldff1sb (%x17,%z19.d)[4byte] %p5/z -> %z16.d +c454b671 : ldff1sb z17.d, p5/Z, [x19, z20.d] : ldff1sb (%x19,%z20.d)[4byte] %p5/z -> %z17.d +c456b6b3 : ldff1sb z19.d, p5/Z, [x21, z22.d] : ldff1sb (%x21,%z22.d)[4byte] %p5/z -> %z19.d 
+c458baf5 : ldff1sb z21.d, p6/Z, [x23, z24.d] : ldff1sb (%x23,%z24.d)[4byte] %p6/z -> %z21.d +c45abb17 : ldff1sb z23.d, p6/Z, [x24, z26.d] : ldff1sb (%x24,%z26.d)[4byte] %p6/z -> %z23.d +c45cbf59 : ldff1sb z25.d, p7/Z, [x26, z28.d] : ldff1sb (%x26,%z28.d)[4byte] %p7/z -> %z25.d +c45ebf9b : ldff1sb z27.d, p7/Z, [x28, z30.d] : ldff1sb (%x28,%z30.d)[4byte] %p7/z -> %z27.d +c45fbfff : ldff1sb z31.d, p7/Z, [sp, z31.d] : ldff1sb (%sp,%z31.d)[4byte] %p7/z -> %z31.d + # LDFF1SH { .S }, /Z, [{, , LSL #1}] (LDFF1SH-Z.P.BR-S32) a5206000 : ldff1sh z0.s, p0/Z, [x0, x0, LSL #1] : ldff1sh (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.s a5256482 : ldff1sh z2.s, p1/Z, [x4, x5, LSL #1] : ldff1sh (%x4,%x5,lsl #1)[16byte] %p1/z -> %z2.s @@ -11666,6 +11972,42 @@ c4b9bf79 : ldff1sh z25.d, p7/Z, [z27.d, #50] : ldff1sh +0x32(%z27.d)[8by c4bbbfbb : ldff1sh z27.d, p7/Z, [z29.d, #54] : ldff1sh +0x36(%z29.d)[8byte] %p7/z -> %z27.d c4bfbfff : ldff1sh z31.d, p7/Z, [z31.d, #62] : ldff1sh +0x3e(%z31.d)[8byte] %p7/z -> %z31.d +# LDFF1SH { .D }, /Z, [, .D, LSL #1] (LDFF1SH-Z.P.BZ-D.64.scaled) +c4e0a000 : ldff1sh z0.d, p0/Z, [x0, z0.d, LSL #1] : ldff1sh (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d +c4e5a482 : ldff1sh z2.d, p1/Z, [x4, z5.d, LSL #1] : ldff1sh (%x4,%z5.d,lsl #1)[8byte] %p1/z -> %z2.d +c4e7a8c4 : ldff1sh z4.d, p2/Z, [x6, z7.d, LSL #1] : ldff1sh (%x6,%z7.d,lsl #1)[8byte] %p2/z -> %z4.d +c4e9a906 : ldff1sh z6.d, p2/Z, [x8, z9.d, LSL #1] : ldff1sh (%x8,%z9.d,lsl #1)[8byte] %p2/z -> %z6.d +c4ebad48 : ldff1sh z8.d, p3/Z, [x10, z11.d, LSL #1] : ldff1sh (%x10,%z11.d,lsl #1)[8byte] %p3/z -> %z8.d +c4edad6a : ldff1sh z10.d, p3/Z, [x11, z13.d, LSL #1] : ldff1sh (%x11,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d +c4efb1ac : ldff1sh z12.d, p4/Z, [x13, z15.d, LSL #1] : ldff1sh (%x13,%z15.d,lsl #1)[8byte] %p4/z -> %z12.d +c4f1b1ee : ldff1sh z14.d, p4/Z, [x15, z17.d, LSL #1] : ldff1sh (%x15,%z17.d,lsl #1)[8byte] %p4/z -> %z14.d +c4f3b630 : ldff1sh z16.d, p5/Z, [x17, z19.d, LSL #1] : ldff1sh (%x17,%z19.d,lsl #1)[8byte] 
%p5/z -> %z16.d +c4f4b671 : ldff1sh z17.d, p5/Z, [x19, z20.d, LSL #1] : ldff1sh (%x19,%z20.d,lsl #1)[8byte] %p5/z -> %z17.d +c4f6b6b3 : ldff1sh z19.d, p5/Z, [x21, z22.d, LSL #1] : ldff1sh (%x21,%z22.d,lsl #1)[8byte] %p5/z -> %z19.d +c4f8baf5 : ldff1sh z21.d, p6/Z, [x23, z24.d, LSL #1] : ldff1sh (%x23,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d +c4fabb17 : ldff1sh z23.d, p6/Z, [x24, z26.d, LSL #1] : ldff1sh (%x24,%z26.d,lsl #1)[8byte] %p6/z -> %z23.d +c4fcbf59 : ldff1sh z25.d, p7/Z, [x26, z28.d, LSL #1] : ldff1sh (%x26,%z28.d,lsl #1)[8byte] %p7/z -> %z25.d +c4febf9b : ldff1sh z27.d, p7/Z, [x28, z30.d, LSL #1] : ldff1sh (%x28,%z30.d,lsl #1)[8byte] %p7/z -> %z27.d +c4ffbfff : ldff1sh z31.d, p7/Z, [sp, z31.d, LSL #1] : ldff1sh (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d + +# LDFF1SH { .D }, /Z, [, .D] (LDFF1SH-Z.P.BZ-D.64.unscaled) +c4c0a000 : ldff1sh z0.d, p0/Z, [x0, z0.d] : ldff1sh (%x0,%z0.d)[8byte] %p0/z -> %z0.d +c4c5a482 : ldff1sh z2.d, p1/Z, [x4, z5.d] : ldff1sh (%x4,%z5.d)[8byte] %p1/z -> %z2.d +c4c7a8c4 : ldff1sh z4.d, p2/Z, [x6, z7.d] : ldff1sh (%x6,%z7.d)[8byte] %p2/z -> %z4.d +c4c9a906 : ldff1sh z6.d, p2/Z, [x8, z9.d] : ldff1sh (%x8,%z9.d)[8byte] %p2/z -> %z6.d +c4cbad48 : ldff1sh z8.d, p3/Z, [x10, z11.d] : ldff1sh (%x10,%z11.d)[8byte] %p3/z -> %z8.d +c4cdad6a : ldff1sh z10.d, p3/Z, [x11, z13.d] : ldff1sh (%x11,%z13.d)[8byte] %p3/z -> %z10.d +c4cfb1ac : ldff1sh z12.d, p4/Z, [x13, z15.d] : ldff1sh (%x13,%z15.d)[8byte] %p4/z -> %z12.d +c4d1b1ee : ldff1sh z14.d, p4/Z, [x15, z17.d] : ldff1sh (%x15,%z17.d)[8byte] %p4/z -> %z14.d +c4d3b630 : ldff1sh z16.d, p5/Z, [x17, z19.d] : ldff1sh (%x17,%z19.d)[8byte] %p5/z -> %z16.d +c4d4b671 : ldff1sh z17.d, p5/Z, [x19, z20.d] : ldff1sh (%x19,%z20.d)[8byte] %p5/z -> %z17.d +c4d6b6b3 : ldff1sh z19.d, p5/Z, [x21, z22.d] : ldff1sh (%x21,%z22.d)[8byte] %p5/z -> %z19.d +c4d8baf5 : ldff1sh z21.d, p6/Z, [x23, z24.d] : ldff1sh (%x23,%z24.d)[8byte] %p6/z -> %z21.d +c4dabb17 : ldff1sh z23.d, p6/Z, [x24, z26.d] : ldff1sh (%x24,%z26.d)[8byte] 
%p6/z -> %z23.d +c4dcbf59 : ldff1sh z25.d, p7/Z, [x26, z28.d] : ldff1sh (%x26,%z28.d)[8byte] %p7/z -> %z25.d +c4debf9b : ldff1sh z27.d, p7/Z, [x28, z30.d] : ldff1sh (%x28,%z30.d)[8byte] %p7/z -> %z27.d +c4dfbfff : ldff1sh z31.d, p7/Z, [sp, z31.d] : ldff1sh (%sp,%z31.d)[8byte] %p7/z -> %z31.d + # LDFF1SW { .D }, /Z, [{, , LSL #2}] (LDFF1SW-Z.P.BR-S64) a4806000 : ldff1sw z0.d, p0/Z, [x0, x0, LSL #2] : ldff1sw (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.d a4856482 : ldff1sw z2.d, p1/Z, [x4, x5, LSL #2] : ldff1sw (%x4,%x5,lsl #2)[16byte] %p1/z -> %z2.d @@ -11702,6 +12044,42 @@ c539bf79 : ldff1sw z25.d, p7/Z, [z27.d, #100] : ldff1sw +0x64(%z27.d)[16b c53bbfbb : ldff1sw z27.d, p7/Z, [z29.d, #108] : ldff1sw +0x6c(%z29.d)[16byte] %p7/z -> %z27.d c53fbfff : ldff1sw z31.d, p7/Z, [z31.d, #124] : ldff1sw +0x7c(%z31.d)[16byte] %p7/z -> %z31.d +# LDFF1SW { .D }, /Z, [, .D, LSL #2] (LDFF1SW-Z.P.BZ-D.64.scaled) +c560a000 : ldff1sw z0.d, p0/Z, [x0, z0.d, LSL #2] : ldff1sw (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d +c565a482 : ldff1sw z2.d, p1/Z, [x4, z5.d, LSL #2] : ldff1sw (%x4,%z5.d,lsl #2)[16byte] %p1/z -> %z2.d +c567a8c4 : ldff1sw z4.d, p2/Z, [x6, z7.d, LSL #2] : ldff1sw (%x6,%z7.d,lsl #2)[16byte] %p2/z -> %z4.d +c569a906 : ldff1sw z6.d, p2/Z, [x8, z9.d, LSL #2] : ldff1sw (%x8,%z9.d,lsl #2)[16byte] %p2/z -> %z6.d +c56bad48 : ldff1sw z8.d, p3/Z, [x10, z11.d, LSL #2] : ldff1sw (%x10,%z11.d,lsl #2)[16byte] %p3/z -> %z8.d +c56dad6a : ldff1sw z10.d, p3/Z, [x11, z13.d, LSL #2] : ldff1sw (%x11,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d +c56fb1ac : ldff1sw z12.d, p4/Z, [x13, z15.d, LSL #2] : ldff1sw (%x13,%z15.d,lsl #2)[16byte] %p4/z -> %z12.d +c571b1ee : ldff1sw z14.d, p4/Z, [x15, z17.d, LSL #2] : ldff1sw (%x15,%z17.d,lsl #2)[16byte] %p4/z -> %z14.d +c573b630 : ldff1sw z16.d, p5/Z, [x17, z19.d, LSL #2] : ldff1sw (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d +c574b671 : ldff1sw z17.d, p5/Z, [x19, z20.d, LSL #2] : ldff1sw (%x19,%z20.d,lsl #2)[16byte] %p5/z -> %z17.d +c576b6b3 : ldff1sw z19.d, p5/Z, 
[x21, z22.d, LSL #2] : ldff1sw (%x21,%z22.d,lsl #2)[16byte] %p5/z -> %z19.d +c578baf5 : ldff1sw z21.d, p6/Z, [x23, z24.d, LSL #2] : ldff1sw (%x23,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d +c57abb17 : ldff1sw z23.d, p6/Z, [x24, z26.d, LSL #2] : ldff1sw (%x24,%z26.d,lsl #2)[16byte] %p6/z -> %z23.d +c57cbf59 : ldff1sw z25.d, p7/Z, [x26, z28.d, LSL #2] : ldff1sw (%x26,%z28.d,lsl #2)[16byte] %p7/z -> %z25.d +c57ebf9b : ldff1sw z27.d, p7/Z, [x28, z30.d, LSL #2] : ldff1sw (%x28,%z30.d,lsl #2)[16byte] %p7/z -> %z27.d +c57fbfff : ldff1sw z31.d, p7/Z, [sp, z31.d, LSL #2] : ldff1sw (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d + +# LDFF1SW { .D }, /Z, [, .D] (LDFF1SW-Z.P.BZ-D.64.unscaled) +c540a000 : ldff1sw z0.d, p0/Z, [x0, z0.d] : ldff1sw (%x0,%z0.d)[16byte] %p0/z -> %z0.d +c545a482 : ldff1sw z2.d, p1/Z, [x4, z5.d] : ldff1sw (%x4,%z5.d)[16byte] %p1/z -> %z2.d +c547a8c4 : ldff1sw z4.d, p2/Z, [x6, z7.d] : ldff1sw (%x6,%z7.d)[16byte] %p2/z -> %z4.d +c549a906 : ldff1sw z6.d, p2/Z, [x8, z9.d] : ldff1sw (%x8,%z9.d)[16byte] %p2/z -> %z6.d +c54bad48 : ldff1sw z8.d, p3/Z, [x10, z11.d] : ldff1sw (%x10,%z11.d)[16byte] %p3/z -> %z8.d +c54dad6a : ldff1sw z10.d, p3/Z, [x11, z13.d] : ldff1sw (%x11,%z13.d)[16byte] %p3/z -> %z10.d +c54fb1ac : ldff1sw z12.d, p4/Z, [x13, z15.d] : ldff1sw (%x13,%z15.d)[16byte] %p4/z -> %z12.d +c551b1ee : ldff1sw z14.d, p4/Z, [x15, z17.d] : ldff1sw (%x15,%z17.d)[16byte] %p4/z -> %z14.d +c553b630 : ldff1sw z16.d, p5/Z, [x17, z19.d] : ldff1sw (%x17,%z19.d)[16byte] %p5/z -> %z16.d +c554b671 : ldff1sw z17.d, p5/Z, [x19, z20.d] : ldff1sw (%x19,%z20.d)[16byte] %p5/z -> %z17.d +c556b6b3 : ldff1sw z19.d, p5/Z, [x21, z22.d] : ldff1sw (%x21,%z22.d)[16byte] %p5/z -> %z19.d +c558baf5 : ldff1sw z21.d, p6/Z, [x23, z24.d] : ldff1sw (%x23,%z24.d)[16byte] %p6/z -> %z21.d +c55abb17 : ldff1sw z23.d, p6/Z, [x24, z26.d] : ldff1sw (%x24,%z26.d)[16byte] %p6/z -> %z23.d +c55cbf59 : ldff1sw z25.d, p7/Z, [x26, z28.d] : ldff1sw (%x26,%z28.d)[16byte] %p7/z -> %z25.d +c55ebf9b : ldff1sw z27.d, 
p7/Z, [x28, z30.d] : ldff1sw (%x28,%z30.d)[16byte] %p7/z -> %z27.d +c55fbfff : ldff1sw z31.d, p7/Z, [sp, z31.d] : ldff1sw (%sp,%z31.d)[16byte] %p7/z -> %z31.d + # LDFF1W { .S }, /Z, [{, , LSL #2}] (LDFF1W-Z.P.BR-U32) a5406000 : ldff1w z0.s, p0/Z, [x0, x0, LSL #2] : ldff1w (%x0,%x0,lsl #2)[32byte] %p0/z -> %z0.s a5456482 : ldff1w z2.s, p1/Z, [x4, x5, LSL #2] : ldff1w (%x4,%x5,lsl #2)[32byte] %p1/z -> %z2.s @@ -11774,6 +12152,42 @@ c539ff79 : ldff1w z25.d, p7/Z, [z27.d, #100] : ldff1w +0x64(%z27.d)[16by c53bffbb : ldff1w z27.d, p7/Z, [z29.d, #108] : ldff1w +0x6c(%z29.d)[16byte] %p7/z -> %z27.d c53fffff : ldff1w z31.d, p7/Z, [z31.d, #124] : ldff1w +0x7c(%z31.d)[16byte] %p7/z -> %z31.d +# LDFF1W { .D }, /Z, [, .D, LSL #2] (LDFF1W-Z.P.BZ-D.64.scaled) +c560e000 : ldff1w z0.d, p0/Z, [x0, z0.d, LSL #2] : ldff1w (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d +c565e482 : ldff1w z2.d, p1/Z, [x4, z5.d, LSL #2] : ldff1w (%x4,%z5.d,lsl #2)[16byte] %p1/z -> %z2.d +c567e8c4 : ldff1w z4.d, p2/Z, [x6, z7.d, LSL #2] : ldff1w (%x6,%z7.d,lsl #2)[16byte] %p2/z -> %z4.d +c569e906 : ldff1w z6.d, p2/Z, [x8, z9.d, LSL #2] : ldff1w (%x8,%z9.d,lsl #2)[16byte] %p2/z -> %z6.d +c56bed48 : ldff1w z8.d, p3/Z, [x10, z11.d, LSL #2] : ldff1w (%x10,%z11.d,lsl #2)[16byte] %p3/z -> %z8.d +c56ded6a : ldff1w z10.d, p3/Z, [x11, z13.d, LSL #2] : ldff1w (%x11,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d +c56ff1ac : ldff1w z12.d, p4/Z, [x13, z15.d, LSL #2] : ldff1w (%x13,%z15.d,lsl #2)[16byte] %p4/z -> %z12.d +c571f1ee : ldff1w z14.d, p4/Z, [x15, z17.d, LSL #2] : ldff1w (%x15,%z17.d,lsl #2)[16byte] %p4/z -> %z14.d +c573f630 : ldff1w z16.d, p5/Z, [x17, z19.d, LSL #2] : ldff1w (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d +c574f671 : ldff1w z17.d, p5/Z, [x19, z20.d, LSL #2] : ldff1w (%x19,%z20.d,lsl #2)[16byte] %p5/z -> %z17.d +c576f6b3 : ldff1w z19.d, p5/Z, [x21, z22.d, LSL #2] : ldff1w (%x21,%z22.d,lsl #2)[16byte] %p5/z -> %z19.d +c578faf5 : ldff1w z21.d, p6/Z, [x23, z24.d, LSL #2] : ldff1w (%x23,%z24.d,lsl #2)[16byte] 
%p6/z -> %z21.d +c57afb17 : ldff1w z23.d, p6/Z, [x24, z26.d, LSL #2] : ldff1w (%x24,%z26.d,lsl #2)[16byte] %p6/z -> %z23.d +c57cff59 : ldff1w z25.d, p7/Z, [x26, z28.d, LSL #2] : ldff1w (%x26,%z28.d,lsl #2)[16byte] %p7/z -> %z25.d +c57eff9b : ldff1w z27.d, p7/Z, [x28, z30.d, LSL #2] : ldff1w (%x28,%z30.d,lsl #2)[16byte] %p7/z -> %z27.d +c57fffff : ldff1w z31.d, p7/Z, [sp, z31.d, LSL #2] : ldff1w (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d + +# LDFF1W { .D }, /Z, [, .D] (LDFF1W-Z.P.BZ-D.64.unscaled) +c540e000 : ldff1w z0.d, p0/Z, [x0, z0.d] : ldff1w (%x0,%z0.d)[16byte] %p0/z -> %z0.d +c545e482 : ldff1w z2.d, p1/Z, [x4, z5.d] : ldff1w (%x4,%z5.d)[16byte] %p1/z -> %z2.d +c547e8c4 : ldff1w z4.d, p2/Z, [x6, z7.d] : ldff1w (%x6,%z7.d)[16byte] %p2/z -> %z4.d +c549e906 : ldff1w z6.d, p2/Z, [x8, z9.d] : ldff1w (%x8,%z9.d)[16byte] %p2/z -> %z6.d +c54bed48 : ldff1w z8.d, p3/Z, [x10, z11.d] : ldff1w (%x10,%z11.d)[16byte] %p3/z -> %z8.d +c54ded6a : ldff1w z10.d, p3/Z, [x11, z13.d] : ldff1w (%x11,%z13.d)[16byte] %p3/z -> %z10.d +c54ff1ac : ldff1w z12.d, p4/Z, [x13, z15.d] : ldff1w (%x13,%z15.d)[16byte] %p4/z -> %z12.d +c551f1ee : ldff1w z14.d, p4/Z, [x15, z17.d] : ldff1w (%x15,%z17.d)[16byte] %p4/z -> %z14.d +c553f630 : ldff1w z16.d, p5/Z, [x17, z19.d] : ldff1w (%x17,%z19.d)[16byte] %p5/z -> %z16.d +c554f671 : ldff1w z17.d, p5/Z, [x19, z20.d] : ldff1w (%x19,%z20.d)[16byte] %p5/z -> %z17.d +c556f6b3 : ldff1w z19.d, p5/Z, [x21, z22.d] : ldff1w (%x21,%z22.d)[16byte] %p5/z -> %z19.d +c558faf5 : ldff1w z21.d, p6/Z, [x23, z24.d] : ldff1w (%x23,%z24.d)[16byte] %p6/z -> %z21.d +c55afb17 : ldff1w z23.d, p6/Z, [x24, z26.d] : ldff1w (%x24,%z26.d)[16byte] %p6/z -> %z23.d +c55cff59 : ldff1w z25.d, p7/Z, [x26, z28.d] : ldff1w (%x26,%z28.d)[16byte] %p7/z -> %z25.d +c55eff9b : ldff1w z27.d, p7/Z, [x28, z30.d] : ldff1w (%x28,%z30.d)[16byte] %p7/z -> %z27.d +c55fffff : ldff1w z31.d, p7/Z, [sp, z31.d] : ldff1w (%sp,%z31.d)[16byte] %p7/z -> %z31.d + # LDNT1B { .B }, /Z, [, ] 
(LDNT1B-Z.P.BR-Contiguous) a400c000 : ldnt1b z0.b, p0/Z, [x0, x0] : ldnt1b (%x0,%x0)[32byte] %p0/z -> %z0.b a405c482 : ldnt1b z2.b, p1/Z, [x4, x5] : ldnt1b (%x4,%x5)[32byte] %p1/z -> %z2.b @@ -13710,60 +14124,6 @@ a41edfff : ldnt1b z31.b, p7/Z, [sp, x30] : ldnt1b (%sp,%x30)[32byte] 85d71f8e : prfb 14, p7, [x28, #23, MUL VL] : prfb $0x0e %p7 +0x17(%x28) 85df1fef : prfb 15, p7, [sp, #31, MUL VL] : prfb $0x0f %p7 +0x1f(%sp) -# PRFD , , [{, #, MUL VL}] (PRFD-I.P.BI-S) -85e06000 : prfd PLDL1KEEP, p0, [x0, #-32, MUL VL] : prfd $0x00 %p0 -0x20(%x0) -85e46481 : prfd PLDL1STRM, p1, [x4, #-28, MUL VL] : prfd $0x01 %p1 -0x1c(%x4) -85e868c2 : prfd PLDL2KEEP, p2, [x6, #-24, MUL VL] : prfd $0x02 %p2 -0x18(%x6) -85ec6903 : prfd PLDL2STRM, p2, [x8, #-20, MUL VL] : prfd $0x03 %p2 -0x14(%x8) -85f06d44 : prfd PLDL3KEEP, p3, [x10, #-16, MUL VL] : prfd $0x04 %p3 -0x10(%x10) -85f46d65 : prfd PLDL3STRM, p3, [x11, #-12, MUL VL] : prfd $0x05 %p3 -0x0c(%x11) -85f871a6 : prfd 6, p4, [x13, #-8, MUL VL] : prfd $0x06 %p4 -0x08(%x13) -85fc71e7 : prfd 7, p4, [x15, #-4, MUL VL] : prfd $0x07 %p4 -0x04(%x15) -85c07628 : prfd PSTL1KEEP, p5, [x17, #0, MUL VL] : prfd $0x08 %p5 (%x17) -85c37669 : prfd PSTL1STRM, p5, [x19, #3, MUL VL] : prfd $0x09 %p5 +0x03(%x19) -85c776aa : prfd PSTL2KEEP, p5, [x21, #7, MUL VL] : prfd $0x0a %p5 +0x07(%x21) -85cb7aeb : prfd PSTL2STRM, p6, [x23, #11, MUL VL] : prfd $0x0b %p6 +0x0b(%x23) -85cf7b0c : prfd PSTL3KEEP, p6, [x24, #15, MUL VL] : prfd $0x0c %p6 +0x0f(%x24) -85d37f4d : prfd PSTL3STRM, p7, [x26, #19, MUL VL] : prfd $0x0d %p7 +0x13(%x26) -85d77f8e : prfd 14, p7, [x28, #23, MUL VL] : prfd $0x0e %p7 +0x17(%x28) -85df7fef : prfd 15, p7, [sp, #31, MUL VL] : prfd $0x0f %p7 +0x1f(%sp) - -# PRFH , , [{, #, MUL VL}] (PRFH-I.P.BI-S) -85e02000 : prfh PLDL1KEEP, p0, [x0, #-32, MUL VL] : prfh $0x00 %p0 -0x20(%x0) -85e42481 : prfh PLDL1STRM, p1, [x4, #-28, MUL VL] : prfh $0x01 %p1 -0x1c(%x4) -85e828c2 : prfh PLDL2KEEP, p2, [x6, #-24, MUL VL] : prfh $0x02 %p2 -0x18(%x6) -85ec2903 
: prfh PLDL2STRM, p2, [x8, #-20, MUL VL] : prfh $0x03 %p2 -0x14(%x8) -85f02d44 : prfh PLDL3KEEP, p3, [x10, #-16, MUL VL] : prfh $0x04 %p3 -0x10(%x10) -85f42d65 : prfh PLDL3STRM, p3, [x11, #-12, MUL VL] : prfh $0x05 %p3 -0x0c(%x11) -85f831a6 : prfh 6, p4, [x13, #-8, MUL VL] : prfh $0x06 %p4 -0x08(%x13) -85fc31e7 : prfh 7, p4, [x15, #-4, MUL VL] : prfh $0x07 %p4 -0x04(%x15) -85c03628 : prfh PSTL1KEEP, p5, [x17, #0, MUL VL] : prfh $0x08 %p5 (%x17) -85c33669 : prfh PSTL1STRM, p5, [x19, #3, MUL VL] : prfh $0x09 %p5 +0x03(%x19) -85c736aa : prfh PSTL2KEEP, p5, [x21, #7, MUL VL] : prfh $0x0a %p5 +0x07(%x21) -85cb3aeb : prfh PSTL2STRM, p6, [x23, #11, MUL VL] : prfh $0x0b %p6 +0x0b(%x23) -85cf3b0c : prfh PSTL3KEEP, p6, [x24, #15, MUL VL] : prfh $0x0c %p6 +0x0f(%x24) -85d33f4d : prfh PSTL3STRM, p7, [x26, #19, MUL VL] : prfh $0x0d %p7 +0x13(%x26) -85d73f8e : prfh 14, p7, [x28, #23, MUL VL] : prfh $0x0e %p7 +0x17(%x28) -85df3fef : prfh 15, p7, [sp, #31, MUL VL] : prfh $0x0f %p7 +0x1f(%sp) - -# PRFW , , [{, #, MUL VL}] (PRFW-I.P.BI-S) -85e04000 : prfw PLDL1KEEP, p0, [x0, #-32, MUL VL] : prfw $0x00 %p0 -0x20(%x0) -85e44481 : prfw PLDL1STRM, p1, [x4, #-28, MUL VL] : prfw $0x01 %p1 -0x1c(%x4) -85e848c2 : prfw PLDL2KEEP, p2, [x6, #-24, MUL VL] : prfw $0x02 %p2 -0x18(%x6) -85ec4903 : prfw PLDL2STRM, p2, [x8, #-20, MUL VL] : prfw $0x03 %p2 -0x14(%x8) -85f04d44 : prfw PLDL3KEEP, p3, [x10, #-16, MUL VL] : prfw $0x04 %p3 -0x10(%x10) -85f44d65 : prfw PLDL3STRM, p3, [x11, #-12, MUL VL] : prfw $0x05 %p3 -0x0c(%x11) -85f851a6 : prfw 6, p4, [x13, #-8, MUL VL] : prfw $0x06 %p4 -0x08(%x13) -85fc51e7 : prfw 7, p4, [x15, #-4, MUL VL] : prfw $0x07 %p4 -0x04(%x15) -85c05628 : prfw PSTL1KEEP, p5, [x17, #0, MUL VL] : prfw $0x08 %p5 (%x17) -85c35669 : prfw PSTL1STRM, p5, [x19, #3, MUL VL] : prfw $0x09 %p5 +0x03(%x19) -85c756aa : prfw PSTL2KEEP, p5, [x21, #7, MUL VL] : prfw $0x0a %p5 +0x07(%x21) -85cb5aeb : prfw PSTL2STRM, p6, [x23, #11, MUL VL] : prfw $0x0b %p6 +0x0b(%x23) -85cf5b0c : prfw PSTL3KEEP, 
p6, [x24, #15, MUL VL] : prfw $0x0c %p6 +0x0f(%x24) -85d35f4d : prfw PSTL3STRM, p7, [x26, #19, MUL VL] : prfw $0x0d %p7 +0x13(%x26) -85d75f8e : prfw 14, p7, [x28, #23, MUL VL] : prfw $0x0e %p7 +0x17(%x28) -85df5fef : prfw 15, p7, [sp, #31, MUL VL] : prfw $0x0f %p7 +0x1f(%sp) - # PRFB , , [.S{, #}] (PRFB-I.P.AI-S) 8400e000 : prfb PLDL1KEEP, p0, [z0.s, #0] : prfb $0x00 %p0 (%z0.s) 8402e481 : prfb PLDL1STRM, p1, [z4.s, #2] : prfb $0x01 %p1 +0x02(%z4.s) @@ -13800,6 +14160,42 @@ c419ff6d : prfb PSTL3STRM, p7, [z27.d, #25] : prfb $0x0d %p7 +0x19(%z c41bffae : prfb 14, p7, [z29.d, #27] : prfb $0x0e %p7 +0x1b(%z29.d) c41fffef : prfb 15, p7, [z31.d, #31] : prfb $0x0f %p7 +0x1f(%z31.d) +# PRFB , , [, .D] (PRFB-I.P.BZ-D.64.scaled) +c4608000 : prfb PLDL1KEEP, p0, [x0, z0.d] : prfb $0x00 %p0 (%x0,%z0.d) +c4658481 : prfb PLDL1STRM, p1, [x4, z5.d] : prfb $0x01 %p1 (%x4,%z5.d) +c46788c2 : prfb PLDL2KEEP, p2, [x6, z7.d] : prfb $0x02 %p2 (%x6,%z7.d) +c4698903 : prfb PLDL2STRM, p2, [x8, z9.d] : prfb $0x03 %p2 (%x8,%z9.d) +c46b8d44 : prfb PLDL3KEEP, p3, [x10, z11.d] : prfb $0x04 %p3 (%x10,%z11.d) +c46d8d65 : prfb PLDL3STRM, p3, [x11, z13.d] : prfb $0x05 %p3 (%x11,%z13.d) +c46f91a6 : prfb 6, p4, [x13, z15.d] : prfb $0x06 %p4 (%x13,%z15.d) +c47191e7 : prfb 7, p4, [x15, z17.d] : prfb $0x07 %p4 (%x15,%z17.d) +c4739628 : prfb PSTL1KEEP, p5, [x17, z19.d] : prfb $0x08 %p5 (%x17,%z19.d) +c4749669 : prfb PSTL1STRM, p5, [x19, z20.d] : prfb $0x09 %p5 (%x19,%z20.d) +c47696aa : prfb PSTL2KEEP, p5, [x21, z22.d] : prfb $0x0a %p5 (%x21,%z22.d) +c4789aeb : prfb PSTL2STRM, p6, [x23, z24.d] : prfb $0x0b %p6 (%x23,%z24.d) +c47a9b0c : prfb PSTL3KEEP, p6, [x24, z26.d] : prfb $0x0c %p6 (%x24,%z26.d) +c47c9f4d : prfb PSTL3STRM, p7, [x26, z28.d] : prfb $0x0d %p7 (%x26,%z28.d) +c47e9f8e : prfb 14, p7, [x28, z30.d] : prfb $0x0e %p7 (%x28,%z30.d) +c47f9fef : prfb 15, p7, [sp, z31.d] : prfb $0x0f %p7 (%sp,%z31.d) + +# PRFD , , [{, #, MUL VL}] (PRFD-I.P.BI-S) +85e06000 : prfd PLDL1KEEP, p0, [x0, #-32, MUL VL] : 
prfd $0x00 %p0 -0x20(%x0) +85e46481 : prfd PLDL1STRM, p1, [x4, #-28, MUL VL] : prfd $0x01 %p1 -0x1c(%x4) +85e868c2 : prfd PLDL2KEEP, p2, [x6, #-24, MUL VL] : prfd $0x02 %p2 -0x18(%x6) +85ec6903 : prfd PLDL2STRM, p2, [x8, #-20, MUL VL] : prfd $0x03 %p2 -0x14(%x8) +85f06d44 : prfd PLDL3KEEP, p3, [x10, #-16, MUL VL] : prfd $0x04 %p3 -0x10(%x10) +85f46d65 : prfd PLDL3STRM, p3, [x11, #-12, MUL VL] : prfd $0x05 %p3 -0x0c(%x11) +85f871a6 : prfd 6, p4, [x13, #-8, MUL VL] : prfd $0x06 %p4 -0x08(%x13) +85fc71e7 : prfd 7, p4, [x15, #-4, MUL VL] : prfd $0x07 %p4 -0x04(%x15) +85c07628 : prfd PSTL1KEEP, p5, [x17, #0, MUL VL] : prfd $0x08 %p5 (%x17) +85c37669 : prfd PSTL1STRM, p5, [x19, #3, MUL VL] : prfd $0x09 %p5 +0x03(%x19) +85c776aa : prfd PSTL2KEEP, p5, [x21, #7, MUL VL] : prfd $0x0a %p5 +0x07(%x21) +85cb7aeb : prfd PSTL2STRM, p6, [x23, #11, MUL VL] : prfd $0x0b %p6 +0x0b(%x23) +85cf7b0c : prfd PSTL3KEEP, p6, [x24, #15, MUL VL] : prfd $0x0c %p6 +0x0f(%x24) +85d37f4d : prfd PSTL3STRM, p7, [x26, #19, MUL VL] : prfd $0x0d %p7 +0x13(%x26) +85d77f8e : prfd 14, p7, [x28, #23, MUL VL] : prfd $0x0e %p7 +0x17(%x28) +85df7fef : prfd 15, p7, [sp, #31, MUL VL] : prfd $0x0f %p7 +0x1f(%sp) + # PRFD , , [.S{, #}] (PRFD-I.P.AI-S) 8580e000 : prfd PLDL1KEEP, p0, [z0.s, #0] : prfd $0x00 %p0 (%z0.s) 8582e481 : prfd PLDL1STRM, p1, [z4.s, #16] : prfd $0x01 %p1 +0x10(%z4.s) @@ -13836,6 +14232,42 @@ c599ff6d : prfd PSTL3STRM, p7, [z27.d, #200] : prfd $0x0d %p7 +0xc8(%z c59bffae : prfd 14, p7, [z29.d, #216] : prfd $0x0e %p7 +0xd8(%z29.d) c59fffef : prfd 15, p7, [z31.d, #248] : prfd $0x0f %p7 +0xf8(%z31.d) +# PRFD , , [, .D, LSL #3] (PRFD-I.P.BZ-D.64.scaled) +c460e000 : prfd PLDL1KEEP, p0, [x0, z0.d, LSL #3] : prfd $0x00 %p0 (%x0,%z0.d,lsl #3) +c465e481 : prfd PLDL1STRM, p1, [x4, z5.d, LSL #3] : prfd $0x01 %p1 (%x4,%z5.d,lsl #3) +c467e8c2 : prfd PLDL2KEEP, p2, [x6, z7.d, LSL #3] : prfd $0x02 %p2 (%x6,%z7.d,lsl #3) +c469e903 : prfd PLDL2STRM, p2, [x8, z9.d, LSL #3] : prfd $0x03 %p2 (%x8,%z9.d,lsl #3) 
+c46bed44 : prfd PLDL3KEEP, p3, [x10, z11.d, LSL #3] : prfd $0x04 %p3 (%x10,%z11.d,lsl #3) +c46ded65 : prfd PLDL3STRM, p3, [x11, z13.d, LSL #3] : prfd $0x05 %p3 (%x11,%z13.d,lsl #3) +c46ff1a6 : prfd 6, p4, [x13, z15.d, LSL #3] : prfd $0x06 %p4 (%x13,%z15.d,lsl #3) +c471f1e7 : prfd 7, p4, [x15, z17.d, LSL #3] : prfd $0x07 %p4 (%x15,%z17.d,lsl #3) +c473f628 : prfd PSTL1KEEP, p5, [x17, z19.d, LSL #3] : prfd $0x08 %p5 (%x17,%z19.d,lsl #3) +c474f669 : prfd PSTL1STRM, p5, [x19, z20.d, LSL #3] : prfd $0x09 %p5 (%x19,%z20.d,lsl #3) +c476f6aa : prfd PSTL2KEEP, p5, [x21, z22.d, LSL #3] : prfd $0x0a %p5 (%x21,%z22.d,lsl #3) +c478faeb : prfd PSTL2STRM, p6, [x23, z24.d, LSL #3] : prfd $0x0b %p6 (%x23,%z24.d,lsl #3) +c47afb0c : prfd PSTL3KEEP, p6, [x24, z26.d, LSL #3] : prfd $0x0c %p6 (%x24,%z26.d,lsl #3) +c47cff4d : prfd PSTL3STRM, p7, [x26, z28.d, LSL #3] : prfd $0x0d %p7 (%x26,%z28.d,lsl #3) +c47eff8e : prfd 14, p7, [x28, z30.d, LSL #3] : prfd $0x0e %p7 (%x28,%z30.d,lsl #3) +c47fffef : prfd 15, p7, [sp, z31.d, LSL #3] : prfd $0x0f %p7 (%sp,%z31.d,lsl #3) + +# PRFH , , [{, #, MUL VL}] (PRFH-I.P.BI-S) +85e02000 : prfh PLDL1KEEP, p0, [x0, #-32, MUL VL] : prfh $0x00 %p0 -0x20(%x0) +85e42481 : prfh PLDL1STRM, p1, [x4, #-28, MUL VL] : prfh $0x01 %p1 -0x1c(%x4) +85e828c2 : prfh PLDL2KEEP, p2, [x6, #-24, MUL VL] : prfh $0x02 %p2 -0x18(%x6) +85ec2903 : prfh PLDL2STRM, p2, [x8, #-20, MUL VL] : prfh $0x03 %p2 -0x14(%x8) +85f02d44 : prfh PLDL3KEEP, p3, [x10, #-16, MUL VL] : prfh $0x04 %p3 -0x10(%x10) +85f42d65 : prfh PLDL3STRM, p3, [x11, #-12, MUL VL] : prfh $0x05 %p3 -0x0c(%x11) +85f831a6 : prfh 6, p4, [x13, #-8, MUL VL] : prfh $0x06 %p4 -0x08(%x13) +85fc31e7 : prfh 7, p4, [x15, #-4, MUL VL] : prfh $0x07 %p4 -0x04(%x15) +85c03628 : prfh PSTL1KEEP, p5, [x17, #0, MUL VL] : prfh $0x08 %p5 (%x17) +85c33669 : prfh PSTL1STRM, p5, [x19, #3, MUL VL] : prfh $0x09 %p5 +0x03(%x19) +85c736aa : prfh PSTL2KEEP, p5, [x21, #7, MUL VL] : prfh $0x0a %p5 +0x07(%x21) +85cb3aeb : prfh PSTL2STRM, p6, [x23, 
#11, MUL VL] : prfh $0x0b %p6 +0x0b(%x23) +85cf3b0c : prfh PSTL3KEEP, p6, [x24, #15, MUL VL] : prfh $0x0c %p6 +0x0f(%x24) +85d33f4d : prfh PSTL3STRM, p7, [x26, #19, MUL VL] : prfh $0x0d %p7 +0x13(%x26) +85d73f8e : prfh 14, p7, [x28, #23, MUL VL] : prfh $0x0e %p7 +0x17(%x28) +85df3fef : prfh 15, p7, [sp, #31, MUL VL] : prfh $0x0f %p7 +0x1f(%sp) + # PRFH , , [.S{, #}] (PRFH-I.P.AI-S) 8480e000 : prfh PLDL1KEEP, p0, [z0.s, #0] : prfh $0x00 %p0 (%z0.s) 8482e481 : prfh PLDL1STRM, p1, [z4.s, #4] : prfh $0x01 %p1 +0x04(%z4.s) @@ -13872,6 +14304,42 @@ c499ff6d : prfh PSTL3STRM, p7, [z27.d, #50] : prfh $0x0d %p7 +0x32(%z c49bffae : prfh 14, p7, [z29.d, #54] : prfh $0x0e %p7 +0x36(%z29.d) c49fffef : prfh 15, p7, [z31.d, #62] : prfh $0x0f %p7 +0x3e(%z31.d) +# PRFH , , [, .D, LSL #1] (PRFH-I.P.BZ-D.64.scaled) +c460a000 : prfh PLDL1KEEP, p0, [x0, z0.d, LSL #1] : prfh $0x00 %p0 (%x0,%z0.d,lsl #1) +c465a481 : prfh PLDL1STRM, p1, [x4, z5.d, LSL #1] : prfh $0x01 %p1 (%x4,%z5.d,lsl #1) +c467a8c2 : prfh PLDL2KEEP, p2, [x6, z7.d, LSL #1] : prfh $0x02 %p2 (%x6,%z7.d,lsl #1) +c469a903 : prfh PLDL2STRM, p2, [x8, z9.d, LSL #1] : prfh $0x03 %p2 (%x8,%z9.d,lsl #1) +c46bad44 : prfh PLDL3KEEP, p3, [x10, z11.d, LSL #1] : prfh $0x04 %p3 (%x10,%z11.d,lsl #1) +c46dad65 : prfh PLDL3STRM, p3, [x11, z13.d, LSL #1] : prfh $0x05 %p3 (%x11,%z13.d,lsl #1) +c46fb1a6 : prfh 6, p4, [x13, z15.d, LSL #1] : prfh $0x06 %p4 (%x13,%z15.d,lsl #1) +c471b1e7 : prfh 7, p4, [x15, z17.d, LSL #1] : prfh $0x07 %p4 (%x15,%z17.d,lsl #1) +c473b628 : prfh PSTL1KEEP, p5, [x17, z19.d, LSL #1] : prfh $0x08 %p5 (%x17,%z19.d,lsl #1) +c474b669 : prfh PSTL1STRM, p5, [x19, z20.d, LSL #1] : prfh $0x09 %p5 (%x19,%z20.d,lsl #1) +c476b6aa : prfh PSTL2KEEP, p5, [x21, z22.d, LSL #1] : prfh $0x0a %p5 (%x21,%z22.d,lsl #1) +c478baeb : prfh PSTL2STRM, p6, [x23, z24.d, LSL #1] : prfh $0x0b %p6 (%x23,%z24.d,lsl #1) +c47abb0c : prfh PSTL3KEEP, p6, [x24, z26.d, LSL #1] : prfh $0x0c %p6 (%x24,%z26.d,lsl #1) +c47cbf4d : prfh PSTL3STRM, p7, [x26, 
z28.d, LSL #1] : prfh $0x0d %p7 (%x26,%z28.d,lsl #1) +c47ebf8e : prfh 14, p7, [x28, z30.d, LSL #1] : prfh $0x0e %p7 (%x28,%z30.d,lsl #1) +c47fbfef : prfh 15, p7, [sp, z31.d, LSL #1] : prfh $0x0f %p7 (%sp,%z31.d,lsl #1) + +# PRFW , , [{, #, MUL VL}] (PRFW-I.P.BI-S) +85e04000 : prfw PLDL1KEEP, p0, [x0, #-32, MUL VL] : prfw $0x00 %p0 -0x20(%x0) +85e44481 : prfw PLDL1STRM, p1, [x4, #-28, MUL VL] : prfw $0x01 %p1 -0x1c(%x4) +85e848c2 : prfw PLDL2KEEP, p2, [x6, #-24, MUL VL] : prfw $0x02 %p2 -0x18(%x6) +85ec4903 : prfw PLDL2STRM, p2, [x8, #-20, MUL VL] : prfw $0x03 %p2 -0x14(%x8) +85f04d44 : prfw PLDL3KEEP, p3, [x10, #-16, MUL VL] : prfw $0x04 %p3 -0x10(%x10) +85f44d65 : prfw PLDL3STRM, p3, [x11, #-12, MUL VL] : prfw $0x05 %p3 -0x0c(%x11) +85f851a6 : prfw 6, p4, [x13, #-8, MUL VL] : prfw $0x06 %p4 -0x08(%x13) +85fc51e7 : prfw 7, p4, [x15, #-4, MUL VL] : prfw $0x07 %p4 -0x04(%x15) +85c05628 : prfw PSTL1KEEP, p5, [x17, #0, MUL VL] : prfw $0x08 %p5 (%x17) +85c35669 : prfw PSTL1STRM, p5, [x19, #3, MUL VL] : prfw $0x09 %p5 +0x03(%x19) +85c756aa : prfw PSTL2KEEP, p5, [x21, #7, MUL VL] : prfw $0x0a %p5 +0x07(%x21) +85cb5aeb : prfw PSTL2STRM, p6, [x23, #11, MUL VL] : prfw $0x0b %p6 +0x0b(%x23) +85cf5b0c : prfw PSTL3KEEP, p6, [x24, #15, MUL VL] : prfw $0x0c %p6 +0x0f(%x24) +85d35f4d : prfw PSTL3STRM, p7, [x26, #19, MUL VL] : prfw $0x0d %p7 +0x13(%x26) +85d75f8e : prfw 14, p7, [x28, #23, MUL VL] : prfw $0x0e %p7 +0x17(%x28) +85df5fef : prfw 15, p7, [sp, #31, MUL VL] : prfw $0x0f %p7 +0x1f(%sp) + # PRFW , , [.S{, #}] (PRFW-I.P.AI-S) 8500e000 : prfw PLDL1KEEP, p0, [z0.s, #0] : prfw $0x00 %p0 (%z0.s) 8502e481 : prfw PLDL1STRM, p1, [z4.s, #8] : prfw $0x01 %p1 +0x08(%z4.s) @@ -13908,6 +14376,24 @@ c519ff6d : prfw PSTL3STRM, p7, [z27.d, #100] : prfw $0x0d %p7 +0x64(%z c51bffae : prfw 14, p7, [z29.d, #108] : prfw $0x0e %p7 +0x6c(%z29.d) c51fffef : prfw 15, p7, [z31.d, #124] : prfw $0x0f %p7 +0x7c(%z31.d) +# PRFW , , [, .D, LSL #2] (PRFW-I.P.BZ-D.64.scaled) +c460c000 : prfw PLDL1KEEP, p0, 
[x0, z0.d, LSL #2] : prfw $0x00 %p0 (%x0,%z0.d,lsl #2) +c465c481 : prfw PLDL1STRM, p1, [x4, z5.d, LSL #2] : prfw $0x01 %p1 (%x4,%z5.d,lsl #2) +c467c8c2 : prfw PLDL2KEEP, p2, [x6, z7.d, LSL #2] : prfw $0x02 %p2 (%x6,%z7.d,lsl #2) +c469c903 : prfw PLDL2STRM, p2, [x8, z9.d, LSL #2] : prfw $0x03 %p2 (%x8,%z9.d,lsl #2) +c46bcd44 : prfw PLDL3KEEP, p3, [x10, z11.d, LSL #2] : prfw $0x04 %p3 (%x10,%z11.d,lsl #2) +c46dcd65 : prfw PLDL3STRM, p3, [x11, z13.d, LSL #2] : prfw $0x05 %p3 (%x11,%z13.d,lsl #2) +c46fd1a6 : prfw 6, p4, [x13, z15.d, LSL #2] : prfw $0x06 %p4 (%x13,%z15.d,lsl #2) +c471d1e7 : prfw 7, p4, [x15, z17.d, LSL #2] : prfw $0x07 %p4 (%x15,%z17.d,lsl #2) +c473d628 : prfw PSTL1KEEP, p5, [x17, z19.d, LSL #2] : prfw $0x08 %p5 (%x17,%z19.d,lsl #2) +c474d669 : prfw PSTL1STRM, p5, [x19, z20.d, LSL #2] : prfw $0x09 %p5 (%x19,%z20.d,lsl #2) +c476d6aa : prfw PSTL2KEEP, p5, [x21, z22.d, LSL #2] : prfw $0x0a %p5 (%x21,%z22.d,lsl #2) +c478daeb : prfw PSTL2STRM, p6, [x23, z24.d, LSL #2] : prfw $0x0b %p6 (%x23,%z24.d,lsl #2) +c47adb0c : prfw PSTL3KEEP, p6, [x24, z26.d, LSL #2] : prfw $0x0c %p6 (%x24,%z26.d,lsl #2) +c47cdf4d : prfw PSTL3STRM, p7, [x26, z28.d, LSL #2] : prfw $0x0d %p7 (%x26,%z28.d,lsl #2) +c47edf8e : prfw 14, p7, [x28, z30.d, LSL #2] : prfw $0x0e %p7 (%x28,%z30.d,lsl #2) +c47fdfef : prfw 15, p7, [sp, z31.d, LSL #2] : prfw $0x0f %p7 (%sp,%z31.d,lsl #2) + # PTEST , .B (PTEST-.P.P-_) 2550c000 : ptest p0, p0.b : ptest %p0 %p0.b 2550c440 : ptest p1, p2.b : ptest %p1 %p2.b @@ -17069,6 +17555,24 @@ e459bf79 : st1b z25.d, p7, [z27.d, #25] : st1b %z25.d %p7 -> +0x1 e45bbfbb : st1b z27.d, p7, [z29.d, #27] : st1b %z27.d %p7 -> +0x1b(%z29.d)[4byte] e45fbfff : st1b z31.d, p7, [z31.d, #31] : st1b %z31.d %p7 -> +0x1f(%z31.d)[4byte] +# ST1B { .D }, , [, .D] (ST1B-Z.P.BZ-D.64.unscaled) +e400a000 : st1b z0.d, p0, [x0, z0.d] : st1b %z0.d %p0 -> (%x0,%z0.d)[4byte] +e405a482 : st1b z2.d, p1, [x4, z5.d] : st1b %z2.d %p1 -> (%x4,%z5.d)[4byte] +e407a8c4 : st1b z4.d, p2, [x6, z7.d] : 
st1b %z4.d %p2 -> (%x6,%z7.d)[4byte] +e409a906 : st1b z6.d, p2, [x8, z9.d] : st1b %z6.d %p2 -> (%x8,%z9.d)[4byte] +e40bad48 : st1b z8.d, p3, [x10, z11.d] : st1b %z8.d %p3 -> (%x10,%z11.d)[4byte] +e40dad6a : st1b z10.d, p3, [x11, z13.d] : st1b %z10.d %p3 -> (%x11,%z13.d)[4byte] +e40fb1ac : st1b z12.d, p4, [x13, z15.d] : st1b %z12.d %p4 -> (%x13,%z15.d)[4byte] +e411b1ee : st1b z14.d, p4, [x15, z17.d] : st1b %z14.d %p4 -> (%x15,%z17.d)[4byte] +e413b630 : st1b z16.d, p5, [x17, z19.d] : st1b %z16.d %p5 -> (%x17,%z19.d)[4byte] +e414b671 : st1b z17.d, p5, [x19, z20.d] : st1b %z17.d %p5 -> (%x19,%z20.d)[4byte] +e416b6b3 : st1b z19.d, p5, [x21, z22.d] : st1b %z19.d %p5 -> (%x21,%z22.d)[4byte] +e418baf5 : st1b z21.d, p6, [x23, z24.d] : st1b %z21.d %p6 -> (%x23,%z24.d)[4byte] +e41abb17 : st1b z23.d, p6, [x24, z26.d] : st1b %z23.d %p6 -> (%x24,%z26.d)[4byte] +e41cbf59 : st1b z25.d, p7, [x26, z28.d] : st1b %z25.d %p7 -> (%x26,%z28.d)[4byte] +e41ebf9b : st1b z27.d, p7, [x28, z30.d] : st1b %z27.d %p7 -> (%x28,%z30.d)[4byte] +e41fbfff : st1b z31.d, p7, [sp, z31.d] : st1b %z31.d %p7 -> (%sp,%z31.d)[4byte] + # ST1D { .D }, , [.D{, #}] (ST1D-Z.P.AI-D) e5c0a000 : st1d z0.d, p0, [z0.d, #0] : st1d %z0.d %p0 -> (%z0.d)[32byte] e5c2a482 : st1d z2.d, p1, [z4.d, #16] : st1d %z2.d %p1 -> +0x10(%z4.d)[32byte] @@ -17087,6 +17591,42 @@ e5d9bf79 : st1d z25.d, p7, [z27.d, #200] : st1d %z25.d %p7 -> +0xc e5dbbfbb : st1d z27.d, p7, [z29.d, #216] : st1d %z27.d %p7 -> +0xd8(%z29.d)[32byte] e5dfbfff : st1d z31.d, p7, [z31.d, #248] : st1d %z31.d %p7 -> +0xf8(%z31.d)[32byte] +# ST1D { .D }, , [, .D, LSL #3] (ST1D-Z.P.BZ-D.64.scaled) +e5a0a000 : st1d z0.d, p0, [x0, z0.d, LSL #3] : st1d %z0.d %p0 -> (%x0,%z0.d,lsl #3)[32byte] +e5a5a482 : st1d z2.d, p1, [x4, z5.d, LSL #3] : st1d %z2.d %p1 -> (%x4,%z5.d,lsl #3)[32byte] +e5a7a8c4 : st1d z4.d, p2, [x6, z7.d, LSL #3] : st1d %z4.d %p2 -> (%x6,%z7.d,lsl #3)[32byte] +e5a9a906 : st1d z6.d, p2, [x8, z9.d, LSL #3] : st1d %z6.d %p2 -> (%x8,%z9.d,lsl #3)[32byte] 
+e5abad48 : st1d z8.d, p3, [x10, z11.d, LSL #3] : st1d %z8.d %p3 -> (%x10,%z11.d,lsl #3)[32byte] +e5adad6a : st1d z10.d, p3, [x11, z13.d, LSL #3] : st1d %z10.d %p3 -> (%x11,%z13.d,lsl #3)[32byte] +e5afb1ac : st1d z12.d, p4, [x13, z15.d, LSL #3] : st1d %z12.d %p4 -> (%x13,%z15.d,lsl #3)[32byte] +e5b1b1ee : st1d z14.d, p4, [x15, z17.d, LSL #3] : st1d %z14.d %p4 -> (%x15,%z17.d,lsl #3)[32byte] +e5b3b630 : st1d z16.d, p5, [x17, z19.d, LSL #3] : st1d %z16.d %p5 -> (%x17,%z19.d,lsl #3)[32byte] +e5b4b671 : st1d z17.d, p5, [x19, z20.d, LSL #3] : st1d %z17.d %p5 -> (%x19,%z20.d,lsl #3)[32byte] +e5b6b6b3 : st1d z19.d, p5, [x21, z22.d, LSL #3] : st1d %z19.d %p5 -> (%x21,%z22.d,lsl #3)[32byte] +e5b8baf5 : st1d z21.d, p6, [x23, z24.d, LSL #3] : st1d %z21.d %p6 -> (%x23,%z24.d,lsl #3)[32byte] +e5babb17 : st1d z23.d, p6, [x24, z26.d, LSL #3] : st1d %z23.d %p6 -> (%x24,%z26.d,lsl #3)[32byte] +e5bcbf59 : st1d z25.d, p7, [x26, z28.d, LSL #3] : st1d %z25.d %p7 -> (%x26,%z28.d,lsl #3)[32byte] +e5bebf9b : st1d z27.d, p7, [x28, z30.d, LSL #3] : st1d %z27.d %p7 -> (%x28,%z30.d,lsl #3)[32byte] +e5bfbfff : st1d z31.d, p7, [sp, z31.d, LSL #3] : st1d %z31.d %p7 -> (%sp,%z31.d,lsl #3)[32byte] + +# ST1D { .D }, , [, .D] (ST1D-Z.P.BZ-D.64.unscaled) +e580a000 : st1d z0.d, p0, [x0, z0.d] : st1d %z0.d %p0 -> (%x0,%z0.d)[32byte] +e585a482 : st1d z2.d, p1, [x4, z5.d] : st1d %z2.d %p1 -> (%x4,%z5.d)[32byte] +e587a8c4 : st1d z4.d, p2, [x6, z7.d] : st1d %z4.d %p2 -> (%x6,%z7.d)[32byte] +e589a906 : st1d z6.d, p2, [x8, z9.d] : st1d %z6.d %p2 -> (%x8,%z9.d)[32byte] +e58bad48 : st1d z8.d, p3, [x10, z11.d] : st1d %z8.d %p3 -> (%x10,%z11.d)[32byte] +e58dad6a : st1d z10.d, p3, [x11, z13.d] : st1d %z10.d %p3 -> (%x11,%z13.d)[32byte] +e58fb1ac : st1d z12.d, p4, [x13, z15.d] : st1d %z12.d %p4 -> (%x13,%z15.d)[32byte] +e591b1ee : st1d z14.d, p4, [x15, z17.d] : st1d %z14.d %p4 -> (%x15,%z17.d)[32byte] +e593b630 : st1d z16.d, p5, [x17, z19.d] : st1d %z16.d %p5 -> (%x17,%z19.d)[32byte] +e594b671 : st1d z17.d, p5, 
[x19, z20.d] : st1d %z17.d %p5 -> (%x19,%z20.d)[32byte] +e596b6b3 : st1d z19.d, p5, [x21, z22.d] : st1d %z19.d %p5 -> (%x21,%z22.d)[32byte] +e598baf5 : st1d z21.d, p6, [x23, z24.d] : st1d %z21.d %p6 -> (%x23,%z24.d)[32byte] +e59abb17 : st1d z23.d, p6, [x24, z26.d] : st1d %z23.d %p6 -> (%x24,%z26.d)[32byte] +e59cbf59 : st1d z25.d, p7, [x26, z28.d] : st1d %z25.d %p7 -> (%x26,%z28.d)[32byte] +e59ebf9b : st1d z27.d, p7, [x28, z30.d] : st1d %z27.d %p7 -> (%x28,%z30.d)[32byte] +e59fbfff : st1d z31.d, p7, [sp, z31.d] : st1d %z31.d %p7 -> (%sp,%z31.d)[32byte] + # ST1H { .S }, , [.S{, #}] (ST1H-Z.P.AI-S) e4e0a000 : st1h z0.s, p0, [z0.s, #0] : st1h %z0.s %p0 -> (%z0.s)[16byte] e4e2a482 : st1h z2.s, p1, [z4.s, #4] : st1h %z2.s %p1 -> +0x04(%z4.s)[16byte] @@ -17123,6 +17663,24 @@ e4d9bf79 : st1h z25.d, p7, [z27.d, #50] : st1h %z25.d %p7 -> +0x3 e4dbbfbb : st1h z27.d, p7, [z29.d, #54] : st1h %z27.d %p7 -> +0x36(%z29.d)[8byte] e4dfbfff : st1h z31.d, p7, [z31.d, #62] : st1h %z31.d %p7 -> +0x3e(%z31.d)[8byte] +# ST1H { .D }, , [, .D, LSL #1] (ST1H-Z.P.BZ-D.64.scaled) +e4a0a000 : st1h z0.d, p0, [x0, z0.d, LSL #1] : st1h %z0.d %p0 -> (%x0,%z0.d,lsl #1)[8byte] +e4a5a482 : st1h z2.d, p1, [x4, z5.d, LSL #1] : st1h %z2.d %p1 -> (%x4,%z5.d,lsl #1)[8byte] +e4a7a8c4 : st1h z4.d, p2, [x6, z7.d, LSL #1] : st1h %z4.d %p2 -> (%x6,%z7.d,lsl #1)[8byte] +e4a9a906 : st1h z6.d, p2, [x8, z9.d, LSL #1] : st1h %z6.d %p2 -> (%x8,%z9.d,lsl #1)[8byte] +e4abad48 : st1h z8.d, p3, [x10, z11.d, LSL #1] : st1h %z8.d %p3 -> (%x10,%z11.d,lsl #1)[8byte] +e4adad6a : st1h z10.d, p3, [x11, z13.d, LSL #1] : st1h %z10.d %p3 -> (%x11,%z13.d,lsl #1)[8byte] +e4afb1ac : st1h z12.d, p4, [x13, z15.d, LSL #1] : st1h %z12.d %p4 -> (%x13,%z15.d,lsl #1)[8byte] +e4b1b1ee : st1h z14.d, p4, [x15, z17.d, LSL #1] : st1h %z14.d %p4 -> (%x15,%z17.d,lsl #1)[8byte] +e4b3b630 : st1h z16.d, p5, [x17, z19.d, LSL #1] : st1h %z16.d %p5 -> (%x17,%z19.d,lsl #1)[8byte] +e4b4b671 : st1h z17.d, p5, [x19, z20.d, LSL #1] : st1h %z17.d %p5 -> 
(%x19,%z20.d,lsl #1)[8byte] +e4b6b6b3 : st1h z19.d, p5, [x21, z22.d, LSL #1] : st1h %z19.d %p5 -> (%x21,%z22.d,lsl #1)[8byte] +e4b8baf5 : st1h z21.d, p6, [x23, z24.d, LSL #1] : st1h %z21.d %p6 -> (%x23,%z24.d,lsl #1)[8byte] +e4babb17 : st1h z23.d, p6, [x24, z26.d, LSL #1] : st1h %z23.d %p6 -> (%x24,%z26.d,lsl #1)[8byte] +e4bcbf59 : st1h z25.d, p7, [x26, z28.d, LSL #1] : st1h %z25.d %p7 -> (%x26,%z28.d,lsl #1)[8byte] +e4bebf9b : st1h z27.d, p7, [x28, z30.d, LSL #1] : st1h %z27.d %p7 -> (%x28,%z30.d,lsl #1)[8byte] +e4bfbfff : st1h z31.d, p7, [sp, z31.d, LSL #1] : st1h %z31.d %p7 -> (%sp,%z31.d,lsl #1)[8byte] + # ST1W { .S }, , [.S{, #}] (ST1W-Z.P.AI-S) e560a000 : st1w z0.s, p0, [z0.s, #0] : st1w %z0.s %p0 -> (%z0.s)[32byte] e562a482 : st1w z2.s, p1, [z4.s, #8] : st1w %z2.s %p1 -> +0x08(%z4.s)[32byte] @@ -17159,6 +17717,24 @@ e559bf79 : st1w z25.d, p7, [z27.d, #100] : st1w %z25.d %p7 -> +0x6 e55bbfbb : st1w z27.d, p7, [z29.d, #108] : st1w %z27.d %p7 -> +0x6c(%z29.d)[16byte] e55fbfff : st1w z31.d, p7, [z31.d, #124] : st1w %z31.d %p7 -> +0x7c(%z31.d)[16byte] +# ST1W { .D }, , [, .D, LSL #2] (ST1W-Z.P.BZ-D.64.scaled) +e520a000 : st1w z0.d, p0, [x0, z0.d, LSL #2] : st1w %z0.d %p0 -> (%x0,%z0.d,lsl #2)[16byte] +e525a482 : st1w z2.d, p1, [x4, z5.d, LSL #2] : st1w %z2.d %p1 -> (%x4,%z5.d,lsl #2)[16byte] +e527a8c4 : st1w z4.d, p2, [x6, z7.d, LSL #2] : st1w %z4.d %p2 -> (%x6,%z7.d,lsl #2)[16byte] +e529a906 : st1w z6.d, p2, [x8, z9.d, LSL #2] : st1w %z6.d %p2 -> (%x8,%z9.d,lsl #2)[16byte] +e52bad48 : st1w z8.d, p3, [x10, z11.d, LSL #2] : st1w %z8.d %p3 -> (%x10,%z11.d,lsl #2)[16byte] +e52dad6a : st1w z10.d, p3, [x11, z13.d, LSL #2] : st1w %z10.d %p3 -> (%x11,%z13.d,lsl #2)[16byte] +e52fb1ac : st1w z12.d, p4, [x13, z15.d, LSL #2] : st1w %z12.d %p4 -> (%x13,%z15.d,lsl #2)[16byte] +e531b1ee : st1w z14.d, p4, [x15, z17.d, LSL #2] : st1w %z14.d %p4 -> (%x15,%z17.d,lsl #2)[16byte] +e533b630 : st1w z16.d, p5, [x17, z19.d, LSL #2] : st1w %z16.d %p5 -> (%x17,%z19.d,lsl #2)[16byte] 
+e534b671 : st1w z17.d, p5, [x19, z20.d, LSL #2] : st1w %z17.d %p5 -> (%x19,%z20.d,lsl #2)[16byte] +e536b6b3 : st1w z19.d, p5, [x21, z22.d, LSL #2] : st1w %z19.d %p5 -> (%x21,%z22.d,lsl #2)[16byte] +e538baf5 : st1w z21.d, p6, [x23, z24.d, LSL #2] : st1w %z21.d %p6 -> (%x23,%z24.d,lsl #2)[16byte] +e53abb17 : st1w z23.d, p6, [x24, z26.d, LSL #2] : st1w %z23.d %p6 -> (%x24,%z26.d,lsl #2)[16byte] +e53cbf59 : st1w z25.d, p7, [x26, z28.d, LSL #2] : st1w %z25.d %p7 -> (%x26,%z28.d,lsl #2)[16byte] +e53ebf9b : st1w z27.d, p7, [x28, z30.d, LSL #2] : st1w %z27.d %p7 -> (%x28,%z30.d,lsl #2)[16byte] +e53fbfff : st1w z31.d, p7, [sp, z31.d, LSL #2] : st1w %z31.d %p7 -> (%sp,%z31.d,lsl #2)[16byte] + # STNT1B { .B }, , [, ] (STNT1B-Z.P.BR-Contiguous) e4006000 : stnt1b z0.b, p0, [x0, x0] : stnt1b %z0.b %p0 -> (%x0,%x0)[32byte] e4056482 : stnt1b z2.b, p1, [x4, x5] : stnt1b %z2.b %p1 -> (%x4,%x5)[32byte] diff --git a/suite/tests/api/ir_aarch64_sve.c b/suite/tests/api/ir_aarch64_sve.c index 82bf9cccd38..5e08334d87c 100644 --- a/suite/tests/api/ir_aarch64_sve.c +++ b/suite/tests/api/ir_aarch64_sve.c @@ -13786,6 +13786,22 @@ TEST_INSTR(ldff1b_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_4, 0)); + + /* Testing LDFF1B { .D }, /Z, [, .D] */ + const char *const expected_5_0[6] = { + "ldff1b (%x0,%z0.d)[4byte] %p0/z -> %z0.d", + "ldff1b (%x7,%z8.d)[4byte] %p2/z -> %z5.d", + "ldff1b (%x12,%z13.d)[4byte] %p3/z -> %z10.d", + "ldff1b (%x17,%z19.d)[4byte] %p5/z -> %z16.d", + "ldff1b (%x22,%z24.d)[4byte] %p6/z -> %z21.d", + "ldff1b (%sp,%z31.d)[4byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1b, ldff1b_sve_pred, 6, expected_5_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64(Xn_six_offset_2_sp[i], + Zn_six_offset_3[i], OPSZ_8, + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)); } TEST_INSTR(ldff1d_sve_pred) @@ -13822,6 
+13838,38 @@ TEST_INSTR(ldff1d_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_32, 0)); + + /* Testing LDFF1D { .D }, /Z, [, .D, LSL #3] */ + const char *const expected_2_0[6] = { + "ldff1d (%x0,%z0.d,lsl #3)[32byte] %p0/z -> %z0.d", + "ldff1d (%x7,%z8.d,lsl #3)[32byte] %p2/z -> %z5.d", + "ldff1d (%x12,%z13.d,lsl #3)[32byte] %p3/z -> %z10.d", + "ldff1d (%x17,%z19.d,lsl #3)[32byte] %p5/z -> %z16.d", + "ldff1d (%x22,%z24.d,lsl #3)[32byte] %p6/z -> %z21.d", + "ldff1d (%sp,%z31.d,lsl #3)[32byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1d, ldff1d_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_32, 3)); + + /* Testing LDFF1D { .D }, /Z, [, .D] */ + const char *const expected_3_0[6] = { + "ldff1d (%x0,%z0.d)[32byte] %p0/z -> %z0.d", + "ldff1d (%x7,%z8.d)[32byte] %p2/z -> %z5.d", + "ldff1d (%x12,%z13.d)[32byte] %p3/z -> %z10.d", + "ldff1d (%x17,%z19.d)[32byte] %p5/z -> %z16.d", + "ldff1d (%x22,%z24.d)[32byte] %p6/z -> %z21.d", + "ldff1d (%sp,%z31.d)[32byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1d, ldff1d_sve_pred, 6, expected_3_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_32, 0)); } TEST_INSTR(ldff1h_sve_pred) @@ -13906,6 +13954,38 @@ TEST_INSTR(ldff1h_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_8, 0)); + + /* Testing LDFF1H { .D }, /Z, [, .D, LSL #1] */ + const char *const expected_4_0[6] = { + "ldff1h (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d", + "ldff1h (%x7,%z8.d,lsl #1)[8byte] 
%p2/z -> %z5.d", + "ldff1h (%x12,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d", + "ldff1h (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d", + "ldff1h (%x22,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d", + "ldff1h (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_4_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_8, 1)); + + /* Testing LDFF1H { .D }, /Z, [, .D] */ + const char *const expected_5_0[6] = { + "ldff1h (%x0,%z0.d)[8byte] %p0/z -> %z0.d", + "ldff1h (%x7,%z8.d)[8byte] %p2/z -> %z5.d", + "ldff1h (%x12,%z13.d)[8byte] %p3/z -> %z10.d", + "ldff1h (%x17,%z19.d)[8byte] %p5/z -> %z16.d", + "ldff1h (%x22,%z24.d)[8byte] %p6/z -> %z21.d", + "ldff1h (%sp,%z31.d)[8byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1h, ldff1h_sve_pred, 6, expected_5_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_8, 0)); } TEST_INSTR(ldff1sb_sve_pred) @@ -13987,6 +14067,22 @@ TEST_INSTR(ldff1sb_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_4, 0)); + + /* Testing LDFF1SB { .D }, /Z, [, .D] */ + const char *const expected_4_0[6] = { + "ldff1sb (%x0,%z0.d)[4byte] %p0/z -> %z0.d", + "ldff1sb (%x7,%z8.d)[4byte] %p2/z -> %z5.d", + "ldff1sb (%x12,%z13.d)[4byte] %p3/z -> %z10.d", + "ldff1sb (%x17,%z19.d)[4byte] %p5/z -> %z16.d", + "ldff1sb (%x22,%z24.d)[4byte] %p6/z -> %z21.d", + "ldff1sb (%sp,%z31.d)[4byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1sb, ldff1sb_sve_pred, 6, expected_4_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + 
opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64(Xn_six_offset_2_sp[i], + Zn_six_offset_3[i], OPSZ_8, + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)); } TEST_INSTR(ldff1sh_sve_pred) @@ -14055,6 +14151,38 @@ TEST_INSTR(ldff1sh_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_8, 0)); + + /* Testing LDFF1SH { .D }, /Z, [, .D, LSL #1] */ + const char *const expected_3_0[6] = { + "ldff1sh (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d", + "ldff1sh (%x7,%z8.d,lsl #1)[8byte] %p2/z -> %z5.d", + "ldff1sh (%x12,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d", + "ldff1sh (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d", + "ldff1sh (%x22,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d", + "ldff1sh (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_3_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_8, 1)); + + /* Testing LDFF1SH { .D }, /Z, [, .D] */ + const char *const expected_4_0[6] = { + "ldff1sh (%x0,%z0.d)[8byte] %p0/z -> %z0.d", + "ldff1sh (%x7,%z8.d)[8byte] %p2/z -> %z5.d", + "ldff1sh (%x12,%z13.d)[8byte] %p3/z -> %z10.d", + "ldff1sh (%x17,%z19.d)[8byte] %p5/z -> %z16.d", + "ldff1sh (%x22,%z24.d)[8byte] %p6/z -> %z21.d", + "ldff1sh (%sp,%z31.d)[8byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1sh, ldff1sh_sve_pred, 6, expected_4_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_8, 0)); } TEST_INSTR(ldff1sw_sve_pred) @@ -14091,6 +14219,38 @@ TEST_INSTR(ldff1sw_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], 
DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_16, 0)); + + /* Testing LDFF1SW { .D }, /Z, [, .D, LSL #2] */ + const char *const expected_2_0[6] = { + "ldff1sw (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d", + "ldff1sw (%x7,%z8.d,lsl #2)[16byte] %p2/z -> %z5.d", + "ldff1sw (%x12,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d", + "ldff1sw (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d", + "ldff1sw (%x22,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d", + "ldff1sw (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1sw, ldff1sw_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_16, 2)); + + /* Testing LDFF1SW { .D }, /Z, [, .D] */ + const char *const expected_3_0[6] = { + "ldff1sw (%x0,%z0.d)[16byte] %p0/z -> %z0.d", + "ldff1sw (%x7,%z8.d)[16byte] %p2/z -> %z5.d", + "ldff1sw (%x12,%z13.d)[16byte] %p3/z -> %z10.d", + "ldff1sw (%x17,%z19.d)[16byte] %p5/z -> %z16.d", + "ldff1sw (%x22,%z24.d)[16byte] %p6/z -> %z21.d", + "ldff1sw (%sp,%z31.d)[16byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1sw, ldff1sw_sve_pred, 6, expected_3_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_16, 0)); } TEST_INSTR(ldff1w_sve_pred) @@ -14159,6 +14319,38 @@ TEST_INSTR(ldff1w_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_16, 0)); + + /* Testing LDFF1W { .D }, /Z, [, .D, LSL #2] */ + const char *const expected_3_0[6] = { + "ldff1w (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d", + "ldff1w (%x7,%z8.d,lsl #2)[16byte] %p2/z -> %z5.d", + "ldff1w (%x12,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d", + 
"ldff1w (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d", + "ldff1w (%x22,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d", + "ldff1w (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_3_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_16, 2)); + + /* Testing LDFF1W { .D }, /Z, [, .D] */ + const char *const expected_4_0[6] = { + "ldff1w (%x0,%z0.d)[16byte] %p0/z -> %z0.d", + "ldff1w (%x7,%z8.d)[16byte] %p2/z -> %z5.d", + "ldff1w (%x12,%z13.d)[16byte] %p3/z -> %z10.d", + "ldff1w (%x17,%z19.d)[16byte] %p5/z -> %z16.d", + "ldff1w (%x22,%z24.d)[16byte] %p6/z -> %z21.d", + "ldff1w (%sp,%z31.d)[16byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ldff1w, ldff1w_sve_pred, 6, expected_4_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_16, 0)); } TEST_INSTR(fcadd_sve_pred) @@ -14394,6 +14586,22 @@ TEST_INSTR(ld1b_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_4, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_8, 0)); + + /* Testing LD1B { .D }, /Z, [, .D] */ + const char *const expected_5_0[6] = { + "ld1b (%x0,%z0.d)[4byte] %p0/z -> %z0.d", + "ld1b (%x7,%z8.d)[4byte] %p2/z -> %z5.d", + "ld1b (%x12,%z13.d)[4byte] %p3/z -> %z10.d", + "ld1b (%x17,%z19.d)[4byte] %p5/z -> %z16.d", + "ld1b (%x22,%z24.d)[4byte] %p6/z -> %z21.d", + "ld1b (%sp,%z31.d)[4byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1b, ld1b_sve_pred, 6, expected_5_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], 
Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_4, 0)); } TEST_INSTR(ld1rob_sve_pred) @@ -14514,6 +14722,22 @@ TEST_INSTR(ld1sb_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_4, 0)); + + /* Testing LD1SB { .D }, /Z, [, .D] */ + const char *const expected_4_0[6] = { + "ld1sb (%x0,%z0.d)[4byte] %p0/z -> %z0.d", + "ld1sb (%x7,%z8.d)[4byte] %p2/z -> %z5.d", + "ld1sb (%x12,%z13.d)[4byte] %p3/z -> %z10.d", + "ld1sb (%x17,%z19.d)[4byte] %p5/z -> %z16.d", + "ld1sb (%x22,%z24.d)[4byte] %p6/z -> %z21.d", + "ld1sb (%sp,%z31.d)[4byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1sb, ld1sb_sve_pred, 6, expected_4_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64(Xn_six_offset_2_sp[i], + Zn_six_offset_3[i], OPSZ_8, + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_4, 0)); } TEST_INSTR(ldnt1b_sve_pred) @@ -14626,6 +14850,22 @@ TEST_INSTR(st1b_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_4, 0)); + + /* Testing ST1B { .D }, , [, .D] */ + const char *const expected_2_0[6] = { + "st1b %z0.d %p0 -> (%x0,%z0.d)[4byte]", + "st1b %z5.d %p2 -> (%x7,%z8.d)[4byte]", + "st1b %z10.d %p3 -> (%x12,%z13.d)[4byte]", + "st1b %z16.d %p5 -> (%x17,%z19.d)[4byte]", + "st1b %z21.d %p6 -> (%x22,%z24.d)[4byte]", + "st1b %z31.d %p7 -> (%sp,%z31.d)[4byte]", + }; + TEST_LOOP(st1b, st1b_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_4, 0)); } TEST_INSTR(stnt1b_sve_pred) @@ -14929,6 +15169,19 @@ TEST_INSTR(prfb_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_0, 
0)); + + /* Testing PRFB , , [, .D] */ + const char *const expected_2_0[6] = { + "prfb $0x00 %p0 (%x0,%z0.d)", "prfb $0x02 %p2 (%x7,%z8.d)", + "prfb $0x05 %p3 (%x12,%z13.d)", "prfb $0x08 %p5 (%x17,%z19.d)", + "prfb $0x0a %p6 (%x22,%z24.d)", "prfb $0x0f %p7 (%sp,%z31.d)", + }; + TEST_LOOP(prfb, prfb_sve_pred, 6, expected_2_0[i], + opnd_create_immed_uint(prfop[i], OPSZ_4b), + opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_0, 0)); } TEST_INSTR(prfd_sve_pred) @@ -14975,6 +15228,18 @@ TEST_INSTR(prfd_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_0, 0)); + /* Testing PRFD , , [, .D, LSL #3] */ + const char *const expected_2_0[6] = { + "prfd $0x00 %p0 (%x0,%z0.d,lsl #3)", "prfd $0x02 %p2 (%x7,%z8.d,lsl #3)", + "prfd $0x05 %p3 (%x12,%z13.d,lsl #3)", "prfd $0x08 %p5 (%x17,%z19.d,lsl #3)", + "prfd $0x0a %p6 (%x22,%z24.d,lsl #3)", "prfd $0x0f %p7 (%sp,%z31.d,lsl #3)", + }; + TEST_LOOP(prfd, prfd_sve_pred, 6, expected_2_0[i], + opnd_create_immed_uint(prfop[i], OPSZ_4b), + opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_0, 3)); } TEST_INSTR(prfh_sve_pred) @@ -15021,6 +15286,19 @@ TEST_INSTR(prfh_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_0, 0)); + + /* Testing PRFH , , [, .D, LSL #1] */ + const char *const expected_2_0[6] = { + "prfh $0x00 %p0 (%x0,%z0.d,lsl #1)", "prfh $0x02 %p2 (%x7,%z8.d,lsl #1)", + "prfh $0x05 %p3 (%x12,%z13.d,lsl #1)", "prfh $0x08 %p5 (%x17,%z19.d,lsl #1)", + "prfh $0x0a %p6 (%x22,%z24.d,lsl #1)", "prfh $0x0f %p7 (%sp,%z31.d,lsl #1)", + }; + TEST_LOOP(prfh, prfh_sve_pred, 6, expected_2_0[i], + opnd_create_immed_uint(prfop[i], OPSZ_4b), + 
opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_0, 1)); } TEST_INSTR(prfw_sve_pred) @@ -15067,6 +15345,19 @@ TEST_INSTR(prfw_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_0, 0)); + + /* Testing PRFW , , [, .D, LSL #2] */ + const char *const expected_2_0[6] = { + "prfw $0x00 %p0 (%x0,%z0.d,lsl #2)", "prfw $0x02 %p2 (%x7,%z8.d,lsl #2)", + "prfw $0x05 %p3 (%x12,%z13.d,lsl #2)", "prfw $0x08 %p5 (%x17,%z19.d,lsl #2)", + "prfw $0x0a %p6 (%x22,%z24.d,lsl #2)", "prfw $0x0f %p7 (%sp,%z31.d,lsl #2)", + }; + TEST_LOOP(prfw, prfw_sve_pred, 6, expected_2_0[i], + opnd_create_immed_uint(prfop[i], OPSZ_4b), + opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_0, 2)); } TEST_INSTR(adr_sve) @@ -15435,6 +15726,38 @@ TEST_INSTR(ld1h_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_8, 0)); + + /* Testing LD1H { .D }, /Z, [, .D, LSL #1] */ + const char *const expected_1_0[6] = { + "ld1h (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d", + "ld1h (%x7,%z8.d,lsl #1)[8byte] %p2/z -> %z5.d", + "ld1h (%x12,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d", + "ld1h (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d", + "ld1h (%x22,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d", + "ld1h (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_8, 1)); + + /* Testing LD1H { .D }, /Z, [, .D] */ + const char *const expected_2_0[6] = { + "ld1h 
(%x0,%z0.d)[8byte] %p0/z -> %z0.d", + "ld1h (%x7,%z8.d)[8byte] %p2/z -> %z5.d", + "ld1h (%x12,%z13.d)[8byte] %p3/z -> %z10.d", + "ld1h (%x17,%z19.d)[8byte] %p5/z -> %z16.d", + "ld1h (%x22,%z24.d)[8byte] %p6/z -> %z21.d", + "ld1h (%sp,%z31.d)[8byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1h, ld1h_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_8, 0)); } TEST_INSTR(ld1sh_sve_pred) @@ -15471,6 +15794,38 @@ TEST_INSTR(ld1sh_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_8, 0)); + + /* Testing LD1SH { .D }, /Z, [, .D, LSL #1] */ + const char *const expected_1_0[6] = { + "ld1sh (%x0,%z0.d,lsl #1)[8byte] %p0/z -> %z0.d", + "ld1sh (%x7,%z8.d,lsl #1)[8byte] %p2/z -> %z5.d", + "ld1sh (%x12,%z13.d,lsl #1)[8byte] %p3/z -> %z10.d", + "ld1sh (%x17,%z19.d,lsl #1)[8byte] %p5/z -> %z16.d", + "ld1sh (%x22,%z24.d,lsl #1)[8byte] %p6/z -> %z21.d", + "ld1sh (%sp,%z31.d,lsl #1)[8byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_8, 1)); + + /* Testing LD1SH { .D }, /Z, [, .D] */ + const char *const expected_2_0[6] = { + "ld1sh (%x0,%z0.d)[8byte] %p0/z -> %z0.d", + "ld1sh (%x7,%z8.d)[8byte] %p2/z -> %z5.d", + "ld1sh (%x12,%z13.d)[8byte] %p3/z -> %z10.d", + "ld1sh (%x17,%z19.d)[8byte] %p5/z -> %z16.d", + "ld1sh (%x22,%z24.d)[8byte] %p6/z -> %z21.d", + "ld1sh (%sp,%z31.d)[8byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1sh, ld1sh_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], 
OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_8, 0)); } TEST_INSTR(ld1w_sve_pred) @@ -15506,6 +15861,38 @@ TEST_INSTR(ld1w_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_16, 0)); + + /* Testing LD1W { .D }, /Z, [, .D, LSL #2] */ + const char *const expected_1_0[6] = { + "ld1w (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d", + "ld1w (%x7,%z8.d,lsl #2)[16byte] %p2/z -> %z5.d", + "ld1w (%x12,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d", + "ld1w (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d", + "ld1w (%x22,%z24.d,lsl #2)[16byte] %p6/z -> %z21.d", + "ld1w (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_16, 2)); + + /* Testing LD1W { .D }, /Z, [, .D] */ + const char *const expected_2_0[6] = { + "ld1w (%x0,%z0.d)[16byte] %p0/z -> %z0.d", + "ld1w (%x7,%z8.d)[16byte] %p2/z -> %z5.d", + "ld1w (%x12,%z13.d)[16byte] %p3/z -> %z10.d", + "ld1w (%x17,%z19.d)[16byte] %p5/z -> %z16.d", + "ld1w (%x22,%z24.d)[16byte] %p6/z -> %z21.d", + "ld1w (%sp,%z31.d)[16byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1w, ld1w_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_16, 0)); } TEST_INSTR(ld1d_sve_pred) @@ -15526,6 +15913,38 @@ TEST_INSTR(ld1d_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, 
imm5[i], 0, OPSZ_32, 0)); + + /* Testing LD1D { .D }, /Z, [, .D, LSL #3] */ + const char *const expected_1_0[6] = { + "ld1d (%x0,%z0.d,lsl #3)[32byte] %p0/z -> %z0.d", + "ld1d (%x7,%z8.d,lsl #3)[32byte] %p2/z -> %z5.d", + "ld1d (%x12,%z13.d,lsl #3)[32byte] %p3/z -> %z10.d", + "ld1d (%x17,%z19.d,lsl #3)[32byte] %p5/z -> %z16.d", + "ld1d (%x22,%z24.d,lsl #3)[32byte] %p6/z -> %z21.d", + "ld1d (%sp,%z31.d,lsl #3)[32byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1d, ld1d_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_32, 3)); + + /* Testing LD1D { .D }, /Z, [, .D] */ + const char *const expected_2_0[6] = { + "ld1d (%x0,%z0.d)[32byte] %p0/z -> %z0.d", + "ld1d (%x7,%z8.d)[32byte] %p2/z -> %z5.d", + "ld1d (%x12,%z13.d)[32byte] %p3/z -> %z10.d", + "ld1d (%x17,%z19.d)[32byte] %p5/z -> %z16.d", + "ld1d (%x22,%z24.d)[32byte] %p6/z -> %z21.d", + "ld1d (%sp,%z31.d)[32byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1d, ld1d_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_32, 0)); } TEST_INSTR(ld1sw_sve_pred) @@ -15546,6 +15965,38 @@ TEST_INSTR(ld1sw_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_16, 0)); + + /* Testing LD1SW { .D }, /Z, [, .D, LSL #2] */ + const char *const expected_1_0[6] = { + "ld1sw (%x0,%z0.d,lsl #2)[16byte] %p0/z -> %z0.d", + "ld1sw (%x7,%z8.d,lsl #2)[16byte] %p2/z -> %z5.d", + "ld1sw (%x12,%z13.d,lsl #2)[16byte] %p3/z -> %z10.d", + "ld1sw (%x17,%z19.d,lsl #2)[16byte] %p5/z -> %z16.d", + "ld1sw (%x22,%z24.d,lsl #2)[16byte] %p6/z -> 
%z21.d", + "ld1sw (%sp,%z31.d,lsl #2)[16byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1sw, ld1sw_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_16, 2)); + + /* Testing LD1SW { .D }, /Z, [, .D] */ + const char *const expected_2_0[6] = { + "ld1sw (%x0,%z0.d)[16byte] %p0/z -> %z0.d", + "ld1sw (%x7,%z8.d)[16byte] %p2/z -> %z5.d", + "ld1sw (%x12,%z13.d)[16byte] %p3/z -> %z10.d", + "ld1sw (%x17,%z19.d)[16byte] %p5/z -> %z16.d", + "ld1sw (%x22,%z24.d)[16byte] %p6/z -> %z21.d", + "ld1sw (%sp,%z31.d)[16byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1sw, ld1sw_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_16, 0)); } TEST_INSTR(st1h_sve_pred) @@ -15582,6 +16033,38 @@ TEST_INSTR(st1h_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_8, 0)); + + /* Testing ST1H { .D }, , [, .D, LSL #1] */ + const char *const expected_1_0[6] = { + "st1h %z0.d %p0 -> (%x0,%z0.d,lsl #1)[8byte]", + "st1h %z5.d %p2 -> (%x7,%z8.d,lsl #1)[8byte]", + "st1h %z10.d %p3 -> (%x12,%z13.d,lsl #1)[8byte]", + "st1h %z16.d %p5 -> (%x17,%z19.d,lsl #1)[8byte]", + "st1h %z21.d %p6 -> (%x22,%z24.d,lsl #1)[8byte]", + "st1h %z31.d %p7 -> (%sp,%z31.d,lsl #1)[8byte]", + }; + TEST_LOOP(st1h, st1h_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_8, 1)); + + /* Testing ST1H { .D }, , 
[, .D] */ + const char *const expected_2_0[6] = { + "st1h %z0.d %p0 -> (%x0,%z0.d)[8byte]", + "st1h %z5.d %p2 -> (%x7,%z8.d)[8byte]", + "st1h %z10.d %p3 -> (%x12,%z13.d)[8byte]", + "st1h %z16.d %p5 -> (%x17,%z19.d)[8byte]", + "st1h %z21.d %p6 -> (%x22,%z24.d)[8byte]", + "st1h %z31.d %p7 -> (%sp,%z31.d)[8byte]", + }; + TEST_LOOP(st1h, st1h_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_8, 0)); } TEST_INSTR(st1w_sve_pred) @@ -15618,6 +16101,38 @@ TEST_INSTR(st1w_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5[i], 0, OPSZ_16, 0)); + + /* Testing ST1W { .D }, , [, .D, LSL #2] */ + const char *const expected_2_0[6] = { + "st1w %z0.d %p0 -> (%x0,%z0.d,lsl #2)[16byte]", + "st1w %z5.d %p2 -> (%x7,%z8.d,lsl #2)[16byte]", + "st1w %z10.d %p3 -> (%x12,%z13.d,lsl #2)[16byte]", + "st1w %z16.d %p5 -> (%x17,%z19.d,lsl #2)[16byte]", + "st1w %z21.d %p6 -> (%x22,%z24.d,lsl #2)[16byte]", + "st1w %z31.d %p7 -> (%sp,%z31.d,lsl #2)[16byte]", + }; + TEST_LOOP(st1w, st1w_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_16, 2)); + + /* Testing ST1W { .D }, , [, .D] */ + const char *const expected_3_0[6] = { + "st1w %z0.d %p0 -> (%x0,%z0.d)[16byte]", + "st1w %z5.d %p2 -> (%x7,%z8.d)[16byte]", + "st1w %z10.d %p3 -> (%x12,%z13.d)[16byte]", + "st1w %z16.d %p5 -> (%x17,%z19.d)[16byte]", + "st1w %z21.d %p6 -> (%x22,%z24.d)[16byte]", + "st1w %z31.d %p7 -> (%sp,%z31.d)[16byte]", + }; + TEST_LOOP(st1w, st1w_sve_pred, 6, expected_3_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + 
opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_16, 0)); } TEST_INSTR(st1d_sve_pred) @@ -15638,6 +16153,38 @@ TEST_INSTR(st1d_sve_pred) opnd_create_vector_base_disp_aarch64(Zn_six_offset_2[i], DR_REG_NULL, OPSZ_8, DR_EXTEND_UXTX, 0, imm5_0_0[i], 0, OPSZ_32, 0)); + + /* Testing ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, LSL #3] */ + const char *const expected_1_0[6] = { + "st1d %z0.d %p0 -> (%x0,%z0.d,lsl #3)[32byte]", + "st1d %z5.d %p2 -> (%x7,%z8.d,lsl #3)[32byte]", + "st1d %z10.d %p3 -> (%x12,%z13.d,lsl #3)[32byte]", + "st1d %z16.d %p5 -> (%x17,%z19.d,lsl #3)[32byte]", + "st1d %z21.d %p6 -> (%x22,%z24.d,lsl #3)[32byte]", + "st1d %z31.d %p7 -> (%sp,%z31.d,lsl #3)[32byte]", + }; + TEST_LOOP(st1d, st1d_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, true, + 0, 0, OPSZ_32, 3)); + + /* Testing ST1D { <Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D] */ + const char *const expected_2_0[6] = { + "st1d %z0.d %p0 -> (%x0,%z0.d)[32byte]", + "st1d %z5.d %p2 -> (%x7,%z8.d)[32byte]", + "st1d %z10.d %p3 -> (%x12,%z13.d)[32byte]", + "st1d %z16.d %p5 -> (%x17,%z19.d)[32byte]", + "st1d %z21.d %p6 -> (%x22,%z24.d)[32byte]", + "st1d %z31.d %p7 -> (%sp,%z31.d)[32byte]", + }; + TEST_LOOP(st1d, st1d_sve_pred, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_reg(Pn_half_six_offset_0[i]), + opnd_create_vector_base_disp_aarch64( + Xn_six_offset_2_sp[i], Zn_six_offset_3[i], OPSZ_8, DR_EXTEND_UXTX, + false, 0, 0, OPSZ_32, 0)); } int @@ -16067,7 +16614,6 @@ main(int argc, char *argv[]) RUN_INSTR_TEST(ld1sb_sve_pred); RUN_INSTR_TEST(ldnt1b_sve_pred); RUN_INSTR_TEST(ld1h_sve_pred); - RUN_INSTR_TEST(ld1sb_sve_pred); RUN_INSTR_TEST(ld1sh_sve_pred); RUN_INSTR_TEST(ld1w_sve_pred);
RUN_INSTR_TEST(ld1d_sve_pred);