Skip to content

Commit

Permalink
i#3044 AArch64 SVE codec: Add LDFF1* (scalar plus scalar) instructions (
Browse files Browse the repository at this point in the history
#5850)

This patch adds the appropriate macros, tests and codec entries
to encode the following variants:
LDFF1B  { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
LDFF1B  { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
LDFF1B  { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
LDFF1B  { <Zt>.B }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
LDFF1D  { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #3}]
LDFF1H  { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
LDFF1H  { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
LDFF1H  { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
LDFF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #2}]
LDFF1W  { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #2}]
LDFF1W  { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #2}]

Issue #3044
  • Loading branch information
cmannett85-arm authored Feb 2, 2023
1 parent 32b92ea commit d7a6ba9
Show file tree
Hide file tree
Showing 8 changed files with 879 additions and 13 deletions.
88 changes: 88 additions & 0 deletions core/ir/aarch64/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -988,6 +988,27 @@ get_vector_element_reg_offset(opnd_t opnd)
}
}

/* Translates a register-offset selector into the matching OPSZ_ constant:
 *   BYTE_REG   -> OPSZ_1
 *   HALF_REG   -> OPSZ_2
 *   SINGLE_REG -> OPSZ_4
 *   DOUBLE_REG -> OPSZ_8
 *   QUAD_REG   -> OPSZ_16
 * Any other value trips ASSERT_NOT_REACHED() and yields OPSZ_NA.
 */
static inline opnd_size_t
get_opnd_size_from_offset(aarch64_reg_offset offset)
{
    opnd_size_t size = OPSZ_NA;

    switch (offset) {
    case BYTE_REG: size = OPSZ_1; break;
    case HALF_REG: size = OPSZ_2; break;
    case SINGLE_REG: size = OPSZ_4; break;
    case DOUBLE_REG: size = OPSZ_8; break;
    case QUAD_REG: size = OPSZ_16; break;
    default:
        /* Unknown selector: flag it and fall through with OPSZ_NA. */
        ASSERT_NOT_REACHED();
        break;
    }
    return size;
}

/* Returns how many elements of the given width fit into one SVE vector, i.e.
 * the bit width of OPSZ_SVE_VL divided by the bit width of a single element.
 *
 * element_size is converted to an OPSZ_ constant through
 * get_opnd_size_from_offset(), so it must be one of the selectors that
 * function accepts.
 */
static inline uint
get_elements_in_sve_vector(aarch64_reg_offset element_size)
{
    const opnd_size_t element_opsz = get_opnd_size_from_offset(element_size);
    const uint element_bits = opnd_size_in_bits(element_opsz);
    const uint vector_bits = opnd_size_in_bits(OPSZ_SVE_VL);

    return vector_bits / element_bits;
}

/*******************************************************************************
* Pairs of functions for decoding and encoding a generalised type of operand.
*/
Expand Down Expand Up @@ -6042,6 +6063,73 @@ encode_opnd_imm2_tsz_index(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint
return true;
}

/* Reports whether a 4-bit dtype field selects one of the signed encodings.
 *
 * The signed values are 0100, 1000, 1001, 1100, 1101 and 1110 (binary), which
 * are exactly the 4-bit values whose upper two bits, read as a number, are
 * greater than the lower two bits.  Everything else, including any value
 * wider than four bits, is reported as not signed.
 *
 * There is no ASSERT_NOT_REACHED on the "not signed" path because all
 * possible values of dtype are used in the instructions.
 */
static inline bool
dtype_is_signed(uint dtype)
{
    const uint upper = dtype >> 2;
    const uint lower = dtype & 0x3;

    /* Values that do not fit in four bits never match a signed encoding. */
    if (dtype > 0xf)
        return false;

    return upper > lower;
}

/* svemem_gpr: GPR offset and base reg for SVE ld/st */

/* Decodes the svemem_gpr_5 operand: a base register plus a register index.
 *
 * Fields read from enc:
 *   - 4 bits starting at bit 21: dtype, which selects the memory element size
 *     (upper two bits) and the vector element size (lower two bits).  For the
 *     signed encodings the field is bitwise inverted before it is split.
 *   - 5 bits starting at bit 16: the index register.
 *   - 5 bits starting at bit 5: the base register.
 *
 * The operand size is the memory element size in bytes multiplied by the
 * number of vector elements.  opcode and pc are not used.  Always returns
 * true.
 */
static inline bool
decode_opnd_svemem_gpr_5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    const uint raw_dtype = extract_uint(enc, 21, 4);
    const uint dtype = dtype_is_signed(raw_dtype) ? ~raw_dtype : raw_dtype;

    const aarch64_reg_offset mem_elsz = BITS(dtype, 3, 2);
    const aarch64_reg_offset vec_elsz = BITS(dtype, 1, 0);

    const uint num_elements = get_elements_in_sve_vector(vec_elsz);
    const opnd_size_t transfer_size =
        opnd_size_from_bytes((1 << mem_elsz) * num_elements);
    const uint shift = opnd_size_to_shift_amount(get_opnd_size_from_offset(mem_elsz));

    /* Byte-sized memory elements take an unscaled index, so the scaled flag
     * is cleared for them.
     */
    const bool scaled = mem_elsz != BYTE_REG;

    /* The last decode_reg() flag differs between base and index.
     * NOTE(review): presumably it selects SP vs. XZR for register 31 --
     * confirm against decode_reg().
     */
    const reg_id_t base = decode_reg(extract_uint(enc, 5, 5), true, true);
    const reg_id_t index = decode_reg(extract_uint(enc, 16, 5), true, false);

    *opnd = opnd_create_base_disp_shift_aarch64(base, index, DR_EXTEND_UXTX, scaled, 0,
                                                0, transfer_size, shift);
    return true;
}

/* Encodes the svemem_gpr_5 operand: a base register plus a register index.
 *
 * The expected operand size is derived from the dtype field already present
 * in enc (4 bits starting at bit 21, bitwise inverted for the signed
 * encodings): memory element size in bytes times the number of vector
 * elements.
 *
 * Returns false when opnd is not a base+disp operand, when its size differs
 * from the expected transfer size, when its displacement is non-zero, or when
 * either register fails encode_reg() or is not reported as an X register.
 * On success writes the index register at bit 16 and the base register at
 * bit 5 of *enc_out.  opcode and pc are not used.
 *
 * NOTE(review): the operand's extend type, scaled flag and shift amount are
 * not checked here, so an operand with an unexpected shift still encodes.
 */
static inline bool
encode_opnd_svemem_gpr_5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    const uint raw_dtype = extract_uint(enc, 21, 4);
    const uint dtype = dtype_is_signed(raw_dtype) ? ~raw_dtype : raw_dtype;

    const aarch64_reg_offset mem_elsz = BITS(dtype, 3, 2);
    const aarch64_reg_offset vec_elsz = BITS(dtype, 1, 0);

    const uint num_elements = get_elements_in_sve_vector(vec_elsz);
    const opnd_size_t transfer_size =
        opnd_size_from_bytes((1 << mem_elsz) * num_elements);

    /* Operand shape checks, in the same order as a short-circuit OR. */
    IF_RETURN_FALSE(!opnd_is_base_disp(opnd))
    IF_RETURN_FALSE(opnd_get_size(opnd) != transfer_size)
    IF_RETURN_FALSE(opnd_get_disp(opnd) != 0)

    uint base_bits, index_bits;
    bool base_is_x, index_is_x;
    IF_RETURN_FALSE(!encode_reg(&base_bits, &base_is_x, opnd_get_base(opnd), true) ||
                    !base_is_x)
    IF_RETURN_FALSE(!encode_reg(&index_bits, &index_is_x, opnd_get_index(opnd), false) ||
                    !index_is_x)

    *enc_out = (index_bits << 16) | (base_bits << 5);
    return true;
}

/* mem0p: as mem0, but a pair of registers, so double size */

static inline bool
Expand Down
16 changes: 16 additions & 0 deletions core/ir/aarch64/codec_sve.txt
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,22 @@
1000010011xxxxxx100xxxxxxxxxxxxx n 913 SVE ld1rsw z_d_0 : svememx6_s_5 p10_zer_lo
1000010101xxxxxx110xxxxxxxxxxxxx n 914 SVE ld1rw z_s_0 : svememx6_s_5 p10_zer_lo
1000010101xxxxxx111xxxxxxxxxxxxx n 914 SVE ld1rw z_d_0 : svememx6_s_5 p10_zer_lo
10100100001xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_h_0 : svemem_gpr_5 p10_zer_lo
10100100010xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_s_0 : svemem_gpr_5 p10_zer_lo
10100100011xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_d_0 : svemem_gpr_5 p10_zer_lo
10100100000xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_b_0 : svemem_gpr_5 p10_zer_lo
10100101111xxxxx011xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_5 p10_zer_lo
10100100101xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_h_0 : svemem_gpr_5 p10_zer_lo
10100100110xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_gpr_5 p10_zer_lo
10100100111xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_5 p10_zer_lo
10100101110xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_h_0 : svemem_gpr_5 p10_zer_lo
10100101101xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_s_0 : svemem_gpr_5 p10_zer_lo
10100101100xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_gpr_5 p10_zer_lo
10100101001xxxxx011xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_gpr_5 p10_zer_lo
10100101000xxxxx011xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_5 p10_zer_lo
10100100100xxxxx011xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_5 p10_zer_lo
10100101010xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_gpr_5 p10_zer_lo
10100101011xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_5 p10_zer_lo
1000010110xxxxxx000xxxxxxxx0xxxx n 227 SVE ldr p0 : svemem_gpr_simm9_vl
1000010110xxxxxx010xxxxxxxxxxxxx n 227 SVE ldr z0 : svemem_gpr_simm9_vl
00000100xx000011100xxxxxxxxxxxxx n 902 SVE lsl z_tszl8_bhsd_0 : p10_mrg_lo z_tszl8_bhsd_0 tszl8_imm3_5
Expand Down
135 changes: 135 additions & 0 deletions core/ir/aarch64/instr_create_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -10616,4 +10616,139 @@
*/
#define INSTR_CREATE_rdvl(dc, Rd, simm) instr_create_1dst_1src(dc, OP_rdvl, Rd, simm)

/**
 * Creates a LDFF1B instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    LDFF1B  { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
 *    LDFF1B  { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
 *    LDFF1B  { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
 *    LDFF1B  { <Zt>.B }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
 * \endverbatim
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Zt   The destination vector register, Z (Scalable).
 * \param Pg   The governing predicate register, P (Predicate).
 * \param Rn   The memory operand (base register plus register offset),
 *             constructed with the function:
 *             opnd_create_base_disp_aarch64(Rn, Rm,
 *             DR_EXTEND_UXTX, false, 0, 0, OPSZ_1)
 *
 * The instruction is created with \p Rn as the first source operand and
 * \p Pg as the second.
 *
 * NOTE(review): the svemem_gpr_5 encoder compares the operand size against
 * the total memory transfer size for the element arrangement, so the OPSZ_1
 * shown above may not be accepted -- confirm the required size.
 */
#define INSTR_CREATE_ldff1b_sve_pred(dc, Zt, Pg, Rn) \
instr_create_1dst_2src(dc, OP_ldff1b, Zt, Rn, Pg)

/**
 * Creates a LDFF1D instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    LDFF1D  { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #3}]
 * \endverbatim
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Zt   The destination vector register, Z (Scalable).
 * \param Pg   The governing predicate register, P (Predicate).
 * \param Rn   The memory operand (base register plus register offset shifted
 *             left by 3), constructed with the function:
 *             opnd_create_base_disp_shift_aarch64(Rn, Rm,
 *             DR_EXTEND_UXTX, true, 0, 0, OPSZ_32, 3)
 *
 * The instruction is created with \p Rn as the first source operand and
 * \p Pg as the second.
 */
#define INSTR_CREATE_ldff1d_sve_pred(dc, Zt, Pg, Rn) \
instr_create_1dst_2src(dc, OP_ldff1d, Zt, Rn, Pg)

/**
 * Creates a LDFF1H instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    LDFF1H  { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
 *    LDFF1H  { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
 *    LDFF1H  { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
 * \endverbatim
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Zt   The destination vector register, Z (Scalable).
 * \param Pg   The governing predicate register, P (Predicate).
 * \param Rn   The memory operand (base register plus register offset shifted
 *             left by 1), constructed with the function:
 *             opnd_create_base_disp_shift_aarch64(Rn, Rm,
 *             DR_EXTEND_UXTX, true, 0, 0, OPSZ_32, 1)
 *
 * The instruction is created with \p Rn as the first source operand and
 * \p Pg as the second.
 *
 * NOTE(review): the svemem_gpr_5 encoder derives the expected operand size
 * from the element arrangement, so the OPSZ_32 shown above presumably applies
 * to only one of the forms listed -- confirm the size for the others.
 */
#define INSTR_CREATE_ldff1h_sve_pred(dc, Zt, Pg, Rn) \
instr_create_1dst_2src(dc, OP_ldff1h, Zt, Rn, Pg)

/**
 * Creates a LDFF1SB instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    LDFF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
 *    LDFF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
 *    LDFF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>}]
 * \endverbatim
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Zt   The destination vector register, Z (Scalable).
 * \param Pg   The governing predicate register, P (Predicate).
 * \param Rn   The memory operand (base register plus register offset),
 *             constructed with the function:
 *             opnd_create_base_disp_aarch64(Rn, Rm,
 *             DR_EXTEND_UXTX, false, 0, 0, OPSZ_1)
 *
 * The instruction is created with \p Rn as the first source operand and
 * \p Pg as the second.
 *
 * NOTE(review): the svemem_gpr_5 encoder compares the operand size against
 * the total memory transfer size for the element arrangement, so the OPSZ_1
 * shown above may not be accepted -- confirm the required size.
 */
#define INSTR_CREATE_ldff1sb_sve_pred(dc, Zt, Pg, Rn) \
instr_create_1dst_2src(dc, OP_ldff1sb, Zt, Rn, Pg)

/**
 * Creates a LDFF1SH instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    LDFF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
 *    LDFF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #1}]
 * \endverbatim
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Zt   The destination vector register, Z (Scalable).
 * \param Pg   The governing predicate register, P (Predicate).
 * \param Rn   The memory operand (base register plus register offset shifted
 *             left by 1), constructed with the function:
 *             opnd_create_base_disp_shift_aarch64(Rn, Rm,
 *             DR_EXTEND_UXTX, true, 0, 0, OPSZ_16, 1)
 *
 * The instruction is created with \p Rn as the first source operand and
 * \p Pg as the second.
 *
 * NOTE(review): the svemem_gpr_5 encoder derives the expected operand size
 * from the element arrangement, so the OPSZ_16 shown above presumably applies
 * to only one of the forms listed -- confirm the size for the other.
 */
#define INSTR_CREATE_ldff1sh_sve_pred(dc, Zt, Pg, Rn) \
instr_create_1dst_2src(dc, OP_ldff1sh, Zt, Rn, Pg)

/**
 * Creates a LDFF1SW instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    LDFF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #2}]
 * \endverbatim
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Zt   The destination vector register, Z (Scalable).
 * \param Pg   The governing predicate register, P (Predicate).
 * \param Rn   The memory operand (base register plus register offset shifted
 *             left by 2), constructed with the function:
 *             opnd_create_base_disp_shift_aarch64(Rn, Rm,
 *             DR_EXTEND_UXTX, true, 0, 0, OPSZ_16, 2)
 *
 * The instruction is created with \p Rn as the first source operand and
 * \p Pg as the second.
 */
#define INSTR_CREATE_ldff1sw_sve_pred(dc, Zt, Pg, Rn) \
instr_create_1dst_2src(dc, OP_ldff1sw, Zt, Rn, Pg)

/**
 * Creates a LDFF1W instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    LDFF1W  { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #2}]
 *    LDFF1W  { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, <Xm>, LSL #2}]
 * \endverbatim
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Zt   The destination vector register, Z (Scalable).
 * \param Pg   The governing predicate register, P (Predicate).
 * \param Rn   The memory operand (base register plus register offset shifted
 *             left by 2), constructed with the function:
 *             opnd_create_base_disp_shift_aarch64(Rn, Rm,
 *             DR_EXTEND_UXTX, true, 0, 0, OPSZ_32, 2)
 *
 * The instruction is created with \p Rn as the first source operand and
 * \p Pg as the second.
 *
 * NOTE(review): the svemem_gpr_5 encoder derives the expected operand size
 * from the element arrangement, so the OPSZ_32 shown above presumably applies
 * to only one of the forms listed -- confirm the size for the other.
 */
#define INSTR_CREATE_ldff1w_sve_pred(dc, Zt, Pg, Rn) \
instr_create_1dst_2src(dc, OP_ldff1w, Zt, Rn, Pg)

#endif /* DR_IR_MACROS_AARCH64_H */
1 change: 1 addition & 0 deletions core/ir/aarch64/opnd_defs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@
--------xx-xxxxx---------------- z_size_bhsd_16 # sve vector reg, elsz depending on size
--------xx-xxxxx---------------- z_size_hsd_16 # sve vector reg, elsz depending on size
--------xx-xxxxx---------------- imm2_tsz_index # Index encoded in imm2:tsz
-------????xxxxx------xxxxx----- svemem_gpr_5 # GPR offset and base reg for SVE ld/st
-?--------------------xxxxx----- mem0p # gets size from 30; no offset, pair
-?---------xxxxx????------------ x16imm # computes immed from 30 and 15:12
-x------------------------------ index3 # index of D subreg in Q: 0-1
Expand Down
45 changes: 40 additions & 5 deletions core/ir/opnd_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1854,8 +1854,10 @@ struct _opnd_t {
byte /*bool*/ pre_index : 1;
/* Access this using opnd_get_index_extend and opnd_set_index_extend. */
byte /*dr_extend_type_t*/ extend_type : 3;
/* Shift register offset left by amount implied by size of memory operand: */
/* Enable shift register offset left */
byte /*bool*/ scaled : 1;
/* Shift offset amount */
byte /*uint*/ scaled_value : 3;
# elif defined(ARM)
byte /*dr_shift_type_t*/ shift_type : 3;
byte shift_amount_minus_1 : 5; /* 1..31 so we store (val - 1) */
Expand Down Expand Up @@ -2225,6 +2227,13 @@ opnd_create_base_disp_arm(reg_id_t base_reg, reg_id_t index_reg,
#endif

#ifdef AARCH64
DR_API
/**
 * Returns the left shift amount that corresponds to the operand size \p size.
 *
 * \param size  The operand size (an OPSZ_ constant).
 * \return The number of bit positions an index register is shifted left for
 * an access of that size.  NOTE(review): presumably log2 of the size in
 * bytes (e.g. 3 for an 8-byte element) -- confirm against the implementation.
 *
 * \note AArch64-only.
 */
uint
opnd_size_to_shift_amount(opnd_size_t size);

DR_API
/**
* Returns a memory reference operand that refers to either a base
Expand All @@ -2234,13 +2243,28 @@ DR_API
* Or a base register plus an optionally extended and shifted index register:
* - [base_reg, index_reg, extend_type, shift_amount]
*
* The shift_amount is zero or, if \p scaled, a value determined by the
* size of the operand.
* If \p scaled is enabled, \p shift determines the shift amount.
*
* The resulting operand has data size \p size (must be an OPSZ_ constant).
* Both \p base_reg and \p index_reg must be DR_REG_ constants.
* Either \p index_reg must be #DR_REG_NULL or disp must be 0.
*
* TODO i#3044: WARNING this function may change during SVE development of
* DynamoRIO. The function will be considered stable when this warning has been
* removed.
*
* \note AArch64-only.
*/
opnd_t
opnd_create_base_disp_shift_aarch64(reg_id_t base_reg, reg_id_t index_reg,
dr_extend_type_t extend_type, bool scaled, int disp,
dr_opnd_flags_t flags, opnd_size_t size, uint shift);

DR_API
/**
* Same as opnd_create_base_disp_shift_aarch64 but if \p scaled is true then the extend
* amount is calculated from the operand size (otherwise it is zero).
*
* \note AArch64-only.
*/
opnd_t
Expand Down Expand Up @@ -2836,8 +2860,19 @@ DR_API
/**
* Assumes \p opnd is a base+disp memory reference.
* Sets the index register to be extended by \p extend and optionally \p scaled.
* Returns whether successful. If the offset is scaled the amount it is shifted
* by is determined by the size of the memory operand.
* Returns whether successful. If \p scaled is zero, the offset is not scaled.
* \note AArch64-only.
*/
bool
opnd_set_index_extend_value(opnd_t *opnd, dr_extend_type_t extend, bool scaled,
uint scaled_value);

DR_API
/**
* Assumes \p opnd is a base+disp memory reference.
* Sets the index register to be extended by \p extend and optionally \p scaled.
* Returns whether successful. If \p scaled is zero, the offset is not scaled; otherwise
* is calculated from the operand size.
* \note AArch64-only.
*/
bool
Expand Down
Loading

0 comments on commit d7a6ba9

Please sign in to comment.