Skip to content

Commit

Permalink
i#2626 fp/simd encode: Add support for vector and scalar FADD. (#2811)
Browse files Browse the repository at this point in the history
This adds encoding and decoding support for the scalar and vector versions
of FADD. The patterns are auto-generated using the publicly available
ISA spec from
https://developer.arm.com/products/architecture/a-profile/exploration-tools

For the scalar versions, the H, S and D registers are used to generate
the correct encoding. For the vector versions, D and Q registers are
used to denote the vector width. They also take an additional immediate
as source operand, which denotes the element width (half, single or 
double).

Issue: #2626
  • Loading branch information
fhahn authored Feb 13, 2018
1 parent cb09eec commit 1b72319
Show file tree
Hide file tree
Showing 9 changed files with 960 additions and 577 deletions.
13 changes: 13 additions & 0 deletions api/docs/bt.dox
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,19 @@ passes an \c instrlist_t of all Level 3 \c instr_t's, for simplicity.
<tr><td><table width=100% bgcolor="#0000C0" cellspacing=0 cellpadding=1 border=0>
<tr><td></td></tr></table></td></tr></table></td></tr></table>
\endhtmlonly

********************
\subsection sec_IR_AArch64 AArch64 IR Variations

DynamoRIO's IR representation of AArch64 NEON instructions uses an additional
immediate source operand to denote the width of the vector elements. The immediates
take the values #FSZ_HALF, #FSZ_DOUBLE, and #FSZ_SINGLE for floating point instructions.
This is different from AArch64 assembly, where the element width is part of the
vector register operand. For example, floating point vector addition of two vectors
with 2 double elements is represented in assembly by
\code fadd v9.2d, v30.2d, v9.2d \endcode and in IR by
\code fadd %q30 %q9 $0x03 -> %q9 \endcode.

\section sec_events_bt Events

The core of a client's interaction with DynamoRIO occurs through <em>
Expand Down
207 changes: 194 additions & 13 deletions core/arch/aarch64/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -464,18 +464,18 @@ encode_opnd_adr_page(int scale, byte *pc, opnd_t opnd, OUT uint *enc_out,
return false;
}

/* dq_plus: used for dq0, dq0p1, dq0p2, dq0p3 */
/* dq_plus: used for dq0, dq5, dq16, dq0p1, dq0p2, dq0p3 */

static inline bool
decode_opnd_dq_plus(int add, int qpos, uint enc, OUT opnd_t *opnd)
decode_opnd_dq_plus(int add, int rpos, int qpos, uint enc, OUT opnd_t *opnd)
{
*opnd = opnd_create_reg((TEST(1U << qpos, enc) ? DR_REG_Q0 : DR_REG_D0) +
(extract_uint(enc, 0, 5) + add) % 32);
(extract_uint(enc, rpos, rpos+5) + add) % 32);
return true;
}

static inline bool
encode_opnd_dq_plus(int add, int qpos, opnd_t opnd, OUT uint *enc_out)
encode_opnd_dq_plus(int add, int rpos, int qpos, opnd_t opnd, OUT uint *enc_out)
{
uint num;
bool q;
Expand All @@ -485,7 +485,7 @@ encode_opnd_dq_plus(int add, int qpos, opnd_t opnd, OUT uint *enc_out)
num = opnd_get_reg(opnd) - (q ? DR_REG_Q0 : DR_REG_D0);
if (num >= 32)
return false;
*enc_out = (num - add) % 32 | (uint)q << qpos;
*enc_out = ((num - add) % 32) << rpos | (uint)q << qpos;
return true;
}

Expand Down Expand Up @@ -917,55 +917,83 @@ encode_opnd_d10(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
static inline bool
decode_opnd_dq0(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
return decode_opnd_dq_plus(0, 30, enc, opnd);
return decode_opnd_dq_plus(0, 0, 30, enc, opnd);
}

static inline bool
encode_opnd_dq0(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
return encode_opnd_dq_plus(0, 30, opnd, enc_out);
return encode_opnd_dq_plus(0, 0, 30, opnd, enc_out);
}

/* dq5: D/Q register at bit position 5; bit 30 selects Q reg */

/* Decoder for the dq5 operand type. Delegates to decode_opnd_dq_plus with no
 * register-number offset, the register field at bit 5 and the Q/D selector at
 * bit 30. The opcode and pc arguments are not used.
 */
static inline bool
decode_opnd_dq5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    const int reg_pos = 5;
    const int q_pos = 30;

    return decode_opnd_dq_plus(0, reg_pos, q_pos, enc, opnd);
}

/* Encoder for the dq5 operand type. Delegates to encode_opnd_dq_plus with no
 * register-number offset, the register field at bit 5 and the Q/D selector at
 * bit 30. The enc, opcode and pc arguments are not used.
 */
static inline bool
encode_opnd_dq5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    const int reg_pos = 5;
    const int q_pos = 30;

    return encode_opnd_dq_plus(0, reg_pos, q_pos, opnd, enc_out);
}

/* dq16: D/Q register at bit position 16; bit 30 selects Q reg */

/* Decoder for the dq16 operand type. Delegates to decode_opnd_dq_plus with no
 * register-number offset, the register field at bit 16 and the Q/D selector at
 * bit 30. The opcode and pc arguments are not used.
 */
static inline bool
decode_opnd_dq16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    const int reg_pos = 16;
    const int q_pos = 30;

    return decode_opnd_dq_plus(0, reg_pos, q_pos, enc, opnd);
}

/* Encoder for the dq16 operand type. Delegates to encode_opnd_dq_plus with no
 * register-number offset, the register field at bit 16 and the Q/D selector at
 * bit 30. The enc, opcode and pc arguments are not used.
 */
static inline bool
encode_opnd_dq16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    const int reg_pos = 16;
    const int q_pos = 30;

    return encode_opnd_dq_plus(0, reg_pos, q_pos, opnd, enc_out);
}

/* dq0p1: as dq0 but add 1 mod 32 to reg number */

static inline bool
decode_opnd_dq0p1(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
return decode_opnd_dq_plus(1, 30, enc, opnd);
return decode_opnd_dq_plus(1, 0, 30, enc, opnd);
}

static inline bool
encode_opnd_dq0p1(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
return encode_opnd_dq_plus(1, 30, opnd, enc_out);
return encode_opnd_dq_plus(1, 0, 30, opnd, enc_out);
}

/* dq0p2: as dq0 but add 2 mod 32 to reg number */

static inline bool
decode_opnd_dq0p2(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
return decode_opnd_dq_plus(2, 30, enc, opnd);
return decode_opnd_dq_plus(2, 0, 30, enc, opnd);
}

static inline bool
encode_opnd_dq0p2(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
return encode_opnd_dq_plus(2, 30, opnd, enc_out);
return encode_opnd_dq_plus(2, 0, 30, opnd, enc_out);
}

/* dq0p3: as dq0 but add 3 mod 32 to reg number */

static inline bool
decode_opnd_dq0p3(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
return decode_opnd_dq_plus(3, 30, enc, opnd);
return decode_opnd_dq_plus(3, 0, 30, enc, opnd);
}

static inline bool
encode_opnd_dq0p3(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
return encode_opnd_dq_plus(3, 30, opnd, enc_out);
return encode_opnd_dq_plus(3, 0, 30, opnd, enc_out);
}

/* ext: extend type, dr_extend_type_t */
Expand Down Expand Up @@ -1003,6 +1031,101 @@ encode_opnd_extam(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out
return true;
}

/* Maps a register number and a 2-bit floating-point type field to a scalar
 * FP register:
 *   type 3 -> H register, type 0 -> S register, type 1 -> D register.
 * On success stores DR_REG_{H,S,D}0 + n in *reg and returns true; any other
 * type value is rejected and *reg is left untouched.
 *
 * The result is a success flag, not a register, so the return type is bool
 * (callers test it with '!').
 */
static inline bool
decode_float_reg(uint n, uint type, reg_id_t *reg)
{
    switch (type) {
    case 3:
        *reg = DR_REG_H0 + n;
        return true;
    case 0:
        *reg = DR_REG_S0 + n;
        return true;
    case 1:
        *reg = DR_REG_D0 + n;
        return true;
    default:
        /* Type value 2 has no H/S/D mapping here. */
        return false;
    }
}

/* Decodes a scalar FP register operand. The 5-bit register number is read
 * from enc starting at bit pos and the 2-bit type field starting at bit 22;
 * decode_float_reg turns the pair into an H, S or D register. Returns false,
 * leaving *opnd unwritten, when the type field is not accepted.
 */
static inline bool
decode_opnd_float_reg(int pos, uint enc, OUT opnd_t *opnd)
{
    uint reg_num = extract_uint(enc, pos, 5);
    uint fp_type = extract_uint(enc, 22, 2);
    reg_id_t decoded;

    if (decode_float_reg(reg_num, fp_type, &decoded)) {
        *opnd = opnd_create_reg(decoded);
        return true;
    }
    return false;
}

/* Encodes a scalar FP register operand. encode_vreg supplies the register's
 * size and number; the size selects the 2-bit type field placed at bit 22
 * (OPSZ_2 -> 3, OPSZ_4 -> 0, OPSZ_8 -> 1) and the register number is placed
 * at bit pos. Returns false without writing *enc_out if encode_vreg fails or
 * the size is not one of those three.
 */
static inline bool
encode_opnd_float_reg(int pos, opnd_t opnd, OUT uint *enc_out)
{
    opnd_size_t reg_size = OPSZ_NA;
    uint reg_num;
    uint fp_type;

    if (!encode_vreg(&reg_size, &reg_num, opnd))
        return false;

    if (reg_size == OPSZ_2)
        fp_type = 3;
    else if (reg_size == OPSZ_4)
        fp_type = 0;
    else if (reg_size == OPSZ_8)
        fp_type = 1;
    else
        return false;

    *enc_out = fp_type << 22 | reg_num << pos;
    return true;
}

/* float_reg0 decoder: H, S or D register whose number field starts at bit 0.
 * Forwards to decode_opnd_float_reg; opcode and pc are not used.
 */
static inline bool
decode_opnd_float_reg0(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    const int reg_pos = 0;

    return decode_opnd_float_reg(reg_pos, enc, opnd);
}

/* float_reg0 encoder: H, S or D register whose number field starts at bit 0.
 * Forwards to encode_opnd_float_reg; enc, opcode and pc are not used.
 */
static inline bool
encode_opnd_float_reg0(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    const int reg_pos = 0;

    return encode_opnd_float_reg(reg_pos, opnd, enc_out);
}

/* float_reg5 decoder: H, S or D register whose number field starts at bit 5.
 * Forwards to decode_opnd_float_reg; opcode and pc are not used.
 */
static inline bool
decode_opnd_float_reg5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    const int reg_pos = 5;

    return decode_opnd_float_reg(reg_pos, enc, opnd);
}

/* float_reg5 encoder: H, S or D register whose number field starts at bit 5.
 * Forwards to encode_opnd_float_reg; enc, opcode and pc are not used.
 */
static inline bool
encode_opnd_float_reg5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    const int reg_pos = 5;

    return encode_opnd_float_reg(reg_pos, opnd, enc_out);
}

/* float_reg16 decoder: H, S or D register whose number field starts at bit 16.
 * Forwards to decode_opnd_float_reg; opcode and pc are not used.
 */
static inline bool
decode_opnd_float_reg16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    const int reg_pos = 16;

    return decode_opnd_float_reg(reg_pos, enc, opnd);
}

/* float_reg16 encoder: H, S or D register whose number field starts at bit 16.
 * Forwards to encode_opnd_float_reg; enc, opcode and pc are not used.
 */
static inline bool
encode_opnd_float_reg16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    const int reg_pos = 16;

    return encode_opnd_float_reg(reg_pos, opnd, enc_out);
}



/* h0: H register at bit position 0 */

static inline bool
Expand Down Expand Up @@ -2549,6 +2672,64 @@ encode_opnds_tbz(byte *pc, instr_t *instr, uint enc, decode_info_t *di)
return ENCFAIL;
}

/* Element size for vector floating point instructions. */

/* fsz: element-size operand for the single- and double-precision encodings of
 * floating point vector instructions. The two encoding bits at 22:21 are
 * translated to the generic size immediate: 0x01 yields FSZ_SINGLE and 0x03
 * yields FSZ_DOUBLE. Any other bit pattern is rejected and *opnd is not
 * written. The opcode and pc arguments are not used.
 */
static inline bool
decode_opnd_fsz(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    uint size_bits = (enc >> 21) & 0x03;

    switch (size_bits) {
    case 0x01:
        *opnd = opnd_create_immed_int(FSZ_SINGLE, OPSZ_2b);
        return true;
    case 0x03:
        *opnd = opnd_create_immed_int(FSZ_DOUBLE, OPSZ_2b);
        return true;
    default:
        return false;
    }
}

/* Encoder for the fsz operand: turns the size immediate into the encoding
 * bits at 22:21. FSZ_SINGLE becomes 0x01 and FSZ_DOUBLE becomes 0x03; any
 * other immediate value fails without writing *enc_out. The enc, opcode and
 * pc arguments are not used.
 */
static inline bool
encode_opnd_fsz(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    bool is_single = opnd_get_immed_int(opnd) == FSZ_SINGLE;
    /* Only look for the double encoding when single did not match. */
    bool is_double = !is_single && opnd_get_immed_int(opnd) == FSZ_DOUBLE;

    if (!is_single && !is_double)
        return false;

    *enc_out = (is_single ? 0x01 : 0x03) << 21;
    return true;
}

/* fsz16: element-size operand for the half-precision encoding of floating
 * point vector instructions. Only the bit pattern 0x02 at bits 22:21 is
 * accepted, and it yields the generic size immediate FSZ_HALF. Anything else
 * is rejected and *opnd is not written. The opcode and pc arguments are not
 * used.
 */
static inline bool
decode_opnd_fsz16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    uint size_bits = (enc >> 21) & 0x03;

    if (size_bits != 0x02)
        return false;

    *opnd = opnd_create_immed_int(FSZ_HALF, OPSZ_2b);
    return true;
}

/* Encoder for the fsz16 operand: the only accepted size immediate is
 * FSZ_HALF, which is written as 0x02 at bits 22:21. Any other value fails
 * without writing *enc_out. The enc, opcode and pc arguments are not used.
 */
static inline bool
encode_opnd_fsz16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    if (opnd_get_immed_int(opnd) != FSZ_HALF)
        return false;

    *enc_out = 0x02 << 21;
    return true;
}

/******************************************************************************/

/* Include automatically generated decoder and encoder. */
Expand Down
16 changes: 14 additions & 2 deletions core/arch/aarch64/codec.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@
----------------------------xxxx nzcv # flag bit specifier for CCMN, CCMP
---------------------------xxxxx b0 # B register
---------------------------xxxxx h0 # H register
--------xx-----------------xxxxx float_reg0 # H, S or D register including type for FP instruction
--------xx------------xxxxx----- float_reg5 # H, S or D register including type for FP instruction
--------xx-xxxxx---------------- float_reg16 # H, S or D register including type for FP instruction
---------------------------xxxxx s0 # S register
---------------------------xxxxx d0 # D register
---------------------------xxxxx q0 # Q register
Expand Down Expand Up @@ -128,6 +131,8 @@
??---?--xxxxxxxxxxxxxxxxxxx----- memlit # load literal, gets size from 31:30 and 26
-x------------------------------ index3 # index of D subreg in Q: 0-1
-x-------------------------xxxxx dq0 # Q register if bit 30 is set, else D
-x--------------------xxxxx----- dq5 # Q register if bit 30 is set, else D
-x---------xxxxx---------------- dq16 # Q register if bit 30 is set, else D
-x-------------------------xxxxx dq0p1 # ... add 1
-x-------------------------xxxxx dq0p2 # ... add 2
-x-------------------------xxxxx dq0p3 # ... add 3
Expand All @@ -146,8 +151,10 @@ x---------------------xxxxx----- wx5 # W/X register (or WZR/XZR)
x---------------------xxxxx----- wx5sp # W/X register or WSP/XSP
x----------------xxxxx---------- wx10 # W/X register (or WZR/XZR)
x----------xxxxx---------------- wx16 # W/X register (or WZR/XZR)

---------xx--------------------- fsz # element size of FP vector reg (single (0x1) and double (0x3) encoding)
---------xx--------------------- fsz16 # element size of FP vector reg (half (0x2) encoding)
################################################################################

# Instruction patterns

# The syntax here is: pattern opcode opndtype* : opndtype*
Expand Down Expand Up @@ -939,4 +946,9 @@ x101101011000000000101xxxxxxxxxx cls wx0 : wx5

# Data Processing - Scalar Floating-Point and Advanced SIMD

# FIXME i#1569: Add: more data processing
# FADD (vector)
0x0011100x1xxxxx110101xxxxxxxxxx fadd dq0 : dq5 dq16 fsz
0x001110010xxxxx000101xxxxxxxxxx fadd dq0 : dq5 dq16 fsz16

# FADD (scalar)
00011110xx1xxxxx001010xxxxxxxxxx fadd float_reg0 : float_reg5 float_reg16
Loading

0 comments on commit 1b72319

Please sign in to comment.