i#2626 fp/simd encode: Add support for vector and scalar FADD. (#2811)

This adds encoding and decoding support for the scalar and vector versions of FADD. The patterns are auto-generated using the publicly available ISA spec (https://developer.arm.com/products/architecture/a-profile/exploration-tools). For the scalar versions, the H, S and D registers are used to generate the correct encoding. For the vector versions, D and Q registers are used to denote the vector width. The vector versions also take an additional immediate as a source operand, which denotes the element width (half, single or double). Issue: #2626
DynamoRIO · Feb 13, 2018 · 1b72319 · 1b72319
1 parent cb09eec
commit 1b72319
Show file tree

Hide file tree

Showing 9 changed files with 960 additions and 577 deletions.
diff --git a/api/docs/bt.dox b/api/docs/bt.dox
@@ -327,6 +327,19 @@ passes an \c instrlist_t of all Level 3 \c instr_t's, for simplicity.
   <tr><td><table width=100% bgcolor="#0000C0" cellspacing=0 cellpadding=1 border=0>
   <tr><td></td></tr></table></td></tr></table></td></tr></table>
 \endhtmlonly
+
+********************
+\subsection sec_IR_AArch64 AArch64 IR Variations
+
+DynamoRIO's IR representation of AArch64 NEON instructions uses an additional
+immediate source operand to denote the width of the vector elements. The immediates
+take the values #FSZ_HALF, #FSZ_DOUBLE, and #FSZ_SINGLE for floating point instructions.
+This is different from AArch64 assembly, where the element width is part of the
+vector register operand. For example, floating point vector addition of two vectors
+with 2 double elements is represented in assembly by
+\code fadd v9.2d, v30.2d, v9.2d \endcode and in IR by
+\code fadd   %q30 %q9 $0x03 -> %q9 \endcode.
+
 \section sec_events_bt Events
 
 The core of a client's interaction with DynamoRIO occurs through <em>

diff --git a/core/arch/aarch64/codec.c b/core/arch/aarch64/codec.c
@@ -464,18 +464,18 @@ encode_opnd_adr_page(int scale, byte *pc, opnd_t opnd, OUT uint *enc_out,
     return false;
 }
 
-/* dq_plus: used for dq0, dq0p1, dq0p2, dq0p3 */
+/* dq_plus: used for dq0, dq5, dq16, dq0p1, dq0p2, dq0p3 */
 
 static inline bool
-decode_opnd_dq_plus(int add, int qpos, uint enc, OUT opnd_t *opnd)
+decode_opnd_dq_plus(int add, int rpos, int qpos, uint enc, OUT opnd_t *opnd)
 {
     *opnd = opnd_create_reg((TEST(1U << qpos, enc) ? DR_REG_Q0 : DR_REG_D0) +
-                            (extract_uint(enc, 0, 5) + add) % 32);
+                            (extract_uint(enc, rpos, rpos+5) + add) % 32);
     return true;
 }
 
 static inline bool
-encode_opnd_dq_plus(int add, int qpos, opnd_t opnd, OUT uint *enc_out)
+encode_opnd_dq_plus(int add, int rpos, int qpos, opnd_t opnd, OUT uint *enc_out)
 {
     uint num;
     bool q;
@@ -485,7 +485,7 @@ encode_opnd_dq_plus(int add, int qpos, opnd_t opnd, OUT uint *enc_out)
     num = opnd_get_reg(opnd) - (q ? DR_REG_Q0 : DR_REG_D0);
     if (num >= 32)
         return false;
-    *enc_out = (num - add) % 32 | (uint)q << qpos;
+    *enc_out = ((num - add) % 32) << rpos | (uint)q << qpos;
     return true;
 }
 
@@ -917,55 +917,83 @@ encode_opnd_d10(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 static inline bool
 decode_opnd_dq0(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
 {
-    return decode_opnd_dq_plus(0, 30, enc, opnd);
+    return decode_opnd_dq_plus(0, 0, 30, enc, opnd);
 }
 
 static inline bool
 encode_opnd_dq0(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 {
-    return encode_opnd_dq_plus(0, 30, opnd, enc_out);
+    return encode_opnd_dq_plus(0, 0, 30, opnd, enc_out);
+}
+
+/* dq5: D/Q register at bit position 5; bit 30 selects Q reg */
+
+static inline bool
+decode_opnd_dq5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    return decode_opnd_dq_plus(0, 5, 30, enc, opnd);
+}
+
+static inline bool
+encode_opnd_dq5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    return encode_opnd_dq_plus(0, 5, 30, opnd, enc_out);
+}
+
+/* dq16: D/Q register at bit position 16; bit 30 selects Q reg */
+
+static inline bool
+decode_opnd_dq16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    return decode_opnd_dq_plus(0, 16, 30, enc, opnd);
+}
+
+static inline bool
+encode_opnd_dq16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    return encode_opnd_dq_plus(0, 16, 30, opnd, enc_out);
 }
 
 /* dq0p1: as dq0 but add 1 mod 32 to reg number */
 
 static inline bool
 decode_opnd_dq0p1(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
 {
-    return decode_opnd_dq_plus(1, 30, enc, opnd);
+    return decode_opnd_dq_plus(1, 0, 30, enc, opnd);
 }
 
 static inline bool
 encode_opnd_dq0p1(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 {
-    return encode_opnd_dq_plus(1, 30, opnd, enc_out);
+    return encode_opnd_dq_plus(1, 0, 30, opnd, enc_out);
 }
 
 /* dq0p2: as dq0 but add 2 mod 32 to reg number */
 
 static inline bool
 decode_opnd_dq0p2(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
 {
-    return decode_opnd_dq_plus(2, 30, enc, opnd);
+    return decode_opnd_dq_plus(2, 0, 30, enc, opnd);
 }
 
 static inline bool
 encode_opnd_dq0p2(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 {
-    return encode_opnd_dq_plus(2, 30, opnd, enc_out);
+    return encode_opnd_dq_plus(2, 0, 30, opnd, enc_out);
 }
 
 /* dq0p3: as dq0 but add 3 mod 32 to reg number */
 
 static inline bool
 decode_opnd_dq0p3(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
 {
-    return decode_opnd_dq_plus(3, 30, enc, opnd);
+    return decode_opnd_dq_plus(3, 0, 30, enc, opnd);
 }
 
 static inline bool
 encode_opnd_dq0p3(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 {
-    return encode_opnd_dq_plus(3, 30, opnd, enc_out);
+    return encode_opnd_dq_plus(3, 0, 30, opnd, enc_out);
 }
 
 /* ext: extend type, dr_extend_type_t */
@@ -1003,6 +1031,101 @@ encode_opnd_extam(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out
     return true;
 }
 
+/* Maps register number n and the 2-bit floating-point type field to a scalar
+ * FP register: type 3 selects H, type 0 selects S and type 1 selects D.
+ * On success the register is stored in *reg and true is returned. Any other
+ * type value returns false and leaves *reg untouched.
+ */
+static inline bool
+decode_float_reg(uint n, uint type, reg_id_t *reg)
+{
+    switch (type) {
+    case 3:
+        *reg = DR_REG_H0 + n;
+        return true;
+    case 0:
+        *reg = DR_REG_S0 + n;
+        return true;
+    case 1:
+        *reg = DR_REG_D0 + n;
+        return true;
+    default:
+        /* No H/S/D register corresponds to this type value. */
+        return false;
+    }
+}
+
+/* Decodes a scalar FP register operand. The 5-bit register number is read at
+ * bit position pos and the 2-bit type field at bit position 22; the pair is
+ * handed to decode_float_reg to pick the H, S or D register. Returns false
+ * when decode_float_reg rejects the type field, leaving *opnd unwritten.
+ */
+static inline bool
+decode_opnd_float_reg(int pos, uint enc, OUT opnd_t *opnd)
+{
+    uint regnum = extract_uint(enc, pos, 5);
+    uint type = extract_uint(enc, 22, 2);
+    reg_id_t reg;
+
+    if (decode_float_reg(regnum, type, &reg)) {
+        *opnd = opnd_create_reg(reg);
+        return true;
+    }
+    return false;
+}
+
+/* Encodes a scalar FP register operand. encode_vreg (defined elsewhere) supplies
+ * the register's size and number; the size is translated into the 2-bit type
+ * field placed at bit 22 and the number is placed at bit position pos.
+ * The type values mirror decode_float_reg: 3 for OPSZ_2, 0 for OPSZ_4 and
+ * 1 for OPSZ_8. Returns false if encode_vreg fails or the size is none of these.
+ */
+static inline bool
+encode_opnd_float_reg(int pos, opnd_t opnd, OUT uint *enc_out)
+{
+    opnd_size_t size = OPSZ_NA;
+    uint regnum;
+    uint type;
+
+    if (!encode_vreg(&size, &regnum, opnd))
+        return false;
+
+    if (size == OPSZ_2)
+        type = 3;
+    else if (size == OPSZ_4)
+        type = 0;
+    else if (size == OPSZ_8)
+        type = 1;
+    else
+        return false;
+
+    *enc_out = (type << 22) | (regnum << pos);
+    return true;
+}
+
+/* float_reg0: H, S or D register at bit position 0; the register width is
+ * chosen by the type field handled in decode/encode_opnd_float_reg.
+ * The opcode and pc parameters are not used by these wrappers.
+ */
+
+static inline bool
+decode_opnd_float_reg0(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    return decode_opnd_float_reg(0, enc, opnd);
+}
+
+static inline bool
+encode_opnd_float_reg0(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    return encode_opnd_float_reg(0, opnd, enc_out);
+}
+
+/* float_reg5: H, S or D register at bit position 5; the register width is
+ * chosen by the type field handled in decode/encode_opnd_float_reg.
+ * The opcode and pc parameters are not used by these wrappers.
+ */
+
+static inline bool
+decode_opnd_float_reg5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    return decode_opnd_float_reg(5, enc, opnd);
+}
+
+static inline bool
+encode_opnd_float_reg5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    return encode_opnd_float_reg(5, opnd, enc_out);
+}
+
+/* float_reg16: H, S or D register at bit position 16; the register width is
+ * chosen by the type field handled in decode/encode_opnd_float_reg.
+ * The opcode and pc parameters are not used by these wrappers.
+ */
+
+static inline bool
+decode_opnd_float_reg16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    return decode_opnd_float_reg(16, enc, opnd);
+}
+
+static inline bool
+encode_opnd_float_reg16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    return encode_opnd_float_reg(16, opnd, enc_out);
+}
+
+
+
 /* h0: H register at bit position 0 */
 
 static inline bool
@@ -2549,6 +2672,64 @@ encode_opnds_tbz(byte *pc, instr_t *instr, uint enc, decode_info_t *di)
     return ENCFAIL;
 }
 
+/* Element size for vector floating point instructions. */
+
+/* fsz: element-size immediate for the single- and double-precision forms of
+ * floating point vector instructions. The two bits at positions 22:21 of the
+ * encoding are translated to the generic size operand: 0x1 yields FSZ_SINGLE
+ * and 0x3 yields FSZ_DOUBLE. Any other bit pattern fails to decode.
+ */
+static inline bool
+decode_opnd_fsz(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    switch ((enc >> 21) & 0x03) {
+    case 0x01:
+        *opnd = opnd_create_immed_int(FSZ_SINGLE, OPSZ_2b);
+        return true;
+    case 0x03:
+        *opnd = opnd_create_immed_int(FSZ_DOUBLE, OPSZ_2b);
+        return true;
+    default:
+        return false;
+    }
+}
+
+/* Encoder counterpart of decode_opnd_fsz: turns the FSZ_SINGLE or FSZ_DOUBLE
+ * immediate into the two size bits at positions 22:21 (0x1 and 0x3
+ * respectively). Any other immediate value is rejected.
+ */
+static inline bool
+encode_opnd_fsz(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    uint bits;
+
+    if (opnd_get_immed_int(opnd) == FSZ_SINGLE)
+        bits = 0x01;
+    else if (opnd_get_immed_int(opnd) == FSZ_DOUBLE)
+        bits = 0x03;
+    else
+        return false;
+
+    *enc_out = bits << 21;
+    return true;
+}
+
+/* fsz16: element-size immediate for the half-precision form of floating point
+ * vector instructions. The two bits at positions 22:21 of the encoding must be
+ * 0x2, which is translated to the generic size operand FSZ_HALF. Any other
+ * bit pattern fails to decode.
+ */
+static inline bool
+decode_opnd_fsz16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    if (((enc >> 21) & 0x03) != 0x02)
+        return false;
+
+    *opnd = opnd_create_immed_int(FSZ_HALF, OPSZ_2b);
+    return true;
+}
+
+/* Encoder counterpart of decode_opnd_fsz16: accepts only the FSZ_HALF
+ * immediate and emits 0x2 in the two size bits at positions 22:21.
+ */
+static inline bool
+encode_opnd_fsz16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    if (opnd_get_immed_int(opnd) != FSZ_HALF)
+        return false;
+
+    *enc_out = 0x02 << 21;
+    return true;
+}
+
 /******************************************************************************/
 
 /* Include automatically generated decoder and encoder. */

diff --git a/core/arch/aarch64/codec.txt b/core/arch/aarch64/codec.txt
@@ -58,6 +58,9 @@
 ----------------------------xxxx  nzcv       # flag bit specifier for CCMN, CCMP
 ---------------------------xxxxx  b0         # B register
 ---------------------------xxxxx  h0         # H register
+--------xx-----------------xxxxx  float_reg0 # H, S or D register including type for FP instruction
+--------xx------------xxxxx-----  float_reg5 # H, S or D register including type for FP instruction
+--------xx-xxxxx----------------  float_reg16 # H, S or D register including type for FP instruction
 ---------------------------xxxxx  s0         # S register
 ---------------------------xxxxx  d0         # D register
 ---------------------------xxxxx  q0         # Q register
@@ -128,6 +131,8 @@
 ??---?--xxxxxxxxxxxxxxxxxxx-----  memlit     # load literal, gets size from 31:30 and 26
 -x------------------------------  index3     # index of D subreg in Q: 0-1
 -x-------------------------xxxxx  dq0        # Q register if bit 30 is set, else D
+-x--------------------xxxxx-----  dq5        # Q register if bit 30 is set, else D
+-x---------xxxxx----------------  dq16       # Q register if bit 30 is set, else D
 -x-------------------------xxxxx  dq0p1      # ... add 1
 -x-------------------------xxxxx  dq0p2      # ... add 2
 -x-------------------------xxxxx  dq0p3      # ... add 3
@@ -146,8 +151,10 @@ x---------------------xxxxx-----  wx5        # W/X register (or WZR/XZR)
 x---------------------xxxxx-----  wx5sp      # W/X register or WSP/XSP
 x----------------xxxxx----------  wx10       # W/X register (or WZR/XZR)
 x----------xxxxx----------------  wx16       # W/X register (or WZR/XZR)
-
+---------xx---------------------  fsz        # element size of FP vector reg (single (0x1) and double (0x3) encoding)
+---------xx---------------------  fsz16      # element size of FP vector reg (half (0x2) encoding)
 ################################################################################
+
 # Instruction patterns
 
 # The syntax here is: pattern opcode opndtype* : opndtype*
@@ -939,4 +946,9 @@ x101101011000000000101xxxxxxxxxx  cls     wx0 : wx5
 
 # Data Processing - Scalar Floating-Point and Advanced SIMD
 
-# FIXME i#1569: Add: more data processing
+# FADD (vector)
+0x0011100x1xxxxx110101xxxxxxxxxx     fadd dq0 : dq5 dq16 fsz
+0x001110010xxxxx000101xxxxxxxxxx     fadd dq0 : dq5 dq16 fsz16
+
+# FADD (scalar)
+00011110xx1xxxxx001010xxxxxxxxxx fadd float_reg0 : float_reg5 float_reg16