i#4408 Add AArch64 immediate FMOV instructions (#4467)

Includes the addition of double-precision floating-point operands for Linux and of the HAVE_HALF_FLOAT feature flag for half-precision FP.
DynamoRIO · Oct 16, 2020 · 3a408a0 · 3a408a0
1 parent 5cbe811
commit 3a408a0
Show file tree

Hide file tree

Showing 14 changed files with 714 additions and 7 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1168,6 +1168,13 @@ else ()
   set(HAVE_RSEQ OFF)
 endif ()
 
+# Half-precision FP is currently supported only for AArch64 (DR_HOST_AARCH64).
+if (DR_HOST_AARCH64)
+  set(HAVE_HALF_FLOAT ON)
+else ()
+  set(HAVE_HALF_FLOAT OFF)
+endif ()
+
 ###########################################################################
 
 # Issue 20: cross-arch execve depends on these being distinct and not

diff --git a/api/docs/release.dox b/api/docs/release.dox
@@ -170,6 +170,9 @@ Further non-compatibility-affecting changes include:
    querying other-bitwidth client registration.
  - Added a new drcachesim option \p -max_global_trace_refs for specifying a global
    trace size limit that does not terminate the process.
+ - Added opnd_create_immed_double(), opnd_get_immed_double() and
+   opnd_is_immed_double() to enable the creation and handling of double
+   precision floating-point operands.
 
 **************************************************
 <hr>

diff --git a/core/arch/loadtoconst.c b/core/arch/loadtoconst.c
@@ -1601,6 +1601,7 @@ opnd_replace_reg_with_val(opnd_t *opnd, int old_reg, int val)
     case NULL_kind:
     case IMMED_INTEGER_kind:
     case IMMED_FLOAT_kind:
+    case IMMED_DOUBLE_kind:
     case PC_kind:
     case FAR_PC_kind:
     case INSTR_kind:

diff --git a/core/ir/aarch64/codec.c b/core/ir/aarch64/codec.c
@@ -38,6 +38,7 @@
  * functions next to each other.
  */
 
+#include <stdint.h>
 #include "../globals.h"
 #include "arch.h"
 #include "decode.h"
@@ -1556,6 +1557,146 @@ encode_opnd_cond(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
     return encode_opnd_int(12, 4, false, 0, 0, opnd, enc_out);
 }
 
+/* fpimm8: immediate operand for SIMD fmov */
+
+static inline bool
+decode_opnd_fpimm8(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    /* Decodes the 8-bit floating-point immediate of vector FMOV into a float
+     * immediate operand.  See the Arm Architecture Reference Manual.
+     *
+     * The immediate imm8 = a:b:c:d:e:f:g:h (h is the LSB) is split across the
+     * encoding: a:b:c sit in bits 18..16 and d:e:f:g:h in bits 9..5.
+     *
+     * Half-precision (v8.2) expansion:
+     *   imm16 = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,2):imm8<5:0>:Zeros(6)
+     *         = a:~b:bb:cdefgh:000000
+     *   datasize = if Q == '1' then 128 else 64
+     *   imm      = Replicate(imm16, datasize DIV 16)
+     *
+     * Single-precision (TODO) expansion, assuming cmode = 1111 and op = 0:
+     *   imm32 = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,5):imm8<5:0>:Zeros(19)
+     *         = a:~b:bbbbb:cdefgh:0000000000000000000
+     *   imm64 = Replicate(imm32, 2)
+     *   datasize = if Q == '1' then 128 else 64
+     *   imm      = Replicate(imm64, datasize DIV 64)
+     *
+     * Only one expanded element is built below: imm16 when HAVE_HALF_FLOAT is
+     * defined, imm32 otherwise.  Always returns true.
+     */
+#ifdef HAVE_HALF_FLOAT
+    union {
+        __fp16 f;
+        uint16_t i;
+    } bits;
+#else
+    /* For platforms on which 16 bit (half-precision) FP is not yet available. */
+    union {
+        float f;
+        uint32_t i;
+    } bits;
+#endif
+
+    uint hi3 = extract_uint(enc, 16, 3); /* a:b:c */
+    uint lo5 = extract_uint(enc, 5, 5);  /* d:e:f:g:h */
+
+    uint sign = (hi3 >> 2) & 0x1;
+    uint b = (hi3 >> 1) & 0x1;
+    uint inv_b = b ^ 0x1;
+    uint cdefgh = ((hi3 & 0x1) << 5) | (lo5 & 0x1f);
+
+#ifdef HAVE_HALF_FLOAT
+    /* a:~b:bb:cdefgh:000000 */
+    uint b_rep = (b != 0) ? 0x3 : 0;
+    bits.i = (uint16_t)((sign << 15) | (inv_b << 14) | (b_rep << 12) | (cdefgh << 6));
+#else
+    /* a:~b:bbbbb:cdefgh:0000000000000000000 */
+    uint b_rep = (b != 0) ? 0x1f : 0;
+    bits.i = (sign << 31) | (inv_b << 30) | (b_rep << 25) | (cdefgh << 19);
+#endif
+
+    *opnd = opnd_create_immed_float(bits.f);
+    return true;
+}
+
+static inline bool
+encode_opnd_fpimm8(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    /* Encodes a float immediate operand as the 8-bit immediate
+     * imm8 = a:b:c:d:e:f:g:h of vector FMOV.  Based on the IEEE 754-2008
+     * standard but with Arm-specific details that are left open by the
+     * standard.  See Arm Architecture Reference Manual.
+     *
+     * Returns false if the operand is not a float immediate, or if its value
+     * cannot be represented exactly by imm8 (e.g. 0.0 or 0.1); previously such
+     * values were silently truncated to the encoding of a different constant.
+     *
+     * The operand arrives as a single-precision float whose imm8 expansion is
+     *
+     * 3332 2222 2222 1111 1111 11
+     * 1098 7654 3210 9876 5432 1098 7654 3210
+     *  _
+     * abbb bbbc defg h000 0000 0000 0000 0000
+     *
+     * The half-precision expansion a:~b:bb:cdefgh:000000 (e.g.
+     * 0011110000000000 = 1.0) denotes the same numeric value for the same
+     * imm8: both exponents reduce to the same unbiased value and both
+     * fractions are efgh/16.  The single-precision bit pattern can therefore
+     * be validated and taken apart directly, with or without HAVE_HALF_FLOAT.
+     *
+     * 3332 2222 2222 1111 1111 11
+     * 1098 7654 3210 9876 5432 1098 7654 3210
+     * ---- ---- ---- -abc ---- --de fgh- ----   immediate encoding
+     */
+    union {
+        float f;
+        uint32_t i;
+    } fpv;
+    uint32_t exp_hi;
+    uint a, b, c, defgh;
+
+    if (!opnd_is_immed_float(opnd))
+        return false;
+
+    fpv.f = opnd_get_immed_float(opnd);
+
+    /* The 19 fraction bits below "h" must all be zero. */
+    if ((fpv.i & 0x7ffff) != 0)
+        return false;
+    /* Bits 30..25 must read ~b:bbbbb, i.e. 100000 (b=0) or 011111 (b=1). */
+    exp_hi = (fpv.i >> 25) & 0x3f;
+    if (exp_hi != 0x20 && exp_hi != 0x1f)
+        return false;
+
+    a = (fpv.i >> 31) & 0x1;
+    b = (fpv.i >> 29) & 0x1;
+    c = (fpv.i >> 24) & 0x1;
+    defgh = (fpv.i >> 19) & 0x1f;
+
+    /* a:b:c go to bits 18..16, d:e:f:g:h to bits 9..5. */
+    *enc_out = (a << 18) | (b << 17) | (c << 16) | (defgh << 5);
+    return true;
+}
+
 /* sysops: immediate operand for SYS instruction which specifies SYS operations */
 
 static inline bool
@@ -2095,6 +2236,143 @@ encode_opnd_sd_sz(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out
     return false;
 }
 
+/* fpimm13: floating-point immediate for scalar fmov */
+
+static inline bool
+decode_opnd_fpimm13(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    /* Decodes the 8-bit immediate abcdefgh of scalar FMOV, held in bits 20..13
+     * of the encoding, into a floating-point immediate operand.  From the
+     * Architecture Reference Manual, the immediate maps to floats as:
+     *
+     * 3332 2222 2222 1111 1111 11
+     * 1098 7654 3210 9876 5432 1098 7654 3210
+     *  _                            abcd efgh <- 8 bit immediate mapped to
+     * abbb bbbc defg h000 0000 0000 0000 0000 <- 32 bit float
+     *
+     *   abcd efgh  Masks
+     * 0x8    0     a
+     * 0x4    0     b
+     * 0x3    F     cdefgh
+     *
+     * and to doubles as:
+     *
+     * 6666 5555 5555 5544 44444444 33333333 33322222 22221111 111111
+     * 3210 9876 5432 1098 76543210 98765432 10987654 32109876 54321098 76543210
+     *  _                                                               abcdefgh
+     * abbb bbbb bbcd efgh 00000000 00000000 00000000 00000000 00000000 00000000
+     *
+     * Bit 22 of the encoding picks the operand kind: 0 yields a float
+     * immediate, 1 yields a double immediate.  Always returns true.
+     */
+    uint imm8 = extract_uint(enc, 13, 8);
+    uint sign = (imm8 >> 7) & 0x1;
+    uint b = (imm8 >> 6) & 0x1;
+    uint inv_b = b ^ 0x1;
+    uint cdefgh = imm8 & 0x3f;
+
+    if (extract_uint(enc, 22, 1) == 0) { /* 32 bits */
+        union {
+            float f;
+            uint32_t i;
+        } single;
+        uint32_t b_rep = (b != 0) ? 0x1f : 0;
+
+        single.i = ((uint32_t)sign << 31) | ((uint32_t)inv_b << 30) | (b_rep << 25) |
+            ((uint32_t)cdefgh << 19);
+        *opnd = opnd_create_immed_float(single.f);
+        return true;
+    }
+
+    /* 64 bits */
+    {
+        union {
+            double d;
+            uint64_t i;
+        } dbl;
+        uint64_t b_rep = (b != 0) ? 0xff : 0;
+
+        dbl.i = ((uint64_t)sign << 63) | ((uint64_t)inv_b << 62) | (b_rep << 54) |
+            ((uint64_t)cdefgh << 48);
+        *opnd = opnd_create_immed_double(dbl.d);
+    }
+    return true;
+}
+
+static inline bool
+encode_opnd_fpimm13(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    /* Encodes a float or double immediate operand as the 8-bit immediate
+     * abcdefgh of scalar FMOV, placed in bits 20..13 of the encoding:
+     *
+     *   3332 2222 2222 1111 1111 11
+     *   1098 7654 3210 9876 5432 1098 7654 3210
+     *   ---- ---- ---a bcde fgh- ---- ---- ----   immediate encoding
+     *
+     * From the Architecture Reference Manual, abcdefgh maps to floats as
+     *
+     *   3332 2222 2222 1111 1111 11
+     *   1098 7654 3210 9876 5432 1098 7654 3210
+     *    _
+     *   abbb bbbc defg h000 0000 0000 0000 0000
+     *
+     * and to doubles as
+     *
+     *   6666 5555 5555 5544 44444444 33333333 33322222 22221111 111111
+     *   3210 9876 5432 1098 76543210 98765432 10987654 32109876 54321098 76543210
+     *    _
+     *   abbb bbbb bbcd efgh 00000000 00000000 00000000 00000000 00000000 00000000
+     *
+     * Returns false if the operand is neither a float nor a double immediate,
+     * or if its value does not match the layout above exactly (e.g. 0.0 or
+     * 0.1); previously such values were silently truncated to the encoding of
+     * a different constant.
+     */
+    if (opnd_is_immed_float(opnd)) {
+        union {
+            float f;
+            uint32_t i;
+        } fpv;
+        uint32_t exp_hi;
+
+        ASSERT(extract_uint(enc, 22, 1) == 0); /* 32 bit floating point */
+        fpv.f = opnd_get_immed_float(opnd);
+
+        /* The 19 fraction bits below "h" must all be zero. */
+        if ((fpv.i & 0x7ffff) != 0)
+            return false;
+        /* Bits 30..25 must read ~b:bbbbb, i.e. 100000 (b=0) or 011111 (b=1). */
+        exp_hi = (fpv.i >> 25) & 0x3f;
+        if (exp_hi != 0x20 && exp_hi != 0x1f)
+            return false;
+
+        /* a: bit 31 -> 20, b: bit 29 -> 19, cdefgh: bits 24..19 -> 18..13. */
+        *enc_out = (((fpv.i >> 31) & 0x1) << 20) | (((fpv.i >> 29) & 0x1) << 19) |
+            (((fpv.i >> 19) & 0x3f) << 13);
+        return true;
+    }
+
+    if (opnd_is_immed_double(opnd)) {
+        union {
+            double d;
+            uint64_t i;
+        } fpv;
+        uint64_t exp_hi;
+
+        ASSERT(extract_uint(enc, 22, 1) == 1); /* 64 bit floating point */
+        fpv.d = opnd_get_immed_double(opnd);
+
+        /* The 48 fraction bits below "h" must all be zero. */
+        if ((fpv.i & 0x0000ffffffffffffULL) != 0)
+            return false;
+        /* Bits 62..54 must read ~b:bbbbbbbb, i.e. 0x100 (b=0) or 0x0ff (b=1). */
+        exp_hi = (fpv.i >> 54) & 0x1ff;
+        if (exp_hi != 0x100 && exp_hi != 0x0ff)
+            return false;
+
+        /* a: bit 63 -> 20, b: bit 61 -> 19, cdefgh: bits 53..48 -> 18..13. */
+        *enc_out = (uint)((((fpv.i >> 63) & 0x1) << 20) | (((fpv.i >> 61) & 0x1) << 19) |
+                          (((fpv.i >> 48) & 0x3f) << 13));
+        return true;
+    }
+
+    return false;
+}
+
 /* b_sz: Vector element width for SIMD instructions. */
 
 static inline bool

diff --git a/core/ir/aarch64/codec.txt b/core/ir/aarch64/codec.txt
@@ -55,7 +55,7 @@
 
 --------------------------------  impx30     # implicit X30 operand
 --------------------------------  lsl        # implicit LSL for ADD/MOV (immediate)
---------------------------------  h_sz      # element width of FP vector reg, used to
+--------------------------------  h_sz       # element width of FP vector reg, used to
                                              # distinguish FP16 and float/double encs
 ----------------------------xxxx  nzcv       # flag bit specifier for CCMN, CCMP
 ---------------------------xxxxx  w0         # W register (or WZR)
@@ -96,6 +96,7 @@
 -----------------xxxxx----------  q10        # Q register
 ----------------xxx-------------  ext        # extend type
 ----------------xxxx------------  cond       # condition for CCMN, CCMP
+-------------xxx------xxxxx-----  fpimm8     # floating-point immediate for vector fmov
 -------------xxxxxxxxxxxxxx-----  sysops     # immediate operands for SYS
 ------------xxxxxxxxxxxxxxx-----  sysreg     # operand of MRS
 -----------xxxxx----------------  ign16      # ignored reg field in load/store exclusive
@@ -127,6 +128,7 @@
 ---------x----------------------  imm12sh    # shift for ADD/SUB (immediate); '0x'
                                              # elements, depending on bit 22 (sz)
 ---------x----------------------  sd_sz      # element width of FP vector reg for single
+--------??-xxxxxxxx-------------  fpimm13    # floating-point immediate for scalar fmov
 --------xx----------------------  b_sz       # element width of a vector (8<<b_sz)
 --------xx----------------------  hs_sz      # element width of a vector (8<<hs_sz)
 --------xx----------------------  bhs_sz     # element width of a vector (8<<bhs_sz)
@@ -973,13 +975,21 @@ x101101011000000000101xxxxxxxxxx  cls     wx0 : wx5
 # Data Processing - Scalar Floating-Point and Advanced SIMD
 
 
+# FMOV immediate to scalar FP reg
+00011110111xxxxxxxx10000000xxxxx     fmov h0 : fpimm13 # Armv8.2
+00011110001xxxxxxxx10000000xxxxx     fmov s0 : fpimm13
+00011110011xxxxxxxx10000000xxxxx     fmov d0 : fpimm13
+
 # FMOV (general) GPR to FP reg
 0001111011100111000000xxxxxxxxxx     fmov h0 : w5 # Armv8.2
 0001111000100111000000xxxxxxxxxx     fmov s0 : w5
 1001111011100111000000xxxxxxxxxx     fmov h0 : x5 # Armv8.2
 1001111001100111000000xxxxxxxxxx     fmov d0 : x5
 1001111010101111000000xxxxxxxxxx     fmov q0 : x5 # only sets the top half of q0
 
+# FMOV immediate to vector reg
+0x00111100000xxx111111xxxxxxxxxx     fmov dq0 : fpimm8 h_sz # Armv8.2
+
 
 # Advanced SIMD three same (FP16)
 0x001110010xxxxx000001xxxxxxxxxx     fmaxnm    dq0 : dq5 dq16 h_sz