i#4848 AArch64 Decoder: Add FCVT instructions (#4855)

This implements the scalar floating-point, vector floating-point, and scalar floating-point-to-GPR variants of FCVTAS, FCVTNS, FCVTPS, FCVTPU, and FCVTZS. Issues: #4848, #2626
DynamoRIO · Apr 19, 2021 · 23715da · 23715da
1 parent e9cda44
commit 23715da
Show file tree

Hide file tree

Showing 5 changed files with 602 additions and 27 deletions.
diff --git a/core/ir/aarch64/codec.txt b/core/ir/aarch64/codec.txt
@@ -1119,19 +1119,60 @@ x101101011000000000101xxxxxxxxxx  cls     wx0 : wx5
 0001111011100010010000xxxxxxxxxx     fcvt      s0 : h5
 0001111011100010110000xxxxxxxxxx     fcvt      d0 : h5
 
-# FCVTZU (scalar, integer) FP to GPR reg
+# Floating-point convert (scalar)
+0001111000100100000000xxxxxxxxxx     fcvtas    w0 : s5
+1001111000100100000000xxxxxxxxxx     fcvtas    x0 : s5
+0001111001100100000000xxxxxxxxxx     fcvtas    w0 : d5
+1001111001100100000000xxxxxxxxxx     fcvtas    x0 : d5
+0001111000100000000000xxxxxxxxxx     fcvtns    w0 : s5
+1001111000100000000000xxxxxxxxxx     fcvtns    x0 : s5
+0001111001100000000000xxxxxxxxxx     fcvtns    w0 : d5
+1001111001100000000000xxxxxxxxxx     fcvtns    x0 : d5
+0001111000101000000000xxxxxxxxxx     fcvtps    w0 : s5
+1001111000101000000000xxxxxxxxxx     fcvtps    x0 : s5
+0001111001101000000000xxxxxxxxxx     fcvtps    w0 : d5
+1001111001101000000000xxxxxxxxxx     fcvtps    x0 : d5
+0001111000101001000000xxxxxxxxxx     fcvtpu    w0 : s5
+1001111000101001000000xxxxxxxxxx     fcvtpu    x0 : s5
+0001111001101001000000xxxxxxxxxx     fcvtpu    w0 : d5
+1001111001101001000000xxxxxxxxxx     fcvtpu    x0 : d5
+
+# Floating-point convert (vector) (scalar single-precision and double-precision)
+0101111000100001110010xxxxxxxxxx     fcvtas    s0 : s5
+0101111001100001110010xxxxxxxxxx     fcvtas    d0 : d5
+0101111000100001101010xxxxxxxxxx     fcvtns    s0 : s5
+0101111001100001101010xxxxxxxxxx     fcvtns    d0 : d5
+0101111010100001101010xxxxxxxxxx     fcvtps    s0 : s5
+0101111011100001101010xxxxxxxxxx     fcvtps    d0 : d5
+0111111010100001101010xxxxxxxxxx     fcvtpu    s0 : s5
+0111111011100001101010xxxxxxxxxx     fcvtpu    d0 : d5
+
+# Floating-point convert (vector) (vector single-precision and double-precision)
+0x0011100x100001110010xxxxxxxxxx     fcvtas    dq0 : dq5 sd_sz
+0x0011100x100001101010xxxxxxxxxx     fcvtns    dq0 : dq5 sd_sz
+0x0011101x100001101010xxxxxxxxxx     fcvtps    dq0 : dq5 sd_sz
+0x1011101x100001101010xxxxxxxxxx     fcvtpu    dq0 : dq5 sd_sz
+
+# Floating-point convert (scalar, integer)
+0001111000111000000000xxxxxxxxxx     fcvtzs    w0 : s5
+1001111000111000000000xxxxxxxxxx     fcvtzs    x0 : s5
+0001111001111000000000xxxxxxxxxx     fcvtzs    w0 : d5
+1001111001111000000000xxxxxxxxxx     fcvtzs    x0 : d5
 0001111000111001000000xxxxxxxxxx     fcvtzu    w0 : s5
 1001111000111001000000xxxxxxxxxx     fcvtzu    x0 : s5
 0001111001111001000000xxxxxxxxxx     fcvtzu    w0 : d5
 1001111001111001000000xxxxxxxxxx     fcvtzu    x0 : d5
 
-# FCVTZU (vector, integer)
-0x1011101x100001101110xxxxxxxxxx     fcvtzu    dq0 : dq5 sd_sz
-
-# FCVTZU (Scalar single precision and double-precision)
+# Floating-point convert (vector, integer) (scalar single-precision and double-precision)
+0101111010100001101110xxxxxxxxxx     fcvtzs    s0 : s5
+0101111011100001101110xxxxxxxxxx     fcvtzs    d0 : d5
 0111111010100001101110xxxxxxxxxx     fcvtzu    s0 : s5
 0111111011100001101110xxxxxxxxxx     fcvtzu    d0 : d5
 
+# Floating-point convert (vector, integer) (vector single-precision and double-precision)
+0x0011101x100001101110xxxxxxxxxx     fcvtzs    dq0 : dq5 sd_sz
+0x1011101x100001101110xxxxxxxxxx     fcvtzu    dq0 : dq5 sd_sz
+
 # Floating-point data-processing (2 source)
 00011110xx1xxxxx000010xxxxxxxxxx     fmul      float_reg0 : float_reg5 float_reg16
 00011110xx1xxxxx000110xxxxxxxxxx     fdiv      float_reg0 : float_reg5 float_reg16

diff --git a/core/ir/aarch64/instr_create_api.h b/core/ir/aarch64/instr_create_api.h
@@ -1694,11 +1694,67 @@ enum {
     instr_create_1dst_2src(dc, OP_bif, Rd, Rm, Rn)
 
 /**
- * Creates a FCVTZU vector instruction.
- * \param dc      The void * dcontext used to allocate memory for the instr_t.
+ * Creates an FCVTAS vector instruction.
+ * \param dc      The void * dcontext used to allocate memory for the #instr_t.
+ * \param Rd      The output register.
+ * \param Rm      The input vector register.
+ * \param width   Immediate int of the vector element width. Must be #OPND_CREATE_SINGLE()
+ * or #OPND_CREATE_DOUBLE().
+ */
+#define INSTR_CREATE_fcvtas_vector(dc, Rd, Rm, width) \
+    instr_create_1dst_2src(dc, OP_fcvtas, Rd, Rm, width)
+
+/**
+ * Creates an FCVTNS vector instruction.
+ * \param dc      The void * dcontext used to allocate memory for the #instr_t.
+ * \param Rd      The output register.
+ * \param Rm      The input vector register.
+ * \param width   Immediate int of the vector element width. Must be #OPND_CREATE_SINGLE()
+ * or #OPND_CREATE_DOUBLE().
+ */
+#define INSTR_CREATE_fcvtns_vector(dc, Rd, Rm, width) \
+    instr_create_1dst_2src(dc, OP_fcvtns, Rd, Rm, width)
+
+/**
+ * Creates an FCVTPS vector instruction.
+ * \param dc      The void * dcontext used to allocate memory for the #instr_t.
+ * \param Rd      The output register.
+ * \param Rm      The input vector register.
+ * \param width   Immediate int of the vector element width. Must be #OPND_CREATE_SINGLE()
+ * or #OPND_CREATE_DOUBLE().
+ */
+#define INSTR_CREATE_fcvtps_vector(dc, Rd, Rm, width) \
+    instr_create_1dst_2src(dc, OP_fcvtps, Rd, Rm, width)
+
+/**
+ * Creates an FCVTPU vector instruction.
+ * \param dc      The void * dcontext used to allocate memory for the #instr_t.
+ * \param Rd      The output register.
+ * \param Rm      The input vector register.
+ * \param width   Immediate int of the vector element width. Must be #OPND_CREATE_SINGLE()
+ * or #OPND_CREATE_DOUBLE().
+ */
+#define INSTR_CREATE_fcvtpu_vector(dc, Rd, Rm, width) \
+    instr_create_1dst_2src(dc, OP_fcvtpu, Rd, Rm, width)
+
+/**
+ * Creates an FCVTZS vector instruction.
+ * \param dc      The void * dcontext used to allocate memory for the #instr_t.
+ * \param Rd      The output register.
+ * \param Rm      The input vector register.
+ * \param width   Immediate int of the vector element width. Must be #OPND_CREATE_SINGLE()
+ * or #OPND_CREATE_DOUBLE().
+ */
+#define INSTR_CREATE_fcvtzs_vector(dc, Rd, Rm, width) \
+    instr_create_1dst_2src(dc, OP_fcvtzs, Rd, Rm, width)
+
+/**
+ * Creates an FCVTZU vector instruction.
+ * \param dc      The void * dcontext used to allocate memory for the #instr_t.
  * \param Rd      The output register.
  * \param Rm      The first input register.
- * \param width   The input element vector width.
+ * \param width   Immediate int of the vector element width. Must be #OPND_CREATE_SINGLE()
+ * or #OPND_CREATE_DOUBLE().
  */
 #define INSTR_CREATE_fcvtzu_vector(dc, Rd, Rm, width) \
     instr_create_1dst_2src(dc, OP_fcvtzu, Rd, Rm, width)
@@ -1738,16 +1794,61 @@ enum {
 #define INSTR_CREATE_fsqrt_scalar(dc, Rd, Rm) instr_create_1dst_1src(dc, OP_fsqrt, Rd, Rm)
 
 /**
- * Creates a FCVT floating point instruction.
- * \param dc      The void * dcontext used to allocate memory for the instr_t.
- * \param Rd      The output register.
- * \param Rm      The first input register.
+ * Creates an FCVT floating point instruction.
+ * \param dc      The void * dcontext used to allocate memory for the #instr_t.
+ * \param Rd      Floating-point output register.
+ * \param Rm      Floating-point input register.
  */
 #define INSTR_CREATE_fcvt_scalar(dc, Rd, Rm) instr_create_1dst_1src(dc, OP_fcvt, Rd, Rm)
 
 /**
- * Creates a FCVTZU floating point instruction.
- * \param dc      The void * dcontext used to allocate memory for the instr_t.
+ * Creates an FCVTAS floating point instruction.
+ * \param dc      The void * dcontext used to allocate memory for the #instr_t.
+ * \param Rd      Floating-point or integer output register.
+ * \param Rm      Floating-point input register.
+ */
+#define INSTR_CREATE_fcvtas_scalar(dc, Rd, Rm) \
+    instr_create_1dst_1src(dc, OP_fcvtas, Rd, Rm)
+
+/**
+ * Creates an FCVTNS floating point instruction.
+ * \param dc      The void * dcontext used to allocate memory for the #instr_t.
+ * \param Rd      Floating-point or integer output register.
+ * \param Rm      Floating-point input register.
+ */
+#define INSTR_CREATE_fcvtns_scalar(dc, Rd, Rm) \
+    instr_create_1dst_1src(dc, OP_fcvtns, Rd, Rm)
+
+/**
+ * Creates an FCVTPS floating point instruction.
+ * \param dc      The void * dcontext used to allocate memory for the #instr_t.
+ * \param Rd      Floating-point or integer output register.
+ * \param Rm      Floating-point input register.
+ */
+#define INSTR_CREATE_fcvtps_scalar(dc, Rd, Rm) \
+    instr_create_1dst_1src(dc, OP_fcvtps, Rd, Rm)
+
+/**
+ * Creates an FCVTPU floating point instruction.
+ * \param dc      The void * dcontext used to allocate memory for the #instr_t.
+ * \param Rd      Floating-point or integer output register.
+ * \param Rm      Floating-point input register.
+ */
+#define INSTR_CREATE_fcvtpu_scalar(dc, Rd, Rm) \
+    instr_create_1dst_1src(dc, OP_fcvtpu, Rd, Rm)
+
+/**
+ * Creates an FCVTZS floating point instruction.
+ * \param dc      The void * dcontext used to allocate memory for the #instr_t.
+ * \param Rd      Floating-point or integer output register.
+ * \param Rm      Floating-point input register.
+ */
+#define INSTR_CREATE_fcvtzs_scalar(dc, Rd, Rm) \
+    instr_create_1dst_1src(dc, OP_fcvtzs, Rd, Rm)
+
+/**
+ * Creates an FCVTZU floating point instruction.
+ * \param dc      The void * dcontext used to allocate memory for the #instr_t.
  * \param Rd      Floating-point or integer output register.
  * \param Rm      Floating-point input register.
  */

diff --git a/suite/tests/api/dis-a64.txt b/suite/tests/api/dis-a64.txt
@@ -2095,19 +2095,6 @@ fd7fffff : ldr    d31, [sp,#32760]        : ldr    +0x7ff8(%sp)[8byte] -> %d31
 1e67c26b : frinti d11, d19                          : frinti %d19 -> %d11
 1e27c26b : frinti s11, s19                          : frinti %s19 -> %s11
 1ee7c26b : frinti h11, h19                          : frinti %h19 -> %h11
-1e624117 : fcvt s23, d8                             : fcvt   %d8 -> %s23
-1e63c1fd : fcvt h29, d15                            : fcvt   %d15 -> %h29
-1ee2431c : fcvt s28, h24                            : fcvt   %h24 -> %s28
-1ee2c002 : fcvt d2, h0                              : fcvt   %h0 -> %d2
-1e390121 : fcvtzu w1, s9                            : fcvtzu %s9 -> %w1
-9e39012b : fcvtzu x11, s9                           : fcvtzu %s9 -> %x11
-1e7901a7 : fcvtzu w7, d13                           : fcvtzu %d13 -> %w7
-9e790055 : fcvtzu x21, d2                           : fcvtzu %d2 -> %x21
-2ea1b829 : fcvtzu v9.2s, v1.2s                      : fcvtzu %d1 $0x02 -> %d9
-6ea1b910 : fcvtzu v16.4s, v8.4s                     : fcvtzu %q8 $0x02 -> %q16
-6ee1b803 : fcvtzu v3.2d, v0.2d                      : fcvtzu %q0 $0x03 -> %q3
-7ea1b929 : fcvtzu s9, s9                            : fcvtzu %s9 -> %s9
-7ee1b841 : fcvtzu d1, d2                            : fcvtzu %d2 -> %d1
 
 # Floating-point data-processing (2 source)
 1e7e0b62 : fmul d2, d27, d30                        : fmul   %d27 %d30 -> %d2
@@ -2152,6 +2139,66 @@ fd7fffff : ldr    d31, [sp,#32760]        : ldr    +0x7ff8(%sp)[8byte] -> %d31
 1f3789e4 : fnmsub s4, s15, s23, s2                  : fnmsub %s15 %s23 %s2 -> %s4
 1ff789e4 : fnmsub h4, h15, h23, h2                  : fnmsub %h15 %h23 %h2 -> %h4
 
+# Floating-point conversion
+1e624117 : fcvt s23, d8                             : fcvt   %d8 -> %s23
+1e63c1fd : fcvt h29, d15                            : fcvt   %d15 -> %h29
+1ee2431c : fcvt s28, h24                            : fcvt   %h24 -> %s28
+1ee2c002 : fcvt d2, h0                              : fcvt   %h0 -> %d2
+1e240034 : fcvtas w20, s1                           : fcvtas %s1 -> %w20
+9e240067 : fcvtas x7, s3                            : fcvtas %s3 -> %x7
+1e6402c0 : fcvtas w0, d22                           : fcvtas %d22 -> %w0
+9e640015 : fcvtas x21, d0                           : fcvtas %d0 -> %x21
+0e21c827 : fcvtas v7.2s, v1.2s                      : fcvtas %d1 $0x02 -> %d7
+4e21c920 : fcvtas v0.4s, v9.4s                      : fcvtas %q9 $0x02 -> %q0
+4e61cba5 : fcvtas v5.2d, v29.2d                     : fcvtas %q29 $0x03 -> %q5
+5e21cbde : fcvtas s30, s30                          : fcvtas %s30 -> %s30
+5e61c987 : fcvtas d7, d12                           : fcvtas %d12 -> %d7
+1e200115 : fcvtns w21, s8                           : fcvtns %s8 -> %w21
+9e2002ae : fcvtns x14, s21                          : fcvtns %s21 -> %x14
+1e6003a7 : fcvtns w7, d29                           : fcvtns %d29 -> %w7
+9e600229 : fcvtns x9, d17                           : fcvtns %d17 -> %x9
+0e21a925 : fcvtns v5.2s, v9.2s                      : fcvtns %d9 $0x02 -> %d5
+4e21aa61 : fcvtns v1.4s, v19.4s                     : fcvtns %q19 $0x02 -> %q1
+4e61a971 : fcvtns v17.2d, v11.2d                    : fcvtns %q11 $0x03 -> %q17
+5e21a849 : fcvtns s9, s2                            : fcvtns %s2 -> %s9
+5e61a8f1 : fcvtns d17, d7                           : fcvtns %d7 -> %d17
+1e2800f3 : fcvtps w19, s7                           : fcvtps %s7 -> %w19
+9e280085 : fcvtps x5, s4                            : fcvtps %s4 -> %x5
+1e680148 : fcvtps w8, d10                           : fcvtps %d10 -> %w8
+9e680249 : fcvtps x9, d18                           : fcvtps %d18 -> %x9
+0ea1a926 : fcvtps v6.2s, v9.2s                      : fcvtps %d9 $0x02 -> %d6
+4ea1aa84 : fcvtps v4.4s, v20.4s                     : fcvtps %q20 $0x02 -> %q4
+4ee1a80f : fcvtps v15.2d, v0.2d                     : fcvtps %q0 $0x03 -> %q15
+5ea1a89d : fcvtps s29, s4                           : fcvtps %s4 -> %s29
+5ee1aa0c : fcvtps d12, d16                          : fcvtps %d16 -> %d12
+1e290041 : fcvtpu w1, s2                            : fcvtpu %s2 -> %w1
+9e29016e : fcvtpu x14, s11                          : fcvtpu %s11 -> %x14
+1e690044 : fcvtpu w4, d2                            : fcvtpu %d2 -> %w4
+9e690029 : fcvtpu x9, d1                            : fcvtpu %d1 -> %x9
+2ea1ab01 : fcvtpu v1.2s, v24.2s                     : fcvtpu %d24 $0x02 -> %d1
+6ea1aab6 : fcvtpu v22.4s, v21.4s                    : fcvtpu %q21 $0x02 -> %q22
+6ee1a96b : fcvtpu v11.2d, v11.2d                    : fcvtpu %q11 $0x03 -> %q11
+7ea1aabb : fcvtpu s27, s21                          : fcvtpu %s21 -> %s27
+7ee1aa4c : fcvtpu d12, d18                          : fcvtpu %d18 -> %d12
+1e38010b : fcvtzs w11, s8                           : fcvtzs %s8 -> %w11
+9e38006e : fcvtzs x14, s3                           : fcvtzs %s3 -> %x14
+1e780380 : fcvtzs w0, d28                           : fcvtzs %d28 -> %w0
+9e780029 : fcvtzs x9, d1                            : fcvtzs %d1 -> %x9
+0ea1b903 : fcvtzs v3.2s, v8.2s                      : fcvtzs %d8 $0x02 -> %d3
+4ea1baa9 : fcvtzs v9.4s, v21.4s                     : fcvtzs %q21 $0x02 -> %q9
+4ee1b84b : fcvtzs v11.2d, v2.2d                     : fcvtzs %q2 $0x03 -> %q11
+5ea1b863 : fcvtzs s3, s3                            : fcvtzs %s3 -> %s3
+5ee1b8f1 : fcvtzs d17, d7                           : fcvtzs %d7 -> %d17
+1e390121 : fcvtzu w1, s9                            : fcvtzu %s9 -> %w1
+9e39012b : fcvtzu x11, s9                           : fcvtzu %s9 -> %x11
+1e7901a7 : fcvtzu w7, d13                           : fcvtzu %d13 -> %w7
+9e790055 : fcvtzu x21, d2                           : fcvtzu %d2 -> %x21
+2ea1b829 : fcvtzu v9.2s, v1.2s                      : fcvtzu %d1 $0x02 -> %d9
+6ea1b910 : fcvtzu v16.4s, v8.4s                     : fcvtzu %q8 $0x02 -> %q16
+6ee1b803 : fcvtzu v3.2d, v0.2d                      : fcvtzu %q0 $0x03 -> %q3
+7ea1b929 : fcvtzu s9, s9                            : fcvtzu %s9 -> %s9
+7ee1b841 : fcvtzu d1, d2                            : fcvtzu %d2 -> %d1
+
 # SVE bitwise logical operations (predicated)
 04181da2 : orr z2.b, p7/m, z2.b, z13.b              : orr    %p7 %z2 %z13 $0x00 -> %z2
 04581da2 : orr z2.h, p7/m, z2.h, z13.h              : orr    %p7 %z2 %z13 $0x01 -> %z2