i#6238: Add categorization of x86 instruction mix and subcategories f…

…or FP category. (#6308) This PR extends #6237 by adding categorization for the x86 instruction mix. It introduces subcategories such as MATH, CONVERT, and MOVE for both x86 and AArch64. For instance, arithmetic floating-point operations will have the DR_INSTR_CATEGORY_FP | DR_INSTR_CATEGORY_MATH category. Issue: #6238. Not all x86 opcodes are categorized yet, and such instructions are marked 'UNCATEGORIZED'.
DynamoRIO · Sep 22, 2023 · d32cf34 · d32cf34
1 parent 78f1d3b
commit d32cf34
Show file tree

Hide file tree

Showing 11 changed files with 6,718 additions and 6,960 deletions.
diff --git a/api/docs/release.dox b/api/docs/release.dox
@@ -127,6 +127,11 @@ clients.
 The changes between version \DR_VERSION and 10.0.0 include the following compatibility
 changes:
  - Marked x86 rep instructions as predicated.
+ - The #dr_instr_category_t enum underwent changes to support new categories
+   such as STATE, MOVE, CONVERT, and MATH.
+   INT_MATH has been removed and replaced with MATH.
+   FP_MATH has been removed and replaced with FP|MATH.
+   The enumeration was also reordered, so the old numeric values are no longer valid.
 
 Further non-compatibility-affecting changes include:
  - Added core-sharded analysis tool support where traces are sharded by

diff --git a/core/ir/aarch64/codec.c b/core/ir/aarch64/codec.c
@@ -9562,7 +9562,7 @@ encode_opnds_tbz(byte *pc, instr_t *instr, uint enc, decode_info_t *di)
 }
 
 static inline uint
-decode_load_store_category(uint enc)
+decode_load_store_category(uint encoding)
 {
     uint category = DR_INSTR_CATEGORY_OTHER;
     /* Calculation of category is based on C4.1 'A64 instruction set encoding'
@@ -9577,35 +9577,38 @@ decode_load_store_category(uint enc)
      *                        ------
      *                         opc
      */
-    uint op0 = BITS(enc, 31, 28);
-    uint opc = BITS(enc, 23, 22);
+    uint op0 = BITS(encoding, 31, 28);
+    uint opc = BITS(encoding, 23, 22);
     if ((op0 & 0x3) == 0x3) { /* xx11 */
-        if (BITS(enc, 10, 10) == 1 && BITS(enc, 21, 21) == 1)
+        if (BITS(encoding, 10, 10) == 1 && BITS(encoding, 21, 21) == 1)
             category = DR_INSTR_CATEGORY_LOAD;
-        else if (opc == 0 || (opc == 0x2 && BITS(enc, 26, 26) == 1))
+        else if (opc == 0 || (opc == 0x2 && BITS(encoding, 26, 26) == 1))
             category = DR_INSTR_CATEGORY_STORE;
         else
             category = DR_INSTR_CATEGORY_LOAD;
     } else if ((op0 & 0x3) == 0 || (op0 & 0x3) == 0x2) { /* xx00, xx10 */
-        category =
-            (BITS(enc, 22, 22) == 0) ? DR_INSTR_CATEGORY_STORE : DR_INSTR_CATEGORY_LOAD;
-        if ((op0 & 0xc) == 0 && BITS(enc, 26, 26) == 1)
-            category |= DR_INSTR_CATEGORY_SIMD;
+        category = (BITS(encoding, 22, 22) == 0) ? DR_INSTR_CATEGORY_STORE
+                                                 : DR_INSTR_CATEGORY_LOAD;
     } else { /* xx01 */
-        if (BITS(enc, 24, 24) == 0)
+        if (BITS(encoding, 24, 24) == 0)
             category = DR_INSTR_CATEGORY_LOAD;
-        else if (BITS(enc, 21, 21) == 0)
+        else if (BITS(encoding, 21, 21) == 0)
             category = (opc == 0) ? DR_INSTR_CATEGORY_STORE : DR_INSTR_CATEGORY_LOAD;
-        else if ((opc == 0x1 || opc == 0x3) && BITS(enc, 11, 10) == 0)
+        else if ((opc == 0x1 || opc == 0x3) && BITS(encoding, 11, 10) == 0)
             category = DR_INSTR_CATEGORY_LOAD;
         else
             category = DR_INSTR_CATEGORY_STORE;
     }
+
+    /* Load/Store operation with SIMD&FP register */
+    if (category != DR_INSTR_CATEGORY_OTHER && BITS(encoding, 26, 26) == 1)
+        category |= DR_INSTR_CATEGORY_SIMD | DR_INSTR_CATEGORY_FP;
+
     return category;
 }
 
-static inline bool
-decode_category(uint enc, instr_t *instr)
+static inline void
+decode_category(uint encoding, instr_t *instr)
 {
     int category = DR_INSTR_CATEGORY_OTHER;
     /* Calculation of category is based on C4.1 'A64 instruction set encoding'
@@ -9618,10 +9621,10 @@ decode_category(uint enc, instr_t *instr)
      *               op1
      */
 
-    uint op1 = BITS(enc, 28, 25);
-    if ((BITS(enc, 31, 31) == 1 && op1 == 0) || op1 == 0x2) /* SME || SVE */
+    uint op1 = BITS(encoding, 28, 25);
+    if ((BITS(encoding, 31, 31) == 1 && op1 == 0) || op1 == 0x2) /* SME || SVE */
         category = DR_INSTR_CATEGORY_SIMD;
-    else if (BITS(enc, 31, 31) == 0 && op1 == 0) /* op1 is 0 and 31 bit is 0 */
+    else if (BITS(encoding, 31, 31) == 0 && op1 == 0) /* op1 is 0 and 31 bit is 0 */
         category = DR_INSTR_CATEGORY_UNCATEGORIZED;
     else {
         /*                       op1 - xxxx
@@ -9637,28 +9640,56 @@ decode_category(uint enc, instr_t *instr)
         if ((op1 & 0x4) == 0) {       /* op1 is x0xx */
             if ((op1 & 0x8) != 0) {   /* op1 is not 00xx */
                 if ((op1 & 0x2) == 0) /* op1 is 100x, Data processing Immediate */
-                    category = DR_INSTR_CATEGORY_INT_MATH;
+                    category = DR_INSTR_CATEGORY_MATH;
                 else /* op1 is 101x, Branches */
                     category = DR_INSTR_CATEGORY_BRANCH;
             }
         } else { /* op1 is x1xx */
-            uint op0 = BITS(enc, 31, 28);
+            uint op0 = BITS(encoding, 31, 28);
             if ((op1 & 0x1) == 0) /* op1 is x1x0, LOAD/STORE */
-                category = decode_load_store_category(enc);
+                category = decode_load_store_category(encoding);
             else if ((op1 & 0x2) == 0) /* op1 is x101 */
-                category = DR_INSTR_CATEGORY_INT_MATH;
+                category = DR_INSTR_CATEGORY_MATH;
             else { /* op1 is x111, Scalar Floating-Point and Advances SIMD */
                 /* op0 is 0xx0 || op0 is 01x1 */
                 if ((op0 & 0x9) == 0 || (op0 & 0x5) == 0x5)
                     category = DR_INSTR_CATEGORY_SIMD;
-                else
-                    category = DR_INSTR_CATEGORY_FP_MATH;
+                else {
+                    category = DR_INSTR_CATEGORY_FP;
+                    if (op0 == 0xC) /* op0 is 1100 */
+                        category |= DR_INSTR_CATEGORY_MATH;
+                    else if ((op0 & 0x5) == 1) { /* op0 is x0x1 */
+                        if ((BITS(encoding, 24, 23) & 0x2) != 0)
+                            category |= DR_INSTR_CATEGORY_MATH;
+                        else {
+                            uint op2 = BITS(encoding, 22, 19);
+                            if ((op2 & 0x4) == 0) /* op2 is x0xx */
+                                category |= DR_INSTR_CATEGORY_CONVERT;
+                            else {
+                                uint op3 = BITS(encoding, 18, 10);
+                                if ((op3 & 0x3F) == 0) /* op3 is xxx000000 */
+                                    category |= DR_INSTR_CATEGORY_CONVERT;
+                                else if ((op3 & 0x10) == 0x10) /* op3 is xxxx10000 */
+                                    category |= DR_INSTR_CATEGORY_MATH;
+                                else if ((op3 & 0x8) == 0x8) /* op3 is xxxxx1000 */
+                                    category |= DR_INSTR_CATEGORY_MATH;
+                                else if ((op3 & 0x4) == 0x4) /* op3 is xxxxxx100 */
+                                    category |= DR_INSTR_CATEGORY_MOVE;
+                                else if ((op3 & 0x3) == 0x1) /* op3 is xxxxxxx01 */
+                                    category |= DR_INSTR_CATEGORY_MATH;
+                                else if ((op3 & 0x3) == 0x2) /* op3 is xxxxxxx10 */
+                                    category |= DR_INSTR_CATEGORY_MATH;
+                                else if ((op3 & 0x3) == 0x3) /* op3 is xxxxxxx11 */
+                                    category |= DR_INSTR_CATEGORY_MOVE;
+                            }
+                        }
+                    }
+                }
             }
         }
     }
 
     instr_set_category(instr, category);
-    return true;
 }
 
 /******************************************************************************/

diff --git a/core/ir/aarch64/instr.c b/core/ir/aarch64/instr.c
@@ -312,17 +312,28 @@ instr_is_rep_string_op(instr_t *instr)
 bool
 instr_is_floating_ex(instr_t *instr, dr_fp_type_t *type OUT)
 {
-    /* For now there is only support of FP arithmetic category type (DR_FP_MATH). */
-    /* TODO i#6238: Add support for all FP types.
+    /* DR_FP_STATE instructions aren't available on AArch64.
+     * Processor state is saved/restored with loads and stores.
+     *
+     * Returns false unless the decoded category includes DR_INSTR_CATEGORY_FP.
+     * Otherwise the first matching subcategory bit, tested in the order MATH,
+     * CONVERT, MOVE, selects the value written to *type (when type is non-NULL)
+     * and the routine returns true.
+     * NOTE(review): an FP category with none of these subcategory bits asserts and
+     * returns false. The AArch64 load/store decoder ORs in DR_INSTR_CATEGORY_FP
+     * without a subcategory for SIMD&FP register accesses -- confirm such
+     * instructions cannot reach the final branch.
      */
     uint cat = instr_get_category(instr);
-    if (TEST(DR_INSTR_CATEGORY_FP_MATH, cat)) {
+    if (!TEST(DR_INSTR_CATEGORY_FP, cat))
+        return false;
+    else if (TEST(DR_INSTR_CATEGORY_MATH, cat)) {
         if (type != NULL)
             *type = DR_FP_MATH;
         return true;
+    } else if (TEST(DR_INSTR_CATEGORY_CONVERT, cat)) {
+        if (type != NULL)
+            *type = DR_FP_CONVERT;
+        return true;
+    } else if (TEST(DR_INSTR_CATEGORY_MOVE, cat)) {
+        if (type != NULL)
+            *type = DR_FP_MOVE;
+        return true;
+    } else {
+        CLIENT_ASSERT(false, "instr_is_floating_ex: FP instruction without subcategory");
+        return false;
     }
-
-    return false;
 }
 
 bool

diff --git a/core/ir/decode.h b/core/ir/decode.h
@@ -83,6 +83,9 @@ typedef struct instr_info_t {
      * stored here varies by arch.
      */
     uint opcode;
+#ifdef X86
+    uint category;
+#endif
     const char *name;
     /* Operands: each has a type and a size.
      * The opnd_size_t will instead be reg_id_t for TYPE_*REG*.

diff --git a/core/ir/decode_shared.c b/core/ir/decode_shared.c
@@ -202,8 +202,20 @@ dr_get_sve_vector_length(void)
  * type is OP_INVALID so can be copied to instr->opcode
  */
 #define xx 0 /* TYPE_NONE */, OPSZ_NA
-const instr_info_t invalid_instr = { OP_INVALID, 0x000000, "(bad)", xx, xx, xx,
-                                     xx,         xx,       0,       0,  0 };
+const instr_info_t invalid_instr = { OP_INVALID,
+                                     0x000000,
+#ifdef X86
+                                     DR_INSTR_CATEGORY_UNCATEGORIZED,
+#endif
+                                     "(bad)",
+                                     xx,
+                                     xx,
+                                     xx,
+                                     xx,
+                                     xx,
+                                     0,
+                                     0,
+                                     0 };
 #undef xx
 
 /* PR 302344: used for shared traces -tracedump_origins where we

diff --git a/core/ir/instr_api.h b/core/ir/instr_api.h
@@ -1898,20 +1898,23 @@ instr_is_rep_string_op(instr_t *instr);
  */
 typedef enum {
     DR_INSTR_CATEGORY_UNCATEGORIZED = 0x0, /**< Uncategorized. */
-    DR_INSTR_CATEGORY_INT_MATH = 0x1,      /**< Integer arithmetic operations. */
-    DR_INSTR_CATEGORY_FP_MATH = 0x2,       /**< Floating-Point arithmetic operations. */
-    DR_INSTR_CATEGORY_LOAD = 0x4,          /**< Loads. */
-    DR_INSTR_CATEGORY_STORE = 0x8,         /**< Stores. */
-    DR_INSTR_CATEGORY_BRANCH = 0x10,       /**< Branches. */
-    DR_INSTR_CATEGORY_SIMD = 0x20, /**< Operations with vector registers (SIMD). */
-    DR_INSTR_CATEGORY_OTHER = 0x40 /**< Other types of instructions. */
+    DR_INSTR_CATEGORY_FP = 0x1,            /**< Floating-Point operations. */
+    DR_INSTR_CATEGORY_LOAD = 0x2,          /**< Loads. */
+    DR_INSTR_CATEGORY_STORE = 0x4,         /**< Stores. */
+    DR_INSTR_CATEGORY_BRANCH = 0x8,        /**< Branches. */
+    DR_INSTR_CATEGORY_SIMD = 0x10,    /**< Operations with vector registers (SIMD). */
+    DR_INSTR_CATEGORY_STATE = 0x20,   /**< Saves, restores, or queries processor state. */
+    DR_INSTR_CATEGORY_MOVE = 0x40,    /**< Moves value from one location to another. */
+    DR_INSTR_CATEGORY_CONVERT = 0x80, /**< Converts values from one type to another. */
+    DR_INSTR_CATEGORY_MATH = 0x100, /**< Performs arithmetic or conditional operations. */
+    DR_INSTR_CATEGORY_OTHER = 0x200 /**< Other types of instructions. */
 } dr_instr_category_t;
 
 /**
  * Indicates which type of floating-point operation an instruction performs.
  */
 typedef enum {
-    DR_FP_STATE,   /**< Loads, stores, or queries general floating point state. */
+    DR_FP_STATE,   /**< Saves, restores, or queries processor state. */
     DR_FP_MOVE,    /**< Moves floating point values from one location to another. */
     DR_FP_CONVERT, /**< Converts to or from floating point values. */
     DR_FP_MATH,    /**< Performs arithmetic or conditional operations. */

diff --git a/core/ir/x86/decode.c b/core/ir/x86/decode.c
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2011-2023 Google, Inc.  All rights reserved.
  * Copyright (c) 2000-2010 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -80,21 +80,47 @@
 
 /* used for VEX decoding */
 #define xx TYPE_NONE, OPSZ_NA
-static const instr_info_t escape_instr = { ESCAPE, 0x000000, "(bad)", xx, xx, xx,
-                                           xx,     xx,       0,       0,  0 };
-static const instr_info_t escape_38_instr = {
-    ESCAPE_3BYTE_38, 0x000000, "(bad)", xx, xx, xx, xx, xx, 0, 0, 0
-};
-static const instr_info_t escape_3a_instr = {
-    ESCAPE_3BYTE_3a, 0x000000, "(bad)", xx, xx, xx, xx, xx, 0, 0, 0
+static const instr_info_t escape_instr = {
+    ESCAPE, 0x000000, DR_INSTR_CATEGORY_UNCATEGORIZED, "(bad)", xx, xx, xx, xx, xx, 0,
+    0,      0
 };
+static const instr_info_t escape_38_instr = { ESCAPE_3BYTE_38,
+                                              0x000000,
+                                              DR_INSTR_CATEGORY_UNCATEGORIZED,
+                                              "(bad)",
+                                              xx,
+                                              xx,
+                                              xx,
+                                              xx,
+                                              xx,
+                                              0,
+                                              0,
+                                              0 };
+static const instr_info_t escape_3a_instr = { ESCAPE_3BYTE_3a,
+                                              0x000000,
+                                              DR_INSTR_CATEGORY_UNCATEGORIZED,
+                                              "(bad)",
+                                              xx,
+                                              xx,
+                                              xx,
+                                              xx,
+                                              xx,
+                                              0,
+                                              0,
+                                              0 };
 /* used for XOP decoding */
-static const instr_info_t xop_8_instr = { XOP_8_EXT, 0x000000, "(bad)", xx, xx, xx,
-                                          xx,        xx,       0,       0,  0 };
-static const instr_info_t xop_9_instr = { XOP_9_EXT, 0x000000, "(bad)", xx, xx, xx,
-                                          xx,        xx,       0,       0,  0 };
-static const instr_info_t xop_a_instr = { XOP_A_EXT, 0x000000, "(bad)", xx, xx, xx,
-                                          xx,        xx,       0,       0,  0 };
+static const instr_info_t xop_8_instr = {
+    XOP_8_EXT, 0x000000, DR_INSTR_CATEGORY_UNCATEGORIZED, "(bad)", xx, xx, xx, xx, xx, 0,
+    0,         0
+};
+static const instr_info_t xop_9_instr = {
+    XOP_9_EXT, 0x000000, DR_INSTR_CATEGORY_UNCATEGORIZED, "(bad)", xx, xx, xx, xx, xx, 0,
+    0,         0
+};
+static const instr_info_t xop_a_instr = {
+    XOP_A_EXT, 0x000000, DR_INSTR_CATEGORY_UNCATEGORIZED, "(bad)", xx, xx, xx, xx, xx, 0,
+    0,         0
+};
 #undef xx
 
 bool
@@ -2410,6 +2436,30 @@ decode_get_tuple_type_input_size(const instr_info_t *info, decode_info_t *di)
         di->input_size = OPSZ_NA;
 }
 
+/* Computes and stores the category of a decoded instruction.
+ * The base category is read from the opcode's entry in op_instr[];
+ * DR_INSTR_CATEGORY_LOAD and DR_INSTR_CATEGORY_STORE are OR-ed in when the
+ * operands are valid and the instruction reads or writes memory, respectively.
+ * Does nothing if instr is NULL.
+ *
+ * TODO i#6238: Not all opcodes have been reviewed.
+ * In case an opcode has not been reviewed,
+ * the default category assigned to it is DR_INSTR_CATEGORY_UNCATEGORIZED.
+ */
+static inline void
+decode_category(instr_t *instr)
+{
+    if (instr != NULL) {
+        if (op_instr[instr->opcode] != NULL) {
+            uint category = op_instr[instr->opcode]->category;
+            if (instr_operands_valid(instr)) {
+                if (instr_reads_memory(instr))
+                    category |= DR_INSTR_CATEGORY_LOAD;
+                if (instr_writes_memory(instr))
+                    category |= DR_INSTR_CATEGORY_STORE;
+            }
+            instr_set_category(instr, category);
+        } else {
+            /* No op_instr[] entry for this opcode: treat it as invalid. */
+            instr_set_category(instr, DR_INSTR_CATEGORY_UNCATEGORIZED);
+        }
+    }
+}
+
 /****************************************************************************
  * Exported routines
  */
@@ -2541,6 +2591,7 @@ decode_common(dcontext_t *dcontext, byte *pc, byte *orig_pc, instr_t *instr)
                                         decode operands too */
                                _IF_DEBUG(!TEST(INSTR_IGNORE_INVALID, instr->flags)));
     instr_set_opcode(instr, info->type);
+    decode_category(instr);
     IF_X64(instr_set_x86_mode(instr, di.x86_mode));
     /* failure up to this point handled fine -- we set opcode to OP_INVALID */
     if (next_pc == NULL) {