Skip to content

Commit

Permalink
i#6238: Add categorization of x86 instruction mix and subcategories f…
Browse files Browse the repository at this point in the history
…or FP category. (#6308)

This PR extends #6237 by adding categorization for x86 instruction mix.
It introduces subcategories like MATH, CONVERT, and MOVE for both x86
and AArch64. For instance, arithmetic floating-point operations will
have DR_INSTR_CATEGORY_FP | DR_INSTR_CATEGORY_MATH category.

Issue: #6238

Not all x86 opcodes are categorized yet, and such instructions are marked
'UNCATEGORIZED'.
  • Loading branch information
kuhanov authored Sep 22, 2023
1 parent 78f1d3b commit d32cf34
Show file tree
Hide file tree
Showing 11 changed files with 6,718 additions and 6,960 deletions.
5 changes: 5 additions & 0 deletions api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ clients.
The changes between version \DR_VERSION and 10.0.0 include the following compatibility
changes:
- Marked x86 rep instructions as predicated.
- The #dr_instr_category_t enum underwent changes to support new categories
such as STATE, MOVE, CONVERT, and MATH.
INT_MATH has been removed and replaced with MATH.
FP_MATH has been removed and replaced with FP|MATH.
The enumeration was also reordered, so the old numeric values are no longer valid.

Further non-compatibility-affecting changes include:
- Added core-sharded analysis tool support where traces are sharded by
Expand Down
79 changes: 55 additions & 24 deletions core/ir/aarch64/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -9562,7 +9562,7 @@ encode_opnds_tbz(byte *pc, instr_t *instr, uint enc, decode_info_t *di)
}

static inline uint
decode_load_store_category(uint enc)
decode_load_store_category(uint encoding)
{
uint category = DR_INSTR_CATEGORY_OTHER;
/* Calculation of category is based on C4.1 'A64 instruction set encoding'
Expand All @@ -9577,35 +9577,38 @@ decode_load_store_category(uint enc)
* ------
* opc
*/
uint op0 = BITS(enc, 31, 28);
uint opc = BITS(enc, 23, 22);
uint op0 = BITS(encoding, 31, 28);
uint opc = BITS(encoding, 23, 22);
if ((op0 & 0x3) == 0x3) { /* xx11 */
if (BITS(enc, 10, 10) == 1 && BITS(enc, 21, 21) == 1)
if (BITS(encoding, 10, 10) == 1 && BITS(encoding, 21, 21) == 1)
category = DR_INSTR_CATEGORY_LOAD;
else if (opc == 0 || (opc == 0x2 && BITS(enc, 26, 26) == 1))
else if (opc == 0 || (opc == 0x2 && BITS(encoding, 26, 26) == 1))
category = DR_INSTR_CATEGORY_STORE;
else
category = DR_INSTR_CATEGORY_LOAD;
} else if ((op0 & 0x3) == 0 || (op0 & 0x3) == 0x2) { /* xx00, xx10 */
category =
(BITS(enc, 22, 22) == 0) ? DR_INSTR_CATEGORY_STORE : DR_INSTR_CATEGORY_LOAD;
if ((op0 & 0xc) == 0 && BITS(enc, 26, 26) == 1)
category |= DR_INSTR_CATEGORY_SIMD;
category = (BITS(encoding, 22, 22) == 0) ? DR_INSTR_CATEGORY_STORE
: DR_INSTR_CATEGORY_LOAD;
} else { /* xx01 */
if (BITS(enc, 24, 24) == 0)
if (BITS(encoding, 24, 24) == 0)
category = DR_INSTR_CATEGORY_LOAD;
else if (BITS(enc, 21, 21) == 0)
else if (BITS(encoding, 21, 21) == 0)
category = (opc == 0) ? DR_INSTR_CATEGORY_STORE : DR_INSTR_CATEGORY_LOAD;
else if ((opc == 0x1 || opc == 0x3) && BITS(enc, 11, 10) == 0)
else if ((opc == 0x1 || opc == 0x3) && BITS(encoding, 11, 10) == 0)
category = DR_INSTR_CATEGORY_LOAD;
else
category = DR_INSTR_CATEGORY_STORE;
}

/* Load/Store operation with SIMD&FP register */
if (category != DR_INSTR_CATEGORY_OTHER && BITS(encoding, 26, 26) == 1)
category |= DR_INSTR_CATEGORY_SIMD | DR_INSTR_CATEGORY_FP;

return category;
}

static inline bool
decode_category(uint enc, instr_t *instr)
static inline void
decode_category(uint encoding, instr_t *instr)
{
int category = DR_INSTR_CATEGORY_OTHER;
/* Calculation of category is based on C4.1 'A64 instruction set encoding'
Expand All @@ -9618,10 +9621,10 @@ decode_category(uint enc, instr_t *instr)
* op1
*/

uint op1 = BITS(enc, 28, 25);
if ((BITS(enc, 31, 31) == 1 && op1 == 0) || op1 == 0x2) /* SME || SVE */
uint op1 = BITS(encoding, 28, 25);
if ((BITS(encoding, 31, 31) == 1 && op1 == 0) || op1 == 0x2) /* SME || SVE */
category = DR_INSTR_CATEGORY_SIMD;
else if (BITS(enc, 31, 31) == 0 && op1 == 0) /* op1 is 0 and 31 bit is 0 */
else if (BITS(encoding, 31, 31) == 0 && op1 == 0) /* op1 is 0 and 31 bit is 0 */
category = DR_INSTR_CATEGORY_UNCATEGORIZED;
else {
/* op1 - xxxx
Expand All @@ -9637,28 +9640,56 @@ decode_category(uint enc, instr_t *instr)
if ((op1 & 0x4) == 0) { /* op1 is x0xx */
if ((op1 & 0x8) != 0) { /* op1 is not 00xx */
if ((op1 & 0x2) == 0) /* op1 is 100x, Data processing Immediate */
category = DR_INSTR_CATEGORY_INT_MATH;
category = DR_INSTR_CATEGORY_MATH;
else /* op1 is 101x, Branches */
category = DR_INSTR_CATEGORY_BRANCH;
}
} else { /* op1 is x1xx */
uint op0 = BITS(enc, 31, 28);
uint op0 = BITS(encoding, 31, 28);
if ((op1 & 0x1) == 0) /* op1 is x1x0, LOAD/STORE */
category = decode_load_store_category(enc);
category = decode_load_store_category(encoding);
else if ((op1 & 0x2) == 0) /* op1 is x101 */
category = DR_INSTR_CATEGORY_INT_MATH;
category = DR_INSTR_CATEGORY_MATH;
else { /* op1 is x111, Scalar Floating-Point and Advances SIMD */
/* op0 is 0xx0 || op0 is 01x1 */
if ((op0 & 0x9) == 0 || (op0 & 0x5) == 0x5)
category = DR_INSTR_CATEGORY_SIMD;
else
category = DR_INSTR_CATEGORY_FP_MATH;
else {
category = DR_INSTR_CATEGORY_FP;
if (op0 == 0xC) /* op0 is 1100 */
category |= DR_INSTR_CATEGORY_MATH;
else if ((op0 & 0x5) == 1) { /* op0 is x0x1 */
if ((BITS(encoding, 24, 23) & 0x2) != 0)
category |= DR_INSTR_CATEGORY_MATH;
else {
uint op2 = BITS(encoding, 22, 19);
if ((op2 & 0x4) == 0) /* op2 is x0xx */
category |= DR_INSTR_CATEGORY_CONVERT;
else {
uint op3 = BITS(encoding, 18, 10);
if ((op3 & 0x3F) == 0) /* op3 is xxx000000 */
category |= DR_INSTR_CATEGORY_CONVERT;
else if ((op3 & 0x10) == 0x10) /* op3 is xxxx10000 */
category |= DR_INSTR_CATEGORY_MATH;
else if ((op3 & 0x8) == 0x8) /* op3 is xxxxx1000 */
category |= DR_INSTR_CATEGORY_MATH;
else if ((op3 & 0x4) == 0x4) /* op3 is xxxxxx100 */
category |= DR_INSTR_CATEGORY_MOVE;
else if ((op3 & 0x3) == 0x1) /* op3 is xxxxxxx01 */
category |= DR_INSTR_CATEGORY_MATH;
else if ((op3 & 0x3) == 0x2) /* op3 is xxxxxxx10 */
category |= DR_INSTR_CATEGORY_MATH;
else if ((op3 & 0x3) == 0x3) /* op3 is xxxxxxx11 */
category |= DR_INSTR_CATEGORY_MOVE;
}
}
}
}
}
}
}

instr_set_category(instr, category);
return true;
}

/******************************************************************************/
Expand Down
21 changes: 16 additions & 5 deletions core/ir/aarch64/instr.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,17 +312,28 @@ instr_is_rep_string_op(instr_t *instr)
bool
instr_is_floating_ex(instr_t *instr, dr_fp_type_t *type OUT)
{
/* For now there is only support of FP arithmetic category type (DR_FP_MATH). */
/* TODO i#6238: Add support for all FP types.
/* DR_FP_STATE instructions aren't available on AArch64.
* Processor state is saved/restored with loads and stores.
*/
uint cat = instr_get_category(instr);
if (TEST(DR_INSTR_CATEGORY_FP_MATH, cat)) {
if (!TEST(DR_INSTR_CATEGORY_FP, cat))
return false;
else if (TEST(DR_INSTR_CATEGORY_MATH, cat)) {
if (type != NULL)
*type = DR_FP_MATH;
return true;
} else if (TEST(DR_INSTR_CATEGORY_CONVERT, cat)) {
if (type != NULL)
*type = DR_FP_CONVERT;
return true;
} else if (TEST(DR_INSTR_CATEGORY_MOVE, cat)) {
if (type != NULL)
*type = DR_FP_MOVE;
return true;
} else {
CLIENT_ASSERT(false, "instr_is_floating_ex: FP instruction without subcategory");
return false;
}

return false;
}

bool
Expand Down
3 changes: 3 additions & 0 deletions core/ir/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ typedef struct instr_info_t {
* stored here varies by arch.
*/
uint opcode;
#ifdef X86
uint category;
#endif
const char *name;
/* Operands: each has a type and a size.
* The opnd_size_t will instead be reg_id_t for TYPE_*REG*.
Expand Down
16 changes: 14 additions & 2 deletions core/ir/decode_shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,20 @@ dr_get_sve_vector_length(void)
* type is OP_INVALID so can be copied to instr->opcode
*/
#define xx 0 /* TYPE_NONE */, OPSZ_NA
const instr_info_t invalid_instr = { OP_INVALID, 0x000000, "(bad)", xx, xx, xx,
xx, xx, 0, 0, 0 };
const instr_info_t invalid_instr = { OP_INVALID,
0x000000,
#ifdef X86
DR_INSTR_CATEGORY_UNCATEGORIZED,
#endif
"(bad)",
xx,
xx,
xx,
xx,
xx,
0,
0,
0 };
#undef xx

/* PR 302344: used for shared traces -tracedump_origins where we
Expand Down
19 changes: 11 additions & 8 deletions core/ir/instr_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1898,20 +1898,23 @@ instr_is_rep_string_op(instr_t *instr);
*/
typedef enum {
DR_INSTR_CATEGORY_UNCATEGORIZED = 0x0, /**< Uncategorized. */
DR_INSTR_CATEGORY_INT_MATH = 0x1, /**< Integer arithmetic operations. */
DR_INSTR_CATEGORY_FP_MATH = 0x2, /**< Floating-Point arithmetic operations. */
DR_INSTR_CATEGORY_LOAD = 0x4, /**< Loads. */
DR_INSTR_CATEGORY_STORE = 0x8, /**< Stores. */
DR_INSTR_CATEGORY_BRANCH = 0x10, /**< Branches. */
DR_INSTR_CATEGORY_SIMD = 0x20, /**< Operations with vector registers (SIMD). */
DR_INSTR_CATEGORY_OTHER = 0x40 /**< Other types of instructions. */
DR_INSTR_CATEGORY_FP = 0x1, /**< Floating-Point operations. */
DR_INSTR_CATEGORY_LOAD = 0x2, /**< Loads. */
DR_INSTR_CATEGORY_STORE = 0x4, /**< Stores. */
DR_INSTR_CATEGORY_BRANCH = 0x8, /**< Branches. */
DR_INSTR_CATEGORY_SIMD = 0x10, /**< Operations with vector registers (SIMD). */
DR_INSTR_CATEGORY_STATE = 0x20, /**< Saves, restores, or queries processor state. */
DR_INSTR_CATEGORY_MOVE = 0x40, /**< Moves value from one location to another. */
DR_INSTR_CATEGORY_CONVERT = 0x80, /**< Converts to or from value. */
DR_INSTR_CATEGORY_MATH = 0x100, /**< Performs arithmetic or conditional operations. */
DR_INSTR_CATEGORY_OTHER = 0x200 /**< Other types of instructions. */
} dr_instr_category_t;

/**
* Indicates which type of floating-point operation an instruction performs.
*/
typedef enum {
DR_FP_STATE, /**< Loads, stores, or queries general floating point state. */
DR_FP_STATE, /**< Saves, restores, or queries processor state. */
DR_FP_MOVE, /**< Moves floating point values from one location to another. */
DR_FP_CONVERT, /**< Converts to or from floating point values. */
DR_FP_MATH, /**< Performs arithmetic or conditional operations. */
Expand Down
79 changes: 65 additions & 14 deletions core/ir/x86/decode.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2011-2022 Google, Inc. All rights reserved.
* Copyright (c) 2011-2023 Google, Inc. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -80,21 +80,47 @@

/* used for VEX decoding */
#define xx TYPE_NONE, OPSZ_NA
static const instr_info_t escape_instr = { ESCAPE, 0x000000, "(bad)", xx, xx, xx,
xx, xx, 0, 0, 0 };
static const instr_info_t escape_38_instr = {
ESCAPE_3BYTE_38, 0x000000, "(bad)", xx, xx, xx, xx, xx, 0, 0, 0
};
static const instr_info_t escape_3a_instr = {
ESCAPE_3BYTE_3a, 0x000000, "(bad)", xx, xx, xx, xx, xx, 0, 0, 0
static const instr_info_t escape_instr = {
ESCAPE, 0x000000, DR_INSTR_CATEGORY_UNCATEGORIZED, "(bad)", xx, xx, xx, xx, xx, 0,
0, 0
};
static const instr_info_t escape_38_instr = { ESCAPE_3BYTE_38,
0x000000,
DR_INSTR_CATEGORY_UNCATEGORIZED,
"(bad)",
xx,
xx,
xx,
xx,
xx,
0,
0,
0 };
static const instr_info_t escape_3a_instr = { ESCAPE_3BYTE_3a,
0x000000,
DR_INSTR_CATEGORY_UNCATEGORIZED,
"(bad)",
xx,
xx,
xx,
xx,
xx,
0,
0,
0 };
/* used for XOP decoding */
static const instr_info_t xop_8_instr = { XOP_8_EXT, 0x000000, "(bad)", xx, xx, xx,
xx, xx, 0, 0, 0 };
static const instr_info_t xop_9_instr = { XOP_9_EXT, 0x000000, "(bad)", xx, xx, xx,
xx, xx, 0, 0, 0 };
static const instr_info_t xop_a_instr = { XOP_A_EXT, 0x000000, "(bad)", xx, xx, xx,
xx, xx, 0, 0, 0 };
static const instr_info_t xop_8_instr = {
XOP_8_EXT, 0x000000, DR_INSTR_CATEGORY_UNCATEGORIZED, "(bad)", xx, xx, xx, xx, xx, 0,
0, 0
};
static const instr_info_t xop_9_instr = {
XOP_9_EXT, 0x000000, DR_INSTR_CATEGORY_UNCATEGORIZED, "(bad)", xx, xx, xx, xx, xx, 0,
0, 0
};
static const instr_info_t xop_a_instr = {
XOP_A_EXT, 0x000000, DR_INSTR_CATEGORY_UNCATEGORIZED, "(bad)", xx, xx, xx, xx, xx, 0,
0, 0
};
#undef xx

bool
Expand Down Expand Up @@ -2410,6 +2436,30 @@ decode_get_tuple_type_input_size(const instr_info_t *info, decode_info_t *di)
di->input_size = OPSZ_NA;
}

/* Assigns a category to a decoded instruction.
 *
 * The base category is read from the op_instr[] table entry for the
 * instruction's opcode.  When the instruction's operands are valid,
 * DR_INSTR_CATEGORY_LOAD and/or DR_INSTR_CATEGORY_STORE are OR-ed in
 * according to whether the instruction reads and/or writes memory.
 * An opcode with no table entry is marked DR_INSTR_CATEGORY_UNCATEGORIZED.
 * A NULL instr is ignored.
 *
 * TODO i#6238: Not all opcodes have been reviewed.
 * In case an opcode has not been reviewed,
 * the default category assigned to it is DR_INSTR_CATEGORY_UNCATEGORIZED.
 */
static inline void
decode_category(instr_t *instr)
{
    /* Default used when the opcode has no entry in op_instr[]. */
    uint cat = DR_INSTR_CATEGORY_UNCATEGORIZED;

    if (instr == NULL)
        return;

    if (op_instr[instr->opcode] != NULL) {
        cat = op_instr[instr->opcode]->category;
        /* Memory access is only queried once the operands are known valid. */
        if (instr_operands_valid(instr)) {
            if (instr_reads_memory(instr))
                cat |= DR_INSTR_CATEGORY_LOAD;
            if (instr_writes_memory(instr))
                cat |= DR_INSTR_CATEGORY_STORE;
        }
    }

    instr_set_category(instr, cat);
}

/****************************************************************************
* Exported routines
*/
Expand Down Expand Up @@ -2541,6 +2591,7 @@ decode_common(dcontext_t *dcontext, byte *pc, byte *orig_pc, instr_t *instr)
decode operands too */
_IF_DEBUG(!TEST(INSTR_IGNORE_INVALID, instr->flags)));
instr_set_opcode(instr, info->type);
decode_category(instr);
IF_X64(instr_set_x86_mode(instr, di.x86_mode));
/* failure up to this point handled fine -- we set opcode to OP_INVALID */
if (next_pc == NULL) {
Expand Down
Loading

0 comments on commit d32cf34

Please sign in to comment.