DynamoRIO · joshua-warburton · Oct 21, 2022 · Oct 7, 2022 · Oct 11, 2022 · Oct 11, 2022
diff --git a/api/docs/release.dox b/api/docs/release.dox
@@ -131,6 +131,11 @@ changes:
  - Changed the drcachesim -use_physical option to not modify the regular trace
    entry virtual addresses but to instead insert metadata containing translation
    information for converting virtual to physical addresses.
+ - Changed the layout of the DR_FAST_IR struct to add an element size field that
+   describes the elements of vector registers. Code that read the register
+   directly via struct.value.reg must now use
+   struct.value.reg_and_element_size.reg. The element size is available via
+   struct.value.reg_and_element_size.element_size.
 
 Further non-compatibility-affecting changes include:
  - Added AArchXX support for attaching to a running process.

diff --git a/core/ir/aarch64/codec.c b/core/ir/aarch64/codec.c
@@ -547,16 +547,17 @@ encode_reg(OUT uint *num, OUT bool *is_x, reg_id_t reg, bool is_sp)
 
 /* Decode SIMD/FP register. */
 static inline opnd_t
-decode_vreg(uint scale, uint n)
+decode_vreg(aarch64_reg_offset scale, uint n)
 {
     reg_id_t reg = DR_REG_NULL;
-    ASSERT(n < 32 && scale < 5);
+    /* The bound of 6 is one past Z_REG, the largest scale handled below. */
+    ASSERT(n < 32 && scale < 6);
     switch (scale) {
-    case 0: reg = DR_REG_B0 + n; break;
-    case 1: reg = DR_REG_H0 + n; break;
-    case 2: reg = DR_REG_S0 + n; break;
-    case 3: reg = DR_REG_D0 + n; break;
-    case 4: reg = DR_REG_Q0 + n; break;
+    case BYTE_REG: reg = DR_REG_B0 + n; break;
+    case HALF_REG: reg = DR_REG_H0 + n; break;
+    case SINGLE_REG: reg = DR_REG_S0 + n; break;
+    case DOUBLE_REG: reg = DR_REG_D0 + n; break;
+    case QUAD_REG: reg = DR_REG_Q0 + n; break;
+    case Z_REG: reg = DR_REG_Z0 + n; break;
     }
+    /* If no case matched, reg is still DR_REG_NULL here. */
     return opnd_create_reg(reg);
 }
@@ -586,6 +587,9 @@ encode_vreg(INOUT opnd_size_t *x, OUT uint *r, opnd_t opnd)
     } else if ((uint)(reg - DR_REG_Q0) < 32) {
         n = reg - DR_REG_Q0;
         sz = OPSZ_16;
+    } else if ((uint)(reg - DR_REG_Z0) < 32) {
+        n = reg - DR_REG_Z0;
+        sz = OPSZ_SCALABLE;
     } else
         return false;
     if (*x == OPSZ_NA)
@@ -1052,8 +1056,13 @@ static bool
 encode_opnd_vector_reg(int pos, int scale, opnd_t opnd, OUT uint *enc_out)
 {
     opnd_size_t size = OPSZ_NA;
+    opnd_size_t requested_size = OPSZ_NA;
+    if (scale == Z_REG)
+        requested_size = OPSZ_SCALABLE;
+    else
+        requested_size = opnd_size_from_bytes(1 << scale);
     uint r;
-    if (!encode_vreg(&size, &r, opnd) || size != opnd_size_from_bytes(1 << scale))
+    if (!encode_vreg(&size, &r, opnd) || size != requested_size)
         return false;
     *enc_out = r << pos;
     return true;
@@ -1222,18 +1231,97 @@ encode_opnd_p(uint pos_start, uint max_reg_num, opnd_t opnd, OUT uint *enc_out)
     return true;
 }
 
-/* Used to encode a SVE vector register (Z registers). */
+/* Decodes a Z register operand whose element size is fixed by the caller.
+ * bit_size is translated to the matching OPSZ_ element size; any value other
+ * than BYTE_REG..QUAD_REG makes the decode fail. The register number is taken
+ * from extract_uint(enc, pos_start, 5) (presumably the 5-bit field starting at
+ * bit pos_start) and added to DR_REG_Z0.
+ */
+static inline bool
+decode_single_sized_z(uint pos_start, aarch64_reg_offset bit_size, uint enc,
+                      OUT opnd_t *opnd)
+{
+    opnd_size_t size;
+
+    switch (bit_size) {
+    case BYTE_REG: size = OPSZ_1; break;
+    case HALF_REG: size = OPSZ_2; break;
+    case SINGLE_REG: size = OPSZ_4; break;
+    case DOUBLE_REG: size = OPSZ_8; break;
+    case QUAD_REG: size = OPSZ_16; break;
+    default: return false;
+    }
+
+    *opnd =
+        opnd_create_reg_element_vector(DR_REG_Z0 + extract_uint(enc, pos_start, 5), size);
+    return true;
+}
 
 static inline bool
-encode_opnd_z(uint pos_start, opnd_t opnd, OUT uint *enc_out)
+encode_single_sized_z(uint pos_start, aarch64_reg_offset bit_size, opnd_t opnd,
+                      OUT uint *enc_out)
 {
-    uint num;
-    if (!opnd_is_reg(opnd))
+    if (!opnd_is_element_vector_reg(opnd))
         return false;
-    num = opnd_get_reg(opnd) - DR_REG_Z0;
-    if (num >= 32)
+
+    aarch64_reg_offset size;
+    uint reg_number;
+    opnd_size_t vec_size = OPSZ_SCALABLE;
+
+    switch (opnd_get_vector_element_size(opnd)) {
+    case OPSZ_1: size = BYTE_REG; break;
+    case OPSZ_2: size = HALF_REG; break;
+    case OPSZ_4: size = SINGLE_REG; break;
+    case OPSZ_8: size = DOUBLE_REG; break;
+    case OPSZ_16: size = QUAD_REG; break;
+    default: return false;
+    }
+
+    if (bit_size != size)
         return false;
-    *enc_out = num << pos_start;
+
+    if (!encode_vreg(&vec_size, &reg_number, opnd))
+        return false;
+
+    *enc_out |= (reg_number << pos_start);
+    return true;
+}
+
+/* Decodes a Z register operand whose element size is read from the encoding.
+ * The size comes from extract_uint(enc, size_start, 2) and must lie within
+ * [min_size, max_size]; otherwise the decode fails. The register itself is
+ * decoded by decode_single_sized_z() at pos_start. pc is not used here.
+ */
+static inline bool
+decode_sized_z(uint pos_start, uint size_start, uint min_size, uint max_size, uint enc,
+               byte *pc, OUT opnd_t *opnd)
+{
+    aarch64_reg_offset bit_size = extract_uint(enc, size_start, 2);
+    if (bit_size < min_size)
+        return false;
+    if (bit_size > max_size)
+        return false;
+
+    return decode_single_sized_z(pos_start, bit_size, enc, opnd);
+}
+
+/* Encodes a Z register operand together with its element size field.
+ * Fails if opnd is not an element vector register, if its element size is not
+ * one of OPSZ_1/2/4/8, if the size falls outside [min_size, max_size], or if
+ * encode_vreg() rejects the register for OPSZ_SCALABLE. On success *enc_out
+ * holds the size shifted to size_start combined with the register number
+ * shifted to pos_start.
+ */
+static inline bool
+encode_sized_z(uint pos_start, uint size_start, uint min_size, uint max_size, opnd_t opnd,
+               OUT uint *enc_out)
+{
+    if (!opnd_is_element_vector_reg(opnd))
+        return false;
+
+    aarch64_reg_offset size;
+    uint reg_number;
+    opnd_size_t vec_size = OPSZ_SCALABLE;
+
+    switch (opnd_get_vector_element_size(opnd)) {
+    case OPSZ_1: size = BYTE_REG; break;
+    case OPSZ_2: size = HALF_REG; break;
+    case OPSZ_4: size = SINGLE_REG; break;
+    case OPSZ_8: size = DOUBLE_REG; break;
+    default: return false;
+    }
+
+    if (size > max_size)
+        return false;
+    if (size < min_size)
+        return false;
+
+    if (!encode_vreg(&vec_size, &reg_number, opnd))
+        return false;
+
+    /* Assign rather than OR: enc_out is an OUT parameter, so its incoming
+     * value must not contribute to the result.
+     */
+    *enc_out = (size << size_start) | (reg_number << pos_start);
     return true;
 }
 
@@ -1624,14 +1712,25 @@ encode_opnd_q0(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 static inline bool
 decode_opnd_z0(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
 {
-    *opnd = opnd_create_reg(DR_REG_Z0 + extract_uint(enc, 0, 5));
-    return true;
+    /* Shared vector-register decode: field at bit 0, Z_REG scale. */
+    return decode_opnd_vector_reg(0, Z_REG, enc, opnd);
 }
 
 static inline bool
 encode_opnd_z0(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 {
-    return encode_opnd_z(0, opnd, enc_out);
+    /* Shared vector-register encode: field at bit 0, Z_REG scale. */
+    return encode_opnd_vector_reg(0, Z_REG, opnd, enc_out);
+}
+
+/* z_q_0: Z register at bit position 0 with a fixed QUAD_REG element size */
+static inline bool
+decode_opnd_z_q_0(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    return decode_single_sized_z(0, QUAD_REG, enc, opnd);
+}
+
+/* Encode counterpart of z_q_0: the operand's element size must be QUAD_REG. */
+static inline bool
+encode_opnd_z_q_0(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    return encode_single_sized_z(0, QUAD_REG, opnd, enc_out);
 }
 
 /* q0p1: as q0 but add 1 mod 32 to reg number */
@@ -1820,14 +1919,25 @@ encode_opnd_q5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 static inline bool
 decode_opnd_z5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
 {
-    *opnd = opnd_create_reg(DR_REG_Z0 + extract_uint(enc, 5, 5));
-    return true;
+    /* Shared vector-register decode: field at bit 5, Z_REG scale. */
+    return decode_opnd_vector_reg(5, Z_REG, enc, opnd);
 }
 
 static inline bool
 encode_opnd_z5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 {
-    return encode_opnd_z(5, opnd, enc_out);
+    /* Shared vector-register encode: field at bit 5, Z_REG scale. */
+    return encode_opnd_vector_reg(5, Z_REG, opnd, enc_out);
+}
+
+/* z_q_5: Z register at bit position 5 with a fixed QUAD_REG element size */
+static inline bool
+decode_opnd_z_q_5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    return decode_single_sized_z(5, QUAD_REG, enc, opnd);
+}
+
+/* Encode counterpart of z_q_5: the operand's element size must be QUAD_REG. */
+static inline bool
+encode_opnd_z_q_5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    return encode_single_sized_z(5, QUAD_REG, opnd, enc_out);
 }
 
 /* mem9qpost: post-indexed mem9q, so offset is zero */
@@ -1947,6 +2057,21 @@ encode_opnd_extam(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out
     return true;
 }
 
+/* p10_low: P register at bit position 10; P0-P7 */
+
+static inline bool
+decode_opnd_p10_low(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    /* Only a 3-bit field is requested, which keeps the result within P0-P7. */
+    *opnd = opnd_create_reg(DR_REG_P0 + extract_uint(enc, 10, 3));
+    return true;
+}
+
+static inline bool
+encode_opnd_p10_low(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    /* 10 is the bit position; 7 is encode_opnd_p's max_reg_num, i.e. P0-P7. */
+    return encode_opnd_p(10, 7, opnd, enc_out);
+}
+
 /* cmode_h_sz: Operand for 16 bit elements' shift amount */
 
 static inline bool
@@ -2009,21 +2134,6 @@ encode_opnd_imm2idx(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_o
     return encode_opnd_int(12, 2, false, 0, 0, opnd, enc_out);
 }
 
-/* p10_low: P register at bit position 10; P0-P7 */
-
-static inline bool
-decode_opnd_p10_low(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
-{
-    *opnd = opnd_create_reg(DR_REG_P0 + extract_uint(enc, 10, 3));
-    return true;
-}
-
-static inline bool
-encode_opnd_p10_low(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
-{
-    return encode_opnd_p(10, 7, opnd, enc_out);
-}
-
 /* cmode_s_sz: Operand for 32 bit elements' shift amount */
 
 static inline bool
@@ -2963,14 +3073,25 @@ encode_opnd_q16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 static inline bool
 decode_opnd_z16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
 {
-    *opnd = opnd_create_reg(DR_REG_Z0 + extract_uint(enc, 16, 5));
-    return true;
+    /* Shared vector-register decode: field at bit 16, Z_REG scale. */
+    return decode_opnd_vector_reg(16, Z_REG, enc, opnd);
 }
 
 static inline bool
 encode_opnd_z16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 {
-    return encode_opnd_z(16, opnd, enc_out);
+    /* Shared vector-register encode: field at bit 16, Z_REG scale. */
+    return encode_opnd_vector_reg(16, Z_REG, opnd, enc_out);
+}
+
+/* z_q_16: Z register at bit position 16 with a fixed QUAD_REG element size */
+static inline bool
+decode_opnd_z_q_16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    return decode_single_sized_z(16, QUAD_REG, enc, opnd);
+}
+
+/* Encode counterpart of z_q_16: the operand's element size must be QUAD_REG. */
+static inline bool
+encode_opnd_z_q_16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    return encode_single_sized_z(16, QUAD_REG, opnd, enc_out);
 }
 
 /* b16: B register at bit position 16. */
@@ -4159,6 +4280,18 @@ encode_opnd_bhsd_size_reg0(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint
     return encode_bhsd_size_regx(0, enc, opcode, pc, opnd, enc_out);
 }
 
+/* z_size_bhsd_0: Z register at bit position 0; the element size is read from
+ * the field at bit 22 and must be between BYTE_REG and DOUBLE_REG.
+ */
+static inline bool
+decode_opnd_z_size_bhsd_0(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    return decode_sized_z(0, 22, BYTE_REG, DOUBLE_REG, enc, pc, opnd);
+}
+
+/* Encode counterpart of z_size_bhsd_0: register at bit 0, size field at bit 22. */
+static inline bool
+encode_opnd_z_size_bhsd_0(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    return encode_sized_z(0, 22, BYTE_REG, DOUBLE_REG, opnd, enc_out);
+}
+
 static inline bool
 decode_opnd_float_reg5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
 {
@@ -4197,6 +4330,18 @@ encode_opnd_bhsd_size_reg5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint
     return encode_bhsd_size_regx(5, enc, opcode, pc, opnd, enc_out);
 }
 
+/* z_size_bhsd_5: Z register at bit position 5; the element size is read from
+ * the field at bit 22 and must be between BYTE_REG and DOUBLE_REG.
+ */
+static inline bool
+decode_opnd_z_size_bhsd_5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    return decode_sized_z(5, 22, BYTE_REG, DOUBLE_REG, enc, pc, opnd);
+}
+
+/* Encode counterpart of z_size_bhsd_5: register at bit 5, size field at bit 22. */
+static inline bool
+encode_opnd_z_size_bhsd_5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    return encode_sized_z(5, 22, BYTE_REG, DOUBLE_REG, opnd, enc_out);
+}
+
 static inline bool
 decode_opnd_float_reg10(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
 {
@@ -4248,6 +4393,18 @@ encode_opnd_bhsd_size_reg16(uint enc, int opcode, byte *pc, opnd_t opnd,
     return encode_bhsd_size_regx(16, enc, opcode, pc, opnd, enc_out);
 }
 
+/* z_size_bhsd_16: Z register at bit position 16; the element size is read from
+ * the field at bit 22 and must be between BYTE_REG and DOUBLE_REG.
+ */
+static inline bool
+decode_opnd_z_size_bhsd_16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
+{
+    return decode_sized_z(16, 22, BYTE_REG, DOUBLE_REG, enc, pc, opnd);
+}
+
+/* Encode counterpart of z_size_bhsd_16: register at bit 16, size field at bit 22. */
+static inline bool
+encode_opnd_z_size_bhsd_16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
+{
+    return encode_sized_z(16, 22, BYTE_REG, DOUBLE_REG, opnd, enc_out);
+}
+
 /* mem0p: as mem0, but a pair of registers, so double size */
 
 static inline bool

diff --git a/core/ir/aarch64/codec.h b/core/ir/aarch64/codec.h
@@ -43,6 +43,7 @@ typedef enum {
     SINGLE_REG = 2,
     DOUBLE_REG = 3,
     QUAD_REG = 4,
+    Z_REG = 5,
     NOT_A_REG = DR_REG_INVALID
 } aarch64_reg_offset;
 

diff --git a/core/ir/aarch64/codec_sve.txt b/core/ir/aarch64/codec_sve.txt
@@ -40,13 +40,15 @@
 
 # Instruction definitions:
 
-00000100xx1xxxxx000000xxxxxxxxxx  n   9    SVE    add  z0 : z5 z16 bhsd_sz
-00000100xx011010000xxxxxxxxxxxxx  n   21   SVE    and  z0 : p10_low z0 z5 bhsd_sz
-00000100xx011011000xxxxxxxxxxxxx  n   29   SVE    bic  z0 : p10_low z0 z5 bhsd_sz
-00000100xx011001000xxxxxxxxxxxxx  n   90   SVE    eor  z0 : p10_low z0 z5 bhsd_sz
-00000100xx011000000xxxxxxxxxxxxx  n   327  SVE    orr  z0 : p10_low z0 z5 bhsd_sz
-00000100xx1xxxxx000100xxxxxxxxxx  n   403  SVE  sqadd  z0 : z5 z16 bhsd_sz
-00000100xx1xxxxx000110xxxxxxxxxx  n   425  SVE  sqsub  z0 : z5 z16 bhsd_sz
-00000100xx1xxxxx000001xxxxxxxxxx  n   470  SVE    sub  z0 : z5 z16 bhsd_sz
-00000100xx1xxxxx000101xxxxxxxxxx  n   531  SVE  uqadd  z0 : z5 z16 bhsd_sz
-00000100xx1xxxxx000111xxxxxxxxxx  n   538  SVE  uqsub  z0 : z5 z16 bhsd_sz
+00000100xx1xxxxx000000xxxxxxxxxx  n   9    SVE      add  z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16
+00000100xx011010000xxxxxxxxxxxxx  n   21   SVE      and             z0 : p10_low z0 z5 bhsd_sz
+00000100xx011011000xxxxxxxxxxxxx  n   29   SVE      bic             z0 : p10_low z0 z5 bhsd_sz
+00000100xx011001000xxxxxxxxxxxxx  n   90   SVE      eor             z0 : p10_low z0 z5 bhsd_sz
+0000010000100000101111xxxxxxxxxx  n   783  SVE  movprfx             z0 : z5
+00000100xx011000000xxxxxxxxxxxxx  n   327  SVE      orr             z0 : p10_low z0 z5 bhsd_sz
+00000100xx1xxxxx000100xxxxxxxxxx  n   403  SVE    sqadd             z0 : z5 z16 bhsd_sz
+00000100xx1xxxxx000110xxxxxxxxxx  n   425  SVE    sqsub             z0 : z5 z16 bhsd_sz
+00000100xx1xxxxx000001xxxxxxxxxx  n   470  SVE      sub             z0 : z5 z16 bhsd_sz
+00000100xx1xxxxx000101xxxxxxxxxx  n   531  SVE    uqadd             z0 : z5 z16 bhsd_sz
+00000100xx1xxxxx000111xxxxxxxxxx  n   538  SVE    uqsub             z0 : z5 z16 bhsd_sz
+00000101101xxxxx000001xxxxxxxxxx  n   566  SVE     zip2          z_q_0 : z_q_5 z_q_16
diff --git a/core/ir/aarch64/encode.c b/core/ir/aarch64/encode.c
@@ -109,7 +109,7 @@ const char *const reg_names[] = {
     "pmevtyper28_el0", "pmevtyper29_el0", "pmevtyper30_el0", "pmccfiltr_el0",
     "spsr_irq", "spsr_abt", "spsr_und", "spsr_fiq", "tpidr_el0", "tpidrro_el0",
 
-    "z0",  "z1",  "z2",  "q3",  "z4",  "z5",  "z6",  "z7",  "z8",  "z9",
+    "z0",  "z1",  "z2",  "z3",  "z4",  "z5",  "z6",  "z7",  "z8",  "z9",
     "z10", "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19",
     "z20", "z21", "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29",
     "z30", "z31",