From a0cbec67c8356b68c304ac9299e8befcab1232f3 Mon Sep 17 00:00:00 2001
From: Yang Liu <liuyang22@iscas.ac.cn>
Date: Thu, 27 Jul 2023 23:19:25 +0800
Subject: [PATCH] i#3544 RV64: Added a new immediate format for AUIPC (#6208)

AUIPC is a PC-relative instruction that must be mangled before it goes into
the fcache, so its source operand needs to be a PC or instr operand. This patch
adds a new immediate type called `u_immpc` for AUIPC to achieve this.

Issue: https://github.com/DynamoRIO/dynamorio/issues/3544
---
 core/ir/riscv64/codec.c           | 46 +++++++++++++++++++++
 core/ir/riscv64/codec.h           |  1 +
 core/ir/riscv64/codec.py          | 69 +++++++++++++++++--------------
 core/ir/riscv64/decode.c          |  6 +--
 core/ir/riscv64/isl/rv32i.txt     |  2 +-
 suite/tests/api/ir_riscv64.c      | 54 ++++++++++++++++--------
 suite/tests/api/ir_riscv64.expect |  2 +-
 7 files changed, 124 insertions(+), 56 deletions(-)

diff --git a/core/ir/riscv64/codec.c b/core/ir/riscv64/codec.c
index 69fa91db804..80d3bdf43e9 100644
--- a/core/ir/riscv64/codec.c
+++ b/core/ir/riscv64/codec.c
@@ -473,6 +473,24 @@ decode_u_imm_opnd(dcontext_t *dc, uint32_t inst, int op_sz, byte *pc, byte *orig
     return true;
 }
 
+/* Decode the immediate field of the U-type format (PC-relative):
+ * |31        12|11   7|6      0|
+ * | imm[31:12] |  rd  | opcode |
+ *  ^----------^
+ * Into the PC operand orig_pc + sign_extend(imm[31:12] << 12). RV64 AUIPC
+ * sign-extends its 32-bit offset, so a set bit 31 yields a target below orig_pc.
+ */
+static bool
+decode_u_immpc_opnd(dcontext_t *dc, uint32_t inst, int op_sz, byte *pc, byte *orig_pc,
+                    int idx, instr_t *out)
+{
+    /* Signed, so that pointer arithmetic can move backwards from orig_pc. */
+    int32_t offs = (int32_t)((uint32_t)GET_FIELD(inst, 31, 12) << 12);
+    opnd_t opnd = opnd_create_pc(orig_pc + offs);
+    instr_set_src(out, idx, opnd);
+    return true;
+}
+
 /* Decode the immediate field of the J-type format as a pc-relative offset:
  * |   31    |30       21|   20    |19        12|11   7|6      0|
  * | imm[20] | imm[10:1] | imm[11] | imm[19:12] |  rd  | opcode |
@@ -1088,6 +1106,7 @@ opnd_dec_func_t opnd_decoders[] = {
     [RISCV64_FLD_S_IMM] = decode_s_imm_opnd,
     [RISCV64_FLD_B_IMM] = decode_b_imm_opnd,
     [RISCV64_FLD_U_IMM] = decode_u_imm_opnd,
+    [RISCV64_FLD_U_IMMPC] = decode_u_immpc_opnd,
     [RISCV64_FLD_J_IMM] = decode_j_imm_opnd,
     [RISCV64_FLD_CRD] = decode_crd_opnd,
     [RISCV64_FLD_CRDFP] = decode_crdfp_opnd,
@@ -1777,6 +1796,32 @@ encode_u_imm_opnd(instr_t *instr, byte *pc, int idx, uint32_t *out)
     return true;
 }
 
+/* Encode the immediate field of the U-type format (PC-relative):
+ * |31        12|11   7|6      0|
+ * | imm[31:12] |  rd  | opcode |
+ *  ^----------^
+ * From the operand's target minus `pc` (or instr offsets), truncated to 32 bits:
+ * |31        12|11  0|
+ * | imm[31:12] |  0  |
+ */
+static bool
+encode_u_immpc_opnd(instr_t *instr, byte *pc, int idx, uint32_t *out)
+{
+    opnd_t target = instr_get_src(instr, idx);
+    uint32_t offs;
+    if (target.kind != PC_kind && target.kind != INSTR_kind)
+        return false; /* Only PC and instr operands are accepted. */
+    if (target.kind == PC_kind)
+        offs = opnd_get_pc(target) - pc;
+    else
+        offs = (byte *)opnd_get_instr(target)->offset - (byte *)instr->offset;
+    /* FIXME i#3544: Assert that the lower 12 bits of offs are all 0, but only if
+     * decode_info_t.check_reachable is true. We should mark it as false to skip the
+     * check in get_encoding_info(), as we did for AARCHXX. */
+    *out |= SET_FIELD(offs >> 12, 31, 12);
+    return true;
+}
+
 /* Encode the immediate field of the J-type format as a pc-relative offset:
  * |   31    |30       21|   20    |19        12|11   7|6      0|
  * | imm[20] | imm[10:1] | imm[11] | imm[19:12] |  rd  | opcode |
@@ -2348,6 +2393,7 @@ opnd_enc_func_t opnd_encoders[] = {
     [RISCV64_FLD_S_IMM] = encode_s_imm_opnd,
     [RISCV64_FLD_B_IMM] = encode_b_imm_opnd,
     [RISCV64_FLD_U_IMM] = encode_u_imm_opnd,
+    [RISCV64_FLD_U_IMMPC] = encode_u_immpc_opnd,
     [RISCV64_FLD_J_IMM] = encode_j_imm_opnd,
     [RISCV64_FLD_CRD] = encode_crd_opnd,
     [RISCV64_FLD_CRDFP] = encode_crdfp_opnd,
diff --git a/core/ir/riscv64/codec.h b/core/ir/riscv64/codec.h
index d4532f36125..ae82d9c9918 100644
--- a/core/ir/riscv64/codec.h
+++ b/core/ir/riscv64/codec.h
@@ -208,6 +208,7 @@ typedef enum {
     RISCV64_FLD_S_IMM,
     RISCV64_FLD_B_IMM,
     RISCV64_FLD_U_IMM,
+    RISCV64_FLD_U_IMMPC,
     RISCV64_FLD_J_IMM,
     /* Compressed instruction fields */
     RISCV64_FLD_CRD,
diff --git a/core/ir/riscv64/codec.py b/core/ir/riscv64/codec.py
index 66291ca198e..6fe592e62b5 100755
--- a/core/ir/riscv64/codec.py
+++ b/core/ir/riscv64/codec.py
@@ -269,49 +269,56 @@ def __new__(cls, value: int, arg_name: str, is_dest: bool,
              '',
              'The 20-bit immediate field in the U-type format.'
              )
-    J_IMM = (22,
+    U_IMMPC = (22,
+               'imm',
+               False,
+               'OPSZ_20b',
+               '',
+               'The 20-bit immediate field in the U-type format (PC-relative).'
+               )
+    J_IMM = (23,
              'pc_rel',
              False,
              'OPSZ_2',
              '',
              'The immediate field in the J-type format.'
              )
-    IMM = (23,  # Used only for parsing ISA files. Concatenated into V_RS1_DISP.
+    IMM = (24,  # Used only for parsing ISA files. Concatenated into V_RS1_DISP.
            'imm',
            False,
            'OPSZ_12b',
            '',
            'The immediate field in PREFETCH instructions.'
            )
-    CRD = (24,
+    CRD = (25,
            'rd',
            True,
            'OPSZ_PTR',
            '',
            'The output register in `CR`, `CI` RVC formats (inst[11:7])'
            )
-    CRDFP = (25,
+    CRDFP = (26,
              'rd',
              True,
              'OPSZ_PTR',
              '',
              'The output floating-point register in `CR`, `CI` RVC formats (inst[11:7])'
              )
-    CRS1 = (26,
+    CRS1 = (27,
             'rs1',
             False,
             'OPSZ_PTR',
             '',
             'The first input register in `CR`, `CI` RVC formats (inst[11:7]).'
             )
-    CRS2 = (27,
+    CRS2 = (28,
             'rs2',
             False,
             'OPSZ_PTR',
             '',
             'The second input register in `CR`, `CSS` RVC formats (inst[6:2]).'
             )
-    CRS2FP = (28,
+    CRS2FP = (29,
               'rs2',
               False,
               'OPSZ_PTR',
@@ -319,146 +326,146 @@ def __new__(cls, value: int, arg_name: str, is_dest: bool,
               'The second input floating-point register in `CR`, `CSS` RVC formats (inst[6:2]).'
               )
     # Fields in compressed instructions.
-    CRD_ = (29,
+    CRD_ = (30,
             'rd',
             True,
             'OPSZ_PTR',
             '',
             'The output register in `CIW`, `CL` RVC formats (inst[4:2])'
             )
-    CRD_FP = (30,
+    CRD_FP = (31,
               'rd',
               True,
               'OPSZ_PTR',
               '',
               'The output floating-point register in `CIW`, `CL` RVC formats (inst[4:2])'
               )
-    CRS1_ = (31,
+    CRS1_ = (32,
              'rs1',
              False,
              'OPSZ_PTR',
              '',
              'The first input register in `CL`, `CS`, `CA`, `CB` RVC formats (inst[9:7]).'
              )
-    CRS2_ = (32,
+    CRS2_ = (33,
              'rs2',
              False,
              'OPSZ_PTR',
              '',
              'The second input register in `CS`, `CA` RVC formats (inst[4:2]).'
              )
-    CRS2_FP = (33,
+    CRS2_FP = (34,
                'rs2',
                False,
                'OPSZ_PTR',
                '',
                'The second input floating-point register in `CS`, `CA` RVC formats (inst[4:2]).'
                )
-    CRD__ = (34,
+    CRD__ = (35,
              'rd',
              True,
              'OPSZ_PTR',
              '',
              'The output register in `CA` RVC format (inst[9:7])'
              )
-    CSHAMT = (35,
+    CSHAMT = (36,
               'shamt',
               False,
               'OPSZ_6b',
               '',
               'The `shamt` field in the RVC format.'
               )
-    CSR_IMM = (36,
+    CSR_IMM = (37,
                'imm',
                False,
                'OPSZ_5b',
                '',
                'The immediate field in a CSR instruction.'
                )
-    CADDI16SP_IMM = (37,
+    CADDI16SP_IMM = (38,
                      'imm',
                      False,
                      'OPSZ_10b',
                      '',
                      'The immediate field in a C.ADDI16SP instruction.'
                      )
-    CLWSP_IMM = (38,
+    CLWSP_IMM = (39,
                  'sp_offset',
                  False,
                  'OPSZ_1',
                  '',
                  'The SP-relative memory location (sp+imm: imm & 0x3 == 0).'
                  )
-    CLDSP_IMM = (39,
+    CLDSP_IMM = (40,
                  'sp_offset',
                  False,
                  'OPSZ_9b',
                  '',
                  'The SP-relative memory location (sp+imm: imm & 0x7 == 0).'
                  )
-    CLUI_IMM = (40,
+    CLUI_IMM = (41,
                 'imm',
                 False,
                 'OPSZ_6b',
                 '',
                 'The immediate field in a C.LUI instruction.'
                 )
-    CSWSP_IMM = (41,
+    CSWSP_IMM = (42,
                  'sp_offset',
                  True,
                  'OPSZ_1',
                  '',
                  'The SP-relative memory location (sp+imm: imm & 0x3 == 0).'
                  )
-    CSDSP_IMM = (42,
+    CSDSP_IMM = (43,
                  'sp_offset',
                  True,
                  'OPSZ_9b',
                  '',
                  'The SP-relative memory location (sp+imm: imm & 0x7 == 0).'
                  )
-    CIW_IMM = (43,
+    CIW_IMM = (44,
                'imm',
                False,
                'OPSZ_10b',
                '',
                'The immediate field in a CIW format instruction.'
                )
-    CLW_IMM = (44,
+    CLW_IMM = (45,
                'mem',
                False,
                'OPSZ_7b',
                'im(rs1)', 'The register-relative memory location (reg+imm: imm & 0x3 == 0).')
-    CLD_IMM = (45,
+    CLD_IMM = (46,
                'mem',
                False,
                'OPSZ_1',
                'im(rs1)', 'The register-relative memory location (reg+imm: imm & 0x7 == 0).')
-    CSW_IMM = (46,
+    CSW_IMM = (47,
                'mem',
                True,
                'OPSZ_7b',
                'im(rs1)', 'The register-relative memory location (reg+imm: imm & 0x3 == 0).')
-    CSD_IMM = (47,
+    CSD_IMM = (48,
                'mem',
                True,
                'OPSZ_1',
                'im(rs1)', 'The register-relative memory location (reg+imm: imm & 0x7 == 0).')
-    CIMM5 = (48,
+    CIMM5 = (49,
              'imm',
              False,
              'OPSZ_6b',
              '',
              'The immediate field in a C.ADDI, C.ADDIW, C.LI, and C.ANDI instruction.'
              )
-    CB_IMM = (49,
+    CB_IMM = (50,
               'pc_rel',
               False,
               'OPSZ_2',
               '',
               'The immediate field in a a CB format instruction (C.BEQZ and C.BNEZ).'
               )
-    CJ_IMM = (50,
+    CJ_IMM = (51,
               'pc_rel',
               False,
               'OPSZ_2',
@@ -466,7 +473,7 @@ def __new__(cls, value: int, arg_name: str, is_dest: bool,
               'The immediate field in a CJ format instruction.'
               )
     # Virtual fields en/decoding special cases.
-    V_L_RS1_DISP = (51,
+    V_L_RS1_DISP = (52,
                     'mem',
                     False,
                     {
@@ -479,7 +486,7 @@ def __new__(cls, value: int, arg_name: str, is_dest: bool,
                     'im(rs1)',
                     'The register-relative memory source location (reg+imm).'
                     )
-    V_S_RS1_DISP = (52,
+    V_S_RS1_DISP = (53,
                     'mem',
                     True,
                     {
diff --git a/core/ir/riscv64/decode.c b/core/ir/riscv64/decode.c
index d8b56e03ad0..8c5899f42a3 100644
--- a/core/ir/riscv64/decode.c
+++ b/core/ir/riscv64/decode.c
@@ -86,9 +86,7 @@ decode(void *drcontext, byte *pc, instr_t *instr)
 byte *
 decode_from_copy(void *drcontext, byte *copy_pc, byte *orig_pc, instr_t *instr)
 {
-    /* FIXME i#3544: Not implemented */
-    ASSERT_NOT_IMPLEMENTED(false);
-    return NULL;
+    return decode_common(drcontext, copy_pc, orig_pc, instr);
 }
 
 byte *
@@ -255,8 +253,6 @@ decode_debug_checks_arch(void)
 int
 main()
 {
-    /* FIXME i#3544: Add decoder tests. */
-
     bool res = true;
     standalone_init();
     standalone_exit();
diff --git a/core/ir/riscv64/isl/rv32i.txt b/core/ir/riscv64/isl/rv32i.txt
index 9f7f2d652ab..54efe6dc656 100644
--- a/core/ir/riscv64/isl/rv32i.txt
+++ b/core/ir/riscv64/isl/rv32i.txt
@@ -2,7 +2,7 @@
 # Version 2.1
 
 lui    | u | u_imm rd      | .........................0110111
-auipc  | u | u_imm rd      | .........................0010111
+auipc  | u | u_immpc rd    | .........................0010111
 jal    | j | j_imm rd      | .........................1101111
 jalr   | i | i_imm rs1 rd  | .................000.....1100111
 beq    | b | b_imm rs2 rs1 | .................000.....1100011
diff --git a/suite/tests/api/ir_riscv64.c b/suite/tests/api/ir_riscv64.c
index 8e1d7005504..12058deab0b 100644
--- a/suite/tests/api/ir_riscv64.c
+++ b/suite/tests/api/ir_riscv64.c
@@ -90,12 +90,12 @@ test_instr_encoding(void *dc, uint opcode, instr_t *instr)
 }
 
 static void
-test_instr_encoding_jump_or_branch(void *dc, uint opcode, instr_t *instr)
+test_instr_encoding_jal_or_branch(void *dc, uint opcode, instr_t *instr)
 {
-    /* XXX i#3544: For jump and branch instructions, current disassembler will print
+    /* XXX i#3544: For jal and branch instructions, current disassembler will print
      * the complete jump address, that is, an address relative to `buf`. But the
      * value of `buf` is indeterminate at runtime, so we skip checking the disassembled
-     * format for jump and branch instructions.
+     * format for these instructions. Same for test_instr_encoding_auipc().
      *
      * FIXME i#3544: For branch instructions, we should use relative offsets instead.
      */
@@ -114,6 +114,24 @@ test_instr_encoding_jump_or_branch(void *dc, uint opcode, instr_t *instr)
     instr_destroy(dc, decin);
 }
 
+/* Encodes `instr` into `buf` with `instr_pc` as its final PC, decodes it back using
+ * the same PC, and asserts both instrs are the same. Destroys `instr`. */
+static void
+test_instr_encoding_auipc(void *dc, uint opcode, app_pc instr_pc, instr_t *instr)
+{
+    ASSERT(instr_get_opcode(instr) == opcode);
+    ASSERT(instr_is_encoding_possible(instr));
+    byte *encoded_end = instr_encode_to_copy(dc, instr, buf, instr_pc);
+    ASSERT(encoded_end != NULL);
+
+    instr_t *decoded = instr_create(dc);
+    byte *decoded_end = decode_from_copy(dc, buf, instr_pc, decoded);
+    ASSERT(decoded_end != NULL);
+    ASSERT(instr_same(instr, decoded));
+    instr_destroy(dc, instr);
+    instr_destroy(dc, decoded);
+}
+
 static void
 test_integer_load_store(void *dc)
 {
@@ -1082,45 +1100,45 @@ test_jump_and_branch(void *dc)
                              opnd_create_immed_int(42, OPSZ_20b));
     pc = test_instr_encoding(dc, OP_lui, instr);
     instr = INSTR_CREATE_auipc(dc, opnd_create_reg(DR_REG_A0),
-                               opnd_create_immed_int(42, OPSZ_20b));
-    test_instr_encoding(dc, OP_auipc, instr);
+                               opnd_create_pc(pc + (3 << 12)));
+    test_instr_encoding_auipc(dc, OP_auipc, pc, instr);
     instr = INSTR_CREATE_jal(dc, opnd_create_reg(DR_REG_A0), opnd_create_pc(pc));
-    test_instr_encoding_jump_or_branch(dc, OP_jal, instr);
+    test_instr_encoding_jal_or_branch(dc, OP_jal, instr);
     instr = INSTR_CREATE_jalr(dc, opnd_create_reg(DR_REG_A0), opnd_create_reg(DR_REG_A1),
                               opnd_create_immed_int(42, OPSZ_12b));
-    test_instr_encoding_jump_or_branch(dc, OP_jalr, instr);
+    test_instr_encoding(dc, OP_jalr, instr);
 
     instr = INSTR_CREATE_beq(dc, opnd_create_pc(pc), opnd_create_reg(DR_REG_A0),
                              opnd_create_reg(DR_REG_A1));
-    test_instr_encoding_jump_or_branch(dc, OP_beq, instr);
+    test_instr_encoding_jal_or_branch(dc, OP_beq, instr);
     instr = INSTR_CREATE_bne(dc, opnd_create_pc(pc), opnd_create_reg(DR_REG_A0),
                              opnd_create_reg(DR_REG_A1));
-    test_instr_encoding_jump_or_branch(dc, OP_bne, instr);
+    test_instr_encoding_jal_or_branch(dc, OP_bne, instr);
     instr = INSTR_CREATE_blt(dc, opnd_create_pc(pc), opnd_create_reg(DR_REG_A0),
                              opnd_create_reg(DR_REG_A1));
-    test_instr_encoding_jump_or_branch(dc, OP_blt, instr);
+    test_instr_encoding_jal_or_branch(dc, OP_blt, instr);
     instr = INSTR_CREATE_bge(dc, opnd_create_pc(pc), opnd_create_reg(DR_REG_A0),
                              opnd_create_reg(DR_REG_A1));
-    test_instr_encoding_jump_or_branch(dc, OP_bge, instr);
+    test_instr_encoding_jal_or_branch(dc, OP_bge, instr);
     instr = INSTR_CREATE_bltu(dc, opnd_create_pc(pc), opnd_create_reg(DR_REG_A0),
                               opnd_create_reg(DR_REG_A1));
-    test_instr_encoding_jump_or_branch(dc, OP_bltu, instr);
+    test_instr_encoding_jal_or_branch(dc, OP_bltu, instr);
     instr = INSTR_CREATE_bgeu(dc, opnd_create_pc(pc), opnd_create_reg(DR_REG_A0),
                               opnd_create_reg(DR_REG_A1));
-    test_instr_encoding_jump_or_branch(dc, OP_bgeu, instr);
+    test_instr_encoding_jal_or_branch(dc, OP_bgeu, instr);
 
     /* Compressed */
     instr = INSTR_CREATE_c_j(dc, opnd_create_pc(pc));
-    test_instr_encoding_jump_or_branch(dc, OP_c_j, instr);
+    test_instr_encoding_jal_or_branch(dc, OP_c_j, instr);
     instr = INSTR_CREATE_c_jr(dc, opnd_create_reg(DR_REG_A0));
-    test_instr_encoding_jump_or_branch(dc, OP_c_jr, instr);
+    test_instr_encoding_jal_or_branch(dc, OP_c_jr, instr);
     /* There is no c.jal in RV64. */
     instr = INSTR_CREATE_c_jalr(dc, opnd_create_reg(DR_REG_A0));
-    test_instr_encoding_jump_or_branch(dc, OP_c_jalr, instr);
+    test_instr_encoding_jal_or_branch(dc, OP_c_jalr, instr);
     instr = INSTR_CREATE_c_beqz(dc, opnd_create_pc(pc), opnd_create_reg(DR_REG_X8));
-    test_instr_encoding_jump_or_branch(dc, OP_c_beqz, instr);
+    test_instr_encoding_jal_or_branch(dc, OP_c_beqz, instr);
     instr = INSTR_CREATE_c_bnez(dc, opnd_create_pc(pc), opnd_create_reg(DR_REG_X8));
-    test_instr_encoding_jump_or_branch(dc, OP_c_bnez, instr);
+    test_instr_encoding_jal_or_branch(dc, OP_c_bnez, instr);
     instr = INSTR_CREATE_c_li(dc, opnd_create_reg(DR_REG_A1),
                               opnd_add_flags(opnd_create_immed_int((1 << 5) - 1, OPSZ_5b),
                                              DR_OPND_IMM_PRINT_DECIMAL));
diff --git a/suite/tests/api/ir_riscv64.expect b/suite/tests/api/ir_riscv64.expect
index 902130454ba..d9801fe86df 100644
--- a/suite/tests/api/ir_riscv64.expect
+++ b/suite/tests/api/ir_riscv64.expect
@@ -221,7 +221,7 @@ c.xor  a5 -> fp
 c.sub  a5 -> fp
 test_integer_arith complete
 lui    0x2a -> a0
-auipc  0x2a -> a0
+jalr   a1 0x2a -> a0
 c.li   31 -> a1
 c.lui  1 -> a1
 c.addi 31 -> a1