From 45b526a48f5f327b2996cab371fc304766e02741 Mon Sep 17 00:00:00 2001 From: Joshua Warburton Date: Thu, 4 Aug 2022 16:07:34 +0100 Subject: [PATCH] i#2626: AArch64 v8.2 codec: Add BCAX, EOR3, P/ESB instructions (#5590) This patch adds the following decodes, encoding macros and appropriate tests for BCAX <Vd>.16B, <Vn>.16B, <Vm>.16B, <Va>.16B EOR3 <Vd>.16B, <Vn>.16B, <Vm>.16B, <Va>.16B ESB PSB These instructions are part of features that have not yet been added, so BASE has been used as a placeholder where appropriate. Issue: #4860 --- core/arch/aarch64/proc.c | 3 +- core/ir/aarch64/codec_v82.txt | 20 ++++--- core/ir/aarch64/instr_create_api.h | 54 ++++++++++++++++++ suite/tests/api/dis-a64.txt | 42 ++++++++++++++ suite/tests/api/ir_aarch64_v82.c | 88 ++++++++++++++++++++++++++++++ 5 files changed, 198 insertions(+), 9 deletions(-) diff --git a/core/arch/aarch64/proc.c b/core/arch/aarch64/proc.c index 3213d6a3722..137ac717721 100644 --- a/core/arch/aarch64/proc.c +++ b/core/arch/aarch64/proc.c @@ -160,7 +160,8 @@ proc_has_feature(feature_bit_t f) # if defined(BUILD_TESTS) if (f == FEATURE_LSE || f == FEATURE_RDM || f == FEATURE_FP16 || f == FEATURE_DotProd || f == FEATURE_SVE || f == FEATURE_LOR || - f == FEATURE_FHM || f == FEATURE_SM3 || f == FEATURE_SM4 || f == FEATURE_SHA512) + f == FEATURE_FHM || f == FEATURE_SM3 || f == FEATURE_SM4 || f == FEATURE_SHA512 || + f == FEATURE_SHA3) return true; # endif ushort feat_nibble, feat_val, freg_nibble, feat_nsflag; diff --git a/core/ir/aarch64/codec_v82.txt b/core/ir/aarch64/codec_v82.txt index fd9ad7308a7..8b591137ba3 100644 --- a/core/ir/aarch64/codec_v82.txt +++ b/core/ir/aarch64/codec_v82.txt @@ -34,6 +34,9 @@ # Instruction definitions: +11001110001xxxxx0xxxxxxxxxxxxxxx n 599 SHA3 bcax q0 : q5 q16 q10 b_const_sz +11001110000xxxxx0xxxxxxxxxxxxxxx n 600 SHA3 eor3 q0 : q5 q16 q10 b_const_sz +11010101000000110010001000011111 n 601 BASE esb : 0x101110110xxxxx000101xxxxxxxxxx n 94 FP16 fabd dq0 : dq5 dq16 h_sz 01111110110xxxxx000101xxxxxxxxxx n 94 
FP16 fabd h0 : h5 h16 0x00111011111000111110xxxxxxxxxx n 95 FP16 fabs dq0 : dq5 h_sz @@ -105,16 +108,16 @@ x001111011100001000000xxxxxxxxxx n 120 FP16 fcvtnu wx0 : h5 0x101110110xxxxx001101xxxxxxxxxx n 139 FP16 fminp dq0 : dq5 dq16 h_sz 0x00111010110000111110xxxxxxxxxx n 140 FP16 fminv h0 : dq5 h_sz 0x001110010xxxxx000011xxxxxxxxxx n 141 FP16 fmla dq0 : dq0 dq5 dq16 h_sz -0x001110001xxxxx111011xxxxxxxxxx n 142 FHM fmlal dq0 : dq0 sd5 sd16 h_sz -0x00111110xxxxxx0000x0xxxxxxxxxx n 142 FHM fmlal dq0 : dq0 sd5 sd16_h_sz vindex_H h_sz -0x101110001xxxxx110011xxxxxxxxxx n 143 FHM fmlal2 dq0 : dq0 sd5 sd16 h_sz -0x10111110xxxxxx1000x0xxxxxxxxxx n 143 FHM fmlal2 dq0 : dq0 sd5 sd16_h_sz vindex_H h_sz +0x001110001xxxxx111011xxxxxxxxxx n 142 FHM fmlal dq0 : dq0 sd5 sd16 h_sz +0x00111110xxxxxx0000x0xxxxxxxxxx n 142 FHM fmlal dq0 : dq0 sd5 sd16_h_sz vindex_H h_sz +0x101110001xxxxx110011xxxxxxxxxx n 143 FHM fmlal2 dq0 : dq0 sd5 sd16 h_sz +0x10111110xxxxxx1000x0xxxxxxxxxx n 143 FHM fmlal2 dq0 : dq0 sd5 sd16_h_sz vindex_H h_sz 0x001110110xxxxx000011xxxxxxxxxx n 144 FP16 fmls dq0 : dq0 dq5 dq16 h_sz 0x00111100xxxxxx0101x0xxxxxxxxxx n 144 FP16 fmls dq0 : dq5 dq16_h_sz vindex_H h_sz -0x001110101xxxxx111011xxxxxxxxxx n 145 FHM fmlsl dq0 : dq0 sd5 sd16 h_sz -0x00111110xxxxxx0100x0xxxxxxxxxx n 145 FHM fmlsl dq0 : dq0 sd5 sd16_h_sz vindex_H h_sz -0x101110101xxxxx110011xxxxxxxxxx n 146 FHM fmlsl2 dq0 : dq0 sd5 sd16 h_sz -0x10111110xxxxxx1100x0xxxxxxxxxx n 146 FHM fmlsl2 dq0 : dq0 sd5 sd16_h_sz vindex_H h_sz +0x001110101xxxxx111011xxxxxxxxxx n 145 FHM fmlsl dq0 : dq0 sd5 sd16 h_sz +0x00111110xxxxxx0100x0xxxxxxxxxx n 145 FHM fmlsl dq0 : dq0 sd5 sd16_h_sz vindex_H h_sz +0x101110101xxxxx110011xxxxxxxxxx n 146 FHM fmlsl2 dq0 : dq0 sd5 sd16 h_sz +0x10111110xxxxxx1100x0xxxxxxxxxx n 146 FHM fmlsl2 dq0 : dq0 sd5 sd16_h_sz vindex_H h_sz 00011110111xxxxxxxx10000000xxxxx n 147 FP16 fmov h0 : fpimm13 0001111011100111000000xxxxxxxxxx n 147 FP16 fmov h0 : w5 1001111011100111000000xxxxxxxxxx n 147 FP16 
fmov h0 : x5 @@ -144,6 +147,7 @@ x001111011100001000000xxxxxxxxxx n 120 FP16 fcvtnu wx0 : h5 0111111011111001110110xxxxxxxxxx n 165 FP16 frsqrte h0 : h5 0x001110110xxxxx001111xxxxxxxxxx n 166 FP16 frsqrts dq0 : dq5 dq16 h_sz 0x001110110xxxxx000101xxxxxxxxxx n 168 FP16 fsub dq0 : dq5 dq16 h_sz +11010101000000110010001000111111 n 602 BASE psb : 0x001110100xxxxx100101xxxxxxxxxx n 364 DotProd sdot dq0 : dq5 dq16 s_const_sz b_const_sz 11001110011xxxxx100000xxxxxxxxxx n 595 SHA512 sha512h q0 : q0 q5 q16 d_const_sz 11001110011xxxxx100001xxxxxxxxxx n 596 SHA512 sha512h2 q0 : q0 q5 q16 d_const_sz diff --git a/core/ir/aarch64/instr_create_api.h b/core/ir/aarch64/instr_create_api.h index 00843b3ba29..b5a2ab55a71 100644 --- a/core/ir/aarch64/instr_create_api.h +++ b/core/ir/aarch64/instr_create_api.h @@ -3874,4 +3874,58 @@ #define INSTR_CREATE_sm4ekey_vector(dc, Rd, Rn, Rm, Rm_elsz) \ instr_create_1dst_3src(dc, OP_sm4ekey, Rd, Rn, Rm, Rm_elsz) +/** + * Creates a BCAX instruction. + * + * This macro is used to encode the forms: + * \verbatim + * BCAX <Vd>.16B, <Vn>.16B, <Vm>.16B, <Va>.16B + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Rd The first destination vector register, Q (quadword, 128 bits) + * \param Rn The second source vector register, Q (quadword, 128 bits) + * \param Rm The third source vector register, Q (quadword, 128 bits) + * \param Ra The fourth source vector register, Q (quadword, 128 bits) + */ +#define INSTR_CREATE_bcax(dc, Rd, Rn, Rm, Ra) \ + instr_create_1dst_4src(dc, OP_bcax, Rd, Rn, Rm, Ra, OPND_CREATE_BYTE()) + +/** + * Creates an EOR3 instruction. + * + * This macro is used to encode the forms: + * \verbatim + * EOR3 <Vd>.16B, <Vn>.16B, <Vm>.16B, <Va>.16B + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. 
+ * \param Rd The first destination vector register, Q (quadword, 128 bits) + * \param Rn The second source vector register, Q (quadword, 128 bits) + * \param Rm The third source vector register, Q (quadword, 128 bits) + * \param Ra The fourth source vector register, Q (quadword, 128 bits) + */ +#define INSTR_CREATE_eor3(dc, Rd, Rn, Rm, Ra) \ + instr_create_1dst_4src(dc, OP_eor3, Rd, Rn, Rm, Ra, OPND_CREATE_BYTE()) + +/** + * Creates an ESB instruction. + * + * This macro is used to encode the forms: + * \verbatim + * ESB + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + */ +#define INSTR_CREATE_esb(dc) instr_create_0dst_0src(dc, OP_esb) + +/** + * Creates a PSB instruction. + * + * This macro is used to encode the forms: + * \verbatim + * PSB CSYNC + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + */ +#define INSTR_CREATE_psb_csync(dc) instr_create_0dst_0src(dc, OP_psb) + #endif /* DR_IR_MACROS_AARCH64_H */ diff --git a/suite/tests/api/dis-a64.txt b/suite/tests/api/dis-a64.txt index d19067476fb..0cbbef33d1c 100644 --- a/suite/tests/api/dis-a64.txt +++ b/suite/tests/api/dis-a64.txt @@ -52692,3 +52692,45 @@ ce79cb17 : sm4ekey v23.4s, v24.4s, v25.4s : sm4ekey %q24 %q25 $0x02 - ce7bcb59 : sm4ekey v25.4s, v26.4s, v27.4s : sm4ekey %q26 %q27 $0x02 -> %q25 ce7dcb9b : sm4ekey v27.4s, v28.4s, v29.4s : sm4ekey %q28 %q29 $0x02 -> %q27 ce61c81f : sm4ekey v31.4s, v0.4s, v1.4s : sm4ekey %q0 %q1 $0x02 -> %q31 + +# BCAX <Vd>.16B, <Vn>.16B, <Vm>.16B, <Va>.16B (BCAX-Q.QQQ-Crypto) +ce220c20 : bcax v0.16b, v1.16b, v2.16b, v3.16b : bcax %q1 %q2 %q3 $0x00 -> %q0 +ce241462 : bcax v2.16b, v3.16b, v4.16b, v5.16b : bcax %q3 %q4 %q5 $0x00 -> %q2 +ce261ca4 : bcax v4.16b, v5.16b, v6.16b, v7.16b : bcax %q5 %q6 %q7 $0x00 -> %q4 +ce2824e6 : bcax v6.16b, v7.16b, v8.16b, v9.16b : bcax %q7 %q8 %q9 $0x00 -> %q6 +ce2a2d28 : bcax v8.16b, v9.16b, v10.16b, v11.16b : bcax %q9 %q10 %q11 $0x00 -> %q8 +ce2c356a : bcax v10.16b, v11.16b, 
v12.16b, v13.16b : bcax %q11 %q12 %q13 $0x00 -> %q10 +ce2e3dac : bcax v12.16b, v13.16b, v14.16b, v15.16b : bcax %q13 %q14 %q15 $0x00 -> %q12 +ce3045ee : bcax v14.16b, v15.16b, v16.16b, v17.16b : bcax %q15 %q16 %q17 $0x00 -> %q14 +ce324e30 : bcax v16.16b, v17.16b, v18.16b, v19.16b : bcax %q17 %q18 %q19 $0x00 -> %q16 +ce335251 : bcax v17.16b, v18.16b, v19.16b, v20.16b : bcax %q18 %q19 %q20 $0x00 -> %q17 +ce355a93 : bcax v19.16b, v20.16b, v21.16b, v22.16b : bcax %q20 %q21 %q22 $0x00 -> %q19 +ce3762d5 : bcax v21.16b, v22.16b, v23.16b, v24.16b : bcax %q22 %q23 %q24 $0x00 -> %q21 +ce396b17 : bcax v23.16b, v24.16b, v25.16b, v26.16b : bcax %q24 %q25 %q26 $0x00 -> %q23 +ce3b7359 : bcax v25.16b, v26.16b, v27.16b, v28.16b : bcax %q26 %q27 %q28 $0x00 -> %q25 +ce3d7b9b : bcax v27.16b, v28.16b, v29.16b, v30.16b : bcax %q28 %q29 %q30 $0x00 -> %q27 +ce21081f : bcax v31.16b, v0.16b, v1.16b, v2.16b : bcax %q0 %q1 %q2 $0x00 -> %q31 + +# EOR3 <Vd>.16B, <Vn>.16B, <Vm>.16B, <Va>.16B (EOR3-Q.QQQ-Crypto) +ce020c20 : eor3 v0.16b, v1.16b, v2.16b, v3.16b : eor3 %q1 %q2 %q3 $0x00 -> %q0 +ce041462 : eor3 v2.16b, v3.16b, v4.16b, v5.16b : eor3 %q3 %q4 %q5 $0x00 -> %q2 +ce061ca4 : eor3 v4.16b, v5.16b, v6.16b, v7.16b : eor3 %q5 %q6 %q7 $0x00 -> %q4 +ce0824e6 : eor3 v6.16b, v7.16b, v8.16b, v9.16b : eor3 %q7 %q8 %q9 $0x00 -> %q6 +ce0a2d28 : eor3 v8.16b, v9.16b, v10.16b, v11.16b : eor3 %q9 %q10 %q11 $0x00 -> %q8 +ce0c356a : eor3 v10.16b, v11.16b, v12.16b, v13.16b : eor3 %q11 %q12 %q13 $0x00 -> %q10 +ce0e3dac : eor3 v12.16b, v13.16b, v14.16b, v15.16b : eor3 %q13 %q14 %q15 $0x00 -> %q12 +ce1045ee : eor3 v14.16b, v15.16b, v16.16b, v17.16b : eor3 %q15 %q16 %q17 $0x00 -> %q14 +ce124e30 : eor3 v16.16b, v17.16b, v18.16b, v19.16b : eor3 %q17 %q18 %q19 $0x00 -> %q16 +ce135251 : eor3 v17.16b, v18.16b, v19.16b, v20.16b : eor3 %q18 %q19 %q20 $0x00 -> %q17 +ce155a93 : eor3 v19.16b, v20.16b, v21.16b, v22.16b : eor3 %q20 %q21 %q22 $0x00 -> %q19 +ce1762d5 : eor3 v21.16b, v22.16b, v23.16b, v24.16b : eor3 %q22 %q23 %q24 $0x00 -> %q21 
+ce196b17 : eor3 v23.16b, v24.16b, v25.16b, v26.16b : eor3 %q24 %q25 %q26 $0x00 -> %q23 +ce1b7359 : eor3 v25.16b, v26.16b, v27.16b, v28.16b : eor3 %q26 %q27 %q28 $0x00 -> %q25 +ce1d7b9b : eor3 v27.16b, v28.16b, v29.16b, v30.16b : eor3 %q28 %q29 %q30 $0x00 -> %q27 +ce01081f : eor3 v31.16b, v0.16b, v1.16b, v2.16b : eor3 %q0 %q1 %q2 $0x00 -> %q31 + +# ESB (ESB--barrier) +d503221f : esb : esb + +# PSB (PSB--barrier) +d503223f : psb csync : psb diff --git a/suite/tests/api/ir_aarch64_v82.c b/suite/tests/api/ir_aarch64_v82.c index 565f39b4030..af3134aa5be 100644 --- a/suite/tests/api/ir_aarch64_v82.c +++ b/suite/tests/api/ir_aarch64_v82.c @@ -1782,6 +1782,89 @@ TEST_INSTR(sha512su1) return success; } +TEST_INSTR(bcax) +{ + bool success = true; + instr_t *instr; + byte *pc; + + /* Testing BCAX <Vd>.16B, <Vn>.16B, <Vm>.16B, <Va>.16B */ + reg_id_t Rd_0_0[3] = { DR_REG_Q0, DR_REG_Q10, DR_REG_Q31 }; + reg_id_t Rn_0_0[3] = { DR_REG_Q0, DR_REG_Q11, DR_REG_Q31 }; + reg_id_t Rm_0_0[3] = { DR_REG_Q0, DR_REG_Q12, DR_REG_Q31 }; + reg_id_t Ra_0_0[3] = { DR_REG_Q0, DR_REG_Q13, DR_REG_Q31 }; + const char *expected_0_0[3] = { + "bcax %q0 %q0 %q0 $0x00 -> %q0", + "bcax %q11 %q12 %q13 $0x00 -> %q10", + "bcax %q31 %q31 %q31 $0x00 -> %q31", + }; + for (int i = 0; i < 3; i++) { + instr = + INSTR_CREATE_bcax(dc, opnd_create_reg(Rd_0_0[i]), opnd_create_reg(Rn_0_0[i]), + opnd_create_reg(Rm_0_0[i]), opnd_create_reg(Ra_0_0[i])); + if (!test_instr_encoding(dc, OP_bcax, instr, expected_0_0[i])) + success = false; + } + + return success; +} + +TEST_INSTR(eor3) +{ + bool success = true; + instr_t *instr; + byte *pc; + + /* Testing EOR3 <Vd>.16B, <Vn>.16B, <Vm>.16B, <Va>.16B */ + reg_id_t Rd_0_0[3] = { DR_REG_Q0, DR_REG_Q10, DR_REG_Q31 }; + reg_id_t Rn_0_0[3] = { DR_REG_Q0, DR_REG_Q11, DR_REG_Q31 }; + reg_id_t Rm_0_0[3] = { DR_REG_Q0, DR_REG_Q12, DR_REG_Q31 }; + reg_id_t Ra_0_0[3] = { DR_REG_Q0, DR_REG_Q13, DR_REG_Q31 }; + const char *expected_0_0[3] = { + "eor3 %q0 %q0 %q0 $0x00 -> %q0", + "eor3 %q11 %q12 %q13 $0x00 -> %q10", + 
"eor3 %q31 %q31 %q31 $0x00 -> %q31", + }; + for (int i = 0; i < 3; i++) { + instr = + INSTR_CREATE_eor3(dc, opnd_create_reg(Rd_0_0[i]), opnd_create_reg(Rn_0_0[i]), + opnd_create_reg(Rm_0_0[i]), opnd_create_reg(Ra_0_0[i])); + if (!test_instr_encoding(dc, OP_eor3, instr, expected_0_0[i])) + success = false; + } + + return success; +} + +TEST_INSTR(esb) +{ + bool success = true; + instr_t *instr; + byte *pc; + + /* Testing ESB */ + const char *expected_0_0[1] = { "esb" }; + instr = INSTR_CREATE_esb(dc); + if (!test_instr_encoding(dc, OP_esb, instr, expected_0_0[0])) + success = false; + + return success; +} + +TEST_INSTR(psb) +{ + bool success = true; + instr_t *instr; + byte *pc; + + /* Testing PSB */ + const char *expected_0_0[1] = { "psb" }; + instr = INSTR_CREATE_psb_csync(dc); + if (!test_instr_encoding(dc, OP_psb, instr, expected_0_0[0])) + success = false; + return success; +} + int main(int argc, char *argv[]) { @@ -1845,6 +1928,11 @@ main(int argc, char *argv[]) RUN_INSTR_TEST(sha512su0); RUN_INSTR_TEST(sha512su1); + RUN_INSTR_TEST(bcax); + RUN_INSTR_TEST(eor3); + RUN_INSTR_TEST(esb); + RUN_INSTR_TEST(psb); + print("All v8.2 tests complete.\n"); #ifndef STANDALONE_DECODER dr_standalone_exit();