Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#3044 AArch64 SVE codec: Add memory vector+immed #5889

Merged
merged 1 commit into from
Mar 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 114 additions & 18 deletions core/ir/aarch64/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,18 +208,24 @@ try_encode_int(OUT uint *bits, int len, int scale, ptr_int_t val)
}

static inline bool
try_encode_imm(OUT uint *imm, int bits, opnd_t opnd)
try_encode_uint(OUT uint *bits, int len, int scale, ptr_int_t val)
{
ptr_int_t value;
if (!opnd_is_immed_int(opnd))
return false;
value = opnd_get_immed_int(opnd);
if (!(0 <= value && value < (uint)1 << bits))
const ptr_uint_t mask = MASK(len) << scale;

if (val < 0 || (val & ~mask) != 0)
return false;
*imm = value;

*bits = (uint)(val >> scale);
return true;
}

static inline bool
try_encode_imm(OUT uint *imm, int bits, opnd_t opnd)
{
return opnd_is_immed_int(opnd) &&
try_encode_uint(imm, bits, 0, opnd_get_immed_int(opnd));
}

static inline bool
encode_pc_off(OUT uint *poff, int bits, byte *pc, instr_t *instr, opnd_t opnd,
decode_info_t *di)
Expand Down Expand Up @@ -975,9 +981,6 @@ extract_tsz_size(uint enc)
static aarch64_reg_offset
get_vector_element_reg_offset(opnd_t opnd)
{
if (!opnd_is_element_vector_reg(opnd))
return NOT_A_REG;

switch (opnd_get_vector_element_size(opnd)) {
case OPSZ_1: return BYTE_REG;
case OPSZ_2: return HALF_REG;
Expand Down Expand Up @@ -4447,10 +4450,10 @@ svemem_gprs_per_element_encode(uint bytes_per_element, aarch64_reg_offset elemen

uint rn, rm;
bool is_x_register;
IF_RETURN_FALSE(!encode_reg(&rn, &is_x_register, opnd_get_base(opnd), true) ||
!is_x_register)
IF_RETURN_FALSE(!encode_reg(&rm, &is_x_register, opnd_get_index(opnd), false) ||
!is_x_register)
if (!encode_reg(&rn, &is_x_register, opnd_get_base(opnd), true) || !is_x_register)
return false;
if (!encode_reg(&rm, &is_x_register, opnd_get_index(opnd), false) || !is_x_register)
return false;

*enc_out = rn << 5 | rm << 16;
return true;
Expand Down Expand Up @@ -4832,16 +4835,18 @@ static inline bool
encode_opnd_svemem_gpr_simm6_vl(uint enc, int opcode, byte *pc, opnd_t opnd,
OUT uint *enc_out)
{
IF_RETURN_FALSE(!opnd_is_base_disp(opnd))
const opnd_size_t mem_transfer = op_is_prefetch(opcode) ? OPSZ_0 : OPSZ_SVE_VL;
IF_RETURN_FALSE(opnd_get_size(opnd) != mem_transfer)
if (!opnd_is_base_disp(opnd) || opnd_get_size(opnd) != mem_transfer)
return false;

uint imm6;
IF_RETURN_FALSE(!try_encode_int(&imm6, 6, 0, opnd_get_disp(opnd)))
if (!try_encode_int(&imm6, 6, 0, opnd_get_disp(opnd)))
return false;

uint rn;
bool is_x;
IF_RETURN_FALSE(!encode_reg(&rn, &is_x, opnd_get_base(opnd), true) || !is_x)
if (!encode_reg(&rn, &is_x, opnd_get_base(opnd), true) || !is_x)
return false;

*enc_out = (rn << 5) | (imm6 << 16);
return true;
Expand Down Expand Up @@ -6579,6 +6584,97 @@ encode_opnd_imm2_tsz_index(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint
return true;
}

/* SVE memory address [<Zn>.<T>{, #<imm>}] */
static inline bool
decode_svemem_vec_imm5(uint enc, aarch64_reg_offset element_size, bool is_prefetch,
OUT opnd_t *opnd)
{
const aarch64_reg_offset msz = BITS(enc, 24, 23);
const uint scale = 1 << msz;

const opnd_size_t mem_transfer = is_prefetch
? OPSZ_0
: opnd_size_from_bytes(scale * get_elements_in_sve_vector(element_size));

const reg_id_t zn = DR_REG_Z0 + extract_uint(enc, 5, 5);
ASSERT(reg_is_z(zn));

const int imm5 = (int)(extract_uint(enc, 16, 5) << msz);
switch (msz) {
case BYTE_REG: ASSERT(imm5 >= 0 && imm5 <= 31); break;
case HALF_REG: ASSERT(imm5 >= 0 && imm5 <= 62 && (imm5 % 2) == 0); break;
case SINGLE_REG: ASSERT(imm5 >= 0 && imm5 <= 124 && (imm5 % 4) == 0); break;
case DOUBLE_REG: ASSERT(imm5 >= 0 && imm5 <= 248 && (imm5 % 8) == 0); break;
default: ASSERT_NOT_REACHED();
}

*opnd = opnd_create_vector_base_disp_aarch64(zn, DR_REG_NULL,
get_opnd_size_from_offset(element_size),
0, false, imm5, 0, mem_transfer, 0);

return true;
}

static inline bool
encode_svemem_vec_imm5(uint enc, aarch64_reg_offset element_size, bool is_prefetch,
opnd_t opnd, OUT uint *enc_out)
{
if (!opnd_is_base_disp(opnd) || opnd_get_index(opnd) != DR_REG_NULL ||
get_vector_element_reg_offset(opnd) != element_size)
return false;

const reg_id_t zd = opnd_get_base(opnd);
if (!reg_is_z(zd))
return false;
const uint reg_number = zd - DR_REG_Z0;

const aarch64_reg_offset msz = BITS(enc, 24, 23);
const uint scale = 1 << msz;

const opnd_size_t mem_transfer = is_prefetch
? OPSZ_0
: opnd_size_from_bytes(scale * get_elements_in_sve_vector(element_size));

if (opnd_get_size(opnd) != mem_transfer)
return false;

uint imm5;
if (!try_encode_uint(&imm5, 5, msz, opnd_get_disp(opnd)))
return false;

*enc_out |= (imm5 << 16) | (reg_number << 5);

return true;
}

/* SVE memory address [<Zn>.S{, #<imm>}] */
static inline bool
decode_opnd_svemem_vec_s_imm5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
return decode_svemem_vec_imm5(enc, SINGLE_REG, op_is_prefetch(opcode), opnd);
}

static inline bool
encode_opnd_svemem_vec_s_imm5(uint enc, int opcode, byte *pc, opnd_t opnd,
OUT uint *enc_out)
{
return encode_svemem_vec_imm5(enc, SINGLE_REG, op_is_prefetch(opcode), opnd, enc_out);
}

/* SVE memory address [<Zn>.D{, #<imm>}] */
static inline bool
decode_opnd_svemem_vec_d_imm5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
return decode_svemem_vec_imm5(enc, DOUBLE_REG, op_is_prefetch(opcode), opnd);
}

static inline bool
encode_opnd_svemem_vec_d_imm5(uint enc, int opcode, byte *pc, opnd_t opnd,
OUT uint *enc_out)
{
return encode_svemem_vec_imm5(enc, DOUBLE_REG, op_is_prefetch(opcode), opnd, enc_out);
}

static inline bool
dtype_is_signed(uint dtype)
{
Expand Down
41 changes: 41 additions & 0 deletions core/ir/aarch64/codec_sve.txt
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,13 @@
10100100010xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_gpr_shf p10_zer_lo
10100100011xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_gpr_shf p10_zer_lo
10100100000xxxxx010xxxxxxxxxxxxx n 946 SVE ld1b z_b_0 : svemem_gpr_shf p10_zer_lo
11000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_vec_d_imm5 p10_zer_lo
10000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_vec_s_imm5 p10_zer_lo
11000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_d_0 : svemem_vec_d_imm5 p10_zer_lo
10000100001xxxxx110xxxxxxxxxxxxx n 946 SVE ld1b z_s_0 : svemem_vec_s_imm5 p10_zer_lo
11000101101xxxxx110xxxxxxxxxxxxx n 975 SVE ld1d z_d_0 : svemem_vec_d_imm5 p10_zer_lo
10000100101xxxxx110xxxxxxxxxxxxx n 976 SVE ld1h z_s_0 : svemem_vec_s_imm5 p10_zer_lo
11000100101xxxxx110xxxxxxxxxxxxx n 976 SVE ld1h z_d_0 : svemem_vec_d_imm5 p10_zer_lo
1000010001xxxxxx101xxxxxxxxxxxxx n 908 SVE ld1rb z_h_0 : svememx6_b_5 p10_zer_lo
1000010001xxxxxx110xxxxxxxxxxxxx n 908 SVE ld1rb z_s_0 : svememx6_b_5 p10_zer_lo
1000010001xxxxxx111xxxxxxxxxxxxx n 908 SVE ld1rb z_d_0 : svememx6_b_5 p10_zer_lo
Expand All @@ -320,25 +327,44 @@
10100101110xxxxx010xxxxxxxxxxxxx n 949 SVE ld1sb z_h_0 : svemem_gpr_shf p10_zer_lo
10100101101xxxxx010xxxxxxxxxxxxx n 949 SVE ld1sb z_s_0 : svemem_gpr_shf p10_zer_lo
10100101100xxxxx010xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_gpr_shf p10_zer_lo
10000100001xxxxx100xxxxxxxxxxxxx n 949 SVE ld1sb z_s_0 : svemem_vec_s_imm5 p10_zer_lo
11000100001xxxxx100xxxxxxxxxxxxx n 949 SVE ld1sb z_d_0 : svemem_vec_d_imm5 p10_zer_lo
10000100101xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_s_0 : svemem_vec_s_imm5 p10_zer_lo
11000100101xxxxx100xxxxxxxxxxxxx n 977 SVE ld1sh z_d_0 : svemem_vec_d_imm5 p10_zer_lo
11000101001xxxxx100xxxxxxxxxxxxx n 978 SVE ld1sw z_d_0 : svemem_vec_d_imm5 p10_zer_lo
10000101001xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_s_0 : svemem_vec_s_imm5 p10_zer_lo
11000101001xxxxx110xxxxxxxxxxxxx n 979 SVE ld1w z_d_0 : svemem_vec_d_imm5 p10_zer_lo
10100100001xxxxx110xxxxxxxxxxxxx n 967 SVE ld2b z_b_0 z_msz_bhsd_0p1 : svemem_gprs_bhsdx p10_zer_lo
10100100010xxxxx110xxxxxxxxxxxxx n 968 SVE ld3b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 : svemem_gprs_bhsdx p10_zer_lo
10100100011xxxxx110xxxxxxxxxxxxx n 969 SVE ld4b z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 : svemem_gprs_bhsdx p10_zer_lo
10100100001xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_h_0 : svemem_gpr_shf p10_zer_lo
10100100010xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_s_0 : svemem_gpr_shf p10_zer_lo
10100100011xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_d_0 : svemem_gpr_shf p10_zer_lo
10100100000xxxxx011xxxxxxxxxxxxx n 937 SVE ldff1b z_b_0 : svemem_gpr_shf p10_zer_lo
10000100001xxxxx111xxxxxxxxxxxxx n 937 SVE ldff1b z_s_0 : svemem_vec_s_imm5 p10_zer_lo
11000100001xxxxx111xxxxxxxxxxxxx n 937 SVE ldff1b z_d_0 : svemem_vec_d_imm5 p10_zer_lo
10100101111xxxxx011xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_gpr_shf p10_zer_lo
11000101101xxxxx111xxxxxxxxxxxxx n 938 SVE ldff1d z_d_0 : svemem_vec_d_imm5 p10_zer_lo
10100100101xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_h_0 : svemem_gpr_shf p10_zer_lo
10100100110xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_gpr_shf p10_zer_lo
10100100111xxxxx011xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_gpr_shf p10_zer_lo
10000100101xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_s_0 : svemem_vec_s_imm5 p10_zer_lo
11000100101xxxxx111xxxxxxxxxxxxx n 939 SVE ldff1h z_d_0 : svemem_vec_d_imm5 p10_zer_lo
10100101110xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_h_0 : svemem_gpr_shf p10_zer_lo
10100101101xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_s_0 : svemem_gpr_shf p10_zer_lo
10100101100xxxxx011xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_gpr_shf p10_zer_lo
10000100001xxxxx101xxxxxxxxxxxxx n 940 SVE ldff1sb z_s_0 : svemem_vec_s_imm5 p10_zer_lo
11000100001xxxxx101xxxxxxxxxxxxx n 940 SVE ldff1sb z_d_0 : svemem_vec_d_imm5 p10_zer_lo
10100101001xxxxx011xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_gpr_shf p10_zer_lo
10100101000xxxxx011xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_gpr_shf p10_zer_lo
10000100101xxxxx101xxxxxxxxxxxxx n 941 SVE ldff1sh z_s_0 : svemem_vec_s_imm5 p10_zer_lo
11000100101xxxxx101xxxxxxxxxxxxx n 941 SVE ldff1sh z_d_0 : svemem_vec_d_imm5 p10_zer_lo
10100100100xxxxx011xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_gpr_shf p10_zer_lo
11000101001xxxxx101xxxxxxxxxxxxx n 942 SVE ldff1sw z_d_0 : svemem_vec_d_imm5 p10_zer_lo
10100101010xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_gpr_shf p10_zer_lo
10100101011xxxxx011xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_gpr_shf p10_zer_lo
10000101001xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_s_0 : svemem_vec_s_imm5 p10_zer_lo
11000101001xxxxx111xxxxxxxxxxxxx n 943 SVE ldff1w z_d_0 : svemem_vec_d_imm5 p10_zer_lo
10100100000xxxxx110xxxxxxxxxxxxx n 950 SVE ldnt1b z_b_0 : svemem_gprs_b1 p10_zer_lo
1000010110xxxxxx000xxxxxxxx0xxxx n 227 SVE ldr p0 : svemem_gpr_simm9_vl
1000010110xxxxxx010xxxxxxxxxxxxx n 227 SVE ldr z0 : svemem_gpr_simm9_vl
Expand Down Expand Up @@ -384,9 +410,17 @@
00100101010110001100000xxxx0xxxx w 895 SVE pfirst p_b_0 : p5 p_b_0
00100101xx0110011100010xxxx0xxxx w 925 SVE pnext p_size_bhsd_0 : p5 p_size_bhsd_0
1000010111xxxxxx000xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo svemem_gpr_simm6_vl
10000100000xxxxx111xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo svemem_vec_s_imm5
11000100000xxxxx111xxxxxxxx0xxxx n 963 SVE prfb : prfop4 p10_lo svemem_vec_d_imm5
1000010111xxxxxx011xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo svemem_gpr_simm6_vl
10000101100xxxxx111xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo svemem_vec_s_imm5
11000101100xxxxx111xxxxxxxx0xxxx n 964 SVE prfd : prfop4 p10_lo svemem_vec_d_imm5
1000010111xxxxxx001xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo svemem_gpr_simm6_vl
10000100100xxxxx111xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo svemem_vec_s_imm5
11000100100xxxxx111xxxxxxxx0xxxx n 965 SVE prfh : prfop4 p10_lo svemem_vec_d_imm5
1000010111xxxxxx010xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo svemem_gpr_simm6_vl
10000101000xxxxx111xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo svemem_vec_s_imm5
11000101000xxxxx111xxxxxxxx0xxxx n 966 SVE prfw : prfop4 p10_lo svemem_vec_d_imm5
001001010101000011xxxx0xxxx00000 w 786 SVE ptest : p10 p_b_5
00100101xx011000111000xxxxx0xxxx n 897 SVE ptrue p_size_bhsd_0 : pred_constr
00100101xx011001111000xxxxx0xxxx w 898 SVE ptrues p_size_bhsd_0 : pred_constr
Expand Down Expand Up @@ -469,6 +503,13 @@
11100100001xxxxx010xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_shf : z_h_0 p10_lo
11100100010xxxxx010xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_shf : z_s_0 p10_lo
11100100011xxxxx010xxxxxxxxxxxxx n 951 SVE st1b svemem_gpr_shf : z_d_0 p10_lo
11100100011xxxxx101xxxxxxxxxxxxx n 951 SVE st1b svemem_vec_s_imm5 : z_s_0 p10_lo
11100100010xxxxx101xxxxxxxxxxxxx n 951 SVE st1b svemem_vec_d_imm5 : z_d_0 p10_lo
11100101110xxxxx101xxxxxxxxxxxxx n 981 SVE st1d svemem_vec_d_imm5 : z_d_0 p10_lo
11100100111xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_vec_s_imm5 : z_s_0 p10_lo
11100100110xxxxx101xxxxxxxxxxxxx n 980 SVE st1h svemem_vec_d_imm5 : z_d_0 p10_lo
11100101011xxxxx101xxxxxxxxxxxxx n 982 SVE st1w svemem_vec_s_imm5 : z_s_0 p10_lo
11100101010xxxxx101xxxxxxxxxxxxx n 982 SVE st1w svemem_vec_d_imm5 : z_d_0 p10_lo
11100100001xxxxx011xxxxxxxxxxxxx n 970 SVE st2b svemem_gprs_bhsdx : z_b_0 z_msz_bhsd_0p1 p10_lo
11100100010xxxxx011xxxxxxxxxxxxx n 971 SVE st3b svemem_gprs_bhsdx : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 p10_lo
11100100011xxxxx011xxxxxxxxxxxxx n 972 SVE st4b svemem_gprs_bhsdx : z_b_0 z_msz_bhsd_0p1 z_msz_bhsd_0p2 z_msz_bhsd_0p3 p10_lo
Expand Down
Loading