Skip to content

Commit

Permalink
i#2626 AArch64 Decode: Add Saturating SIMD instructions (#4970)
Browse files Browse the repository at this point in the history
Adds the following instructions to the codec
- SQRDMULH (by element scalar, by element vector, vector scalar)
- SQRDMLAH (by element scalar, by element vector, vector scalar)
- SQSHL    (immediate scalar, register scalar, register vector)
- SQXTN    (scalar, vector)
- SQXTN2   (vector)
- SQXTUN   (scalar, vector)
- SQXTUN2  (vector)
- UQXTN    (scalar, vector)
- UQXTN2   (vector)

Issue: #2626
  • Loading branch information
MDevereau authored Jun 29, 2021
1 parent 271d357 commit af9cea0
Show file tree
Hide file tree
Showing 3 changed files with 745 additions and 114 deletions.
139 changes: 94 additions & 45 deletions core/ir/aarch64/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1297,6 +1297,19 @@ encode_opnd_x5sp(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
return encode_opnd_wxn(true, true, 5, opnd, enc_out);
}

/* b5: B register at bit position 5 */
static inline bool
decode_opnd_b5(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    /* Register field starts at bit 5; the second argument is 0 for the B
     * variant (the H and S siblings in this file pass 1 and 2).
     */
    const int field_pos = 5;
    const int b_selector = 0;

    return decode_opnd_vector_reg(field_pos, b_selector, enc, opnd);
}

/* Encoder counterpart of decode_opnd_b5: B register at bit position 5. */
static inline bool
encode_opnd_b5(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    const int field_pos = 5;
    const int b_selector = 0; /* 0 is the value used for B registers */

    return encode_opnd_vector_reg(field_pos, b_selector, opnd, enc_out);
}

/* h5: H register at bit position 5 */

static inline bool
Expand Down Expand Up @@ -1809,6 +1822,19 @@ encode_opnd_sysops(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_ou
return encode_opnd_int(5, 14, false, 0, 0, opnd, enc_out);
}

/* dq16_idx_lhm: imm4 from bits 16-19, the lower 4 bits of register Rm with idx_lhm */
static inline bool
decode_opnd_dq16_idx_lhm(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    /* A 4-bit unsigned field whose lowest bit is bit 16, produced as an
     * OPSZ_4b integer operand.
     */
    const int field_pos = 16;
    const int field_len = 4;

    return decode_opnd_int(field_pos, field_len, false, 0, OPSZ_4b, 0, enc, opnd);
}

/* Encoder counterpart of decode_opnd_dq16_idx_lhm: writes the operand as a
 * 4-bit unsigned field whose lowest bit is bit 16.
 */
static inline bool
encode_opnd_dq16_idx_lhm(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    const int field_pos = 16;
    const int field_len = 4;

    return encode_opnd_int(field_pos, field_len, false, 0, 0, opnd, enc_out);
}

/* sysreg: system register, operand of MRS/MSR */

static inline bool
Expand Down Expand Up @@ -2024,6 +2050,48 @@ encode_opnd_z16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
return encode_opnd_z(16, opnd, enc_out);
}

/* b16: B register at bit position 16. */

static inline bool
decode_opnd_b16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    /* Register field starts at bit 16; 0 is the value used for B registers. */
    const int field_pos = 16;
    const int b_selector = 0;

    return decode_opnd_vector_reg(field_pos, b_selector, enc, opnd);
}

/* Encoder counterpart of decode_opnd_b16: B register at bit position 16. */
static inline bool
encode_opnd_b16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    const int field_pos = 16;
    const int b_selector = 0; /* 0 is the value used for B registers */

    return encode_opnd_vector_reg(field_pos, b_selector, opnd, enc_out);
}

/* h16: H register at bit position 16. */

static inline bool
decode_opnd_h16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    /* Register field starts at bit 16; 1 is the value used for H registers. */
    const int field_pos = 16;
    const int h_selector = 1;

    return decode_opnd_vector_reg(field_pos, h_selector, enc, opnd);
}

/* Encoder counterpart of decode_opnd_h16: H register at bit position 16. */
static inline bool
encode_opnd_h16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    const int field_pos = 16;
    const int h_selector = 1; /* 1 is the value used for H registers */

    return encode_opnd_vector_reg(field_pos, h_selector, opnd, enc_out);
}

/* s16: S register at bit position 16. */

static inline bool
decode_opnd_s16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    /* Register field starts at bit 16; 2 is the value used for S registers. */
    const int field_pos = 16;
    const int s_selector = 2;

    return decode_opnd_vector_reg(field_pos, s_selector, enc, opnd);
}

/* Encoder counterpart of decode_opnd_s16: S register at bit position 16. */
static inline bool
encode_opnd_s16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    const int field_pos = 16;
    const int s_selector = 2; /* 2 is the value used for S registers */

    return encode_opnd_vector_reg(field_pos, s_selector, opnd, enc_out);
}

/* mem9off: just the 9-bit offset from mem9 */

static inline bool
Expand Down Expand Up @@ -2279,6 +2347,32 @@ encode_opnd_vindex_H(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_
return true;
}

/* idx_lhm: imm3 formed as H:L:M from bits 11, 21 and 20 */

static inline bool
decode_opnd_idx_lhm(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    /* Build the 3-bit index most-significant bit first:
     * bit 11 (H), then bit 21 (L), then bit 20 (M).
     */
    uint index = extract_uint(enc, 11, 1);
    index = (index << 1) | extract_uint(enc, 21, 1);
    index = (index << 1) | extract_uint(enc, 20, 1);

    *opnd = opnd_create_immed_uint(index, OPSZ_3b);
    return true;
}

/* Encodes the idx_lhm operand, the inverse of decode_opnd_idx_lhm.
 *
 * The operand is a 3-bit immediate index H:L:M.  Bit 2 (H) is written to
 * instruction bit 11, bit 1 (L) to bit 21 and bit 0 (M) to bit 20.
 *
 * Returns false, leaving *enc_out untouched, if opnd is not an integer
 * immediate or its value does not fit in 3 bits.  On success *enc_out holds
 * only the three index bits.
 */
static inline bool
encode_opnd_idx_lhm(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    if (!opnd_is_immed_int(opnd))
        return false;
    /* Reject anything outside 0-7 (including negative values) rather than
     * silently dropping the high bits.
     */
    if ((opnd_get_immed_int(opnd) & ~7) != 0)
        return false;

    uint val = opnd_get_immed_int(opnd);
    /* Start from zero so the result does not depend on what the caller left
     * in *enc_out.
     */
    uint bits = 0;
    if (val & (1 << 2))
        bits |= (1 << 11);
    if (val & (1 << 1))
        bits |= (1 << 21);
    if (val & 1)
        bits |= (1 << 20);
    *enc_out = bits;
    return true;
}

/* immhb: The vector encoding of #fbits operand. This is the number of bits
* after the decimal point for fixed-point values.
*/
Expand Down Expand Up @@ -2569,51 +2663,6 @@ encode_opnd_fpimm13(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_o
return true;
}

/* index_lhm: imm3 from bits 21, 20 and 11 */

static inline bool
decode_opnd_index_lhm(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    /* The 2-bit field at bits 22-23 must be 1, 2 or 3. */
    uint size_field = extract_uint(enc, 22, 2);
    if (size_field < 1 || size_field > 3)
        return false;

    /* Every accepted size uses H (bit 11) and L (bit 21). */
    uint index = (extract_uint(enc, 11, 1) << 1) | extract_uint(enc, 21, 1);
    if (size_field != 1) {
        *opnd = opnd_create_immed_uint(index, OPSZ_2b);
        return true;
    }

    /* Size 1 appends M (bit 20) as the low bit, giving a 3-bit index. */
    index = (index << 1) | extract_uint(enc, 20, 1);
    *opnd = opnd_create_immed_uint(index, OPSZ_3b);
    return true;
}

/* Encoder counterpart of decode_opnd_index_lhm.
 *
 * Fails unless the 2-bit field at bits 22-23 of enc is 1, 2 or 3 and opnd is
 * an integer immediate.  When that field is 2 the index is shifted left by
 * one before placement, so its two bits land in H and L.  The value's bit 2
 * goes to instruction bit 11, bit 1 to bit 21 and bit 0 to bit 20.
 */
static inline bool
encode_opnd_index_lhm(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    uint size_field = extract_uint(enc, 22, 2);
    if (size_field < 1 || size_field > 3 || !opnd_is_immed_int(opnd))
        return false;

    uint index = opnd_get_immed_int(opnd);
    if (size_field == 2)
        index <<= 1;

    uint h = (index >> 2) & 1;
    uint l = (index >> 1) & 1;
    uint m = index & 1;
    *enc_out = (h << 11) | (l << 21) | (m << 20);
    return true;
}

/* b_sz: Vector element width for SIMD instructions. */

static inline bool
Expand Down
48 changes: 45 additions & 3 deletions core/ir/aarch64/codec.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
----------------------xxxxx----- w5 # W register (or WZR)
----------------------xxxxx----- x5 # X register (or XZR)
----------------------xxxxx----- x5sp # X register or XSP
----------------------xxxxx----- b5 # B register
----------------------xxxxx----- h5 # H register
----------------------xxxxx----- s5 # S register
----------------------xxxxx----- d5 # D register
Expand All @@ -102,6 +103,7 @@
-------------xxx------xxxxx----- fpimm8 # floating-point immediate for vector fmov
-------------xxx------xxxxx----- imm8 # immediate from 16:18 and 5:10
-------------xxxxxxxxxxxxxx----- sysops # immediate operands for SYS
------------xxxx---------------- dq16_idx_lhm # lower 4 bits of Rm with idx_lhm
------------xxxxxxxxxxxxxxx----- sysreg # operand of MRS
-----------?????------xxxxx----- wx5_imm5 # reg 5-9 d or q is inferred from bits 16:20
-----------xxxxx---------------- ign16 # ignored reg field in load/store exclusive
Expand All @@ -115,6 +117,9 @@
-----------xxxxx---------------- d16 # D register
-----------xxxxx---------------- q16 # Q register
-----------xxxxx---------------- z16 # Z register
-----------xxxxx---------------- b16 # B register
-----------xxxxx---------------- h16 # H register
-----------xxxxx---------------- s16 # S register
-----------xxxxxxxxx------------ mem9off # immed offset for mem9/mem9post
-----------xxxxxxxxx--xxxxx----- mem9q # size is 16 bytes
-----------xxxxxxxxx--xxxxx----- prf9 # size is 0 bytes (prefetch variant of mem9)
Expand All @@ -126,6 +131,7 @@
----------?xxxxx--?-??---------- x16immvr # computes immed from 21, 13 and 11:10
----------?xxxxx???-??---------- x16immvs # computes immed from 21, 15:13 and 11:10
----------xx--------x----------- vindex_H # Index for vector with half elements (0-7)
----------xx--------x----------- idx_lhm # imm3 (H:L:M) from bits 11, 21 and 20
----------xxxxxx---------------- immhb # encoding of #fbits value in immh:immb fields
----------xxxxxxxxxxxx---------- imm12 # immediate for ADD/SUB
----------xxxxxxxxxxxxxxxxx----- mem12q # size is 16 bytes
Expand All @@ -135,7 +141,6 @@
# elements, depending on bit 22 (sz)
---------x---------------------- sd_sz # element width of FP vector reg for single
--------??-xxxxxxxx------------- fpimm13 # floating-point immediate for scalar fmov
--------??xx--------x----------- index_lhm # imm3 from bits 11, 21 and/or 20 inferred from sz
--------xx---------------------- b_sz # element width of a vector (8<<b_sz)
--------xx---------------------- hs_sz # element width of a vector (8<<hs_sz)
--------xx---------------------- bhs_sz # element width of a vector (8<<bhs_sz)
Expand Down Expand Up @@ -1048,6 +1053,12 @@ x101101011000000000101xxxxxxxxxx cls wx0 : wx5
0x001110xx1xxxxx001111xxxxxxxxxx cmge dq0 : dq5 dq16 bhsd_sz
0x001110xx1xxxxx010001xxxxxxxxxx sshl dq0 : dq5 dq16 bhsd_sz
0x001110xx1xxxxx010011xxxxxxxxxx sqshl dq0 : dq5 dq16 bhsd_sz
01011110001xxxxx010011xxxxxxxxxx sqshl b0 : b5 b16
01011110011xxxxx010011xxxxxxxxxx sqshl h0 : h5 h16
01011110101xxxxx010011xxxxxxxxxx sqshl s0 : s5 s16
01011110111xxxxx010011xxxxxxxxxx sqshl d0 : d5 d16
0101111100xxxxxx011101xxxxxxxxxx sqshl s0 : s5 immhb
0101111101xxxxxx011101xxxxxxxxxx sqshl d0 : d5 immhb
0x001110xx1xxxxx010101xxxxxxxxxx srshl dq0 : dq5 dq16 bhsd_sz
0x001110xx1xxxxx010111xxxxxxxxxx sqrshl dq0 : dq5 dq16 bhsd_sz
0x001110xx1xxxxx011001xxxxxxxxxx smax dq0 : dq5 dq16 bhs_sz
Expand All @@ -1057,12 +1068,38 @@ x101101011000000000101xxxxxxxxxx cls wx0 : wx5
0x001110xx1xxxxx100001xxxxxxxxxx add dq0 : dq5 dq16 bhsd_sz
0x001110xx1xxxxx100011xxxxxxxxxx cmtst dq0 : dq5 dq16 bhsd_sz
0x001110xx1xxxxx100101xxxxxxxxxx mla dq0 : dq0 dq5 dq16 bhs_sz
0x101111xxxxxxxx0000x0xxxxxxxxxx mla dq0 : dq5 dq16_h_sz bhsd_sz index_lhm
0x101111xxxxxxxx0000x0xxxxxxxxxx mla dq0 : dq5 dq16_idx_lhm bhsd_sz idx_lhm
0x001110xx1xxxxx100111xxxxxxxxxx mul dq0 : dq5 dq16 bhs_sz
0x001111xxxxxxxx1000x0xxxxxxxxxx mul dq0 : dq5 dq16_h_sz vindex_H hs_sz
0x001110xx1xxxxx101001xxxxxxxxxx smaxp dq0 : dq5 dq16 bhs_sz
0x001110xx1xxxxx101011xxxxxxxxxx sminp dq0 : dq5 dq16 bhs_sz
0x001110xx1xxxxx101101xxxxxxxxxx sqdmulh dq0 : dq5 dq16 hs_sz
01011110011xxxxx101101xxxxxxxxxx sqdmulh h0 : h5 h16
01011110101xxxxx101101xxxxxxxxxx sqdmulh s0 : s5 s16
0x001111xxxxxxxx1100x0xxxxxxxxxx sqdmulh dq0 : dq5 dq16_idx_lhm bhsd_sz idx_lhm
0101111101xxxxxx1100x0xxxxxxxxxx sqdmulh h0 : h5 dq16_idx_lhm idx_lhm
0101111110xxxxxx1100x0xxxxxxxxxx sqdmulh s0 : s5 dq16_idx_lhm idx_lhm
0x101111xxxxxxxx1101x0xxxxxxxxxx sqrdmlah dq0 : dq5 dq16_idx_lhm bhsd_sz idx_lhm
0111111101xxxxxx1101x0xxxxxxxxxx sqrdmlah h0 : h5 dq16_idx_lhm idx_lhm
0111111110xxxxxx1101x0xxxxxxxxxx sqrdmlah s0 : s5 dq16_idx_lhm idx_lhm
0x101110xx0xxxxx100001xxxxxxxxxx sqrdmlah dq0 : dq5 dq16 hs_sz
01111110010xxxxx100001xxxxxxxxxx sqrdmlah h0 : h5 h16
01111110100xxxxx100001xxxxxxxxxx sqrdmlah s0 : s5 s16
0101111000100001010010xxxxxxxxxx sqxtn b0 : h5
0101111001100001010010xxxxxxxxxx sqxtn h0 : s5
0101111010100001010010xxxxxxxxxx sqxtn s0 : d5
00001110xx100001010010xxxxxxxxxx sqxtn d0 : d5 bhs_sz
01001110xx100001010010xxxxxxxxxx sqxtn2 q0 : q5 bhs_sz
0111111000100001001010xxxxxxxxxx sqxtun b0 : h5
0111111001100001001010xxxxxxxxxx sqxtun h0 : s5
0111111010100001001010xxxxxxxxxx sqxtun s0 : d5
00101110xx100001001010xxxxxxxxxx sqxtun d0 : d5 bhs_sz
01101110xx100001001010xxxxxxxxxx sqxtun2 q0 : q5 bhs_sz
0111111000100001010010xxxxxxxxxx uqxtn b0 : h5
0111111001100001010010xxxxxxxxxx uqxtn h0 : s5
0111111010100001010010xxxxxxxxxx uqxtn s0 : d5
00101110xx100001010010xxxxxxxxxx uqxtn d0 : d5 bhs_sz
01101110xx100001010010xxxxxxxxxx uqxtn2 q0 : q5 bhs_sz
0x001110xx1xxxxx101111xxxxxxxxxx addp dq0 : dq5 dq16 bhsd_sz
0x0011100x1xxxxx110001xxxxxxxxxx fmaxnm dq0 : dq5 dq16 sd_sz
0x0011100x1xxxxx110011xxxxxxxxxx fmla dq0 : dq0 dq5 dq16 sd_sz
Expand Down Expand Up @@ -1102,11 +1139,16 @@ x101101011000000000101xxxxxxxxxx cls wx0 : wx5
0x101110xx1xxxxx100001xxxxxxxxxx sub dq0 : dq5 dq16 bhsd_sz
0x101110xx1xxxxx100011xxxxxxxxxx cmeq dq0 : dq5 dq16 bhsd_sz
0x101110xx1xxxxx100101xxxxxxxxxx mls dq0 : dq0 dq5 dq16 bhs_sz
0x101111xxxxxxxx0100x0xxxxxxxxxx mls dq0 : dq5 dq16_h_sz bhsd_sz index_lhm
0x101111xxxxxxxx0100x0xxxxxxxxxx mls dq0 : dq5 dq16_idx_lhm bhsd_sz idx_lhm
0x101110xx1xxxxx100111xxxxxxxxxx pmul dq0 : dq5 dq16 b_sz
0x101110xx1xxxxx101001xxxxxxxxxx umaxp dq0 : dq5 dq16 bhs_sz
0x101110xx1xxxxx101011xxxxxxxxxx uminp dq0 : dq5 dq16 bhs_sz
01111110011xxxxx101101xxxxxxxxxx sqrdmulh h0 : h5 h16
01111110101xxxxx101101xxxxxxxxxx sqrdmulh s0 : s5 s16
0x101110xx1xxxxx101101xxxxxxxxxx sqrdmulh dq0 : dq5 dq16 hs_sz
0x001111xxxxxxxx1101x0xxxxxxxxxx sqrdmulh dq0 : dq5 dq16_idx_lhm bhsd_sz idx_lhm
0101111101xxxxxx1101x0xxxxxxxxxx sqrdmulh h0 : h5 dq16_idx_lhm idx_lhm
0101111110xxxxxx1101x0xxxxxxxxxx sqrdmulh s0 : s5 dq16_idx_lhm idx_lhm
0x1011100x1xxxxx110001xxxxxxxxxx fmaxnmp dq0 : dq5 dq16 sd_sz
0x101110001xxxxx110011xxxxxxxxxx fmlal2 dq0 : dq0 dq5 dq16
0x1011100x1xxxxx110101xxxxxxxxxx faddp dq0 : dq5 dq16 sd_sz
Expand Down
Loading

0 comments on commit af9cea0

Please sign in to comment.