Skip to content

Commit

Permalink
i#4408 Add AArch64 immediate FMOV instructions (#4467)
Browse files Browse the repository at this point in the history
Includes addition of:
- Double precision floating-point operands for Linux.
- HAVE_HALF_FLOAT feature flag for half-precision FP.
  • Loading branch information
AssadHashmi authored Oct 16, 2020
1 parent 5cbe811 commit 3a408a0
Show file tree
Hide file tree
Showing 14 changed files with 714 additions and 7 deletions.
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1168,6 +1168,13 @@ else ()
set(HAVE_RSEQ OFF)
endif ()

# Half-precision (16-bit) floating-point support is currently enabled only
# when the host is AArch64; HAVE_HALF_FLOAT is OFF for every other host.
if (DR_HOST_AARCH64)
set(HAVE_HALF_FLOAT ON)
else ()
set(HAVE_HALF_FLOAT OFF)
endif ()

###########################################################################

# Issue 20: cross-arch execve depends on these being distinct and not
Expand Down
3 changes: 3 additions & 0 deletions api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,9 @@ Further non-compatibility-affecting changes include:
querying other-bitwidth client registration.
- Added a new drcachesim option \p -max_global_trace_refs for specifying a global
trace size limit that does not terminate the process.
- Added opnd_create_immed_double(), opnd_get_immed_double() and
opnd_is_immed_double() to enable the creation and handling of double
precision floating-point operands.

**************************************************
<hr>
Expand Down
1 change: 1 addition & 0 deletions core/arch/loadtoconst.c
Original file line number Diff line number Diff line change
Expand Up @@ -1601,6 +1601,7 @@ opnd_replace_reg_with_val(opnd_t *opnd, int old_reg, int val)
case NULL_kind:
case IMMED_INTEGER_kind:
case IMMED_FLOAT_kind:
case IMMED_DOUBLE_kind:
case PC_kind:
case FAR_PC_kind:
case INSTR_kind:
Expand Down
278 changes: 278 additions & 0 deletions core/ir/aarch64/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
* functions next to each other.
*/

#include <stdint.h>
#include "../globals.h"
#include "arch.h"
#include "decode.h"
Expand Down Expand Up @@ -1556,6 +1557,146 @@ encode_opnd_cond(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
return encode_opnd_int(12, 4, false, 0, 0, opnd, enc_out);
}

/* fpimm8: immediate operand for SIMD fmov */

static inline bool
decode_opnd_fpimm8(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    /* Expands the 8-bit floating-point immediate of the vector FMOV into a
     * float immediate operand.  See the Arm Architecture Reference Manual.
     *
     * The immediate imm8 = a:b:c:d:e:f:g:h (h is the LSB) is split across two
     * instruction fields: abc in bits 18..16 and defgh in bits 9..5.
     *
     * With HAVE_HALF_FLOAT the half-precision (v8.2) expansion is built and
     * then widened to float:
     *
     *   imm16 = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,2):imm8<5:0>:Zeros(6)
     *         = a:~b:bb:cdefgh:000000
     *
     * Otherwise, for platforms on which 16-bit FP is not yet available, the
     * single-precision expansion is built directly:
     *
     *   imm32 = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,5):imm8<5:0>:Zeros(19)
     *         = a:~b:bbbbb:cdefgh:0000000000000000000
     *
     * The per-lane replication the hardware performs (imm16 or imm32 repeated
     * to fill 64 or 128 bits depending on Q) is not modelled here: only the
     * scalar value is stored in the operand.
     *
     * Always returns true.
     */
#ifdef HAVE_HALF_FLOAT
    union {
        __fp16 f;
        uint16_t i;
    } pun;
#else
    union {
        float f;
        uint32_t i;
    } pun;
#endif

    /* Glue the two fields back together as imm8 = abc:defgh. */
    uint imm8 = ((extract_uint(enc, 16, 3) & 0x7) << 5) | (extract_uint(enc, 5, 5) & 0x1f);

    uint sign = (imm8 >> 7) & 0x1;    /* a */
    bool b_set = (imm8 & 0x40) != 0;  /* b */
    uint low6 = imm8 & 0x3f;          /* cdefgh */

#ifdef HAVE_HALF_FLOAT
    /* ~b:bb occupies bits 14..12. */
    uint exp_top = b_set ? 0x3 : 0x4;
    pun.i = (uint16_t)((sign << 15) | (exp_top << 12) | (low6 << 6));
#else
    /* ~b:bbbbb occupies bits 30..25. */
    uint exp_top = b_set ? 0x1f : 0x20;
    pun.i = (sign << 31) | (exp_top << 25) | (low6 << 19);
#endif

    *opnd = opnd_create_immed_float(pun.f);
    return true;
}

static inline bool
encode_opnd_fpimm8(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    /* Encodes a float immediate operand as the 8-bit floating-point immediate
     * a:b:c:d:e:f:g:h of the vector FMOV.  abc is written to encoding bits
     * 18..16 and defgh to bits 9..5 of *enc_out.
     *
     * Returns false, leaving *enc_out untouched, if opnd is not a float
     * immediate or if its value cannot be expressed exactly as an 8-bit
     * immediate (e.g. 0.0, 0.3, infinities and NaNs).
     *
     * Based on the IEEE 754-2008 standard but with Arm-specific details that
     * are left open by the standard.  See VFPExpandImm() in the Arm
     * Architecture Reference Manual: the 8-bit immediate denotes the same
     * numeric value at every precision, so the half-precision lane value can
     * be validated and encoded from the single-precision bits of the operand
     * using integer operations only:
     *
     *   3332 2222 2222 1111 1111 11
     *   1098 7654 3210 9876 5432 1098 7654 3210
     *    _
     *   abbb bbbc defg h000 0000 0000 0000 0000   encodable float
     *   ---- ---- ---- -abc ---- --de fgh- ----   immediate encoding
     */
    union {
        float f;
        uint32_t i;
    } fpv;

    if (!opnd_is_immed_float(opnd))
        return false;

    fpv.f = opnd_get_immed_float(opnd);
    uint32_t imm = fpv.i;

    /* Bits 30..25 must read ~b:bbbbb, i.e. 100000 or 011111, and the 19 low
     * fraction bits must be zero; anything else would be silently encoded as
     * a different constant.
     */
    uint32_t exp_top = (imm >> 25) & 0x3f;
    if ((imm & 0x0007ffff) != 0 || (exp_top != 0x20 && exp_top != 0x1f))
        return false;

    uint a = (imm & 0x80000000);     /* bit 31      -> bit 18     */
    uint b = (imm & 0x10000000);     /* bit 28      -> bit 17     */
    uint c = (imm & 0x01000000);     /* bit 24      -> bit 16     */
    uint defgh = (imm & 0x00f80000); /* bits 23..19 -> bits 9..5  */

    *enc_out = (a >> 13) | (b >> 11) | (c >> 8) | (defgh >> 14);
    return true;
}

/* sysops: immediate operand for SYS instruction which specifies SYS operations */

static inline bool
Expand Down Expand Up @@ -2095,6 +2236,143 @@ encode_opnd_sd_sz(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out
return false;
}

/* fpimm13: floating-point immediate for scalar fmov */

static inline bool
decode_opnd_fpimm13(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    /* Expands the 8-bit immediate abcdefgh held in encoding bits 20..13 of the
     * scalar FMOV into a floating-point immediate operand.  Encoding bit 22
     * selects the operand kind: 0 produces a float, 1 produces a double.
     *
     * From the Architecture Reference Manual (the '_' marks NOT(b)):
     *
     *    _
     *   abbb bbbc defg h000 0000 0000 0000 0000                 32-bit float
     *    _
     *   abbb bbbb bbcd efgh 0000 ... (48 zero bits in total)    64-bit double
     *
     * Always returns true.
     */
    uint32_t imm8 = extract_uint(enc, 13, 8);

    uint32_t sign = (imm8 >> 7) & 0x1;   /* a */
    bool b_set = (imm8 & 0x40) != 0;     /* b */
    uint32_t low6 = imm8 & 0x3f;         /* cdefgh */

    if (extract_uint(enc, 22, 1) != 0) {
        /* 64 bits: ~b:bbbbbbbb occupies bits 62..54, cdefgh bits 53..48. */
        union {
            double d;
            uint64_t i;
        } pun;
        uint64_t exp_top = b_set ? 0x0ff : 0x100;

        pun.i = ((uint64_t)sign << 63) | (exp_top << 54) | ((uint64_t)low6 << 48);
        *opnd = opnd_create_immed_double(pun.d);
        return true;
    }

    /* 32 bits: ~b:bbbbb occupies bits 30..25, cdefgh bits 24..19. */
    union {
        float f;
        uint32_t i;
    } pun;
    uint32_t exp_top = b_set ? 0x1f : 0x20;

    pun.i = (sign << 31) | (exp_top << 25) | (low6 << 19);
    *opnd = opnd_create_immed_float(pun.f);
    return true;
}

static inline bool
encode_opnd_fpimm13(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    /* Encodes a float or double immediate operand as the 8-bit immediate
     * abcdefgh of the scalar FMOV, written to bits 20..13 of *enc_out.
     *
     * Returns false, leaving *enc_out untouched, if opnd is neither a float
     * nor a double immediate, or if its value cannot be expressed exactly as
     * an 8-bit immediate (e.g. 0.0, 0.3, infinities and NaNs).
     *
     * From the Architecture Reference Manual (see VFPExpandImm()), the only
     * encodable bit patterns are (the '_' marks NOT(b)):
     *
     *   3332 2222 2222 1111 1111 11
     *   1098 7654 3210 9876 5432 1098 7654 3210
     *    _
     *   abbb bbbc defg h000 0000 0000 0000 0000                 32-bit float
     *    _
     *   abbb bbbb bbcd efgh 0000 ... (48 zero bits in total)    64-bit double
     *
     *   ---- ---- ---a bcde fgh- ---- ---- ----                 immediate encoding
     */
    if (opnd_is_immed_float(opnd)) {
        ASSERT(extract_uint(enc, 22, 1) == 0); /* 32 bit floating point */
        union {
            float f;
            uint32_t i;
        } fpv;
        fpv.f = opnd_get_immed_float(opnd);
        uint32_t imm = fpv.i;

        /* Bits 30..25 must read ~b:bbbbb (100000 or 011111) and the 19 low
         * fraction bits must be zero, otherwise a different constant would be
         * silently encoded.
         */
        uint32_t exp_top = (imm >> 25) & 0x3f;
        if ((imm & 0x0007ffff) != 0 || (exp_top != 0x20 && exp_top != 0x1f))
            return false;

        uint a = (imm & 0x80000000);      /* bit 31      -> bit 20      */
        uint b = (imm & 0x10000000);      /* bit 28      -> bit 19      */
        uint cdefgh = (imm & 0x01f80000); /* bits 24..19 -> bits 18..13 */

        *enc_out = (a >> 11) | (b >> 9) | (cdefgh >> 6);
        return true;
    }

    if (opnd_is_immed_double(opnd)) {
        ASSERT(extract_uint(enc, 22, 1) == 1); /* 64 bit floating point */
        union {
            double d;
            uint64_t i;
        } fpv;
        fpv.d = opnd_get_immed_double(opnd);
        uint64_t imm = fpv.i;

        /* Bits 62..54 must read ~b:bbbbbbbb (100000000 or 011111111) and the
         * 48 low fraction bits must be zero.
         */
        uint64_t exp_top = (imm >> 54) & 0x1ff;
        if ((imm & UINT64_C(0x0000ffffffffffff)) != 0 ||
            (exp_top != 0x100 && exp_top != 0x0ff))
            return false;

        uint64_t a = (imm & UINT64_C(0x8000000000000000));      /* bit 63 -> bit 20 */
        uint64_t b = (imm & UINT64_C(0x1000000000000000));      /* bit 60 -> bit 19 */
        uint64_t cdefgh = (imm & UINT64_C(0x003f000000000000)); /* 53..48 -> 18..13 */

        *enc_out = (uint)((a >> 43) | (b >> 41) | (cdefgh >> 35));
        return true;
    }

    return false;
}

/* b_sz: Vector element width for SIMD instructions. */

static inline bool
Expand Down
12 changes: 11 additions & 1 deletion core/ir/aarch64/codec.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@

-------------------------------- impx30 # implicit X30 operand
-------------------------------- lsl # implicit LSL for ADD/MOV (immediate)
-------------------------------- h_sz # element width of FP vector reg, used to
-------------------------------- h_sz # element width of FP vector reg, used to
# distinguish FP16 and float/double encs
----------------------------xxxx nzcv # flag bit specifier for CCMN, CCMP
---------------------------xxxxx w0 # W register (or WZR)
Expand Down Expand Up @@ -96,6 +96,7 @@
-----------------xxxxx---------- q10 # Q register
----------------xxx------------- ext # extend type
----------------xxxx------------ cond # condition for CCMN, CCMP
-------------xxx------xxxxx----- fpimm8 # floating-point immediate for vector fmov
-------------xxxxxxxxxxxxxx----- sysops # immediate operands for SYS
------------xxxxxxxxxxxxxxx----- sysreg # operand of MRS
-----------xxxxx---------------- ign16 # ignored reg field in load/store exclusive
Expand Down Expand Up @@ -127,6 +128,7 @@
---------x---------------------- imm12sh # shift for ADD/SUB (immediate); '0x'
# elements, depending on bit 22 (sz)
---------x---------------------- sd_sz # element width of FP vector reg for single
--------??-xxxxxxxx------------- fpimm13 # floating-point immediate for scalar fmov
--------xx---------------------- b_sz # element width of a vector (8<<b_sz)
--------xx---------------------- hs_sz # element width of a vector (8<<hs_sz)
--------xx---------------------- bhs_sz # element width of a vector (8<<bhs_sz)
Expand Down Expand Up @@ -973,13 +975,21 @@ x101101011000000000101xxxxxxxxxx cls wx0 : wx5
# Data Processing - Scalar Floating-Point and Advanced SIMD


# FMOV immediate to scalar FP reg
00011110111xxxxxxxx10000000xxxxx fmov h0 : fpimm13 # Armv8.2
00011110001xxxxxxxx10000000xxxxx fmov s0 : fpimm13
00011110011xxxxxxxx10000000xxxxx fmov d0 : fpimm13

# FMOV (general) GPR to FP reg
0001111011100111000000xxxxxxxxxx fmov h0 : w5 # Armv8.2
0001111000100111000000xxxxxxxxxx fmov s0 : w5
1001111011100111000000xxxxxxxxxx fmov h0 : x5 # Armv8.2
1001111001100111000000xxxxxxxxxx fmov d0 : x5
1001111010101111000000xxxxxxxxxx fmov q0 : x5 # only sets the bit top half of q0

# FMOV immediate to vector reg
0x00111100000xxx111111xxxxxxxxxx fmov dq0 : fpimm8 h_sz # Armv8.2


# Advanced SIMD three same (FP16)
0x001110010xxxxx000001xxxxxxxxxx fmaxnm dq0 : dq5 dq16 h_sz
Expand Down
Loading

0 comments on commit 3a408a0

Please sign in to comment.