Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#2626 Add #fbits fixed-point handling to AArch64 codec #4860

Merged
merged 4 commits into from
Apr 21, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions core/ir/aarch64/codec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1557,6 +1557,35 @@ encode_opnd_cond(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
return encode_opnd_int(12, 4, false, 0, 0, opnd, enc_out);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i#2626 Add #fbits fixed-point handling to AArch64 codec

nit: Missing mini-description and colon after issue number prefix

}

/* scale: Scalar encoding of the #fbits operand (the number of bits after the binary
 * point for fixed-point values).
 */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style: Capitalize w/ punctuation for complete sentence

style: prefer */ on own line (unfortunately no clang-format rule to enforce it)


/* Decodes the 'scale' operand: reads the six-bit field obtained with
 * extract_uint(enc, 10, 6) and stores (64 - field) as an OPSZ_6b immediate
 * integer operand in *opnd. Always returns true.
 */
static inline bool
decode_opnd_scale(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    const uint scale_field = extract_uint(enc, 10, 6);
    /* The instruction stores 64 - #fbits, so invert to recover #fbits. */
    const uint fbits = 64 - scale_field;

    *opnd = opnd_create_immed_int(fbits, OPSZ_6b);
    return true;
}

/* Encodes the 'scale' operand. The operand must be an immediate integer in the
 * range 1 to 64; on success (64 - value) shifted left by 10 is written to
 * *enc_out and true is returned. Otherwise returns false and leaves *enc_out
 * untouched.
 */
static inline bool
encode_opnd_scale(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    if (opnd_is_immed_int(opnd)) {
        const ptr_int_t fbits = opnd_get_immed_int(opnd);

        if (fbits >= 1 && fbits <= 64) {
            /* The 'scale' bitfield holds 64 - #fbits. */
            const ptr_int_t scale_field = 64 - fbits;

            *enc_out = scale_field << 10;
            return true;
        }
    }
    return false;
}

/* fpimm8: immediate operand for SIMD fmov */

static inline bool
Expand Down Expand Up @@ -2140,6 +2169,34 @@ encode_opnd_vindex_H(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_
return true;
}

/* immhb: Vector encoding of the #fbits operand (the number of bits after the binary
 * point for fixed-point values).
 */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same style comments


/* Decodes the 'immhb' operand: reads the six-bit field obtained with
 * extract_uint(enc, 16, 6) and stores (64 - field) as an OPSZ_6b immediate
 * integer operand in *opnd. Always returns true.
 */
static inline bool
decode_opnd_immhb(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    const uint immhb_field = extract_uint(enc, 16, 6);
    /* The instruction stores 64 - #fbits in this field, so invert it. */
    const uint fbits = 64 - immhb_field;

    *opnd = opnd_create_immed_int(fbits, OPSZ_6b);
    return true;
}

/* Encodes the 'immhb' operand. The operand must be an immediate integer in the
 * range 1 to 64; on success (64 - value) shifted left by 16 is written to
 * *enc_out and true is returned. Otherwise returns false and leaves *enc_out
 * untouched.
 */
static inline bool
encode_opnd_immhb(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    if (opnd_is_immed_int(opnd)) {
        const ptr_int_t fbits = opnd_get_immed_int(opnd);

        if (fbits >= 1 && fbits <= 64) {
            /* The field holds 64 - #fbits. */
            const ptr_int_t immhb_field = 64 - fbits;

            *enc_out = immhb_field << 16;
            return true;
        }
    }
    return false;
}

/* imm12: 12-bit immediate operand of ADD/SUB */

static inline bool
Expand Down
13 changes: 9 additions & 4 deletions core/ir/aarch64/codec.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
-----------------xxxxx---------- q10 # Q register
----------------xxx------------- ext # extend type
----------------xxxx------------ cond # condition for CCMN, CCMP
----------------xxxxxx---------- scale # encoding of #fbits value in scale field
-------------xxx------xxxxx----- fpimm8 # floating-point immediate for vector fmov
-------------xxxxxxxxxxxxxx----- sysops # immediate operands for SYS
------------xxxxxxxxxxxxxxx----- sysreg # operand of MRS
Expand All @@ -121,6 +122,7 @@
----------?xxxxx--?-??---------- x16immvr # computes immed from 21, 13 and 11:10
----------?xxxxx???-??---------- x16immvs # computes immed from 21, 15:13 and 11:10
----------xx--------x----------- vindex_H # Index for vector with half elements (0-7)
----------xxxxxx---------------- immhb # encoding of #fbits value in immh:immb fields
----------xxxxxxxxxxxx---------- imm12 # immediate for ADD/SUB
----------xxxxxxxxxxxxxxxxx----- mem12q # size is 16 bytes
----------xxxxxxxxxxxxxxxxx----- prf12 # size is 0 bytes (prefetch variant of mem12)
Expand Down Expand Up @@ -1158,20 +1160,23 @@ x101101011000000000101xxxxxxxxxx cls wx0 : wx5
1001111000111000000000xxxxxxxxxx fcvtzs x0 : s5
0001111001111000000000xxxxxxxxxx fcvtzs w0 : d5
1001111001111000000000xxxxxxxxxx fcvtzs x0 : d5
0001111000111001000000xxxxxxxxxx fcvtzu w0 : s5
1001111000111001000000xxxxxxxxxx fcvtzu x0 : s5
0001111001111001000000xxxxxxxxxx fcvtzu w0 : d5
1001111001111001000000xxxxxxxxxx fcvtzu x0 : d5
x001111000111001000000xxxxxxxxxx fcvtzu wx0 : s5
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This patch folds the 4 fcvtzu x0 : <FPreg>5 and fcvtzu w0 : <FPreg>5 encodings into 2 fcvtzu wx0 : <FPreg>5 encodings. A subsequent PR will check and fold others.

x001111001111001000000xxxxxxxxxx fcvtzu wx0 : d5
x001111000011001xxxxxxxxxxxxxxxx fcvtzu wx0 : s5 scale
x001111001011001xxxxxxxxxxxxxxxx fcvtzu wx0 : d5 scale

# Floating-point convert (vector, integer) (vector single-precision and double-precision)
0101111010100001101110xxxxxxxxxx fcvtzs s0 : s5
0101111011100001101110xxxxxxxxxx fcvtzs d0 : d5
0111111010100001101110xxxxxxxxxx fcvtzu s0 : s5
0111111011100001101110xxxxxxxxxx fcvtzu d0 : d5
0111111100xxxxxx111111xxxxxxxxxx fcvtzu s0 : s5 immhb
0111111101xxxxxx111111xxxxxxxxxx fcvtzu d0 : d5 immhb

# Floating-point convert (vector, integer) (scalar single-precision and double-precision)
0x0011101x100001101110xxxxxxxxxx fcvtzs dq0 : dq5 sd_sz
0x1011101x100001101110xxxxxxxxxx fcvtzu dq0 : dq5 sd_sz
0x1011110xxxxxxx111111xxxxxxxxxx fcvtzu dq0 : dq5 sd_sz immhb

# Floating-point data-processing (2 source)
00011110xx1xxxxx000010xxxxxxxxxx fmul float_reg0 : float_reg5 float_reg16
Expand Down
24 changes: 24 additions & 0 deletions core/ir/aarch64/instr_create_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1759,6 +1759,19 @@ enum {
#define INSTR_CREATE_fcvtzu_vector(dc, Rd, Rm, width) \
instr_create_1dst_2src(dc, OP_fcvtzu, Rd, Rm, width)

/**
* Creates an FCVTZU vector floating-point to fixed-point convert instruction.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as on Matt's PR: s/a/an/ for most common pronunciation. Ditto below.

* \param dc The void * dcontext used to allocate memory for the #instr_t.
* \param Rd The output register.
* \param Rm The input register.
* \param width The vector element width. Use either OPND_CREATE_SINGLE() or
* OPND_CREATE_DOUBLE().
* \param fbits The number of bits after the binary point in the fixed-point
* destination element. Expected to be an immediate integer operand in
* the range 1 to 64 (the range the encoder accepts).
*/
#define INSTR_CREATE_fcvtzu_vector_fixed(dc, Rd, Rm, width, fbits) \
instr_create_1dst_3src(dc, OP_fcvtzu, Rd, Rm, width, fbits)

/* -------- Floating-point data-processing (1 source) ------------------ */

/**
Expand Down Expand Up @@ -1855,6 +1868,17 @@ enum {
#define INSTR_CREATE_fcvtzu_scalar(dc, Rd, Rm) \
instr_create_1dst_1src(dc, OP_fcvtzu, Rd, Rm)

/**
* Creates an FCVTZU scalar floating-point to fixed-point convert instruction.
* \param dc The void * dcontext used to allocate memory for the #instr_t.
* \param Rd Floating-point or integer output register.
* \param Rm Floating-point input register.
* \param fbits The number of bits after the binary point in the fixed-point
* destination. Expected to be an immediate integer operand in the
* range 1 to 64 (the range the encoder accepts).
*/
#define INSTR_CREATE_fcvtzu_scalar_fixed(dc, Rd, Rm, fbits) \
instr_create_1dst_2src(dc, OP_fcvtzu, Rd, Rm, fbits)

/**
* Creates a FRINTN floating point instruction.
* \param dc The void * dcontext used to allocate memory for the instr_t.
Expand Down
47 changes: 47 additions & 0 deletions suite/tests/api/dis-a64.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2199,6 +2199,53 @@ fd7fffff : ldr d31, [sp,#32760] : ldr +0x7ff8(%sp)[8byte] -> %d31
7ea1b929 : fcvtzu s9, s9 : fcvtzu %s9 -> %s9
7ee1b841 : fcvtzu d1, d2 : fcvtzu %d2 -> %d1

# Floating-point/fixed-point conversion
1e19f107 : fcvtzu w7, s8, #4 : fcvtzu %s8 $0x04 -> %w7
9e19c2ad : fcvtzu x13, s21, #16 : fcvtzu %s21 $0x10 -> %x13
1e59813e : fcvtzu w30, d9, #32 : fcvtzu %d9 $0x20 -> %w30
9e5901ec : fcvtzu x12, d15, #64 : fcvtzu %d15 $0x40 -> %x12
7f3ffc20 : fcvtzu s0, s1, #1 : fcvtzu %s1 $0x01 -> %s0
7f3efc62 : fcvtzu s2, s3, #2 : fcvtzu %s3 $0x02 -> %s2
7f3cfca4 : fcvtzu s4, s5, #4 : fcvtzu %s5 $0x04 -> %s4
7f38fce6 : fcvtzu s6, s7, #8 : fcvtzu %s7 $0x08 -> %s6
7f30fd28 : fcvtzu s8, s9, #16 : fcvtzu %s9 $0x10 -> %s8
7f20fd6a : fcvtzu s10, s11, #32 : fcvtzu %s11 $0x20 -> %s10
7f2bffbc : fcvtzu s28, s29, #21 : fcvtzu %s29 $0x15 -> %s28
7f21fffe : fcvtzu s30, s31, #31 : fcvtzu %s31 $0x1f -> %s30
7f7ffc20 : fcvtzu d0, d1, #1 : fcvtzu %d1 $0x01 -> %d0
7f7cfca4 : fcvtzu d4, d5, #4 : fcvtzu %d5 $0x04 -> %d4
7f78fce6 : fcvtzu d6, d7, #8 : fcvtzu %d7 $0x08 -> %d6
7f70fd28 : fcvtzu d8, d9, #16 : fcvtzu %d9 $0x10 -> %d8
7f60fd6a : fcvtzu d10, d11, #32 : fcvtzu %d11 $0x20 -> %d10
7f40fdac : fcvtzu d12, d13, #64 : fcvtzu %d13 $0x40 -> %d12
7f6bffbc : fcvtzu d28, d29, #21 : fcvtzu %d29 $0x15 -> %d28
7f56fffe : fcvtzu d30, d31, #42 : fcvtzu %d31 $0x2a -> %d30
6f3ffc20 : fcvtzu v0.4s, v1.4s, #1 : fcvtzu %q1 $0x02 $0x01 -> %q0
6f3efc62 : fcvtzu v2.4s, v3.4s, #2 : fcvtzu %q3 $0x02 $0x02 -> %q2
6f3cfca4 : fcvtzu v4.4s, v5.4s, #4 : fcvtzu %q5 $0x02 $0x04 -> %q4
6f38fce6 : fcvtzu v6.4s, v7.4s, #8 : fcvtzu %q7 $0x02 $0x08 -> %q6
6f30fd28 : fcvtzu v8.4s, v9.4s, #16 : fcvtzu %q9 $0x02 $0x10 -> %q8
6f20fd6a : fcvtzu v10.4s, v11.4s, #32 : fcvtzu %q11 $0x02 $0x20 -> %q10
6f2bffbc : fcvtzu v28.4s, v29.4s, #21 : fcvtzu %q29 $0x02 $0x15 -> %q28
6f21fffe : fcvtzu v30.4s, v31.4s, #31 : fcvtzu %q31 $0x02 $0x1f -> %q30
6f7ffc20 : fcvtzu v0.2d, v1.2d, #1 : fcvtzu %q1 $0x03 $0x01 -> %q0
6f7efc62 : fcvtzu v2.2d, v3.2d, #2 : fcvtzu %q3 $0x03 $0x02 -> %q2
6f7cfca4 : fcvtzu v4.2d, v5.2d, #4 : fcvtzu %q5 $0x03 $0x04 -> %q4
6f78fce6 : fcvtzu v6.2d, v7.2d, #8 : fcvtzu %q7 $0x03 $0x08 -> %q6
6f70fd28 : fcvtzu v8.2d, v9.2d, #16 : fcvtzu %q9 $0x03 $0x10 -> %q8
6f60fd6a : fcvtzu v10.2d, v11.2d, #32 : fcvtzu %q11 $0x03 $0x20 -> %q10
6f40fdac : fcvtzu v12.2d, v13.2d, #64 : fcvtzu %q13 $0x03 $0x40 -> %q12
6f6bffbc : fcvtzu v28.2d, v29.2d, #21 : fcvtzu %q29 $0x03 $0x15 -> %q28
6f56fffe : fcvtzu v30.2d, v31.2d, #42 : fcvtzu %q31 $0x03 $0x2a -> %q30
2f3ffc20 : fcvtzu v0.2s, v1.2s, #1 : fcvtzu %d1 $0x02 $0x01 -> %d0
2f3efc62 : fcvtzu v2.2s, v3.2s, #2 : fcvtzu %d3 $0x02 $0x02 -> %d2
2f3cfca4 : fcvtzu v4.2s, v5.2s, #4 : fcvtzu %d5 $0x02 $0x04 -> %d4
2f38fce6 : fcvtzu v6.2s, v7.2s, #8 : fcvtzu %d7 $0x02 $0x08 -> %d6
2f30fd28 : fcvtzu v8.2s, v9.2s, #16 : fcvtzu %d9 $0x02 $0x10 -> %d8
2f20fd6a : fcvtzu v10.2s, v11.2s, #32 : fcvtzu %d11 $0x02 $0x20 -> %d10
2f2bffbc : fcvtzu v28.2s, v29.2s, #21 : fcvtzu %d29 $0x02 $0x15 -> %d28
2f21fffe : fcvtzu v30.2s, v31.2s, #31 : fcvtzu %d31 $0x02 $0x1f -> %d30

# SVE bitwise logical operations (predicated)
04181da2 : orr z2.b, p7/m, z2.b, z13.b : orr %p7 %z2 %z13 $0x00 -> %z2
04581da2 : orr z2.h, p7/m, z2.h, z13.h : orr %p7 %z2 %z13 $0x01 -> %z2
Expand Down
Loading