Skip to content

Commit

Permalink
i#5036 A64 scatter/gather, part 1: Expand scalar+vector loads (#6267)
Browse files Browse the repository at this point in the history
Creates an AArch64 version of drx_expand_scatter_gather() and tests for
it. So far only SVE scalar+vector loads are supported. Support and tests
for more instructions will follow in future commits.

State restore is also not yet supported and will be implemented when
i#5365 is complete.

Issue: #5036
  • Loading branch information
jackgallagher-arm authored Aug 24, 2023
1 parent ad5e838 commit 1a14566
Show file tree
Hide file tree
Showing 23 changed files with 2,443 additions and 160 deletions.
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,12 @@ if (X86 AND UNIX)
check_avx512_processor_and_compiler_support(proc_supports_avx512)
endif ()

# SVE support defaults to OFF; it is only probed for AArch64 UNIX builds.
set(proc_supports_sve OFF)
if (AARCH64 AND UNIX)
# Compiler flag requesting the Armv8-A baseline plus the SVE extension.
set(CFLAGS_SVE "-march=armv8-a+sve")
# NOTE(review): presumably sets proc_supports_sve to ON only when both the
# processor and the compiler support SVE -- confirm against the helper's
# definition, which is not visible here.
check_sve_processor_and_compiler_support(proc_supports_sve)
endif ()

# Ensure that _AMD64_ or _X86_ are defined on Microsoft Windows, as otherwise
# um/winnt.h provided since Windows 10.0.22000 will error.
if (NOT UNIX)
Expand Down
4 changes: 2 additions & 2 deletions api/samples/memtrace_simple.c
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *wher
const opnd_t src = instr_get_src(instr_operands, i);
if (opnd_is_memory_reference(src)) {
#ifdef AARCH64
/* TODO i#5844: Memory references involving SVE registers are not
/* TODO i#5036: Memory references involving SVE registers are not
* supported yet. To be implemented as part of scatter/gather work.
*/
if (opnd_is_base_disp(src) &&
Expand All @@ -343,7 +343,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *wher
const opnd_t dst = instr_get_dst(instr_operands, i);
if (opnd_is_memory_reference(dst)) {
#ifdef AARCH64
/* TODO i#5844: Memory references involving SVE registers are not
/* TODO i#5036: Memory references involving SVE registers are not
* supported yet. To be implemented as part of scatter/gather work.
*/
if (opnd_is_base_disp(dst) &&
Expand Down
4 changes: 2 additions & 2 deletions api/samples/memval_simple.c
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ handle_post_write(void *drcontext, instrlist_t *ilist, instr_t *where, reg_id_t
}

#ifdef AARCH64
/* TODO i#5844: Memory references involving SVE registers are not
/* TODO i#5036: Memory references involving SVE registers are not
* supported yet. To be implemented as part of scatter/gather work.
*/
if (opnd_is_base_disp(dst) &&
Expand Down Expand Up @@ -405,7 +405,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *wher
break;
}
#ifdef AARCH64
/* TODO i#5844: Memory references involving SVE registers are not
/* TODO i#5036: Memory references involving SVE registers are not
* supported yet. To be implemented as part of scatter/gather work.
*/
if (opnd_is_base_disp(dst) &&
Expand Down
86 changes: 86 additions & 0 deletions clients/drcachesim/tests/scattergather-aarch64.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#ifdef __ARM_FEATURE_SVE
ld1b 32bit unscaled offset uxtw: PASS
ld1b 32bit unscaled offset sxtw: PASS
ld1b 32bit unpacked unscaled offset uxtw: PASS
ld1b 32bit unpacked unscaled offset sxtw: PASS
ld1b 64bit unscaled offset: PASS
ld1b 64bit unscaled offset Zt==Zm: PASS
ld1sb 32bit unscaled offset uxtw: PASS
ld1sb 32bit unscaled offset sxtw: PASS
ld1sb 32bit unpacked unscaled offset uxtw: PASS
ld1sb 32bit unpacked unscaled offset sxtw: PASS
ld1sb 64bit unscaled offset: PASS
ld1sb 64bit unscaled offset: PASS
ld1h 32bit scaled offset uxtw: PASS
ld1h 32bit scaled offset sxtw: PASS
ld1h 32bit unpacked scaled offset uxtw: PASS
ld1h 32bit unpacked scaled offset sxtw: PASS
ld1h 32bit unpacked unscaled offset uxtw: PASS
ld1h 32bit unpacked unscaled offset sxtw: PASS
ld1h 32bit unscaled offset uxtw: PASS
ld1h 32bit unscaled offset sxtw: PASS
ld1h 64bit scaled offset: PASS
ld1h 64bit unscaled offset: PASS
ld1h 64bit unscaled offset Zt==Zm: PASS
ld1sh 32bit scaled offset uxtw: PASS
ld1sh 32bit scaled offset sxtw: PASS
ld1sh 32bit unpacked scaled offset uxtw: PASS
ld1sh 32bit unpacked scaled offset sxtw: PASS
ld1sh 32bit unpacked unscaled offset uxtw: PASS
ld1sh 32bit unpacked unscaled offset sxtw: PASS
ld1sh 32bit unscaled offset uxtw: PASS
ld1sh 32bit unscaled offset sxtw: PASS
ld1sh 64bit scaled offset: PASS
ld1sh 64bit unscaled offset: PASS
ld1sh 64bit unscaled offset Zt==Zm: PASS
ld1w 32bit scaled offset uxtw: PASS
ld1w 32bit scaled offset sxtw: PASS
ld1w 32bit unpacked scaled offset uxtw: PASS
ld1w 32bit unpacked scaled offset sxtw: PASS
ld1w 32bit unpacked unscaled offset uxtw: PASS
ld1w 32bit unpacked unscaled offset sxtw: PASS
ld1w 32bit unscaled offset uxtw: PASS
ld1w 32bit unscaled offset sxtw: PASS
ld1w 64bit scaled offset: PASS
ld1w 64bit unscaled offset: PASS
ld1w 64bit unscaled offset Zt==Zm: PASS
ld1sw 32bit unpacked scaled offset uxtw: PASS
ld1sw 32bit unpacked scaled offset sxtw: PASS
ld1sw 32bit unpacked unscaled offset uxtw: PASS
ld1sw 32bit unpacked unscaled offset sxtw: PASS
ld1sw 64bit scaled offset: PASS
ld1sw 64bit unscaled offset: PASS
ld1sw 64bit unscaled offset Zt==Zm: PASS
ld1d 32bit unpacked scaled offset uxtw: PASS
ld1d 32bit unpacked scaled offset sxtw: PASS
ld1d 32bit unpacked unscaled offset uxtw: PASS
ld1d 32bit unpacked unscaled offset sxtw: PASS
ld1d 64bit scaled offset: PASS
ld1d 64bit unscaled offset: PASS
ld1d 64bit unscaled offset Zt==Zm: PASS
#endif /* __ARM_FEATURE_SVE */
---- <application exited with code 0> ----
Basic counts tool results:
Total counts:
.* total \(fetched\) instructions
.* total unique \(fetched\) instructions
.* total non-fetched instructions
.* total prefetches
.* total data loads
.* total data stores
.* total icache flushes
.* total dcache flushes
1 total threads
.* total scheduling markers
.*
Thread .* counts:
.* \(fetched\) instructions
.* unique \(fetched\) instructions
.* non-fetched instructions
.* prefetches
.* data loads
.* data stores
.* icache flushes
.* dcache flushes
.* scheduling markers
.*
2 changes: 1 addition & 1 deletion clients/drcachesim/tracer/instru_offline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,7 @@ offline_instru_t::identify_elidable_addresses(void *drcontext, instrlist_t *ilis
// view by expanding the instr in raw2trace (e.g. using
// drx_expand_scatter_gather) when building the ilist.
if (drutil_instr_is_stringop_loop(instr)
// TODO i#3837: Scatter/gather support NYI on ARM/AArch64.
// TODO i#5036: Scatter/gather support incomplete on AArch64.
IF_X86(|| instr_is_scatter(instr) || instr_is_gather(instr))) {
return;
}
Expand Down
4 changes: 2 additions & 2 deletions clients/drcachesim/tracer/tracer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1311,7 +1311,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst
const opnd_t src = instr_get_src(instr_operands, i);
if (opnd_is_memory_reference(src)) {
#ifdef AARCH64
/* TODO i#5844: Memory references involving SVE registers are not
/* TODO i#5036: Memory references involving SVE registers are not
* supported yet. To be implemented as part of scatter/gather work.
*/
if (opnd_is_base_disp(src) &&
Expand All @@ -1335,7 +1335,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst
const opnd_t dst = instr_get_dst(instr_operands, i);
if (opnd_is_memory_reference(dst)) {
#ifdef AARCH64
/* TODO i#5844: Memory references involving SVE registers are not
/* TODO i#5036: Memory references involving SVE registers are not
* supported yet. To be implemented as part of scatter/gather work.
*/
if (opnd_is_base_disp(dst) &&
Expand Down
73 changes: 69 additions & 4 deletions core/ir/aarch64/instr.c
Original file line number Diff line number Diff line change
Expand Up @@ -610,17 +610,82 @@ DR_API
bool
instr_is_scatter(instr_t *instr)
{
    /* Returns true when the opcode of instr is one of the SVE store opcodes
     * listed below (st1*, st2*, st3*, st4*, stnt1*), and false otherwise.
     * Classification is by opcode alone: the operands (and therefore the
     * addressing mode) are not examined.
     */
    switch (instr_get_opcode(instr)) {
    case OP_st1b:
    case OP_st1h:
    case OP_st1w:
    case OP_st1d:
    case OP_st2b:
    case OP_st2h:
    case OP_st2w:
    case OP_st2d:
    case OP_st3b:
    case OP_st3h:
    case OP_st3w:
    case OP_st3d:
    case OP_st4b:
    case OP_st4h:
    case OP_st4w:
    case OP_st4d:
    case OP_stnt1b:
    case OP_stnt1h:
    case OP_stnt1w:
    case OP_stnt1d: return true;
    }
    return false;
}

DR_API
bool
instr_is_gather(instr_t *instr)
{
    /* Returns true when the opcode of instr is one of the SVE load opcodes
     * listed below (ld1*, ld1ro*, ld1rq*, ldff1*, ldnf1*, ldnt1*, ld2*, ld3*,
     * ld4*), and false otherwise.
     * Classification is by opcode alone: the operands (and therefore the
     * addressing mode) are not examined.
     */
    switch (instr_get_opcode(instr)) {
    case OP_ld1b:
    case OP_ld1h:
    case OP_ld1w:
    case OP_ld1d:
    case OP_ld1sb:
    case OP_ld1sh:
    case OP_ld1sw:
    case OP_ld1rob:
    case OP_ld1rqb:
    case OP_ld1rqh:
    case OP_ld1rqw:
    case OP_ld1rqd:
    case OP_ldff1b:
    case OP_ldff1h:
    case OP_ldff1w:
    case OP_ldff1d:
    case OP_ldff1sb:
    case OP_ldff1sh:
    case OP_ldff1sw:
    case OP_ldnf1b:
    case OP_ldnf1h:
    case OP_ldnf1w:
    case OP_ldnf1d:
    case OP_ldnf1sb:
    case OP_ldnf1sh:
    case OP_ldnf1sw:
    case OP_ldnt1b:
    case OP_ldnt1h:
    case OP_ldnt1w:
    case OP_ldnt1d:
    case OP_ldnt1sb:
    case OP_ldnt1sh:
    case OP_ldnt1sw:
    case OP_ld2b:
    case OP_ld2h:
    case OP_ld2w:
    case OP_ld2d:
    case OP_ld3b:
    case OP_ld3h:
    case OP_ld3w:
    case OP_ld3d:
    case OP_ld4b:
    case OP_ld4h:
    case OP_ld4w:
    case OP_ld4d: return true;
    }
    return false;
}

Expand Down
19 changes: 19 additions & 0 deletions core/ir/aarch64/instr_create_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -634,10 +634,13 @@
/* Load-instruction creation macros. Each one forwards its arguments, together
 * with the matching OP_ opcode, to the instr_create_<N>dst_<M>src helper.
 */
#define INSTR_CREATE_ldp(dc, rt1, rt2, mem) \
    instr_create_2dst_1src((dc), OP_ldp, (rt1), (rt2), (mem))
#define INSTR_CREATE_ldr(dc, Rd, mem) \
    instr_create_1dst_1src((dc), OP_ldr, (Rd), (mem))
#define INSTR_CREATE_ldrsw(dc, Rd, mem) \
    instr_create_1dst_1src((dc), OP_ldrsw, (Rd), (mem))
#define INSTR_CREATE_ldrb(dc, Rd, mem) \
    instr_create_1dst_1src((dc), OP_ldrb, (Rd), (mem))
#define INSTR_CREATE_ldrsb(dc, Rd, mem) \
    instr_create_1dst_1src((dc), OP_ldrsb, (Rd), (mem))
#define INSTR_CREATE_ldrh(dc, Rd, mem) \
    instr_create_1dst_1src((dc), OP_ldrh, (Rd), (mem))
#define INSTR_CREATE_ldrsh(dc, Rd, mem) \
    instr_create_1dst_1src((dc), OP_ldrsh, (Rd), (mem))
#define INSTR_CREATE_ldur(dc, rt, mem) \
    instr_create_1dst_1src((dc), OP_ldur, (rt), (mem))
#define INSTR_CREATE_ldar(dc, Rt, mem) \
    instr_create_1dst_1src((dc), OP_ldar, (Rt), (mem))
#define INSTR_CREATE_ldarb(dc, Rt, mem) \
Expand Down Expand Up @@ -7185,6 +7188,22 @@
#define INSTR_CREATE_eor_sve_pred_b(dc, Pd, Pg, Pn, Pm) \
instr_create_1dst_3src(dc, OP_eor, Pd, Pg, Pn, Pm)

/**
 * Creates a NOT instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    NOT     <Pd>.B, <Pg>/Z, <Pn>.B
 * \endverbatim
 *
 * The instruction is built via INSTR_CREATE_eor_sve_pred_b(), passing \p Pg
 * (re-wrapped as an element vector with element size OPSZ_1) as the final EOR
 * source operand. Because of this \p Pg is expanded twice by the macro, so the
 * argument expression should be free of side effects.
 *
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Pd   The destination predicate register, P (Predicate).
 * \param Pg   The governing predicate register, P (Predicate).
 * \param Pn   The source predicate register, P (Predicate).
 */
#define INSTR_CREATE_not_sve_pred_b(dc, Pd, Pg, Pn) \
INSTR_CREATE_eor_sve_pred_b( \
dc, Pd, Pg, Pn, opnd_create_reg_element_vector(opnd_get_reg(Pg), OPSZ_1))

/**
* Creates an EOR instruction.
*
Expand Down
4 changes: 4 additions & 0 deletions ext/drx/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ set(srcs
drx.c
drx_buf.c
scatter_gather_${ARCH_NAME}.c
scatter_gather_shared.c
# add more here
)

Expand Down Expand Up @@ -71,6 +72,9 @@ macro(configure_drx_target target)
if (WIN32)
target_link_libraries(${target} ntdll_imports)
endif ()
target_include_directories(${target}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR})
endmacro()

configure_drx_target(drx)
Expand Down
4 changes: 2 additions & 2 deletions ext/drx/drx.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@
# define IF_WINDOWS_ELSE(x, y) (y)
#endif

/* PLATFORM_SUPPORTS_SCATTER_GATHER is defined for both X86 and AArch64 builds. */
#if defined(X86) || defined(AARCH64)
/* TODO i#5036: Complete AArch64 support. */
#    define PLATFORM_SUPPORTS_SCATTER_GATHER
#endif

Expand Down
Loading

0 comments on commit 1a14566

Please sign in to comment.