i#5383: macos M1 support
This patch adds enough support to run a simple hello-world program on M1 Macs.

Issue: DynamoRIO#5383
Anthony Romano committed May 31, 2022
1 parent 11888f3 commit 3165bb7
Showing 42 changed files with 594 additions and 232 deletions.
22 changes: 11 additions & 11 deletions CMakeLists.txt
@@ -216,14 +216,14 @@ endif ()
# For cross-compilation this should still work as you're supposed to set this var.
# X64 mean 64-bit generically, whether AMD64 or AARCH64.
set(TARGET_ARCH "${CMAKE_SYSTEM_PROCESSOR}" CACHE STRING "Target architecture")
if (TARGET_ARCH MATCHES "^arm")
set(ARM 1) # This means AArch32.
set(X64 OFF)
message(STATUS "Building for ARM")
elseif (TARGET_ARCH MATCHES "^aarch64")
if (TARGET_ARCH MATCHES "^arm64" OR TARGET_ARCH MATCHES "^aarch64")
set(AARCH64 1)
set(X64 1)
message(STATUS "Building for AArch64")
elseif (TARGET_ARCH MATCHES "^arm")
set(ARM 1) # This means AArch32.
set(X64 OFF)
message(STATUS "Building for ARM")
else ()
set(X86 1) # This means IA-32 or AMD64
message(STATUS "Building for x86")
@@ -263,17 +263,17 @@ endif ()

# The execution architecture, which might differ from the target for building
# an AArch64 decoder to execute on x86 machines (i#1684).
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
set(DR_HOST_ARM 1)
set(DR_HOST_ARCH_NAME "arm")
set(DR_HOST_AARCHXX 1)
set(DR_HOST_ARCH_NAME_SHARED aarchxx)
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64")
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64")
set(DR_HOST_AARCH64 1)
set(DR_HOST_ARCH_NAME "aarch64")
set(DR_HOST_AARCHXX 1)
set(DR_HOST_ARCH_NAME_SHARED aarchxx)
set(DR_HOST_X64 1)
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
set(DR_HOST_ARM 1)
set(DR_HOST_ARCH_NAME "arm")
set(DR_HOST_AARCHXX 1)
set(DR_HOST_ARCH_NAME_SHARED aarchxx)
elseif (CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
set(DR_HOST_X86 1)
set(DR_HOST_ARCH_NAME "x86")
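On Apple Silicon the toolchain and CMake report the processor as "arm64" rather than the "aarch64" spelling used on Linux, so the AArch64 branch now accepts both names and is tested before the generic "^arm" pattern, which "arm64" would otherwise match and be misclassified as AArch32. The same naming split exists at the compiler level; a small illustrative C probe (not part of the patch, shown only to make the arm64/aarch64 duality concrete) — Apple clang defines __arm64__ alongside the standard __aarch64__:

#include <stdio.h>

int main(void)
{
    /* Illustrative probe of the 64-bit ARM naming split; not DynamoRIO code. */
#if defined(__aarch64__)
    puts("__aarch64__ defined (standard AArch64 macro)");
#endif
#if defined(__arm64__)
    puts("__arm64__ defined (Apple spelling, matching CMAKE_SYSTEM_PROCESSOR=arm64)");
#endif
#if !defined(__aarch64__) && !defined(__arm64__)
    puts("not a 64-bit ARM build");
#endif
    return 0;
}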
85 changes: 63 additions & 22 deletions core/arch/aarch64/aarch64.asm
@@ -37,7 +37,11 @@

#include "../asm_defines.asm"
START_FILE


#if !(defined(MACOS) && defined(AARCH64))
#include "include/syscall.h"
#endif

#ifndef UNIX
# error Non-Unix is not supported
@@ -308,17 +312,22 @@ GLOBAL_LABEL(dynamorio_app_take_over:)
GLOBAL_LABEL(cleanup_and_terminate:)
/* move argument registers to callee saved registers */
mov x19, x0 /* dcontext ptr size */
#ifdef MACOS
mov x20, x1 /* sysnr */
mov x21, x2 /* arg1 */
mov x22, x3 /* arg2 */
#else
mov w20, w1 /* sysnum 32-bit int */
mov w21, w2 /* sys_arg1 32-bit int */
mov w22, w3 /* sys_arg2 32-bit int */
#endif
mov w23, w4 /* exitproc 32-bit int */
/* x24 reserved for dstack ptr */
/* x25 reserved for syscall ptr */

/* inc exiting_thread_count to avoid being killed once off all_threads list */
adrp x0, :got:exiting_thread_count
ldr x0, [x0, #:got_lo12:exiting_thread_count]
CALLC2(atomic_add, x0, #1)
AARCH64_ADRP_GOT_LDR(GLOBAL_REF(exiting_thread_count), x0)
CALLC2(GLOBAL_REF(atomic_add), x0, #1)

/* save dcontext->dstack for freeing later and set dcontext->is_exiting */
mov w1, #1
@@ -341,39 +350,41 @@ cat_thread_only:
CALLC0(GLOBAL_REF(dynamo_thread_exit))
cat_no_thread:
/* switch to d_r_initstack for cleanup of dstack */
adrp x26, :got:initstack_mutex
ldr x26, [x26, #:got_lo12:initstack_mutex]
AARCH64_ADRP_GOT_LDR(GLOBAL_REF(initstack_mutex), x26)
cat_spin:
CALLC2(atomic_swap, x26, #1)
CALLC2(GLOBAL_REF(atomic_swap), x26, #1)
cbz w0, cat_have_lock
yield
b cat_spin

cat_have_lock:
/* switch stack */
adrp x0, :got:d_r_initstack
ldr x0, [x0, #:got_lo12:d_r_initstack]
AARCH64_ADRP_GOT_LDR(GLOBAL_REF(d_r_initstack), x0)
ldr x0, [x0]
mov sp, x0

/* free dstack and call the EXIT_DR_HOOK */
CALLC1(GLOBAL_REF(dynamo_thread_stack_free_and_exit), x24) /* pass dstack */

/* give up initstack_mutex */
adrp x0, :got:initstack_mutex
ldr x0, [x0, #:got_lo12:initstack_mutex]
AARCH64_ADRP_GOT_LDR(GLOBAL_REF(initstack_mutex), x0)
mov x1, #0
str x1, [x0]

/* dec exiting_thread_count (allows another thread to kill us) */
adrp x0, :got:exiting_thread_count
ldr x0, [x0, #:got_lo12:exiting_thread_count]
CALLC2(atomic_add, x0, #-1)
AARCH64_ADRP_GOT_LDR(GLOBAL_REF(exiting_thread_count), x0)
CALLC2(GLOBAL_REF(atomic_add), x0, #-1)

/* put system call number in x8 */
#ifdef MACOS
mov x0, x20 /* sysnr */
mov x1, x21 /* arg1 */
mov x2, x22 /* arg2 */
#else
mov w0, w21 /* sys_arg1 32-bit int */
mov w1, w22 /* sys_arg2 32-bit int */
mov w8, w20 /* int sys_call */
#endif

br x25 /* go do the syscall! */
bl GLOBAL_REF(unexpected_return) /* FIXME i#1569: NYI */
@@ -392,8 +403,13 @@ GLOBAL_LABEL(atomic_add:)

DECLARE_FUNC(global_do_syscall_int)
GLOBAL_LABEL(global_do_syscall_int:)
#ifdef MACOS
mov x16, #0
svc #0x80
#else
/* FIXME i#1569: NYI on AArch64 */
svc #0
#endif
bl GLOBAL_REF(unexpected_return)
END_FUNC(global_do_syscall_int)

@@ -414,7 +430,7 @@ DECLARE_GLOBAL(safe_read_asm_recover)
DECLARE_FUNC(safe_read_asm)
GLOBAL_LABEL(safe_read_asm:)
cmp ARG3, #0
1: b.eq safe_read_asm_recover
1: b.eq safe_read_asm_recover_local
ADDRTAKEN_LABEL(safe_read_asm_pre:)
ldrb w3, [ARG2]
ADDRTAKEN_LABEL(safe_read_asm_mid:)
@@ -425,6 +441,7 @@ ADDRTAKEN_LABEL(safe_read_asm_post:)
add ARG1, ARG1, #1
b 1b
ADDRTAKEN_LABEL(safe_read_asm_recover:)
safe_read_asm_recover_local:
mov x0, ARG2
ret
END_FUNC(safe_read_asm)
@@ -564,6 +581,15 @@ GLOBAL_LABEL(main_signal_handler:)

#endif /* LINUX */

#ifdef MACOS
DECLARE_FUNC(main_signal_handler)
GLOBAL_LABEL(main_signal_handler:)
/* see sendsig_set_thread_state64 in unix_signal.c */
mov ARG6, sp
b GLOBAL_REF(main_signal_handler_C) /* chain call */
END_FUNC(main_signal_handler)
#endif

DECLARE_FUNC(hashlookup_null_handler)
GLOBAL_LABEL(hashlookup_null_handler:)
bl GLOBAL_REF(unexpected_return) /* FIXME i#1569: NYI */
@@ -621,8 +647,7 @@ GLOBAL_LABEL(icache_op_ic_ivau_asm:)
/* Spill X1 and X2 to TLS_REG4_SLOT and TLS_REG5_SLOT. */
stp x1, x2, [x0, #spill_state_r4_OFFSET]
/* Point X1 at icache_op_struct.lock. */
adrp x1, (icache_op_struct + icache_op_struct_lock_OFFSET)
add x1, x1, #:lo12:(icache_op_struct + icache_op_struct_lock_OFFSET)
AARCH64_ADRP_GOT((GLOBAL_REF(icache_op_struct) + icache_op_struct_lock_OFFSET), x1)
/* Acquire lock. */
prfm pstl1keep, [x1]
1:
@@ -720,8 +745,7 @@ ic_ivau_return:
/* Load fcache_return into X1. */
ldr x1, [x0, #spill_state_fcache_return_OFFSET]
/* Point X0 at fake linkstub. */
adrp x0, linkstub_selfmod
add x0, x0, #:lo12:linkstub_selfmod
AARCH64_ADRP_GOT(GLOBAL_REF(linkstub_selfmod), x0)
/* Branch to fcache_return. */
br x1

@@ -753,8 +777,7 @@ GLOBAL_LABEL(icache_op_isb_asm:)
ldr x2, [x0, #spill_state_r2_OFFSET]
stp x1, x2, [x0, #spill_state_r4_OFFSET]
/* Point X1 at icache_op_struct.lock. */
adrp x1, (icache_op_struct + icache_op_struct_lock_OFFSET)
add x1, x1, #:lo12:(icache_op_struct + icache_op_struct_lock_OFFSET)
AARCH64_ADRP_GOT((GLOBAL_REF(icache_op_struct) + icache_op_struct_lock_OFFSET), x1)
/* Acquire lock. */
prfm pstl1keep, [x1]
1:
@@ -781,10 +804,28 @@ GLOBAL_LABEL(icache_op_isb_asm:)
/* Load fcache_return into X1. */
ldr x1, [x0, #spill_state_fcache_return_OFFSET]
/* Point X0 at fake linkstub. */
adrp x0, linkstub_selfmod
add x0, x0, #:lo12:linkstub_selfmod
AARCH64_ADRP_GOT(GLOBAL_REF(linkstub_selfmod), x0)
/* Branch to fcache_return. */
br x1
END_FUNC(icache_op_isb_asm)

#if defined(MACOS) && defined(AARCH64)
DECLARE_FUNC(dynamorio_sigreturn)
GLOBAL_LABEL(dynamorio_sigreturn:)
brk 0xb001
END_FUNC(dynamorio_sigreturn)

DECLARE_FUNC(dynamorio_sys_exit)
GLOBAL_LABEL(dynamorio_sys_exit:)
brk 0xb002
END_FUNC(dynamorio_sys_exit)
#endif

#ifdef MACOS
DECLARE_FUNC(new_bsdthread_intercept)
GLOBAL_LABEL(new_bsdthread_intercept:)
brk 0xb003
END_FUNC(new_bsdthread_intercept)
#endif

END_FILE
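
The MACOS paths above follow the XNU system-call convention for arm64: the call number is passed in x16 rather than x8, arguments stay in x0-x7, and the kernel is entered with svc #0x80; loading x16 with 0, as global_do_syscall_int now does, appears to select the indirect SYS_syscall, which takes the real number as its first argument. A stand-alone, hedged C sketch of that convention (not DynamoRIO code; write(2) is BSD syscall 4, and real programs should simply call libc):

/* Hedged sketch: raw XNU/arm64 syscall, number in x16, trap via svc #0x80. */
static long raw_write(int fd, const void *buf, unsigned long len)
{
    register long x0 __asm__("x0") = fd;
    register long x1 __asm__("x1") = (long)buf;
    register long x2 __asm__("x2") = (long)len;
    register long x16 __asm__("x16") = 4; /* SYS_write in the BSD syscall table */
    __asm__ volatile("svc #0x80"
                     : "+r"(x0), "+r"(x1), "+r"(x2), "+r"(x16)
                     :
                     : "memory", "cc");
    return x0; /* bytes written; on failure the carry flag is set and x0 holds errno */
}

int main(void)
{
    raw_write(1, "hello from svc #0x80\n", 21);
    return 0;
}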
8 changes: 4 additions & 4 deletions core/arch/aarch64/memfuncs.asm
@@ -70,14 +70,14 @@ GLOBAL_LABEL(memset:)

/* See x86.asm notes about needing these to avoid gcc invoking *_chk */
.global __memcpy_chk
.hidden __memcpy_chk
HIDDEN(__memcpy_chk)
WEAK(__memcpy_chk)
.set __memcpy_chk,memcpy
.set __memcpy_chk,GLOBAL_REF(memcpy)

.global __memset_chk
.hidden __memset_chk
HIDDEN(__memset_chk)
WEAK(__memset_chk)
.set __memset_chk,memset
.set __memset_chk,GLOBAL_REF(memset)

#endif /* UNIX */

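The HIDDEN()/WEAK()/GLOBAL_REF() macro changes are Mach-O portability: the bare .hidden directive and un-prefixed symbol names are ELF conventions, while Mach-O wants .private_extern and a leading underscore on C symbols, which these macros presumably paper over. The __memcpy_chk/__memset_chk aliases exist because fortified builds rewrite plain calls into the *_chk variants; a small hedged C illustration of that rewrite (compile with something like -O2 -D_FORTIFY_SOURCE=2 to see the __memcpy_chk reference appear):

#include <string.h>

/* Hedged illustration: with fortification enabled, the compiler may lower these
 * calls to __memcpy_chk/__memset_chk, which is why the asm above aliases those
 * symbols back to DynamoRIO's own memcpy/memset. */
void fill(char *out, const char *src, size_t len)
{
    char buf[64];
    memcpy(buf, src, len); /* may become __memcpy_chk(buf, src, len, 64) */
    memset(buf, 0, len);   /* may become __memset_chk(buf, 0, len, 64) */
    memcpy(out, buf, sizeof(buf));
}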
2 changes: 2 additions & 0 deletions core/arch/aarch64/proc.c
@@ -104,6 +104,7 @@ proc_init_arch(void)
}

#ifndef DR_HOST_NOT_TARGET
# if !defined(MACOS)
get_processor_specific_info();

DOLOG(1, LOG_TOP, {
@@ -135,6 +136,7 @@ proc_init_arch(void)
/* FIXME i#5474: Log all FEATURE_s for ID_AA64PFR0_EL1. */
LOG_FEATURE(FEATURE_FP16);
});
# endif
#endif
}

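get_processor_specific_info() gathers CPU features by reading AArch64 ID registers via MRS; the patch compiles that block out on macOS for now. For context, Apple exposes equivalent information through sysctl; a hedged sketch of querying the FP16 feature referenced in the log above (hw.optional.arm.FEAT_* are Apple's sysctl names — shown only as an illustration, not as what the patch does):

#include <stdio.h>
#include <sys/sysctl.h>

/* Hedged sketch: query an ARM feature flag on macOS via sysctl instead of MRS.
 * Returns 1 if present, 0 if absent or if the sysctl does not exist. */
static int macos_has_feature(const char *name)
{
    int val = 0;
    size_t len = sizeof(val);
    if (sysctlbyname(name, &val, &len, NULL, 0) != 0)
        return 0;
    return val != 0;
}

int main(void)
{
    printf("FEAT_FP16: %d\n", macos_has_feature("hw.optional.arm.FEAT_FP16"));
    return 0;
}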
2 changes: 1 addition & 1 deletion core/arch/aarchxx/aarchxx.asm
@@ -80,7 +80,7 @@ GLOBAL_LABEL(xfer_to_new_libdr:)
* the current DR, but w/o clobbering ARG3 or ARG4.
*/
adr ARG1, .L_start_invoke_C
adr ARG2, _start
adr ARG2, GLOBAL_REF(_start)
sub ARG1, ARG1, ARG2
add REG_PRESERVED_1, REG_PRESERVED_1, ARG1
/* _start expects these as 2nd & 3rd args */
7 changes: 4 additions & 3 deletions core/arch/aarchxx/emit_utils.c
@@ -52,7 +52,7 @@ insert_load_dr_tls_base(dcontext_t *dcontext, instrlist_t *ilist, instr_t *where
*/
PRE(ilist, where,
INSTR_CREATE_mrs(dcontext, opnd_create_reg(reg_base),
opnd_create_reg(DR_REG_TPIDR_EL0)));
opnd_create_reg(LIB_SEG_TLS)));
#else // ARM
/* load TLS base from user-read-only-thread-ID register
* mrc p15, 0, reg_base, c13, c0, 3
@@ -65,8 +65,9 @@ insert_load_dr_tls_base(dcontext_t *dcontext, instrlist_t *ilist, instr_t *where
#endif
/* ldr dr_reg_stolen, [reg_base, DR_TLS_BASE_OFFSET] */
PRE(ilist, where,
XINST_CREATE_load(dcontext, opnd_create_reg(dr_reg_stolen),
OPND_CREATE_MEMPTR(reg_base, DR_TLS_BASE_OFFSET)));
XINST_CREATE_load(
dcontext, opnd_create_reg(dr_reg_stolen),
OPND_CREATE_MEMPTR(reg_base, IF_MACOS64_ELSE(8, 1) * DR_TLS_BASE_OFFSET)));
}

/* Having only one thread register (TPIDRURO for ARM, TPIDR_EL0 for AARCH64) shared
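Two macOS details are folded into insert_load_dr_tls_base(): the thread register is now referenced through LIB_SEG_TLS, which on Darwin/arm64 presumably resolves to TPIDRRO_EL0 (Darwin publishes the thread-local base in the read-only thread register, whereas Linux uses TPIDR_EL0), and the IF_MACOS64_ELSE(8, 1) factor suggests DR_TLS_BASE_OFFSET is expressed in pointer-sized slots on macOS and has to be scaled to a byte offset. A minimal hedged sketch of reading that register from user space on Apple Silicon:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Hedged sketch: Darwin/arm64 keeps the thread-local base in TPIDRRO_EL0,
     * readable (but not writable) from EL0. */
    uint64_t tls_base;
    __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tls_base));
    printf("TPIDRRO_EL0 (thread-local base) = 0x%llx\n",
           (unsigned long long)tls_base);
    return 0;
}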
12 changes: 7 additions & 5 deletions core/arch/arch.c
@@ -497,6 +497,8 @@ static void shared_gencode_init(IF_X86_64_ELSE(gencode_mode_t gencode_mode, void
bool x86_to_x64_mode = false;
#endif

pthread_jit_write();

gencode = heap_mmap_reserve(GENCODE_RESERVE_SIZE, GENCODE_COMMIT_SIZE,
MEMPROT_EXEC | MEMPROT_READ | MEMPROT_WRITE,
VMM_SPECIAL_MMAP | VMM_REACHABLE);
@@ -3426,15 +3428,15 @@ dr_mcontext_to_priv_mcontext(priv_mcontext_t *dst, dr_mcontext_t *src)
if (src->size > sizeof(dr_mcontext_t))
return false;
if (TESTALL(DR_MC_ALL, src->flags) && src->size == sizeof(dr_mcontext_t)) {
*dst = *(priv_mcontext_t *)(&MCXT_FIRST_REG_FIELD(src));
*dst = *(priv_mcontext_t *)(MCXT_FIRST_REG_FIELD(src));
} else {
if (TEST(DR_MC_INTEGER, src->flags)) {
/* xsp is in the middle of the mcxt, so we save dst->xsp here and
* restore it later so we can use one memcpy for DR_MC_INTEGER.
*/
reg_t save_xsp = dst->xsp;
if (src->size >= offsetof(dr_mcontext_t, IF_X86_ELSE(xflags, pc))) {
memcpy(&MCXT_FIRST_REG_FIELD(dst), &MCXT_FIRST_REG_FIELD(src),
memcpy(MCXT_FIRST_REG_FIELD(dst), MCXT_FIRST_REG_FIELD(src),
/* end of the mcxt integer gpr */
offsetof(priv_mcontext_t, IF_X86_ELSE(xflags, pc)));
} else
@@ -3521,15 +3523,15 @@ priv_mcontext_to_dr_mcontext(dr_mcontext_t *dst, priv_mcontext_t *src)
if (dst->size > sizeof(dr_mcontext_t))
return false;
if (TESTALL(DR_MC_ALL, dst->flags) && dst->size == sizeof(dr_mcontext_t)) {
*(priv_mcontext_t *)(&MCXT_FIRST_REG_FIELD(dst)) = *src;
*(priv_mcontext_t *)(MCXT_FIRST_REG_FIELD(dst)) = *src;
} else {
if (TEST(DR_MC_INTEGER, dst->flags)) {
/* xsp is in the middle of the mcxt, so we save dst->xsp here and
* restore it later so we can use one memcpy for DR_MC_INTEGER.
*/
reg_t save_xsp = dst->xsp;
if (dst->size >= offsetof(dr_mcontext_t, IF_X86_ELSE(xflags, pc))) {
memcpy(&MCXT_FIRST_REG_FIELD(dst), &MCXT_FIRST_REG_FIELD(src),
memcpy(MCXT_FIRST_REG_FIELD(dst), MCXT_FIRST_REG_FIELD(src),
/* end of the mcxt integer gpr */
offsetof(priv_mcontext_t, IF_X86_ELSE(xflags, pc)));
} else
@@ -3610,7 +3612,7 @@ priv_mcontext_t *
dr_mcontext_as_priv_mcontext(dr_mcontext_t *mc)
{
/* It's up to the caller to ensure the proper DR_MC_ flags are set (i#1848) */
return (priv_mcontext_t *)(&MCXT_FIRST_REG_FIELD(mc));
return (priv_mcontext_t *)(MCXT_FIRST_REG_FIELD(mc));
}

priv_mcontext_t *
11 changes: 11 additions & 0 deletions core/arch/arch.h
@@ -1336,9 +1336,20 @@ new_thread_setup(priv_mcontext_t *mc);
# ifdef MACOS
void
new_bsdthread_setup(priv_mcontext_t *mc);
// Enable writing to MAP_JIT pages.
# define pthread_jit_write() pthread_jit_write_protect_np(false)
// Enable executing MAP_JIT pages.
# define pthread_jit_read() pthread_jit_write_protect_np(true)
void
pthread_jit_write_protect_np(int);
# endif
#endif

#ifndef pthread_jit_write
# define pthread_jit_write()
# define pthread_jit_read()
#endif

void
get_simd_vals(priv_mcontext_t *mc);

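pthread_jit_write_protect_np() is Apple's per-thread toggle for MAP_JIT memory: passing 0 makes JIT pages writable (and non-executable) for the calling thread, passing 1 makes them executable again. The new pthread_jit_write()/pthread_jit_read() macros wrap it so call sites such as shared_gencode_init() can open a write window before emitting code, and they expand to nothing on other platforms. A hedged sketch of the usual W^X pattern on Apple Silicon (illustrative only; real use also needs MAP_JIT and, under the hardened runtime, the JIT entitlement):

#include <libkern/OSCacheControl.h> /* sys_icache_invalidate */
#include <pthread.h>                /* pthread_jit_write_protect_np */
#include <string.h>
#include <sys/mman.h>

/* Hedged sketch of the W^X dance for MAP_JIT pages on Apple Silicon. */
static void emit_code(void *jit_region, const void *code, size_t len)
{
    pthread_jit_write_protect_np(0);        /* writable for this thread */
    memcpy(jit_region, code, len);
    pthread_jit_write_protect_np(1);        /* executable again */
    sys_icache_invalidate(jit_region, len); /* keep the I-cache coherent */
}

int main(void)
{
    void *region = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_PRIVATE | MAP_ANON | MAP_JIT, -1, 0);
    if (region == MAP_FAILED)
        return 1;
    /* AArch64 "ret" instruction, little-endian bytes of 0xd65f03c0. */
    static const unsigned char ret_insn[] = { 0xc0, 0x03, 0x5f, 0xd6 };
    emit_code(region, ret_insn, sizeof(ret_insn));
    ((void (*)(void))region)();
    return 0;
}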