diff --git a/core/arch/emit_utils_shared.c b/core/arch/emit_utils_shared.c
index 01b335a10fe..e1a99dee3cd 100644
--- a/core/arch/emit_utils_shared.c
+++ b/core/arch/emit_utils_shared.c
@@ -1382,6 +1382,11 @@ build_profile_call_buffer(void);
 uint
 profile_call_size()
 {
+    /* XXX i#3556: For -satisfy_w_xor_x we'd need to change the
+     * instr_encode calls and possibly more.  Punting for now.
+     */
+    ASSERT_NOT_IMPLEMENTED(!DYNAMO_OPTION(satisfy_w_xor_x),
+                           "PROFILE_RDTSC is not supported with -satisfy_w_xor_x");
     if (profile_call_length == 0)
         build_profile_call_buffer();
     return profile_call_length;
diff --git a/core/arch/interp.c b/core/arch/interp.c
index 19e51e28087..3fda40c9bdc 100644
--- a/core/arch/interp.c
+++ b/core/arch/interp.c
@@ -7985,9 +7985,13 @@ shift_ctis_in_fragment(dcontext_t *dcontext, fragment_t *f, ssize_t shift,
             ASSERT(old_target + shift == target);
             LOG(THREAD, LOG_MONITOR, 4,
                 "shift_ctis_in_fragment: pre-sysenter mov now pts to @" PFX "\n", target);
-            DEBUG_DECLARE(encode_nxt =) instr_encode(dcontext, &instr, prev_decode_pc);
+            DEBUG_DECLARE(encode_nxt =)
+            instr_encode_to_copy(dcontext, &instr,
+                                 vmcode_get_writable_addr(prev_decode_pc),
+                                 prev_decode_pc);
             /* must not change size! */
-            ASSERT(encode_nxt != NULL && encode_nxt == next_pc);
+            ASSERT(encode_nxt != NULL &&
+                   vmcode_get_executable_addr(encode_nxt) == next_pc);
         }
         /* The following 'if' won't get executed since a sysenter isn't
          * a CTI instr, so we don't need an else. We do need to take care
@@ -8011,9 +8015,11 @@ shift_ctis_in_fragment(dcontext_t *dcontext, fragment_t *f, ssize_t shift,
         /* re-encode instr w/ new pc-relative target */
         instr_set_raw_bits_valid(&instr, false);
         instr_set_target(&instr, opnd_create_pc(target - shift));
-        DEBUG_DECLARE(nxt_pc =) instr_encode(dcontext, &instr, prev_pc);
+        DEBUG_DECLARE(nxt_pc =)
+        instr_encode_to_copy(dcontext, &instr, vmcode_get_writable_addr(prev_pc),
+                             prev_pc);
         /* must not change size! */
-        ASSERT(nxt_pc != NULL && nxt_pc == pc);
+        ASSERT(nxt_pc != NULL && vmcode_get_executable_addr(nxt_pc) == pc);
 #ifdef DEBUG
         if ((d_r_stats->logmask & LOG_CACHE) != 0) {
             d_r_loginst(
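The interp.c hunks above (and the mangle_shared.c hunk that follows) switch from `instr_encode()` to `instr_encode_to_copy()`: under `-satisfy_w_xor_x` the code cache is never writable at its executable address, so bytes are written through a writable alias and translated back for the size assertions. The following standalone sketch (not DynamoRIO code; Linux/x86-64 only, and `memfd_create` assumes glibc 2.27+) illustrates the underlying dual-view technique of one physical region mapped at two addresses, neither of which is ever W+X:

```c
/* Minimal dual-mapping sketch: one memfd-backed region, a writable view for
 * "encoding" and a separate executable view for running.  Hypothetical
 * standalone demo, not DynamoRIO's allocator.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
    size_t size = 4096;
    int fd = memfd_create("dual-map-demo", 0);
    if (fd == -1 || ftruncate(fd, size) == -1)
        return 1;
    /* Two views of the same pages: rw- for writing, r-x for executing. */
    unsigned char *w = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    unsigned char *x = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    if (w == MAP_FAILED || x == MAP_FAILED)
        return 1;
    /* "Encode" through the writable view: mov eax, 42; ret. */
    static const unsigned char code[] = { 0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3 };
    memcpy(w, code, sizeof(code));
    /* Run through the executable view (data-to-function cast: fine in
     * practice on Linux, though not strictly ISO C). */
    int (*fn)(void) = (int (*)(void))x;
    printf("returned %d\n", fn());
    return 0;
}
```

On x86 no explicit instruction-cache flush is needed after writing through the alias; on other architectures one is, which is why the mangle_shared.c hunk below still calls `machine_cache_sync()` after re-encoding.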
diff --git a/core/arch/mangle_shared.c b/core/arch/mangle_shared.c
index 1bab4d57426..b71c7ed28d1 100644
--- a/core/arch/mangle_shared.c
+++ b/core/arch/mangle_shared.c
@@ -756,8 +756,11 @@ mangle_syscall_code(dcontext_t *dcontext, fragment_t *f, byte *pc, bool skip)
         LOG(THREAD, LOG_SYSCALLS, 3, "\tmodifying target of syscall jmp to " PFX "\n",
             target);
         instr_set_target(&instr, opnd_create_pc(target));
-        nxt_pc = instr_encode(dcontext, &instr, skip_pc);
-        ASSERT(nxt_pc != NULL && nxt_pc == cti_pc);
+        nxt_pc = instr_encode_to_copy(dcontext, &instr, vmcode_get_writable_addr(skip_pc),
+                                      skip_pc);
+        ASSERT(nxt_pc != NULL);
+        nxt_pc = vmcode_get_executable_addr(nxt_pc);
+        ASSERT(nxt_pc == cti_pc);
         machine_cache_sync(skip_pc, nxt_pc, true);
     } else {
         LOG(THREAD, LOG_SYSCALLS, 3, "\ttarget of syscall jmp is already " PFX "\n",
diff --git a/core/dynamo.c b/core/dynamo.c
index 6f9bcd4776c..b7bf4296090 100644
--- a/core/dynamo.c
+++ b/core/dynamo.c
@@ -805,6 +805,8 @@ dynamorio_fork_init(dcontext_t *dcontext)
     }
 # endif /* DEBUG */
 
+    vmm_heap_fork_init(dcontext);
+
     /* must re-hash parent entry in threads table, plus no longer have any
      * other threads (fork -> we're alone in address space), so clear
      * out entire thread table, then add child
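The dynamo.c hook above calls the new `vmm_heap_fork_init()` because the two vmcode views are file-backed `MAP_SHARED` mappings: unlike ordinary `MAP_PRIVATE` memory, they keep aliasing the same pages in both processes after `fork()`, so an un-rebuilt child would scribble on its parent's code cache. A tiny Linux demo of that sharing (hypothetical names, not DR code):

```c
/* file-backed MAP_SHARED views stay shared across fork(): the child's write
 * below is visible in the parent, so the parent prints 2, not 1.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int
main(void)
{
    int fd = memfd_create("fork-demo", 0);
    if (fd == -1 || ftruncate(fd, 4096) == -1)
        return 1;
    volatile int *shared = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (shared == MAP_FAILED)
        return 1;
    *shared = 1;
    if (fork() == 0) {
        *shared = 2; /* Child writes through the shared view... */
        _exit(0);
    }
    wait(NULL);
    printf("parent sees %d\n", *shared); /* ...and the parent observes it. */
    return 0;
}
```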
diff --git a/core/heap.c b/core/heap.c
index d553d8aa373..940058ae1fc 100644
--- a/core/heap.c
+++ b/core/heap.c
@@ -46,6 +46,7 @@
 #include "link.h"   /* for struct sizes */
 #include "instr.h"  /* for struct sizes */
 #include "fcache.h" /* fcache_low_on_memory */
+#include "memquery.h"
 #ifdef DEBUG
 #    include "hotpatch.h" /* To handle leak for case 9593. */
 #endif
@@ -918,8 +919,9 @@ vmm_place_vmcode(vm_heap_t *vmh, size_t size, heap_error_code_t *error_code)
         /* Ensure os_map_file ignores vmcode: */
         ASSERT(!is_vmm_reserved_address(vmh->start_addr, size, NULL, NULL));
         size_t map_size = vmh->alloc_size;
-        byte *map_base = os_map_file(heapmgt->dual_map_file, &map_size, 0,
-                                     vmh->alloc_start, MEMPROT_NONE, MAP_FILE_FIXED);
+        byte *map_base =
+            os_map_file(heapmgt->dual_map_file, &map_size, 0, vmh->alloc_start,
+                        MEMPROT_NONE, MAP_FILE_VMM_COMMIT | MAP_FILE_FIXED);
         if (map_base != vmh->alloc_start || map_size != vmh->alloc_size) {
             REPORT_FATAL_ERROR_AND_EXIT(FAILED_TO_SATISFY_W_XOR_X, 2,
                                         get_application_name(), get_application_pid());
@@ -1067,9 +1069,11 @@ vmm_heap_unit_exit(vm_heap_t *vmh)
         os_heap_free(vmh->alloc_start, vmh->alloc_size, &error_code);
         ASSERT(error_code == HEAP_ERROR_SUCCESS);
         if (DYNAMO_OPTION(satisfy_w_xor_x) && vmh == &heapmgt->vmcode) {
+            heap_error_code_t error_code;
             os_heap_free(heapmgt->vmcode_writable_alloc, vmh->alloc_size, &error_code);
             ASSERT(error_code == HEAP_ERROR_SUCCESS);
             os_delete_memory_file(MEMORY_FILE_NAME, heapmgt->dual_map_file);
+            heapmgt->dual_map_file = INVALID_FILE;
         }
     } else {
         /* FIXME: doing nothing for now - we only care about this in
@@ -1594,6 +1598,8 @@ vmm_heap_commit(vm_addr_t p, size_t size, uint prot, heap_error_code_t *error_co
 {
     bool res = true;
     vm_heap_t *vmh = vmheap_for_which(which);
+    LOG(GLOBAL, LOG_HEAP, 3, "vmm_heap_commit %s: size=%d p=" PFX " prot=%x\n", vmh->name,
+        size, p, prot);
     if (DYNAMO_OPTION(satisfy_w_xor_x) && vmh == &heapmgt->vmcode) {
         vm_addr_t p_writable = vmm_normalize_addr(vmh, &p);
         /* We blindly shadow even if prot is -w to simplify de-alloc.  -w is rare. */
@@ -1674,7 +1680,8 @@ vmm_heap_commit(vm_addr_t p, size_t size, uint prot, heap_error_code_t *error_co
             }
         }
     }
-    if (!res && DYNAMO_OPTION(oom_timeout) != 0) {
+    if (!res && DYNAMO_OPTION(oom_timeout) != 0 &&
+        !(DYNAMO_OPTION(satisfy_w_xor_x) && vmh == &heapmgt->vmcode)) {
         DEBUG_DECLARE(heap_error_code_t old_error_code = *error_code;)
         ASSERT(old_error_code != HEAP_ERROR_SUCCESS);
 
@@ -1892,6 +1899,109 @@ vmm_heap_exit()
     }
 }
 
+#ifdef UNIX
+void
+vmm_heap_fork_init(dcontext_t *dcontext)
+{
+    if (!DYNAMO_OPTION(satisfy_w_xor_x))
+        return;
+    /* We want a private copy of our dual mapping setup, rather than sharing the
+     * parent's.  Unfortunately that requires copying the entire vmcode contents
+     * into new mappings.  Fortunately with two copies we don't need a temporary.
+     *
+     * XXX i#3556: There is a race here where the parent changes vmcode while we're
+     * setting up our private version.  The only solution may be to force the parent
+     * to wait: do we want to do that?
+     */
+
+    /* First, make a new file. */
+    int old_fd = heapmgt->dual_map_file;
+    heapmgt->dual_map_file =
+        os_create_memory_file(MEMORY_FILE_NAME, heapmgt->vmcode.alloc_size);
+    if (heapmgt->dual_map_file == INVALID_FILE)
+        goto vmm_heap_fork_init_failed;
+    LOG(GLOBAL, LOG_HEAP, 2, "%s: new dual_map_file is %d\n", __FUNCTION__,
+        heapmgt->dual_map_file);
+
+    /* Second, make a new +w region and copy the old +x protections and contents.
+     * Record the old +x protections (because some are +rw (ELF data segments), some
+     * are +rx, and some are +r (reachable (non-exec) heap)).
+     */
+    vm_area_vector_t x_regions;
+    vmvector_init_vector(&x_regions, VECTOR_NEVER_MERGE | VECTOR_NO_LOCK);
+    /* XXX i#3556: For -reachable_heap we'll have problems here where the heap
+     * for x_regions will modify vmcode!
+     */
+    ASSERT_NOT_IMPLEMENTED(!REACHABLE_HEAP(),
+                           "'-reachable_heap -satisfy_w_xor_x' does not support fork");
+    size_t map_size = heapmgt->vmcode.alloc_size;
+    byte *map_base =
+        os_map_file(heapmgt->dual_map_file, &map_size, 0,
+                    heapmgt->vmcode_writable_alloc, MEMPROT_NONE,
+                    MAP_FILE_VMM_COMMIT | MAP_FILE_FIXED);
+    if (map_base != heapmgt->vmcode_writable_alloc ||
+        map_size != heapmgt->vmcode.alloc_size)
+        goto vmm_heap_fork_init_failed;
+    memquery_iter_t iter;
+    if (!memquery_iterator_start(&iter, heapmgt->vmcode.start_addr, true /*using heap*/))
+        goto vmm_heap_fork_init_failed;
+    while (memquery_iterator_next(&iter) && iter.vm_start < heapmgt->vmcode.end_addr) {
+        if (iter.vm_start < heapmgt->vmcode.start_addr || iter.prot == MEMPROT_NONE)
+            continue;
+        vmvector_add(&x_regions, iter.vm_start, iter.vm_end,
+                     (void *)(ptr_uint_t)iter.prot);
+        heap_error_code_t error_code;
+        byte *new_start =
+            iter.vm_start - heapmgt->vmcode.start_addr + heapmgt->vmcode_writable_base;
+        uint new_prot = (iter.prot & ~(MEMPROT_EXEC)) | MEMPROT_WRITE;
+        if (!os_heap_commit(new_start, iter.vm_end - iter.vm_start, new_prot,
+                            &error_code))
+            goto vmm_heap_fork_init_failed;
+        memcpy(new_start, iter.vm_start, iter.vm_end - iter.vm_start);
+        LOG(GLOBAL, LOG_HEAP, 2, "%s: re-mapped %p-%p %x; copied from %p-%p %x\n",
+            __FUNCTION__, new_start, new_start + (iter.vm_end - iter.vm_start),
+            new_prot, iter.vm_start, iter.vm_end, iter.prot);
+    }
+    memquery_iterator_stop(&iter);
+
+    /* Third, make a new +x region and set up the right protections and mappings. */
+    map_size = heapmgt->vmcode.alloc_size;
+    map_base =
+        os_map_file(heapmgt->dual_map_file, &map_size, 0, heapmgt->vmcode.alloc_start,
+                    MEMPROT_NONE, MAP_FILE_VMM_COMMIT | MAP_FILE_FIXED);
+    if (map_base != heapmgt->vmcode.alloc_start || map_size != heapmgt->vmcode.alloc_size)
+        goto vmm_heap_fork_init_failed;
+    vmvector_iterator_t vmvi;
+    vmvector_iterator_start(&x_regions, &vmvi);
+    while (vmvector_iterator_hasnext(&vmvi)) {
+        byte *start, *end;
+        uint prot = (uint)(ptr_uint_t)vmvector_iterator_next(&vmvi, &start, &end);
+        map_size = end - start;
+        map_base = os_map_file(heapmgt->dual_map_file, &map_size,
+                               start - heapmgt->vmcode.start_addr, start, prot,
+                               MAP_FILE_VMM_COMMIT | MAP_FILE_FIXED);
+        if (map_base != start || map_size != end - start)
+            goto vmm_heap_fork_init_failed;
+        LOG(GLOBAL, LOG_HEAP, 2, "%s: re-mapped %p-%p %x\n", __FUNCTION__, start, end,
+            prot);
+    }
+    vmvector_iterator_stop(&vmvi);
+    vmvector_reset_vector(GLOBAL_DCONTEXT, &x_regions);
+
+    /* XXX: We don't want to unlink any tmpfs file so we don't use
+     * os_delete_memory_file().  This may not work on Windows if that function
+     * needs to do more.
+     */
+    os_close(old_fd);
+    return;
+
+vmm_heap_fork_init_failed:
+    REPORT_FATAL_ERROR_AND_EXIT(FAILED_TO_SATISFY_W_XOR_X, 2, get_application_name(),
+                                get_application_pid());
+    ASSERT_NOT_REACHED();
+}
+#endif
+
 /* checks for compatibility among heap options, returns true if
  * modified the value of any options to make them compatible
  */
@@ -3839,7 +3949,7 @@ static void
 heap_unit_extend_commitment(heap_unit_t *u, size_t size_need, uint prot)
 {
     u->end_pc += common_heap_extend_commitment(u->cur_pc, u->end_pc, u->reserved_end_pc,
-                                               size_need, prot, VMM_HEAP);
+                                               size_need, prot, u->which);
 }
 
 /* allocate storage on the DR heap
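The heart of `vmm_heap_fork_init()` above is a three-step rebuild: a fresh backing file, a writable view filled by copying out of the still-intact executable view, then the executable view remapped onto the new file. A condensed plain-mmap sketch of the same sequence (hypothetical helper with assumed `x_base`/`w_base`/`size` parameters; the real code additionally records each region's protections via memquery and restores them per region):

```c
/* Fork-time rebuild of a dual mapping, in plain Linux terms. */
#define _GNU_SOURCE
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static int
rebuild_dual_map(unsigned char *x_base, unsigned char *w_base, size_t size, int old_fd)
{
    /* First, a fresh file so we stop sharing pages with the parent. */
    int fd = memfd_create("child-vmcode", 0);
    if (fd == -1 || ftruncate(fd, size) == -1)
        return -1;
    /* Second, map the new file writable over the old +w view (MAP_FIXED
     * atomically replaces the old mapping) and copy the contents out of the
     * parent's still-mapped +x view, which remains readable.
     */
    if (mmap(w_base, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0) !=
        (void *)w_base)
        return -1;
    memcpy(w_base, x_base, size);
    /* Third, replace the +x view so it aliases the new file too. */
    if (mmap(x_base, size, PROT_READ | PROT_EXEC, MAP_SHARED | MAP_FIXED, fd, 0) !=
        (void *)x_base)
        return -1;
    close(old_fd); /* Drop our handle on the parent's file without unlinking. */
    return 0;
}
```

The copy order matters: the writable view is replaced first, while the executable view still shows the parent's contents to copy from; only then is the executable view switched to the new file.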
diff --git a/core/heap.h b/core/heap.h
index 72183a00acb..7439a1838bc 100644
--- a/core/heap.h
+++ b/core/heap.h
@@ -125,6 +125,10 @@ void
 vmm_heap_init(void);
 void
 vmm_heap_exit(void);
+#ifdef UNIX
+void
+vmm_heap_fork_init(dcontext_t *dcontext);
+#endif
 void
 print_vmm_heap_data(file_t outf);
 byte *
diff --git a/core/lib/instrument.c b/core/lib/instrument.c
index 41cb65b3e4d..6c33e40caa2 100644
--- a/core/lib/instrument.c
+++ b/core/lib/instrument.c
@@ -2891,6 +2891,10 @@ DR_API
 void *
 dr_nonheap_alloc(size_t size, uint prot)
 {
+    CLIENT_ASSERT(
+        !TESTALL(DR_MEMPROT_WRITE | DR_MEMPROT_EXEC, prot) ||
+            !DYNAMO_OPTION(satisfy_w_xor_x),
+        "reachable executable client memory is not supported with -satisfy_w_xor_x");
     return heap_mmap_ex(size, size, prot, false /*no guard pages*/,
                         /* For back-compat we preserve reachability. */
                         VMM_SPECIAL_MMAP | VMM_REACHABLE);
diff --git a/core/monitor.c b/core/monitor.c
index 59766e12f50..275151175a7 100644
--- a/core/monitor.c
+++ b/core/monitor.c
@@ -1014,8 +1014,6 @@ make_room_in_trace_buffer(dcontext_t *dcontext, uint add_size, fragment_t *f)
             return false;
         }
         /* Re-allocate trace buf.  It must be reachable for rip-rel re-relativization. */
-        LOG(THREAD, LOG_MONITOR, 3, "\nRe-allocating trace buffer from %d to %d bytes\n",
-            md->trace_buf_size, size);
         new_tbuf = heap_reachable_alloc(dcontext, size HEAPACCT(ACCT_TRACE));
         if (md->trace_buf != NULL) {
             /* copy entire thing, just in case */
@@ -1033,6 +1031,9 @@ make_room_in_trace_buffer(dcontext_t *dcontext, uint add_size, fragment_t *f)
                 instr = instr_get_next(instr);
             }
         }
+        LOG(THREAD, LOG_MONITOR, 3,
+            "\nRe-allocated trace buffer from %d @" PFX " to %d bytes @" PFX "\n",
+            md->trace_buf_size, md->trace_buf, size, new_tbuf);
         md->trace_buf = new_tbuf;
         md->trace_buf_size = size;
     }
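The new `CLIENT_ASSERT` in `dr_nonheap_alloc()` above rejects rwx requests under `-satisfy_w_xor_x`. A client that generates code can stay compliant by splitting the allocation into a write phase and an execute phase; a hedged sketch using the existing `dr_nonheap_alloc()`/`dr_memory_protect()` APIs, where `gen_code_into()` is a hypothetical stand-in for the client's own emitter:

```c
/* W^X-friendly client pattern: allocate rw-, emit, then flip to r-x, so no
 * page is ever writable and executable at once.  Illustration only.
 */
#include "dr_api.h"

void gen_code_into(void *drcontext, byte *buf, size_t size); /* hypothetical */

static byte *
alloc_gencode(void *drcontext, size_t size)
{
    byte *buf = dr_nonheap_alloc(size, DR_MEMPROT_READ | DR_MEMPROT_WRITE);
    if (buf == NULL)
        return NULL;
    gen_code_into(drcontext, buf, size);
    /* Drop write before ever adding exec. */
    if (!dr_memory_protect(buf, size, DR_MEMPROT_READ | DR_MEMPROT_EXEC)) {
        dr_nonheap_free(buf, size);
        return NULL;
    }
    return buf;
}
```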
diff --git a/core/optionsx.h b/core/optionsx.h
index 276887c9962..932fef36b03 100644
--- a/core/optionsx.h
+++ b/core/optionsx.h
@@ -1477,6 +1477,14 @@ DYNAMIC_OPTION(bool, pause_via_loop,
     OPTION_DEFAULT(bool, reachable_client, IF_STATIC_LIBRARY_ELSE(false, true),
                    "guarantee that clients are reachable from the code cache.")
 #endif
+    /* XXX i#3556: Support for W^X has some current limitations:
+     * + It is not implemented for Windows or Mac.
+     * + Fork is not perfectly supported: there is overhead and a race.
+     * + Pcaches are not supported.
+     * + -native_exec_list is not supported.
+     * + dr_nonheap_alloc(rwx) is not supported.
+     * Clients using other non-vmcode sources of +wx memory will also not comply.
+     */
     OPTION_DEFAULT(bool, satisfy_w_xor_x, false,
                    "avoids ever allocating memory that is both writable and executable.")
     /* FIXME: the lower 16 bits are ignored - so this here gives us
diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt
index 2e95f0924fb..cb81f47a8a9 100644
--- a/suite/tests/CMakeLists.txt
+++ b/suite/tests/CMakeLists.txt
@@ -107,6 +107,8 @@ set(vmap_run_list
   "SHORT::ONLY::client.events$::-code_api -disable_traces"
   "SHORT::X86::ONLY::client.events$::-code_api -thread_private -disable_traces"
   "SHORT::X86::LIN::ONLY::client.events$::-code_api -no_early_inject" # only early on ARM
+  # XXX i#3556: NYI on Windows, Mac, and non-x86 (and not supported on 32-bit).
+  "SHORT::X86::X64::LIN::ONLY::drcache.*\\.simple$|selfmod2|racesys|reachability::-code_api -satisfy_w_xor_x"
   # maybe this should be SHORT as -coarse_units will eventually be the default?
   "X86::-code_api -opt_memory" # i#1575: ARM -coarse_units NYI
   "X86::-code_api -opt_speed" # i#1551: ARM indcall2direct NYI
@@ -1509,21 +1511,35 @@ macro(strings2lists test ops_var)
   endif ()
 endmacro()
 
-function(torun_normal name test ops)
-  strings2lists(${test} ops)
-  torun("${name}" ${test} ${${test}_source} OFF OFF "${${test}_dr_ops};${ops}"
-    "${${test}_exe_ops}" added)
+function(set_properties name test myprefix)
   if (DEFINED ${test}_depends)
-    ops2testprefix(prefix "${ops}")
-    set_property(TEST ${name} APPEND PROPERTY DEPENDS "${prefix}${${test}_depends}")
+    set_property(TEST ${name} APPEND PROPERTY DEPENDS "${myprefix}${${test}_depends}")
   endif (DEFINED ${test}_depends)
+  if (DEFINED ${test}_self_serial)
+    # We blindly add a dep to every possible cross-cutting same test w/ diff ops.
+    # CTest seems ok with a dep on a non-existent test.
+    foreach (prefix ${prior_prefixes})
+      if (NOT "${prefix}" STREQUAL "${myprefix}")
+        set_property(TEST ${name} APPEND PROPERTY DEPENDS "${prefix}${test}")
+      endif ()
+    endforeach ()
+  endif ()
   if (DEFINED ${test}_env)
-    ops2testprefix(prefix "${ops}")
-    set_property(TEST ${name} APPEND PROPERTY ENVIRONMENT "${prefix}${${test}_env}")
+    set_property(TEST ${name} APPEND PROPERTY ENVIRONMENT "${myprefix}${${test}_env}")
   endif ()
+endfunction ()
+
+function(torun_normal name test ops)
+  strings2lists(${test} ops)
+  torun("${name}" ${test} ${${test}_source} OFF OFF "${${test}_dr_ops};${ops}"
+    "${${test}_exe_ops}" added)
+  ops2testprefix(myprefix "${ops}")
+  set_properties(${name} ${test} ${myprefix})
 endfunction(torun_normal)
 
 function(torun_ci name test ops)
+  ops2testprefix(myprefix "${ops}")
   strings2lists(${test} ops)
   set(cli_ops "${${test}_client_ops}")
   if ("${test}" MATCHES "client\\.flush")
@@ -1549,14 +1565,7 @@ function(torun_ci name test ops)
       "${${test}_exe_ops}" added)
   endif ()
   if (added)
-    if (DEFINED ${test}_depends)
-      ops2testprefix(prefix "${ops}")
-      set_property(TEST ${name} APPEND PROPERTY DEPENDS "${prefix}${${test}_depends}")
-    endif (DEFINED ${test}_depends)
-    if (DEFINED ${test}_env)
-      ops2testprefix(prefix "${ops}")
-      set_property(TEST ${name} APPEND PROPERTY ENVIRONMENT "${prefix}${${test}_env}")
-    endif ()
+    set_properties(${name} ${test} ${myprefix})
   endif (added)
 endfunction(torun_ci)
 
@@ -2616,6 +2625,7 @@ endif ()
   set(tool.drcachesim.${testname}_basedir
     "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests")
   set(tool.drcachesim.${testname}_rawtemp ON) # no preprocessor
+  set(tool.drcachesim.${testname}_self_serial ON)
 endmacro (torunonly_drcachesim)
 
 set(config_files_dir ${PROJECT_SOURCE_DIR}/clients/drcachesim/tests)
@@ -2794,6 +2804,7 @@ endif ()
     "foreach@${CMAKE_COMMAND}@-E@remove_directory@drmemtrace.${exetgt}.*.dir")
   set(tool.drcacheoff.${testname}_postcmd
     "firstglob@${drcachesim_path}@-indir@drmemtrace.${exetgt}.*.dir${sim_atops}")
+  set(tool.drcacheoff.${testname}_self_serial ON)
 endmacro()
 
 # We could share drcachesim-simple.templatex if we had the launcher fork
@@ -3698,13 +3709,6 @@ if (X64 AND NOT AARCH64) # FIXME i#1569: get working on AArch64
   endif (CLIENT_INTERFACE)
   torunonly(low4GB security-common.selfmod security-common/selfmod.c
     "-heap_in_lower_4GB -reachable_heap" "")
-  if (UNIX AND NOT APPLE)
-    # XXX i#3556: NYI on Windows, Mac, and non-x86 (and not supported on 32-bit).
-    torunonly(w-x.selfmod security-common.selfmod security-common/selfmod.c
-      "-satisfy_w_xor_x" "")
-    torunonly_ci(w-x.reachability ${ci_shared_app} client.reachability.dll
-      client-interface/reachability.c "" "-satisfy_w_xor_x" "")
-  endif ()
 endif (X64 AND NOT AARCH64)
 
 tobuild("security-common.TestAllocWE" security-common/TestAllocWE.c)
@@ -3770,6 +3774,12 @@ foreach (run ${run_list})
     set(enabled OFF)
   endif ()
 
+  string(REGEX MATCHALL "^X64::" is_x64 "${run}")
+  string(REGEX REPLACE "^X64::" "" run "${run}")
+  if (is_x64 AND NOT X64)
+    set(enabled OFF)
+  endif ()
+
   string(REGEX MATCHALL "^WIN::" is_win "${run}")
   string(REGEX REPLACE "^WIN::" "" run "${run}")
   if (is_win AND UNIX)
@@ -3796,6 +3806,11 @@ foreach (run ${run_list})
       set(regex "^common")
     endif ()
 
+    ops2testprefix(prefix "${run}")
+    list(FIND prior_prefixes "${prefix}" index)
+    if (${index} EQUAL -1)
+      list(APPEND prior_prefixes "${prefix}")
+    endif ()
     foreach (test ${testlist_normal})
       if (NOT regex OR "${test}" MATCHES "${regex}")
         run2testname(test_name "${run}" ${test})