Skip to content

Commit

Permalink
i#1309 win8+ threads: use NtCreateThreadEx for internal nudges (#2838)
Browse files Browse the repository at this point in the history
Since NtCreateThread returns STATUS_ACCESS_DENIED on win8+, we implement a
solution for internal nudges using NtCreateThreadEx.  We already have
support for varying who frees the stack in NUDGE_NUDGER_FREE_STACK, which
we leverage here.  The other complication is who frees the arg_buf for a
remote process (since we can't easily put it on the stack): again, we
already have NUDGE_FREE_ARG which we use here.

We also fix a bug in cleanup_and_terminate that results in a double-free of
a nudge dstack: a simple 1-byte-bool error where our asm checks all 4
bytes.  A similar asm bug found by inspection with sig_should_swap_stack()
is also fixed.

This fixes internal nudges.  Client threads and external nudges will be
fixed separately.

Issue: #1309, #1432, #2835
  • Loading branch information
derekbruening authored Feb 19, 2018
1 parent cd3e7c1 commit b565a05
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 27 deletions.
6 changes: 3 additions & 3 deletions core/arch/x86/x86.asm
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2011-2017 Google, Inc. All rights reserved.
* Copyright (c) 2011-2018 Google, Inc. All rights reserved.
* Copyright (c) 2001-2010 VMware, Inc. All rights reserved.
* ********************************************************** */

Expand Down Expand Up @@ -594,7 +594,7 @@ GLOBAL_LABEL(cleanup_and_terminate:)
mov REG_XBX, PTRSZ [1*ARG_SZ + REG_XBP] /* dcontext */
SAVE_TO_DCONTEXT_VIA_REG(REG_XBX,is_exiting_OFFSET,1)
CALLC1(GLOBAL_REF(is_currently_on_dstack), REG_XBX) /* xbx is callee-saved */
cmp REG_XAX, 0
cmp al, 0
jnz cat_save_dstack
mov REG_XBX, 0 /* save 0 for dstack to avoid double-free */
jmp cat_done_saving_dstack
Expand Down Expand Up @@ -1486,7 +1486,7 @@ GLOBAL_LABEL(master_signal_handler:)
mov REG_XAX, REG_XSP
/* call a C routine rather than writing everything in asm */
CALLC2(GLOBAL_REF(sig_should_swap_stack), REG_XAX, REG_XDX)
cmp REG_XAX, 0
cmp al, 0
pop REG_XAX /* clone_and_swap_args.stack */
pop REG_XCX /* clone_and_swap_args.tos */
je no_swap
Expand Down
7 changes: 4 additions & 3 deletions core/dynamo.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2010-2017 Google, Inc. All rights reserved.
* Copyright (c) 2010-2018 Google, Inc. All rights reserved.
* Copyright (c) 2000-2010 VMware, Inc. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -1662,6 +1662,7 @@ delete_dynamo_context(dcontext_t *dcontext, bool free_stack)
if (free_stack) {
ASSERT(dcontext->dstack != NULL);
ASSERT(!is_currently_on_dstack(dcontext));
LOG(GLOBAL, LOG_THREADS, 1, "Freeing DR stack "PFX"\n", dcontext->dstack);
stack_free(dcontext->dstack, DYNAMORIO_STACK_SIZE);
} /* else will be cleaned up by caller */

Expand Down Expand Up @@ -2313,8 +2314,8 @@ dynamo_thread_init(byte *dstack_in, priv_mcontext_t *mc
IF_CLIENT_INTERFACE_ELSE(client_thread ? "CLIENT " : "", ""),
get_thread_id(), dcontext);
LOG(THREAD, LOG_TOP|LOG_THREADS, 1,
"DR stack is "PFX"-"PFX"\n", dcontext->dstack - DYNAMORIO_STACK_SIZE,
dcontext->dstack);
"DR stack is "PFX"-"PFX" (passed in "PFX")\n",
dcontext->dstack - DYNAMORIO_STACK_SIZE, dcontext->dstack, dstack_in);
#endif

#ifdef DEADLOCK_AVOIDANCE
Expand Down
34 changes: 21 additions & 13 deletions core/nudge.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2011-2017 Google, Inc. All rights reserved.
* Copyright (c) 2011-2018 Google, Inc. All rights reserved.
* Copyright (c) 2008-2010 VMware, Inc. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -125,14 +125,17 @@ nudge_thread_cleanup(dcontext_t *dcontext, bool exit_process, uint exit_code)
* terminate nudge threads instead of allowing them to return and exit normally.
* On XP and 2k3 none of our nudge creation routines inform csrss of the new thread
* (which is who typically frees the stacks).
* On Vista we don't use NtCreateThreadEx to create the nudge threads so the kernel
* doesn't free the stack.
* On Vista and Win7 we don't use NtCreateThreadEx to create the nudge threads so
* the kernel doesn't free the stack.
* As such we are left with two options: free the app stack here (nudgee free) or
* have the nudge thread creator free the app stack (nudger free). Going with
* nudgee free means we leak exit race nudge stacks whereas if we go with nudger free
* for external nudges then we'll leak timed out nudge stacks (for internal nudges
* we pretty much have to do nudgee free). A nudge_arg_t flag is used to specify
* which model we use, but currently we always nudgee free.
* have the nudge thread creator free the app stack (nudger free). Going with
* nudgee free means we leak exit race nudge stacks whereas if we go with nudger free
* for external nudges then we'll leak timed out nudge stacks (for internal nudges
* we pretty much have to do nudgee free). A nudge_arg_t flag is used to specify
* which model we use, but currently we always nudgee free.
* On Win8+ we do use NtCreateThreadEx to create the nudge threads so the kernel
* does free the stack. We could use this on Vista and Win7 too -- should we?
* It requires someone to free the argument buffer (NUDGE_FREE_ARG).
*
* dynamo_thread_exit_common() is where the app stack is actually freed, not here.
*/
Expand Down Expand Up @@ -219,8 +222,6 @@ generic_nudge_handler(nudge_arg_t *arg_dont_use)
/* if needed tell thread exit to free the application stack */
if (!TEST(NUDGE_NUDGER_FREE_STACK, safe_arg.flags)) {
dcontext->free_app_stack = true;
} else {
ASSERT_NOT_TESTED();
}

/* FIXME - would be nice to inform nudge creator if we need to nop the nudge. */
Expand Down Expand Up @@ -262,7 +263,6 @@ generic_nudge_handler(nudge_arg_t *arg_dont_use)

/* Free the arg if requested. */
if (TEST(NUDGE_FREE_ARG, safe_arg.flags)) {
ASSERT_NOT_TESTED();
nt_free_virtual_memory(arg_dont_use);
}

Expand Down Expand Up @@ -488,10 +488,18 @@ nudge_internal(process_id_t pid, uint nudge_action_mask,

nudge_arg.version = NUDGE_ARG_CURRENT_VERSION;
nudge_arg.nudge_action_mask = nudge_action_mask;
/* we do not set NUDGE_NUDGER_FREE_STACK so the stack will be freed
* in the target process
/* We do not set NUDGE_NUDGER_FREE_STACK so the stack will be freed
* in the target process, for <=win7.
*/
nudge_arg.flags = (internal ? NUDGE_IS_INTERNAL : 0);
#ifdef WINDOWS
if (get_os_version() >= WINDOWS_VERSION_8) {
/* The kernel owns and frees the stack. */
nudge_arg.flags |= NUDGE_NUDGER_FREE_STACK;
/* The arg was placed in a new kernel alloc. */
nudge_arg.flags |= NUDGE_FREE_ARG;
}
#endif
nudge_arg.client_arg = client_arg;
nudge_arg.client_id = client_id;

Expand Down
94 changes: 89 additions & 5 deletions core/win32/ntdll.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2010-2017 Google, Inc. All rights reserved.
* Copyright (c) 2010-2018 Google, Inc. All rights reserved.
* Copyright (c) 2003-2010 VMware, Inc. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -4596,7 +4596,7 @@ create_thread_common(HANDLE hProcess, bool target_64bit, void *start_addr,
res = NT_SYSCALL(CreateThread, &hthread, THREAD_ALL_ACCESS, &oa,
hProcess, &cid, context, stack, (byte)TRUE);
if (!NT_SUCCESS(res)) {
NTPRINT("create_thread: failed to create thread\n");
NTPRINT("create_thread: failed to create thread: %x\n", res);
return INVALID_HANDLE_VALUE;
}
/* Xref PR 252008 & PR 252745 - on 32-bit Windows the kernel will set esp
Expand All @@ -4617,6 +4617,74 @@ create_thread_common(HANDLE hProcess, bool target_64bit, void *start_addr,
return hthread;
}

static HANDLE
our_create_thread_ex(HANDLE hProcess, bool target_64bit, void *start_addr,
void *arg, const void *arg_buf, size_t arg_buf_size,
uint stack_reserve, uint stack_commit, bool suspended,
thread_id_t *tid)
{
HANDLE hthread;
OBJECT_ATTRIBUTES oa;
CLIENT_ID cid;
TEB *teb;
void *thread_arg = arg;
create_thread_info_t info = {0};
NTSTATUS res;
/* NtCreateThreadEx doesn't exist prior to Vista. */
ASSERT(syscalls[SYS_CreateThreadEx] != SYSCALL_NOT_PRESENT);
GET_RAW_SYSCALL(CreateThreadEx,
OUT PHANDLE ThreadHandle,
IN ACCESS_MASK DesiredAccess,
IN POBJECT_ATTRIBUTES ObjectAttributes,
IN HANDLE ProcessHandle,
IN LPTHREAD_START_ROUTINE Win32StartAddress,
IN LPVOID StartParameter,
IN BOOL CreateSuspended,
IN uint StackZeroBits,
IN SIZE_T StackCommitSize,
IN SIZE_T StackReserveSize,
INOUT create_thread_info_t *thread_info);

InitializeObjectAttributes(&oa, NULL, OBJ_CASE_INSENSITIVE, NULL, NULL);

if (arg_buf != NULL) {
/* XXX: Currently we leak this memory, except for nudge where the caller
* sets NUDGE_FREE_ARG.
*/
if (!NT_SUCCESS
(nt_remote_allocate_virtual_memory
(hProcess, &thread_arg, arg_buf_size, PAGE_READWRITE, MEM_COMMIT))) {
NTPRINT("create_thread: failed to allocate arg buf\n");
return INVALID_HANDLE_VALUE;
}
if (!nt_write_virtual_memory(hProcess, thread_arg, arg_buf,
arg_buf_size, NULL)) {
NTPRINT("create_thread: failed to write arguments\n");
return INVALID_HANDLE_VALUE;
}
}

info.struct_size = sizeof(info);
info.client_id.flags = THREAD_INFO_ELEMENT_CLIENT_ID | THREAD_INFO_ELEMENT_UNKNOWN_2;
info.client_id.buffer_size = sizeof(cid);
info.client_id.buffer = &cid;
/* We get STATUS_INVALID_PARAMETER unless we also ask for teb. */
info.teb.flags = THREAD_INFO_ELEMENT_TEB | THREAD_INFO_ELEMENT_UNKNOWN_2;
info.teb.buffer_size = sizeof(TEB*);
info.teb.buffer = &teb;
res = NT_RAW_SYSCALL(CreateThreadEx, &hthread, THREAD_ALL_ACCESS, &oa, hProcess,
(LPTHREAD_START_ROUTINE)convert_data_to_function(start_addr),
thread_arg, !!suspended, 0, stack_commit, stack_reserve,
&info);
if (!NT_SUCCESS(res)) {
NTPRINT("create_thread_ex: failed to create thread: %x\n", res);
return INVALID_HANDLE_VALUE;
}
if (tid != NULL)
*tid = (thread_id_t)cid.UniqueThread;
return hthread;
}

/* Creates a new stack w/ guard page */
HANDLE
our_create_thread(HANDLE hProcess, bool target_64bit, void *start_addr,
Expand All @@ -4631,15 +4699,22 @@ our_create_thread(HANDLE hProcess, bool target_64bit, void *start_addr,
ASSERT(stack_commit + PAGE_SIZE <= stack_reserve &&
ALIGNED(stack_commit, PAGE_SIZE) &&
ALIGNED(stack_reserve, PAGE_SIZE));

if (get_os_version() >= WINDOWS_VERSION_8) {
/* NtCreateThread not available: use Ex where the kernel makes the stack. */
return our_create_thread_ex(hProcess, target_64bit, start_addr, arg, arg_buf,
arg_buf_size, stack_reserve, stack_commit,
suspended, tid);
}

if (!NT_SUCCESS(nt_remote_allocate_virtual_memory(hProcess,
&stack.ExpandableStackBottom,
stack_reserve, PAGE_READWRITE,
MEM_RESERVE))) {
NTPRINT("create_thread: failed to allocate stack\n");
return INVALID_HANDLE_VALUE;
}
/* FIXME : for failures beyond this point we don't bother deallocating the
* stack. */
/* For failures beyond this point we don't bother deallocating the stack. */
stack.ExpandableStackBase =
((byte *)stack.ExpandableStackBottom) + stack_reserve;
stack.ExpandableStackLimit =
Expand All @@ -4662,13 +4737,22 @@ our_create_thread(HANDLE hProcess, bool target_64bit, void *start_addr,
arg_buf, arg_buf_size, &stack, suspended, tid);
}

/* Uses caller-allocated stack */
/* Uses caller-allocated stack. hProcess must be NT_CURRENT_PROCESS for win8+. */
HANDLE
our_create_thread_have_stack(HANDLE hProcess, bool target_64bit, void *start_addr,
void *arg, const void *arg_buf, size_t arg_buf_size,
byte *stack_base, size_t stack_size,
bool suspended, thread_id_t *tid)
{
if (get_os_version() >= WINDOWS_VERSION_8) {
/* FIXME i#1309: we need a wrapper function so we can use NtCreateThreadEx
* and then switch stacks. This is too hard to arrange in another process.
* NtCreateThread seems to work in some cases (on Appveyor) so until we have
* an NtCreateThreadEx solution we fall through.
*/
ASSERT(hProcess == NT_CURRENT_PROCESS &&
"No support for creating a remote thread with a custom stack");
}
USER_STACK stack = {0};
stack.ExpandableStackBase = stack_base;
stack.ExpandableStackLimit = stack_base - stack_size;
Expand Down
12 changes: 9 additions & 3 deletions core/win32/ntdll.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2011-2017 Google, Inc. All rights reserved.
* Copyright (c) 2011-2018 Google, Inc. All rights reserved.
* Copyright (c) 2003-2010 VMware, Inc. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -939,6 +939,7 @@ typedef enum { /* NOTE - these are speculative */
* - INOUT */
THREAD_INFO_ELEMENT_UNKNOWN_1 = 0x9, /* Unknown - ptr_uint_t sized
* [ observed 1 ] - IN */
THREAD_INFO_ELEMENT_UNKNOWN_2 = 0x10000,
} thread_info_elm_buf_type_t;

typedef struct _thread_info_element_t { /* NOTE - this is speculative */
Expand Down Expand Up @@ -2102,13 +2103,18 @@ nt_stop_profile(HANDLE profile_handle);
HANDLE
create_process(wchar_t *exe, wchar_t *cmdline);

/* NOTE see important usage information in ntdll.c, threads created with this
* function can NOT return from their start routine */
/* See important usage information in ntdll.c: threads created with this
* function can NOT return from their start routine.
* On Win8+, the kernel owns the created stack; o/w, we own it.
* On Win8+, if arg_buf != NULL, it's placed in a new virtual alloc and it's
* up to the caller to free it.
*/
HANDLE
our_create_thread(HANDLE hProcess, bool target_64bit, void *start_addr,
void *arg, const void *arg_buf, size_t arg_buf_size,
uint stack_reserve, uint stack_commit, bool suspended,
thread_id_t *tid);
/* Uses caller-allocated stack. hProcess must be NT_CURRENT_PROCESS for win8+. */
HANDLE
our_create_thread_have_stack(HANDLE hProcess, bool target_64bit, void *start_addr,
void *arg, const void *arg_buf, size_t arg_buf_size,
Expand Down

0 comments on commit b565a05

Please sign in to comment.