i#6919: Fix XMM saving after synchall #6920

Merged

Changes from 14 commits

Commits
27 commits
cc2db9f
i#6919 fix XMM saving after synch-all
ndrewh Aug 16, 2024
7d3f352
appease clang-format, locally at least
ndrewh Aug 16, 2024
0fa0640
link_with_pthread to fix ci
ndrewh Aug 16, 2024
a05990a
threadset is a X64 test only
ndrewh Aug 16, 2024
55e3e83
DR_HOST_X64
ndrewh Aug 16, 2024
53b6d8b
X64 AND DR_HOST_X64\?
ndrewh Aug 16, 2024
3468fb2
appease test gods
ndrewh Aug 16, 2024
a79fe71
PLEASE CI
ndrewh Aug 17, 2024
e94b012
Merge remote-tracking branch 'origin/master' into i6919-set-self-cont…
ndrewh Aug 17, 2024
22e5724
move test and client into separate file
ndrewh Aug 17, 2024
1153dfd
cleanup comments
ndrewh Aug 17, 2024
357d45c
clang-format
ndrewh Aug 17, 2024
11086ed
i would like to apologize to the CI runner
ndrewh Aug 17, 2024
2826345
Merge remote-tracking branch 'origin/master' into i6919-set-self-cont…
ndrewh Aug 19, 2024
cfc8fd6
nits
ndrewh Aug 19, 2024
b016209
rename test, remove sleep, use print, nits
ndrewh Aug 22, 2024
0a10a73
clang-format
ndrewh Aug 22, 2024
9b8486c
Merge remote-tracking branch 'origin/master' into i6919-set-self-cont…
ndrewh Aug 22, 2024
0359c62
switch to condvar to wait for thread
ndrewh Aug 22, 2024
be4716f
Merge branch 'master' into i6919-set-self-context-fp-restore
derekbruening Aug 23, 2024
63e26f1
save_fpstate already saves xmm state from mcontext
ndrewh Aug 23, 2024
9786f8f
fix comment
ndrewh Aug 23, 2024
43e8ecf
fix comment again (sorry, CI runner)
ndrewh Aug 23, 2024
77e5919
linux-only
ndrewh Aug 23, 2024
dfd9291
fuck macos
ndrewh Aug 23, 2024
cc1effd
remove duplicate dcontext
ndrewh Aug 23, 2024
a1ca49e
Merge branch 'master' into i6919-set-self-context-fp-restore
derekbruening Aug 23, 2024
15 changes: 3 additions & 12 deletions core/unix/signal.c
@@ -3235,20 +3235,10 @@ thread_set_self_context(void *cxt, bool is_detach_external)
ASSERT_NOT_IMPLEMENTED(false); /* PR 405694: can't use regular sigreturn! */
#endif
memset(&frame, 0, sizeof(frame));
#if defined(X86)
dcontext_t *dcontext = get_thread_private_dcontext();
#endif
#ifdef LINUX
# ifdef X86
byte *xstate = get_and_initialize_xstate_buffer(dcontext);
frame.uc.uc_mcontext.fpstate = &((kernel_xstate_t *)xstate)->fpstate;
# endif /* X86 */
frame.uc.uc_mcontext = *sc;
#endif
IF_ARM(ASSERT_NOT_TESTED());
#if defined(X86)
save_fpstate(dcontext, &frame);
#endif
/* The kernel calls do_sigaltstack on sys_rt_sigreturn primarily to ensure
* the frame is ok, but the side effect is we can mess up our own altstack
* settings if we're not careful. Having invalid ss_size looks good for
@@ -3341,14 +3331,15 @@ thread_set_self_mcontext(priv_mcontext_t *mc, bool is_detach_external)
sig_full_cxt_t sc_full;
sig_full_initialize(&sc_full, &ucxt);
#if defined(LINUX) && defined(X86)
sc_full.sc->fpstate = NULL; /* for mcontext_to_sigcontext */
/* for mcontext_to_sigcontext to fill in with saved fp state */
sc_full.sc->fpstate = (kernel_fpstate_t *)get_and_initialize_xstate_buffer(
get_thread_private_dcontext());
#endif
mcontext_to_sigcontext(&sc_full, mc, DR_MC_ALL);
thread_set_segment_registers(sc_full.sc);
/* sigreturn takes the mode from cpsr */
IF_ARM(
set_pc_mode_in_cpsr(sc_full.sc, dr_get_isa_mode(get_thread_private_dcontext())));
/* thread_set_self_context will fill in the real fp/simd state for x86 */
thread_set_self_context((void *)sc_full.sc, is_detach_external);
ASSERT_NOT_REACHED();
}
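An aside on the mechanism the fix relies on: as the signal.c diff reads, thread_set_self_mcontext used to hand mcontext_to_sigcontext a NULL fpstate, and thread_set_self_context then filled the frame's fp area itself with save_fpstate; now the XMM/YMM state saved in the mcontext is written into a real xstate buffer up front, and the sigreturn frame uses the caller's fpstate as-is. This works because, on Linux x86, whatever SIMD values sit in the frame's fpstate when sigreturn runs become the thread's live register state. Below is a minimal standalone sketch of that kernel behavior, assuming glibc's x86-64 ucontext layout (uc_mcontext.fpregs->_xmm); the program and its names are illustrative only and are not DR code or part of this PR.

#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <ucontext.h>

/* Signal handler: overwrite the saved xmm0 inside the signal frame's fpstate.
 * rt_sigreturn loads this value into the real xmm0 when the handler returns.
 */
static void
handler(int sig, siginfo_t *info, void *ucxt)
{
    ucontext_t *uc = (ucontext_t *)ucxt;
    uint32_t val[4] = { 0x11111111, 0x22222222, 0x33333333, 0x44444444 };
    if (uc->uc_mcontext.fpregs != NULL)
        memcpy(&uc->uc_mcontext.fpregs->_xmm[0], val, sizeof(val));
}

int
main(void)
{
    struct sigaction act;
    memset(&act, 0, sizeof(act));
    act.sa_sigaction = handler;
    act.sa_flags = SA_SIGINFO;
    sigaction(SIGUSR1, &act, NULL);

    uint32_t in[4] = { 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef };
    uint32_t out[4];
    /* Same movdqu pattern as the test below; xmm0 is caller-saved in the ABI,
     * but in practice raise() leaves it untouched.
     */
    __asm__ __volatile__("movdqu %0, %%xmm0" : : "m"(in[0]) : "xmm0");
    raise(SIGUSR1);
    __asm__ __volatile__("movdqu %%xmm0, %0" : "=m"(out[0]) : : "xmm0");
    /* Prints 0x11111111: the value the handler wrote into the frame, not the
     * 0xdeadbeef that was live in xmm0 before the signal.
     */
    printf("xmm0[0] after sigreturn: 0x%x\n", out[0]);
    return 0;
}

Run natively, this prints the handler's edit to the frame rather than the value loaded before the signal; that frame-to-registers channel is exactly what thread_set_self_mcontext now uses to restore the app's SIMD state after a synchall.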
7 changes: 7 additions & 0 deletions suite/tests/CMakeLists.txt
@@ -5396,6 +5396,13 @@ if (UNIX)
if (NOT APPLE)
tobuild(linux.thread linux/thread.c)
tobuild(linux.threadexit linux/threadexit.c)

if (X86 AND X64 AND DR_HOST_X86 AND DR_HOST_X64)
tobuild_ci(linux.threadset linux/threadset.c "" "" "")
set_avx_flags(linux.threadset)
link_with_pthread(linux.threadset)
endif ()

if (NOT ANDROID)
tobuild(linux.threadexit2 linux/threadexit2.c) # XXX i#1874: hangs on Android
tobuild(linux.signalfd linux/signalfd.c) # XXX i#1874: fails natively on Android
345 changes: 345 additions & 0 deletions suite/tests/linux/threadset.c
@@ -0,0 +1,345 @@
/* **********************************************************
* Copyright (c) 2011-2020 Google, Inc. All rights reserved.
* **********************************************************/

/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/

/* Tests resuming from check_wait_at_safe_spot => thread_set_self_context,
* triggered by another thread flushing (causing a synchall). Test based on
* linux.sigcontext.
*/

#include "tools.h"
#include "thread.h"

/* we want the latest defs so we can get at ymm state */
#include "../../../core/unix/include/sigcontext.h"
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <ucontext.h>
#include <errno.h>
#include <stddef.h>

/* For sharing NUM_*_REGS constants. */
#include "../api/detach_state_shared.h"

#define INTS_PER_XMM 4
#define INTS_PER_YMM 8
#define INTS_PER_ZMM 16

__attribute__((noinline)) void
dummy2()
{
for (int i = 0; i < 10; i++) {
asm volatile("add %rdi, %rdi");
}
}

void *
thread()
{
for (int i = 0; i < 100000; i++) {
dummy2();
}
}

int
main(int argc, char *argv[])
{
int buf[INTS_PER_XMM * NUM_SIMD_SSE_AVX_REGS];
char *ptr = (char *)buf;
int i, j;

/* This test deliberately uses write() instead of other libc calls, since those
* appeared to cause crashes (likely due to us accidentally triggering the xmm
* saving bug :/).
*/

write(2, "Starting test.\n", 15);
thread_t flusher = create_thread(thread, NULL);
sleep(1);
write(2, "Saving regs.\n", 13);

/* put known values in xmm regs (we assume processor has xmm) */
for (i = 0; i < NUM_SIMD_SSE_AVX_REGS; i++) {
for (j = 0; j < INTS_PER_XMM; j++)
buf[i * INTS_PER_XMM + j] = 0xdeadbeef << i;
}
#define MOVE_TO_XMM(buf, num) \
__asm__ __volatile__("movdqu %0, %%xmm" #num \
: \
: "m"(buf[num * INTS_PER_XMM]) \
: "xmm" #num);
MOVE_TO_XMM(buf, 0)
MOVE_TO_XMM(buf, 1)
MOVE_TO_XMM(buf, 2)
MOVE_TO_XMM(buf, 3)
MOVE_TO_XMM(buf, 4)
MOVE_TO_XMM(buf, 5)
MOVE_TO_XMM(buf, 6)
MOVE_TO_XMM(buf, 7)
#ifdef X64
MOVE_TO_XMM(buf, 8)
MOVE_TO_XMM(buf, 9)
MOVE_TO_XMM(buf, 10)
MOVE_TO_XMM(buf, 11)
MOVE_TO_XMM(buf, 12)
MOVE_TO_XMM(buf, 13)
MOVE_TO_XMM(buf, 14)
MOVE_TO_XMM(buf, 15)
#endif

#if defined(__AVX__) || defined(__AVX512F__)
{
/* put known values in ymm regs */
# ifdef __AVX512F__
int buf[INTS_PER_ZMM * NUM_SIMD_AVX512_REGS];
# else
int buf[INTS_PER_YMM * NUM_SIMD_SSE_AVX_REGS];
# endif
char *ptr = (char *)buf;
int i, j;

/* put known values in xmm regs (we assume processor has xmm) */
# ifdef __AVX512F__
for (i = 0; i < NUM_SIMD_AVX512_REGS; i++) {
for (j = 0; j < INTS_PER_ZMM; j++)
buf[i * INTS_PER_ZMM + j] = 0xdeadbeef + i * INTS_PER_ZMM + j;
}
# else
for (i = 0; i < NUM_SIMD_SSE_AVX_REGS; i++) {
for (j = 0; j < INTS_PER_YMM; j++)
buf[i * INTS_PER_YMM + j] = 0xdeadbeef + i * INTS_PER_ZMM + j;
}
# endif
# ifdef __AVX512F__
# define MOVE_TO_ZMM(buf, num) \
__asm__ __volatile__("vmovdqu64 %0, %%zmm" #num \
: \
: "m"(buf[num * INTS_PER_ZMM]) \
: "zmm" #num);
MOVE_TO_ZMM(buf, 0)
MOVE_TO_ZMM(buf, 1)
MOVE_TO_ZMM(buf, 2)
MOVE_TO_ZMM(buf, 3)
MOVE_TO_ZMM(buf, 4)
MOVE_TO_ZMM(buf, 5)
MOVE_TO_ZMM(buf, 6)
MOVE_TO_ZMM(buf, 7)
# ifdef X64
MOVE_TO_ZMM(buf, 8)
MOVE_TO_ZMM(buf, 9)
MOVE_TO_ZMM(buf, 10)
MOVE_TO_ZMM(buf, 11)
MOVE_TO_ZMM(buf, 12)
MOVE_TO_ZMM(buf, 13)
MOVE_TO_ZMM(buf, 14)
MOVE_TO_ZMM(buf, 15)
MOVE_TO_ZMM(buf, 16)
MOVE_TO_ZMM(buf, 17)
MOVE_TO_ZMM(buf, 18)
MOVE_TO_ZMM(buf, 19)
MOVE_TO_ZMM(buf, 20)
MOVE_TO_ZMM(buf, 21)
MOVE_TO_ZMM(buf, 22)
MOVE_TO_ZMM(buf, 23)
MOVE_TO_ZMM(buf, 24)
MOVE_TO_ZMM(buf, 25)
MOVE_TO_ZMM(buf, 26)
MOVE_TO_ZMM(buf, 27)
MOVE_TO_ZMM(buf, 28)
MOVE_TO_ZMM(buf, 29)
MOVE_TO_ZMM(buf, 30)
MOVE_TO_ZMM(buf, 31)
# endif
/* Re-using INTS_PER_ZMM here to get same data patterns as above. */
# define MOVE_TO_OPMASK(buf, num) \
__asm__ __volatile__("kmovw %0, %%k" #num \
: \
: "m"(buf[num * INTS_PER_ZMM]) \
: "k" #num);
MOVE_TO_OPMASK(buf, 0)
MOVE_TO_OPMASK(buf, 1)
MOVE_TO_OPMASK(buf, 2)
MOVE_TO_OPMASK(buf, 3)
MOVE_TO_OPMASK(buf, 4)
MOVE_TO_OPMASK(buf, 5)
MOVE_TO_OPMASK(buf, 6)
MOVE_TO_OPMASK(buf, 7)
# else
# define MOVE_TO_YMM(buf, num) \
__asm__ __volatile__("vmovdqu %0, %%ymm" #num \
: \
: "m"(buf[num * INTS_PER_YMM]) \
: "ymm" #num);
MOVE_TO_YMM(buf, 0)
MOVE_TO_YMM(buf, 1)
MOVE_TO_YMM(buf, 2)
MOVE_TO_YMM(buf, 3)
MOVE_TO_YMM(buf, 4)
MOVE_TO_YMM(buf, 5)
MOVE_TO_YMM(buf, 6)
MOVE_TO_YMM(buf, 7)
# ifdef X64
MOVE_TO_YMM(buf, 8)
MOVE_TO_YMM(buf, 9)
MOVE_TO_YMM(buf, 10)
MOVE_TO_YMM(buf, 11)
MOVE_TO_YMM(buf, 12)
MOVE_TO_YMM(buf, 13)
MOVE_TO_YMM(buf, 14)
MOVE_TO_YMM(buf, 15)
# endif
# endif

write(2, "before\n", 7);

/* Sometime in this loop, we will synch with the other thread */
for (int i = 0; i < 100; i++) {
dummy2();
}

write(2, "after\n", 6);

/* Ensure they are preserved across the sigreturn (xref i#3812). */
# ifdef __AVX512F__
/* Use a new buffer to avoid the old values. We could do a custom memset
* with rep movs in asm instead (regular memset may clobber SIMD regs).
*/
int buf2[INTS_PER_ZMM * NUM_SIMD_AVX512_REGS];
# define MOVE_FROM_ZMM(buf, num) \
__asm__ __volatile__("vmovdqu64 %%zmm" #num ", %0" \
: "=m"(buf[num * INTS_PER_ZMM]) \
: \
: "zmm" #num);
MOVE_FROM_ZMM(buf2, 0)
MOVE_FROM_ZMM(buf2, 1)
MOVE_FROM_ZMM(buf2, 2)
MOVE_FROM_ZMM(buf2, 3)
MOVE_FROM_ZMM(buf2, 4)
MOVE_FROM_ZMM(buf2, 5)
MOVE_FROM_ZMM(buf2, 6)
MOVE_FROM_ZMM(buf2, 7)
# ifdef X64
MOVE_FROM_ZMM(buf2, 8)
MOVE_FROM_ZMM(buf2, 9)
MOVE_FROM_ZMM(buf2, 10)
MOVE_FROM_ZMM(buf2, 11)
MOVE_FROM_ZMM(buf2, 12)
MOVE_FROM_ZMM(buf2, 13)
MOVE_FROM_ZMM(buf2, 14)
MOVE_FROM_ZMM(buf2, 15)
MOVE_FROM_ZMM(buf2, 16)
MOVE_FROM_ZMM(buf2, 17)
MOVE_FROM_ZMM(buf2, 18)
MOVE_FROM_ZMM(buf2, 19)
MOVE_FROM_ZMM(buf2, 20)
MOVE_FROM_ZMM(buf2, 21)
MOVE_FROM_ZMM(buf2, 22)
MOVE_FROM_ZMM(buf2, 23)
MOVE_FROM_ZMM(buf2, 24)
MOVE_FROM_ZMM(buf2, 25)
MOVE_FROM_ZMM(buf2, 26)
MOVE_FROM_ZMM(buf2, 27)
MOVE_FROM_ZMM(buf2, 28)
MOVE_FROM_ZMM(buf2, 29)
MOVE_FROM_ZMM(buf2, 30)
MOVE_FROM_ZMM(buf2, 31)
# endif
for (i = 0; i < NUM_SIMD_AVX512_REGS; i++) {
for (j = 0; j < INTS_PER_ZMM; j++) {
if (buf2[i * INTS_PER_ZMM + j] != 0xdeadbeef + i * INTS_PER_ZMM + j) {
write(2, "Assertion failed.\n", 19);
_exit(1);
}
}
}

/* Re-using INTS_PER_ZMM here to get same data patterns as above. */
int buf3[INTS_PER_ZMM * NUM_OPMASK_REGS];
# define MOVE_FROM_OPMASK(buf, num) \
__asm__ __volatile__("kmovw %%k" #num ", %0" \
: "=m"(buf[num * INTS_PER_ZMM]) \
: \
: "k" #num);
MOVE_FROM_OPMASK(buf3, 0)
MOVE_FROM_OPMASK(buf3, 1)
MOVE_FROM_OPMASK(buf3, 2)
MOVE_FROM_OPMASK(buf3, 3)
MOVE_FROM_OPMASK(buf3, 4)
MOVE_FROM_OPMASK(buf3, 5)
MOVE_FROM_OPMASK(buf3, 6)
MOVE_FROM_OPMASK(buf3, 7)
for (i = 0; i < NUM_OPMASK_REGS; i++) {
short bufval = (short)buf3[i * INTS_PER_ZMM];
short expect = (short)(0xdeadbeef + i * INTS_PER_ZMM);
assert(bufval == expect);
}
# else
int buf2[INTS_PER_YMM * NUM_SIMD_SSE_AVX_REGS];
# define MOVE_FROM_YMM(buf, num) \
__asm__ __volatile__("vmovdqu %%ymm" #num ", %0" \
: "=m"(buf[num * INTS_PER_YMM]) \
: \
: "ymm" #num);
MOVE_FROM_YMM(buf2, 0)
MOVE_FROM_YMM(buf2, 1)
MOVE_FROM_YMM(buf2, 2)
MOVE_FROM_YMM(buf2, 3)
MOVE_FROM_YMM(buf2, 4)
MOVE_FROM_YMM(buf2, 5)
MOVE_FROM_YMM(buf2, 6)
MOVE_FROM_YMM(buf2, 7)
# ifdef X64
MOVE_FROM_YMM(buf2, 8)
MOVE_FROM_YMM(buf2, 9)
MOVE_FROM_YMM(buf2, 10)
MOVE_FROM_YMM(buf2, 11)
MOVE_FROM_YMM(buf2, 12)
MOVE_FROM_YMM(buf2, 13)
MOVE_FROM_YMM(buf2, 14)
MOVE_FROM_YMM(buf2, 15)
# endif
for (i = 0; i < NUM_SIMD_SSE_AVX_REGS; i++) {
for (j = 0; j < INTS_PER_YMM; j++) {
if (buf2[i * INTS_PER_YMM + j] != 0xdeadbeef + i * INTS_PER_ZMM + j) {
write(2, "Assertion failed.\n", 19);
_exit(1);
}
}
}
# endif
}
#endif

write(2, "All done\n", 9);
return 0;
}