From ce3f97cd3c24f8e58e57f1a6112dffb5a5f6bf53 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sat, 19 Aug 2023 21:36:52 -0400 Subject: [PATCH] Use rr-safe `nopl; rdtsc` sequence (#50975) When running under `rr`, it needs to patch out `rdtsc` to record the values returned. If this is not possible, `rr` falls back to an expensive signal-based emulation. As of rr master, a specific `nopl; rdtsc` sequence may be used to guarantee that `rdtsc` patching is always possible. Use this sequence for uses of rdtsc in our runtime. --- src/julia_internal.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/julia_internal.h b/src/julia_internal.h index a61951a53ec91..1a0373f0f9131 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -209,8 +209,17 @@ JL_DLLEXPORT void jl_unlock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEA static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT { #if defined(_CPU_X86_64_) + // This is nopl 0(%rax, %rax, 1), but assembler are incosistent about whether + // they emit that as a 4 or 5 byte sequence and we need to be guaranteed to use + // the 5 byte one. +#define NOP5_OVERRIDE_NOP ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t" uint64_t low, high; - __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); + // This instruction sequence is promised by rr to be patchable. rr can usually + // also patch `rdtsc` in regular code, but without the preceeding nop, there could + // be an interfering branch into the middle of rr's patch region. Using this + // sequence prevents a massive rr-induced slowdown if the compiler happens to emit + // an unlucky pattern. See https://github.com/rr-debugger/rr/pull/3580. + __asm__ volatile(NOP5_OVERRIDE_NOP "rdtsc" : "=a"(low), "=d"(high)); return (high << 32) | low; #elif defined(_CPU_X86_) int64_t ret;