From 230c669bb2164084f5507d948e964c07844d4102 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Mon, 18 Jul 2022 13:45:06 +0200
Subject: [PATCH] tsan: attempt to reproduce spurious ReportRace contention

For context see:
https://github.com/google/sanitizers/issues/1552
---
 compiler-rt/lib/tsan/rtl/tsan_rtl.cpp         |  9 ++-
 compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp  | 13 +---
 compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp  | 37 +++++++--
 compiler-rt/lib/tsan/rtl/tsan_shadow.h        |  9 +++
 compiler-rt/test/tsan/bench.h                 | 18 +++--
 compiler-rt/test/tsan/bench_spurious_race.cpp | 75 +++++++++++++++++++
 6 files changed, 133 insertions(+), 28 deletions(-)
 create mode 100644 compiler-rt/test/tsan/bench_spurious_race.cpp

diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
index 825a9d791ecc42a..b9231f991b80f47 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
@@ -167,9 +167,9 @@ static void DoResetImpl(uptr epoch) {
         // so this is only best-effort. The thread can only modify position
         // within this part, because switching parts is protected by
         // slot/trace mutexes that we hold here.
-        atomic_store_relaxed(
-            &tctx->thr->trace_pos,
-            reinterpret_cast<uptr>(&part->events[TracePart::kSize]));
+        //atomic_store_relaxed(
+        //    &tctx->thr->trace_pos,
+        //    reinterpret_cast<uptr>(&part->events[TracePart::kSize]));
         break;
       }
       parts->Remove(part);
@@ -221,7 +221,7 @@ void DoReset(ThreadState* thr, uptr epoch) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
       return;
     }
   }
-  DPrintf("#%d: DoReset epoch=%lu\n", thr ? thr->tid : -1, epoch);
+  Printf("#%d: DoReset epoch=%lu\n", thr ? thr->tid : -1, epoch);
   DoResetImpl(epoch);
   for (auto& slot : ctx->slots) slot.mtx.Unlock();
 }
@@ -294,6 +294,7 @@ void SlotAttachAndLock(ThreadState* thr) {
   }
   thr->clock.Set(slot->sid, epoch);
   slot->journal.PushBack({thr->tid, epoch});
+  //!!! do we need to TraceTime here?
 }
 
 static void SlotDetachImpl(ThreadState* thr, bool exiting) {
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp
index 7d771bfaad7f713..1eca5598ac185ed 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp
@@ -145,15 +145,6 @@ void TraceTime(ThreadState* thr) {
   TraceEvent(thr, ev);
 }
 
-ALWAYS_INLINE RawShadow LoadShadow(RawShadow* p) {
-  return static_cast<RawShadow>(
-      atomic_load((atomic_uint32_t*)p, memory_order_relaxed));
-}
-
-ALWAYS_INLINE void StoreShadow(RawShadow* sp, RawShadow s) {
-  atomic_store((atomic_uint32_t*)sp, static_cast<u32>(s), memory_order_relaxed);
-}
-
 NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                            Shadow old,
                            AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
@@ -170,11 +161,13 @@ NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
   // the slot locked because of the fork. But MemoryRangeFreed is not
   // called during fork because fork sets ignore_reads_and_writes,
   // so simply unlocking the slot should be fine.
+  //u64 start = NanoTime();
   if (typ & kAccessSlotLocked)
     SlotUnlock(thr);
   ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
   if (typ & kAccessSlotLocked)
     SlotLock(thr);
+  //Printf("#%03d: ReportRace %p took %llu us\n", thr->tid, (void *)ShadowToMem(shadow_mem), (NanoTime() - start) / 1000);
 }
 
 #if !TSAN_VECTORIZE
@@ -357,6 +350,8 @@ STORE : {
     if (UNLIKELY(index == 0))
       index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
   }
+  if (1 && !(typ & kAccessAtomic) && !(typ & kAccessRead) && ((atomic_load_relaxed(&thr->trace_pos) / 8) % 47) == 0)
+    internal_usleep(5);
   StoreShadow(&shadow_mem[index / 4], cur.raw());
   // We could zero other slots determined by rewrite_mask.
   // That would help other threads to evict better slots,
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
index 4cf8816489df442..8a987fa3f2bbd11 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
@@ -496,6 +496,8 @@ bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size,
     }
     bool match = ev_sid == sid && ev_epoch == epoch;
     if (evp->is_access) {
+      if (!match)
+        return;
       if (evp->is_func == 0 && evp->type == EventType::kAccessExt &&
           evp->_ == 0)  // NopEvent
         return;
@@ -507,7 +509,7 @@ bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size,
       prev_pc = ev_pc;
       DPrintf2("  Access: pc=0x%zx addr=0x%zx/%zu type=%u/%u\n", ev_pc,
               ev_addr, ev_size, ev->is_read, ev->is_atomic);
-      if (match && type == EventType::kAccessExt &&
+      if (type == EventType::kAccessExt &&
           IsWithinAccess(addr, size, ev_addr, ev_size) &&
           is_read == ev->is_read && is_atomic == ev->is_atomic && !is_free)
         RestoreStackMatch(pstk, pmset, &stack, mset, ev_pc, &found);
@@ -530,13 +532,15 @@ bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size,
     }
     switch (evp->type) {
       case EventType::kAccessExt: {
+        if (!match)
+          return;
        auto *ev = reinterpret_cast<EventAccessExt *>(evp);
         uptr ev_addr = RestoreAddr(ev->addr);
         uptr ev_size = 1 << ev->size_log;
         prev_pc = ev->pc;
         DPrintf2("  AccessExt: pc=0x%llx addr=0x%zx/%zu type=%u/%u\n", ev->pc,
                  ev_addr, ev_size, ev->is_read, ev->is_atomic);
-        if (match && type == EventType::kAccessExt &&
+        if (type == EventType::kAccessExt &&
             IsWithinAccess(addr, size, ev_addr, ev_size) &&
             is_read == ev->is_read && is_atomic == ev->is_atomic &&
             !is_free)
@@ -544,6 +548,8 @@ bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size,
         break;
       }
       case EventType::kAccessRange: {
+        if (!match)
+          return;
         auto *ev = reinterpret_cast<EventAccessRange *>(evp);
         uptr ev_addr = RestoreAddr(ev->addr);
         uptr ev_size =
@@ -552,7 +558,7 @@ bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size,
         prev_pc = ev_pc;
         DPrintf2("  Range: pc=0x%zx addr=0x%zx/%zu type=%u/%u\n", ev_pc,
                  ev_addr, ev_size, ev->is_read, ev->is_free);
-        if (match && type == EventType::kAccessExt &&
+        if (type == EventType::kAccessExt &&
             IsWithinAccess(addr, size, ev_addr, ev_size) &&
             is_read == ev->is_read && !is_atomic && is_free == ev->is_free)
           RestoreStackMatch(pstk, pmset, &stack, mset, ev_pc, &found);
@@ -699,6 +705,7 @@ bool OutputReport(ThreadState *thr, const ScopedReport &srep) {
 }
 
 bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace) {
+/*
   ReadLock lock(&ctx->fired_suppressions_mtx);
   for (uptr k = 0; k < ctx->fired_suppressions.size(); k++) {
     if (ctx->fired_suppressions[k].type != type)
       continue;
@@ -712,10 +719,12 @@ bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace) {
       }
     }
   }
+  */
   return false;
 }
 
 static bool IsFiredSuppression(Context *ctx, ReportType type, uptr addr) {
+/*
   ReadLock lock(&ctx->fired_suppressions_mtx);
   for (uptr k = 0; k < ctx->fired_suppressions.size(); k++) {
     if (ctx->fired_suppressions[k].type != type)
@@ -727,6 +736,7 @@ static bool IsFiredSuppression(Context *ctx, ReportType type, uptr addr) {
       return true;
     }
   }
+  */
   return false;
 }
 
@@ -739,7 +749,6 @@ void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old,
   ScopedIgnoreInterceptors ignore;
 
   uptr addr = ShadowToMem(shadow_mem);
-  DPrintf("#%d: ReportRace %p\n", thr->tid, (void *)addr);
   if (!ShouldReport(thr, ReportTypeRace))
     return;
   uptr addr_off0, size0;
@@ -753,14 +762,22 @@ void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old,
 
   const uptr kMop = 2;
   Shadow s[kMop] = {cur, old};
+
+  //Printf("#%d: ReportRace %p sid=%u epoch=%u\n", thr->tid, (void *)addr, (u32)s[1].sid(), (u32)s[1].epoch());
+  static Sid bad_sid;
+  static Epoch bad_epoch;
+  if (s[1].sid() == bad_sid && s[1].epoch() == bad_epoch)
+    return;
+
+
   uptr addr0 = addr + addr_off0;
   uptr addr1 = addr + addr_off1;
   uptr end0 = addr0 + size0;
   uptr end1 = addr1 + size1;
   uptr addr_min = min(addr0, addr1);
   uptr addr_max = max(end0, end1);
-  if (IsExpectedReport(addr_min, addr_max - addr_min))
-    return;
+  //if (IsExpectedReport(addr_min, addr_max - addr_min))
+  //  return;
   if (HandleRacyAddress(thr, addr_min, addr_max))
     return;
 
@@ -791,9 +808,15 @@ void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old,
   Lock slot_lock(&ctx->slots[static_cast<uptr>(s[1].sid())].mtx);
   ThreadRegistryLock l0(&ctx->thread_registry);
   Lock slots_lock(&ctx->slot_mtx);
+
+  //if (LoadShadow(shadow_mem) == Shadow::kRodata)
+  //  return;
   if (!RestoreStack(EventType::kAccessExt, s[1].sid(), s[1].epoch(), addr1,
-                    size1, typ1, &tids[1], &traces[1], mset[1], &tags[1]))
+                    size1, typ1, &tids[1], &traces[1], mset[1], &tags[1])) {
+    bad_sid = s[1].sid();
+    bad_epoch = s[1].epoch();
     return;
+  }
   if (IsFiredSuppression(ctx, rep_typ, traces[1]))
     return;
 
diff --git a/compiler-rt/lib/tsan/rtl/tsan_shadow.h b/compiler-rt/lib/tsan/rtl/tsan_shadow.h
index b222acf9e6c5180..f5dda139a5ae1a9 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_shadow.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_shadow.h
@@ -178,6 +178,15 @@ class Shadow {
 
 static_assert(sizeof(Shadow) == kShadowSize, "bad Shadow size");
 
+ALWAYS_INLINE RawShadow LoadShadow(RawShadow* p) {
+  return static_cast<RawShadow>(
+      atomic_load((atomic_uint32_t*)p, memory_order_relaxed));
+}
+
+ALWAYS_INLINE void StoreShadow(RawShadow* sp, RawShadow s) {
+  atomic_store((atomic_uint32_t*)sp, static_cast<u32>(s), memory_order_relaxed);
+}
+
 } // namespace __tsan
 
 #endif
diff --git a/compiler-rt/test/tsan/bench.h b/compiler-rt/test/tsan/bench.h
index 58a0a78d4598efe..2497ce4a0c5f351 100644
--- a/compiler-rt/test/tsan/bench.h
+++ b/compiler-rt/test/tsan/bench.h
@@ -1,5 +1,6 @@
 #include "test.h"
 #include <time.h>
+#include <stdint.h>
 
 int bench_nthread;
 int bench_niter;
@@ -8,6 +9,12 @@ int bench_mode;
 void bench(); // defined by user
 void start_thread_group(int nth, void(*f)(int tid));
 
+uint64_t nanotime() {
+  timespec tp;
+  clock_gettime(CLOCK_MONOTONIC, &tp);
+  return tp.tv_sec * 1000000000ULL + tp.tv_nsec;
+}
+
 int main(int argc, char **argv) {
   bench_nthread = 2;
   if (argc > 1)
@@ -18,15 +25,10 @@ int main(int argc, char **argv) {
   if (argc > 3)
     bench_mode = atoi(argv[3]);
 
-  timespec tp0;
-  clock_gettime(CLOCK_MONOTONIC, &tp0);
+  uint64_t start = nanotime();
   bench();
-  timespec tp1;
-  clock_gettime(CLOCK_MONOTONIC, &tp1);
-  unsigned long long t =
-      (tp1.tv_sec * 1000000000ULL + tp1.tv_nsec) -
-      (tp0.tv_sec * 1000000000ULL + tp0.tv_nsec);
-  fprintf(stderr, "%llu ns/iter\n", t / bench_niter);
+  uint64_t t = nanotime() - start;
+  fprintf(stderr, "%lu ns/iter\n", t / bench_niter);
   fprintf(stderr, "DONE\n");
 }
 
diff --git a/compiler-rt/test/tsan/bench_spurious_race.cpp b/compiler-rt/test/tsan/bench_spurious_race.cpp
new file mode 100644
index 000000000000000..e0b18efff6202fa
--- /dev/null
+++ b/compiler-rt/test/tsan/bench_spurious_race.cpp
@@ -0,0 +1,75 @@
+// RUN: %clangxx_tsan %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+
+// bench.h needs pthread barriers which are not available on OS X
+// UNSUPPORTED: darwin
+
+#include "bench.h"
+#include <atomic>
+#include <unistd.h>
+
+struct Block {
+  volatile long data[32 << 10];
+};
+
+Block* queue;
+std::atomic<int> step;
+std::atomic<int> ref;
+std::atomic<int> fake;
+std::atomic<int> reset[1000];
+int histogram[64];
+
+void thread(int tid) {
+  if (tid == 0) {
+    uint64_t start = nanotime();
+    for (int i = 0; i < bench_niter; i++) {
+      Block* b = new Block;
+      fake++;
+      for (auto& data : b->data)
+        data = 1;
+      while (i != 0 && ref != bench_nthread / 2)
+        pthread_yield();
+      Block* prev = queue;
+      queue = b;
+      ref = 0;
+      step++;
+      delete prev;
+      uint64_t now = nanotime();
+      uint64_t time_ms = (now - start) / 1000000;
+      start = now;
+      int idx = 0;
+      for (;time_ms; time_ms /= 2, idx++) {}
+      histogram[idx]++;
+    }
+    int first = 64, last = 64;
+    for (int i = 0; i < 64; i++) {
+      if (!histogram[i])
+        continue;
+      if (first == 64)
+        first = i;
+      last = i + 1;
+    }
+    for (uint64_t ms = 1; first < last; first++, ms *= 2)
+      printf("<%-6lums: %d\n", ms, histogram[first]);
+  } else if (tid % 2) {
+    for (int i = 0; i < bench_niter; i++) {
+      while (step != i + 1)
+        usleep(10);
+      Block* b = queue;
+      for (auto data : b->data) {
+        if (data != 1)
+          exit(1);
+      }
+      ref++;
+    }
+  } else {
+    while (step < bench_niter)
+      reset[(tid / 2) % (sizeof(reset)/sizeof(reset[0]))]++;
+  }
+}
+
+void bench() {
+  start_thread_group(bench_nthread, thread);
+}
+
+// CHECK: DONE