diff --git a/clients/drcachesim/tracer/instru.h b/clients/drcachesim/tracer/instru.h index c7137d4820b..d348ed21f07 100644 --- a/clients/drcachesim/tracer/instru.h +++ b/clients/drcachesim/tracer/instru.h @@ -38,6 +38,7 @@ #include #include +#include #include "dr_api.h" #include "drvector.h" #include "trace_entry.h" @@ -179,6 +180,7 @@ class instru_t { , disable_optimizations_(disable_opts) , instr_size_(instruction_size) { + frozen_timestamp_.store(0, std::memory_order_release); } virtual ~instru_t() { @@ -221,10 +223,15 @@ class instru_t { // This is a per-buffer-writeout header. virtual int append_unit_header(byte *buf_ptr, thread_id_t tid, ptr_int_t window) = 0; + // If the entry at buf_ptr is not a timestamp, returns false. + // Else, if the timestamp value is < min_timestamp, replaces it with a + // new timestamp (ignoring the frozen timestamp). Returns true either way. + virtual bool + refresh_unit_header_timestamp(byte *buf_ptr, uint64 min_timestamp) = 0; virtual void set_frozen_timestamp(uint64 timestamp) { - frozen_timestamp_ = timestamp; + frozen_timestamp_.store(timestamp, std::memory_order_release); } // These insert inlined code to add an entry into the trace buffer. @@ -292,8 +299,8 @@ class instru_t { drvector_t *reg_vector_; bool disable_optimizations_; // Stores a timestamp to use for all future unit headers. This is meant for - // avoiding time gaps for max-limit scenarios (i#5021). - uint64 frozen_timestamp_ = 0; + // avoiding time gaps for -align_endpoints or max-limit scenarios (i#5021). + std::atomic frozen_timestamp_; private: instru_t() @@ -339,6 +346,8 @@ class online_instru_t : public instru_t { append_thread_header(byte *buf_ptr, thread_id_t tid, offline_file_type_t file_type); int append_unit_header(byte *buf_ptr, thread_id_t tid, ptr_int_t window) override; + bool + refresh_unit_header_timestamp(byte *buf_ptr, uint64 min_timestamp) override; int instrument_memref(void *drcontext, void *bb_field, instrlist_t *ilist, instr_t *where, @@ -420,6 +429,8 @@ class offline_instru_t : public instru_t { append_thread_header(byte *buf_ptr, thread_id_t tid, offline_file_type_t file_type); int append_unit_header(byte *buf_ptr, thread_id_t tid, ptr_int_t window) override; + bool + refresh_unit_header_timestamp(byte *buf_ptr, uint64 min_timestamp) override; int instrument_memref(void *drcontext, void *bb_field, instrlist_t *ilist, instr_t *where, diff --git a/clients/drcachesim/tracer/instru_offline.cpp b/clients/drcachesim/tracer/instru_offline.cpp index d08fa8035b7..3673fe02478 100644 --- a/clients/drcachesim/tracer/instru_offline.cpp +++ b/clients/drcachesim/tracer/instru_offline.cpp @@ -397,8 +397,8 @@ offline_instru_t::append_unit_header(byte *buf_ptr, thread_id_t tid, ptr_int_t w byte *new_buf = buf_ptr; offline_entry_t *entry = (offline_entry_t *)new_buf; entry->timestamp.type = OFFLINE_TYPE_TIMESTAMP; - entry->timestamp.usec = - frozen_timestamp_ != 0 ? frozen_timestamp_ : instru_t::get_timestamp(); + uint64 frozen = frozen_timestamp_.load(std::memory_order_acquire); + entry->timestamp.usec = frozen != 0 ? frozen : instru_t::get_timestamp(); new_buf += sizeof(*entry); if (window >= 0) new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_WINDOW_ID, (uintptr_t)window); @@ -406,6 +406,17 @@ offline_instru_t::append_unit_header(byte *buf_ptr, thread_id_t tid, ptr_int_t w return (int)(new_buf - buf_ptr); } +bool +offline_instru_t::refresh_unit_header_timestamp(byte *buf_ptr, uint64 min_timestamp) +{ + offline_entry_t *stamp = reinterpret_cast(buf_ptr); + if (stamp->timestamp.type != OFFLINE_TYPE_TIMESTAMP) + return false; + if (stamp->timestamp.usec < min_timestamp) + stamp->timestamp.usec = instru_t::get_timestamp(); + return true; +} + int offline_instru_t::insert_save_entry(void *drcontext, instrlist_t *ilist, instr_t *where, reg_id_t reg_ptr, reg_id_t scratch, int adjust, diff --git a/clients/drcachesim/tracer/instru_online.cpp b/clients/drcachesim/tracer/instru_online.cpp index a9f8351c26b..2119e691ab6 100644 --- a/clients/drcachesim/tracer/instru_online.cpp +++ b/clients/drcachesim/tracer/instru_online.cpp @@ -185,17 +185,28 @@ online_instru_t::append_unit_header(byte *buf_ptr, thread_id_t tid, intptr_t win { byte *new_buf = buf_ptr; new_buf += append_tid(new_buf, tid); - new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_TIMESTAMP, - // Truncated to 32 bits for 32-bit: we live with it. - static_cast(frozen_timestamp_ != 0 - ? frozen_timestamp_ - : instru_t::get_timestamp())); + uint64 frozen = frozen_timestamp_.load(std::memory_order_acquire); + new_buf += append_marker( + new_buf, TRACE_MARKER_TYPE_TIMESTAMP, + // Truncated to 32 bits for 32-bit: we live with it. + static_cast(frozen != 0 ? frozen : instru_t::get_timestamp())); if (window >= 0) new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_WINDOW_ID, (uintptr_t)window); new_buf += append_marker(new_buf, TRACE_MARKER_TYPE_CPU_ID, instru_t::get_cpu_id()); return (int)(new_buf - buf_ptr); } +bool +online_instru_t::refresh_unit_header_timestamp(byte *buf_ptr, uint64 min_timestamp) +{ + trace_entry_t *stamp = reinterpret_cast(buf_ptr); + if (stamp->type != TRACE_TYPE_MARKER || stamp->size != TRACE_MARKER_TYPE_TIMESTAMP) + return false; + if (stamp->addr < min_timestamp) + stamp->addr = static_cast(instru_t::get_timestamp()); + return true; +} + void online_instru_t::insert_save_immed(void *drcontext, instrlist_t *ilist, instr_t *where, reg_id_t base, reg_id_t scratch, ptr_int_t immed, diff --git a/clients/drcachesim/tracer/output.cpp b/clients/drcachesim/tracer/output.cpp index 911351dd649..857e65cf059 100644 --- a/clients/drcachesim/tracer/output.cpp +++ b/clients/drcachesim/tracer/output.cpp @@ -445,15 +445,21 @@ open_new_thread_file(void *drcontext, ptr_int_t window_num) return opened_new_file; } +/* Returns the size of just the thread header, but appends both the thread header + * and the buffer header. + */ static size_t -prepend_offline_thread_header(void *drcontext) +prepend_offline_thread_header(void *drcontext, ptr_int_t window = -1) { DR_ASSERT(op_offline.get_value()); /* Write initial headers at the top of the first buffer. */ per_thread_t *data = (per_thread_t *)drmgr_get_tls_field(drcontext, tls_idx); size_t size = reinterpret_cast(instru)->append_thread_header( data->buf_base, dr_get_thread_id(drcontext), get_file_type()); - BUF_PTR(data->seg_base) = data->buf_base + size + buf_hdr_slots_size; + BUF_PTR(data->seg_base) = data->buf_base + size; + BUF_PTR(data->seg_base) += append_unit_header( + drcontext, BUF_PTR(data->seg_base), dr_get_thread_id(drcontext), + window == -1 ? get_local_window(data) : window); data->has_thread_header = true; return size; } @@ -581,7 +587,7 @@ set_local_window(void *drcontext, ptr_int_t value) } if ((value > 0 && op_split_windows.get_value()) || data->init_header_size == 0) { - size_t header_size = prepend_offline_thread_header(drcontext); + size_t header_size = prepend_offline_thread_header(drcontext, value); if (data->init_header_size == 0) data->init_header_size = header_size; else @@ -674,22 +680,6 @@ is_ok_to_split_before(trace_type_t type) op_L0I_filter.get_value(); } -static size_t -add_buffer_header(void *drcontext, per_thread_t *data, byte *buf_base) -{ - size_t header_size = 0; - // For online we already wrote the thread header but for offline it is in - // the first buffer, so skip over it. - if (data->has_thread_header && op_offline.get_value()) - header_size = data->init_header_size; - data->has_thread_header = false; - // The initial slots are left empty for the header, which we add here. - header_size += - append_unit_header(drcontext, buf_base + header_size, dr_get_thread_id(drcontext), - get_local_window(data)); - return header_size; -} - static uint output_buffer(void *drcontext, per_thread_t *data, byte *buf_base, byte *buf_ptr, size_t header_size) @@ -811,9 +801,12 @@ process_entry_for_physaddr(void *drcontext, per_thread_t *data, size_t header_si ASSERT(size == data->init_header_size, "inconsistent header"); *skip = data->init_header_size; v2p_ptr += size; - header_size += size; } - v2p_ptr += add_buffer_header(drcontext, data, v2p_ptr); + // With timestamps at buffer start, we want to use the same timestamp+cpu + // to avoid out-of-order times. + memcpy(v2p_ptr, data->buf_base + header_size - buf_hdr_slots_size, + buf_hdr_slots_size); + v2p_ptr += buf_hdr_slots_size; *emitted = true; } if (v2p_ptr + 2 * instru->sizeof_entry() - data->v2p_buf >= @@ -822,7 +815,9 @@ process_entry_for_physaddr(void *drcontext, per_thread_t *data, size_t header_si data->num_phys_markers += output_buffer(drcontext, data, data->v2p_buf, v2p_ptr, header_size); v2p_ptr = data->v2p_buf; - v2p_ptr += add_buffer_header(drcontext, data, v2p_ptr); + memcpy(v2p_ptr, data->buf_base + header_size - buf_hdr_slots_size, + buf_hdr_slots_size); + v2p_ptr += buf_hdr_slots_size; } if (success) { v2p_ptr += @@ -906,7 +901,6 @@ process_and_output_buffer(void *drcontext, bool skip_size_cap) byte *mem_ref, *buf_ptr; byte *redzone; bool do_write = true; - size_t header_size = 0; uint current_num_refs = 0; if (op_offline.get_value() && data->file == INVALID_FILE) { @@ -916,10 +910,32 @@ process_and_output_buffer(void *drcontext, bool skip_size_cap) open_new_thread_file(drcontext, get_local_window(data)); } + size_t header_size = buf_hdr_slots_size; + // For online we already wrote the thread header but for offline it is in + // the first buffer. + if (data->has_thread_header && op_offline.get_value()) + header_size += data->init_header_size; + data->has_thread_header = false; + + if (align_attach_detach_endpoints()) { + // This is the attach counterpart to instru_t::set_frozen_timestamp(): we place + // timestamps at buffer creation, but that can be before we're fully attached. + // We update any too-early timestamps to reflect when we actually started + // tracing. (Switching back to timestamps at buffer output is actually + // worse as we then have the identical frozen timestamp for all the flushes + // during detach, plus they are all on the same cpu too.) + size_t stamp_offs = + header_size > buf_hdr_slots_size ? header_size - buf_hdr_slots_size : 0; + uint64 min_timestamp = attached_timestamp.load(std::memory_order_acquire); + bool found = instru->refresh_unit_header_timestamp(data->buf_base + stamp_offs, + min_timestamp); + DR_ASSERT(found); + } + buf_ptr = BUF_PTR(data->seg_base); // We may get called with nothing to write: e.g., on a syscall for // -L0I_filter and -L0D_filter. - if (buf_ptr == data->buf_base + header_size + buf_hdr_slots_size) { + if (buf_ptr == data->buf_base + header_size) { if (has_tracing_windows()) { // If there is no data to write, we do not emit an empty header here // unless this thread is exiting (set_local_window will also write out @@ -930,8 +946,6 @@ process_and_output_buffer(void *drcontext, bool skip_size_cap) return; } - header_size = add_buffer_header(drcontext, data, data->buf_base); - bool window_changed = false; if (has_tracing_windows() && get_local_window(data) != tracing_window.load(std::memory_order_acquire)) { @@ -1021,7 +1035,6 @@ process_and_output_buffer(void *drcontext, bool skip_size_cap) memset(redzone, -1, buf_ptr - redzone); } } - BUF_PTR(data->seg_base) = data->buf_base + buf_hdr_slots_size; num_refs_racy += current_num_refs; if (op_exit_after_tracing.get_value() > 0 && num_refs_racy > op_exit_after_tracing.get_value()) { @@ -1038,9 +1051,14 @@ process_and_output_buffer(void *drcontext, bool skip_size_cap) } dr_mutex_unlock(mutex); } + ptr_int_t window = -1; if (has_tracing_windows()) { - set_local_window(drcontext, tracing_window.load(std::memory_order_acquire)); + window = tracing_window.load(std::memory_order_acquire); + set_local_window(drcontext, window); } + BUF_PTR(data->seg_base) = data->buf_base + + append_unit_header(drcontext, data->buf_base, dr_get_thread_id(drcontext), + window); } void @@ -1106,8 +1124,10 @@ init_thread_io(void *drcontext) write_trace_data(drcontext, (byte *)buf, proc_info, get_local_window(data)); /* put buf_base to TLS plus header slots as starting buf_ptr */ - data->init_header_size = buf_hdr_slots_size; - BUF_PTR(data->seg_base) = data->buf_base + buf_hdr_slots_size; + data->init_header_size = + append_unit_header(drcontext, data->buf_base, dr_get_thread_id(drcontext), + get_local_window(data)); + BUF_PTR(data->seg_base) = data->buf_base + data->init_header_size; } } diff --git a/clients/drcachesim/tracer/tracer.cpp b/clients/drcachesim/tracer/tracer.cpp index 6047630f184..4a76e0b8ccc 100644 --- a/clients/drcachesim/tracer/tracer.cpp +++ b/clients/drcachesim/tracer/tracer.cpp @@ -113,6 +113,8 @@ size_t trace_buf_size; size_t redzone_size; size_t max_buf_size; +std::atomic attached_timestamp; + static drvector_t scratch_reserve_vec; /* per bb user data during instrumentation */ @@ -413,6 +415,7 @@ event_post_attach() DR_ASSERT(attached_midway); if (!align_attach_detach_endpoints()) return; + attached_timestamp.store(instru_t::get_timestamp(), std::memory_order_release); if (op_trace_after_instrs.get_value() != 0) { NOTIFY(1, "Switching to counting mode after attach\n"); tracing_mode.store(BBDUP_MODE_COUNT, std::memory_order_release); @@ -427,6 +430,12 @@ event_pre_detach() { if (align_attach_detach_endpoints()) { NOTIFY(1, "Switching to no-tracing mode during detach\n"); + // Keep all final thread output at the detach timestamp. We won't add to any + // buffers during detach and we now have timestamps added at buffer start + // instead of output but a non-omitted thread that is at a wait syscall + // and so has an empty buffer would have its thread exit with a post-detach + // timestamp without this freeze. + instru->set_frozen_timestamp(instru_t::get_timestamp()); tracing_mode.store(BBDUP_MODE_NOP, std::memory_order_release); } } diff --git a/clients/drcachesim/tracer/tracer.h b/clients/drcachesim/tracer/tracer.h index 05d2e0c5ef3..0be077d3e99 100644 --- a/clients/drcachesim/tracer/tracer.h +++ b/clients/drcachesim/tracer/tracer.h @@ -153,6 +153,7 @@ is_first_nonlabel(void *drcontext, instr_t *instr); extern std::atomic tracing_mode; extern std::atomic tracing_window; extern bool attached_midway; +extern std::atomic attached_timestamp; /* We have multiple modes. While just 2 results in a more efficient dispatch, * the power of extra modes justifies the extra overhead.