From 4dfe3d9aba2db52bb21618d27e2a1ec2e3f1cf09 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Mon, 26 Aug 2024 17:22:16 -0400 Subject: [PATCH] i#5505 kernel tracing: Add flag to configure PT buffer size (#6936) Adds a new drmemtrace command-line flag that allows setting an appropriate size for the buffer used to collect PT kernel trace data during online trace collection: -kernel_trace_buffer_size_shift. For apps with a high thread count, a large value may cause OOMs. At the same time, a value that is too low causes issues during trace decoding. The new flag allows tuning the buffer size as needed. Verified that the PT-related tests continue to work on a system that supports the Intel-PT hardware feature: ``` The following tests passed: code_api|tool.drcacheoff.kernel.simple_SUDO code_api|tool.drcacheoff.kernel.opcode-mix_SUDO code_api|tool.drcacheoff.kernel.syscall-mix_SUDO code_api|tool.drcacheoff.kernel.invariant-checker_SUDO ... The following tests passed: code_api|client.drpttracer_SUDO-test ``` Issue: #5505 --- clients/drcachesim/common/options.cpp | 8 ++++++++ clients/drcachesim/common/options.h | 1 + clients/drcachesim/drpt2trace/pt2ir.cpp | 5 +++++ clients/drcachesim/tracer/syscall_pt_trace.cpp | 14 ++++++++------ clients/drcachesim/tracer/syscall_pt_trace.h | 15 +++++++++++++-- clients/drcachesim/tracer/tracer.cpp | 3 ++- ext/drpttracer/drpttracer.c | 8 ++++---- 7 files changed, 41 insertions(+), 13 deletions(-) diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 35f85e8537b..b7a6176b47f 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -860,6 +860,14 @@ droption_t op_skip_kcore_dump( "as normal. 
This may be useful if it is not feasible to run the application " "with superuser permissions and the user wants to use a different kcore " "dump, from a prior trace or created separately."); +droption_t op_kernel_trace_buffer_size_shift( + DROPTION_SCOPE_ALL, "kernel_trace_buffer_size_shift", 8, + "Size of the buffer used to collect kernel trace data.", + "When -enable_kernel_tracing is set, this is used to compute the size of the " + "buffer used to collect kernel trace data. The size is computed as " + "(1 << kernel_trace_buffer_size_shift) * page_size. Too large buffers can cause " + "OOMs on apps with many threads, whereas too small buffers can cause decoding " + "issues in raw2trace due to dropped trace data."); #endif // Core-oriented analysis. diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index b064a4e961a..5e17c248cc9 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -195,6 +195,7 @@ extern dynamorio::droption::droption_t op_enable_drstatecmp; #ifdef BUILD_PT_TRACER extern dynamorio::droption::droption_t op_enable_kernel_tracing; extern dynamorio::droption::droption_t op_skip_kcore_dump; +extern dynamorio::droption::droption_t op_kernel_trace_buffer_size_shift; #endif extern dynamorio::droption::droption_t op_core_sharded; extern dynamorio::droption::droption_t op_core_serial; diff --git a/clients/drcachesim/drpt2trace/pt2ir.cpp b/clients/drcachesim/drpt2trace/pt2ir.cpp index 13ea7221daf..c8e16c71b61 100644 --- a/clients/drcachesim/drpt2trace/pt2ir.cpp +++ b/clients/drcachesim/drpt2trace/pt2ir.cpp @@ -424,6 +424,11 @@ pt2ir_t::dx_decoding_error(DR_PARAM_IN int errcode, DR_PARAM_IN const char *errt VPRINT(0, "[" HEX64_FORMAT_STRING ", IP:" HEX64_FORMAT_STRING "] %s: %s\n", pos, ip, errtype, pt_errstr(pt_errcode(errcode))); } + if (errcode == -pte_no_enable) { + VPRINT(0, + "Consider increasing -kernel_trace_buffer_size_shift to avoid dropping PT " + "trace data."); + } } } 
// namespace drmemtrace diff --git a/clients/drcachesim/tracer/syscall_pt_trace.cpp b/clients/drcachesim/tracer/syscall_pt_trace.cpp index c22191d7009..24ebceb1f6a 100644 --- a/clients/drcachesim/tracer/syscall_pt_trace.cpp +++ b/clients/drcachesim/tracer/syscall_pt_trace.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023 Google, Inc. All rights reserved. + * Copyright (c) 2023-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -58,7 +58,6 @@ namespace drmemtrace { #ifndef OUTFILE_SUFFIX_PT # define OUTFILE_SUFFIX_PT "raw.pt" #endif -#define RING_BUFFER_SIZE_SHIFT 8 syscall_pt_trace_t::syscall_pt_trace_t() : open_file_ex_func_(nullptr) @@ -72,6 +71,7 @@ syscall_pt_trace_t::syscall_pt_trace_t() , is_dumping_metadata_(false) , drcontext_(nullptr) , output_file_(INVALID_FILE) + , kernel_trace_buffer_size_shift_(0) { } @@ -87,7 +87,8 @@ bool syscall_pt_trace_t::init(void *drcontext, char *pt_dir_name, drmemtrace_open_file_ex_func_t open_file_ex_func, drmemtrace_write_file_func_t write_file_func, - drmemtrace_close_file_func_t close_file_func) + drmemtrace_close_file_func_t close_file_func, + int kernel_trace_buffer_size_shift) { if (is_initialized_) { ASSERT(false, "syscall_pt_trace_t is already initialized"); @@ -105,11 +106,11 @@ syscall_pt_trace_t::init(void *drcontext, char *pt_dir_name, // TODO i#5505: Pass the window-id to support windowed traces. output_file_ = open_file_ex_func_(output_file_name.c_str(), DR_FILE_WRITE_REQUIRE_NEW, dr_get_thread_id(drcontext_), 0); - + kernel_trace_buffer_size_shift_ = kernel_trace_buffer_size_shift; /* Create a buffer to store the data generated by drpttracer. For syscall traces, only * the PT data is dumped, and the sideband data is not included. 
*/ - if (drpttracer_create_output(drcontext_, RING_BUFFER_SIZE_SHIFT, 0, + if (drpttracer_create_output(drcontext_, kernel_trace_buffer_size_shift_, 0, &pttracer_output_buffer_.data) != DRPTTRACER_SUCCESS) { return false; } @@ -125,7 +126,8 @@ syscall_pt_trace_t::start_syscall_pt_trace(DR_PARAM_IN int sysnum) ASSERT(drcontext_ != nullptr, "drcontext_ is nullptr"); if (drpttracer_create_handle(drcontext_, DRPTTRACER_TRACING_ONLY_KERNEL, - RING_BUFFER_SIZE_SHIFT, RING_BUFFER_SIZE_SHIFT, + kernel_trace_buffer_size_shift_, + kernel_trace_buffer_size_shift_, &pttracer_handle_.handle) != DRPTTRACER_SUCCESS) { return false; } diff --git a/clients/drcachesim/tracer/syscall_pt_trace.h b/clients/drcachesim/tracer/syscall_pt_trace.h index 430b0b0d5d8..d9af890d227 100644 --- a/clients/drcachesim/tracer/syscall_pt_trace.h +++ b/clients/drcachesim/tracer/syscall_pt_trace.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023 Google, Inc. All rights reserved. + * Copyright (c) 2023-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -115,7 +115,8 @@ class syscall_pt_trace_t { init(void *drcontext, char *pt_dir_name, drmemtrace_open_file_ex_func_t open_file_ex_func, drmemtrace_write_file_func_t write_file_func, - drmemtrace_close_file_func_t close_file_func); + drmemtrace_close_file_func_t close_file_func, + int kernel_trace_buffer_size_shift); /* Start the PT tracing for current syscall and store the sysnum of the syscall. */ bool @@ -198,6 +199,16 @@ class syscall_pt_trace_t { * every syscall in the current thread. */ file_t output_file_; + + /* The ring buffer that stores the recorded PT data is assigned a size of + * (1 << kernel_trace_buffer_size_shift_) * page_size. + * For apps with a high thread count, this may cause us to exceed the available + * memory. 
But if it is configured too low, we may see errors of type + * READ_RING_BUFFER_ERROR_OLD_DATA_OVERWRITTEN or + * "get next instruction error: expected tracing enabled event" during PT trace + * decoding. + */ + int kernel_trace_buffer_size_shift_; }; } // namespace drmemtrace diff --git a/clients/drcachesim/tracer/tracer.cpp b/clients/drcachesim/tracer/tracer.cpp index 13964f6fb2f..7ccafbb8f16 100644 --- a/clients/drcachesim/tracer/tracer.cpp +++ b/clients/drcachesim/tracer/tracer.cpp @@ -1809,7 +1809,8 @@ init_thread_in_process(void *drcontext) return file_ops_func.call_open_file(fname, mode_flags, thread_id, window_id); }, - file_ops_func.write_file, file_ops_func.close_file)) { + file_ops_func.write_file, file_ops_func.close_file, + op_kernel_trace_buffer_size_shift.get_value())) { FATAL("Failed to init syscall_pt_trace_t for kernel raw files at %s\n", kernel_trace_logsubdir); } diff --git a/ext/drpttracer/drpttracer.c b/ext/drpttracer/drpttracer.c index a56b1b62b51..d957ce86997 100644 --- a/ext/drpttracer/drpttracer.c +++ b/ext/drpttracer/drpttracer.c @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2023 Google, Inc. All rights reserved. + * Copyright (c) 2023-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -164,9 +164,9 @@ read_ring_buf_to_buf(DR_PARAM_IN void *drcontext, DR_PARAM_IN uint8_t *ring_buf_ return READ_RING_BUFFER_ERROR_INVALID_PARAMETER; } if (data_size > ring_buf_size) { - ASSERT( - false, - "data size is larger than the ring buffer size, and old data is overwritten"); + ASSERT(false, + "Data size is larger than the ring buffer size, and old data is " + "overwritten. Consider increasing -kernel_trace_buffer_size_shift."); return READ_RING_BUFFER_ERROR_OLD_DATA_OVERWRITTEN; }