Rmccormick shmleak debug #306

Closed · wants to merge 2 commits
CMakeLists.txt (+18 -12)

@@ -95,20 +95,18 @@ FetchContent_Declare(
 )
 FetchContent_MakeAvailable(dlpack)
 
+set(BOOST_ENABLE_CMAKE ON)
+set(BOOST_INCLUDE_LIBRARIES stacktrace)
 #
 # Boost
 #
-ExternalProject_Add(
-  boostorg
-  URL https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.tar.gz
-  URL_HASH SHA256=273f1be93238a068aba4f9735a4a2b003019af067b9c183ed227780b8f36062c
-  PREFIX "boost-src"
-  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E copy_directory
-                    <SOURCE_DIR>/boost/ ${CMAKE_BINARY_DIR}/boost
-  INSTALL_COMMAND ""
-  BUILD_COMMAND ""
+FetchContent_Declare(
+  Boost
+  GIT_REPOSITORY https://github.com/boostorg/boost.git
+  GIT_TAG boost-1.81.0
+  GIT_SHALLOW ON
 )
-set(boostorg_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/boost/")
+FetchContent_MakeAvailable(Boost)
 
 #
 # CUDA
@@ -125,6 +123,10 @@ if(${TRITON_ENABLE_NVTX})
   add_definitions(-DTRITON_ENABLE_NVTX=1)
 endif() # TRITON_ENABLE_NVTX
 
+add_definitions(-DBOOST_STACKTRACE_USE_ADDR2LINE=1)
+add_definitions(-DBOOST_STACKTRACE_USE_BACKTRACE=1)
+
+
 find_package(ZLIB REQUIRED)
 find_package(Threads REQUIRED)
 
@@ -220,8 +222,6 @@ add_executable(
   ${PYTHON_BACKEND_STUB_SRCS}
 )
 
-add_dependencies(triton-python-backend boostorg)
-add_dependencies(triton-python-backend-stub boostorg)
 
 set_property(TARGET triton-python-backend-stub PROPERTY OUTPUT_NAME triton_python_backend_stub)
 
@@ -255,6 +255,9 @@ target_link_libraries(
   triton-core-serverstub # from repo-core
   ZLIB::ZLIB
   -larchive
+  -ldl
+  Boost::stacktrace_backtrace
+  Boost::stacktrace_addr2line
 )
 
 target_link_libraries(
@@ -267,6 +270,9 @@ target_link_libraries(
   pybind11::embed
   -lrt # shared memory
   -larchive # libarchive
+  -ldl
+  Boost::stacktrace_backtrace
+  Boost::stacktrace_addr2line
 )
 
 set_target_properties(
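
Note: the CMake changes above replace the copied-headers ExternalProject with FetchContent at boost-1.81.0 and link the backtrace and addr2line stacktrace backends, presumably so the shared-memory debugging below can capture where allocations originate. A minimal sketch of what that configuration enables; the file and function names here are illustrative, not part of this PR:

// stacktrace_demo.cc -- illustrative only. With
// -DBOOST_STACKTRACE_USE_BACKTRACE defined and -ldl linked as above,
// the trace resolves symbol names (and file:line where debug info exists).
#include <boost/stacktrace.hpp>
#include <iostream>

void DumpCurrentStack()
{
  // Capture the calling thread's stack and stream it frame by frame.
  std::cout << boost::stacktrace::stacktrace() << std::endl;
}
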
src/gpu_buffers.cc (+2 -1)

@@ -61,7 +61,8 @@ GPUBuffersHelper::Complete(std::unique_ptr<SharedMemoryManager>& shm_pool)
"Complete has already been called. Complete should only be called "
"once.");
}
gpu_buffers_shm_ = shm_pool->Construct<GPUBuffersShm>();
gpu_buffers_shm_ = shm_pool->Construct<GPUBuffersShm>(
1 /* count */, false /* aligned */, "[GpuBufferShm]");
if (!error_shm_) {
buffers_handle_shm_ =
shm_pool->Construct<bi::managed_external_buffer::handle_t>(
Expand Down
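
Note: the call sites in this and the following files pass two new trailing arguments to SharedMemoryManager::Construct and SharedMemoryManager::Load: the aligned flag plus a string tag naming the allocation. The shm_manager change itself is not among the hunks shown, so the signatures implied are an assumption; a sketch of the shape these calls rely on:

// Assumed SharedMemoryManager method shapes (hypothetical; shm_manager.h
// is not part of the hunks shown). A debug tag would let the pool
// attribute each live block to a named call site when hunting the leak.
template <typename T>
AllocatedSharedMemory<T> Construct(
    uint64_t count = 1, bool aligned = false, const char* debug_tag = "");

template <typename T>
AllocatedSharedMemory<T> Load(
    bi::managed_external_buffer::handle_t handle, bool aligned = false,
    const char* debug_tag = "");
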
src/infer_request.cc (+14 -11)

@@ -178,12 +178,13 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
 {
   AllocatedSharedMemory<char> infer_request_shm = shm_pool->Construct<char>(
       sizeof(InferRequestShm) +
-          (RequestedOutputNames().size() *
-           sizeof(bi::managed_external_buffer::handle_t)) +
-          (Inputs().size() * sizeof(bi::managed_external_buffer::handle_t)) +
-          PbString::ShmStructSize(ModelName()) +
-          PbString::ShmStructSize(RequestId()) +
-          PbString::ShmStructSize(Parameters()));
+          (RequestedOutputNames().size() *
+           sizeof(bi::managed_external_buffer::handle_t)) +
+          (Inputs().size() * sizeof(bi::managed_external_buffer::handle_t)) +
+          PbString::ShmStructSize(ModelName()) +
+          PbString::ShmStructSize(RequestId()) +
+          PbString::ShmStructSize(Parameters()),
+      false /* aligned */, "[InferRequestShm]");
 
   infer_request_shm_ptr_ =
       reinterpret_cast<InferRequestShm*>(infer_request_shm.data_.get());
@@ -268,8 +269,8 @@ InferRequest::LoadFromSharedMemory(
     std::unique_ptr<SharedMemoryManager>& shm_pool,
     bi::managed_external_buffer::handle_t request_handle, bool open_cuda_handle)
 {
-  AllocatedSharedMemory<char> infer_request_shm =
-      shm_pool->Load<char>(request_handle);
+  AllocatedSharedMemory<char> infer_request_shm = shm_pool->Load<char>(
+      request_handle, false /* aligned */, "[InferRequestShm]");
   InferRequestShm* infer_request_shm_ptr =
       reinterpret_cast<InferRequestShm*>(infer_request_shm.data_.get());
 
@@ -453,7 +454,8 @@ InferRequest::Exec(const bool is_decoupled)
   }
 
   request_batch = shm_pool->Construct<char>(
-      sizeof(RequestBatch) + sizeof(bi::managed_external_buffer::handle_t));
+      sizeof(RequestBatch) + sizeof(bi::managed_external_buffer::handle_t),
+      false /* aligned */, "[RequestBatch]");
 
   RequestBatch* request_batch_shm_ptr =
       reinterpret_cast<RequestBatch*>(request_batch.data_.get());
@@ -493,7 +495,8 @@
   if (has_gpu_tensor) {
     AllocatedSharedMemory<GPUBuffersShm> gpu_buffers_shm =
         shm_pool->Load<GPUBuffersShm>(
-            request_batch_shm_ptr->gpu_buffers_handle);
+            request_batch_shm_ptr->gpu_buffers_handle, false,
+            "[GpuBufferShm]");
     AllocatedSharedMemory<bi::managed_external_buffer::handle_t>
         gpu_buffers_handle =
             shm_pool->Load<bi::managed_external_buffer::handle_t>(
@@ -544,7 +547,7 @@
         shm_pool, ipc_message->ResponseHandle());
 
     AllocatedSharedMemory<char> response_batch_shm =
-        shm_pool->Load<char>(bls_response->Args());
+        shm_pool->Load<char>(bls_response->Args(), false, "[BlsResponseShm]");
     response_batch =
         reinterpret_cast<ResponseBatch*>(response_batch_shm.data_.get());
     response_handle = reinterpret_cast<bi::managed_external_buffer::handle_t*>(
src/infer_response.cc (+7 -5)

@@ -70,11 +70,13 @@ InferResponse::SaveToSharedMemory(
 {
   size_t output_tensor_length = output_tensors_.size();
   if (HasError()) {
-    response_shm_ = shm_pool->Construct<char>(sizeof(ResponseShm));
+    response_shm_ = shm_pool->Construct<char>(
+        sizeof(ResponseShm), false /* aligned */, "[InferResponseShm]");
   } else {
     response_shm_ = shm_pool->Construct<char>(
-        sizeof(ResponseShm) +
-        output_tensor_length * sizeof(bi::managed_external_buffer::handle_t));
+        sizeof(ResponseShm) + output_tensor_length *
+                                  sizeof(bi::managed_external_buffer::handle_t),
+        false /* aligned */, "[InferResponseShm]");
   }
 
   ResponseShm* response_shm_ptr =
@@ -135,8 +137,8 @@ InferResponse::LoadFromSharedMemory(
     bi::managed_external_buffer::handle_t response_handle,
     bool open_cuda_handle)
 {
-  AllocatedSharedMemory<char> response_shm =
-      shm_pool->Load<char>(response_handle);
+  AllocatedSharedMemory<char> response_shm = shm_pool->Load<char>(
+      response_handle, false /* aligned */, "[InferResponseShm]");
   ResponseShm* response_shm_ptr =
       reinterpret_cast<ResponseShm*>(response_shm.data_.get());
   uint32_t requested_output_count = response_shm_ptr->outputs_size;
src/ipc_message.cc (+8 -6)

@@ -34,17 +34,18 @@ IPCMessage::Create(
     const std::unique_ptr<SharedMemoryManager>& shm_pool, bool inline_response)
 {
   AllocatedSharedMemory<IPCMessageShm> ipc_message_shm =
-      shm_pool->Construct<IPCMessageShm>();
+      shm_pool->Construct<IPCMessageShm>(
+          1 /* count */, false /* aligned */, "[IPCMessageShm]");
 
   ipc_message_shm.data_->inline_response = inline_response;
   AllocatedSharedMemory<bi::interprocess_mutex> response_mutex_shm;
   AllocatedSharedMemory<bi::interprocess_condition> response_cond_shm;
   if (inline_response) {
     response_mutex_shm = std::move(shm_pool->Construct<bi::interprocess_mutex>(
-        1 /* count */, true /* aligned */));
+        1 /* count */, true /* aligned */, "[ResponseMutexShm]"));
     response_cond_shm =
         std::move(shm_pool->Construct<bi::interprocess_condition>(
-            1 /* count */, true /* aligned */));
+            1 /* count */, true /* aligned */, "[ResponseCondShm]"));
 
     ipc_message_shm.data_->response_mutex = response_mutex_shm.handle_;
     ipc_message_shm.data_->response_cond = response_cond_shm.handle_;
@@ -62,15 +63,16 @@ IPCMessage::LoadFromSharedMemory(
     bi::managed_external_buffer::handle_t message_handle)
 {
   AllocatedSharedMemory<IPCMessageShm> ipc_message_shm =
-      shm_pool->Load<IPCMessageShm>(message_handle);
+      shm_pool->Load<IPCMessageShm>(
+          message_handle, false /* aligned */, "[IPCMessageShm]");
 
   AllocatedSharedMemory<bi::interprocess_mutex> response_mutex_shm;
   AllocatedSharedMemory<bi::interprocess_condition> response_cond_shm;
   if (ipc_message_shm.data_->inline_response) {
     response_mutex_shm = shm_pool->Load<bi::interprocess_mutex>(
-        ipc_message_shm.data_->response_mutex);
+        ipc_message_shm.data_->response_mutex, false, "[ResponseMutexShm]");
     response_cond_shm = shm_pool->Load<bi::interprocess_condition>(
-        ipc_message_shm.data_->response_cond);
+        ipc_message_shm.data_->response_cond, false, "[ResponseCondShm]");
   }
 
   return std::unique_ptr<IPCMessage>(
src/message_queue.h (+9 -6)

@@ -63,11 +63,13 @@ class MessageQueue {
       uint32_t message_queue_size)
   {
     AllocatedSharedMemory<MessageQueueShm> mq_shm =
-        shm_pool->Construct<MessageQueueShm>();
+        shm_pool->Construct<MessageQueueShm>(
+            1 /* count */, false /* aligned */, "[MessageQueueShm]");
     mq_shm.data_->size = message_queue_size;
 
-    AllocatedSharedMemory<T> mq_buffer_shm =
-        shm_pool->Construct<T>(message_queue_size /* count */);
+    AllocatedSharedMemory<T> mq_buffer_shm = shm_pool->Construct<T>(
+        message_queue_size /* count */, false /* aligned */,
+        "[MessageQueueBufferShm]");
     mq_shm.data_->buffer = mq_buffer_shm.handle_;
     mq_shm.data_->head = 0;
     mq_shm.data_->tail = 0;
@@ -87,9 +89,10 @@
       bi::managed_external_buffer::handle_t message_queue_handle)
   {
     AllocatedSharedMemory<MessageQueueShm> mq_shm =
-        shm_pool->Load<MessageQueueShm>(message_queue_handle);
-    AllocatedSharedMemory<T> mq_shm_buffer =
-        shm_pool->Load<T>(mq_shm.data_->buffer);
+        shm_pool->Load<MessageQueueShm>(
+            message_queue_handle, false, "[MessageQueueShm]");
+    AllocatedSharedMemory<T> mq_shm_buffer = shm_pool->Load<T>(
+        mq_shm.data_->buffer, false, "[MessageQueueBufferShm]");
 
     return std::unique_ptr<MessageQueue<T>>(
         new MessageQueue(mq_shm, mq_shm_buffer));
src/metric.cc (+3 -2)

@@ -52,7 +52,8 @@ void
 Metric::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
 {
   AllocatedSharedMemory<MetricShm> custom_metric_shm =
-      shm_pool->Construct<MetricShm>();
+      shm_pool->Construct<MetricShm>(
+          1 /* count */, false /* aligned */, "[MetricShm]");
   custom_metric_shm_ptr_ = custom_metric_shm.data_.get();
 
   std::unique_ptr<PbString> labels_shm = PbString::Create(shm_pool, labels_);
@@ -74,7 +75,7 @@ Metric::LoadFromSharedMemory(
     bi::managed_external_buffer::handle_t handle)
 {
   AllocatedSharedMemory<MetricShm> custom_metric_shm =
-      shm_pool->Load<MetricShm>(handle);
+      shm_pool->Load<MetricShm>(handle, false, "[MetricShm]");
   MetricShm* custom_metric_shm_ptr = custom_metric_shm.data_.get();
 
   std::unique_ptr<PbString> labels_shm = PbString::LoadFromSharedMemory(
src/metric_family.cc (+3 -2)

@@ -73,7 +73,8 @@ void
 MetricFamily::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
 {
   AllocatedSharedMemory<MetricFamilyShm> custom_metric_family_shm =
-      shm_pool->Construct<MetricFamilyShm>();
+      shm_pool->Construct<MetricFamilyShm>(
+          1 /* count */, false /* aligned */, "[MetricFamilyShm]");
 
   custom_metric_family_shm_ptr_ = custom_metric_family_shm.data_.get();
   std::unique_ptr<PbString> name_shm = PbString::Create(shm_pool, name_);
@@ -99,7 +100,7 @@ MetricFamily::LoadFromSharedMemory(
     bi::managed_external_buffer::handle_t handle)
 {
   AllocatedSharedMemory<MetricFamilyShm> custom_metric_family_shm =
-      shm_pool->Load<MetricFamilyShm>(handle);
+      shm_pool->Load<MetricFamilyShm>(handle, false, "[MetricFamilyShm]");
   MetricFamilyShm* custom_metric_family_shm_ptr =
       custom_metric_family_shm.data_.get();
   std::unique_ptr<PbString> name_shm = PbString::LoadFromSharedMemory(
src/model_loader.cc (+3 -2)

@@ -35,7 +35,8 @@ void
 ModelLoader::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
 {
   AllocatedSharedMemory<ModelLoaderRequestShm> model_loader_req_shm =
-      shm_pool->Construct<ModelLoaderRequestShm>();
+      shm_pool->Construct<ModelLoaderRequestShm>(
+          1 /* count */, false /* aligned */, "[ModelLoadShm]");
   model_loader_req_shm_ptr_ = model_loader_req_shm.data_.get();
 
   std::unique_ptr<PbString> name_shm = PbString::Create(shm_pool, name_);
@@ -65,7 +66,7 @@ ModelLoader::LoadFromSharedMemory(
     bi::managed_external_buffer::handle_t handle)
 {
   AllocatedSharedMemory<ModelLoaderRequestShm> model_loader_req_shm =
-      shm_pool->Load<ModelLoaderRequestShm>(handle);
+      shm_pool->Load<ModelLoaderRequestShm>(handle, false, "[ModelLoadShm]");
   ModelLoaderRequestShm* model_loader_req_shm_ptr =
       model_loader_req_shm.data_.get();
 
src/pb_error.cc (+3 -2)

@@ -50,7 +50,8 @@ void
 PbError::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
 {
   message_shm_ = PbString::Create(shm_pool, message_);
-  error_shm_ = shm_pool->Construct<PbErrorShm>();
+  error_shm_ = shm_pool->Construct<PbErrorShm>(
+      1 /* count */, false /* aligned */, "[PbErrorShm]");
   error_shm_.data_->code = code_;
   error_shm_.data_->message_shm_handle = message_shm_->ShmHandle();
   shm_handle_ = error_shm_.handle_;
@@ -62,7 +63,7 @@ PbError::LoadFromSharedMemory(
     bi::managed_external_buffer::handle_t shm_handle)
 {
   AllocatedSharedMemory<PbErrorShm> error_shm =
-      shm_pool->Load<PbErrorShm>(shm_handle);
+      shm_pool->Load<PbErrorShm>(shm_handle, false, "[PbErrorShm]");
   std::unique_ptr<PbString> message_shm = PbString::LoadFromSharedMemory(
       shm_pool, error_shm.data_->message_shm_handle);
 
src/pb_log.cc (+3 -2)

@@ -76,7 +76,8 @@ PbLogShm::Create(
   std::unique_ptr<PbString> file_name = PbString::Create(shm_pool, filename);
   std::unique_ptr<PbString> log_message = PbString::Create(shm_pool, message);
   AllocatedSharedMemory<LogSendMessage> log_send_message =
-      shm_pool->Construct<LogSendMessage>();
+      shm_pool->Construct<LogSendMessage>(
+          1 /* count */, false /* aligned */, "[LogSendMessage]");
 
   LogSendMessage* send_message_payload = log_send_message.data_.get();
   new (&(send_message_payload->mu)) bi::interprocess_mutex;
@@ -94,7 +95,7 @@ PbLogShm::LoadFromSharedMemory(
     bi::managed_external_buffer::handle_t handle)
 {
   AllocatedSharedMemory<LogSendMessage> log_container_shm =
-      shm_pool->Load<LogSendMessage>(handle);
+      shm_pool->Load<LogSendMessage>(handle, false, "[LogSendMessage]");
   std::unique_ptr<PbString> pb_string_filename = PbString::LoadFromSharedMemory(
       shm_pool, log_container_shm.data_->filename);
   const std::string& filename = pb_string_filename->String();
src/pb_map.cc (+7 -5)

@@ -34,11 +34,12 @@ PbMap::Create(
     std::unordered_map<std::string, std::string>& map)
 {
   std::vector<std::unique_ptr<PbString>> strings;
-  AllocatedSharedMemory<DictShm> dict_shm = shm_pool->Construct<DictShm>();
+  AllocatedSharedMemory<DictShm> dict_shm = shm_pool->Construct<DictShm>(
+      1 /* count */, false /* aligned */, "[DictShm]");
   dict_shm.data_->length = map.size();
 
-  AllocatedSharedMemory<PairShm> pair_shms =
-      shm_pool->Construct<PairShm>(map.size());
+  AllocatedSharedMemory<PairShm> pair_shms = shm_pool->Construct<PairShm>(
+      map.size(), false /* aligned */, "[PairShm]");
   dict_shm.data_->values = pair_shms.handle_;
 
   size_t i = 0;
@@ -74,9 +75,10 @@ PbMap::LoadFromSharedMemory(
     std::unique_ptr<SharedMemoryManager>& shm_pool,
     bi::managed_external_buffer::handle_t handle)
 {
-  AllocatedSharedMemory<DictShm> dict_shm = shm_pool->Load<DictShm>(handle);
+  AllocatedSharedMemory<DictShm> dict_shm =
+      shm_pool->Load<DictShm>(handle, false, "[DictShm]");
   AllocatedSharedMemory<PairShm> pair_shms =
-      shm_pool->Load<PairShm>(dict_shm.data_->values);
+      shm_pool->Load<PairShm>(dict_shm.data_->values, false, "[PairShm]");
 
   std::vector<std::unique_ptr<PbString>> pb_strings;
   std::unordered_map<std::string, std::string> map;
src/pb_memory.cc (+4 -3)

@@ -44,8 +44,8 @@ PbMemory::Create(
     requested_byte_size += byte_size;
   }
 
-  AllocatedSharedMemory<char> memory_shm =
-      shm_pool->Construct<char>(requested_byte_size);
+  AllocatedSharedMemory<char> memory_shm = shm_pool->Construct<char>(
+      requested_byte_size, false /* aligned */, "[PbMemoryShm]");
   PbMemory::FillShmData(
       memory_type, memory_type_id, byte_size, data, memory_shm.data_.get(),
       memory_shm.handle_, copy_gpu);
@@ -248,7 +248,8 @@ PbMemory::LoadFromSharedMemory(
     std::unique_ptr<SharedMemoryManager>& shm_pool,
     bi::managed_external_buffer::handle_t handle, bool open_cuda_handle)
 {
-  AllocatedSharedMemory<char> memory_shm = shm_pool->Load<char>(handle);
+  AllocatedSharedMemory<char> memory_shm =
+      shm_pool->Load<char>(handle, false, "[PbMemoryShm]");
   MemoryShm* memory_shm_ptr =
       reinterpret_cast<MemoryShm*>(memory_shm.data_.get());
   char* memory_data_shm = memory_shm.data_.get() + sizeof(MemoryShm);
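
Note: with these changes every Construct/Load site carries a tag such as "[IPCMessageShm]" or "[PbMemoryShm]". One plausible way to turn the tags into a leak report, sketched under the assumption that the pool forwards them to a tracker (none of this code is in the PR):

// Hypothetical tag-based accounting for outstanding shared-memory blocks.
#include <boost/stacktrace.hpp>
#include <iostream>
#include <map>
#include <mutex>
#include <string>

class ShmLeakTracker {
 public:
  void OnAllocate(const std::string& tag)
  {
    std::lock_guard<std::mutex> lk(mu_);
    ++live_[tag];
  }

  void OnRelease(const std::string& tag)
  {
    std::lock_guard<std::mutex> lk(mu_);
    --live_[tag];
  }

  // Print every tag that still has blocks outstanding, plus the stack of
  // the caller asking for the report.
  void Report() const
  {
    std::lock_guard<std::mutex> lk(mu_);
    for (const auto& entry : live_) {
      if (entry.second != 0) {
        std::cerr << entry.first << ": " << entry.second
                  << " block(s) outstanding" << std::endl;
      }
    }
    std::cerr << boost::stacktrace::stacktrace() << std::endl;
  }

 private:
  mutable std::mutex mu_;
  std::map<std::string, long> live_;
};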