
Commit caca2d5

Fix BLS decoupled segfault and hang (triton-inference-server#325) (triton-inference-server#326)

* Store InferPayload using the address of the object managed by the shared_ptr

* Fix hang

* Release GIL before sending message to the other process

* Release GIL in the beginning
krishung5 authored Dec 8, 2023
1 parent ffbac67 commit caca2d5
Showing 4 changed files with 17 additions and 4 deletions.
8 changes: 7 additions & 1 deletion src/infer_request.cc
@@ -442,6 +442,13 @@ InferRequest::GetResponseSender()
 std::shared_ptr<InferResponse>
 InferRequest::Exec(const bool is_decoupled)
 {
+// Release the GIL. This avoids a potential deadlock situation in the parent
+// process, where every thread in the thread pool is indirectly waiting for a
+// function in the stub process that acquires the GIL. Meanwhile, the current
+// thread, which holds the GIL, is also waiting for the parent side to have
+// the next available thread to pick up the job during resource contention.
+py::gil_scoped_release release;
+
 // BLS should not be used in "initialize" or "finalize" function.
 std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
 if (!stub->IsInitialized() || stub->IsFinalizing()) {
@@ -465,7 +472,6 @@ InferRequest::Exec(const bool is_decoupled)
 });

 try {
-py::gil_scoped_release release;
 ipc_message = IPCMessage::Create(shm_pool, true /* inline_response */);
 bool has_exception = false;
 PythonBackendException pb_exception(std::string{});
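For reference, here is a minimal sketch (not Triton code) of the pybind11 pattern this change relies on: py::gil_scoped_release drops the GIL when it is constructed and re-acquires it when it goes out of scope, so declaring it at the top of Exec keeps the GIL released for the entire call, including the blocking exchange with the stub process. The module name gil_demo and the wait primitives standing in for the IPC handshake are illustrative assumptions.

// Minimal sketch of releasing the GIL around a blocking wait; not Triton code.
#include <pybind11/pybind11.h>

#include <chrono>
#include <condition_variable>
#include <mutex>

namespace py = pybind11;

// Hypothetical stand-ins for the IPC handshake with the stub process.
static std::mutex ipc_mutex;
static std::condition_variable ipc_cv;
static bool reply_ready = false;

void blocking_exec()
{
  // Drop the GIL for the rest of this scope. Any Python-side work that needs
  // the GIL can now run while this thread blocks below.
  py::gil_scoped_release release;

  std::unique_lock<std::mutex> lock(ipc_mutex);
  ipc_cv.wait_for(lock, std::chrono::seconds(1), [] { return reply_ready; });
  // The GIL is re-acquired automatically when `release` is destroyed.
}

PYBIND11_MODULE(gil_demo, m)
{
  m.def("blocking_exec", &blocking_exec);
}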
4 changes: 2 additions & 2 deletions src/python_be.cc
@@ -752,7 +752,7 @@ ModelInstanceState::ExecuteBLSRequest(
 if (is_decoupled && (infer_response->Id() != nullptr)) {
 // Need to manage the lifetime of InferPayload object for bls
 // decoupled responses.
-infer_payload_[reinterpret_cast<void*>(&infer_payload)] =
+infer_payload_[reinterpret_cast<intptr_t>(infer_payload.get())] =
 infer_payload;
 }

@@ -943,7 +943,7 @@ ModelInstanceState::ProcessBLSCleanupRequest(
 reinterpret_cast<CleanupMessage*>(cleanup_request_message.data_.get());

 void* id = cleanup_message_ptr->id;
-infer_payload_.erase(id);
+infer_payload_.erase(reinterpret_cast<intptr_t>(id));

 {
 bi::scoped_lock<bi::interprocess_mutex> lock{*(message->ResponseMutex())};
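As a side note, the sketch below (not Triton code) shows the keying scheme the change above adopts: the map key is derived from the pointer returned by shared_ptr::get(), i.e. the address of the managed object, rather than the address of the local shared_ptr handle, so the same id can be recomputed later from the raw pointer carried in the cleanup message. The Payload type and the registry/function names are illustrative.

// Minimal sketch of a registry keyed by the address of the managed object.
#include <cassert>
#include <cstdint>
#include <memory>
#include <unordered_map>

struct Payload {};

static std::unordered_map<intptr_t, std::shared_ptr<Payload>> registry;

intptr_t RegisterPayload(const std::shared_ptr<Payload>& payload)
{
  // payload.get() is stable for the lifetime of the managed object, so the
  // key can be recomputed from a raw pointer sent across a process boundary.
  // By contrast, &payload is the address of this particular handle (often a
  // stack variable) and is not a reliable key.
  intptr_t id = reinterpret_cast<intptr_t>(payload.get());
  registry[id] = payload;  // keeps the object alive until cleanup
  return id;
}

void CleanupPayload(void* id)
{
  // The id round-trips as a void* in the cleanup message and is converted
  // back to the map's key type before erasing.
  registry.erase(reinterpret_cast<intptr_t>(id));
}

int main()
{
  auto payload = std::make_shared<Payload>();
  intptr_t id = RegisterPayload(payload);
  assert(registry.count(id) == 1);
  CleanupPayload(reinterpret_cast<void*>(id));
  assert(registry.empty());
  return 0;
}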
2 changes: 1 addition & 1 deletion src/python_be.h
@@ -286,7 +286,7 @@ class ModelInstanceState : public BackendModelInstance {
 std::unique_ptr<IPCMessage> received_message_;
 std::vector<std::future<void>> futures_;
 std::unique_ptr<boost::asio::thread_pool> thread_pool_;
-std::unordered_map<void*, std::shared_ptr<InferPayload>> infer_payload_;
+std::unordered_map<intptr_t, std::shared_ptr<InferPayload>> infer_payload_;
 std::unique_ptr<RequestExecutor> request_executor_;
 std::mutex response_factory_map_mutex_;
 std::unordered_map<intptr_t, TRITONBACKEND_ResponseFactory*>
7 changes: 7 additions & 0 deletions src/response_sender.cc
@@ -50,6 +50,13 @@ void
 ResponseSender::Send(
 std::shared_ptr<InferResponse> infer_response, const uint32_t flags)
 {
+// Release the GIL. This avoids a potential deadlock situation in the parent
+// process, where every thread in the thread pool is indirectly waiting for a
+// function in the stub process that acquires the GIL. Meanwhile, the current
+// thread, which holds the GIL, is also waiting for the parent side to have
+// the next available thread to pick up the job during resource contention.
+py::gil_scoped_release release;
+
 if (closed_) {
 throw PythonBackendException(
 "Unable to send response. Response sender has been closed.");
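To illustrate the hang the comment describes, the sketch below (not Triton code) uses a plain std::mutex as a stand-in for the GIL: a waiter blocks on a reply that can only be produced by code that also needs that lock. Releasing the lock before waiting, which is what the gil_scoped_release at the top of Send achieves for the GIL, lets the reply path make progress. All names here are illustrative.

// Minimal sketch of the deadlock shape; std::mutex stands in for the GIL.
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

static std::mutex gil;  // stand-in for the Python GIL
static std::mutex reply_mutex;
static std::condition_variable reply_cv;
static bool reply_ready = false;

void reply_path()
{
  // Needs the "GIL" before it can produce the reply.
  std::lock_guard<std::mutex> hold(gil);
  {
    std::lock_guard<std::mutex> lock(reply_mutex);
    reply_ready = true;
  }
  reply_cv.notify_one();
}

void sender()
{
  // Analogous to a Send() call that blocks until a reply arrives.
  std::unique_lock<std::mutex> hold(gil);
  hold.unlock();  // the fix: do not hold the "GIL" while waiting
  std::unique_lock<std::mutex> lock(reply_mutex);
  reply_cv.wait(lock, [] { return reply_ready; });
  std::cout << "reply received\n";
}

int main()
{
  std::thread s(sender), r(reply_path);
  s.join();
  r.join();
  return 0;
}

Without the unlock, sender would wait forever while holding the lock that reply_path needs, which is the shape of the hang these changes remove.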
