Adding support for tracing of child models invoked from a BLS model (#277)

* Adding tracing for BLS

* Added access to trace from BLS request creation

* Added tracing to decoupled

* clang format

* Adding InferenceTrace object
oandreeva-nv authored Aug 7, 2023
1 parent a9e6a77 commit 3ac4eb1
Showing 5 changed files with 53 additions and 10 deletions.
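
Taken together, the changes below let a Python (BLS) model hand the trace of the request it is serving to the child requests it creates, so the child model's inference is reported as a child trace of the parent request. The following is a minimal sketch of the intended usage from a model's execute() function; the model name, tensor names, and response handling are placeholders, while the trace keyword argument and the trace() accessor are the bindings added by this commit.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        responses = []
        for request in requests:
            # Forward the parent request's trace so the BLS child inference is
            # recorded as a child trace of this request.
            child_request = pb_utils.InferenceRequest(
                model_name="child_model",            # placeholder model name
                requested_output_names=["OUTPUT0"],  # placeholder output name
                inputs=[pb_utils.get_input_tensor_by_name(request, "INPUT0")],
                trace=request.trace(),               # accessor added by this commit
            )
            child_response = child_request.exec()
            # Repackage the child model's output as this model's output.
            out = pb_utils.get_output_tensor_by_name(child_response, "OUTPUT0")
            responses.append(pb_utils.InferenceResponse(output_tensors=[out]))
        return responses

When trace is not supplied it defaults to an empty trace (a null TRITONSERVER_InferenceTrace), so existing BLS models are unaffected.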
13 changes: 11 additions & 2 deletions src/infer_request.cc
@@ -44,12 +44,13 @@ InferRequest::InferRequest(
const std::string& model_name, const int64_t model_version,
const std::string& parameters, const uint32_t flags, const int32_t timeout,
const intptr_t response_factory_address, const intptr_t request_address,
- const PreferredMemory& preferred_memory)
+ const PreferredMemory& preferred_memory, const InferenceTrace& trace)
: request_id_(request_id), correlation_id_(correlation_id), inputs_(inputs),
requested_output_names_(requested_output_names), model_name_(model_name),
model_version_(model_version), parameters_(parameters), flags_(flags),
timeout_(timeout), response_factory_address_(response_factory_address),
- request_address_(request_address), preferred_memory_(preferred_memory)
+ request_address_(request_address), preferred_memory_(preferred_memory),
+ trace_(trace)
{
for (auto& input : inputs) {
if (!input) {
@@ -166,6 +167,12 @@ InferRequest::GetPreferredMemory()
return preferred_memory_;
}

+ InferenceTrace&
+ InferRequest::Trace()
+ {
+ return trace_;
+ }

void
InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
{
@@ -191,6 +198,7 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
infer_request_shm_ptr_->is_decoupled = is_decoupled_;
infer_request_shm_ptr_->timeout = timeout_;
infer_request_shm_ptr_->preferred_memory = preferred_memory_;
+ infer_request_shm_ptr_->trace = trace_;

output_names_handle_shm_ptr_ =
reinterpret_cast<bi::managed_external_buffer::handle_t*>(
@@ -368,6 +376,7 @@ InferRequest::InferRequest(
is_decoupled_ = infer_request_shm_ptr_->is_decoupled;
timeout_ = infer_request_shm_ptr_->timeout;
preferred_memory_ = infer_request_shm_ptr_->preferred_memory;
+ trace_ = infer_request_shm_ptr_->trace;

#ifdef TRITON_PB_STUB
response_sender_ = std::make_shared<ResponseSender>(
17 changes: 16 additions & 1 deletion src/infer_request.h
@@ -41,6 +41,17 @@ namespace triton { namespace backend { namespace python {

class Stub;

+ //
+ // Inference Trace
+ //
+ struct InferenceTrace {
+ #ifndef TRITON_PB_STUB
+ TRITONSERVER_InferenceTrace* triton_trace_;
+ #else
+ void* triton_trace_;
+ #endif
+ };

//
// Inference Request
//
@@ -55,6 +66,7 @@ struct InferRequestShm {
bool is_decoupled;
int32_t timeout;
PreferredMemory preferred_memory;
+ InferenceTrace trace;
};

class InferRequest {
@@ -68,7 +80,8 @@ class InferRequest {
const int32_t timeout = 0, const intptr_t response_factory_address = 0,
const intptr_t request_address = 0,
const PreferredMemory& preferred_memory =
- PreferredMemory(PreferredMemory::DEFAULT, 0));
+ PreferredMemory(PreferredMemory::DEFAULT, 0),
+ const InferenceTrace& trace = {.triton_trace_ = nullptr});

const std::vector<std::shared_ptr<PbTensor>>& Inputs();
const std::string& RequestId();
@@ -84,6 +97,7 @@
bool IsDecoupled();
void SetIsDecoupled(const bool is_decoupled);
PreferredMemory& GetPreferredMemory();
+ InferenceTrace& Trace();

#ifdef TRITON_PB_STUB
std::shared_ptr<InferResponse> Exec(const bool is_decoupled);
@@ -139,6 +153,7 @@ class InferRequest {
intptr_t request_address_;
bool is_decoupled_;
PreferredMemory preferred_memory_;
+ InferenceTrace trace_;

// Shared Memory Data Structures
AllocatedSharedMemory<char> infer_request_shm_;
12 changes: 9 additions & 3 deletions src/pb_stub.cc
@@ -1362,6 +1362,9 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
.value("TRITONSERVER_MEMORY_CPU", PreferredMemory::MemoryType::CPU)
.export_values();

+ py::class_<InferenceTrace, std::shared_ptr<InferenceTrace>>(
+ module, "InferenceTrace");

py::class_<InferRequest, std::shared_ptr<InferRequest>>(
module, "InferenceRequest")
.def(
@@ -1371,7 +1374,8 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
const std::string& model_name,
const int64_t model_version, const uint32_t flags,
const int32_t timeout,
- const PreferredMemory& preferred_memory) {
+ const PreferredMemory& preferred_memory,
+ const InferenceTrace& trace) {
std::set<std::string> requested_outputs;
for (auto& requested_output_name : requested_output_names) {
requested_outputs.emplace(requested_output_name);
@@ -1381,7 +1385,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
request_id, correlation_id, inputs, requested_outputs,
model_name, model_version, "" /*parameters*/, flags, timeout,
0 /*response_factory_address*/, 0 /*request_address*/,
- preferred_memory);
+ preferred_memory, trace);
}),
py::arg("request_id").none(false) = "",
py::arg("correlation_id").none(false) = 0,
@@ -1391,7 +1395,8 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
py::arg("model_version").none(false) = -1,
py::arg("flags").none(false) = 0, py::arg("timeout").none(false) = 0,
py::arg("preferred_memory").none(false) =
- PreferredMemory(PreferredMemory::DEFAULT, 0))
+ PreferredMemory(PreferredMemory::DEFAULT, 0),
+ py::arg("trace").none(false) = nullptr)
.def(
"inputs", &InferRequest::Inputs,
py::return_value_policy::reference_internal)
@@ -1401,6 +1406,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
.def("set_flags", &InferRequest::SetFlags)
.def("timeout", &InferRequest::Timeout)
.def("parameters", &InferRequest::Parameters)
.def("trace", &InferRequest::Trace)
.def(
"exec",
[](std::shared_ptr<InferRequest>& infer_request,
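
The bindings above also apply to decoupled child models: the request is built the same way, and only the exec call changes. A sketch, assuming the usual decoupled BLS pattern of iterating over exec(decoupled=True) inside execute(); model and tensor names are again placeholders.

# Inside execute(), with `request` being the InferenceRequest being served.
child_request = pb_utils.InferenceRequest(
    model_name="decoupled_child",        # placeholder model name
    requested_output_names=["OUTPUT0"],  # placeholder output name
    inputs=[pb_utils.get_input_tensor_by_name(request, "INPUT0")],
    trace=request.trace(),               # propagate the parent trace
)
for child_response in child_request.exec(decoupled=True):
    if child_response.has_error():
        raise pb_utils.TritonModelException(child_response.error().message())
    # ... consume each streamed response here ...
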
11 changes: 9 additions & 2 deletions src/python_be.cc
@@ -364,6 +364,11 @@ ModelInstanceState::SaveRequestsToSharedMemory(
uint32_t flags;
RETURN_IF_ERROR(TRITONBACKEND_RequestFlags(request, &flags));

+ TRITONSERVER_InferenceTrace* triton_trace;
+ RETURN_IF_ERROR(TRITONBACKEND_RequestTrace(request, &triton_trace));

+ InferenceTrace trace = {triton_trace};

std::unique_ptr<InferRequest> infer_request;
if (model_state->IsDecoupled()) {
TRITONBACKEND_ResponseFactory* factory_ptr;
@@ -372,13 +377,15 @@
id, correlation_id, pb_input_tensors, requested_output_names,
model_state->Name(), model_state->Version(), parameters_string, flags,
0 /* BLS request timeout*/, reinterpret_cast<intptr_t>(factory_ptr),
- reinterpret_cast<intptr_t>(request));
+ reinterpret_cast<intptr_t>(request),
+ PreferredMemory(PreferredMemory::DEFAULT, 0), trace);
} else {
infer_request = std::make_unique<InferRequest>(
id, correlation_id, pb_input_tensors, requested_output_names,
model_state->Name(), model_state->Version(), parameters_string, flags,
0 /* BLS request timeout*/, 0 /* response_factory_address */,
- reinterpret_cast<intptr_t>(request));
+ reinterpret_cast<intptr_t>(request),
+ PreferredMemory(PreferredMemory::DEFAULT, 0), trace);
}

RETURN_IF_EXCEPTION(infer_request->SaveToSharedMemory(Stub()->ShmPool()));
10 changes: 8 additions & 2 deletions src/request_executor.cc
@@ -359,6 +359,12 @@ RequestExecutor::Infer(
THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestSetReleaseCallback(
irequest, InferRequestComplete, nullptr /* request_release_userp */));

+ TRITONSERVER_InferenceTrace* trace = nullptr;
+ if (infer_request->Trace().triton_trace_ != nullptr) {
+ THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceTraceSpawnChildTrace(
+ infer_request->Trace().triton_trace_, &trace));
+ }

for (auto& infer_input : infer_request->Inputs()) {
THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestAddInput(
irequest, infer_input->Name().c_str(),
@@ -388,8 +394,8 @@
reinterpret_cast<void*>(infer_payload->ResponseAllocUserp().get()),
InferResponseComplete, reinterpret_cast<void*>(infer_payload.get())));

- THROW_IF_TRITON_ERROR(TRITONSERVER_ServerInferAsync(
- server_, irequest, nullptr /* trace */));
+ THROW_IF_TRITON_ERROR(
+ TRITONSERVER_ServerInferAsync(server_, irequest, trace));
}
}
catch (const PythonBackendException& pb_exception) {
