Adding InferenceTrace object
oandreeva-nv committed Jul 21, 2023
1 parent fa9be85 commit 6950941
Showing 5 changed files with 29 additions and 13 deletions.
4 changes: 2 additions & 2 deletions src/infer_request.cc
@@ -44,7 +44,7 @@ InferRequest::InferRequest(
const std::string& model_name, const int64_t model_version,
const std::string& parameters, const uint32_t flags, const int32_t timeout,
const intptr_t response_factory_address, const intptr_t request_address,
- const PreferredMemory& preferred_memory, TRITONSERVER_InferenceTrace* trace)
+ const PreferredMemory& preferred_memory, const InferenceTrace& trace)
: request_id_(request_id), correlation_id_(correlation_id), inputs_(inputs),
requested_output_names_(requested_output_names), model_name_(model_name),
model_version_(model_version), parameters_(parameters), flags_(flags),
@@ -167,7 +167,7 @@ InferRequest::GetPreferredMemory()
return preferred_memory_;
}

- TRITONSERVER_InferenceTrace*
+ InferenceTrace&
InferRequest::Trace()
{
return trace_;
19 changes: 15 additions & 4 deletions src/infer_request.h
@@ -41,6 +41,17 @@ namespace triton { namespace backend { namespace python {

class Stub;

+ //
+ // Inference Trace
+ //
+ struct InferenceTrace {
+ #ifndef TRITON_PB_STUB
+ TRITONSERVER_InferenceTrace* triton_trace_;
+ #else
+ void* triton_trace_;
+ #endif
+ };
+
//
// Inference Request
//
@@ -55,7 +66,7 @@ struct InferRequestShm {
bool is_decoupled;
int32_t timeout;
PreferredMemory preferred_memory;
- TRITONSERVER_InferenceTrace* trace;
+ InferenceTrace trace;
};

class InferRequest {
@@ -70,7 +81,7 @@ class InferRequest {
const intptr_t request_address = 0,
const PreferredMemory& preferred_memory =
PreferredMemory(PreferredMemory::DEFAULT, 0),
- TRITONSERVER_InferenceTrace* trace = nullptr);
+ const InferenceTrace& trace = {.triton_trace_ = nullptr});

const std::vector<std::shared_ptr<PbTensor>>& Inputs();
const std::string& RequestId();
@@ -86,7 +97,7 @@ class InferRequest {
bool IsDecoupled();
void SetIsDecoupled(const bool is_decoupled);
PreferredMemory& GetPreferredMemory();
- TRITONSERVER_InferenceTrace* Trace();
+ InferenceTrace& Trace();

#ifdef TRITON_PB_STUB
std::shared_ptr<InferResponse> Exec(const bool is_decoupled);
@@ -142,7 +153,7 @@ class InferRequest {
intptr_t request_address_;
bool is_decoupled_;
PreferredMemory preferred_memory_;
- TRITONSERVER_InferenceTrace* trace_;
+ InferenceTrace trace_;

// Shared Memory Data Structures
AllocatedSharedMemory<char> infer_request_shm_;
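The new `InferenceTrace` struct exists so that `InferRequestShm` stays a plain struct with one layout in both binaries: the backend sees the real `TRITONSERVER_InferenceTrace*`, while the stub process, which is built without the Triton server headers (`TRITON_PB_STUB`), sees an opaque `void*`. A minimal, self-contained sketch of that pattern — `MockServerTrace` and `InferenceTraceSketch` are illustrative stand-ins, not types from this repository:

```cpp
#include <cstdint>
#include <iostream>

// Hypothetical stand-in for the opaque TRITONSERVER_InferenceTrace handle;
// only the real Triton server headers define the actual type.
struct MockServerTrace {
  uint64_t id;
};

// Same shape as the InferenceTrace struct added in infer_request.h: on the
// backend side the member is the real trace pointer, inside the stub
// (TRITON_PB_STUB) it degrades to an opaque void*, so the shared-memory
// layout of the request stays identical in both binaries.
#ifndef TRITON_PB_STUB
struct InferenceTraceSketch {
  MockServerTrace* triton_trace_;
};
#else
struct InferenceTraceSketch {
  void* triton_trace_;
};
#endif

int main()
{
  MockServerTrace server_trace{42};

  // Backend side: wrap the raw handle by aggregate initialization before
  // the request is copied into shared memory (compare python_be.cc below).
  InferenceTraceSketch trace = {&server_trace};

  // Default used by the InferRequest constructor when no trace is attached.
  InferenceTraceSketch no_trace = {nullptr};

  std::cout << std::boolalpha << (trace.triton_trace_ != nullptr) << " "
            << (no_trace.triton_trace_ == nullptr) << std::endl;
  return 0;
}
```

Because the wrapper is an aggregate either way, the `{.triton_trace_ = nullptr}` default in the constructor signature and the `{triton_trace}` initialization in the backend both work unchanged on each side of the boundary.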
9 changes: 6 additions & 3 deletions src/pb_stub.cc
@@ -1362,6 +1362,9 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
.value("TRITONSERVER_MEMORY_CPU", PreferredMemory::MemoryType::CPU)
.export_values();

+ py::class_<InferenceTrace, std::shared_ptr<InferenceTrace>>(
+ module, "InferenceTrace");
+
py::class_<InferRequest, std::shared_ptr<InferRequest>>(
module, "InferenceRequest")
.def(
@@ -1372,12 +1375,11 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
const int64_t model_version, const uint32_t flags,
const int32_t timeout,
const PreferredMemory& preferred_memory,
- std::shared_ptr<InferRequest>& request) {
+ const InferenceTrace& trace) {
std::set<std::string> requested_outputs;
for (auto& requested_output_name : requested_output_names) {
requested_outputs.emplace(requested_output_name);
}
- auto trace = (request != nullptr) ? request->Trace() : nullptr;
// FIXME: InferenceRequest parameters are not supported in BLS now.
return std::make_shared<InferRequest>(
request_id, correlation_id, inputs, requested_outputs,
@@ -1394,7 +1396,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
py::arg("flags").none(false) = 0, py::arg("timeout").none(false) = 0,
py::arg("preferred_memory").none(false) =
PreferredMemory(PreferredMemory::DEFAULT, 0),
py::arg("request").none(false) = nullptr)
py::arg("trace").none(false) = nullptr)
.def(
"inputs", &InferRequest::Inputs,
py::return_value_policy::reference_internal)
@@ -1404,6 +1406,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
.def("set_flags", &InferRequest::SetFlags)
.def("timeout", &InferRequest::Timeout)
.def("parameters", &InferRequest::Parameters)
.def("trace", &InferRequest::Trace)
.def(
"exec",
[](std::shared_ptr<InferRequest>& infer_request,
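On the Python side, the binding now exposes `InferenceTrace` as an opaque class, accepts it through the new `trace` keyword argument (replacing the old `request` argument), and adds a `trace()` accessor, so a model can forward the trace of the request it received into a BLS request it builds. A hedged, self-contained pybind11 sketch of that binding shape — `trace_sketch`, `TraceSketch`, and `RequestSketch` are illustrative names, not the backend's real module or classes:

```cpp
#include <pybind11/embed.h>
#include <pybind11/pybind11.h>

#include <string>

namespace py = pybind11;

// Illustrative stand-ins for the InferenceTrace / InferRequest classes that
// pb_stub.cc actually binds.
struct TraceSketch {
  void* triton_trace_ = nullptr;
};

class RequestSketch {
 public:
  RequestSketch(const std::string& request_id, const TraceSketch& trace)
      : request_id_(request_id), trace_(trace)
  {
  }
  const std::string& RequestId() { return request_id_; }
  TraceSketch& Trace() { return trace_; }

 private:
  std::string request_id_;
  TraceSketch trace_;
};

PYBIND11_EMBEDDED_MODULE(trace_sketch, module)
{
  // Opaque handle class with no user-callable methods, mirroring
  // py::class_<InferenceTrace, ...>(module, "InferenceTrace").
  py::class_<TraceSketch>(module, "InferenceTrace");

  py::class_<RequestSketch>(module, "InferenceRequest")
      .def(
          py::init<const std::string&, const TraceSketch&>(),
          py::arg("request_id") = "",
          // The new keyword argument: callers forward a parent trace here.
          py::arg("trace") = TraceSketch{})
      .def("request_id", &RequestSketch::RequestId)
      // Accessor added by this commit so the trace can be read back.
      .def("trace", &RequestSketch::Trace);
}

int main()
{
  py::scoped_interpreter guard{};
  py::exec(R"(
import trace_sketch as utils

parent = utils.InferenceRequest(request_id="parent")
# Forward the parent's trace into a new (BLS-style) request.
child = utils.InferenceRequest(request_id="child", trace=parent.trace())
print(type(child.trace()).__name__)  # InferenceTrace
)");
  return 0;
}
```

Building the sketch needs pybind11 with embedded-interpreter support; only the shape of the `.def(...)` chain mirrors pb_stub.cc, not its signatures or defaults.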
6 changes: 4 additions & 2 deletions src/python_be.cc
@@ -364,8 +364,10 @@ ModelInstanceState::SaveRequestsToSharedMemory(
uint32_t flags;
RETURN_IF_ERROR(TRITONBACKEND_RequestFlags(request, &flags));

- TRITONSERVER_InferenceTrace* trace;
- RETURN_IF_ERROR(TRITONBACKEND_RequestTrace(request, &trace));
+ TRITONSERVER_InferenceTrace* triton_trace;
+ RETURN_IF_ERROR(TRITONBACKEND_RequestTrace(request, &triton_trace));
+
+ InferenceTrace trace = {triton_trace};

std::unique_ptr<InferRequest> infer_request;
if (model_state->IsDecoupled()) {
4 changes: 2 additions & 2 deletions src/request_executor.cc
@@ -360,9 +360,9 @@ RequestExecutor::Infer(
irequest, InferRequestComplete, nullptr /* request_release_userp */));

TRITONSERVER_InferenceTrace* trace = nullptr;
- if (infer_request->Trace() != nullptr) {
+ if (infer_request->Trace().triton_trace_ != nullptr) {
THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceTraceSpawnChildTrace(
- infer_request->Trace(), &trace));
+ infer_request->Trace().triton_trace_, &trace));
}

for (auto& infer_input : infer_request->Inputs()) {
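The executor now reaches through the wrapper (`Trace().triton_trace_`) before deciding whether to spawn a child trace for the BLS request. A small sketch of that guard, with a hypothetical `SpawnChildTrace` standing in for `TRITONSERVER_InferenceTraceSpawnChildTrace`, since the real call needs the server library:

```cpp
#include <iostream>

// Hypothetical stand-ins; the real types and calls come from tritonserver.h.
struct ServerTrace {
  int level;
};

struct InferenceTraceSketch {
  ServerTrace* triton_trace_;
};

// Mock for TRITONSERVER_InferenceTraceSpawnChildTrace: derives a child trace
// from the parent (the real API reports errors rather than returning directly).
ServerTrace* SpawnChildTrace(ServerTrace* parent)
{
  return new ServerTrace{parent->level + 1};
}

int main()
{
  ServerTrace parent{0};
  InferenceTraceSketch with_trace = {&parent};
  InferenceTraceSketch without_trace = {nullptr};

  for (const auto& wrapper : {with_trace, without_trace}) {
    ServerTrace* child = nullptr;
    // Same guard as request_executor.cc: only spawn a child trace when the
    // request actually carries a parent trace handle.
    if (wrapper.triton_trace_ != nullptr) {
      child = SpawnChildTrace(wrapper.triton_trace_);
    }
    std::cout << (child ? "child spawned" : "no trace attached") << std::endl;
    delete child;
  }
  return 0;
}
```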
