From 695094132c2c4c5533b90e5b48194066f2d9fd78 Mon Sep 17 00:00:00 2001
From: Olga Andreeva
Date: Tue, 18 Jul 2023 16:49:43 -0700
Subject: [PATCH] Adding InferenceTrace object

---
 src/infer_request.cc    |  4 ++--
 src/infer_request.h     | 19 +++++++++++++++----
 src/pb_stub.cc          |  9 ++++++---
 src/python_be.cc        |  6 ++++--
 src/request_executor.cc |  4 ++--
 5 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/src/infer_request.cc b/src/infer_request.cc
index b47e894b..5fdae669 100644
--- a/src/infer_request.cc
+++ b/src/infer_request.cc
@@ -44,7 +44,7 @@ InferRequest::InferRequest(
     const std::string& model_name, const int64_t model_version,
     const std::string& parameters, const uint32_t flags, const int32_t timeout,
     const intptr_t response_factory_address, const intptr_t request_address,
-    const PreferredMemory& preferred_memory, TRITONSERVER_InferenceTrace* trace)
+    const PreferredMemory& preferred_memory, const InferenceTrace& trace)
     : request_id_(request_id), correlation_id_(correlation_id), inputs_(inputs),
       requested_output_names_(requested_output_names), model_name_(model_name),
       model_version_(model_version), parameters_(parameters), flags_(flags),
@@ -167,7 +167,7 @@ InferRequest::GetPreferredMemory()
   return preferred_memory_;
 }
 
-TRITONSERVER_InferenceTrace*
+InferenceTrace&
 InferRequest::Trace()
 {
   return trace_;
diff --git a/src/infer_request.h b/src/infer_request.h
index 98ff4268..7ef3a363 100644
--- a/src/infer_request.h
+++ b/src/infer_request.h
@@ -41,6 +41,17 @@ namespace triton { namespace backend { namespace python {
 
 class Stub;
 
+//
+// Inference Trace
+//
+struct InferenceTrace {
+#ifndef TRITON_PB_STUB
+  TRITONSERVER_InferenceTrace* triton_trace_;
+#else
+  void* triton_trace_;
+#endif
+};
+
 //
 // Inference Request
 //
@@ -55,7 +66,7 @@ struct InferRequestShm {
   bool is_decoupled;
   int32_t timeout;
   PreferredMemory preferred_memory;
-  TRITONSERVER_InferenceTrace* trace;
+  InferenceTrace trace;
 };
 
 class InferRequest {
@@ -70,7 +81,7 @@ class InferRequest {
       const intptr_t request_address = 0,
       const PreferredMemory& preferred_memory =
           PreferredMemory(PreferredMemory::DEFAULT, 0),
-      TRITONSERVER_InferenceTrace* trace = nullptr);
+      const InferenceTrace& trace = {.triton_trace_ = nullptr});
 
   const std::vector<std::shared_ptr<PbTensor>>& Inputs();
   const std::string& RequestId();
@@ -86,7 +97,7 @@ class InferRequest {
   bool IsDecoupled();
   void SetIsDecoupled(const bool is_decoupled);
   PreferredMemory& GetPreferredMemory();
-  TRITONSERVER_InferenceTrace* Trace();
+  InferenceTrace& Trace();
 
 #ifdef TRITON_PB_STUB
   std::shared_ptr<InferResponse> Exec(const bool is_decoupled);
@@ -142,7 +153,7 @@ class InferRequest {
   intptr_t request_address_;
   bool is_decoupled_;
   PreferredMemory preferred_memory_;
-  TRITONSERVER_InferenceTrace* trace_;
+  InferenceTrace trace_;
 
   // Shared Memory Data Structures
   AllocatedSharedMemory<char> infer_request_shm_;
diff --git a/src/pb_stub.cc b/src/pb_stub.cc
index f1cbe874..b7df94c6 100644
--- a/src/pb_stub.cc
+++ b/src/pb_stub.cc
@@ -1362,6 +1362,9 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
       .value("TRITONSERVER_MEMORY_CPU", PreferredMemory::MemoryType::CPU)
       .export_values();
 
+  py::class_<InferenceTrace, std::shared_ptr<InferenceTrace>>(
+      module, "InferenceTrace");
+
   py::class_<InferRequest, std::shared_ptr<InferRequest>>(
       module, "InferenceRequest")
       .def(
@@ -1372,12 +1375,11 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
           const int64_t model_version, const uint32_t flags,
           const int32_t timeout,
           const PreferredMemory& preferred_memory,
-          std::shared_ptr<InferRequest>& request) {
+          const InferenceTrace& trace) {
             std::set<std::string> requested_outputs;
             for (auto& requested_output_name : requested_output_names) {
               requested_outputs.emplace(requested_output_name);
             }
-            auto trace = (request != nullptr) ? request->Trace() : nullptr;
             // FIXME: InferenceRequest parameters are not supported in BLS now.
             return std::make_shared<InferRequest>(
                 request_id, correlation_id, inputs, requested_outputs,
@@ -1394,7 +1396,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
           py::arg("flags").none(false) = 0, py::arg("timeout").none(false) = 0,
           py::arg("preferred_memory").none(false) =
               PreferredMemory(PreferredMemory::DEFAULT, 0),
-          py::arg("request").none(false) = nullptr)
+          py::arg("trace").none(false) = nullptr)
       .def(
           "inputs", &InferRequest::Inputs,
           py::return_value_policy::reference_internal)
@@ -1404,6 +1406,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
       .def("set_flags", &InferRequest::SetFlags)
       .def("timeout", &InferRequest::Timeout)
       .def("parameters", &InferRequest::Parameters)
+      .def("trace", &InferRequest::Trace)
       .def(
           "exec",
           [](std::shared_ptr<InferRequest>& infer_request,
diff --git a/src/python_be.cc b/src/python_be.cc
index daa18219..bb2c4e49 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -364,8 +364,10 @@ ModelInstanceState::SaveRequestsToSharedMemory(
     uint32_t flags;
     RETURN_IF_ERROR(TRITONBACKEND_RequestFlags(request, &flags));
 
-    TRITONSERVER_InferenceTrace* trace;
-    RETURN_IF_ERROR(TRITONBACKEND_RequestTrace(request, &trace));
+    TRITONSERVER_InferenceTrace* triton_trace;
+    RETURN_IF_ERROR(TRITONBACKEND_RequestTrace(request, &triton_trace));
+
+    InferenceTrace trace = {triton_trace};
 
     std::unique_ptr<InferRequest> infer_request;
     if (model_state->IsDecoupled()) {
diff --git a/src/request_executor.cc b/src/request_executor.cc
index aa97487b..b54e3988 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -360,9 +360,9 @@ RequestExecutor::Infer(
         irequest, InferRequestComplete, nullptr /* request_release_userp */));
 
     TRITONSERVER_InferenceTrace* trace = nullptr;
-    if (infer_request->Trace() != nullptr) {
+    if (infer_request->Trace().triton_trace_ != nullptr) {
       THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceTraceSpawnChildTrace(
-          infer_request->Trace(), &trace));
+          infer_request->Trace().triton_trace_, &trace));
     }
 
     for (auto& infer_input : infer_request->Inputs()) {
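
Usage note (not part of the patch): with this change, a Python model can read the
trace of an incoming request through the new trace() binding and pass it to a BLS
request via the new trace keyword argument; on the server side the wrapped
TRITONSERVER_InferenceTrace is then used to spawn a child trace for the nested
inference (see the request_executor.cc hunk above). Below is a minimal sketch of
an execute function doing that; the model and tensor names ("my_bls_target",
"INPUT0", "OUTPUT0") are hypothetical and error handling is elided.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        responses = []
        for request in requests:
            # Forward the parent request's InferenceTrace so the BLS call is
            # recorded as a child of the original request's trace. Inside the
            # stub the object is opaque (void* triton_trace_); it is only
            # dereferenced on the server side.
            bls_request = pb_utils.InferenceRequest(
                model_name="my_bls_target",  # hypothetical target model
                requested_output_names=["OUTPUT0"],
                inputs=[pb_utils.get_input_tensor_by_name(request, "INPUT0")],
                trace=request.trace(),  # new kwarg added by this patch
            )
            bls_response = bls_request.exec()

            output = pb_utils.get_output_tensor_by_name(bls_response, "OUTPUT0")
            responses.append(pb_utils.InferenceResponse(output_tensors=[output]))
        return responses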