From 9f705476db18a741436695decf34d2953d47bf63 Mon Sep 17 00:00:00 2001 From: Olga Andreeva Date: Fri, 7 Jul 2023 17:28:44 -0700 Subject: [PATCH 1/5] Added new backend API and tritonserver API to support tracing from BLS models. Added their implementation. --- include/triton/core/tritonbackend.h | 10 ++++++++++ include/triton/core/tritonserver.h | 12 +++++++++++- src/backend_model.cc | 15 +++++++++++++++ src/dynamic_batch_scheduler.cc | 2 +- src/ensemble_scheduler.cc | 10 +++++----- src/infer_request.h | 5 ++++- src/infer_trace.h | 1 + src/sequence_batch_scheduler.cc | 2 +- src/server.cc | 2 +- src/tritonserver.cc | 15 +++++++++++++++ src/tritonserver_stub.cc | 8 ++++++++ 11 files changed, 72 insertions(+), 10 deletions(-) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index 8e58c34cf..3ba528e19 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -561,6 +561,16 @@ TRITONBACKEND_RequestOutputBufferProperties( TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestRelease( TRITONBACKEND_Request* request, uint32_t release_flags); +/// Get the trace associated with a request. The returned trace is owned by the +/// request, not the caller, and so should not be modified or freed. +/// [CLARIFY THIS]If tracing is disabled, then `nullptr` will be returned. +/// +/// \param request The inference request. +/// \param trace Returns the trace associated with the request. +/// \return a TRITONSERVER_Error indicating success or failure. 
+TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestTrace( + TRITONBACKEND_Request* request, TRITONSERVER_InferenceTrace** trace); + /// /// TRITONBACKEND_ResponseFactory /// diff --git a/include/triton/core/tritonserver.h b/include/triton/core/tritonserver.h index b92832172..748ab5ac1 100644 --- a/include/triton/core/tritonserver.h +++ b/include/triton/core/tritonserver.h @@ -91,7 +91,7 @@ struct TRITONSERVER_MetricFamily; /// } /// #define TRITONSERVER_API_VERSION_MAJOR 1 -#define TRITONSERVER_API_VERSION_MINOR 23 +#define TRITONSERVER_API_VERSION_MINOR 24 /// Get the TRITONBACKEND API version supported by the Triton shared /// library. This value can be compared against the @@ -879,6 +879,16 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceRequestId( struct TRITONSERVER_InferenceTrace* trace, const char** request_id); +/// Get the child trace, spawned from the parent trace. +/// +/// \param trace The trace. +/// \param child_trace Returns the child trace, spawned from the trace. +/// \return a TRITONSERVER_Error indicating success or failure. +TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* +TRITONSERVER_InferenceTraceSpawnChildTrace( + struct TRITONSERVER_InferenceTrace* trace, + struct TRITONSERVER_InferenceTrace** child_trace); + /// TRITONSERVER_InferenceRequest /// /// Object representing an inference request. 
The inference request diff --git a/src/backend_model.cc b/src/backend_model.cc index dec657680..cad75af70 100644 --- a/src/backend_model.cc +++ b/src/backend_model.cc @@ -1201,6 +1201,21 @@ TRITONBACKEND_RequestRelease( return nullptr; // success } +TRITONAPI_DECLSPEC TRITONSERVER_Error* +TRITONBACKEND_RequestTrace( + TRITONBACKEND_Request* request, TRITONSERVER_InferenceTrace** trace) +{ +#ifdef TRITON_ENABLE_TRACING + InferenceRequest* tr = reinterpret_cast(request); + *trace = + reinterpret_cast(tr->TraceProxy()->Trace()); + return nullptr; // success +#else + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_UNSUPPORTED, "tracing is not supported"); +#endif // TRITON_ENABLE_TRACING +} + /// /// TRITONBACKEND_State /// diff --git a/src/dynamic_batch_scheduler.cc b/src/dynamic_batch_scheduler.cc index d2c398552..758a2681f 100644 --- a/src/dynamic_batch_scheduler.cc +++ b/src/dynamic_batch_scheduler.cc @@ -187,7 +187,7 @@ DynamicBatchScheduler::Enqueue(std::unique_ptr& request) if (request->QueueStartNs() == 0) { request->CaptureQueueStartNs(); INFER_TRACE_ACTIVITY( - request->Trace(), TRITONSERVER_TRACE_QUEUE_START, + request->TraceProxy(), TRITONSERVER_TRACE_QUEUE_START, request->QueueStartNs()); #ifdef TRITON_ENABLE_TRACING request->TraceInputTensors( diff --git a/src/ensemble_scheduler.cc b/src/ensemble_scheduler.cc index 46168d078..8e9588969 100644 --- a/src/ensemble_scheduler.cc +++ b/src/ensemble_scheduler.cc @@ -963,12 +963,12 @@ EnsembleContext::InitStep( RETURN_IF_ERROR(irequest->PrepareForInference()); #ifdef TRITON_ENABLE_TRACING - auto& parent_trace = request_tracker_->Request()->Trace(); + auto& parent_trace = request_tracker_->Request()->TraceProxy(); if (parent_trace != nullptr) { irequest->SetTrace(parent_trace->SpawnChildTrace()); - irequest->Trace()->SetModelName(irequest->ModelName()); - irequest->Trace()->SetRequestId(irequest->Id()); - irequest->Trace()->SetModelVersion(irequest->ActualModelVersion()); + 
irequest->TraceProxy()->SetModelName(irequest->ModelName()); + irequest->TraceProxy()->SetRequestId(irequest->Id()); + irequest->TraceProxy()->SetModelVersion(irequest->ActualModelVersion()); } #endif @@ -1309,7 +1309,7 @@ EnsembleScheduler::Enqueue(std::unique_ptr& request) // scheduling process request->CaptureQueueStartNs(); INFER_TRACE_ACTIVITY( - request->Trace(), TRITONSERVER_TRACE_QUEUE_START, + request->TraceProxy(), TRITONSERVER_TRACE_QUEUE_START, request->QueueStartNs()); #ifdef TRITON_ENABLE_TRACING request->TraceInputTensors( diff --git a/src/infer_request.h b/src/infer_request.h index 8c6f86459..45d74b2ad 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -323,7 +323,10 @@ class InferenceRequest { bool CacheKeyIsSet() const { return cache_key_is_set_; } #ifdef TRITON_ENABLE_TRACING - const std::shared_ptr& Trace() const { return trace_; } + const std::shared_ptr& TraceProxy() const + { + return trace_; + } std::shared_ptr* MutableTrace() { return &trace_; } void SetTrace(const std::shared_ptr& trace) { diff --git a/src/infer_trace.h b/src/infer_trace.h index 518a6905d..16477273f 100644 --- a/src/infer_trace.h +++ b/src/infer_trace.h @@ -138,6 +138,7 @@ class InferenceTraceProxy { public: InferenceTraceProxy(InferenceTrace* trace) : trace_(trace) {} ~InferenceTraceProxy() { trace_->Release(); } + InferenceTrace* Trace() { return trace_; } int64_t Id() const { return trace_->Id(); } int64_t ParentId() const { return trace_->ParentId(); } const std::string& ModelName() const { return trace_->ModelName(); } diff --git a/src/sequence_batch_scheduler.cc b/src/sequence_batch_scheduler.cc index 543156eae..ef339cf78 100644 --- a/src/sequence_batch_scheduler.cc +++ b/src/sequence_batch_scheduler.cc @@ -628,7 +628,7 @@ SequenceBatchScheduler::Enqueue(std::unique_ptr& irequest) // scheduling process irequest->CaptureQueueStartNs(); INFER_TRACE_ACTIVITY( - irequest->Trace(), TRITONSERVER_TRACE_QUEUE_START, + irequest->TraceProxy(), 
TRITONSERVER_TRACE_QUEUE_START, irequest->QueueStartNs()); // Record time at the beginning of the batcher queueing diff --git a/src/server.cc b/src/server.cc index ec6022c69..66e740790 100644 --- a/src/server.cc +++ b/src/server.cc @@ -543,7 +543,7 @@ InferenceServer::InferAsync(std::unique_ptr& request) #ifdef TRITON_ENABLE_STATS request->CaptureRequestStartNs(); INFER_TRACE_ACTIVITY( - request->Trace(), TRITONSERVER_TRACE_REQUEST_START, + request->TraceProxy(), TRITONSERVER_TRACE_REQUEST_START, request->RequestStartNs()); #endif // TRITON_ENABLE_STATS diff --git a/src/tritonserver.cc b/src/tritonserver.cc index 70eaf1131..a45053150 100644 --- a/src/tritonserver.cc +++ b/src/tritonserver.cc @@ -1060,6 +1060,21 @@ TRITONSERVER_InferenceTraceModelVersion( #endif // TRITON_ENABLE_TRACING } +TRITONAPI_DECLSPEC TRITONSERVER_Error* +TRITONSERVER_InferenceTraceSpawnChildTrace( + TRITONSERVER_InferenceTrace* trace, + TRITONSERVER_InferenceTrace** child_trace) +{ +#ifdef TRITON_ENABLE_TRACING + tc::InferenceTrace* ltrace = reinterpret_cast(trace); + *child_trace = ltrace->SpawnChildTrace(); + return nullptr; // Success +#else + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported"); +#endif // TRITON_ENABLE_TRACING +} + // // TRITONSERVER_ServerOptions // diff --git a/src/tritonserver_stub.cc b/src/tritonserver_stub.cc index 8ef16a0c4..b0081a0a2 100644 --- a/src/tritonserver_stub.cc +++ b/src/tritonserver_stub.cc @@ -178,6 +178,10 @@ TRITONSERVER_InferenceTraceRequestId() { } TRITONAPI_DECLSPEC void +TRITONSERVER_InferenceTraceSpawnChildTrace() +{ +} +TRITONAPI_DECLSPEC void TRITONSERVER_InferenceRequestNew() { } @@ -691,6 +695,10 @@ TRITONBACKEND_RequestRelease() { } TRITONAPI_DECLSPEC void +TRITONBACKEND_RequestTrace() +{ +} +TRITONAPI_DECLSPEC void TRITONSERVER_InferenceRequestSetBoolParameter() { } From cc1fa0b55b91dd2243b9f8f40df180a9bce57557 Mon Sep 17 00:00:00 2001 From: Olga Andreeva Date: Wed, 12 Jul 2023 17:58:40 -0700 
Subject: [PATCH 2/5] Added fixes --- include/triton/core/tritonbackend.h | 2 +- src/backend_model.cc | 8 ++++++-- src/tritonserver.cc | 3 ++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index 3ba528e19..9fb913db7 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -563,7 +563,7 @@ TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestRelease( /// Get the trace associated with a request. The returned trace is owned by the /// request, not the caller, and so should not be modified or freed. -/// [CLARIFY THIS]If tracing is disabled, then `nullptr` will be returned. +/// If tracing is disabled, then `nullptr` will be returned. /// /// \param request The inference request. /// \param trace Returns the trace associated with the request. diff --git a/src/backend_model.cc b/src/backend_model.cc index cad75af70..996ce6fd6 100644 --- a/src/backend_model.cc +++ b/src/backend_model.cc @@ -1207,8 +1207,12 @@ TRITONBACKEND_RequestTrace( { #ifdef TRITON_ENABLE_TRACING InferenceRequest* tr = reinterpret_cast(request); - *trace = - reinterpret_cast(tr->TraceProxy()->Trace()); + if (tr->TraceProxy() != nullptr) { + *trace = reinterpret_cast( + tr->TraceProxy()->Trace()); + } else { + *trace = nullptr; + } return nullptr; // success #else return TRITONSERVER_ErrorNew( diff --git a/src/tritonserver.cc b/src/tritonserver.cc index a45053150..38bbbdb4b 100644 --- a/src/tritonserver.cc +++ b/src/tritonserver.cc @@ -1067,7 +1067,8 @@ TRITONSERVER_InferenceTraceSpawnChildTrace( { #ifdef TRITON_ENABLE_TRACING tc::InferenceTrace* ltrace = reinterpret_cast(trace); - *child_trace = ltrace->SpawnChildTrace(); + *child_trace = + reinterpret_cast(ltrace->SpawnChildTrace()); return nullptr; // Success #else return TRITONSERVER_ErrorNew( From 2ea04dffe3271d44957f5d1691991acb775b86c6 Mon Sep 17 00:00:00 2001 From: Olga Andreeva 
<124622579+oandreeva-nv@users.noreply.github.com> Date: Mon, 17 Jul 2023 15:58:12 -0700 Subject: [PATCH 3/5] Update include/triton/core/tritonbackend.h Co-authored-by: GuanLuo <41310872+GuanLuo@users.noreply.github.com> --- include/triton/core/tritonbackend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index 9fb913db7..4a2f01245 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -563,7 +563,7 @@ TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestRelease( /// Get the trace associated with a request. The returned trace is owned by the /// request, not the caller, and so should not be modified or freed. -/// If tracing is disabled, then `nullptr` will be returned. +/// If the request is not being traced, then `nullptr` will be returned. /// /// \param request The inference request. /// \param trace Returns the trace associated with the request. From e9860069be390bb18d6e309e37eb424befc8d9b4 Mon Sep 17 00:00:00 2001 From: Olga Andreeva Date: Tue, 18 Jul 2023 15:10:39 -0700 Subject: [PATCH 4/5] Revision 1 --- include/triton/core/tritonserver.h | 5 ++++- src/tritonserver.cc | 8 ++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/triton/core/tritonserver.h b/include/triton/core/tritonserver.h index 748ab5ac1..c037242bb 100644 --- a/include/triton/core/tritonserver.h +++ b/include/triton/core/tritonserver.h @@ -879,7 +879,10 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error* TRITONSERVER_InferenceTraceRequestId( struct TRITONSERVER_InferenceTrace* trace, const char** request_id); -/// Get the child trace, spawned from the parent trace. +/// Get the child trace, spawned from the parent trace. 
The caller owns +/// the returned object and must call TRITONSERVER_InferenceTraceDelete +/// to release the object, unless ownership is transferred through +/// other APIs (see TRITONSERVER_ServerInferAsync). /// /// \param trace The trace. /// \param child_trace Returns the child trace, spawned from the trace. diff --git a/src/tritonserver.cc b/src/tritonserver.cc index 38bbbdb4b..c9fc49fc4 100644 --- a/src/tritonserver.cc +++ b/src/tritonserver.cc @@ -1067,8 +1067,12 @@ TRITONSERVER_InferenceTraceSpawnChildTrace( { #ifdef TRITON_ENABLE_TRACING tc::InferenceTrace* ltrace = reinterpret_cast(trace); - *child_trace = - reinterpret_cast(ltrace->SpawnChildTrace()); + if (trace != nullptr) { + *child_trace = reinterpret_cast( + ltrace->SpawnChildTrace()); + } else { + *child_trace = nullptr; + } return nullptr; // Success #else return TRITONSERVER_ErrorNew( From f178bad493de4d081787fe1bbb36052d7b744eaa Mon Sep 17 00:00:00 2001 From: oandreeva-nv Date: Thu, 27 Jul 2023 12:07:01 -0700 Subject: [PATCH 5/5] Bumping API version --- include/triton/core/tritonbackend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/triton/core/tritonbackend.h b/include/triton/core/tritonbackend.h index 4a2f01245..de0fffb8d 100644 --- a/include/triton/core/tritonbackend.h +++ b/include/triton/core/tritonbackend.h @@ -94,7 +94,7 @@ struct TRITONBACKEND_Batcher; /// } /// #define TRITONBACKEND_API_VERSION_MAJOR 1 -#define TRITONBACKEND_API_VERSION_MINOR 14 +#define TRITONBACKEND_API_VERSION_MINOR 15 /// Get the TRITONBACKEND API version supported by Triton. This value /// can be compared against the TRITONBACKEND_API_VERSION_MAJOR and