Adding support for tracing child models invoked from a BLS model #234

Merged · 5 commits · Aug 7, 2023
12 changes: 11 additions & 1 deletion include/triton/core/tritonbackend.h
@@ -94,7 +94,7 @@ struct TRITONBACKEND_Batcher;
/// }
///
#define TRITONBACKEND_API_VERSION_MAJOR 1
-#define TRITONBACKEND_API_VERSION_MINOR 14
+#define TRITONBACKEND_API_VERSION_MINOR 15

/// Get the TRITONBACKEND API version supported by Triton. This value
/// can be compared against the TRITONBACKEND_API_VERSION_MAJOR and
@@ -561,6 +561,16 @@ TRITONBACKEND_RequestOutputBufferProperties(
TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestRelease(
    TRITONBACKEND_Request* request, uint32_t release_flags);

+/// Get the trace associated with a request. The returned trace is owned by the
+/// request, not the caller, and so should not be modified or freed.
+/// If the request is not being traced, then `nullptr` will be returned.
+///
+/// \param request The inference request.
+/// \param trace Returns the trace associated with the request.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestTrace(
+    TRITONBACKEND_Request* request, TRITONSERVER_InferenceTrace** trace);

///
/// TRITONBACKEND_ResponseFactory
///
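Together with the TRITONSERVER_InferenceTraceSpawnChildTrace API added to tritonserver.h below, this gives a BLS-style backend what it needs to propagate tracing into the nested inferences it launches. A minimal sketch of the intended call pattern, not part of this diff: `server` and `child_request` are assumed to be the backend's server handle and an already-constructed nested request, and error handling is elided.

// Sketch only: the parent trace is owned by the incoming request, so the
// backend must not modify or free it.
TRITONSERVER_InferenceTrace* trace = nullptr;
TRITONSERVER_Error* err = TRITONBACKEND_RequestTrace(request, &trace);
if ((err == nullptr) && (trace != nullptr)) {
  TRITONSERVER_InferenceTrace* child_trace = nullptr;
  err = TRITONSERVER_InferenceTraceSpawnChildTrace(trace, &child_trace);
  if (err == nullptr) {
    // Ownership of child_trace transfers to the server with this call.
    err = TRITONSERVER_ServerInferAsync(server, child_request, child_trace);
  }
}

If the request is not being traced, trace comes back as nullptr and the nested inference can simply be issued without a trace.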
15 changes: 14 additions & 1 deletion include/triton/core/tritonserver.h
@@ -91,7 +91,7 @@ struct TRITONSERVER_MetricFamily;
/// }
///
#define TRITONSERVER_API_VERSION_MAJOR 1
-#define TRITONSERVER_API_VERSION_MINOR 23
+#define TRITONSERVER_API_VERSION_MINOR 24

/// Get the TRITONBACKEND API version supported by the Triton shared
/// library. This value can be compared against the
@@ -879,6 +879,19 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
TRITONSERVER_InferenceTraceRequestId(
    struct TRITONSERVER_InferenceTrace* trace, const char** request_id);

+/// Get the child trace, spawned from the parent trace. The caller owns
+/// the returned object and must call TRITONSERVER_InferenceTraceDelete
+/// to release the object, unless ownership is transferred through
+/// other APIs (see TRITONSERVER_ServerInferAsync).
+///
+/// \param trace The trace.
+/// \param child_trace Returns the child trace, spawned from the trace.
+/// \return a TRITONSERVER_Error indicating success or failure.
+TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
+TRITONSERVER_InferenceTraceSpawnChildTrace(
+    struct TRITONSERVER_InferenceTrace* trace,
+    struct TRITONSERVER_InferenceTrace** child_trace);

/// TRITONSERVER_InferenceRequest
///
/// Object representing an inference request. The inference request
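A consequence of the ownership rule documented above: a child trace that is spawned but never handed to TRITONSERVER_ServerInferAsync must be released by the caller. A hedged sketch, assuming `trace` is a valid parent trace:

// Sketch only: clean up a child trace whose nested inference was never issued.
TRITONSERVER_InferenceTrace* child_trace = nullptr;
TRITONSERVER_Error* err =
    TRITONSERVER_InferenceTraceSpawnChildTrace(trace, &child_trace);
if ((err == nullptr) && (child_trace != nullptr)) {
  // The nested inference was skipped, so release the trace explicitly.
  TRITONSERVER_InferenceTraceDelete(child_trace);
}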
19 changes: 19 additions & 0 deletions src/backend_model.cc
@@ -1201,6 +1201,25 @@ TRITONBACKEND_RequestRelease(
  return nullptr;  // success
}

+TRITONAPI_DECLSPEC TRITONSERVER_Error*
+TRITONBACKEND_RequestTrace(
+    TRITONBACKEND_Request* request, TRITONSERVER_InferenceTrace** trace)
+{
+#ifdef TRITON_ENABLE_TRACING
+  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
+  if (tr->TraceProxy() != nullptr) {
+    *trace = reinterpret_cast<TRITONSERVER_InferenceTrace*>(
+        tr->TraceProxy()->Trace());
+  } else {
+    *trace = nullptr;
+  }
+  return nullptr;  // success
+#else
+  return TRITONSERVER_ErrorNew(
+      TRITONSERVER_ERROR_UNSUPPORTED, "tracing is not supported");
+#endif  // TRITON_ENABLE_TRACING
+}

///
/// TRITONBACKEND_State
///
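Since this implementation returns TRITONSERVER_ERROR_UNSUPPORTED when Triton is built without TRITON_ENABLE_TRACING, a backend that must run against both builds can treat that error as "no trace available". An illustrative sketch, not part of this change:

// Sketch only: degrade gracefully when tracing is compiled out.
TRITONSERVER_InferenceTrace* trace = nullptr;
TRITONSERVER_Error* err = TRITONBACKEND_RequestTrace(request, &trace);
if (err != nullptr) {
  if (TRITONSERVER_ErrorCode(err) == TRITONSERVER_ERROR_UNSUPPORTED) {
    trace = nullptr;  // tracing unavailable; proceed without it
  }
  TRITONSERVER_ErrorDelete(err);
}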
2 changes: 1 addition & 1 deletion src/dynamic_batch_scheduler.cc
@@ -187,7 +187,7 @@ DynamicBatchScheduler::Enqueue(std::unique_ptr<InferenceRequest>& request)
  if (request->QueueStartNs() == 0) {
    request->CaptureQueueStartNs();
    INFER_TRACE_ACTIVITY(
-       request->Trace(), TRITONSERVER_TRACE_QUEUE_START,
+       request->TraceProxy(), TRITONSERVER_TRACE_QUEUE_START,
        request->QueueStartNs());
#ifdef TRITON_ENABLE_TRACING
    request->TraceInputTensors(
10 changes: 5 additions & 5 deletions src/ensemble_scheduler.cc
@@ -963,12 +963,12 @@ EnsembleContext::InitStep(
  RETURN_IF_ERROR(irequest->PrepareForInference());

#ifdef TRITON_ENABLE_TRACING
-  auto& parent_trace = request_tracker_->Request()->Trace();
+  auto& parent_trace = request_tracker_->Request()->TraceProxy();
  if (parent_trace != nullptr) {
    irequest->SetTrace(parent_trace->SpawnChildTrace());
-    irequest->Trace()->SetModelName(irequest->ModelName());
-    irequest->Trace()->SetRequestId(irequest->Id());
-    irequest->Trace()->SetModelVersion(irequest->ActualModelVersion());
+    irequest->TraceProxy()->SetModelName(irequest->ModelName());
+    irequest->TraceProxy()->SetRequestId(irequest->Id());
+    irequest->TraceProxy()->SetModelVersion(irequest->ActualModelVersion());
  }
#endif

@@ -1309,7 +1309,7 @@ EnsembleScheduler::Enqueue(std::unique_ptr<InferenceRequest>& request)
  // scheduling process
  request->CaptureQueueStartNs();
  INFER_TRACE_ACTIVITY(
-      request->Trace(), TRITONSERVER_TRACE_QUEUE_START,
+      request->TraceProxy(), TRITONSERVER_TRACE_QUEUE_START,
      request->QueueStartNs());
#ifdef TRITON_ENABLE_TRACING
  request->TraceInputTensors(
5 changes: 4 additions & 1 deletion src/infer_request.h
@@ -323,7 +323,10 @@ class InferenceRequest {
  bool CacheKeyIsSet() const { return cache_key_is_set_; }

#ifdef TRITON_ENABLE_TRACING
-  const std::shared_ptr<InferenceTraceProxy>& Trace() const { return trace_; }
+  const std::shared_ptr<InferenceTraceProxy>& TraceProxy() const
+  {
+    return trace_;
+  }
  std::shared_ptr<InferenceTraceProxy>* MutableTrace() { return &trace_; }
  void SetTrace(const std::shared_ptr<InferenceTraceProxy>& trace)
  {
1 change: 1 addition & 0 deletions src/infer_trace.h
@@ -138,6 +138,7 @@ class InferenceTraceProxy {
 public:
  InferenceTraceProxy(InferenceTrace* trace) : trace_(trace) {}
  ~InferenceTraceProxy() { trace_->Release(); }
+  InferenceTrace* Trace() { return trace_; }
  int64_t Id() const { return trace_->Id(); }
  int64_t ParentId() const { return trace_->ParentId(); }
  const std::string& ModelName() const { return trace_->ModelName(); }
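For context, InferenceTraceProxy is a thin RAII wrapper whose destructor releases the underlying trace; the new Trace() accessor exposes the raw InferenceTrace so that TRITONBACKEND_RequestTrace can hand it across the C API boundary without transferring ownership. Roughly, using the internal types above (illustrative only):

// The shared_ptr proxy retains ownership; the raw pointer returned by
// Trace() is only borrowed by the caller.
const std::shared_ptr<InferenceTraceProxy>& proxy = request->TraceProxy();
InferenceTrace* raw = (proxy != nullptr) ? proxy->Trace() : nullptr;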
2 changes: 1 addition & 1 deletion src/sequence_batch_scheduler.cc
@@ -628,7 +628,7 @@ SequenceBatchScheduler::Enqueue(std::unique_ptr<InferenceRequest>& irequest)
  // scheduling process
  irequest->CaptureQueueStartNs();
  INFER_TRACE_ACTIVITY(
-      irequest->Trace(), TRITONSERVER_TRACE_QUEUE_START,
+      irequest->TraceProxy(), TRITONSERVER_TRACE_QUEUE_START,
      irequest->QueueStartNs());

  // Record time at the beginning of the batcher queueing
2 changes: 1 addition & 1 deletion src/server.cc
@@ -543,7 +543,7 @@ InferenceServer::InferAsync(std::unique_ptr<InferenceRequest>& request)
#ifdef TRITON_ENABLE_STATS
  request->CaptureRequestStartNs();
  INFER_TRACE_ACTIVITY(
-      request->Trace(), TRITONSERVER_TRACE_REQUEST_START,
+      request->TraceProxy(), TRITONSERVER_TRACE_REQUEST_START,
      request->RequestStartNs());
#endif  // TRITON_ENABLE_STATS

20 changes: 20 additions & 0 deletions src/tritonserver.cc
@@ -1060,6 +1060,26 @@ TRITONSERVER_InferenceTraceModelVersion(
#endif // TRITON_ENABLE_TRACING
}

+TRITONAPI_DECLSPEC TRITONSERVER_Error*
+TRITONSERVER_InferenceTraceSpawnChildTrace(
+    TRITONSERVER_InferenceTrace* trace,
+    TRITONSERVER_InferenceTrace** child_trace)
+{
+#ifdef TRITON_ENABLE_TRACING
+  tc::InferenceTrace* ltrace = reinterpret_cast<tc::InferenceTrace*>(trace);
+  if (trace != nullptr) {
+    *child_trace = reinterpret_cast<TRITONSERVER_InferenceTrace*>(
+        ltrace->SpawnChildTrace());
+  } else {
+    *child_trace = nullptr;
+  }
+  return nullptr;  // Success
+#else
+  return TRITONSERVER_ErrorNew(
+      TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported");
+#endif  // TRITON_ENABLE_TRACING
+}

//
// TRITONSERVER_ServerOptions
//
8 changes: 8 additions & 0 deletions src/tritonserver_stub.cc
@@ -178,6 +178,10 @@ TRITONSERVER_InferenceTraceRequestId()
{
}
+TRITONAPI_DECLSPEC void
+TRITONSERVER_InferenceTraceSpawnChildTrace()
+{
+}
TRITONAPI_DECLSPEC void
TRITONSERVER_InferenceRequestNew()
{
}
@@ -691,6 +695,10 @@ TRITONBACKEND_RequestRelease()
{
}
+TRITONAPI_DECLSPEC void
+TRITONBACKEND_RequestTrace()
+{
+}
TRITONAPI_DECLSPEC void
TRITONSERVER_InferenceRequestSetBoolParameter()
{
}