diff --git a/README.md b/README.md index eee6af39..913034a8 100644 --- a/README.md +++ b/README.md @@ -1656,12 +1656,12 @@ import triton_python_backend_utils as pb_utils class TritonPythonModel: def initialize(self, args): # Create a MetricFamily object to report the latency of the model - # execution. The 'kind' parameter must be either 'COUNTER' or - # 'GAUGE'. + # execution. The 'kind' parameter must be either 'COUNTER', + # 'GAUGE' or 'HISTOGRAM'. self.metric_family = pb_utils.MetricFamily( name="preprocess_latency_ns", description="Cumulative time spent pre-processing requests", - kind=pb_utils.MetricFamily.COUNTER # or pb_utils.MetricFamily.GAUGE + kind=pb_utils.MetricFamily.COUNTER ) # Create a Metric object under the MetricFamily object. The 'labels' diff --git a/src/ipc_message.h b/src/ipc_message.h index ac28238c..8e762b8f 100644 --- a/src/ipc_message.h +++ b/src/ipc_message.h @@ -1,4 +1,4 @@ -// Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -63,6 +63,7 @@ typedef enum PYTHONSTUB_commandtype_enum { PYTHONSTUB_MetricRequestValue, PYTHONSTUB_MetricRequestIncrement, PYTHONSTUB_MetricRequestSet, + PYTHONSTUB_MetricRequestObserve, PYTHONSTUB_LoadModelRequest, PYTHONSTUB_UnloadModelRequest, PYTHONSTUB_ModelReadinessRequest, diff --git a/src/metric.cc b/src/metric.cc index f67c55bf..7796b161 100644 --- a/src/metric.cc +++ b/src/metric.cc @@ -1,4 +1,4 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -32,9 +32,12 @@ namespace triton { namespace backend { namespace python { -Metric::Metric(const std::string& labels, void* metric_family_address) - : labels_(labels), operation_value_(0), metric_address_(nullptr), - metric_family_address_(metric_family_address), is_cleared_(false) +Metric::Metric( + const std::string& labels, std::optional<std::vector<double>> buckets, + void* metric_family_address) + : labels_(labels), buckets_(buckets), operation_value_(0), + metric_address_(nullptr), metric_family_address_(metric_family_address), + is_cleared_(false) { #ifdef TRITON_PB_STUB SendCreateMetricRequest(); @@ -62,6 +65,20 @@ Metric::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool) custom_metric_shm_ptr_->metric_family_address = metric_family_address_; custom_metric_shm_ptr_->metric_address = metric_address_; + // Histogram specific case + if (buckets_.has_value()) { + auto buckets_size = buckets_.value().size() * sizeof(double); + std::unique_ptr<PbMemory> buckets_shm = PbMemory::Create( + shm_pool, TRITONSERVER_MemoryType::TRITONSERVER_MEMORY_CPU, 0, + buckets_size, reinterpret_cast<char*>(buckets_.value().data()), + false /* copy_gpu */); + custom_metric_shm_ptr_->buckets_shm_handle = buckets_shm->ShmHandle(); + buckets_shm_ = std::move(buckets_shm); + } else { + custom_metric_shm_ptr_->buckets_shm_handle = 0; + buckets_shm_ = nullptr; + } + // Save the references to shared memory.
custom_metric_shm_ = std::move(custom_metric_shm); labels_shm_ = std::move(labels_shm); @@ -80,17 +97,40 @@ Metric::LoadFromSharedMemory( std::unique_ptr<PbString> labels_shm = PbString::LoadFromSharedMemory( shm_pool, custom_metric_shm_ptr->labels_shm_handle); - return std::unique_ptr<Metric>(new Metric(custom_metric_shm, labels_shm)); + std::unique_ptr<PbMemory> buckets_shm = nullptr; + if (custom_metric_shm_ptr->buckets_shm_handle != 0) { + buckets_shm = PbMemory::LoadFromSharedMemory( + shm_pool, custom_metric_shm_ptr->buckets_shm_handle, + false /* open_cuda_handle */); + } + + return std::unique_ptr<Metric>( + new Metric(custom_metric_shm, labels_shm, buckets_shm)); } Metric::Metric( AllocatedSharedMemory<MetricShm>& custom_metric_shm, - std::unique_ptr<PbString>& labels_shm) + std::unique_ptr<PbString>& labels_shm, + std::unique_ptr<PbMemory>& buckets_shm) : custom_metric_shm_(std::move(custom_metric_shm)), - labels_shm_(std::move(labels_shm)) + labels_shm_(std::move(labels_shm)), buckets_shm_(std::move(buckets_shm)) { custom_metric_shm_ptr_ = custom_metric_shm_.data_.get(); + + // FIXME: This constructor is called during each + // set/increment/observe/get_value call. It only needs the pointers.
labels_ = labels_shm_->String(); + if (buckets_shm_ != nullptr) { // Histogram + size_t bucket_size = buckets_shm_->ByteSize() / sizeof(double); + std::vector<double> buckets; + buckets.reserve(bucket_size); + for (size_t i = 0; i < bucket_size; ++i) { + buckets.emplace_back( + reinterpret_cast<double*>(buckets_shm_->DataPtr())[i]); + } + buckets_ = std::move(buckets); + } + operation_value_ = custom_metric_shm_ptr_->operation_value; metric_family_address_ = custom_metric_shm_ptr_->metric_family_address; metric_address_ = custom_metric_shm_ptr_->metric_address; @@ -161,6 +201,24 @@ Metric::SendSetValueRequest(const double& value) } } +void +Metric::SendObserveRequest(const double& value) +{ + try { + CheckIfCleared(); + std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance(); + operation_value_ = value; + SaveToSharedMemory(stub->ShmPool()); + AllocatedSharedMemory<CustomMetricsMessage> custom_metrics_shm; + stub->SendMessage<CustomMetricsMessage>( + custom_metrics_shm, PYTHONSTUB_MetricRequestObserve, shm_handle_); + } + catch (const PythonBackendException& pb_exception) { + throw PythonBackendException( + "Failed to observe metric value: " + std::string(pb_exception.what())); + } +} + double Metric::SendGetValueRequest() { @@ -222,14 +280,35 @@ Metric::InitializeTritonMetric() { std::vector<const TRITONSERVER_Parameter*> labels_params; ParseLabels(labels_params, labels_); + TRITONSERVER_MetricKind kind; + THROW_IF_TRITON_ERROR(TRITONSERVER_GetMetricFamilyKind( + reinterpret_cast<TRITONSERVER_MetricFamily*>(metric_family_address_), + &kind)); + TRITONSERVER_MetricArgs* args = nullptr; + switch (kind) { + case TRITONSERVER_METRIC_KIND_COUNTER: + case TRITONSERVER_METRIC_KIND_GAUGE: + break; + case TRITONSERVER_METRIC_KIND_HISTOGRAM: { + const std::vector<double>& buckets = buckets_.value(); + THROW_IF_TRITON_ERROR(TRITONSERVER_MetricArgsNew(&args)); + THROW_IF_TRITON_ERROR(TRITONSERVER_MetricArgsSetHistogram( + args, buckets.data(), buckets.size())); + break; + } + default: + break; + } + TRITONSERVER_Metric* triton_metric = nullptr; - THROW_IF_TRITON_ERROR(TRITONSERVER_MetricNew( + 
THROW_IF_TRITON_ERROR(TRITONSERVER_MetricNewWithArgs( &triton_metric, reinterpret_cast<TRITONSERVER_MetricFamily*>(metric_family_address_), - labels_params.data(), labels_params.size())); + labels_params.data(), labels_params.size(), args)); for (const auto label : labels_params) { TRITONSERVER_ParameterDelete(const_cast<TRITONSERVER_Parameter*>(label)); } + THROW_IF_TRITON_ERROR(TRITONSERVER_MetricArgsDelete(args)); return reinterpret_cast<void*>(triton_metric); } @@ -262,6 +341,8 @@ Metric::HandleMetricOperation( Increment(operation_value_); } else if (command_type == PYTHONSTUB_MetricRequestSet) { SetValue(operation_value_); + } else if (command_type == PYTHONSTUB_MetricRequestObserve) { + Observe(operation_value_); } else { throw PythonBackendException("Unknown metric operation"); } @@ -281,6 +362,13 @@ Metric::SetValue(const double& value) THROW_IF_TRITON_ERROR(TRITONSERVER_MetricSet(triton_metric, value)); } +void +Metric::Observe(const double& value) +{ + auto triton_metric = reinterpret_cast<TRITONSERVER_Metric*>(metric_address_); + THROW_IF_TRITON_ERROR(TRITONSERVER_MetricObserve(triton_metric, value)); +} + double Metric::GetValue() { diff --git a/src/metric.h b/src/metric.h index 197e8ce9..cd54ca54 100644 --- a/src/metric.h +++ b/src/metric.h @@ -1,4 +1,4 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -26,9 +26,11 @@ #pragma once +#include <optional> #include <string> #include "ipc_message.h" +#include "pb_memory.h" #include "pb_string.h" #include "pb_utils.h" @@ -47,6 +49,8 @@ namespace triton { namespace backend { namespace python { struct MetricShm { // The shared memory handle of the labels in PbString format. bi::managed_external_buffer::handle_t labels_shm_handle; + // The shared memory handle of the buckets in PbMemory format. 
+ bi::managed_external_buffer::handle_t buckets_shm_handle; // The value used for incrementing or setting the metric. double operation_value; // The address of the TRITONSERVER_Metric object. @@ -58,7 +62,10 @@ struct MetricShm { class Metric { public: - Metric(const std::string& labels, void* metric_family_address); + Metric( + const std::string& labels, + std::optional<std::vector<double>> buckets, + void* metric_family_address); ~Metric(); @@ -97,6 +104,10 @@ class Metric { /// \param value The value to set the metric to. void SendSetValueRequest(const double& value); + /// Send the request to the parent process to observe the value to the metric. + /// \param value The value to observe for the metric. + void SendObserveRequest(const double& value); + /// Send the request to the parent process to get the value of the metric. /// \return Returns the value of the metric. double SendGetValueRequest(); @@ -132,6 +143,10 @@ class Metric { /// \param value The value to set the metric to. void SetValue(const double& value); + /// Use Triton C API to record an observation to the histogram metric. + /// \param value The observed value to record. + void Observe(const double& value); + /// Use Triton C API to get the value of the metric. double GetValue(); @@ -146,10 +161,14 @@ class Metric { // The private constructor for creating a Metric object from shared memory. Metric( AllocatedSharedMemory<MetricShm>& custom_metric_shm, - std::unique_ptr<PbString>& labels_shm); + std::unique_ptr<PbString>& labels_shm, + std::unique_ptr<PbMemory>& buckets); // The labels of the metric, which is the identifier of the metric. std::string labels_; + // Monotonically increasing values representing bucket boundaries for creating + // histogram metric. + std::optional<std::vector<double>> buckets_; // The value used for incrementing or setting the metric. double operation_value_; // The address of the TRITONSERVER_Metric object. 
@@ -168,6 +187,7 @@ class Metric { MetricShm* custom_metric_shm_ptr_; bi::managed_external_buffer::handle_t shm_handle_; std::unique_ptr<PbString> labels_shm_; + std::unique_ptr<PbMemory> buckets_shm_; }; }}}; // namespace triton::backend::python diff --git a/src/metric_family.cc b/src/metric_family.cc index 77e8aedf..222a0e23 100644 --- a/src/metric_family.cc +++ b/src/metric_family.cc @@ -1,4 +1,4 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -166,19 +166,39 @@ MetricFamily::SendCreateMetricFamilyRequest() } std::shared_ptr<Metric> -MetricFamily::CreateMetric(const py::object& labels) +MetricFamily::CreateMetric(const py::object& labels, const py::object& buckets) { if (!labels.is_none()) { if (!py::isinstance<py::dict>(labels)) { throw PythonBackendException( - "Failed to create metric. Labels must be a " - "dictionary."); + "Failed to create metric. Labels must be a dictionary."); } } py::module json = py::module_::import("json"); std::string labels_str = std::string(py::str(json.attr("dumps")(labels))); - auto metric = std::make_shared<Metric>(labels_str, metric_family_address_); + + std::optional<std::vector<double>> buckets_vec; + if (!buckets.is_none()) { + if (!py::isinstance<py::list>(buckets)) { + throw PythonBackendException( + "Failed to create metric. Buckets must be a list."); + } + if (kind_ == kCounter || kind_ == kGauge) { + throw PythonBackendException( + "Failed to create metric. Unexpected buckets found."); + } + buckets_vec = buckets.cast<std::vector<double>>(); + } else { + if (kind_ == kHistogram) { + throw PythonBackendException( + "Failed to create metric. 
Missing required buckets."); + } + buckets_vec = std::nullopt; + } + + auto metric = + std::make_shared<Metric>(labels_str, buckets_vec, metric_family_address_); { std::lock_guard<std::mutex> lock(metric_map_mu_); metric_map_.insert({metric->MetricAddress(), metric}); @@ -205,6 +225,8 @@ MetricFamily::ToTritonServerMetricKind(const MetricKind& kind) return TRITONSERVER_METRIC_KIND_COUNTER; case kGauge: return TRITONSERVER_METRIC_KIND_GAUGE; + case kHistogram: + return TRITONSERVER_METRIC_KIND_HISTOGRAM; default: throw PythonBackendException("Unknown metric kind"); } diff --git a/src/metric_family.h b/src/metric_family.h index 04374a68..2b5f86ab 100644 --- a/src/metric_family.h +++ b/src/metric_family.h @@ -1,4 +1,4 @@ -// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -97,8 +97,11 @@ class MetricFamily { /// Create a metric from the metric family and store it in the metric map. /// \param labels The labels of the metric. + /// \param buckets Monotonically increasing values representing bucket + /// boundaries for creating histogram metric. /// \return Returns the shared pointer to the created metric. - std::shared_ptr<Metric> CreateMetric(const py::object& labels); + std::shared_ptr<Metric> CreateMetric( + const py::object& labels, const py::object& buckets); #else /// Initialize the TRITONSERVER_MetricFamily object. /// \return Returns the address of the TRITONSERVER_MetricFamily object. @@ -128,8 +131,8 @@ class MetricFamily { std::string name_; // The description of the metric family. std::string description_; - // The metric kind of the metric family. Currently only supports GAUGE and - // COUNTER. + // The metric kind of the metric family. Currently only supports GAUGE, + // COUNTER and HISTOGRAM. 
MetricKind kind_; // The address of the TRITONSERVER_MetricFamily object. void* metric_family_address_; diff --git a/src/pb_stub.cc b/src/pb_stub.cc index 2a6be556..007e7f29 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -1824,11 +1824,13 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) py::class_<Metric, std::shared_ptr<Metric>>(module, "Metric") .def("increment", &Metric::SendIncrementRequest) .def("set", &Metric::SendSetValueRequest) + .def("observe", &Metric::SendObserveRequest) .def("value", &Metric::SendGetValueRequest); py::enum_<MetricKind>(module, "MetricKind") .value("COUNTER", MetricKind::kCounter) .value("GAUGE", MetricKind::kGauge) + .value("HISTOGRAM", MetricKind::kHistogram) .export_values(); py::class_<MetricFamily, std::shared_ptr<MetricFamily>>( @@ -1839,9 +1841,11 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) py::arg("kind").none(false)) .def( "Metric", &MetricFamily::CreateMetric, - py::arg("labels").none(true) = py::none()); + py::arg("labels").none(true) = py::none(), + py::arg("buckets").none(true) = py::none()); module.attr("MetricFamily").attr("COUNTER") = MetricKind::kCounter; module.attr("MetricFamily").attr("GAUGE") = MetricKind::kGauge; + module.attr("MetricFamily").attr("HISTOGRAM") = MetricKind::kHistogram; module.def( "load_model", &LoadModel, py::arg("model_name").none(false), diff --git a/src/pb_utils.h b/src/pb_utils.h index 1a6c2d8b..e68cfb0f 100644 --- a/src/pb_utils.h +++ b/src/pb_utils.h @@ -1,4 +1,4 @@ -// Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -171,7 +171,7 @@ struct ResponseBatch : SendMessageBase { enum LogLevel { kInfo = 0, kWarning, kError, kVerbose }; -enum MetricKind { kCounter = 0, kGauge }; +enum MetricKind { kCounter = 0, kGauge, kHistogram }; struct LogSendMessage : SendMessageBase { bi::managed_external_buffer::handle_t filename; diff --git a/src/python_be.cc b/src/python_be.cc index 2212176d..761abdbf 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -758,7 +758,8 @@ ModelInstanceState::StubToParentMQMonitor() case PYTHONSTUB_MetricRequestDelete: case PYTHONSTUB_MetricRequestValue: case PYTHONSTUB_MetricRequestIncrement: - case PYTHONSTUB_MetricRequestSet: { + case PYTHONSTUB_MetricRequestSet: + case PYTHONSTUB_MetricRequestObserve: { ProcessMetricRequest(message); break; } @@ -978,6 +979,7 @@ ModelInstanceState::ProcessMetricRequest( } case PYTHONSTUB_MetricRequestIncrement: case PYTHONSTUB_MetricRequestSet: + case PYTHONSTUB_MetricRequestObserve: case PYTHONSTUB_MetricRequestValue: { metric->HandleMetricOperation(metrics_message_ptr, command); break;