From 5c1d217d780adb9ff622ef6e6fc4febfd652f73a Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 4 Aug 2022 14:44:06 -0500 Subject: [PATCH 01/31] changes for new triton version --- HeterogeneousCore/SonicTriton/src/TritonData.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc index d01d397349a78..c2af2c0fbb607 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonData.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc @@ -4,11 +4,11 @@ #include "FWCore/MessageLogger/interface/MessageLogger.h" #include "model_config.pb.h" -#include "model_config.h" +#include "triton/common/model_config.h" #include -namespace ni = nvidia::inferenceserver; +namespace tco = triton::common; namespace tc = triton::client; //dims: kept constant, represents config.pbtxt parameters of model (converted from google::protobuf::RepeatedField to vector) @@ -32,8 +32,8 @@ TritonData::TritonData(const std::string& name, variableDims_(anyNeg(shape_)), productDims_(variableDims_ ? -1 : dimProduct(shape_)), dname_(model_info.datatype()), - dtype_(ni::ProtocolStringToDataType(dname_)), - byteSize_(ni::GetDataTypeByteSize(dtype_)), + dtype_(tco::ProtocolStringToDataType(dname_)), + byteSize_(tco::GetDataTypeByteSize(dtype_)), totalByteSize_(0) { //create input or output object IO* iotmp; From 9d0ab5683f170aeceffab403c11fd1ff7a8eb3d7 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Fri, 15 Apr 2022 19:25:52 -0500 Subject: [PATCH 02/31] combine shape/request info into TritonDataEntry for multi-request ragged batching (WIP) --- .../SonicTriton/interface/TritonData.h | 51 ++++++-- .../SonicTriton/src/TritonData.cc | 118 +++++++++++++----- 2 files changed, 125 insertions(+), 44 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h index 77affcb8e5228..c2128accf21c7 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonData.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h @@ -55,8 +55,8 @@ class TritonData { TritonData(const std::string& name, const TensorMetadata& model_info, TritonClient* client, const std::string& pid); //some members can be modified - void setShape(const ShapeType& newShape); - void setShape(unsigned loc, int64_t val); + void setShape(const ShapeType& newShape, unsigned entry=0); + void setShape(unsigned loc, int64_t val, unsigned entry=0); //io accessors template @@ -68,7 +68,7 @@ class TritonData { TritonOutput
fromServer() const; //const accessors - const ShapeView& shape() const { return shape_; } + const ShapeView& shape(unsigned entry=0) const { return entries_.at(entry).shape_; } int64_t byteSize() const { return byteSize_; } const std::string& dname() const { return dname_; } unsigned batchSize() const { return batchSize_; } @@ -77,7 +77,7 @@ class TritonData { bool variableDims() const { return variableDims_; } int64_t sizeDims() const { return productDims_; } //default to dims if shape isn't filled - int64_t sizeShape() const { return variableDims_ ? dimProduct(shape_) : sizeDims(); } + int64_t sizeShape(unsigned entry=0) const { return variableDims_ ? dimProduct(entries_.at(entry).shape_) : sizeDims(); } private: friend class TritonClient; @@ -88,12 +88,45 @@ class TritonData { friend class TritonGpuShmResource; #endif + //group together all relevant information for a single request + //helpful for organizing multi-request ragged batching case + class TritonDataEntry { + public: + //constructors + TritonDataEntry(const ShapeType& dims, bool noBatch, const std::string& name, const std::string& dname)) + : fullShape_(dims), + shape_(fullShape_.begin() + (noBatch ? 0 : 1), fullShape_.end()), + sizeShape_(0), + byteSizePerBatch_(0) { + //create input or output object + IO* iotmp; + createObject(&iotmp, name, dname); + data_.reset(iotmp); + } + + private: + friend class TritonData; + + //accessors + void createObject(IO** ioptr, const std::string& name, const std::string& dname)); + void computeSizes(int64_t byteSize); + void resetSizes(); + + //members + ShapeType fullShape_; + ShapeView shape_; + size_t sizeShape_; + size_t byteSizePerBatch_; + std::shared_ptr data_; + }; + //private accessors only used internally or by client + void checkShm() {} unsigned fullLoc(unsigned loc) const { return loc + (noBatch_ ? 0 : 1); } void setBatchSize(unsigned bsize); void reset(); void setResult(std::shared_ptr result) { result_ = result; } - IO* data() { return data_.get(); } + IO* data(unsigned entry=0) { return entries[entry].data_.get(); } void updateMem(size_t size); void computeSizes(); void resetSizes(); @@ -112,32 +145,28 @@ class TritonData { int64_t dimProduct(const ShapeView& vec) const { return std::accumulate(vec.begin(), vec.end(), 1, std::multiplies()); } - void createObject(IO** ioptr); //generates a unique id number for each instance of the class unsigned uid() const { static std::atomic uid{0}; return ++uid; } std::string xput() const; + void addEntry(unsigned entry); //members std::string name_; - std::shared_ptr data_; TritonClient* client_; bool useShm_; std::string shmName_; const ShapeType dims_; bool noBatch_; unsigned batchSize_; - ShapeType fullShape_; - ShapeView shape_; bool variableDims_; int64_t productDims_; std::string dname_; inference::DataType dtype_; int64_t byteSize_; - size_t sizeShape_; - size_t byteSizePerBatch_; + std::vector> entries_; size_t totalByteSize_; //can be modified in otherwise-const fromServer() method in TritonMemResource::copyOutput(): //TritonMemResource holds a non-const pointer to an instance of this class diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc index c2af2c0fbb607..b533f60bb2730 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonData.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc @@ -27,30 +27,49 @@ TritonData::TritonData(const std::string& name, dims_(model_info.shape().begin(), model_info.shape().end()), noBatch_(client_->noBatch()), batchSize_(0), - fullShape_(dims_), - shape_(fullShape_.begin() + (noBatch_ ? 0 : 1), fullShape_.end()), - variableDims_(anyNeg(shape_)), - productDims_(variableDims_ ? -1 : dimProduct(shape_)), dname_(model_info.datatype()), dtype_(tco::ProtocolStringToDataType(dname_)), byteSize_(tco::GetDataTypeByteSize(dtype_)), totalByteSize_(0) { - //create input or output object - IO* iotmp; - createObject(&iotmp); - data_.reset(iotmp); + //initialize first shape entry + addEntry(1); + //one-time computation of some shape info + variableDims_ = anyNeg(entries_.front().shape_)); + productDims_ = variableDims_ ? -1 : dimProduct(entries_.front().shape_); + checkShm(); } template <> -void TritonInputData::createObject(tc::InferInput** ioptr) { - tc::InferInput::Create(ioptr, name_, fullShape_, dname_); +void TritonOutputData::checkShm() { + //another specialization for output: can't use shared memory if output size is not known + useShm_ &= !variableDims_; +} + +template +void TritonData::addEntry(unsigned entry) { + if (entry > entries_.size()) { + entries_.reserve(entry+1); + for (unsigned i = entries_.size(); i < entry+1; ++i) { + entries_.emplace_back(dims_, noBatch_, name_, dname_); + //todo: should each entry have its own batch size? + //for now, restrict multi-request mode for ragged batching to batch size = 1 + if (entry>1 and !noBatch_) + entries_.back().fullShape_[0] = 1; + } + //go back and fix the first one + if (entry>1 and !noBatch_) + entries_[0].fullShape_[0] = 1; + } } template <> -void TritonOutputData::createObject(tc::InferRequestedOutput** ioptr) { - tc::InferRequestedOutput::Create(ioptr, name_); - //another specialization for output: can't use shared memory if output size is not known - useShm_ &= !variableDims_; +void TritonInputData::TritonDataEntry::createObject(tc::InferInput** ioptr, const std::string& name, const std::string& dname) { + tc::InferInput::Create(ioptr, name, fullShape_, dname); +} + +template <> +void TritonOutputData::TritonDataEntry::createObject(tc::InferRequestedOutput** ioptr, const std::string& name, const std::string& dname)) { + tc::InferRequestedOutput::Create(ioptr, name); } template <> @@ -70,24 +89,27 @@ tc::InferenceServerGrpcClient* TritonData::client() { //setters template -void TritonData::setShape(const TritonData::ShapeType& newShape) { +void TritonData::setShape(const TritonData::ShapeType& newShape, unsigned entry) { + addEntry(entry); for (unsigned i = 0; i < newShape.size(); ++i) { - setShape(i, newShape[i]); + setShape(i, newShape[i], entry); } } template -void TritonData::setShape(unsigned loc, int64_t val) { +void TritonData::setShape(unsigned loc, int64_t val, unsigned entry) { + addEntry(entry); + unsigned locFull = fullLoc(loc); //check boundary - if (locFull >= fullShape_.size()) + if (locFull >= entries_[entry].fullShape_.size()) throw cms::Exception("TritonDataError") - << name_ << " setShape(): dimension " << locFull << " out of bounds (" << fullShape_.size() << ")"; + << name_ << " setShape(): dimension " << locFull << " out of bounds (" << entries_[entry].fullShape_.size() << ")"; - if (val != fullShape_[locFull]) { + if (val != entries_[entry].fullShape_[locFull]) { if (dims_[locFull] == -1) - fullShape_[locFull] = val; + entries_[entry].fullShape_[entry][locFull] = val; else throw cms::Exception("TritonDataError") << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc; @@ -97,20 +119,39 @@ void TritonData::setShape(unsigned loc, int64_t val) { template void TritonData::setBatchSize(unsigned bsize) { batchSize_ = bsize; - if (!noBatch_) - fullShape_[0] = batchSize_; + if (!noBatch_) { + if (entries_[0].fullShape_.size()==1) + entries_[0].fullShape_ = batchSize_; + else + throw cms::Exception("TritonDataError") << "attempt to set batch size to " << bsize << " when ragged batching is in use"; + } +} + +template +void TritonData::TritonDataEntry::computeSizes(int64_t shapeSize, int64_t byteSize) { + sizeShape_ = shapeSize; + byteSizePerBatch_ = byteSize * sizeShape_; } template void TritonData::computeSizes() { - sizeShape_ = sizeShape(); - byteSizePerBatch_ = byteSize_ * sizeShape_; - totalByteSize_ = byteSizePerBatch_ * batchSize_; + for (unsigned i = 0; i < entries_.size(); ++i) { + entries_[i].computeSizes(sizeShape(i), byteSize_); + totalByteSize_ += entries_[i].byteSizePerBatch_ * batchSize_; + } } + template -void TritonData::resetSizes() { +void TritonData::TritonDataEntry::resetSizes() { sizeShape_ = 0; byteSizePerBatch_ = 0; +} + +template +void TritonData::resetSizes() { + for (unsigned i = 0; i < entries_.size(); ++i) { + entries_[i].resetSizes(); + } totalByteSize_ = 0; } @@ -168,21 +209,32 @@ void TritonInputData::toServer(TritonInputContainer
ptr) { const auto& data_in = *ptr; //check batch size - if (data_in.size() != batchSize_) { + if (entries_.size()==1 and data_in.size() != batchSize_)) { throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size() << " but specified batch size is " << batchSize_; } - - //shape must be specified for variable dims or if batch size changes - data_->SetShape(fullShape_); + else if (entries_.size()>1 and data_in.size() != entries_.size()) { + throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size() + << " but specified entries size is " << entries_.size(); + } //check type checkType
(); computeSizes(); updateMem(totalByteSize_); - for (unsigned i0 = 0; i0 < batchSize_; ++i0) { - memResource_->copyInput(data_in[i0].data(), i0 * byteSizePerBatch_); + + unsigned counter = 0; + for (unsigned i = 0; i < entries_.size(); ++i) { + auto& entry = entries_[i]; + + //shape must be specified for variable dims or if batch size changes + entry.data_->SetShape(entry.fullShape_); + + for (unsigned i0 = 0; i0 < batchSize_; ++i0) { + memResource_->copyInput(data_in[counter].data(), counter * entry.byteSizePerBatch_); + ++counter; + } } memResource_->set(); From e9f8c618ca9ae6d3e715957f545f503b563d24d0 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Mon, 18 Apr 2022 16:26:09 -0500 Subject: [PATCH 03/31] finish initial propagation (still WIP) --- .../SonicTriton/interface/TritonClient.h | 8 +- .../SonicTriton/interface/TritonData.h | 9 +- .../SonicTriton/src/TritonClient.cc | 118 ++++++++++++------ .../SonicTriton/src/TritonData.cc | 48 +++---- 4 files changed, 100 insertions(+), 83 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h index a44e3697742f7..e780bc4136116 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h @@ -50,7 +50,7 @@ class TritonClient : public SonicClient { protected: //helpers - void getResults(std::shared_ptr results); + void getResults(std::vector& results); void evaluate() override; template bool handle_exception(F&& call); @@ -71,13 +71,9 @@ class TritonClient : public SonicClient { grpc_compression_algorithm compressionAlgo_; triton::client::Headers headers_; - //IO pointers for triton - std::vector inputsTriton_; - std::vector outputsTriton_; - std::unique_ptr client_; //stores timeout, model name and version - triton::client::InferOptions options_; + std::vector options_; private: friend TritonInputData; diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h index c2128accf21c7..caef6f206d61c 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonData.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h @@ -106,11 +106,11 @@ class TritonData { private: friend class TritonData; + friend class TritonClient; //accessors void createObject(IO** ioptr, const std::string& name, const std::string& dname)); void computeSizes(int64_t byteSize); - void resetSizes(); //members ShapeType fullShape_; @@ -118,6 +118,7 @@ class TritonData { size_t sizeShape_; size_t byteSizePerBatch_; std::shared_ptr data_; + std::shared_ptr result_; }; //private accessors only used internally or by client @@ -125,11 +126,10 @@ class TritonData { unsigned fullLoc(unsigned loc) const { return loc + (noBatch_ ? 0 : 1); } void setBatchSize(unsigned bsize); void reset(); - void setResult(std::shared_ptr result) { result_ = result; } - IO* data(unsigned entry=0) { return entries[entry].data_.get(); } + void setResult(Result* result, unsigned entry=0) { entries_[entry].result_ = std::make_shared(result); } + IO* data(unsigned entry=0) { return entries_[entry].data_.get(); } void updateMem(size_t size); void computeSizes(); - void resetSizes(); triton::client::InferenceServerGrpcClient* client(); template void checkType() const { @@ -173,7 +173,6 @@ class TritonData { //so that TritonOutputGpuShmResource can store data here std::shared_ptr holder_; std::shared_ptr> memResource_; - std::shared_ptr result_; //can be modified in otherwise-const fromServer() method to prevent multiple calls CMS_SA_ALLOW mutable bool done_{}; }; diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index f0d662f866d8d..f355af41a17ea 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -41,11 +41,11 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d verbose_(params.getUntrackedParameter("verbose")), useSharedMemory_(params.getUntrackedParameter("useSharedMemory")), compressionAlgo_(getCompressionAlgo(params.getUntrackedParameter("compression"))), - options_(params.getParameter("modelName")) { + options_(1,params.getParameter("modelName")) { //get appropriate server for this model edm::Service ts; const auto& server = - ts->serverInfo(options_.model_name_, params.getUntrackedParameter("preferredServer")); + ts->serverInfo(options_[0].model_name_, params.getUntrackedParameter("preferredServer")); serverType_ = server.type; if (verbose_) edm::LogInfo(fullDebugName_) << "Using server: " << server.url; @@ -60,13 +60,13 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d "TritonClient(): unable to create inference context"); //set options - options_.model_version_ = params.getParameter("modelVersion"); + options_[0].model_version_ = params.getParameter("modelVersion"); //convert seconds to microseconds - options_.client_timeout_ = params.getUntrackedParameter("timeout") * 1e6; + options_[0].client_timeout_ = params.getUntrackedParameter("timeout") * 1e6; //config needed for batch size inference::ModelConfigResponse modelConfigResponse; - TRITON_THROW_IF_ERROR(client_->ModelConfig(&modelConfigResponse, options_.model_name_, options_.model_version_), + TRITON_THROW_IF_ERROR(client_->ModelConfig(&modelConfigResponse, options_[0].model_name_, options_[0].model_version_), "TritonClient(): unable to get model config"); inference::ModelConfig modelConfig(modelConfigResponse.config()); @@ -80,7 +80,7 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d //get model info inference::ModelMetadataResponse modelMetadata; - TRITON_THROW_IF_ERROR(client_->ModelMetadata(&modelMetadata, options_.model_name_, options_.model_version_), + TRITON_THROW_IF_ERROR(client_->ModelMetadata(&modelMetadata, options_[0].model_name_, options_[0].model_version_), "TritonClient(): unable to get model metadata"); //get input and output (which know their sizes) @@ -108,14 +108,12 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d if (verbose_) io_msg << "Model inputs: " << "\n"; - inputsTriton_.reserve(nicInputs.size()); for (const auto& nicInput : nicInputs) { const auto& iname = nicInput.name(); auto [curr_itr, success] = input_.emplace(std::piecewise_construct, std::forward_as_tuple(iname), std::forward_as_tuple(iname, nicInput, this, ts->pid())); auto& curr_input = curr_itr->second; - inputsTriton_.push_back(curr_input.data()); if (verbose_) { io_msg << " " << iname << " (" << curr_input.dname() << ", " << curr_input.byteSize() << " b) : " << triton_utils::printColl(curr_input.shape()) << "\n"; @@ -130,7 +128,6 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d if (verbose_) io_msg << "Model outputs: " << "\n"; - outputsTriton_.reserve(nicOutputs.size()); for (const auto& nicOutput : nicOutputs) { const auto& oname = nicOutput.name(); if (!s_outputs.empty() and s_outputs.find(oname) == s_outputs.end()) @@ -139,7 +136,6 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d std::forward_as_tuple(oname), std::forward_as_tuple(oname, nicOutput, this, ts->pid())); auto& curr_output = curr_itr->second; - outputsTriton_.push_back(curr_output.data()); if (verbose_) { io_msg << " " << oname << " (" << curr_output.dname() << ", " << curr_output.byteSize() << " b) : " << triton_utils::printColl(curr_output.shape()) << "\n"; @@ -159,8 +155,8 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d //print model info std::stringstream model_msg; if (verbose_) { - model_msg << "Model name: " << options_.model_name_ << "\n" - << "Model version: " << options_.model_version_ << "\n" + model_msg << "Model name: " << options_[0].model_name_ << "\n" + << "Model version: " << options_[0].model_version_ << "\n" << "Model max batch size: " << (noBatch_ ? 0 : maxBatchSize_) << "\n"; edm::LogInfo(fullDebugName_) << model_msg.str() << io_msg.str(); } @@ -222,19 +218,23 @@ bool TritonClient::handle_exception(F&& call) { } } -void TritonClient::getResults(std::shared_ptr results) { +void TritonClient::getResults(std::vector& results) { for (auto& [oname, output] : output_) { - //set shape here before output becomes const - if (output.variableDims()) { - std::vector tmp_shape; - TRITON_THROW_IF_ERROR(results->Shape(oname, &tmp_shape), "getResults(): unable to get output shape for " + oname); - if (!noBatch_) - tmp_shape.erase(tmp_shape.begin()); - output.setShape(tmp_shape); - output.computeSizes(); + for (unsigned i = 0; i < results.size(); ++i) { + auto result = results[i]; + //set shape here before output becomes const + if (output.variableDims()) { + std::vector tmp_shape; + TRITON_THROW_IF_ERROR(result->Shape(oname, &tmp_shape), "getResults(): unable to get output shape for " + oname); + if (!noBatch_) + tmp_shape.erase(tmp_shape.begin()); + output.setShape(tmp_shape,i); + } + //extend lifetime + output.setResult(result,i); } - //extend lifetime - output.setResult(results); + //compute size after getting all result entries + output.computeSizes(); } } @@ -246,8 +246,44 @@ void TritonClient::evaluate() { return; } - //set up shared memory for output + //set up input pointers for triton (generalized for multi-request ragged batching case) + //one vector per request + std::vector> inputsTriton; + unsigned nEntries = input_.begin()->second.entries_.size(); + inputsTriton.resize(nEntries); + for (auto& inputTriton : inputsTriton) { + inputTriton.reserve(inputs_.size()); + } + //consistency check + //todo: move addEntry to a TritonClient function that auto loops over all inputs? & outputs? auto success = handle_exception([&]() { + std::vector nEntriesAll; + nEntriesAll.reserve(input_.size()); + for (auto& [iname, input] : input_) { + nEntriesAll.push_back(input.entries_.size()); + } + if (std::adjacent_find(nEntriesAll.begin(), nEntriesAll.end(), std::not_equal_to<>()) != nEntriesAll.end()) + throw cms::Exception("InconsistentInput") << "Different numbers of entries among different inputs: " << printColl(nEntriesAll); + }); + if (!success) + return; + for (auto& [iname, input] : input_) { + for (unsigned i = 0; i < nEntries; ++i){ + inputsTriton[i].push_back(input.data(i)); + } + } + + //set up output pointers accordingly (same number of entries as input) + std::vector> outputsTriton_; + for (auto& [oname, output] : output_) { + output.addEntry(nEntries); + for (unsigned i = 0; i < nEntries; ++i){ + outputsTriton[i].push_back(output.data(i)); + } + } + + //set up shared memory for output + success = handle_exception([&]() { for (auto& element : output_) { element.second.prepare(); } @@ -268,18 +304,19 @@ void TritonClient::evaluate() { //non-blocking call success = handle_exception([&]() { TRITON_THROW_IF_ERROR( - client_->AsyncInfer( - [start_status, this](tc::InferResult* results) { - //get results - std::shared_ptr results_ptr(results); - auto success = handle_exception( - [&]() { TRITON_THROW_IF_ERROR(results_ptr->RequestStatus(), "evaluate(): unable to get result"); }); - if (!success) - return; + client_->AsyncInferMulti( + [start_status, this](std::vector results) { + //check results + for (auto ptr : results){ + auto success = handle_exception( + [&]() { TRITON_THROW_IF_ERROR(ptr->RequestStatus(), "evaluate(): unable to get result(s)"); }); + if (!success) + return; + } if (verbose()) { inference::ModelStatistics end_status; - success = handle_exception([&]() { end_status = getServerSideStatus(); }); + auto success = handle_exception([&]() { end_status = getServerSideStatus(); }); if (!success) return; @@ -288,7 +325,7 @@ void TritonClient::evaluate() { } //check result - success = handle_exception([&]() { getResults(results_ptr); }); + auto success = handle_exception([&]() { getResults(results); }); if (!success) return; @@ -296,8 +333,8 @@ void TritonClient::evaluate() { finish(true); }, options_, - inputsTriton_, - outputsTriton_, + inputsTriton, + outputsTriton, headers_, compressionAlgo_), "evaluate(): unable to launch async run"); @@ -306,10 +343,10 @@ void TritonClient::evaluate() { return; } else { //blocking call - tc::InferResult* results; + std::vector results; success = handle_exception([&]() { TRITON_THROW_IF_ERROR( - client_->Infer(&results, options_, inputsTriton_, outputsTriton_, headers_, compressionAlgo_), + client_->InferMulti(&results, options_, inputsTriton, outputsTriton, headers_, compressionAlgo_), "evaluate(): unable to run and/or get result"); }); if (!success) @@ -325,8 +362,7 @@ void TritonClient::evaluate() { reportServerSideStats(stats); } - std::shared_ptr results_ptr(results); - success = handle_exception([&]() { getResults(results_ptr); }); + success = handle_exception([&]() { getResults(results); }); if (!success) return; @@ -395,7 +431,7 @@ TritonClient::ServerSideStats TritonClient::summarizeServerStats(const inference inference::ModelStatistics TritonClient::getServerSideStatus() const { if (verbose_) { inference::ModelStatisticsResponse resp; - TRITON_THROW_IF_ERROR(client_->ModelInferenceStatistics(&resp, options_.model_name_, options_.model_version_), + TRITON_THROW_IF_ERROR(client_->ModelInferenceStatistics(&resp, options_[0].model_name_, options_[0].model_version_), "getServerSideStatus(): unable to get model statistics"); return *(resp.model_stats().begin()); } diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc index b533f60bb2730..89f2c4d7af9c8 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonData.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc @@ -57,8 +57,9 @@ void TritonData::addEntry(unsigned entry) { entries_.back().fullShape_[0] = 1; } //go back and fix the first one - if (entry>1 and !noBatch_) - entries_[0].fullShape_[0] = 1; + if (entry>1 and !noBatch_) { + batchSize_ = entries_[0].fullShape_[0] = 1; + } } } @@ -141,20 +142,6 @@ void TritonData::computeSizes() { } } -template -void TritonData::TritonDataEntry::resetSizes() { - sizeShape_ = 0; - byteSizePerBatch_ = 0; -} - -template -void TritonData::resetSizes() { - for (unsigned i = 0; i < entries_.size(); ++i) { - entries_[i].resetSizes(); - } - totalByteSize_ = 0; -} - //create a memory resource if none exists; //otherwise, reuse the memory resource, resizing it if necessary template @@ -269,10 +256,16 @@ TritonOutput
TritonOutputData::fromServer() const { const DT* r1 = reinterpret_cast(r0); TritonOutput
dataOut; - dataOut.reserve(batchSize_); - for (unsigned i0 = 0; i0 < batchSize_; ++i0) { - auto offset = i0 * sizeShape_; - dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_); + dataOut.reserve(std::max(batchSize_, entries_.size())); + unsigned counter = 0; + for (unsigned i = 0; i < entries_.size(); ++i) { + auto& entry = entries_[i]; + + for (unsigned i0 = 0; i0 < batchSize_; ++i0) { + auto offset = counter * entry.sizeShape_; + dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_); + ++counter; + } } done_ = true; @@ -283,23 +276,16 @@ template <> void TritonInputData::reset() { done_ = false; holder_.reset(); - data_->Reset(); - //reset shape - if (variableDims_) { - for (unsigned i = 0; i < shape_.size(); ++i) { - unsigned locFull = fullLoc(i); - fullShape_[locFull] = dims_[locFull]; - } - } - resetSizes(); + entries_.clear(); + totalByteSize_ = 0; } template <> void TritonOutputData::reset() { done_ = false; - result_.reset(); holder_.reset(); - resetSizes(); + entries_.clear(); + totalByteSize_ = 0; } //explicit template instantiation declarations From f570f3cb8efa310a66597ff22347f1e71febc649 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Mon, 18 Apr 2022 16:41:20 -0500 Subject: [PATCH 04/31] simplify synchronization of nEntries across inputs/outputs --- .../SonicTriton/interface/TritonClient.h | 1 + .../SonicTriton/interface/TritonData.h | 1 + .../SonicTriton/src/TritonClient.cc | 37 +++++++++---------- .../SonicTriton/src/TritonData.cc | 24 ++++++------ 4 files changed, 33 insertions(+), 30 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h index e780bc4136116..50c4c5ee83642 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h @@ -81,6 +81,7 @@ class TritonClient : public SonicClient { //private accessors only used by data auto client() { return client_.get(); } + void addEntry(unsigned entry); }; #endif diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h index caef6f206d61c..d6e891c10eea2 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonData.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h @@ -152,6 +152,7 @@ class TritonData { } std::string xput() const; void addEntry(unsigned entry); + void addEntryImpl(unsigned entry); //members std::string name_; diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index f355af41a17ea..d3cf1dca0d4eb 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -189,6 +189,17 @@ bool TritonClient::setBatchSize(unsigned bsize) { } } +void TritonClient::addEntry(unsigned entry) { + for (auto& element : input_) { + element.second.addEntryImpl(entry); + } + for (auto& element : output_) { + element.second.addEntryImpl(entry); + } + if (entry>1) + setBatchSize(1); +} + void TritonClient::reset() { for (auto& element : input_) { element.second.reset(); @@ -248,42 +259,30 @@ void TritonClient::evaluate() { //set up input pointers for triton (generalized for multi-request ragged batching case) //one vector per request - std::vector> inputsTriton; unsigned nEntries = input_.begin()->second.entries_.size(); - inputsTriton.resize(nEntries); + std::vector> inputsTriton(nEntries); for (auto& inputTriton : inputsTriton) { inputTriton.reserve(inputs_.size()); } - //consistency check - //todo: move addEntry to a TritonClient function that auto loops over all inputs? & outputs? - auto success = handle_exception([&]() { - std::vector nEntriesAll; - nEntriesAll.reserve(input_.size()); - for (auto& [iname, input] : input_) { - nEntriesAll.push_back(input.entries_.size()); - } - if (std::adjacent_find(nEntriesAll.begin(), nEntriesAll.end(), std::not_equal_to<>()) != nEntriesAll.end()) - throw cms::Exception("InconsistentInput") << "Different numbers of entries among different inputs: " << printColl(nEntriesAll); - }); - if (!success) - return; for (auto& [iname, input] : input_) { for (unsigned i = 0; i < nEntries; ++i){ inputsTriton[i].push_back(input.data(i)); } } - //set up output pointers accordingly (same number of entries as input) - std::vector> outputsTriton_; + //set up output pointers similarly + std::vector> outputsTriton(nEntries); + for (auto& outputTriton : outputsTriton) { + outputTriton.reserve(outputs_.size()); + } for (auto& [oname, output] : output_) { - output.addEntry(nEntries); for (unsigned i = 0; i < nEntries; ++i){ outputsTriton[i].push_back(output.data(i)); } } //set up shared memory for output - success = handle_exception([&]() { + auto success = handle_exception([&]() { for (auto& element : output_) { element.second.prepare(); } diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc index 89f2c4d7af9c8..e2d77d5b9543e 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonData.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc @@ -32,7 +32,7 @@ TritonData::TritonData(const std::string& name, byteSize_(tco::GetDataTypeByteSize(dtype_)), totalByteSize_(0) { //initialize first shape entry - addEntry(1); + addEntryImpl(1); //one-time computation of some shape info variableDims_ = anyNeg(entries_.front().shape_)); productDims_ = variableDims_ ? -1 : dimProduct(entries_.front().shape_); @@ -47,18 +47,16 @@ void TritonOutputData::checkShm() { template void TritonData::addEntry(unsigned entry) { + //ensures consistency among all inputs + client_->addEntry(entry); +} + +template +void TritonData::addEntryImpl(unsigned entry) { if (entry > entries_.size()) { entries_.reserve(entry+1); for (unsigned i = entries_.size(); i < entry+1; ++i) { entries_.emplace_back(dims_, noBatch_, name_, dname_); - //todo: should each entry have its own batch size? - //for now, restrict multi-request mode for ragged batching to batch size = 1 - if (entry>1 and !noBatch_) - entries_.back().fullShape_[0] = 1; - } - //go back and fix the first one - if (entry>1 and !noBatch_) { - batchSize_ = entries_[0].fullShape_[0] = 1; } } } @@ -121,8 +119,12 @@ template void TritonData::setBatchSize(unsigned bsize) { batchSize_ = bsize; if (!noBatch_) { - if (entries_[0].fullShape_.size()==1) - entries_[0].fullShape_ = batchSize_; + //should only be set to 1 in cases when entries > 1 + if (batchSize_==1 or entries_.size()==1) { + for (auto& entry : entries_) { + entry.fullShape_[0] = batchSize_; + } + } else throw cms::Exception("TritonDataError") << "attempt to set batch size to " << bsize << " when ragged batching is in use"; } From 31db492650b5d888d67fff1616d3a3fc7e4f4e15 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Mon, 18 Apr 2022 17:39:19 -0500 Subject: [PATCH 05/31] fix various mistakes/typos --- .../SonicTriton/interface/TritonData.h | 21 ++++++----- .../SonicTriton/src/TritonClient.cc | 8 ++--- .../SonicTriton/src/TritonData.cc | 36 ++++++++++--------- 3 files changed, 36 insertions(+), 29 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h index d6e891c10eea2..2c3f7898b6aaa 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonData.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h @@ -93,7 +93,7 @@ class TritonData { class TritonDataEntry { public: //constructors - TritonDataEntry(const ShapeType& dims, bool noBatch, const std::string& name, const std::string& dname)) + TritonDataEntry(const ShapeType& dims, bool noBatch, const std::string& name, const std::string& dname) : fullShape_(dims), shape_(fullShape_.begin() + (noBatch ? 0 : 1), fullShape_.end()), sizeShape_(0), @@ -109,8 +109,8 @@ class TritonData { friend class TritonClient; //accessors - void createObject(IO** ioptr, const std::string& name, const std::string& dname)); - void computeSizes(int64_t byteSize); + void createObject(IO** ioptr, const std::string& name, const std::string& dname); + void computeSizes(int64_t shapeSize, int64_t byteSize); //members ShapeType fullShape_; @@ -125,8 +125,9 @@ class TritonData { void checkShm() {} unsigned fullLoc(unsigned loc) const { return loc + (noBatch_ ? 0 : 1); } void setBatchSize(unsigned bsize); + size_t getEntrySize() const { return std::max(static_cast(batchSize_), entries_.size()); } void reset(); - void setResult(Result* result, unsigned entry=0) { entries_[entry].result_ = std::make_shared(result); } + void setResult(Result* result, unsigned entry=0) { entries_[entry].result_ = std::shared_ptr(result); } IO* data(unsigned entry=0) { return entries_[entry].data_.get(); } void updateMem(size_t size); void computeSizes(); @@ -167,7 +168,7 @@ class TritonData { std::string dname_; inference::DataType dtype_; int64_t byteSize_; - std::vector> entries_; + std::vector entries_; size_t totalByteSize_; //can be modified in otherwise-const fromServer() method in TritonMemResource::copyOutput(): //TritonMemResource holds a non-const pointer to an instance of this class @@ -185,6 +186,12 @@ using TritonOutputMap = std::unordered_map; //avoid "explicit specialization after instantiation" error template <> +void TritonInputData::TritonDataEntry::createObject(triton::client::InferInput** ioptr, const std::string& name, const std::string& dname); +template <> +void TritonOutputData::TritonDataEntry::createObject(triton::client::InferRequestedOutput** ioptr, const std::string& name, const std::string& dname); +template <> +void TritonOutputData::checkShm(); +template <> std::string TritonInputData::xput() const; template <> std::string TritonOutputData::xput() const; @@ -203,10 +210,6 @@ template <> void TritonInputData::reset(); template <> void TritonOutputData::reset(); -template <> -void TritonInputData::createObject(triton::client::InferInput** ioptr); -template <> -void TritonOutputData::createObject(triton::client::InferRequestedOutput** ioptr); //explicit template instantiation declarations extern template class TritonData; diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index d3cf1dca0d4eb..263a77fb3d531 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -40,8 +40,8 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d : SonicClient(params, debugName, "TritonClient"), verbose_(params.getUntrackedParameter("verbose")), useSharedMemory_(params.getUntrackedParameter("useSharedMemory")), - compressionAlgo_(getCompressionAlgo(params.getUntrackedParameter("compression"))), - options_(1,params.getParameter("modelName")) { + compressionAlgo_(getCompressionAlgo(params.getUntrackedParameter("compression"))) { + options_.emplace_back(params.getParameter("modelName")); //get appropriate server for this model edm::Service ts; const auto& server = @@ -262,7 +262,7 @@ void TritonClient::evaluate() { unsigned nEntries = input_.begin()->second.entries_.size(); std::vector> inputsTriton(nEntries); for (auto& inputTriton : inputsTriton) { - inputTriton.reserve(inputs_.size()); + inputTriton.reserve(input_.size()); } for (auto& [iname, input] : input_) { for (unsigned i = 0; i < nEntries; ++i){ @@ -273,7 +273,7 @@ void TritonClient::evaluate() { //set up output pointers similarly std::vector> outputsTriton(nEntries); for (auto& outputTriton : outputsTriton) { - outputTriton.reserve(outputs_.size()); + outputTriton.reserve(output_.size()); } for (auto& [oname, output] : output_) { for (unsigned i = 0; i < nEntries; ++i){ diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc index e2d77d5b9543e..dcbd0b942ee0c 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonData.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc @@ -34,7 +34,7 @@ TritonData::TritonData(const std::string& name, //initialize first shape entry addEntryImpl(1); //one-time computation of some shape info - variableDims_ = anyNeg(entries_.front().shape_)); + variableDims_ = anyNeg(entries_.front().shape_); productDims_ = variableDims_ ? -1 : dimProduct(entries_.front().shape_); checkShm(); } @@ -67,7 +67,7 @@ void TritonInputData::TritonDataEntry::createObject(tc::InferInput** ioptr, cons } template <> -void TritonOutputData::TritonDataEntry::createObject(tc::InferRequestedOutput** ioptr, const std::string& name, const std::string& dname)) { +void TritonOutputData::TritonDataEntry::createObject(tc::InferRequestedOutput** ioptr, const std::string& name, const std::string& dname) { tc::InferRequestedOutput::Create(ioptr, name); } @@ -108,7 +108,7 @@ void TritonData::setShape(unsigned loc, int64_t val, unsigned entry) { if (val != entries_[entry].fullShape_[locFull]) { if (dims_[locFull] == -1) - entries_[entry].fullShape_[entry][locFull] = val; + entries_[entry].fullShape_[locFull] = val; else throw cms::Exception("TritonDataError") << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc; @@ -138,6 +138,7 @@ void TritonData::TritonDataEntry::computeSizes(int64_t shapeSize, int64_t by template void TritonData::computeSizes() { + totalByteSize_ = 0; for (unsigned i = 0; i < entries_.size(); ++i) { entries_[i].computeSizes(sizeShape(i), byteSize_); totalByteSize_ += entries_[i].byteSizePerBatch_ * batchSize_; @@ -177,12 +178,15 @@ void TritonData::updateMem(size_t size) { template <> template TritonInputContainer
TritonInputData::allocate(bool reserve) { - //automatically creates a vector for each batch entry (if batch size known) - auto ptr = std::make_shared>(batchSize_); - if (reserve and !anyNeg(shape_)) { + //automatically creates a vector for each entry (if batch size or entry size known) + auto ptr = std::make_shared>(getEntrySize()); + if (reserve) { computeSizes(); - for (auto& vec : *ptr) { - vec.reserve(sizeShape_); + for (auto& entry : entries_){ + if (anyNeg(entry.shape_)) continue; + for (auto& vec : *ptr) { + vec.reserve(entry.sizeShape_); + } } } return ptr; @@ -198,7 +202,7 @@ void TritonInputData::toServer(TritonInputContainer
ptr) { const auto& data_in = *ptr; //check batch size - if (entries_.size()==1 and data_in.size() != batchSize_)) { + if (entries_.size()==1 and data_in.size() != batchSize_) { throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size() << " but specified batch size is " << batchSize_; } @@ -247,10 +251,6 @@ TritonOutput
TritonOutputData::fromServer() const { if (done_) throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event"; - if (!result_) { - throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result"; - } - //check type checkType
(); @@ -258,14 +258,18 @@ TritonOutput
TritonOutputData::fromServer() const { const DT* r1 = reinterpret_cast(r0); TritonOutput
dataOut; - dataOut.reserve(std::max(batchSize_, entries_.size())); + dataOut.reserve(getEntrySize()); unsigned counter = 0; for (unsigned i = 0; i < entries_.size(); ++i) { - auto& entry = entries_[i]; + const auto& entry = entries_[i]; + + if (!entry.result_) { + throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result"; + } for (unsigned i0 = 0; i0 < batchSize_; ++i0) { auto offset = counter * entry.sizeShape_; - dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_); + dataOut.emplace_back(r1 + offset, r1 + offset + entry.sizeShape_); ++counter; } } From 899836db2321fd6adf571404c728be072ca523a4 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Mon, 18 Apr 2022 18:56:06 -0500 Subject: [PATCH 06/31] propagate to mem resources --- .../SonicTriton/interface/TritonData.h | 22 +++++---- .../SonicTriton/interface/TritonMemResource.h | 28 +++++------ .../SonicTriton/src/TritonData.cc | 32 +++++-------- .../SonicTriton/src/TritonMemResource.cc | 48 +++++++++++-------- 4 files changed, 69 insertions(+), 61 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h index 2c3f7898b6aaa..a077ca4f7ef0e 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonData.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h @@ -97,7 +97,10 @@ class TritonData { : fullShape_(dims), shape_(fullShape_.begin() + (noBatch ? 0 : 1), fullShape_.end()), sizeShape_(0), - byteSizePerBatch_(0) { + byteSizePerBatch_(0), + totalByteSize_(0), + offset_(0), + output_(nullptr) { //create input or output object IO* iotmp; createObject(&iotmp, name, dname); @@ -107,18 +110,25 @@ class TritonData { private: friend class TritonData; friend class TritonClient; + friend class TritonMemResource; + friend class TritonHeapResource; + friend class TritonCpuShmResource; +#ifdef TRITON_ENABLE_GPU + friend class TritonGpuShmResource; +#endif //accessors void createObject(IO** ioptr, const std::string& name, const std::string& dname); - void computeSizes(int64_t shapeSize, int64_t byteSize); + void computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize); //members ShapeType fullShape_; ShapeView shape_; - size_t sizeShape_; - size_t byteSizePerBatch_; + size_t sizeShape_, byteSizePerBatch_, totalByteSize_; std::shared_ptr data_; std::shared_ptr result_; + unsigned offset_; + const uint8_t* output_; }; //private accessors only used internally or by client @@ -206,10 +216,6 @@ void TritonOutputData::prepare(); template <> template TritonOutput
TritonOutputData::fromServer() const; -template <> -void TritonInputData::reset(); -template <> -void TritonOutputData::reset(); //explicit template instantiation declarations extern template class TritonData; diff --git a/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h b/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h index f6cf37a029fb5..830a543360301 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h @@ -20,9 +20,9 @@ class TritonMemResource { size_t size() const { return size_; } virtual void close() {} //used for input - virtual void copyInput(const void* values, size_t offset) {} + virtual void copyInput(const void* values, size_t offset, unsigned entry) {} //used for output - virtual const uint8_t* copyOutput() { return nullptr; } + virtual void copyOutput() {} virtual void set(); protected: @@ -39,8 +39,8 @@ class TritonHeapResource : public TritonMemResource { public: TritonHeapResource(TritonData* data, const std::string& name, size_t size); ~TritonHeapResource() override {} - void copyInput(const void* values, size_t offset) override {} - const uint8_t* copyOutput() override { return nullptr; } + void copyInput(const void* values, size_t offset, unsigned entry) override {} + void copyOutput() override {} void set() override {} }; @@ -50,8 +50,8 @@ class TritonCpuShmResource : public TritonMemResource { TritonCpuShmResource(TritonData* data, const std::string& name, size_t size); ~TritonCpuShmResource() override; void close() override; - void copyInput(const void* values, size_t offset) override {} - const uint8_t* copyOutput() override { return nullptr; } + void copyInput(const void* values, size_t offset, unsigned entry) override {} + void copyOutput() override {} }; using TritonInputHeapResource = TritonHeapResource; @@ -61,13 +61,13 @@ using TritonOutputCpuShmResource = TritonCpuShmResource -void TritonInputHeapResource::copyInput(const void* values, size_t offset); +void TritonInputHeapResource::copyInput(const void* values, size_t offset, unsigned entry); template <> -void TritonInputCpuShmResource::copyInput(const void* values, size_t offset); +void TritonInputCpuShmResource::copyInput(const void* values, size_t offset, unsigned entry); template <> -const uint8_t* TritonOutputHeapResource::copyOutput(); +void TritonOutputHeapResource::copyOutput(); template <> -const uint8_t* TritonOutputCpuShmResource::copyOutput(); +void TritonOutputCpuShmResource::copyOutput(); #ifdef TRITON_ENABLE_GPU #include "cuda_runtime_api.h" @@ -78,8 +78,8 @@ class TritonGpuShmResource : public TritonMemResource { TritonGpuShmResource(TritonData* data, const std::string& name, size_t size); ~TritonGpuShmResource() override; void close() override; - void copyInput(const void* values, size_t offset) override {} - const uint8_t* copyOutput() override { return nullptr; } + void copyInput(const void* values, size_t offset, unsigned entry) override {} + void copyOutput() override {} protected: int deviceId_; @@ -91,9 +91,9 @@ using TritonOutputGpuShmResource = TritonGpuShmResource -void TritonInputGpuShmResource::copyInput(const void* values, size_t offset); +void TritonInputGpuShmResource::copyInput(const void* values, size_t offset, unsigned entry); template <> -const uint8_t* TritonOutputGpuShmResource::copyOutput(); +void TritonOutputGpuShmResource::copyOutput(); #endif #endif diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc index dcbd0b942ee0c..0b76f161b5a81 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonData.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc @@ -131,17 +131,19 @@ void TritonData::setBatchSize(unsigned bsize) { } template -void TritonData::TritonDataEntry::computeSizes(int64_t shapeSize, int64_t byteSize) { +void TritonData::TritonDataEntry::computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize) { sizeShape_ = shapeSize; byteSizePerBatch_ = byteSize * sizeShape_; + totalByteSize_ = byteSizePerBatch_ * batchSize; } template void TritonData::computeSizes() { totalByteSize_ = 0; for (unsigned i = 0; i < entries_.size(); ++i) { - entries_[i].computeSizes(sizeShape(i), byteSize_); - totalByteSize_ += entries_[i].byteSizePerBatch_ * batchSize_; + entries_[i].computeSizes(sizeShape(i), byteSize_, batchSize_); + entries_[i].offset_ = totalByteSize_; + totalByteSize_ += entries_[i].totalByteSize_; } } @@ -217,6 +219,7 @@ void TritonInputData::toServer(TritonInputContainer
ptr) { computeSizes(); updateMem(totalByteSize_); + unsigned offset = 0; unsigned counter = 0; for (unsigned i = 0; i < entries_.size(); ++i) { auto& entry = entries_[i]; @@ -225,7 +228,8 @@ void TritonInputData::toServer(TritonInputContainer
ptr) { entry.data_->SetShape(entry.fullShape_); for (unsigned i0 = 0; i0 < batchSize_; ++i0) { - memResource_->copyInput(data_in[counter].data(), counter * entry.byteSizePerBatch_); + memResource_->copyInput(data_in[counter].data(), offset, i); + offset += i0 * entry.byteSizePerBatch_; ++counter; } } @@ -254,23 +258,21 @@ TritonOutput
TritonOutputData::fromServer() const { //check type checkType
(); - const uint8_t* r0 = memResource_->copyOutput(); - const DT* r1 = reinterpret_cast(r0); + memResource_->copyOutput(); TritonOutput
dataOut; dataOut.reserve(getEntrySize()); - unsigned counter = 0; for (unsigned i = 0; i < entries_.size(); ++i) { const auto& entry = entries_[i]; + const DT* r1 = reinterpret_cast(entry.output_); if (!entry.result_) { throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result"; } for (unsigned i0 = 0; i0 < batchSize_; ++i0) { - auto offset = counter * entry.sizeShape_; + auto offset = i0 * entry.sizeShape_; dataOut.emplace_back(r1 + offset, r1 + offset + entry.sizeShape_); - ++counter; } } @@ -278,16 +280,8 @@ TritonOutput
TritonOutputData::fromServer() const { return dataOut; } -template <> -void TritonInputData::reset() { - done_ = false; - holder_.reset(); - entries_.clear(); - totalByteSize_ = 0; -} - -template <> -void TritonOutputData::reset() { +template +void TritonData::reset() { done_ = false; holder_.reset(); entries_.clear(); diff --git a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc index 29a42cc2edf88..ea5d63cea072d 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc @@ -17,8 +17,10 @@ TritonMemResource::TritonMemResource(TritonData* data, const std::string template void TritonMemResource::set() { - TRITON_THROW_IF_ERROR(data_->data_->SetSharedMemory(name_, data_->totalByteSize_, 0), - "unable to set shared memory (" + name_ + ")"); + for (auto& entry : data_->entries_){ + TRITON_THROW_IF_ERROR(entry.data_->SetSharedMemory(name_, entry.totalByteSize_, entry.offset_), + "unable to set shared memory (" + name_ + ")"); + } } template @@ -26,23 +28,25 @@ TritonHeapResource::TritonHeapResource(TritonData* data, const std::stri : TritonMemResource(data, name, size) {} template <> -void TritonInputHeapResource::copyInput(const void* values, size_t offset) { - TRITON_THROW_IF_ERROR(data_->data_->AppendRaw(reinterpret_cast(values), data_->byteSizePerBatch_), +void TritonInputHeapResource::copyInput(const void* values, size_t offset, unsigned entry) { + TRITON_THROW_IF_ERROR(data_->entries_[entry].data_->AppendRaw(reinterpret_cast(values), data_->entries_[entry].byteSizePerBatch_), data_->name_ + " toServer(): unable to set data for batch entry " + - (data_->byteSizePerBatch_ ? std::to_string(offset / data_->byteSizePerBatch_) : "")); + (data_->entries_.size() > 1 ? std::to_string(entry) : data_->entries_[entry].byteSizePerBatch_ ? std::to_string(offset / data_->entries_[entry].byteSizePerBatch_) : "")); } template <> -const uint8_t* TritonOutputHeapResource::copyOutput() { - size_t contentByteSize; - const uint8_t* values; - TRITON_THROW_IF_ERROR(data_->result_->RawData(data_->name_, &values, &contentByteSize), - data_->name_ + " fromServer(): unable to get raw"); +void TritonOutputHeapResource::copyOutput() { + size_t contentByteSize = 0; + for (auto& entry : data_->entries_) { + size_t contentByteSizeEntry; + TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry), + data_->name_ + " fromServer(): unable to get raw"); + contentByteSize += contentByteSizeEntry; + } if (contentByteSize != data_->totalByteSize_) { throw cms::Exception("TritonDataError") << data_->name_ << " fromServer(): unexpected content byte size " << contentByteSize << " (expected " << data_->totalByteSize_ << ")"; } - return values; } //shared memory helpers based on: @@ -108,14 +112,16 @@ void TritonCpuShmResource::close() { } template <> -void TritonInputCpuShmResource::copyInput(const void* values, size_t offset) { +void TritonInputCpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) { if (size_ > 0) - std::memcpy(addr_ + offset, values, data_->byteSizePerBatch_); + std::memcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_); } template <> -const uint8_t* TritonOutputCpuShmResource::copyOutput() { - return addr_; +void TritonOutputCpuShmResource::copyOutput() { + for (auto& entry : data_->entries_) { + entry.output_ = addr_ + entry.offset_; + } } template class TritonHeapResource; @@ -151,21 +157,23 @@ void TritonGpuShmResource::close() { } template <> -void TritonInputGpuShmResource::copyInput(const void* values, size_t offset) { +void TritonInputGpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) { cudaCheck( - cudaMemcpy(addr_ + offset, values, data_->byteSizePerBatch_, cudaMemcpyHostToDevice), - data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->byteSizePerBatch_) + " bytes to GPU"); + cudaMemcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_, cudaMemcpyHostToDevice), + data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->entries_[entry].byteSizePerBatch_) + " bytes to GPU"); } template <> -const uint8_t* TritonOutputGpuShmResource::copyOutput() { +void TritonOutputGpuShmResource::copyOutput() { //copy back from gpu, keep in scope auto ptr = std::make_shared>(data_->totalByteSize_); cudaCheck( cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost), data_->name_ + " fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) + " bytes from GPU"); data_->holder_ = ptr; - return ptr->data(); + for (auto& entry : data_->entries_) { + entry.output_ = ptr->data() + entry.offset_; + } } template class TritonGpuShmResource; From 8321b4fb2375edc0c6ec65be99c4e52905d4964c Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 19 Apr 2022 13:28:04 -0500 Subject: [PATCH 07/31] fix off-by-one issues; unit tests now pass --- HeterogeneousCore/SonicTriton/src/TritonData.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc index 0b76f161b5a81..93d1e015019e8 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonData.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc @@ -32,7 +32,7 @@ TritonData::TritonData(const std::string& name, byteSize_(tco::GetDataTypeByteSize(dtype_)), totalByteSize_(0) { //initialize first shape entry - addEntryImpl(1); + addEntryImpl(0); //one-time computation of some shape info variableDims_ = anyNeg(entries_.front().shape_); productDims_ = variableDims_ ? -1 : dimProduct(entries_.front().shape_); @@ -53,7 +53,7 @@ void TritonData::addEntry(unsigned entry) { template void TritonData::addEntryImpl(unsigned entry) { - if (entry > entries_.size()) { + if (entry >= entries_.size()) { entries_.reserve(entry+1); for (unsigned i = entries_.size(); i < entry+1; ++i) { entries_.emplace_back(dims_, noBatch_, name_, dname_); From c89b4b3e1ff433beec65371f9285a6fa143b76a7 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 19 Apr 2022 16:11:25 -0500 Subject: [PATCH 08/31] some fixes for compatibility checks --- HeterogeneousCore/SonicTriton/scripts/cmsTriton | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton index 4ea4b7cd7f64f..271cacc2a46ad 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -423,7 +423,10 @@ driver_docker(){ } driver_apptainer(){ - source ${SANDBOX}/.singularity.d/env/10-docker2singularity.sh && echo $CUDA_DRIVER_VERSION + D2S=${SANDBOX}/.singularity.d/env/10-docker2singularity.sh + if [ -f "$D2S" ]; then + source $D2S && echo $CUDA_DRIVER_VERSION + fi } compat_docker(){ @@ -475,8 +478,12 @@ check_drivers(){ if [ "$PWD" == "$TMPDIR" ]; then $DRYRUN $COMPAT_FN COMPAT_SCRIPT_BASE=$(basename $COMPAT_SCRIPT) - $DRYRUN sed -i 's/LD_LIBRARY_PATH="${_CUDA_COMPAT_REALLIB}"/&; LD_PRELOAD=""/' $COMPAT_SCRIPT_BASE - COMPAT_SCRIPT_MOUNT="$COMPAT_SCRIPT_BASE:$COMPAT_SCRIPT" + # newer containers already include this fix, no need to do it twice + if ! grep -Fq 'LD_PRELOAD=""' $COMPAT_SCRIPT_BASE; then + $DRYRUN sed -i 's/LD_LIBRARY_PATH="${_CUDA_COMPAT_REALLIB}"/&; LD_PRELOAD=""/' $COMPAT_SCRIPT_BASE + COMPAT_SCRIPT_PATH=$(readlink -f $TMPDIR/$COMPAT_SCRIPT_BASE) + COMPAT_SCRIPT_MOUNT="$COMPAT_SCRIPT_PATH:$COMPAT_SCRIPT" + fi fi return 0 else From 5e20b341e1c2089d962cd443b401290d3d82f071 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 19 Apr 2022 16:21:03 -0500 Subject: [PATCH 09/31] update server image to newest release --- HeterogeneousCore/SonicTriton/scripts/cmsTriton | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton index 271cacc2a46ad..c218f271ac759 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -18,7 +18,7 @@ PARENTPID="" BASEPORT=8000 AUTOPORT="" NPORTS=3 -IMAGE=fastml/triton-torchgeo:21.06-py3-geometric +IMAGE=fastml/triton-torchgeo:22.03-py3-geometric SANDBOX="" COMPAT_USR="" From 71b4f1e0c929d2dec659381f71cab3815c112abd Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 19 Apr 2022 17:33:50 -0500 Subject: [PATCH 10/31] add a test for ragged inputs --- .../test/TritonIdentityProducer.cc | 66 +++++++++++++++++++ .../SonicTriton/test/tritonTest_cfg.py | 1 + 2 files changed, 67 insertions(+) create mode 100644 HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc diff --git a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc new file mode 100644 index 0000000000000..02e4fd9a077ba --- /dev/null +++ b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc @@ -0,0 +1,66 @@ +#include "HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h" + +#include "FWCore/ParameterSet/interface/FileInPath.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Framework/interface/MakerMacros.h" + +#include +#include +#include +#include +#include + +class TritonIdentityProducer : public TritonEDProducer<> { +public: + explicit TritonIdentityProducer(edm::ParameterSet const& cfg) + : TritonEDProducer<>(cfg), + batchSize_(4) { } + void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { + //follow Triton QA tests for ragged input + std::vector> value_lists{ + {2,2}, + {4,4,4,4}, + {1}, + {3,3,3} + }; + + client_->setBatchSize(batchSize_); + auto& input1 = iInput.at("RAGGED_INPUT"); + auto data1 = input1.allocate(); + for (int i = 0; i < batchSize_; ++i) { + (*data1)[i] = value_lists[i]; + input1.setShape(0, (*data1)[i].size(), i); + } + + // convert to server format + input1.toServer(data1); + } + void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override { + // check the results + const auto& output1 = iOutput.at("RAGGED_OUTPUT"); + // convert from server format + const auto& tmp = output1.fromServer(); + edm::LogInfo msg(debugName_); + for (int i = 0; i < batchSize_; ++i){ + msg << "output " << i << ": "; + for(int j = 0; j < output1.shape()[0]; ++j){ + msg << tmp[i][j] << " "; + } + msg << "\n"; + } + } + ~TritonIdentityProducer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + TritonClient::fillPSetDescription(desc); + //to ensure distinct cfi names + descriptions.addWithDefaultLabel(desc); + } + +private: + int batchSize_; +}; + +DEFINE_FWK_MODULE(TritonIdentityProducer); diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index 2829a5e2db743..765a06d8cfd27 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -8,6 +8,7 @@ "TritonGraphProducer": ["gat_test"], "TritonGraphFilter": ["gat_test"], "TritonGraphAnalyzer": ["gat_test"], + "TritonIdentityProducer": ["ragged_acc_shape"], } # other choices From 7f00e86fe2971472defee20f789e5d7cb04aa134 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 19 Apr 2022 17:34:00 -0500 Subject: [PATCH 11/31] fix bug revealed by test --- HeterogeneousCore/SonicTriton/src/TritonData.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc index 93d1e015019e8..3382468b52955 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonData.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc @@ -286,6 +286,8 @@ void TritonData::reset() { holder_.reset(); entries_.clear(); totalByteSize_ = 0; + //re-initialize first shape entry + addEntryImpl(0); } //explicit template instantiation declarations From 111d24857538703139373da58de2fa6d1e14b6df Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 20 Apr 2022 14:46:57 -0500 Subject: [PATCH 12/31] fix off-by-one --- HeterogeneousCore/SonicTriton/src/TritonClient.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index 263a77fb3d531..38ca800bfd4eb 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -196,7 +196,7 @@ void TritonClient::addEntry(unsigned entry) { for (auto& element : output_) { element.second.addEntryImpl(entry); } - if (entry>1) + if (entry>0) setBatchSize(1); } From 36a7ae9f3d3c13b5e0a5ff0e3baefbde58091acb Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 20 Apr 2022 14:47:26 -0500 Subject: [PATCH 13/31] use simpler example, fix output printing --- .../SonicTriton/test/TritonIdentityProducer.cc | 8 ++++---- HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc index 02e4fd9a077ba..177dd9bdb48b1 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc @@ -26,7 +26,7 @@ class TritonIdentityProducer : public TritonEDProducer<> { }; client_->setBatchSize(batchSize_); - auto& input1 = iInput.at("RAGGED_INPUT"); + auto& input1 = iInput.at("INPUT0"); auto data1 = input1.allocate(); for (int i = 0; i < batchSize_; ++i) { (*data1)[i] = value_lists[i]; @@ -38,13 +38,13 @@ class TritonIdentityProducer : public TritonEDProducer<> { } void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override { // check the results - const auto& output1 = iOutput.at("RAGGED_OUTPUT"); + const auto& output1 = iOutput.at("OUTPUT0"); // convert from server format const auto& tmp = output1.fromServer(); edm::LogInfo msg(debugName_); for (int i = 0; i < batchSize_; ++i){ - msg << "output " << i << ": "; - for(int j = 0; j < output1.shape()[0]; ++j){ + msg << "output " << i << " (" << triton_utils::printColl(output1.shape(i)) << "): "; + for(int j = 0; j < output1.shape(i)[0]; ++j){ msg << tmp[i][j] << " "; } msg << "\n"; diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index 765a06d8cfd27..1773e252cfae2 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -8,7 +8,7 @@ "TritonGraphProducer": ["gat_test"], "TritonGraphFilter": ["gat_test"], "TritonGraphAnalyzer": ["gat_test"], - "TritonIdentityProducer": ["ragged_acc_shape"], + "TritonIdentityProducer": ["ragged_io"], } # other choices From 0125089482f78a999dc0bcebbb234dcb2eea67be Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 20 Apr 2022 17:41:00 -0500 Subject: [PATCH 14/31] simplify --- HeterogeneousCore/SonicTriton/interface/TritonData.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h index a077ca4f7ef0e..90acf3084e4bf 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonData.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h @@ -137,7 +137,7 @@ class TritonData { void setBatchSize(unsigned bsize); size_t getEntrySize() const { return std::max(static_cast(batchSize_), entries_.size()); } void reset(); - void setResult(Result* result, unsigned entry=0) { entries_[entry].result_ = std::shared_ptr(result); } + void setResult(Result* result, unsigned entry=0) { entries_[entry].result_.reset(result); } IO* data(unsigned entry=0) { return entries_[entry].data_.get(); } void updateMem(size_t size); void computeSizes(); From e5f84dc08e87980969558bd5ea76e0887470eeb8 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 20 Apr 2022 18:10:09 -0500 Subject: [PATCH 15/31] fix offset error --- HeterogeneousCore/SonicTriton/src/TritonData.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc index 3382468b52955..72caac78befb9 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonData.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc @@ -229,7 +229,7 @@ void TritonInputData::toServer(TritonInputContainer
ptr) { for (unsigned i0 = 0; i0 < batchSize_; ++i0) { memResource_->copyInput(data_in[counter].data(), offset, i); - offset += i0 * entry.byteSizePerBatch_; + offset += entry.byteSizePerBatch_; ++counter; } } From 0ee76bbc466c11aa63c130f9d8e8bb0db754c775 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 21 Apr 2022 09:55:10 -0500 Subject: [PATCH 16/31] update test docs, fix model fetching --- HeterogeneousCore/SonicTriton/test/README.md | 9 +++++++-- HeterogeneousCore/SonicTriton/test/fetch_model.sh | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/test/README.md b/HeterogeneousCore/SonicTriton/test/README.md index 6d48159e512dc..2249a3ebd6ac1 100644 --- a/HeterogeneousCore/SonicTriton/test/README.md +++ b/HeterogeneousCore/SonicTriton/test/README.md @@ -1,7 +1,7 @@ # SONIC TritonClient tests -Test modules `TritonImageProducer` and `TritonGraphProducer` (`TritonGraphFilter`, `TritonGraphAnalyzer`) are available. -They generate arbitrary inputs for inference (with Inception/DenseNet or Graph Attention Network, respectively) and print the resulting output. +Test modules `TritonImageProducer`, `TritonIdentityProducer`, and `TritonGraphProducer` (`TritonGraphFilter`, `TritonGraphAnalyzer`) are available. +They generate arbitrary inputs for inference (with Inception/DenseNet, a simple identity model that allows ragged batching, or Graph Attention Network, respectively) and print the resulting output. First, the relevant data for the image classification networks should be downloaded: ``` @@ -19,6 +19,11 @@ Run the image test: cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonImageProducer,TritonImageProducer models=inception_graphdef,densenet_onnx ``` +Run the identity test with ragged batching: +``` +cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonIdentityProducer models=ragged_io +``` + Run the graph test: ``` cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonGraphProducer diff --git a/HeterogeneousCore/SonicTriton/test/fetch_model.sh b/HeterogeneousCore/SonicTriton/test/fetch_model.sh index 801c9b82310b7..5520e5582e779 100755 --- a/HeterogeneousCore/SonicTriton/test/fetch_model.sh +++ b/HeterogeneousCore/SonicTriton/test/fetch_model.sh @@ -3,7 +3,7 @@ # borrowed from https://github.com/triton-inference-server/server/tree/master/docs/examples TRITON_REPO="https://github.com/triton-inference-server/server" -TRITON_VERSION=$(scram tool info triton-inference-server | grep "Version : " | cut -d' ' -f3 | cut -d'-' -f1) +TRITON_VERSION=$(scram tool info triton-inference-client | grep "Version : " | cut -d' ' -f3 | cut -d'-' -f1) TEST_DIR=`pwd` From 1ec919043549b498e05aa5432b0d2909c658f0f8 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 21 Apr 2022 14:26:59 -0500 Subject: [PATCH 17/31] update readme for ragged case --- HeterogeneousCore/SonicTriton/README.md | 34 +++++++++++------- .../SonicTriton/batching_diagrams.png | Bin 0 -> 10530 bytes 2 files changed, 22 insertions(+), 12 deletions(-) create mode 100644 HeterogeneousCore/SonicTriton/batching_diagrams.png diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index 11d970c0fc187..c0418b1517dc7 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -9,7 +9,7 @@ Triton supports multiple named inputs and outputs with different types. The allo boolean, unsigned integer (8, 16, 32, or 64 bits), integer (8, 16, 32, or 64 bits), floating point (16, 32, or 64 bit), or string. Triton additionally supports inputs and outputs with multiple dimensions, some of which might be variable (denoted by -1). -Concrete values for variable dimensions must be specified for each call (event). +Concrete values for variable dimensions must be specified for each entry (see [Batching](#batching) below). ## Client @@ -34,6 +34,14 @@ The model information from the server can be printed by enabling `verbose` outpu * `useSharedMemory`: enable use of shared memory (see [below](#shared-memory)) with local servers (default: true) * `compression`: enable compression of input and output data to reduce bandwidth (using gzip or deflate) (default: none) +### Batching + +SonicTriton supports two types of batching, rectangular and ragged, depicted below: +![batching diagrams](./batching_diagrams.png) +In the rectangular case, the inputs for each object in an event have the same shape, so they can be batched together. +In the ragged case, the inputs for each object in an event do not have the same shape, so they cannot be batched; +instead, they are grouped together as separate entries, each with its own shape specified explicitly. + The batch size should be set using the client accessor, in order to ensure a consistent value across all inputs: * `setBatchSize()`: set a new batch size * some models may not support batching @@ -41,15 +49,17 @@ The batch size should be set using the client accessor, in order to ensure a con Useful `TritonData` accessors include: * `variableDims()`: return true if any variable dimensions * `sizeDims()`: return product of dimensions (-1 if any variable dimensions) -* `shape()`: return actual shape (list of dimensions) -* `sizeShape()`: return product of shape dimensions (returns `sizeDims()` if no variable dimensions) +* `shape(unsigned entry=0)`: return actual shape (list of dimensions) for specified entry +* `sizeShape(unsigned entry=0)`: return product of shape dimensions (returns `sizeDims()` if no variable dimensions) for specified entry * `byteSize()`: return number of bytes for data type * `dname()`: return name of data type * `batchSize()`: return current batch size To update the `TritonData` shape in the variable-dimension case: -* `setShape(const std::vector& newShape)`: update all (variable) dimensions with values provided in `newShape` -* `setShape(unsigned loc, int64_t val)`: update variable dimension at `loc` with `val` +* `setShape(const std::vector& newShape, unsigned entry=0)`: update all (variable) dimensions with values provided in `newShape` for specified entry +* `setShape(unsigned loc, int64_t val, unsigned entry=0)`: update variable dimension at `loc` with `val` for specified entry + +### I/O types There are specific local input and output containers that should be used in producers. Here, `T` is a primitive type, and the two aliases listed below are passed to `TritonInputData::toServer()` @@ -58,7 +68,7 @@ and returned by `TritonOutputData::fromServer()`, respectively: * `TritonOutput = std::vector>` The `TritonInputContainer` object should be created using the helper function described below. -It expects one vector per batch entry (i.e. the size of the outer vector is the batch size). +It expects one vector per batch entry (i.e. the size of the outer vector is the batch size (rectangular case) or number of entries (ragged case)). Therefore, it is best to call `TritonClient::setBatchSize()`, if necessary, before calling the helper. It will also reserve the expected size of the input in each inner vector (by default), if the concrete shape is available (i.e. `setShape()` was already called, if the input has variable dimensions). @@ -100,11 +110,11 @@ In a SONIC Triton producer, the basic flow should follow this pattern: a. access input object(s) from `TritonInputMap` b. allocate input data using `allocate()` c. fill input data - d. set input shape(s) (optional, only if any variable dimensions) + d. set input shape(s) (optional for rectangular case, only if any variable dimensions; required for ragged case) e. convert using `toServer()` function of input object(s) 2. `produce()`: - a. access output object(s) from `TritonOutputMap` - b. obtain output data as `TritonOutput` using `fromServer()` function of output object(s) (sets output shape(s) if variable dimensions exist) + a. access output object(s) from `TritonOutputMap` (includes shapes) + b. obtain output data as `TritonOutput` using `fromServer()` function of output object(s) c. fill output products ## Services @@ -116,14 +126,14 @@ The script has two operations (`start` and `stop`) and the following options: * `-d`: use Docker instead of Apptainer * `-f`: force reuse of (possibly) existing container instance * `-g`: use GPU instead of CPU -* `-i` [name]`: server image name (default: fastml/triton-torchgeo:20.09-py3-geometric) +* `-i` [name]`: server image name (default: fastml/triton-torchgeo:22.03-py3-geometric) * `-M [dir]`: model repository (can be given more than once) * `-m [dir]`: specific model directory (can be given more than one) * `-n [name]`: name of container instance, also used for hidden temporary dir (default: triton_server_instance) * `-P [port]`: base port number for services (-1: automatically find an unused port range) (default: 8000) * `-p [pid]`: automatically shut down server when process w/ specified PID ends (-1: use parent process PID) * `-r [num]`: number of retries when starting container (default: 3) -* `-s [dir]`: Apptainer sandbox directory (default: /cvmfs/unpacked.cern.ch/registry.hub.docker.com/fastml/triton-torchgeo:20.09-py3-geometric) +* `-s [dir]`: Apptainer sandbox directory (default: /cvmfs/unpacked.cern.ch/registry.hub.docker.com/fastml/triton-torchgeo:22.03-py3-geometric) * `-t [dir]`: non-default hidden temporary dir * `-v`: (verbose) start: activate server debugging info; stop: keep server logs * `-w [time]`: maximum time to wait for server to start (default: 300 seconds) @@ -172,4 +182,4 @@ The fallback server has a separate set of options, mostly related to the invocat ## Examples -Several example producers (running image classification networks or Graph Attention Network) can be found in the [test](./test) directory. +Several example producers can be found in the [test](./test) directory. diff --git a/HeterogeneousCore/SonicTriton/batching_diagrams.png b/HeterogeneousCore/SonicTriton/batching_diagrams.png new file mode 100644 index 0000000000000000000000000000000000000000..ca438ae2a4ca2ab94c5bd14aef4b940804c474be GIT binary patch literal 10530 zcmcI~1ymf%*6sv2!QI{61_&A;A-E@lI|B^v?(VJw1WN)02tH_VcemggJh%i2;SIU> z-1pyK-dbVxp0u0RR9@c{wQ%0Pt)Zen_Jr!CO2T z#gO4A8dEutG63Ms1ONnv0swdLrocS_zzqlh92f%tA{hVxv14}AJ5l%z1QSJBDZpby z!bm&3hUzG%;{rdM{W-9qy({nwWLJ4*Y2+OwLR4ICD=qg_000TmTV2}~1oohDbapVe zvW3vNdO1R9Jnb9-0PV0@9lhjyAjYqstk)*|LP$dM^P%~aNJ>U$^ zX2eW6lwh3U*TVzzuP$un!Vby=U7J(|ZLVbgcLd2CY;hRN_?TPU&ZJSmRXJEvS+?3Z z&lMTIsU4@XTwQ&1TxOlJq)Mg;Y}AQ5GLUi5 zlBJOozhOJxR3M}}hE}10lE5YBj)L|1Yx3C$q3Mn&?+;P7Cv@!JNJ@n^qB6CMyIH{X zCZBLC`Hw?O#Enr%fG<{9#r1n6ZHM{f@TyQ6GC>M329m}cre`|V%8hglH2hy4CqBHE z4B67WscftsK1v{AGaZ$ z{`+p@tm+}ss}me_AcmE)^zv>oQ`7oXey6~a#`3OaDhWROBKeOb0+I)MOh&d9RQMZE zEGo$YNr!WKbg5c-sPq7KY6)QwGnwJL7oHW)=O8Pmaev~xQA2DXsc28iEN4DXinx65Gj^9RIpR2w`=cK!R2ssSoU!>g}##B)e+wB$aq&j{t+-h_~V$(!SSgFSExH zOP!!kCJt{cU*oQ@IX(B1ZdoT*CHlrh;~IE0jhj;`Gc2uYG83$Qb4coN)^a3NPj2^} z${98`$Q7CP){wUTEknhm^=LEts_|QWllz$o-ksst@N&AMfoZiPR6bg)``m_*A8|#T zXu)&^;8K6r+(^wyRYFO69RN zQolN)U24N(B;0)HD2%_-tqsc2z@Wk9Jbo!QBB&+t-jD8L(}ykLu!}#VlFoJdpdxao zZVA;(j4d!;(qn0oqeA*D03;tSbXs=cp=+Q8^M>-{CTIj0@#b##}74G-glORuJrOZ4lI5DUu zdu9meKI-ljTHPrAl1_;j{!Z%5>M$(j?R|k6mg3RSzc!a<)1*^EjV+kfa5_vq; z;Rm{x42l*2KzlAPC86%Qc$Cv#aj4*4)b2L1kLg*N@3c;udx&i5$Q&>7kxBxY1cPbY zKRAd$C_C^Tw_q8_Ar^?p<}?$vhi-v*Nm#^O^-Qoy8u43)kRFztJi}E)LA{j1%pFR$ zUjdztM$E60XpNXsDW2%a7@y5UsgyQF_lTyu*Jb;0V>=t_?b8~s3W!EYGGc2yaILh&vQa>ht% zh${ETxXrV&OhDqJa@$pBVr!Al5(Y8H#LT-IPtaDQGzRS*d;wT7V|FPplU{bWO64Kt z3H5YeM3OS3li)rP2v*~!NA883EZI-HATAzOK`DGqM`yW_#vZ)DJ_Fk=U8SK>;k!6u zEpr$yVVYcg(<O`Bns#(9=d<*s6cVB*!$>r{?pxBgS(Yu|g?fp88dL`R@C$* zyQA{(spX{Z6z92?4~231VzIp}J3}j}Q8*}Yy!IXb>8h=Dy+Sn>`bpMjWNZf?=VPeX z`+$K$*S4zDfOdISm|xn8I5Q{lM=9AgbItPu7hy=2i~DlAzwaXNHtr7*a>M$~8rxRl z^~rh{P#;*Y2S{o;+@81H^9hHkGtL(^h50Tk zh-W%hq@4GHehl-l=asweMC$L|+XWGo&(WkO+gbl8-e%W;jZ~+O&sVKRkHfAE17(-S znj%?Z=r^F*)EP+)z=+4AX{Nn4NrN^6F}hD6t?L^Bjyku9F>2bO`n|^F^tQUxLxrT1 z<~GZ((_C~*UBU-0tV=sENwhSKKJ! z>&G5PUSJZRx1EG}OM{%`c^WQvZbQgqkrRdrdztnttuxqLcXw-1{2{clK8d-`DAq zH>*T$4_D6T@FP$X%52DXDQAig7gy>C*bzERuSnb|*_gI>%1(cRR2-w+yuD?E%JNr2 z0oWM_{lE8A8<_^`RxJfNrNbn5>omCD*TS|izb;0%nyeZg&0|h8n2l@y)wAZq%BZd{ z+KW#p=CyU#aqj5SU?M%s2>X2Kea-Zs#Ck9=lxmoyNdD4L)rMW1iD~;#`O`A`1yg6rhwH2#PS_q-5V{_ z7Atpf*J$dO605>BZhLK*ts^aUyM5N58RZb>A^B+9tDt#q?$ivfMLKFLG9B=60mm27 zmA~hZQjup$if<>w4z-yAnQ8Y3GWhx+oC}x-Tew@#f@Z>}C)HPV*0*KvJx5UIRmrUA z*nRu(a1pU2e$QR;Qr{kry{*bg?p4o)vQNv z&#~wRRvuyHr(dLxTg!ba3*~P)zWSKt`s=@kl?Jfpp%k?QY%>P)s|=2pRGn;tUZ7IB zPA%CCfAUnf<04$ruig4(5z3D$=cuzDJr|c1k7jtR4q&ZayKh*)lZgQurO?jon?C>CAnnP5 zUn4*I4EYiqW#9BU9)dr1^5L-SBHxz`HU~?K+uz&XqhWMMGi@`=>NCYU4IuQ5<=GVP zydw!FR2~ZFLHVJx0DK-K|NphRc3dz}Frb63X_2f6&if=iey+feM zm9mYDoR>=-zV%dYD_(ftyeYQ(<;(&_r9$}z_2s~wn?pmhOLr+_{D_n^gkH6J^_&9&YrmFFXNE&dwHeb^Ta4B?F%82) zxo#1=OhLq7b-lu_O>W-WrEiy=J@i_>Sz7g^2jj%dS-D^>Av*r$UxX0gwbH88d=fTE0 z#TOpOmNsrRkNt9w+_lgXP&@+WicjA27r!(mD7s9G*xbsub#$>(MFYiaZ+%}&AQTvJ zBrvzVxU5R}viz_!zrH<(iZ(}@@<;%6h`(^}DEK9%rPv9+$MCo$7G>dnrK4?iEr~xN z>~cMhwz+YktTrL6*vH!k`NDcC!6E67rEiTvR@23)ZJ|>ee+FFMvmVUgALJ`LykR%N zDPq|PrtpX{?hUi}ix`FBz8%rCIn9zEJmcYZtl+!j@{2+h#NCHk0c{*P&A2$ju;C1# z4Ies5ZK%AdKa&LdJT?KH7`_=WBDi39;cWdO&~ z+sV_e(b}C*Pd}>V9HeHXnl+>g;bAs~81ybZa@GMV_v9{+YTI?up?Atz${peWqd-=! zuPD9DT??;Bm3p3?p=6w3O%S4je?MSEFqVwKTF=PFM6p9c+}yG0782eo3yRTkRzhzY z%X-Eem@PlG1@^_t>BL0iCGjpk;`Q1>M&B2Xq4)cCTh!bes(p@y(31!I-mudqvisTR zM$fK_r~$+?26P2}^pP0$VvS2Im2M-VfAaMS&f9$e?Qd=Xp ztar;9FsPbic4gzgVKKaaknLX-Fe|SD7@!^v|28#UmHsP<+Iw)`ayajDylL3>Lw;4x zd{vQVX5MLYW-T$KFsH>=cp|%L|0N}m+%QUvoxG0*$og-^p3VF)Fcv*Bp&Kn@9nU5N z7*t-!!F7CUFpZi2s)x3V7V-wI2WgX7cByyQ@)>GQ{fTizOOFUa_$0Y0=;4b)kC4a| zt3zX9q7*6@KHIxN-MS&X%`GuWCF5>SlV#CV0ZZ7rg!1>kYanA1G>U+nmE3%cZmexGL@Y>}3L zR9Jra0o4BN+mhs$VeqJR7D$mBk~KgwBHaUg9{jJ^C2?r6?VE|!4LpvQ@?FoR! z{g+Yyw|W0u`FZ*HrB_Za|Fx#4lDFO*f4<*lK5pF2it2Rb);GORguK)k+uQkDG$cOc z0oi|VuXPu-efNZ;sMO_7lKGYbZq=A;816^P?E)$2^w#c+<@auBDVRQ_|FCliT@VKY zLT0womD#h+K}~gp6c|{P1rINI9kES{*eJrI#PRC8l_%&{mm``3gOw7+P!3 z@(L>=o%G_3he;Ar3z?4M`7rVsRU{yTC0`*{*r>=rtqq_peRL=7k2WPKg-*Fkrp zSV?VIzE$R(6lR!B){|^8vK?B=uZfgAPnti6^H{dw$^-B=F8r6V=Ss@XNsQTd>v%CHZNABxTX7Pr70M zjZK!}&Rh{jCPQrb<)mzWU_Ys32*o1{_XiKx^e@Ws+9s1YFiC@KQ8G@B(wA7r2GY)J z$7^N-9&4upFp1qD;b)g9Qfg)`*~X<4>|H@ zdOn))su{7aaY1!Yqim({SfKIGjzqEqmQ zC}~A8$A{Gla+%T|y&BgLNKNFya>*GYlWm7i@p_bpWN8l6R%+fd|GEe{YLH4zbKlGq zn8@}&pvyxr@6NWi48k)NUB`*yW30D;bbG#ar?eD1ht06cssCn$;o5e4Dtz%nvq{6; zV#SR(0}Cnnz~_ju<-#$9uMT%yPq{DU$CFs%Yc}3D^8y8afr?!)Pc*p4cWj30$wk4? z^yGEs3VLEuJrA36!+9Lp%8zkRc(=B2)=HIM(@>{Zz7MwTX+SXwcQUfRYa{F*6-ar9 zI{oWAt4hokMz`hoeecBAR611!gr9%_I|LdS%wSzv!WCF-VWk@JXwN`A!$`^la{uz3Se^oqtV6!PR6 zTT(wNnk1FruH%>uZ*%ZtDCAAj7`XDmtGh(e3FjrE&czqjcyi!hux}7G)~&>`)NxD8ar4)6a*WWhm(k zM=~ZcG+w2=o9h&ajgY2LhQtm}<-%R11edr$M^5RKCQ$! zay&RA^enjIP)#=SVa|o z6PWU77y9((2zjTSW1{*{1N#$b|GRg|pI1wj*ETuL@nG-Qs_NP=DYA`Kup1;X;=n0e!Av~l`)+iDGLrV1Z zCu9QKB!oelWmU&+m`vL*KbCOaK6gx6`s(pwj@6~(h$G`S?QT-ME#{<%(vSz7n|Rx9 z_%L0C^udXEX%8>u+)vnAks(p8{?jKI^zAZ!2^$;w*+r3ZyH`^qZ%SNou@q>3E`}ao zXZLosY&f7oU{f&?y%A4Ad)|ju@7=<{k48IU*{+t*cW0z}`l#5ut2rl^ZOXPn+He|z zybGFOoJV5H_-5hwm2esROWQLvpK%4NXSfRUFLFLOA75=O%L-SL#~M=??g)Ec)v+e* z=W(?1F63#w>r1N{y)G%lwIvW6^3r56jl+y+hC_5(+uefLZs?|Rrt(?n&CN}y?K^?A znvo|m!HS^Jnn!g343Cjnpd*woWtU7!N<+3(J-N*$ClLm=|4ke3EQs{BU=d4f8oi`pFyOm z;t*!8P~e+NpXU442g|pFlMUZXE=-au7)mI0RM|t^sj!kbyuQ`e3uySdZ>_!DNvmnd zv=!NXqifMEVG3%aoE|#b`$H{!`B`-%E z&;Bn;!OreWHa7u^7fjnrxLSrn+VTpK!#5LjL!nO7UOLK!UQNiJ z<>tPXr@2%ArGYgJ#8L20c{&EAW(oz(3qOf?IY!9P+a!4ASU08TN z>rt?jShr18+C10ShyZt7osE-1g7s%=5A%$M?goiusL&~VeUqHrW$r^utvs{Jiv^L| zZBFa_C^O;JBmO}d0WzVTBewu|y7Qpzjk(IIPi)+k(%#8=?SVI$7V5{X`h<&aps{!b z8yhvokDDEAxy5^N4^;=YLY7KHD~OfQ1jHF6``XR>-~w)*`Ae~C zvCyMnZ1)}dZ8jgRh#YK<9n{Y(G0XC1`f^73UoW;hw>M4hR%@&z?yaSzIS$*N zH1OxQ#zcbLmWA}PmO0H7wVh2Ch(!DAU#3G%4`&-Av)3B>!X;O|xjS&`(LuQ%C`413 z3wwYbMl-G7)Dnl^y`#%M(BUrHIvT5g`547}To1^r2dz?>Efx!G87xLkr5jxf!B*=) z6%8AXf_j#p)q3FQpAk|b|9gRPAxmBq*cin(Wevyrg7uScd01be^y2~{BM3L*9{t7gf`%(H0za-;VxzRv-`vHq zhR2pGKDGJ%JhdCHf3S(R|IO-CPbsL}a5QT-yq_)oCm|Gqk}sH1;YSmg>8a&F;WCsYgQ(VZ1&bbog0;+}YQxN7gQ8?L2Dkr@>|B$I0*Ho#3og^NW` zNGKyzHZFG-Vm_P_T3Ji5^VbvfudLlU!FTelBZ|H4BUd!%J#O~A@_qIudF6{plZn)j zSvtqq$LaFvZUxCrX!Fu$tBfLj8$qactxA4A^QSndB)@%^MFmvorsWW1ngjEV?;Dgx zRY(d&BtWJfC7v4T4o<0zjHof%<-YfC2wo>Tsd{d__AbbQNK1I+AeeXX@T)i^C&Ul- znA3pB4TG(_7;(0&uo0te-12i%TMTdBzAUU1C7`B;gk&lDfD)a1$e17f%c{WP`9RaG zB5X_Pz^>8i&f#)=f-*j)M^=tCiru3?`P--p%{Jdr{N+ib(+@48nqbBjY~|OxBMYzC zBO}HIl_hpLNQX(%OFnlXS7Y1?JC^WRj-WLd=RhV!`nnzSOBqe)7<}Obzqh|M#wJ0p ze&UwB#$w}X**Zdy*`o)*p#hZ36nj@OW)L;*8=to&f6&|5Ig6K0O64M+3+5c)?s-|m1ePpfHpQ1`RQP+mx2O{8Stjo_z0Ze2+Gg1my$&Sf#KPb4+VIGq zGTad4dM57rMJ)#}CHcoSp6n+HkE&A7`}RVjbwWz3+`wx%MFVQLszIGcUPlM^G7^8h zkrKLR4v7+?p?Y?)@~nxDcBDOTqlzI-r(V#oJ98K8ST%Mm!EpbsX8*Hn!S*a3b$Z&( z?KP6}4Dmz;E~^dv@dU}-8-2FK5&?W>jch-aYjcQSH< z?H-ot<@zli2`lw5gIINotjoVub-SFBX_fZg4N0@Mi~K)9|39dI&3F6JEqT`Hl=@fc zwyT>8`NN@#Gs5fN#E)YqB7+CtD0eQo?*xC2e=f3s+|s|gOs<}UzS_@oOJ8^$VJ}h* zRxRg!bs;Hx@=p5K7A;%!_KZjoJmcK$IJs?BInQ*>n5J$T_{`1vIF)7**D>dOC#BNv znxq<(=h8pq^UZb&tn#=lEiUi_LK4zZVEfPFLnR!j0L6oDgvtU@xvKp$;DBl$CjuoR z6_phSKe94^4n#!kaZ|A$K1sdlQu|uUs@6~Givtaxs2}33F{k0Z#dc!4c0?D@o4ym$ zbz5v|PnE|p7K~-B5Ip@w1Byd^LTZ9Q-gof{b48daR{S{*e*0NAn{3%DUW}~@{cEXd!Ly${ejssI`=?|%l|p2# z*|Kqm&g4HXq~j@91}8;g!=agR@8ulmc!yH88@LFDr^Nq}fB8cY2n5@aBn+p9-gLbL zg1gBgOs*6nM=3)&z z`DTD^E91W-OvGF}pFEKChQc5P)l$BH@^Yqlru#?cq8(^&=P+}D{-H5>Uw-lr#4|4n zIxor_@{=GZ}DHbrA~9_eCM-mYSzV$m+EQG z5L>WxV-#aB9z#0q{f4_ZEEDRUnu3SPS+rsAUZ=pQ&(}TpJAlqFrdbi(O*q^R=NT~R zX_8$kg*2H;pD@XXKV(3p2k%#29S9d)_*uuEI-W|q)e>Rxp=pV#@VDS-C{*G6_NGF^y zYUECvK#<)(QHoxx_tc!r@BlcGTuge1VVbTt-eKLuzJQYa+~e;j#rfgA%9)q;jKO&H zV1`{Q9K4L@)blEos2Jo2wdW7!#6qW7t;-^&#n?)_HI8#Z;e4x1^mv-%Y1i6HXX1C# z93Fm#wx_Q7HB?L8yK{2`XVCIddZ0Ee6^`oEVd>qVxGZWb5*Z)&^5bKTegsodmJ#~G z9y?tKw#5l%lgZ4cTsP1@QbmG})2eC|pC+0DsOCuDOTUTc(T0ny8+qw zEI;|%afz<@^SkPr?B6ZirH0IASPmH_i`O@$jpZ zM3}hUa3DHwE5dS*Xz=x@^q*H`%9DBYS!49%=$Vzu2KCqowJ={rO(2GYT|<#zi~v!A zDH`j9Kg&ogQIXJUN!tp`?t5BDR1Ny+M;<3X6s{~SIGkr#`HL6_xpMR8wmi{&J+RQ` z3FDh7Feu_t^xXq8Nl~eq-3nmsrK?Q&7~RE^`mhgQOPOdJpFA{C-PB{Y(dl}&6S{Vu zp7S;GSJ!yR@V>{LR?hbso^!q5aRCPmrE8}(zsK7Sede&3O+03AIWx9)T+XXhBJ#Q^ z1YKT+dfqUe`TI;f@v?`l3JxX&aAvlL_V7iKz4r^h(*#OR69TuHkfvz0>gIzgT-&Yc zd3St>!FqaIsW&NQR76SuO6J@`FMMIxQK3q5^75xqB?iwIT3Ru-crs7Z1o0@(o?o%Q z&w2_alT!FFjeOAA!L`5>tC&3K!AQ0KBk%qA)VYohf<7M94RI?R?3>JLLt4#{jFi3@ zma|y?y^)F0T&Y8E?dnA=INxPwb58-Z%oJylwY}!6-)4;lV--mw4R@k`L71-o>iv(U z9Ub9?{zt__pJVtF7Zb~v9Oj8vMVqYr@Ii~6)#j5+V$?{LkY>1(V2_1_Qde+d5*oYt zm28~{uG49i1b25MwK%oNU(ZyvI#TT(B+8ldUB5U?B;}QXN!&`+zt<+hknS4;FzGPi z;ezowq7B`i1hEg7+VQ+S&XN|A6VoqaqMvNu4PTDFbYcWTaPdBExh}*n zT^Orm&O4VR+=){JviQ3gNpQIX;~9R*C&(Go9+carqL@U; z_rhN5Bx8AFn@aYGLU-uTB-XZVxT% zfPV$T=qj!4Y6f zW>)52|FZxUy@w}Ufb@?BQxS6qXFISf0PN^!Yh?;{wQ{iMb~H1m3aaUZccOXf1h0z7 zS%DxfZyen0&8+M#-Z*#y_ymPKc?E&&@ShoqfDgPc{oj4xIy;!TnL?Za5@rr25Sq7A zGBiq7CeC1IFB%^B|FXZX#Xw|3Rm~KXmp4wBP^7c`E<0`~O+yX6c7CTt?)d znv|7Aq#R7$>>&28@=^ddH!Cv{L36MmFRuWQ(~KV?$oWDL!pkYl59HxA0}2cA@t6ph zngE4*8iF7GeH3|n7gw;oDdg`_cp+v&W&)-!IE8u51>mCy!ml7^f}8@Tyn?)Z@Ina4 V8+zGy1Rn(;FRda~@y0m#e*uV1>N)@b literal 0 HcmV?d00001 From 4c844ec85c2698ca2b3c809d30d58ad67d533772 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 2 Jun 2022 21:27:08 -0500 Subject: [PATCH 18/31] handle batch size zero w/ ragged (including test) --- HeterogeneousCore/SonicTriton/src/TritonData.cc | 8 +++++++- .../SonicTriton/test/TritonIdentityProducer.cc | 14 +++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc index 72caac78befb9..23512c91780ce 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonData.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc @@ -119,8 +119,14 @@ template void TritonData::setBatchSize(unsigned bsize) { batchSize_ = bsize; if (!noBatch_) { + //zero disables inference in TritonClient: remove all entries + if (batchSize_==0) + entries_.clear(); //should only be set to 1 in cases when entries > 1 - if (batchSize_==1 or entries_.size()==1) { + else if (batchSize_==1 or entries_.size()==1) { + //in case batch size was previously zero for some reason + if (entries_.empty()) + addEntryImpl(0); for (auto& entry : entries_) { entry.fullShape_[0] = batchSize_; } diff --git a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc index 177dd9bdb48b1..afc23b684df8c 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc @@ -15,7 +15,9 @@ class TritonIdentityProducer : public TritonEDProducer<> { public: explicit TritonIdentityProducer(edm::ParameterSet const& cfg) : TritonEDProducer<>(cfg), - batchSize_(4) { } + batchSizes_{1,2,0}, + batchSize_(0), + batchCounter_(0) {} void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { //follow Triton QA tests for ragged input std::vector> value_lists{ @@ -25,6 +27,8 @@ class TritonIdentityProducer : public TritonEDProducer<> { {3,3,3} }; + batchSize_ = batchSizes_[batchCounter_]; + batchCounter_ = (batchCounter_+1) % batchSizes_.size(); client_->setBatchSize(batchSize_); auto& input1 = iInput.at("INPUT0"); auto data1 = input1.allocate(); @@ -34,9 +38,12 @@ class TritonIdentityProducer : public TritonEDProducer<> { } // convert to server format - input1.toServer(data1); + if (batchSize_>0) + input1.toServer(data1); } void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override { + if (batchSize_==0) + return; // check the results const auto& output1 = iOutput.at("OUTPUT0"); // convert from server format @@ -60,7 +67,8 @@ class TritonIdentityProducer : public TritonEDProducer<> { } private: - int batchSize_; + std::vector batchSizes_; + int batchSize_, batchCounter_; }; DEFINE_FWK_MODULE(TritonIdentityProducer); From 0ca9b30c1ba26b5eaea897a43bee6fdce0b5bcc7 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 13 Jul 2022 19:10:08 -0500 Subject: [PATCH 19/31] improved batching interface --- .../SonicTriton/interface/TritonClient.h | 19 +++-- .../SonicTriton/interface/TritonData.h | 11 +-- .../SonicTriton/src/TritonClient.cc | 71 ++++++++++++------- .../SonicTriton/src/TritonData.cc | 57 ++++++--------- .../SonicTriton/test/TritonImageProducer.cc | 2 +- 5 files changed, 85 insertions(+), 75 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h index 50c4c5ee83642..8e6bb3518633a 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h @@ -16,6 +16,8 @@ #include "grpc_client.h" #include "grpc_service.pb.h" +enum class TritonBatchMode { Rectangular = 1, Ragged = 2 }; + class TritonClient : public SonicClient { public: struct ServerSideStats { @@ -36,13 +38,15 @@ class TritonClient : public SonicClient { ~TritonClient() override; //accessors - unsigned batchSize() const { return batchSize_; } + unsigned batchSize() const; + TritonBatchMode batchMode() const { return batchMode_; } bool verbose() const { return verbose_; } bool useSharedMemory() const { return useSharedMemory_; } void setUseSharedMemory(bool useShm) { useSharedMemory_ = useShm; } bool setBatchSize(unsigned bsize); + void setBatchMode(TritonBatchMode batchMode); + void resetBatchMode(); void reset() override; - bool noBatch() const { return noBatch_; } TritonServerType serverType() const { return serverType_; } //for fillDescriptions @@ -50,6 +54,8 @@ class TritonClient : public SonicClient { protected: //helpers + bool noOuterDim() const { return noOuterDim_; } + unsigned outerDim() const { return outerDim_; } void getResults(std::vector& results); void evaluate() override; template @@ -62,9 +68,12 @@ class TritonClient : public SonicClient { inference::ModelStatistics getServerSideStatus() const; //members - unsigned maxBatchSize_; - unsigned batchSize_; - bool noBatch_; + unsigned maxOuterDim_; + unsigned outerDim_; + bool noOuterDim_; + unsigned nEntries_; + TritonBatchMode batchMode_; + bool manualBatchMode_; bool verbose_; bool useSharedMemory_; TritonServerType serverType_; diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h index 90acf3084e4bf..d10cc2752460b 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonData.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h @@ -71,7 +71,6 @@ class TritonData { const ShapeView& shape(unsigned entry=0) const { return entries_.at(entry).shape_; } int64_t byteSize() const { return byteSize_; } const std::string& dname() const { return dname_; } - unsigned batchSize() const { return batchSize_; } //utilities bool variableDims() const { return variableDims_; } @@ -93,9 +92,9 @@ class TritonData { class TritonDataEntry { public: //constructors - TritonDataEntry(const ShapeType& dims, bool noBatch, const std::string& name, const std::string& dname) + TritonDataEntry(const ShapeType& dims, bool noOuterDim, const std::string& name, const std::string& dname) : fullShape_(dims), - shape_(fullShape_.begin() + (noBatch ? 0 : 1), fullShape_.end()), + shape_(fullShape_.begin() + (noOuterDim ? 0 : 1), fullShape_.end()), sizeShape_(0), byteSizePerBatch_(0), totalByteSize_(0), @@ -133,9 +132,7 @@ class TritonData { //private accessors only used internally or by client void checkShm() {} - unsigned fullLoc(unsigned loc) const { return loc + (noBatch_ ? 0 : 1); } - void setBatchSize(unsigned bsize); - size_t getEntrySize() const { return std::max(static_cast(batchSize_), entries_.size()); } + unsigned fullLoc(unsigned loc) const; void reset(); void setResult(Result* result, unsigned entry=0) { entries_[entry].result_.reset(result); } IO* data(unsigned entry=0) { return entries_[entry].data_.get(); } @@ -171,8 +168,6 @@ class TritonData { bool useShm_; std::string shmName_; const ShapeType dims_; - bool noBatch_; - unsigned batchSize_; bool variableDims_; int64_t productDims_; std::string dname_; diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index 38ca800bfd4eb..285b39b14708b 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -38,6 +38,8 @@ namespace { TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& debugName) : SonicClient(params, debugName, "TritonClient"), + batchMode_(TritonBatchMode::Rectangular), + manualBatchMode_(false), verbose_(params.getUntrackedParameter("verbose")), useSharedMemory_(params.getUntrackedParameter("useSharedMemory")), compressionAlgo_(getCompressionAlgo(params.getUntrackedParameter("compression"))) { @@ -71,12 +73,14 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d inference::ModelConfig modelConfig(modelConfigResponse.config()); //check batch size limitations (after i/o setup) - //triton uses max batch size = 0 to denote a model that does not support batching - //but for models that do support batching, a given event may set batch size 0 to indicate no valid input is present - //so set the local max to 1 and keep track of "no batch" case - maxBatchSize_ = modelConfig.max_batch_size(); - noBatch_ = maxBatchSize_ == 0; - maxBatchSize_ = std::max(1u, maxBatchSize_); + //triton uses max batch size = 0 to denote a model that does not support native batching (using the outer dimension) + //but for models that do support batching (native or otherwise), a given event may set batch size 0 to indicate no valid input is present + //so set the local max to 1 and keep track of "no outer dim" case + maxOuterDim_ = modelConfig.max_batch_size(); + noOuterDim_ = maxOuterDim_ == 0; + maxOuterDim_ = std::max(1u, maxOuterDim_); + //propagate batch size + setBatchSize(1); //get model info inference::ModelMetadataResponse modelMetadata; @@ -149,15 +153,12 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d throw cms::Exception("MissingOutput") << "Some requested outputs were not available on the server: " << triton_utils::printColl(s_outputs); - //propagate batch size to inputs and outputs - setBatchSize(1); - //print model info std::stringstream model_msg; if (verbose_) { model_msg << "Model name: " << options_[0].model_name_ << "\n" << "Model version: " << options_[0].model_version_ << "\n" - << "Model max batch size: " << (noBatch_ ? 0 : maxBatchSize_) << "\n"; + << "Model max outer dim: " << (noOuterDim_ ? 0 : maxOuterDim_) << "\n"; edm::LogInfo(fullDebugName_) << model_msg.str() << io_msg.str(); } } @@ -171,20 +172,38 @@ TritonClient::~TritonClient() { output_.clear(); } +void TritonClient::setBatchMode(TritonBatchMode batchMode) { + unsigned oldBatchSize = batchSize(); + batchMode_ = batchMode; + manualBatchMode_ = true; + //this allows calling setBatchSize() and setBatchMode() in either order consistently to change back and forth + //but changing from ragged to rectangular once multiple entries may cause issues; todo: check this case + setBatchSize(oldBatchSize); +} + +void TritonClient::resetBatchMode() { + batchMode_ = TritonBatchMode::Rectangular; + manualBatchMode_ = false; +} + +unsigned TritonClient::batchSize() const { + return batchMode_==TritonBatchMode::Rectangular ? outerDim_ : nEntries_; +} + bool TritonClient::setBatchSize(unsigned bsize) { - if (bsize > maxBatchSize_) { - edm::LogWarning(fullDebugName_) << "Requested batch size " << bsize << " exceeds server-specified max batch size " - << maxBatchSize_ << ". Batch size will remain as" << batchSize_; - return false; - } else { - batchSize_ = bsize; - //set for input and output - for (auto& element : input_) { - element.second.setBatchSize(bsize); - } - for (auto& element : output_) { - element.second.setBatchSize(bsize); + if (batchMode_==TritonBatchMode::Rectangular) { + if (bsize > maxOuterDim_) { + edm::LogWarning(fullDebugName_) << "Requested batch size " << bsize << " exceeds server-specified max batch size " + << maxOuterDim_ << ". Batch size will remain as " << outerDim_; + return false; + } else { + outerDim_ = bsize; + return true; } + } else { + addEntry(bsize); + nEntries_ = bsize; + outerDim_ = 1; return true; } } @@ -197,10 +216,12 @@ void TritonClient::addEntry(unsigned entry) { element.second.addEntryImpl(entry); } if (entry>0) - setBatchSize(1); + batchMode_ = TritonBatchMode::Ragged; } void TritonClient::reset() { + if (!manualBatchMode_) + batchMode_ = TritonBatchMode::Rectangular; for (auto& element : input_) { element.second.reset(); } @@ -237,7 +258,7 @@ void TritonClient::getResults(std::vector& results) { if (output.variableDims()) { std::vector tmp_shape; TRITON_THROW_IF_ERROR(result->Shape(oname, &tmp_shape), "getResults(): unable to get output shape for " + oname); - if (!noBatch_) + if (!noOuterDim_) tmp_shape.erase(tmp_shape.begin()); output.setShape(tmp_shape,i); } @@ -252,7 +273,7 @@ void TritonClient::getResults(std::vector& results) { //default case for sync and pseudo async void TritonClient::evaluate() { //in case there is nothing to process - if (batchSize_ == 0) { + if (batchSize() == 0) { finish(true); return; } diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc index 23512c91780ce..e037b8887a87c 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonData.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc @@ -25,8 +25,6 @@ TritonData::TritonData(const std::string& name, //ensure unique name for shared memory region shmName_(useShm_ ? pid + "_" + xput() + std::to_string(uid()) : ""), dims_(model_info.shape().begin(), model_info.shape().end()), - noBatch_(client_->noBatch()), - batchSize_(0), dname_(model_info.datatype()), dtype_(tco::ProtocolStringToDataType(dname_)), byteSize_(tco::GetDataTypeByteSize(dtype_)), @@ -56,7 +54,7 @@ void TritonData::addEntryImpl(unsigned entry) { if (entry >= entries_.size()) { entries_.reserve(entry+1); for (unsigned i = entries_.size(); i < entry+1; ++i) { - entries_.emplace_back(dims_, noBatch_, name_, dname_); + entries_.emplace_back(dims_, client_->noOuterDim(), name_, dname_); } } } @@ -115,27 +113,6 @@ void TritonData::setShape(unsigned loc, int64_t val, unsigned entry) { } } -template -void TritonData::setBatchSize(unsigned bsize) { - batchSize_ = bsize; - if (!noBatch_) { - //zero disables inference in TritonClient: remove all entries - if (batchSize_==0) - entries_.clear(); - //should only be set to 1 in cases when entries > 1 - else if (batchSize_==1 or entries_.size()==1) { - //in case batch size was previously zero for some reason - if (entries_.empty()) - addEntryImpl(0); - for (auto& entry : entries_) { - entry.fullShape_[0] = batchSize_; - } - } - else - throw cms::Exception("TritonDataError") << "attempt to set batch size to " << bsize << " when ragged batching is in use"; - } -} - template void TritonData::TritonDataEntry::computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize) { sizeShape_ = shapeSize; @@ -146,8 +123,9 @@ void TritonData::TritonDataEntry::computeSizes(int64_t shapeSize, int64_t by template void TritonData::computeSizes() { totalByteSize_ = 0; + unsigned outerDim = client_->outerDim(); for (unsigned i = 0; i < entries_.size(); ++i) { - entries_[i].computeSizes(sizeShape(i), byteSize_, batchSize_); + entries_[i].computeSizes(sizeShape(i), byteSize_, outerDim); entries_[i].offset_ = totalByteSize_; totalByteSize_ += entries_[i].totalByteSize_; } @@ -186,8 +164,8 @@ void TritonData::updateMem(size_t size) { template <> template TritonInputContainer
TritonInputData::allocate(bool reserve) { - //automatically creates a vector for each entry (if batch size or entry size known) - auto ptr = std::make_shared>(getEntrySize()); + //automatically creates a vector for each item (if batch size known) + auto ptr = std::make_shared>(client_->batchSize()); if (reserve) { computeSizes(); for (auto& entry : entries_){ @@ -210,13 +188,11 @@ void TritonInputData::toServer(TritonInputContainer
ptr) { const auto& data_in = *ptr; //check batch size - if (entries_.size()==1 and data_in.size() != batchSize_) { + unsigned batchSize = client_->batchSize(); + unsigned outerDim = client_->outerDim(); + if (data_in.size() != batchSize) { throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size() - << " but specified batch size is " << batchSize_; - } - else if (entries_.size()>1 and data_in.size() != entries_.size()) { - throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size() - << " but specified entries size is " << entries_.size(); + << " but specified batch size is " << batchSize; } //check type @@ -231,9 +207,11 @@ void TritonInputData::toServer(TritonInputContainer
ptr) { auto& entry = entries_[i]; //shape must be specified for variable dims or if batch size changes + if (!client_->noOuterDim()) + entry.fullShape_[0] = outerDim; entry.data_->SetShape(entry.fullShape_); - for (unsigned i0 = 0; i0 < batchSize_; ++i0) { + for (unsigned i0 = 0; i0 < outerDim; ++i0) { memResource_->copyInput(data_in[counter].data(), offset, i); offset += entry.byteSizePerBatch_; ++counter; @@ -266,8 +244,9 @@ TritonOutput
TritonOutputData::fromServer() const { memResource_->copyOutput(); + unsigned outerDim = client_->outerDim(); TritonOutput
dataOut; - dataOut.reserve(getEntrySize()); + dataOut.reserve(client_->batchSize()); for (unsigned i = 0; i < entries_.size(); ++i) { const auto& entry = entries_[i]; const DT* r1 = reinterpret_cast(entry.output_); @@ -276,7 +255,7 @@ TritonOutput
TritonOutputData::fromServer() const { throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result"; } - for (unsigned i0 = 0; i0 < batchSize_; ++i0) { + for (unsigned i0 = 0; i0 < outerDim; ++i0) { auto offset = i0 * entry.sizeShape_; dataOut.emplace_back(r1 + offset, r1 + offset + entry.sizeShape_); } @@ -296,6 +275,12 @@ void TritonData::reset() { addEntryImpl(0); } +template +unsigned TritonData::fullLoc(unsigned loc) const { + return loc + (client_->noOuterDim() ? 0 : 1); +} + + //explicit template instantiation declarations template class TritonData; template class TritonData; diff --git a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc index 084686d56a653..07d5211dfd1c3 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc @@ -76,7 +76,7 @@ class TritonImageProducer : public TritonEDProducer<> { void findTopN(const TritonOutputData& scores, unsigned n = 5) const { const auto& tmp = scores.fromServer(); auto dim = scores.sizeDims(); - for (unsigned i0 = 0; i0 < scores.batchSize(); i0++) { + for (unsigned i0 = 0; i0 < client_->batchSize(); i0++) { //match score to type by index, then put in largest-first map std::map> score_map; for (unsigned i = 0; i < std::min((unsigned)dim, (unsigned)imageList_.size()); ++i) { From c96bb07f9ad4b2651358117711e08b52d3f562aa Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 14 Jul 2022 14:58:33 -0500 Subject: [PATCH 20/31] fix nEntries handling --- .../SonicTriton/interface/TritonClient.h | 1 + .../SonicTriton/src/TritonClient.cc | 21 ++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h index 8e6bb3518633a..99ecf48bd3376 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h @@ -56,6 +56,7 @@ class TritonClient : public SonicClient { //helpers bool noOuterDim() const { return noOuterDim_; } unsigned outerDim() const { return outerDim_; } + unsigned nEntries() const; void getResults(std::vector& results); void evaluate() override; template diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index 285b39b14708b..4195c0d1cafee 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -186,8 +186,12 @@ void TritonClient::resetBatchMode() { manualBatchMode_ = false; } +unsigned TritonClient::nEntries() const { + return !input_.empty() ? input_.begin()->second.entries_.size() : 0; +} + unsigned TritonClient::batchSize() const { - return batchMode_==TritonBatchMode::Rectangular ? outerDim_ : nEntries_; + return batchMode_==TritonBatchMode::Rectangular ? outerDim_ : nEntries(); } bool TritonClient::setBatchSize(unsigned bsize) { @@ -202,7 +206,6 @@ bool TritonClient::setBatchSize(unsigned bsize) { } } else { addEntry(bsize); - nEntries_ = bsize; outerDim_ = 1; return true; } @@ -215,8 +218,10 @@ void TritonClient::addEntry(unsigned entry) { for (auto& element : output_) { element.second.addEntryImpl(entry); } - if (entry>0) + if (entry>0) { batchMode_ = TritonBatchMode::Ragged; + outerDim_ = 1; + } } void TritonClient::reset() { @@ -280,24 +285,24 @@ void TritonClient::evaluate() { //set up input pointers for triton (generalized for multi-request ragged batching case) //one vector per request - unsigned nEntries = input_.begin()->second.entries_.size(); - std::vector> inputsTriton(nEntries); + unsigned nEntriesVal = nEntries(); + std::vector> inputsTriton(nEntriesVal); for (auto& inputTriton : inputsTriton) { inputTriton.reserve(input_.size()); } for (auto& [iname, input] : input_) { - for (unsigned i = 0; i < nEntries; ++i){ + for (unsigned i = 0; i < nEntriesVal; ++i){ inputsTriton[i].push_back(input.data(i)); } } //set up output pointers similarly - std::vector> outputsTriton(nEntries); + std::vector> outputsTriton(nEntriesVal); for (auto& outputTriton : outputsTriton) { outputTriton.reserve(output_.size()); } for (auto& [oname, output] : output_) { - for (unsigned i = 0; i < nEntries; ++i){ + for (unsigned i = 0; i < nEntriesVal; ++i){ outputsTriton[i].push_back(output.data(i)); } } From 8b95069ab5950685567b74823945eafde441cf33 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 14 Jul 2022 16:15:39 -0500 Subject: [PATCH 21/31] handle ragged -> rectangular by removing entries --- .../SonicTriton/interface/TritonClient.h | 1 + .../SonicTriton/interface/TritonData.h | 8 ++++++++ .../SonicTriton/src/TritonClient.cc | 18 ++++++++++++++++-- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h index 99ecf48bd3376..cf2e438db2856 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h @@ -92,6 +92,7 @@ class TritonClient : public SonicClient { //private accessors only used by data auto client() { return client_.get(); } void addEntry(unsigned entry); + void resizeEntries(unsigned entry); }; #endif diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h index d10cc2752460b..ee573655ddf19 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonData.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h @@ -105,6 +105,14 @@ class TritonData { createObject(&iotmp, name, dname); data_.reset(iotmp); } + //default needed to be able to use std::vector resize() + TritonDataEntry() + : shape_(fullShape_.begin(), fullShape_.end()), + sizeShape_(0), + byteSizePerBatch_(0), + totalByteSize_(0), + offset_(0), + output_(nullptr) { } private: friend class TritonData; diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index 4195c0d1cafee..0d2603f32fde8 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -177,7 +177,7 @@ void TritonClient::setBatchMode(TritonBatchMode batchMode) { batchMode_ = batchMode; manualBatchMode_ = true; //this allows calling setBatchSize() and setBatchMode() in either order consistently to change back and forth - //but changing from ragged to rectangular once multiple entries may cause issues; todo: check this case + //includes handling of change from ragged to rectangular if multiple entries already created setBatchSize(oldBatchSize); } @@ -202,15 +202,29 @@ bool TritonClient::setBatchSize(unsigned bsize) { return false; } else { outerDim_ = bsize; + resizeEntries(1); return true; } } else { - addEntry(bsize); + resizeEntries(bsize); outerDim_ = 1; return true; } } +void TritonClient::resizeEntries(unsigned entry) { + if (entry > nEntries()) + addEntry(entry); + else if (entry < nEntries()) { + for (auto& element : input_) { + element.second.entries_.resize(entry); + } + for (auto& element : output_) { + element.second.entries_.resize(entry); + } + } +} + void TritonClient::addEntry(unsigned entry) { for (auto& element : input_) { element.second.addEntryImpl(entry); From 733d151b656a03658f2b044cea9dd446fae67213 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 14 Jul 2022 16:33:03 -0500 Subject: [PATCH 22/31] update batching terminology in docs --- HeterogeneousCore/SonicTriton/README.md | 24 +++++++++++++----- .../SonicTriton/batching_diagrams.png | Bin 10530 -> 10467 bytes 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index c0418b1517dc7..759b50676040e 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -38,13 +38,19 @@ The model information from the server can be printed by enabling `verbose` outpu SonicTriton supports two types of batching, rectangular and ragged, depicted below: ![batching diagrams](./batching_diagrams.png) -In the rectangular case, the inputs for each object in an event have the same shape, so they can be batched together. -In the ragged case, the inputs for each object in an event do not have the same shape, so they cannot be batched; -instead, they are grouped together as separate entries, each with its own shape specified explicitly. - -The batch size should be set using the client accessor, in order to ensure a consistent value across all inputs: +In the rectangular case, the inputs for each object in an event have the same shape, so they can be combined into a single entry. +(In this case, the batch size is specified as the "outer dimension" of the shape.) +In the ragged case, the inputs for each object in an event do not have the same shape, so they cannot be combined; +instead, they are represented internally as separate entries, each with its own shape specified explicitly. + +The batch size is set and accessed using the client, in order to ensure a consistent value across all inputs. +The batch mode can also be changed manually, in order to allow optimizing the allocation of entries. +(If two entries with different shapes are specified, the batch mode will always automatically switch to ragged.) * `setBatchSize()`: set a new batch size * some models may not support batching +* `batchSize()`: return current batch size +* `setBatchMode()`: set the batch mode (`Rectangular` or `Ragged`) +* `batchMode()`: get the current batch mode Useful `TritonData` accessors include: * `variableDims()`: return true if any variable dimensions @@ -53,7 +59,6 @@ Useful `TritonData` accessors include: * `sizeShape(unsigned entry=0)`: return product of shape dimensions (returns `sizeDims()` if no variable dimensions) for specified entry * `byteSize()`: return number of bytes for data type * `dname()`: return name of data type -* `batchSize()`: return current batch size To update the `TritonData` shape in the variable-dimension case: * `setShape(const std::vector& newShape, unsigned entry=0)`: update all (variable) dimensions with values provided in `newShape` for specified entry @@ -183,3 +188,10 @@ The fallback server has a separate set of options, mostly related to the invocat ## Examples Several example producers can be found in the [test](./test) directory. + +## Legend + +The SonicTriton documentation uses different terms than Triton itself for certain concepts. +The SonicTriton:Triton correspondence for those terms is given here: +* Entry : request +* Rectangular batching : Triton-supported batching diff --git a/HeterogeneousCore/SonicTriton/batching_diagrams.png b/HeterogeneousCore/SonicTriton/batching_diagrams.png index ca438ae2a4ca2ab94c5bd14aef4b940804c474be..6fa2a575ec36d255a74cc62e98f390f477702e54 100644 GIT binary patch literal 10467 zcmbt)1yCH%x9*Yz>F@h`B2|@Tai6|?3IG6b<>jQ*0RXf;)FF+9 zj=JMT{{|CvqB56LR{{WhUIPGuAppP)>Q>+(0N@S=0FF!n0Kp6ZfW#@MQ%x9E@W@P2 zRtoSpJbt1Nb%pIDr{{_~!~Qw&(uUVje=yzTm83EEF@ZRw7!Y!nL;wIC(??U+O&#h< z<>caMVQmMea`SeAQ+e4t0RXz83wj1gMc`+5mrTF0WE$ZDz%68()@{e=hJ)k)H(he( zYES_NR%?kv1-&rQ<>e*iC64IJBmw(o75KS!#_FWm(`_YI{YHeKy>#cjN7n_7m$BYv zat2SB_B*R&yr_;6E%uZOWnEIB!$A$#=-cV^u2>Jk=~p`T5>i}ZUL zt;*=SK^Ia57vQsuJmw%y%Ea|QAQ39Y$!T8NRv&XK z%jJX1f@UP7eGC#GUSfa>8x5n2$BRLReR zVju>pvqu|PyaY6}7hy+UtW6d*j}vh#V#>1{BYlg9^gpw`UrDA)+RR+&V!2!;ky?`Q z=UD5Y4FE4s^-w^h7fsbXUG@o>8Y~bb2V0;9uzKTLAQ;yH=dxDz^RHVjGRDsJgnjxV zj*_Ytdk-Dz-Q3(`PHY3=?#!8yUlWsI%?3Ni42bSnn#gZLG(k zH>mL0Q;#2qD~@6x#@>O{A5-w?aFn|pee|AY+1mi5mN^cqIC;N(<)qrM58 zw+%m!xXx88ibTFas|f;U;TDD{%y7vP`NwJof;hJ|MR?Hp`X1rCv4*xbbY`3Z^DbZA z`jo}8Qe%23Phvx{hjGm~#VIEH0|4__Z{Idfd$LZA@jqqM#NyslMYEmAXNxy`rQs_3 zJ?_mZCWJrFXN-!Gh8J-pdw{fh{+xDzFR$xr8ST)~ab@v+w(w^x|7=T_wQl*Rwy%qX z`oFSMfAvis&zBQz7~V_Q`5-LEHp`X0q+J(`V45wK>}0rsOyKEERhrYx?b4@S2z%n~ zS7E(zX<;n1KIx@8HF)w_ewbj}Iy8F#yT>O@=HyWu9Nif6W6#dJ(o%+@7xRG6@iOmZ zo+qeerLWbMzR9BeE%TlxZ3y_f!o__2Xk_tve%>dTd)(sdcKbC2LzQ`}X!dJ7A)Yu~ z>CUe?g^R}U-SwRhQuo;f^y%DGH(>=uLxypyx6ECD78S+)lzIQs`)P6esXb6cFZ9sc zj9u=BQL0hUswKUvx{avXU(15lF7qzt7R$&Ex|T~#4+5L~lnLvKcYXKMl0P<8W{%aq zs7y##^b|FYtKRy2%E?$QyO<7OI*aCQyRl%?=l2Uwn>{PZxWx=8tI!st%Namr34C*? zvIi=k;G(jT=fgGX!10#B(g6Ud>*S@xHN94U$`mD6FmHd(fz{;I#mk9wDIljmc7b6-#TG<&9wUvqeU<8Gb5VnR07^R;~xoZ*sb?}Uy^6&X*-}me4vgQjnXruF<$@?W}L~7gb zj|`z4qxjN-bu57`@+j}ltg(Zo6mj6K1EKJn#^l`O>>ccpjUjV4>c?zg9{Ul?v{`Ru z#K**<-K~DPs@Va|I!eL|?62IE{H)6`fu?h}=<2`Z8QYa6mYu=8Bt@j{Pov36@}23`1Hs0 z`*gDEqMmZUyhvhsIqT50O!j?&U#Q{l%dUADWA@=*(6$_-!S!t8Xo+F%*uGR;hvq>J z%!7Ahqf0+*srZV=?ak?88*O&K2=}FYNaN%?paMfR(xswF{Yb#`^8477n{N8lI>l*7 zA%o=v`#uqVL5N;fz(k2tANaD(q>Q-BHT4g~lAfeMieclVcvtJdMVT*s*NgGbVK*#_ zds&OCn|;_4BFG`O5vN!gGsyS%5m-WP5&i0FrK5JyI4b$(X0z0hfzTE>&vUvOZFSZ>y zsgqA26bj97Uu7;!&FjKvRmE4VvIkENzc?$WIya66XfEWM$&RM(S6^Vea;Rr?`44V| zviF@2yW)AexKt=O$+^2;x0m>NM7d@&o3ITRfSR_~pL)iP5&v;w{oFlov#Xw<6bfp& zDt?*-(b~h|s!rQ(0_Bo?a2%B;B7i#=wm;dyw1BZqAnPPoN#D4YN7q%!N@*S<@_NHU zTThm+X{A>7adjGJ`L*{d)nNy|7to2qCWRGhFvbLM@JJz`m1_mNVy7PB6_LHuQFj;X%BCEb=_u8gzyFlWK zOMH9;q#AcM=2kmE6=~*w_DaX$ec8la!v+#?K0ZQm2S03|D1>$cw-2A({% zskLr-84({G?rZVmW)|mI^3${Jah3`l1vS}Jb2g@`7;Lgca7FHTq77uR?sSivl4_V8 ztRO<>rX0nJ&%LN(N%Sb@OSz!70Udsp4HVQdHYs_O?%nejU5(CF_u8 zR+iDC!~J`X2fK??8;HcyR=}nwQb9^dPxrQq)C>2n#pc z0ChfAnU}vcQUA*_(}NVDOCP2q0X#w5A1(02zDln2B)eJ!G72`O>uZzqm`7hGMn)7_FeIioP=r6)l;RlczrMmQinMK^uB{>j&$ zY!&G3UPF88Ico=sCFN`lQHF@dj@P4IX;l(Ye?~R0BMC^-c}`g*Wmp4GO%QXtBxF&< z_N&L4WCRWD?LZLHhTXhxO03wb@^0e$fwAX{t+Og=&d*5Zp1H?6cI4gRS4|6>TOWUakQ!ck(1%NfD*INI=HjPO0M>K%FPy;pXD zD<&Mdj8D*z$f$8Jm*J<@lhL*P#&Z4&2i?+~$X^`T=)b=7V`~j*9ZAcrrnWMsF=TL$ zu}!dbR$DkA1U$y66krSO=#{$4DJVUi(u0g$8Hy=c^* zL~L?vy%^qL%)dBT{RNjGkUPhqZZ)GIklsX7?5z z%V?OK9b~UOKT9_cPM2pGled)mt)c4&VIN0By*wZNr@$|kUPXOd;+%Id_Sx^6M4ut9 zK=UWav+K{qZc|B#F;1CQPv1tu$96uJjC|l^S<7)t^ZrKKv!$$}C{rz;M_s$a5h0$_ z&2=#EOo`4i75m(fDq(#CxcP&2#%92GX~CNj89I%0jVU&c5fWx|&fMa(ym{7;)<+=u z%@ZM{>1XoZPx{<;oO4|LC_$dX&p-7>l*Ghef4kxJo)W|Khp{syB5m~16)R^_?&+s} z`KiBM`kb-B3z5oh5^oh6MHNAj80-D)JFo6JngMlIazD{|0`zgxZVE15nEHO?Hw{uO z4cR8z0?A;QJBtZF+P}qdla|PMCprj~XKq?w@#59V3-4DGynR1*B0HIueIqwmMlZt7 zf^6Es&MJ5Oz=Pi0iIBW->%qp33UGm#(S_!CQB9duL#1-fJlZrnU%g3)W zd_x_C$q{&#e+k3sRVq4e5q2k z|5_FaQ@T;7g${Z#+q5FKH&cZ+>S6@E7A3u381Go~9-5Me&6rF?NBqm=^Pf>_Jtx0U z%GdvNq;nUU&sLW0dQ)?+#R5U23ILORRgj%CKF9Q5ksp!KxGxV_G`4u08)`2d_NoPO z7qT%`*u%(T5C6F*=4w-R>NW)PqUF>6mpLr1V-6n#*HVc9SxjRTt9kvRGnvU{rt!nE z!c#bz@QET!-bgR$rVbCk>b35Kx{XC>ZircU-C%IR*^C{^TBhXz*ttBEEaBZ!pr?a} z&-j0ezBtW!YNMd;6rZ`tRl1v6tyaZKE%KH73Y{;8;Wd;lu2gX2dY%ZAx5fW1i3Qa@{Ho()(Qr;dZFKaz#a?Y%r{z^17l2PT-qAh z<&H5Qhty^8=l;$*o+_{mYC(h{O_?7HcAyiYRlJVX)HDBsjvr1iURz~N`nb%jnw6Rw zTVzwwZO|+Qy;^VKV$zm$BsPX+1cBx#BrARs$70-+p$5bOiLfOVL4~uLY(IH}lz;&Y z0+afC5mFm(6LE_$>B7s^DMaLx$-Wb?C)>qa`Vht;z%gA^)Oo!mLJO9whj=vDVl;jt zEs@MSTlnyRy7HI-2htHX80}^huyy48u?PiIeiqMaT{Y1JCh9F?DCtowB~4&_0sDkq z<+L*+gjz&5(D+e-^xwv>rx_V8126dXK6XH@Om8H0s>K%l4hhACUu;|H3oG7j>nHut zd9bwR11@HupVGZycCg`k@J%|&y^RW%^+~#yhjul%*W(uDzm0$?KSgJqUUr@cG!->H zaacW;O*lMms~E0hE#o*dSG;aEY04W{JxJQzA^2Hw(8{sbDi*~M*U`*hYtH_oZ)zRR zDCAmGl&L*dT%~=@aHrskyw%j#vt~)NemwcDjPLvtl&g9W3s1ly=xwBZwH;|sarbhH zECZDqvTV%ap$k$=Zw^y$iBV%!M)VrSI=7a$OSlad6x^Sk8bO3X4a=IB-I97g(w7~g zR_7T-=CaUtz0>5be421dl3HR}WxX#mV{{#uaYjcjd#NDk(yRWmJWT&R>S zz!tzCxI54Y1`z@Z>-};^j}dCs$k%hmuE=G%7{#QPsW7N;5O+2y&bPdfKup4uJ{#DN zl62|1FEB9g0^z1J8zlg8D@pHw3k-m}Laq+wb8MPob7MRv@ufDq6JlbeUiFYP& zKFWY)QrhhZ?ql8_uh9(#v6}t(p?mFko!DnfUuN>N?k;%G%~e?xl@jtckf(PVERa9T zX3Bq0OONBC)mm5~7&qKjq|>qO=w4zH)jtJwmYdU|zkt}fJ48f}3#v=Ps_<-6af>V? z#>1xH8L$Egtld>XC>(Z3M!0=wdD1kPMp@~>YJm9#d=sYQ)?UI?b+pI$P)?~le87!D zXfhkRoQMEoyt>tua1@C96${Cm_}#R)O~hFfhiN`%95y2~WS$nAVPse;6tMh!>A{7P zAf6zhzVde~HflI=;@hEyn>ZP)2ZM zzK#m^WLqCo73%fFHY9E~=A(+~Uy*6+XKsnqX?|-6CsUgs<}O%s!4kdFIWo9eUHv`2_T$rH+!5-C?mS zG37Z+wXf0piu2&ln(2OkwLSL}P^|j^$jQL9G+^S|XoD@x>Uk*wPb%3idsS8Uk!n}8 zeKU#ynM14<=@R5x3kw_5RM)sGn3-{qO*5q?qDF?9pJX&OX_(xPhZ7sBoBU1}_xd!+ zk@Q1P46*4aa(fbL%I#nttF6(rQ0)Om(UBKA13NvJmqGy!ugHLDwFm2APB89e%YEHQ z)z)wo;PEWBLHF89lZ3ul2;rg<1LL~yyY5T*spFU2%M2BIKx23NDAyZH#kv^0ORW^O zZw^h8Hc$+5X%&~eJ%ldx;1)$Li7D17z19d+@xEk@4H zT5ywLL}URdMSTmFZcFOFFPMM_7)*m3OA45Gp4W9~lNyoFVaSgb+O; z2CX6O2x|AgO-_{voL>VS&$&S7Gu~)nPB|LzobS8_lsai@Q8nLdQ(cLlv}I&5O{r0xU+aQ${zR3dRDxm(WFWj> zz*$Q-u{W=m+n)7h)JPzUa+*FCbt#KHv*qI4Y;kV5B%PuNZ%wrlS^l59%RKLICrU4} z*)tQgKRJEQVBUzA|K|RSY=`KR!HSQrA4qs@AgT$Eo;%`vy|$zaFxp{eTBrYYrK>-R zS+TkuR3w6J=>wh1>z}1T+*`J3+T!#M2sIU(;za` z*4_1#O8h%9y<)Zf9P9IDE85Xx=zk&%+xS)_)G@OaD`5h3Z&N8Fl<9X(X=s216kTz) zIqoJ!Lh#h>dciLkTAsz(^|s@0wDD?UogbGX;^CnIswGB;$C1jHkxNP>#S%|&zG1T? zy+;pLJmSvfe)DD5m>Yckz?U_~J{oSHTuoo}-h9zG-m*^WK_0lF*G^d&w@t`{T~1Tq zPjN9f8MCvj+E;dWF{1*M8Ipunh5to(Jmem33 zib>5!GH+PltKI8k9}Qu!(gM*EK&^a+wMv=Egy2WBLS5YD|2X>#`NJR*MaDthdN5OG@)yxNL$cxAxi3ICa%t{T%39Tn65Na z(ZPa~+D=zfoyNbLw(%BqL`JCdt+nfBPq4xib-`vU`>dm_zR77C-?TC!3`N=`!X!lm zIX>dYu~Z7nK=fb7<3vI&ddo4Z%BVDCr6ZjXo;`VSZGB(v?=2DSqL2KtD$jqHm$uk& zua@O9y>o6Fjr`@HwP!;HysSTStf65?12QowDm6~CfAT-k13fZSH#3SFeBm`6&%iP# zixLKIZ`02ywl))TO5c?8iTGD|&>9*Nn)P@dbhgHXMOC<{2wVg*(Z2Y2nF*1R?nEAO zHCos8f@#c*oWAkXoN~1?uY0+ZxvvklnR|j-nL1a>)|Wjr`Pbug($x;0)bE@z$}pIO zD$V}zvL|yl1_-&}4Z_nV`_e3;k4wHF5vbZYP|Tk12mF7|^Qd+5|Ap@Ni>D4hH*qOw z8cLiS3*--Z=Ib%kiU`s9*AU|TRti^N)(~&jvQ~B|>)=yk4`QK{pZo@)M zJMS6=MwQ!?;}XM$*gV>XePh#P{5~<684I@5QbaF44YyeftwZtKv+Mbi5kcrRC^k8o zkXRTSlMyJA^<;KpEht%fXb>`V{pDR>JXQO!&&&vW4QF_2XXQe{tA8vb>xXMw`rE3@Y)r>UpwEafX|*nZ#6Z zB_$A^*slx1j}N@Qe`J)1Z?Q4$q??4W^W-z0Og(@)rA+x#HQuT@o6k1O1L z_X>J_L~b=9#AK_cMx*`}uTP58K(EQ7;^!JK<(1v<4O>J=q8&^!K`DS9-KA)9Qlmjo z&hpX6l!j{fpnu$!*6taI=bgStJryF3;mTVC9sV2-Q!eV&Q>2rer|W8=uOuP9Xh5w% z7xJRkp!c>})HNeQr8p^FSx>)?I{aXhH%~kG3gNBg@BaVA?)wg38(fRteH3j?S}-}I zyo-K-j^uwfYIe~B?>O#bH&FNM^e4XMU(`|NE6JL}uOIh)r!y~N3YZtQ^&XW|Buzh;-)ffGef6P0u-@5)+PZP8oP=15#_G{~=a zuOQsjer}T^Z2P}d>~|l4;eS;54sB2vaD)V(GW-T4$bx>#2i00k2{diKHjmkF5ni+j*g# zzJ7NE3YYYgCqunm=OS&ror$V+C@J1vCw*3LWWd?CWUhsdNmKF>4VUkJr#)L)k^icl zJYd-#X`0fjbcX)(Z?Re(xo`5qpgei)JUiD1CNS7|LIe1O<0iK@T6av)Igr!kp7l^JVeQ6N7(;@p3-t)~3xSej=ny_wB1 zipyzK=4eI-r?bg*EcQQW1gXdGJdY9gy+{L?Q$8er9O?h2W zlH+sl@{3Z4L{d4ndV7HG#2IZ9B6#z& zbWv=@XJks;T?^B|{2GliK1D@of;s_3dytUGbf2le-KDI0C!)OHO~>UV$fCcv!*R{e z1Y(eY^uVBz+|xjQn^Y7=AdGM5-m{o~HrP(vY4)UXUosuA-GDEPN738(q7#O*_(2P` z{ot82xh7{@VxZqzJEV>rBM9p)N`BTJTaymT9CRmd zqvhXtzwQB_F1W3ZL*EZtCfDeoJVzXEjtRFZFI0tJW<^L19r#l|8?+)qIz{ZZzoLW9 zilxFs#_7nT$}DeA!l2D4{_k=u$K;%>y@Eosa7*br{s^*>rsPwx^cie9&9&kc7>4ga z@%;*lo=3|0x7$QqP(xV2_Otzb&x)i)$i{y~ad9|(D#=0(08yqI^8DgkzdFbu!2=hh zUjQ~Wo1$6NVm>P>lcU0k+zhh4Rmt4LD-a`*)x>|IMC-){|vasyDv%2xeroWO0A(vG;On& z7xr_(m?I3FivN2`+)Jyq*yq4j^}E%7`E@c!(chk9!e0?6WyoWT(j)E)*k1A%%^-#= zCQto$xnAf4oh3xNw_KJpQACz5O+Sb~U!5I@%ohMtsJ)1r(M+Y7P&XBckK-KQ^vhAH ziGEKhrXwX#?Ye6!sPSc(b8cb9+ZH1Be?kN?SIXDjsGK^rs%Gum;xOPQxF!`vCw?&pieeA-rr{$+(fsVt^_^0PA2PgB-sxHmH zN@F1u43LtGHp)MKS#@rn8ay>4ZkZ46e7rp$iD4eCCeydvx&t@bR58+NDGa%$P%vbB z{?HAEQ3_)=orQl?oUGroog!Ll~uQ4R_qBY(r4QlH> zG9*mX>f%97gXhkf$PUM7xWrJQ;LEqD)VLTENT8Nng&K8IAMchc7E?U^Dh%Un2=6CH z_b`>j`Ifd!kE7G~B-%bqqB@>(uVhYH92A4MV0%Apz|IKyxCNqG@$jIZQ9YR-}<)ktkt z%R4~LpZWW#liO1Jiv@}q{hndjmXrT zSgJca1$m^ieBu~~ zq6w#%8FQX&d*;HpD18RvO+z`Q1QuOxo71YgYr7lUXi1u@Gl=p0Yz7O}KDW*nZ6(_( zI!1g=|BIegPR9~J&o71ggoEBqTGtH*b%P6u0}$tTFgBM9bW z1M>=k!3!pZSO4N*?+CND@c!=|iXXjvXo2jX7R&`L99`_8ZUCs0lby9W)Xmz_fx`)A zLGiw+A7w=KV1&99l(SZcyGl5^JHV_REF~Ph0NnfnUR?ZO7Ss=h#pjE%rTuqXRToE? zyE)tiAP#dhgHx$W$xywuHgkcxcvEq5vV-p~rn6Dz#Q$!t4R>)xwF*Glv2n8V;)(_9 zyhW8WK9r+AZ4;FICje6aMG&|JIZ-ir2*QYojvmT~>tB2vT{ImX?EvEDE{S*oY_U&_A4a%GO-@TRKZctS0q5n;r-2c|*p$oUof9$8y zKXL!R`^+zz&ZB$;|94ADN`g|3=I-`z2RC^sfV;ajOc25ix8Q?w!Pxk?1i09E_$~O^ zpl}E`8xNGv3=Zak!z|39f3RD(|Fsr*2Ujd? l1R$LJY!EoF0EEwi7tCV@E!K%eeKZ6B$V)3r)k>HK{V%aspJo66 literal 10530 zcmcI~1ymf%*6sv2!QI{61_&A;A-E@lI|B^v?(VJw1WN)02tH_VcemggJh%i2;SIU> z-1pyK-dbVxp0u0RR9@c{wQ%0Pt)Zen_Jr!CO2T z#gO4A8dEutG63Ms1ONnv0swdLrocS_zzqlh92f%tA{hVxv14}AJ5l%z1QSJBDZpby z!bm&3hUzG%;{rdM{W-9qy({nwWLJ4*Y2+OwLR4ICD=qg_000TmTV2}~1oohDbapVe zvW3vNdO1R9Jnb9-0PV0@9lhjyAjYqstk)*|LP$dM^P%~aNJ>U$^ zX2eW6lwh3U*TVzzuP$un!Vby=U7J(|ZLVbgcLd2CY;hRN_?TPU&ZJSmRXJEvS+?3Z z&lMTIsU4@XTwQ&1TxOlJq)Mg;Y}AQ5GLUi5 zlBJOozhOJxR3M}}hE}10lE5YBj)L|1Yx3C$q3Mn&?+;P7Cv@!JNJ@n^qB6CMyIH{X zCZBLC`Hw?O#Enr%fG<{9#r1n6ZHM{f@TyQ6GC>M329m}cre`|V%8hglH2hy4CqBHE z4B67WscftsK1v{AGaZ$ z{`+p@tm+}ss}me_AcmE)^zv>oQ`7oXey6~a#`3OaDhWROBKeOb0+I)MOh&d9RQMZE zEGo$YNr!WKbg5c-sPq7KY6)QwGnwJL7oHW)=O8Pmaev~xQA2DXsc28iEN4DXinx65Gj^9RIpR2w`=cK!R2ssSoU!>g}##B)e+wB$aq&j{t+-h_~V$(!SSgFSExH zOP!!kCJt{cU*oQ@IX(B1ZdoT*CHlrh;~IE0jhj;`Gc2uYG83$Qb4coN)^a3NPj2^} z${98`$Q7CP){wUTEknhm^=LEts_|QWllz$o-ksst@N&AMfoZiPR6bg)``m_*A8|#T zXu)&^;8K6r+(^wyRYFO69RN zQolN)U24N(B;0)HD2%_-tqsc2z@Wk9Jbo!QBB&+t-jD8L(}ykLu!}#VlFoJdpdxao zZVA;(j4d!;(qn0oqeA*D03;tSbXs=cp=+Q8^M>-{CTIj0@#b##}74G-glORuJrOZ4lI5DUu zdu9meKI-ljTHPrAl1_;j{!Z%5>M$(j?R|k6mg3RSzc!a<)1*^EjV+kfa5_vq; z;Rm{x42l*2KzlAPC86%Qc$Cv#aj4*4)b2L1kLg*N@3c;udx&i5$Q&>7kxBxY1cPbY zKRAd$C_C^Tw_q8_Ar^?p<}?$vhi-v*Nm#^O^-Qoy8u43)kRFztJi}E)LA{j1%pFR$ zUjdztM$E60XpNXsDW2%a7@y5UsgyQF_lTyu*Jb;0V>=t_?b8~s3W!EYGGc2yaILh&vQa>ht% zh${ETxXrV&OhDqJa@$pBVr!Al5(Y8H#LT-IPtaDQGzRS*d;wT7V|FPplU{bWO64Kt z3H5YeM3OS3li)rP2v*~!NA883EZI-HATAzOK`DGqM`yW_#vZ)DJ_Fk=U8SK>;k!6u zEpr$yVVYcg(<O`Bns#(9=d<*s6cVB*!$>r{?pxBgS(Yu|g?fp88dL`R@C$* zyQA{(spX{Z6z92?4~231VzIp}J3}j}Q8*}Yy!IXb>8h=Dy+Sn>`bpMjWNZf?=VPeX z`+$K$*S4zDfOdISm|xn8I5Q{lM=9AgbItPu7hy=2i~DlAzwaXNHtr7*a>M$~8rxRl z^~rh{P#;*Y2S{o;+@81H^9hHkGtL(^h50Tk zh-W%hq@4GHehl-l=asweMC$L|+XWGo&(WkO+gbl8-e%W;jZ~+O&sVKRkHfAE17(-S znj%?Z=r^F*)EP+)z=+4AX{Nn4NrN^6F}hD6t?L^Bjyku9F>2bO`n|^F^tQUxLxrT1 z<~GZ((_C~*UBU-0tV=sENwhSKKJ! z>&G5PUSJZRx1EG}OM{%`c^WQvZbQgqkrRdrdztnttuxqLcXw-1{2{clK8d-`DAq zH>*T$4_D6T@FP$X%52DXDQAig7gy>C*bzERuSnb|*_gI>%1(cRR2-w+yuD?E%JNr2 z0oWM_{lE8A8<_^`RxJfNrNbn5>omCD*TS|izb;0%nyeZg&0|h8n2l@y)wAZq%BZd{ z+KW#p=CyU#aqj5SU?M%s2>X2Kea-Zs#Ck9=lxmoyNdD4L)rMW1iD~;#`O`A`1yg6rhwH2#PS_q-5V{_ z7Atpf*J$dO605>BZhLK*ts^aUyM5N58RZb>A^B+9tDt#q?$ivfMLKFLG9B=60mm27 zmA~hZQjup$if<>w4z-yAnQ8Y3GWhx+oC}x-Tew@#f@Z>}C)HPV*0*KvJx5UIRmrUA z*nRu(a1pU2e$QR;Qr{kry{*bg?p4o)vQNv z&#~wRRvuyHr(dLxTg!ba3*~P)zWSKt`s=@kl?Jfpp%k?QY%>P)s|=2pRGn;tUZ7IB zPA%CCfAUnf<04$ruig4(5z3D$=cuzDJr|c1k7jtR4q&ZayKh*)lZgQurO?jon?C>CAnnP5 zUn4*I4EYiqW#9BU9)dr1^5L-SBHxz`HU~?K+uz&XqhWMMGi@`=>NCYU4IuQ5<=GVP zydw!FR2~ZFLHVJx0DK-K|NphRc3dz}Frb63X_2f6&if=iey+feM zm9mYDoR>=-zV%dYD_(ftyeYQ(<;(&_r9$}z_2s~wn?pmhOLr+_{D_n^gkH6J^_&9&YrmFFXNE&dwHeb^Ta4B?F%82) zxo#1=OhLq7b-lu_O>W-WrEiy=J@i_>Sz7g^2jj%dS-D^>Av*r$UxX0gwbH88d=fTE0 z#TOpOmNsrRkNt9w+_lgXP&@+WicjA27r!(mD7s9G*xbsub#$>(MFYiaZ+%}&AQTvJ zBrvzVxU5R}viz_!zrH<(iZ(}@@<;%6h`(^}DEK9%rPv9+$MCo$7G>dnrK4?iEr~xN z>~cMhwz+YktTrL6*vH!k`NDcC!6E67rEiTvR@23)ZJ|>ee+FFMvmVUgALJ`LykR%N zDPq|PrtpX{?hUi}ix`FBz8%rCIn9zEJmcYZtl+!j@{2+h#NCHk0c{*P&A2$ju;C1# z4Ies5ZK%AdKa&LdJT?KH7`_=WBDi39;cWdO&~ z+sV_e(b}C*Pd}>V9HeHXnl+>g;bAs~81ybZa@GMV_v9{+YTI?up?Atz${peWqd-=! zuPD9DT??;Bm3p3?p=6w3O%S4je?MSEFqVwKTF=PFM6p9c+}yG0782eo3yRTkRzhzY z%X-Eem@PlG1@^_t>BL0iCGjpk;`Q1>M&B2Xq4)cCTh!bes(p@y(31!I-mudqvisTR zM$fK_r~$+?26P2}^pP0$VvS2Im2M-VfAaMS&f9$e?Qd=Xp ztar;9FsPbic4gzgVKKaaknLX-Fe|SD7@!^v|28#UmHsP<+Iw)`ayajDylL3>Lw;4x zd{vQVX5MLYW-T$KFsH>=cp|%L|0N}m+%QUvoxG0*$og-^p3VF)Fcv*Bp&Kn@9nU5N z7*t-!!F7CUFpZi2s)x3V7V-wI2WgX7cByyQ@)>GQ{fTizOOFUa_$0Y0=;4b)kC4a| zt3zX9q7*6@KHIxN-MS&X%`GuWCF5>SlV#CV0ZZ7rg!1>kYanA1G>U+nmE3%cZmexGL@Y>}3L zR9Jra0o4BN+mhs$VeqJR7D$mBk~KgwBHaUg9{jJ^C2?r6?VE|!4LpvQ@?FoR! z{g+Yyw|W0u`FZ*HrB_Za|Fx#4lDFO*f4<*lK5pF2it2Rb);GORguK)k+uQkDG$cOc z0oi|VuXPu-efNZ;sMO_7lKGYbZq=A;816^P?E)$2^w#c+<@auBDVRQ_|FCliT@VKY zLT0womD#h+K}~gp6c|{P1rINI9kES{*eJrI#PRC8l_%&{mm``3gOw7+P!3 z@(L>=o%G_3he;Ar3z?4M`7rVsRU{yTC0`*{*r>=rtqq_peRL=7k2WPKg-*Fkrp zSV?VIzE$R(6lR!B){|^8vK?B=uZfgAPnti6^H{dw$^-B=F8r6V=Ss@XNsQTd>v%CHZNABxTX7Pr70M zjZK!}&Rh{jCPQrb<)mzWU_Ys32*o1{_XiKx^e@Ws+9s1YFiC@KQ8G@B(wA7r2GY)J z$7^N-9&4upFp1qD;b)g9Qfg)`*~X<4>|H@ zdOn))su{7aaY1!Yqim({SfKIGjzqEqmQ zC}~A8$A{Gla+%T|y&BgLNKNFya>*GYlWm7i@p_bpWN8l6R%+fd|GEe{YLH4zbKlGq zn8@}&pvyxr@6NWi48k)NUB`*yW30D;bbG#ar?eD1ht06cssCn$;o5e4Dtz%nvq{6; zV#SR(0}Cnnz~_ju<-#$9uMT%yPq{DU$CFs%Yc}3D^8y8afr?!)Pc*p4cWj30$wk4? z^yGEs3VLEuJrA36!+9Lp%8zkRc(=B2)=HIM(@>{Zz7MwTX+SXwcQUfRYa{F*6-ar9 zI{oWAt4hokMz`hoeecBAR611!gr9%_I|LdS%wSzv!WCF-VWk@JXwN`A!$`^la{uz3Se^oqtV6!PR6 zTT(wNnk1FruH%>uZ*%ZtDCAAj7`XDmtGh(e3FjrE&czqjcyi!hux}7G)~&>`)NxD8ar4)6a*WWhm(k zM=~ZcG+w2=o9h&ajgY2LhQtm}<-%R11edr$M^5RKCQ$! zay&RA^enjIP)#=SVa|o z6PWU77y9((2zjTSW1{*{1N#$b|GRg|pI1wj*ETuL@nG-Qs_NP=DYA`Kup1;X;=n0e!Av~l`)+iDGLrV1Z zCu9QKB!oelWmU&+m`vL*KbCOaK6gx6`s(pwj@6~(h$G`S?QT-ME#{<%(vSz7n|Rx9 z_%L0C^udXEX%8>u+)vnAks(p8{?jKI^zAZ!2^$;w*+r3ZyH`^qZ%SNou@q>3E`}ao zXZLosY&f7oU{f&?y%A4Ad)|ju@7=<{k48IU*{+t*cW0z}`l#5ut2rl^ZOXPn+He|z zybGFOoJV5H_-5hwm2esROWQLvpK%4NXSfRUFLFLOA75=O%L-SL#~M=??g)Ec)v+e* z=W(?1F63#w>r1N{y)G%lwIvW6^3r56jl+y+hC_5(+uefLZs?|Rrt(?n&CN}y?K^?A znvo|m!HS^Jnn!g343Cjnpd*woWtU7!N<+3(J-N*$ClLm=|4ke3EQs{BU=d4f8oi`pFyOm z;t*!8P~e+NpXU442g|pFlMUZXE=-au7)mI0RM|t^sj!kbyuQ`e3uySdZ>_!DNvmnd zv=!NXqifMEVG3%aoE|#b`$H{!`B`-%E z&;Bn;!OreWHa7u^7fjnrxLSrn+VTpK!#5LjL!nO7UOLK!UQNiJ z<>tPXr@2%ArGYgJ#8L20c{&EAW(oz(3qOf?IY!9P+a!4ASU08TN z>rt?jShr18+C10ShyZt7osE-1g7s%=5A%$M?goiusL&~VeUqHrW$r^utvs{Jiv^L| zZBFa_C^O;JBmO}d0WzVTBewu|y7Qpzjk(IIPi)+k(%#8=?SVI$7V5{X`h<&aps{!b z8yhvokDDEAxy5^N4^;=YLY7KHD~OfQ1jHF6``XR>-~w)*`Ae~C zvCyMnZ1)}dZ8jgRh#YK<9n{Y(G0XC1`f^73UoW;hw>M4hR%@&z?yaSzIS$*N zH1OxQ#zcbLmWA}PmO0H7wVh2Ch(!DAU#3G%4`&-Av)3B>!X;O|xjS&`(LuQ%C`413 z3wwYbMl-G7)Dnl^y`#%M(BUrHIvT5g`547}To1^r2dz?>Efx!G87xLkr5jxf!B*=) z6%8AXf_j#p)q3FQpAk|b|9gRPAxmBq*cin(Wevyrg7uScd01be^y2~{BM3L*9{t7gf`%(H0za-;VxzRv-`vHq zhR2pGKDGJ%JhdCHf3S(R|IO-CPbsL}a5QT-yq_)oCm|Gqk}sH1;YSmg>8a&F;WCsYgQ(VZ1&bbog0;+}YQxN7gQ8?L2Dkr@>|B$I0*Ho#3og^NW` zNGKyzHZFG-Vm_P_T3Ji5^VbvfudLlU!FTelBZ|H4BUd!%J#O~A@_qIudF6{plZn)j zSvtqq$LaFvZUxCrX!Fu$tBfLj8$qactxA4A^QSndB)@%^MFmvorsWW1ngjEV?;Dgx zRY(d&BtWJfC7v4T4o<0zjHof%<-YfC2wo>Tsd{d__AbbQNK1I+AeeXX@T)i^C&Ul- znA3pB4TG(_7;(0&uo0te-12i%TMTdBzAUU1C7`B;gk&lDfD)a1$e17f%c{WP`9RaG zB5X_Pz^>8i&f#)=f-*j)M^=tCiru3?`P--p%{Jdr{N+ib(+@48nqbBjY~|OxBMYzC zBO}HIl_hpLNQX(%OFnlXS7Y1?JC^WRj-WLd=RhV!`nnzSOBqe)7<}Obzqh|M#wJ0p ze&UwB#$w}X**Zdy*`o)*p#hZ36nj@OW)L;*8=to&f6&|5Ig6K0O64M+3+5c)?s-|m1ePpfHpQ1`RQP+mx2O{8Stjo_z0Ze2+Gg1my$&Sf#KPb4+VIGq zGTad4dM57rMJ)#}CHcoSp6n+HkE&A7`}RVjbwWz3+`wx%MFVQLszIGcUPlM^G7^8h zkrKLR4v7+?p?Y?)@~nxDcBDOTqlzI-r(V#oJ98K8ST%Mm!EpbsX8*Hn!S*a3b$Z&( z?KP6}4Dmz;E~^dv@dU}-8-2FK5&?W>jch-aYjcQSH< z?H-ot<@zli2`lw5gIINotjoVub-SFBX_fZg4N0@Mi~K)9|39dI&3F6JEqT`Hl=@fc zwyT>8`NN@#Gs5fN#E)YqB7+CtD0eQo?*xC2e=f3s+|s|gOs<}UzS_@oOJ8^$VJ}h* zRxRg!bs;Hx@=p5K7A;%!_KZjoJmcK$IJs?BInQ*>n5J$T_{`1vIF)7**D>dOC#BNv znxq<(=h8pq^UZb&tn#=lEiUi_LK4zZVEfPFLnR!j0L6oDgvtU@xvKp$;DBl$CjuoR z6_phSKe94^4n#!kaZ|A$K1sdlQu|uUs@6~Givtaxs2}33F{k0Z#dc!4c0?D@o4ym$ zbz5v|PnE|p7K~-B5Ip@w1Byd^LTZ9Q-gof{b48daR{S{*e*0NAn{3%DUW}~@{cEXd!Ly${ejssI`=?|%l|p2# z*|Kqm&g4HXq~j@91}8;g!=agR@8ulmc!yH88@LFDr^Nq}fB8cY2n5@aBn+p9-gLbL zg1gBgOs*6nM=3)&z z`DTD^E91W-OvGF}pFEKChQc5P)l$BH@^Yqlru#?cq8(^&=P+}D{-H5>Uw-lr#4|4n zIxor_@{=GZ}DHbrA~9_eCM-mYSzV$m+EQG z5L>WxV-#aB9z#0q{f4_ZEEDRUnu3SPS+rsAUZ=pQ&(}TpJAlqFrdbi(O*q^R=NT~R zX_8$kg*2H;pD@XXKV(3p2k%#29S9d)_*uuEI-W|q)e>Rxp=pV#@VDS-C{*G6_NGF^y zYUECvK#<)(QHoxx_tc!r@BlcGTuge1VVbTt-eKLuzJQYa+~e;j#rfgA%9)q;jKO&H zV1`{Q9K4L@)blEos2Jo2wdW7!#6qW7t;-^&#n?)_HI8#Z;e4x1^mv-%Y1i6HXX1C# z93Fm#wx_Q7HB?L8yK{2`XVCIddZ0Ee6^`oEVd>qVxGZWb5*Z)&^5bKTegsodmJ#~G z9y?tKw#5l%lgZ4cTsP1@QbmG})2eC|pC+0DsOCuDOTUTc(T0ny8+qw zEI;|%afz<@^SkPr?B6ZirH0IASPmH_i`O@$jpZ zM3}hUa3DHwE5dS*Xz=x@^q*H`%9DBYS!49%=$Vzu2KCqowJ={rO(2GYT|<#zi~v!A zDH`j9Kg&ogQIXJUN!tp`?t5BDR1Ny+M;<3X6s{~SIGkr#`HL6_xpMR8wmi{&J+RQ` z3FDh7Feu_t^xXq8Nl~eq-3nmsrK?Q&7~RE^`mhgQOPOdJpFA{C-PB{Y(dl}&6S{Vu zp7S;GSJ!yR@V>{LR?hbso^!q5aRCPmrE8}(zsK7Sede&3O+03AIWx9)T+XXhBJ#Q^ z1YKT+dfqUe`TI;f@v?`l3JxX&aAvlL_V7iKz4r^h(*#OR69TuHkfvz0>gIzgT-&Yc zd3St>!FqaIsW&NQR76SuO6J@`FMMIxQK3q5^75xqB?iwIT3Ru-crs7Z1o0@(o?o%Q z&w2_alT!FFjeOAA!L`5>tC&3K!AQ0KBk%qA)VYohf<7M94RI?R?3>JLLt4#{jFi3@ zma|y?y^)F0T&Y8E?dnA=INxPwb58-Z%oJylwY}!6-)4;lV--mw4R@k`L71-o>iv(U z9Ub9?{zt__pJVtF7Zb~v9Oj8vMVqYr@Ii~6)#j5+V$?{LkY>1(V2_1_Qde+d5*oYt zm28~{uG49i1b25MwK%oNU(ZyvI#TT(B+8ldUB5U?B;}QXN!&`+zt<+hknS4;FzGPi z;ezowq7B`i1hEg7+VQ+S&XN|A6VoqaqMvNu4PTDFbYcWTaPdBExh}*n zT^Orm&O4VR+=){JviQ3gNpQIX;~9R*C&(Go9+carqL@U; z_rhN5Bx8AFn@aYGLU-uTB-XZVxT% zfPV$T=qj!4Y6f zW>)52|FZxUy@w}Ufb@?BQxS6qXFISf0PN^!Yh?;{wQ{iMb~H1m3aaUZccOXf1h0z7 zS%DxfZyen0&8+M#-Z*#y_ymPKc?E&&@ShoqfDgPc{oj4xIy;!TnL?Za5@rr25Sq7A zGBiq7CeC1IFB%^B|FXZX#Xw|3Rm~KXmp4wBP^7c`E<0`~O+yX6c7CTt?)d znv|7Aq#R7$>>&28@=^ddH!Cv{L36MmFRuWQ(~KV?$oWDL!pkYl59HxA0}2cA@t6ph zngE4*8iF7GeH3|n7gw;oDdg`_cp+v&W&)-!IE8u51>mCy!ml7^f}8@Tyn?)Z@Ina4 V8+zGy1Rn(;FRda~@y0m#e*uV1>N)@b From d5a708d0dbb92731fd576f1a5042f11ff03059c5 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Sun, 24 Jul 2022 19:04:25 -0500 Subject: [PATCH 23/31] try to handle empty batches and size zero inputs automatically --- .../SonicTriton/interface/TritonData.h | 3 ++- .../SonicTriton/src/TritonClient.cc | 3 +++ HeterogeneousCore/SonicTriton/src/TritonData.cc | 6 ++++-- .../SonicTriton/src/TritonMemResource.cc | 7 ++++--- .../SonicTriton/test/TritonIdentityProducer.cc | 17 ++++++----------- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h index ee573655ddf19..de9c5afab8662 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonData.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h @@ -159,7 +159,8 @@ class TritonData { return std::any_of(vec.begin(), vec.end(), [](int64_t i) { return i < 0; }); } int64_t dimProduct(const ShapeView& vec) const { - return std::accumulate(vec.begin(), vec.end(), 1, std::multiplies()); + //lambda treats negative dimensions as 0 to avoid overflows + return std::accumulate(vec.begin(), vec.end(), 1, [](int64_t dim1, int64_t dim2){ return dim1*std::max(0l,dim2); }); } //generates a unique id number for each instance of the class unsigned uid() const { diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index 0d2603f32fde8..0612181dfb436 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -293,6 +293,9 @@ void TritonClient::getResults(std::vector& results) { void TritonClient::evaluate() { //in case there is nothing to process if (batchSize() == 0) { + //call getResults on an empty vector + std::vector empty_results; + getResults(empty_results); finish(true); return; } diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc index e037b8887a87c..879ea0949fcc5 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonData.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc @@ -212,7 +212,9 @@ void TritonInputData::toServer(TritonInputContainer
ptr) { entry.data_->SetShape(entry.fullShape_); for (unsigned i0 = 0; i0 < outerDim; ++i0) { - memResource_->copyInput(data_in[counter].data(), offset, i); + //avoid copying empty input + if (entry.byteSizePerBatch_>0) + memResource_->copyInput(data_in[counter].data(), offset, i); offset += entry.byteSizePerBatch_; ++counter; } @@ -251,7 +253,7 @@ TritonOutput
TritonOutputData::fromServer() const { const auto& entry = entries_[i]; const DT* r1 = reinterpret_cast(entry.output_); - if (!entry.result_) { + if (entry.totalByteSize_>0 and !entry.result_) { throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result"; } diff --git a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc index ea5d63cea072d..9d5d4d431cecd 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc @@ -38,9 +38,10 @@ template <> void TritonOutputHeapResource::copyOutput() { size_t contentByteSize = 0; for (auto& entry : data_->entries_) { - size_t contentByteSizeEntry; - TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry), - data_->name_ + " fromServer(): unable to get raw"); + size_t contentByteSizeEntry(0); + if (entry.totalByteSize_>0) + TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry), + data_->name_ + " fromServer(): unable to get raw"); contentByteSize += contentByteSizeEntry; } if (contentByteSize != data_->totalByteSize_) { diff --git a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc index afc23b684df8c..e6ed3b70fa4e2 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc @@ -16,7 +16,6 @@ class TritonIdentityProducer : public TritonEDProducer<> { explicit TritonIdentityProducer(edm::ParameterSet const& cfg) : TritonEDProducer<>(cfg), batchSizes_{1,2,0}, - batchSize_(0), batchCounter_(0) {} void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { //follow Triton QA tests for ragged input @@ -27,29 +26,25 @@ class TritonIdentityProducer : public TritonEDProducer<> { {3,3,3} }; - batchSize_ = batchSizes_[batchCounter_]; + client_->setBatchSize(batchSizes_[batchCounter_]); batchCounter_ = (batchCounter_+1) % batchSizes_.size(); - client_->setBatchSize(batchSize_); auto& input1 = iInput.at("INPUT0"); auto data1 = input1.allocate(); - for (int i = 0; i < batchSize_; ++i) { + for (unsigned i = 0; i < client_->batchSize(); ++i) { (*data1)[i] = value_lists[i]; input1.setShape(0, (*data1)[i].size(), i); } // convert to server format - if (batchSize_>0) - input1.toServer(data1); + input1.toServer(data1); } void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override { - if (batchSize_==0) - return; // check the results const auto& output1 = iOutput.at("OUTPUT0"); // convert from server format const auto& tmp = output1.fromServer(); edm::LogInfo msg(debugName_); - for (int i = 0; i < batchSize_; ++i){ + for (unsigned i = 0; i < client_->batchSize(); ++i){ msg << "output " << i << " (" << triton_utils::printColl(output1.shape(i)) << "): "; for(int j = 0; j < output1.shape(i)[0]; ++j){ msg << tmp[i][j] << " "; @@ -67,8 +62,8 @@ class TritonIdentityProducer : public TritonEDProducer<> { } private: - std::vector batchSizes_; - int batchSize_, batchCounter_; + std::vector batchSizes_; + unsigned batchCounter_; }; DEFINE_FWK_MODULE(TritonIdentityProducer); From a6352219f4c8b5d99052798303efa49736f31140 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 30 Jun 2022 09:16:49 -0500 Subject: [PATCH 24/31] correct size check --- HeterogeneousCore/SonicTriton/interface/TritonMemResource.h | 3 +++ HeterogeneousCore/SonicTriton/src/TritonMemResource.cc | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h b/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h index 830a543360301..9ccd27fd0c0cf 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h @@ -52,6 +52,9 @@ class TritonCpuShmResource : public TritonMemResource { void close() override; void copyInput(const void* values, size_t offset, unsigned entry) override {} void copyOutput() override {} + +protected: + size_t sizeOrig_; }; using TritonInputHeapResource = TritonHeapResource; diff --git a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc index 9d5d4d431cecd..762a9d443db85 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc @@ -56,7 +56,7 @@ void TritonOutputHeapResource::copyOutput() { template TritonCpuShmResource::TritonCpuShmResource(TritonData* data, const std::string& name, size_t size) - : TritonMemResource(data, name, size) { + : TritonMemResource(data, name, size), sizeOrig_(size) { //mmap of size zero is required to fail by POSIX, but still need to have some shared memory region available for Triton this->size_ = std::max(this->size_, 1); @@ -114,7 +114,7 @@ void TritonCpuShmResource::close() { template <> void TritonInputCpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) { - if (size_ > 0) + if (sizeOrig_ > 0) std::memcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_); } From 453924071c3042ef34a6631d38ff1be639c3961d Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Mon, 8 Aug 2022 15:58:35 -0500 Subject: [PATCH 25/31] update server version --- HeterogeneousCore/SonicTriton/README.md | 4 ++-- HeterogeneousCore/SonicTriton/scripts/cmsTriton | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index 759b50676040e..9888266eaeddd 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -131,14 +131,14 @@ The script has two operations (`start` and `stop`) and the following options: * `-d`: use Docker instead of Apptainer * `-f`: force reuse of (possibly) existing container instance * `-g`: use GPU instead of CPU -* `-i` [name]`: server image name (default: fastml/triton-torchgeo:22.03-py3-geometric) +* `-i` [name]`: server image name (default: fastml/triton-torchgeo:22.07-py3-geometric) * `-M [dir]`: model repository (can be given more than once) * `-m [dir]`: specific model directory (can be given more than one) * `-n [name]`: name of container instance, also used for hidden temporary dir (default: triton_server_instance) * `-P [port]`: base port number for services (-1: automatically find an unused port range) (default: 8000) * `-p [pid]`: automatically shut down server when process w/ specified PID ends (-1: use parent process PID) * `-r [num]`: number of retries when starting container (default: 3) -* `-s [dir]`: Apptainer sandbox directory (default: /cvmfs/unpacked.cern.ch/registry.hub.docker.com/fastml/triton-torchgeo:22.03-py3-geometric) +* `-s [dir]`: Apptainer sandbox directory (default: /cvmfs/unpacked.cern.ch/registry.hub.docker.com/fastml/triton-torchgeo:22.07-py3-geometric) * `-t [dir]`: non-default hidden temporary dir * `-v`: (verbose) start: activate server debugging info; stop: keep server logs * `-w [time]`: maximum time to wait for server to start (default: 300 seconds) diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton index c218f271ac759..acd4873828cdf 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -18,7 +18,7 @@ PARENTPID="" BASEPORT=8000 AUTOPORT="" NPORTS=3 -IMAGE=fastml/triton-torchgeo:22.03-py3-geometric +IMAGE=fastml/triton-torchgeo:22.07-py3-geometric SANDBOX="" COMPAT_USR="" From e77801f6c739baad1dea0b7eaea55ebeb3b5a621 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 31 Aug 2022 12:39:58 -0500 Subject: [PATCH 26/31] fix counting bugs for new batching interface --- HeterogeneousCore/SonicTriton/src/TritonClient.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index 0612181dfb436..ab18b92e3dd29 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -202,7 +202,8 @@ bool TritonClient::setBatchSize(unsigned bsize) { return false; } else { outerDim_ = bsize; - resizeEntries(1); + //take min to allow resizing to 0 + resizeEntries(std::min(outerDim_,1u)); return true; } } else { @@ -214,7 +215,8 @@ bool TritonClient::setBatchSize(unsigned bsize) { void TritonClient::resizeEntries(unsigned entry) { if (entry > nEntries()) - addEntry(entry); + //addEntry(entry) extends the vector to size entry+1 + addEntry(entry-1); else if (entry < nEntries()) { for (auto& element : input_) { element.second.entries_.resize(entry); From 4b12f67756d4c9ad0c4f66a4b70079121cbda834 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Fri, 17 Feb 2023 12:54:23 -0600 Subject: [PATCH 27/31] only create shared_ptr once (avoid double free) --- HeterogeneousCore/SonicTriton/interface/TritonData.h | 2 +- HeterogeneousCore/SonicTriton/src/TritonClient.cc | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h index de9c5afab8662..23a3b8f57500f 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonData.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h @@ -142,7 +142,7 @@ class TritonData { void checkShm() {} unsigned fullLoc(unsigned loc) const; void reset(); - void setResult(Result* result, unsigned entry=0) { entries_[entry].result_.reset(result); } + void setResult(std::shared_ptr result, unsigned entry=0) { entries_[entry].result_ = result; } IO* data(unsigned entry=0) { return entries_[entry].data_.get(); } void updateMem(size_t size); void computeSizes(); diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index ab18b92e3dd29..2910caa5f6356 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -272,9 +272,9 @@ bool TritonClient::handle_exception(F&& call) { } void TritonClient::getResults(std::vector& results) { - for (auto& [oname, output] : output_) { - for (unsigned i = 0; i < results.size(); ++i) { - auto result = results[i]; + for (unsigned i = 0; i < results.size(); ++i) { + std::shared_ptr result(results[i]); + for (auto& [oname, output] : output_) { //set shape here before output becomes const if (output.variableDims()) { std::vector tmp_shape; @@ -285,9 +285,9 @@ void TritonClient::getResults(std::vector& results) { } //extend lifetime output.setResult(result,i); + //compute size after getting all result entries + if(i==results.size()-1) output.computeSizes(); } - //compute size after getting all result entries - output.computeSizes(); } } From 2342a44fd30a8f4100e3ae3a2809ef89935803f6 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Sat, 18 Feb 2023 12:18:19 -0600 Subject: [PATCH 28/31] code format --- .../SonicTriton/interface/TritonData.h | 111 ++++++++++-------- .../SonicTriton/src/TritonClient.cc | 32 +++-- .../SonicTriton/src/TritonData.cc | 26 ++-- .../SonicTriton/src/TritonMemResource.cc | 18 +-- .../test/TritonIdentityProducer.cc | 25 ++-- 5 files changed, 109 insertions(+), 103 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h index 23a3b8f57500f..a6703811b6257 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonData.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h @@ -55,8 +55,8 @@ class TritonData { TritonData(const std::string& name, const TensorMetadata& model_info, TritonClient* client, const std::string& pid); //some members can be modified - void setShape(const ShapeType& newShape, unsigned entry=0); - void setShape(unsigned loc, int64_t val, unsigned entry=0); + void setShape(const ShapeType& newShape, unsigned entry = 0); + void setShape(unsigned loc, int64_t val, unsigned entry = 0); //io accessors template @@ -68,7 +68,7 @@ class TritonData { TritonOutput
fromServer() const; //const accessors - const ShapeView& shape(unsigned entry=0) const { return entries_.at(entry).shape_; } + const ShapeView& shape(unsigned entry = 0) const { return entries_.at(entry).shape_; } int64_t byteSize() const { return byteSize_; } const std::string& dname() const { return dname_; } @@ -76,7 +76,9 @@ class TritonData { bool variableDims() const { return variableDims_; } int64_t sizeDims() const { return productDims_; } //default to dims if shape isn't filled - int64_t sizeShape(unsigned entry=0) const { return variableDims_ ? dimProduct(entries_.at(entry).shape_) : sizeDims(); } + int64_t sizeShape(unsigned entry = 0) const { + return variableDims_ ? dimProduct(entries_.at(entry).shape_) : sizeDims(); + } private: friend class TritonClient; @@ -90,60 +92,60 @@ class TritonData { //group together all relevant information for a single request //helpful for organizing multi-request ragged batching case class TritonDataEntry { - public: - //constructors - TritonDataEntry(const ShapeType& dims, bool noOuterDim, const std::string& name, const std::string& dname) - : fullShape_(dims), - shape_(fullShape_.begin() + (noOuterDim ? 0 : 1), fullShape_.end()), - sizeShape_(0), - byteSizePerBatch_(0), - totalByteSize_(0), - offset_(0), - output_(nullptr) { - //create input or output object - IO* iotmp; - createObject(&iotmp, name, dname); - data_.reset(iotmp); - } - //default needed to be able to use std::vector resize() - TritonDataEntry() - : shape_(fullShape_.begin(), fullShape_.end()), - sizeShape_(0), - byteSizePerBatch_(0), - totalByteSize_(0), - offset_(0), - output_(nullptr) { } - - private: - friend class TritonData; - friend class TritonClient; - friend class TritonMemResource; - friend class TritonHeapResource; - friend class TritonCpuShmResource; + public: + //constructors + TritonDataEntry(const ShapeType& dims, bool noOuterDim, const std::string& name, const std::string& dname) + : fullShape_(dims), + shape_(fullShape_.begin() + (noOuterDim ? 0 : 1), fullShape_.end()), + sizeShape_(0), + byteSizePerBatch_(0), + totalByteSize_(0), + offset_(0), + output_(nullptr) { + //create input or output object + IO* iotmp; + createObject(&iotmp, name, dname); + data_.reset(iotmp); + } + //default needed to be able to use std::vector resize() + TritonDataEntry() + : shape_(fullShape_.begin(), fullShape_.end()), + sizeShape_(0), + byteSizePerBatch_(0), + totalByteSize_(0), + offset_(0), + output_(nullptr) {} + + private: + friend class TritonData; + friend class TritonClient; + friend class TritonMemResource; + friend class TritonHeapResource; + friend class TritonCpuShmResource; #ifdef TRITON_ENABLE_GPU - friend class TritonGpuShmResource; + friend class TritonGpuShmResource; #endif - //accessors - void createObject(IO** ioptr, const std::string& name, const std::string& dname); - void computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize); - - //members - ShapeType fullShape_; - ShapeView shape_; - size_t sizeShape_, byteSizePerBatch_, totalByteSize_; - std::shared_ptr data_; - std::shared_ptr result_; - unsigned offset_; - const uint8_t* output_; + //accessors + void createObject(IO** ioptr, const std::string& name, const std::string& dname); + void computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize); + + //members + ShapeType fullShape_; + ShapeView shape_; + size_t sizeShape_, byteSizePerBatch_, totalByteSize_; + std::shared_ptr data_; + std::shared_ptr result_; + unsigned offset_; + const uint8_t* output_; }; //private accessors only used internally or by client void checkShm() {} unsigned fullLoc(unsigned loc) const; void reset(); - void setResult(std::shared_ptr result, unsigned entry=0) { entries_[entry].result_ = result; } - IO* data(unsigned entry=0) { return entries_[entry].data_.get(); } + void setResult(std::shared_ptr result, unsigned entry = 0) { entries_[entry].result_ = result; } + IO* data(unsigned entry = 0) { return entries_[entry].data_.get(); } void updateMem(size_t size); void computeSizes(); triton::client::InferenceServerGrpcClient* client(); @@ -160,7 +162,8 @@ class TritonData { } int64_t dimProduct(const ShapeView& vec) const { //lambda treats negative dimensions as 0 to avoid overflows - return std::accumulate(vec.begin(), vec.end(), 1, [](int64_t dim1, int64_t dim2){ return dim1*std::max(0l,dim2); }); + return std::accumulate( + vec.begin(), vec.end(), 1, [](int64_t dim1, int64_t dim2) { return dim1 * std::max(0l, dim2); }); } //generates a unique id number for each instance of the class unsigned uid() const { @@ -200,9 +203,13 @@ using TritonOutputMap = std::unordered_map; //avoid "explicit specialization after instantiation" error template <> -void TritonInputData::TritonDataEntry::createObject(triton::client::InferInput** ioptr, const std::string& name, const std::string& dname); +void TritonInputData::TritonDataEntry::createObject(triton::client::InferInput** ioptr, + const std::string& name, + const std::string& dname); template <> -void TritonOutputData::TritonDataEntry::createObject(triton::client::InferRequestedOutput** ioptr, const std::string& name, const std::string& dname); +void TritonOutputData::TritonDataEntry::createObject(triton::client::InferRequestedOutput** ioptr, + const std::string& name, + const std::string& dname); template <> void TritonOutputData::checkShm(); template <> diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index 2910caa5f6356..4d91dd6cebd8b 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -186,16 +186,12 @@ void TritonClient::resetBatchMode() { manualBatchMode_ = false; } -unsigned TritonClient::nEntries() const { - return !input_.empty() ? input_.begin()->second.entries_.size() : 0; -} +unsigned TritonClient::nEntries() const { return !input_.empty() ? input_.begin()->second.entries_.size() : 0; } -unsigned TritonClient::batchSize() const { - return batchMode_==TritonBatchMode::Rectangular ? outerDim_ : nEntries(); -} +unsigned TritonClient::batchSize() const { return batchMode_ == TritonBatchMode::Rectangular ? outerDim_ : nEntries(); } bool TritonClient::setBatchSize(unsigned bsize) { - if (batchMode_==TritonBatchMode::Rectangular) { + if (batchMode_ == TritonBatchMode::Rectangular) { if (bsize > maxOuterDim_) { edm::LogWarning(fullDebugName_) << "Requested batch size " << bsize << " exceeds server-specified max batch size " << maxOuterDim_ << ". Batch size will remain as " << outerDim_; @@ -203,7 +199,7 @@ bool TritonClient::setBatchSize(unsigned bsize) { } else { outerDim_ = bsize; //take min to allow resizing to 0 - resizeEntries(std::min(outerDim_,1u)); + resizeEntries(std::min(outerDim_, 1u)); return true; } } else { @@ -216,7 +212,7 @@ bool TritonClient::setBatchSize(unsigned bsize) { void TritonClient::resizeEntries(unsigned entry) { if (entry > nEntries()) //addEntry(entry) extends the vector to size entry+1 - addEntry(entry-1); + addEntry(entry - 1); else if (entry < nEntries()) { for (auto& element : input_) { element.second.entries_.resize(entry); @@ -234,7 +230,7 @@ void TritonClient::addEntry(unsigned entry) { for (auto& element : output_) { element.second.addEntryImpl(entry); } - if (entry>0) { + if (entry > 0) { batchMode_ = TritonBatchMode::Ragged; outerDim_ = 1; } @@ -278,15 +274,17 @@ void TritonClient::getResults(std::vector& results) { //set shape here before output becomes const if (output.variableDims()) { std::vector tmp_shape; - TRITON_THROW_IF_ERROR(result->Shape(oname, &tmp_shape), "getResults(): unable to get output shape for " + oname); + TRITON_THROW_IF_ERROR(result->Shape(oname, &tmp_shape), + "getResults(): unable to get output shape for " + oname); if (!noOuterDim_) tmp_shape.erase(tmp_shape.begin()); - output.setShape(tmp_shape,i); + output.setShape(tmp_shape, i); } //extend lifetime - output.setResult(result,i); + output.setResult(result, i); //compute size after getting all result entries - if(i==results.size()-1) output.computeSizes(); + if (i == results.size() - 1) + output.computeSizes(); } } } @@ -310,7 +308,7 @@ void TritonClient::evaluate() { inputTriton.reserve(input_.size()); } for (auto& [iname, input] : input_) { - for (unsigned i = 0; i < nEntriesVal; ++i){ + for (unsigned i = 0; i < nEntriesVal; ++i) { inputsTriton[i].push_back(input.data(i)); } } @@ -321,7 +319,7 @@ void TritonClient::evaluate() { outputTriton.reserve(output_.size()); } for (auto& [oname, output] : output_) { - for (unsigned i = 0; i < nEntriesVal; ++i){ + for (unsigned i = 0; i < nEntriesVal; ++i) { outputsTriton[i].push_back(output.data(i)); } } @@ -351,7 +349,7 @@ void TritonClient::evaluate() { client_->AsyncInferMulti( [start_status, this](std::vector results) { //check results - for (auto ptr : results){ + for (auto ptr : results) { auto success = handle_exception( [&]() { TRITON_THROW_IF_ERROR(ptr->RequestStatus(), "evaluate(): unable to get result(s)"); }); if (!success) diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc index 879ea0949fcc5..d8fc506d6e99a 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonData.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc @@ -52,20 +52,24 @@ void TritonData::addEntry(unsigned entry) { template void TritonData::addEntryImpl(unsigned entry) { if (entry >= entries_.size()) { - entries_.reserve(entry+1); - for (unsigned i = entries_.size(); i < entry+1; ++i) { + entries_.reserve(entry + 1); + for (unsigned i = entries_.size(); i < entry + 1; ++i) { entries_.emplace_back(dims_, client_->noOuterDim(), name_, dname_); } } } template <> -void TritonInputData::TritonDataEntry::createObject(tc::InferInput** ioptr, const std::string& name, const std::string& dname) { +void TritonInputData::TritonDataEntry::createObject(tc::InferInput** ioptr, + const std::string& name, + const std::string& dname) { tc::InferInput::Create(ioptr, name, fullShape_, dname); } template <> -void TritonOutputData::TritonDataEntry::createObject(tc::InferRequestedOutput** ioptr, const std::string& name, const std::string& dname) { +void TritonOutputData::TritonDataEntry::createObject(tc::InferRequestedOutput** ioptr, + const std::string& name, + const std::string& dname) { tc::InferRequestedOutput::Create(ioptr, name); } @@ -101,8 +105,8 @@ void TritonData::setShape(unsigned loc, int64_t val, unsigned entry) { //check boundary if (locFull >= entries_[entry].fullShape_.size()) - throw cms::Exception("TritonDataError") - << name_ << " setShape(): dimension " << locFull << " out of bounds (" << entries_[entry].fullShape_.size() << ")"; + throw cms::Exception("TritonDataError") << name_ << " setShape(): dimension " << locFull << " out of bounds (" + << entries_[entry].fullShape_.size() << ")"; if (val != entries_[entry].fullShape_[locFull]) { if (dims_[locFull] == -1) @@ -168,8 +172,9 @@ TritonInputContainer
TritonInputData::allocate(bool reserve) { auto ptr = std::make_shared>(client_->batchSize()); if (reserve) { computeSizes(); - for (auto& entry : entries_){ - if (anyNeg(entry.shape_)) continue; + for (auto& entry : entries_) { + if (anyNeg(entry.shape_)) + continue; for (auto& vec : *ptr) { vec.reserve(entry.sizeShape_); } @@ -213,7 +218,7 @@ void TritonInputData::toServer(TritonInputContainer
ptr) { for (unsigned i0 = 0; i0 < outerDim; ++i0) { //avoid copying empty input - if (entry.byteSizePerBatch_>0) + if (entry.byteSizePerBatch_ > 0) memResource_->copyInput(data_in[counter].data(), offset, i); offset += entry.byteSizePerBatch_; ++counter; @@ -253,7 +258,7 @@ TritonOutput
TritonOutputData::fromServer() const { const auto& entry = entries_[i]; const DT* r1 = reinterpret_cast(entry.output_); - if (entry.totalByteSize_>0 and !entry.result_) { + if (entry.totalByteSize_ > 0 and !entry.result_) { throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result"; } @@ -282,7 +287,6 @@ unsigned TritonData::fullLoc(unsigned loc) const { return loc + (client_->noOuterDim() ? 0 : 1); } - //explicit template instantiation declarations template class TritonData; template class TritonData; diff --git a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc index 762a9d443db85..0f9e70c7df12a 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc @@ -17,7 +17,7 @@ TritonMemResource::TritonMemResource(TritonData* data, const std::string template void TritonMemResource::set() { - for (auto& entry : data_->entries_){ + for (auto& entry : data_->entries_) { TRITON_THROW_IF_ERROR(entry.data_->SetSharedMemory(name_, entry.totalByteSize_, entry.offset_), "unable to set shared memory (" + name_ + ")"); } @@ -29,9 +29,13 @@ TritonHeapResource::TritonHeapResource(TritonData* data, const std::stri template <> void TritonInputHeapResource::copyInput(const void* values, size_t offset, unsigned entry) { - TRITON_THROW_IF_ERROR(data_->entries_[entry].data_->AppendRaw(reinterpret_cast(values), data_->entries_[entry].byteSizePerBatch_), + TRITON_THROW_IF_ERROR(data_->entries_[entry].data_->AppendRaw(reinterpret_cast(values), + data_->entries_[entry].byteSizePerBatch_), data_->name_ + " toServer(): unable to set data for batch entry " + - (data_->entries_.size() > 1 ? std::to_string(entry) : data_->entries_[entry].byteSizePerBatch_ ? std::to_string(offset / data_->entries_[entry].byteSizePerBatch_) : "")); + (data_->entries_.size() > 1 ? std::to_string(entry) + : data_->entries_[entry].byteSizePerBatch_ + ? std::to_string(offset / data_->entries_[entry].byteSizePerBatch_) + : "")); } template <> @@ -39,7 +43,7 @@ void TritonOutputHeapResource::copyOutput() { size_t contentByteSize = 0; for (auto& entry : data_->entries_) { size_t contentByteSizeEntry(0); - if (entry.totalByteSize_>0) + if (entry.totalByteSize_ > 0) TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry), data_->name_ + " fromServer(): unable to get raw"); contentByteSize += contentByteSizeEntry; @@ -159,9 +163,9 @@ void TritonGpuShmResource::close() { template <> void TritonInputGpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) { - cudaCheck( - cudaMemcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_, cudaMemcpyHostToDevice), - data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->entries_[entry].byteSizePerBatch_) + " bytes to GPU"); + cudaCheck(cudaMemcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_, cudaMemcpyHostToDevice), + data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->entries_[entry].byteSizePerBatch_) + + " bytes to GPU"); } template <> diff --git a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc index e6ed3b70fa4e2..8a947f021c654 100644 --- a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc +++ b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc @@ -14,20 +14,13 @@ class TritonIdentityProducer : public TritonEDProducer<> { public: explicit TritonIdentityProducer(edm::ParameterSet const& cfg) - : TritonEDProducer<>(cfg), - batchSizes_{1,2,0}, - batchCounter_(0) {} + : TritonEDProducer<>(cfg), batchSizes_{1, 2, 0}, batchCounter_(0) {} void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override { //follow Triton QA tests for ragged input - std::vector> value_lists{ - {2,2}, - {4,4,4,4}, - {1}, - {3,3,3} - }; + std::vector> value_lists{{2, 2}, {4, 4, 4, 4}, {1}, {3, 3, 3}}; client_->setBatchSize(batchSizes_[batchCounter_]); - batchCounter_ = (batchCounter_+1) % batchSizes_.size(); + batchCounter_ = (batchCounter_ + 1) % batchSizes_.size(); auto& input1 = iInput.at("INPUT0"); auto data1 = input1.allocate(); for (unsigned i = 0; i < client_->batchSize(); ++i) { @@ -44,12 +37,12 @@ class TritonIdentityProducer : public TritonEDProducer<> { // convert from server format const auto& tmp = output1.fromServer(); edm::LogInfo msg(debugName_); - for (unsigned i = 0; i < client_->batchSize(); ++i){ - msg << "output " << i << " (" << triton_utils::printColl(output1.shape(i)) << "): "; - for(int j = 0; j < output1.shape(i)[0]; ++j){ - msg << tmp[i][j] << " "; - } - msg << "\n"; + for (unsigned i = 0; i < client_->batchSize(); ++i) { + msg << "output " << i << " (" << triton_utils::printColl(output1.shape(i)) << "): "; + for (int j = 0; j < output1.shape(i)[0]; ++j) { + msg << tmp[i][j] << " "; + } + msg << "\n"; } } ~TritonIdentityProducer() override = default; From b658adc0da648372ced9b12a6c0a6b41fb1061f1 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Mon, 27 Feb 2023 11:36:49 -0600 Subject: [PATCH 29/31] move image file --- HeterogeneousCore/SonicTriton/README.md | 2 +- .../SonicTriton/{ => doc}/batching_diagrams.png | Bin 2 files changed, 1 insertion(+), 1 deletion(-) rename HeterogeneousCore/SonicTriton/{ => doc}/batching_diagrams.png (100%) diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index 9888266eaeddd..314b5d4d15986 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -37,7 +37,7 @@ The model information from the server can be printed by enabling `verbose` outpu ### Batching SonicTriton supports two types of batching, rectangular and ragged, depicted below: -![batching diagrams](./batching_diagrams.png) +![batching diagrams](./doc/batching_diagrams.png) In the rectangular case, the inputs for each object in an event have the same shape, so they can be combined into a single entry. (In this case, the batch size is specified as the "outer dimension" of the shape.) In the ragged case, the inputs for each object in an event do not have the same shape, so they cannot be combined; diff --git a/HeterogeneousCore/SonicTriton/batching_diagrams.png b/HeterogeneousCore/SonicTriton/doc/batching_diagrams.png similarity index 100% rename from HeterogeneousCore/SonicTriton/batching_diagrams.png rename to HeterogeneousCore/SonicTriton/doc/batching_diagrams.png From b088dfe4b60cac6cc65f70fed23722017a418a97 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 28 Feb 2023 10:19:04 -0600 Subject: [PATCH 30/31] improve memory handling --- .../SonicTriton/interface/TritonClient.h | 2 +- .../SonicTriton/src/TritonClient.cc | 31 +++++++++++++------ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h index cf2e438db2856..833d329417d18 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h @@ -57,7 +57,7 @@ class TritonClient : public SonicClient { bool noOuterDim() const { return noOuterDim_; } unsigned outerDim() const { return outerDim_; } unsigned nEntries() const; - void getResults(std::vector& results); + void getResults(const std::vector>& results); void evaluate() override; template bool handle_exception(F&& call); diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index 4d91dd6cebd8b..0b1da3b6edfb7 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -31,6 +31,15 @@ namespace { throw cms::Exception("GrpcCompression") << "Unknown compression algorithm requested: " << name << " (choices: none, deflate, gzip)"; } + + std::vector> convertToShared(const std::vector& tmp) { + std::vector> results; + results.reserve(tmp.size()); + std::transform(tmp.begin(), tmp.end(), std::back_inserter(results), [](tc::InferResult* ptr) { + return std::shared_ptr(ptr); + }); + return results; + } } // namespace //based on https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/simple_grpc_async_infer_client.cc @@ -267,9 +276,9 @@ bool TritonClient::handle_exception(F&& call) { } } -void TritonClient::getResults(std::vector& results) { +void TritonClient::getResults(const std::vector>& results) { for (unsigned i = 0; i < results.size(); ++i) { - std::shared_ptr result(results[i]); + const auto& result = results[i]; for (auto& [oname, output] : output_) { //set shape here before output becomes const if (output.variableDims()) { @@ -294,8 +303,8 @@ void TritonClient::evaluate() { //in case there is nothing to process if (batchSize() == 0) { //call getResults on an empty vector - std::vector empty_results; - getResults(empty_results); + std::vector> empty_results; + getResults(std::move(empty_results)); finish(true); return; } @@ -347,7 +356,9 @@ void TritonClient::evaluate() { success = handle_exception([&]() { TRITON_THROW_IF_ERROR( client_->AsyncInferMulti( - [start_status, this](std::vector results) { + [start_status, this](std::vector resultsTmp) { + //immediately convert to shared_ptr + const auto& results = convertToShared(resultsTmp); //check results for (auto ptr : results) { auto success = handle_exception( @@ -367,7 +378,7 @@ void TritonClient::evaluate() { } //check result - auto success = handle_exception([&]() { getResults(results); }); + auto success = handle_exception([&]() { getResults(std::move(results)); }); if (!success) return; @@ -385,12 +396,14 @@ void TritonClient::evaluate() { return; } else { //blocking call - std::vector results; + std::vector resultsTmp; success = handle_exception([&]() { TRITON_THROW_IF_ERROR( - client_->InferMulti(&results, options_, inputsTriton, outputsTriton, headers_, compressionAlgo_), + client_->InferMulti(&resultsTmp, options_, inputsTriton, outputsTriton, headers_, compressionAlgo_), "evaluate(): unable to run and/or get result"); }); + //immediately convert to shared_ptr + const auto& results = convertToShared(resultsTmp); if (!success) return; @@ -404,7 +417,7 @@ void TritonClient::evaluate() { reportServerSideStats(stats); } - success = handle_exception([&]() { getResults(results); }); + success = handle_exception([&]() { getResults(std::move(results)); }); if (!success) return; From 570a966bba73d860bc55be433e54d60718b9237b Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 1 Mar 2023 10:15:35 -0600 Subject: [PATCH 31/31] remove unnecessary moves --- HeterogeneousCore/SonicTriton/src/TritonClient.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index 0b1da3b6edfb7..c57a8355d07a1 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -304,7 +304,7 @@ void TritonClient::evaluate() { if (batchSize() == 0) { //call getResults on an empty vector std::vector> empty_results; - getResults(std::move(empty_results)); + getResults(empty_results); finish(true); return; } @@ -378,7 +378,7 @@ void TritonClient::evaluate() { } //check result - auto success = handle_exception([&]() { getResults(std::move(results)); }); + auto success = handle_exception([&]() { getResults(results); }); if (!success) return; @@ -417,7 +417,7 @@ void TritonClient::evaluate() { reportServerSideStats(stats); } - success = handle_exception([&]() { getResults(std::move(results)); }); + success = handle_exception([&]() { getResults(results); }); if (!success) return;