From 5c1d217d780adb9ff622ef6e6fc4febfd652f73a Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Thu, 4 Aug 2022 14:44:06 -0500
Subject: [PATCH 01/31] adapt TritonData to new triton version (triton::common)

---
 HeterogeneousCore/SonicTriton/src/TritonData.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
index d01d397349a78..c2af2c0fbb607 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonData.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -4,11 +4,11 @@
 #include "FWCore/MessageLogger/interface/MessageLogger.h"
 
 #include "model_config.pb.h"
-#include "model_config.h"
+#include "triton/common/model_config.h"
 
 #include <sstream>
 
-namespace ni = nvidia::inferenceserver;
+namespace tco = triton::common;
 namespace tc = triton::client;
 
 //dims: kept constant, represents config.pbtxt parameters of model (converted from google::protobuf::RepeatedField to vector)
@@ -32,8 +32,8 @@ TritonData<IO>::TritonData(const std::string& name,
       variableDims_(anyNeg(shape_)),
       productDims_(variableDims_ ? -1 : dimProduct(shape_)),
       dname_(model_info.datatype()),
-      dtype_(ni::ProtocolStringToDataType(dname_)),
-      byteSize_(ni::GetDataTypeByteSize(dtype_)),
+      dtype_(tco::ProtocolStringToDataType(dname_)),
+      byteSize_(tco::GetDataTypeByteSize(dtype_)),
       totalByteSize_(0) {
   //create input or output object
   IO* iotmp;

From 9d0ab5683f170aeceffab403c11fd1ff7a8eb3d7 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Fri, 15 Apr 2022 19:25:52 -0500
Subject: [PATCH 02/31] combine shape/request info into TritonDataEntry for
 multi-request ragged batching (WIP)

---
 .../SonicTriton/interface/TritonData.h        |  51 ++++++--
 .../SonicTriton/src/TritonData.cc             | 118 +++++++++++++-----
 2 files changed, 125 insertions(+), 44 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h
index 77affcb8e5228..c2128accf21c7 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonData.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h
@@ -55,8 +55,8 @@ class TritonData {
   TritonData(const std::string& name, const TensorMetadata& model_info, TritonClient* client, const std::string& pid);
 
   //some members can be modified
-  void setShape(const ShapeType& newShape);
-  void setShape(unsigned loc, int64_t val);
+  void setShape(const ShapeType& newShape, unsigned entry=0);
+  void setShape(unsigned loc, int64_t val, unsigned entry=0);
 
   //io accessors
   template <typename DT>
@@ -68,7 +68,7 @@ class TritonData {
   TritonOutput<DT> fromServer() const;
 
   //const accessors
-  const ShapeView& shape() const { return shape_; }
+  const ShapeView& shape(unsigned entry=0) const { return entries_.at(entry).shape_; }
   int64_t byteSize() const { return byteSize_; }
   const std::string& dname() const { return dname_; }
   unsigned batchSize() const { return batchSize_; }
@@ -77,7 +77,7 @@ class TritonData {
   bool variableDims() const { return variableDims_; }
   int64_t sizeDims() const { return productDims_; }
   //default to dims if shape isn't filled
-  int64_t sizeShape() const { return variableDims_ ? dimProduct(shape_) : sizeDims(); }
+  int64_t sizeShape(unsigned entry=0) const { return variableDims_ ? dimProduct(entries_.at(entry).shape_) : sizeDims(); }
 
 private:
   friend class TritonClient;
@@ -88,12 +88,45 @@ class TritonData {
   friend class TritonGpuShmResource<IO>;
 #endif
 
+  //group together all relevant information for a single request
+  //helpful for organizing multi-request ragged batching case
+  class TritonDataEntry {
+    public:
+      //constructors
+      TritonDataEntry(const ShapeType& dims, bool noBatch, const std::string& name, const std::string& dname))
+          : fullShape_(dims),
+            shape_(fullShape_.begin() + (noBatch ? 0 : 1), fullShape_.end()),
+            sizeShape_(0),
+            byteSizePerBatch_(0) {
+        //create input or output object
+        IO* iotmp;
+        createObject(&iotmp, name, dname);
+        data_.reset(iotmp);
+      }
+
+    private:
+      friend class TritonData<IO>;
+
+      //accessors
+      void createObject(IO** ioptr, const std::string& name, const std::string& dname));
+      void computeSizes(int64_t byteSize);
+      void resetSizes();
+
+      //members
+      ShapeType fullShape_;
+      ShapeView shape_;
+      size_t sizeShape_;
+      size_t byteSizePerBatch_;
+      std::shared_ptr<IO> data_;
+  };
+
   //private accessors only used internally or by client
+  void checkShm() {}
   unsigned fullLoc(unsigned loc) const { return loc + (noBatch_ ? 0 : 1); }
   void setBatchSize(unsigned bsize);
   void reset();
   void setResult(std::shared_ptr<Result> result) { result_ = result; }
-  IO* data() { return data_.get(); }
+  IO* data(unsigned entry=0) { return entries[entry].data_.get(); }
   void updateMem(size_t size);
   void computeSizes();
   void resetSizes();
@@ -112,32 +145,28 @@ class TritonData {
   int64_t dimProduct(const ShapeView& vec) const {
     return std::accumulate(vec.begin(), vec.end(), 1, std::multiplies<int64_t>());
   }
-  void createObject(IO** ioptr);
   //generates a unique id number for each instance of the class
   unsigned uid() const {
     static std::atomic<unsigned> uid{0};
     return ++uid;
   }
   std::string xput() const;
+  void addEntry(unsigned entry);
 
   //members
   std::string name_;
-  std::shared_ptr<IO> data_;
   TritonClient* client_;
   bool useShm_;
   std::string shmName_;
   const ShapeType dims_;
   bool noBatch_;
   unsigned batchSize_;
-  ShapeType fullShape_;
-  ShapeView shape_;
   bool variableDims_;
   int64_t productDims_;
   std::string dname_;
   inference::DataType dtype_;
   int64_t byteSize_;
-  size_t sizeShape_;
-  size_t byteSizePerBatch_;
+  std::vector<TritonDataEntry<IO>> entries_;
   size_t totalByteSize_;
   //can be modified in otherwise-const fromServer() method in TritonMemResource::copyOutput():
   //TritonMemResource holds a non-const pointer to an instance of this class
diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
index c2af2c0fbb607..b533f60bb2730 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonData.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -27,30 +27,49 @@ TritonData<IO>::TritonData(const std::string& name,
       dims_(model_info.shape().begin(), model_info.shape().end()),
       noBatch_(client_->noBatch()),
       batchSize_(0),
-      fullShape_(dims_),
-      shape_(fullShape_.begin() + (noBatch_ ? 0 : 1), fullShape_.end()),
-      variableDims_(anyNeg(shape_)),
-      productDims_(variableDims_ ? -1 : dimProduct(shape_)),
       dname_(model_info.datatype()),
       dtype_(tco::ProtocolStringToDataType(dname_)),
       byteSize_(tco::GetDataTypeByteSize(dtype_)),
       totalByteSize_(0) {
-  //create input or output object
-  IO* iotmp;
-  createObject(&iotmp);
-  data_.reset(iotmp);
+  //initialize first shape entry
+  addEntry(1);
+  //one-time computation of some shape info
+  variableDims_ = anyNeg(entries_.front().shape_));
+  productDims_ = variableDims_ ? -1 : dimProduct(entries_.front().shape_);
+  checkShm();
 }
 
 template <>
-void TritonInputData::createObject(tc::InferInput** ioptr) {
-  tc::InferInput::Create(ioptr, name_, fullShape_, dname_);
+void TritonOutputData::checkShm() {
+  //another specialization for output: can't use shared memory if output size is not known
+  useShm_ &= !variableDims_;
+}
+
+template <typename IO>
+void TritonData<IO>::addEntry(unsigned entry) {
+  if (entry > entries_.size()) {
+    entries_.reserve(entry+1);
+    for (unsigned i = entries_.size(); i < entry+1; ++i) {
+      entries_.emplace_back(dims_, noBatch_, name_, dname_);
+      //todo: should each entry have its own batch size?
+      //for now, restrict multi-request mode for ragged batching to batch size = 1
+      if (entry>1 and !noBatch_)
+        entries_.back().fullShape_[0] = 1;
+    }
+    //go back and fix the first one
+    if (entry>1 and !noBatch_)
+      entries_[0].fullShape_[0] = 1;
+  }
 }
 
 template <>
-void TritonOutputData::createObject(tc::InferRequestedOutput** ioptr) {
-  tc::InferRequestedOutput::Create(ioptr, name_);
-  //another specialization for output: can't use shared memory if output size is not known
-  useShm_ &= !variableDims_;
+void TritonInputData::TritonDataEntry::createObject(tc::InferInput** ioptr, const std::string& name, const std::string& dname) {
+  tc::InferInput::Create(ioptr, name, fullShape_, dname);
+}
+
+template <>
+void TritonOutputData::TritonDataEntry::createObject(tc::InferRequestedOutput** ioptr, const std::string& name, const std::string& dname)) {
+  tc::InferRequestedOutput::Create(ioptr, name);
 }
 
 template <>
@@ -70,24 +89,27 @@ tc::InferenceServerGrpcClient* TritonData<IO>::client() {
 
 //setters
 template <typename IO>
-void TritonData<IO>::setShape(const TritonData<IO>::ShapeType& newShape) {
+void TritonData<IO>::setShape(const TritonData<IO>::ShapeType& newShape, unsigned entry) {
+  addEntry(entry);
   for (unsigned i = 0; i < newShape.size(); ++i) {
-    setShape(i, newShape[i]);
+    setShape(i, newShape[i], entry);
   }
 }
 
 template <typename IO>
-void TritonData<IO>::setShape(unsigned loc, int64_t val) {
+void TritonData<IO>::setShape(unsigned loc, int64_t val, unsigned entry) {
+  addEntry(entry);
+
   unsigned locFull = fullLoc(loc);
 
   //check boundary
-  if (locFull >= fullShape_.size())
+  if (locFull >= entries_[entry].fullShape_.size())
     throw cms::Exception("TritonDataError")
-        << name_ << " setShape(): dimension " << locFull << " out of bounds (" << fullShape_.size() << ")";
+        << name_ << " setShape(): dimension " << locFull << " out of bounds (" << entries_[entry].fullShape_.size() << ")";
 
-  if (val != fullShape_[locFull]) {
+  if (val != entries_[entry].fullShape_[locFull]) {
     if (dims_[locFull] == -1)
-      fullShape_[locFull] = val;
+      entries_[entry].fullShape_[entry][locFull] = val;
     else
       throw cms::Exception("TritonDataError")
           << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
@@ -97,20 +119,39 @@ void TritonData<IO>::setShape(unsigned loc, int64_t val) {
 template <typename IO>
 void TritonData<IO>::setBatchSize(unsigned bsize) {
   batchSize_ = bsize;
-  if (!noBatch_)
-    fullShape_[0] = batchSize_;
+  if (!noBatch_) {
+    if (entries_[0].fullShape_.size()==1)
+      entries_[0].fullShape_ = batchSize_;
+    else
+      throw cms::Exception("TritonDataError") << "attempt to set batch size to " << bsize << " when ragged batching is in use";
+  }
+}
+
+template <typename IO>
+void TritonData<IO>::TritonDataEntry::computeSizes(int64_t shapeSize, int64_t byteSize) {
+  sizeShape_ = shapeSize;
+  byteSizePerBatch_ = byteSize * sizeShape_;
 }
 
 template <typename IO>
 void TritonData<IO>::computeSizes() {
-  sizeShape_ = sizeShape();
-  byteSizePerBatch_ = byteSize_ * sizeShape_;
-  totalByteSize_ = byteSizePerBatch_ * batchSize_;
+  for (unsigned i = 0; i < entries_.size(); ++i) {
+    entries_[i].computeSizes(sizeShape(i), byteSize_);
+    totalByteSize_ += entries_[i].byteSizePerBatch_ * batchSize_;
+  }
 }
+
 template <typename IO>
-void TritonData<IO>::resetSizes() {
+void TritonData<IO>::TritonDataEntry::resetSizes() {
   sizeShape_ = 0;
   byteSizePerBatch_ = 0;
+}
+
+template <typename IO>
+void TritonData<IO>::resetSizes() {
+  for (unsigned i = 0; i < entries_.size(); ++i) {
+    entries_[i].resetSizes();
+  }
   totalByteSize_ = 0;
 }
 
@@ -168,21 +209,32 @@ void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
   const auto& data_in = *ptr;
 
   //check batch size
-  if (data_in.size() != batchSize_) {
+  if (entries_.size()==1 and data_in.size() != batchSize_)) {
     throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
                                             << " but specified batch size is " << batchSize_;
   }
-
-  //shape must be specified for variable dims or if batch size changes
-  data_->SetShape(fullShape_);
+  else if (entries_.size()>1 and data_in.size() != entries_.size()) {
+    throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
+                                            << " but specified entries size is " << entries_.size();
+  }
 
   //check type
   checkType<DT>();
 
   computeSizes();
   updateMem(totalByteSize_);
-  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
-    memResource_->copyInput(data_in[i0].data(), i0 * byteSizePerBatch_);
+
+  unsigned counter = 0;
+  for (unsigned i = 0; i < entries_.size(); ++i) {
+    auto& entry = entries_[i];
+
+    //shape must be specified for variable dims or if batch size changes
+    entry.data_->SetShape(entry.fullShape_);
+
+    for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
+      memResource_->copyInput(data_in[counter].data(), counter * entry.byteSizePerBatch_);
+      ++counter;
+    }
   }
   memResource_->set();
 

From e9f8c618ca9ae6d3e715957f545f503b563d24d0 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Mon, 18 Apr 2022 16:26:09 -0500
Subject: [PATCH 03/31] finish initial propagation of per-entry data to
 TritonClient (still WIP)

---
 .../SonicTriton/interface/TritonClient.h      |   8 +-
 .../SonicTriton/interface/TritonData.h        |   9 +-
 .../SonicTriton/src/TritonClient.cc           | 118 ++++++++++++------
 .../SonicTriton/src/TritonData.cc             |  48 +++----
 4 files changed, 100 insertions(+), 83 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h
index a44e3697742f7..e780bc4136116 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h
@@ -50,7 +50,7 @@ class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
 
 protected:
   //helpers
-  void getResults(std::shared_ptr<triton::client::InferResult> results);
+  void getResults(std::vector<triton::client::InferResult*>& results);
   void evaluate() override;
   template <typename F>
   bool handle_exception(F&& call);
@@ -71,13 +71,9 @@ class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
   grpc_compression_algorithm compressionAlgo_;
   triton::client::Headers headers_;
 
-  //IO pointers for triton
-  std::vector<triton::client::InferInput*> inputsTriton_;
-  std::vector<const triton::client::InferRequestedOutput*> outputsTriton_;
-
   std::unique_ptr<triton::client::InferenceServerGrpcClient> client_;
   //stores timeout, model name and version
-  triton::client::InferOptions options_;
+  std::vector<triton::client::InferOptions> options_;
 
 private:
   friend TritonInputData;
diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h
index c2128accf21c7..caef6f206d61c 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonData.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h
@@ -106,11 +106,11 @@ class TritonData {
 
     private:
       friend class TritonData<IO>;
+      friend class TritonClient;
 
       //accessors
       void createObject(IO** ioptr, const std::string& name, const std::string& dname));
       void computeSizes(int64_t byteSize);
-      void resetSizes();
 
       //members
       ShapeType fullShape_;
@@ -118,6 +118,7 @@ class TritonData {
       size_t sizeShape_;
       size_t byteSizePerBatch_;
       std::shared_ptr<IO> data_;
+      std::shared_ptr<Result> result_;
   };
 
   //private accessors only used internally or by client
@@ -125,11 +126,10 @@ class TritonData {
   unsigned fullLoc(unsigned loc) const { return loc + (noBatch_ ? 0 : 1); }
   void setBatchSize(unsigned bsize);
   void reset();
-  void setResult(std::shared_ptr<Result> result) { result_ = result; }
-  IO* data(unsigned entry=0) { return entries[entry].data_.get(); }
+  void setResult(Result* result, unsigned entry=0) { entries_[entry].result_ = std::make_shared<Result>(result); }
+  IO* data(unsigned entry=0) { return entries_[entry].data_.get(); }
   void updateMem(size_t size);
   void computeSizes();
-  void resetSizes();
   triton::client::InferenceServerGrpcClient* client();
   template <typename DT>
   void checkType() const {
@@ -173,7 +173,6 @@ class TritonData {
   //so that TritonOutputGpuShmResource can store data here
   std::shared_ptr<void> holder_;
   std::shared_ptr<TritonMemResource<IO>> memResource_;
-  std::shared_ptr<Result> result_;
   //can be modified in otherwise-const fromServer() method to prevent multiple calls
   CMS_SA_ALLOW mutable bool done_{};
 };
diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
index f0d662f866d8d..f355af41a17ea 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -41,11 +41,11 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d
       verbose_(params.getUntrackedParameter<bool>("verbose")),
       useSharedMemory_(params.getUntrackedParameter<bool>("useSharedMemory")),
       compressionAlgo_(getCompressionAlgo(params.getUntrackedParameter<std::string>("compression"))),
-      options_(params.getParameter<std::string>("modelName")) {
+      options_(1,params.getParameter<std::string>("modelName")) {
   //get appropriate server for this model
   edm::Service<TritonService> ts;
   const auto& server =
-      ts->serverInfo(options_.model_name_, params.getUntrackedParameter<std::string>("preferredServer"));
+      ts->serverInfo(options_[0].model_name_, params.getUntrackedParameter<std::string>("preferredServer"));
   serverType_ = server.type;
   if (verbose_)
     edm::LogInfo(fullDebugName_) << "Using server: " << server.url;
@@ -60,13 +60,13 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d
       "TritonClient(): unable to create inference context");
 
   //set options
-  options_.model_version_ = params.getParameter<std::string>("modelVersion");
+  options_[0].model_version_ = params.getParameter<std::string>("modelVersion");
   //convert seconds to microseconds
-  options_.client_timeout_ = params.getUntrackedParameter<unsigned>("timeout") * 1e6;
+  options_[0].client_timeout_ = params.getUntrackedParameter<unsigned>("timeout") * 1e6;
 
   //config needed for batch size
   inference::ModelConfigResponse modelConfigResponse;
-  TRITON_THROW_IF_ERROR(client_->ModelConfig(&modelConfigResponse, options_.model_name_, options_.model_version_),
+  TRITON_THROW_IF_ERROR(client_->ModelConfig(&modelConfigResponse, options_[0].model_name_, options_[0].model_version_),
                         "TritonClient(): unable to get model config");
   inference::ModelConfig modelConfig(modelConfigResponse.config());
 
@@ -80,7 +80,7 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d
 
   //get model info
   inference::ModelMetadataResponse modelMetadata;
-  TRITON_THROW_IF_ERROR(client_->ModelMetadata(&modelMetadata, options_.model_name_, options_.model_version_),
+  TRITON_THROW_IF_ERROR(client_->ModelMetadata(&modelMetadata, options_[0].model_name_, options_[0].model_version_),
                         "TritonClient(): unable to get model metadata");
 
   //get input and output (which know their sizes)
@@ -108,14 +108,12 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d
   if (verbose_)
     io_msg << "Model inputs: "
            << "\n";
-  inputsTriton_.reserve(nicInputs.size());
   for (const auto& nicInput : nicInputs) {
     const auto& iname = nicInput.name();
     auto [curr_itr, success] = input_.emplace(std::piecewise_construct,
                                               std::forward_as_tuple(iname),
                                               std::forward_as_tuple(iname, nicInput, this, ts->pid()));
     auto& curr_input = curr_itr->second;
-    inputsTriton_.push_back(curr_input.data());
     if (verbose_) {
       io_msg << "  " << iname << " (" << curr_input.dname() << ", " << curr_input.byteSize()
              << " b) : " << triton_utils::printColl(curr_input.shape()) << "\n";
@@ -130,7 +128,6 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d
   if (verbose_)
     io_msg << "Model outputs: "
            << "\n";
-  outputsTriton_.reserve(nicOutputs.size());
   for (const auto& nicOutput : nicOutputs) {
     const auto& oname = nicOutput.name();
     if (!s_outputs.empty() and s_outputs.find(oname) == s_outputs.end())
@@ -139,7 +136,6 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d
                                                std::forward_as_tuple(oname),
                                                std::forward_as_tuple(oname, nicOutput, this, ts->pid()));
     auto& curr_output = curr_itr->second;
-    outputsTriton_.push_back(curr_output.data());
     if (verbose_) {
       io_msg << "  " << oname << " (" << curr_output.dname() << ", " << curr_output.byteSize()
              << " b) : " << triton_utils::printColl(curr_output.shape()) << "\n";
@@ -159,8 +155,8 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d
   //print model info
   std::stringstream model_msg;
   if (verbose_) {
-    model_msg << "Model name: " << options_.model_name_ << "\n"
-              << "Model version: " << options_.model_version_ << "\n"
+    model_msg << "Model name: " << options_[0].model_name_ << "\n"
+              << "Model version: " << options_[0].model_version_ << "\n"
               << "Model max batch size: " << (noBatch_ ? 0 : maxBatchSize_) << "\n";
     edm::LogInfo(fullDebugName_) << model_msg.str() << io_msg.str();
   }
@@ -222,19 +218,23 @@ bool TritonClient::handle_exception(F&& call) {
   }
 }
 
-void TritonClient::getResults(std::shared_ptr<tc::InferResult> results) {
+void TritonClient::getResults(std::vector<tc::InferResult*>& results) {
   for (auto& [oname, output] : output_) {
-    //set shape here before output becomes const
-    if (output.variableDims()) {
-      std::vector<int64_t> tmp_shape;
-      TRITON_THROW_IF_ERROR(results->Shape(oname, &tmp_shape), "getResults(): unable to get output shape for " + oname);
-      if (!noBatch_)
-        tmp_shape.erase(tmp_shape.begin());
-      output.setShape(tmp_shape);
-      output.computeSizes();
+    for (unsigned i = 0; i < results.size(); ++i) {
+      auto result = results[i];
+      //set shape here before output becomes const
+      if (output.variableDims()) {
+        std::vector<int64_t> tmp_shape;
+        TRITON_THROW_IF_ERROR(result->Shape(oname, &tmp_shape), "getResults(): unable to get output shape for " + oname);
+        if (!noBatch_)
+          tmp_shape.erase(tmp_shape.begin());
+        output.setShape(tmp_shape,i);
+      }
+      //extend lifetime
+      output.setResult(result,i);
     }
-    //extend lifetime
-    output.setResult(results);
+    //compute size after getting all result entries
+    output.computeSizes();
   }
 }
 
@@ -246,8 +246,44 @@ void TritonClient::evaluate() {
     return;
   }
 
-  //set up shared memory for output
+  //set up input pointers for triton (generalized for multi-request ragged batching case)
+  //one vector<InferInput*> per request
+  std::vector<std::vector<triton::client::InferInput*>> inputsTriton;
+  unsigned nEntries = input_.begin()->second.entries_.size();
+  inputsTriton.resize(nEntries);
+  for (auto& inputTriton : inputsTriton) {
+    inputTriton.reserve(inputs_.size());
+  }
+  //consistency check
+  //todo: move addEntry to a TritonClient function that auto loops over all inputs? & outputs?
   auto success = handle_exception([&]() {
+    std::vector<unsigned> nEntriesAll;
+    nEntriesAll.reserve(input_.size());
+    for (auto& [iname, input] : input_) {
+      nEntriesAll.push_back(input.entries_.size());
+    }
+    if (std::adjacent_find(nEntriesAll.begin(), nEntriesAll.end(), std::not_equal_to<>()) != nEntriesAll.end())
+      throw cms::Exception("InconsistentInput") << "Different numbers of entries among different inputs: " << printColl(nEntriesAll);
+  });
+  if (!success)
+    return;
+  for (auto& [iname, input] : input_) {
+    for (unsigned i = 0; i < nEntries; ++i){
+      inputsTriton[i].push_back(input.data(i));
+    }
+  }
+
+  //set up output pointers accordingly (same number of entries as input)
+  std::vector<std::vector<const triton::client::InferRequestedOutput*>> outputsTriton_;
+  for (auto& [oname, output] : output_) {
+    output.addEntry(nEntries);
+    for (unsigned i = 0; i < nEntries; ++i){
+      outputsTriton[i].push_back(output.data(i));
+    }
+  }
+
+  //set up shared memory for output
+  success = handle_exception([&]() {
     for (auto& element : output_) {
       element.second.prepare();
     }
@@ -268,18 +304,19 @@ void TritonClient::evaluate() {
     //non-blocking call
     success = handle_exception([&]() {
       TRITON_THROW_IF_ERROR(
-          client_->AsyncInfer(
-              [start_status, this](tc::InferResult* results) {
-                //get results
-                std::shared_ptr<tc::InferResult> results_ptr(results);
-                auto success = handle_exception(
-                    [&]() { TRITON_THROW_IF_ERROR(results_ptr->RequestStatus(), "evaluate(): unable to get result"); });
-                if (!success)
-                  return;
+          client_->AsyncInferMulti(
+              [start_status, this](std::vector<tc::InferResult*> results) {
+                //check results
+                for (auto ptr : results){
+                  auto success = handle_exception(
+                      [&]() { TRITON_THROW_IF_ERROR(ptr->RequestStatus(), "evaluate(): unable to get result(s)"); });
+                  if (!success)
+                    return;
+                }
 
                 if (verbose()) {
                   inference::ModelStatistics end_status;
-                  success = handle_exception([&]() { end_status = getServerSideStatus(); });
+                  auto success = handle_exception([&]() { end_status = getServerSideStatus(); });
                   if (!success)
                     return;
 
@@ -288,7 +325,7 @@ void TritonClient::evaluate() {
                 }
 
                 //check result
-                success = handle_exception([&]() { getResults(results_ptr); });
+                auto success = handle_exception([&]() { getResults(results); });
                 if (!success)
                   return;
 
@@ -296,8 +333,8 @@ void TritonClient::evaluate() {
                 finish(true);
               },
               options_,
-              inputsTriton_,
-              outputsTriton_,
+              inputsTriton,
+              outputsTriton,
               headers_,
               compressionAlgo_),
           "evaluate(): unable to launch async run");
@@ -306,10 +343,10 @@ void TritonClient::evaluate() {
       return;
   } else {
     //blocking call
-    tc::InferResult* results;
+    std::vector<tc::InferResult*> results;
     success = handle_exception([&]() {
       TRITON_THROW_IF_ERROR(
-          client_->Infer(&results, options_, inputsTriton_, outputsTriton_, headers_, compressionAlgo_),
+          client_->InferMulti(&results, options_, inputsTriton, outputsTriton, headers_, compressionAlgo_),
           "evaluate(): unable to run and/or get result");
     });
     if (!success)
@@ -325,8 +362,7 @@ void TritonClient::evaluate() {
       reportServerSideStats(stats);
     }
 
-    std::shared_ptr<tc::InferResult> results_ptr(results);
-    success = handle_exception([&]() { getResults(results_ptr); });
+    success = handle_exception([&]() { getResults(results); });
     if (!success)
       return;
 
@@ -395,7 +431,7 @@ TritonClient::ServerSideStats TritonClient::summarizeServerStats(const inference
 inference::ModelStatistics TritonClient::getServerSideStatus() const {
   if (verbose_) {
     inference::ModelStatisticsResponse resp;
-    TRITON_THROW_IF_ERROR(client_->ModelInferenceStatistics(&resp, options_.model_name_, options_.model_version_),
+    TRITON_THROW_IF_ERROR(client_->ModelInferenceStatistics(&resp, options_[0].model_name_, options_[0].model_version_),
                           "getServerSideStatus(): unable to get model statistics");
     return *(resp.model_stats().begin());
   }
diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
index b533f60bb2730..89f2c4d7af9c8 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonData.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -57,8 +57,9 @@ void TritonData<IO>::addEntry(unsigned entry) {
         entries_.back().fullShape_[0] = 1;
     }
     //go back and fix the first one
-    if (entry>1 and !noBatch_)
-      entries_[0].fullShape_[0] = 1;
+    if (entry>1 and !noBatch_) {
+      batchSize_ = entries_[0].fullShape_[0] = 1;
+    }
   }
 }
 
@@ -141,20 +142,6 @@ void TritonData<IO>::computeSizes() {
   }
 }
 
-template <typename IO>
-void TritonData<IO>::TritonDataEntry::resetSizes() {
-  sizeShape_ = 0;
-  byteSizePerBatch_ = 0;
-}
-
-template <typename IO>
-void TritonData<IO>::resetSizes() {
-  for (unsigned i = 0; i < entries_.size(); ++i) {
-    entries_[i].resetSizes();
-  }
-  totalByteSize_ = 0;
-}
-
 //create a memory resource if none exists;
 //otherwise, reuse the memory resource, resizing it if necessary
 template <typename IO>
@@ -269,10 +256,16 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
   const DT* r1 = reinterpret_cast<const DT*>(r0);
 
   TritonOutput<DT> dataOut;
-  dataOut.reserve(batchSize_);
-  for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
-    auto offset = i0 * sizeShape_;
-    dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_);
+  dataOut.reserve(std::max(batchSize_, entries_.size()));
+  unsigned counter = 0;
+  for (unsigned i = 0; i < entries_.size(); ++i) {
+    auto& entry = entries_[i];
+
+    for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
+      auto offset = counter * entry.sizeShape_;
+      dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_);
+      ++counter;
+    }
   }
 
   done_ = true;
@@ -283,23 +276,16 @@ template <>
 void TritonInputData::reset() {
   done_ = false;
   holder_.reset();
-  data_->Reset();
-  //reset shape
-  if (variableDims_) {
-    for (unsigned i = 0; i < shape_.size(); ++i) {
-      unsigned locFull = fullLoc(i);
-      fullShape_[locFull] = dims_[locFull];
-    }
-  }
-  resetSizes();
+  entries_.clear();
+  totalByteSize_ = 0;
 }
 
 template <>
 void TritonOutputData::reset() {
   done_ = false;
-  result_.reset();
   holder_.reset();
-  resetSizes();
+  entries_.clear();
+  totalByteSize_ = 0;
 }
 
 //explicit template instantiation declarations

From f570f3cb8efa310a66597ff22347f1e71febc649 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Mon, 18 Apr 2022 16:41:20 -0500
Subject: [PATCH 04/31] simplify synchronization of nEntries across
 inputs/outputs

---
 .../SonicTriton/interface/TritonClient.h      |  1 +
 .../SonicTriton/interface/TritonData.h        |  1 +
 .../SonicTriton/src/TritonClient.cc           | 37 +++++++++----------
 .../SonicTriton/src/TritonData.cc             | 24 ++++++------
 4 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h
index e780bc4136116..50c4c5ee83642 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h
@@ -81,6 +81,7 @@ class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
 
   //private accessors only used by data
   auto client() { return client_.get(); }
+  void addEntry(unsigned entry);
 };
 
 #endif
diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h
index caef6f206d61c..d6e891c10eea2 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonData.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h
@@ -152,6 +152,7 @@ class TritonData {
   }
   std::string xput() const;
   void addEntry(unsigned entry);
+  void addEntryImpl(unsigned entry);
 
   //members
   std::string name_;
diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
index f355af41a17ea..d3cf1dca0d4eb 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -189,6 +189,17 @@ bool TritonClient::setBatchSize(unsigned bsize) {
   }
 }
 
+void TritonClient::addEntry(unsigned entry) {  //keeps the number of entries identical across all inputs and outputs
+  for (auto& element : input_) {
+    element.second.addEntryImpl(entry);
+  }
+  for (auto& element : output_) {
+    element.second.addEntryImpl(entry);
+  }
+  if (entry > 0)  //entry is a zero-based index: any index above 0 means more than one request, which requires batch size 1
+    setBatchSize(1);
+}
+
 void TritonClient::reset() {
   for (auto& element : input_) {
     element.second.reset();
@@ -248,42 +259,30 @@ void TritonClient::evaluate() {
 
   //set up input pointers for triton (generalized for multi-request ragged batching case)
   //one vector<InferInput*> per request
-  std::vector<std::vector<triton::client::InferInput*>> inputsTriton;
   unsigned nEntries = input_.begin()->second.entries_.size();
-  inputsTriton.resize(nEntries);
+  std::vector<std::vector<triton::client::InferInput*>> inputsTriton(nEntries);
   for (auto& inputTriton : inputsTriton) {
     inputTriton.reserve(inputs_.size());
   }
-  //consistency check
-  //todo: move addEntry to a TritonClient function that auto loops over all inputs? & outputs?
-  auto success = handle_exception([&]() {
-    std::vector<unsigned> nEntriesAll;
-    nEntriesAll.reserve(input_.size());
-    for (auto& [iname, input] : input_) {
-      nEntriesAll.push_back(input.entries_.size());
-    }
-    if (std::adjacent_find(nEntriesAll.begin(), nEntriesAll.end(), std::not_equal_to<>()) != nEntriesAll.end())
-      throw cms::Exception("InconsistentInput") << "Different numbers of entries among different inputs: " << printColl(nEntriesAll);
-  });
-  if (!success)
-    return;
   for (auto& [iname, input] : input_) {
     for (unsigned i = 0; i < nEntries; ++i){
       inputsTriton[i].push_back(input.data(i));
     }
   }
 
-  //set up output pointers accordingly (same number of entries as input)
-  std::vector<std::vector<const triton::client::InferRequestedOutput*>> outputsTriton_;
+  //set up output pointers similarly
+  std::vector<std::vector<const triton::client::InferRequestedOutput*>> outputsTriton(nEntries);
+  for (auto& outputTriton : outputsTriton) {
+    outputTriton.reserve(outputs_.size());
+  }
   for (auto& [oname, output] : output_) {
-    output.addEntry(nEntries);
     for (unsigned i = 0; i < nEntries; ++i){
       outputsTriton[i].push_back(output.data(i));
     }
   }
 
   //set up shared memory for output
-  success = handle_exception([&]() {
+  auto success = handle_exception([&]() {
     for (auto& element : output_) {
       element.second.prepare();
     }
diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
index 89f2c4d7af9c8..e2d77d5b9543e 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonData.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -32,7 +32,7 @@ TritonData<IO>::TritonData(const std::string& name,
       byteSize_(tco::GetDataTypeByteSize(dtype_)),
       totalByteSize_(0) {
   //initialize first shape entry
-  addEntry(1);
+  addEntryImpl(1);
   //one-time computation of some shape info
   variableDims_ = anyNeg(entries_.front().shape_));
   productDims_ = variableDims_ ? -1 : dimProduct(entries_.front().shape_);
@@ -47,18 +47,16 @@ void TritonOutputData::checkShm() {
 
 template <typename IO>
 void TritonData<IO>::addEntry(unsigned entry) {
+  //ensures consistency among all inputs
+  client_->addEntry(entry);
+}
+
+template <typename IO>
+void TritonData<IO>::addEntryImpl(unsigned entry) {
   if (entry > entries_.size()) {
     entries_.reserve(entry+1);
     for (unsigned i = entries_.size(); i < entry+1; ++i) {
       entries_.emplace_back(dims_, noBatch_, name_, dname_);
-      //todo: should each entry have its own batch size?
-      //for now, restrict multi-request mode for ragged batching to batch size = 1
-      if (entry>1 and !noBatch_)
-        entries_.back().fullShape_[0] = 1;
-    }
-    //go back and fix the first one
-    if (entry>1 and !noBatch_) {
-      batchSize_ = entries_[0].fullShape_[0] = 1;
     }
   }
 }
@@ -121,8 +119,12 @@ template <typename IO>
 void TritonData<IO>::setBatchSize(unsigned bsize) {
   batchSize_ = bsize;
   if (!noBatch_) {
-    if (entries_[0].fullShape_.size()==1)
-      entries_[0].fullShape_ = batchSize_;
+    //should only be set to 1 in cases when entries > 1
+    if (batchSize_==1 or entries_.size()==1) {
+      for (auto& entry : entries_) {
+        entry.fullShape_[0] = batchSize_;
+      }
+    }
     else
       throw cms::Exception("TritonDataError") << "attempt to set batch size to " << bsize << " when ragged batching is in use";
   }

From 31db492650b5d888d67fff1616d3a3fc7e4f4e15 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Mon, 18 Apr 2022 17:39:19 -0500
Subject: [PATCH 05/31] fix various mistakes/typos

---
 .../SonicTriton/interface/TritonData.h        | 21 ++++++-----
 .../SonicTriton/src/TritonClient.cc           |  8 ++---
 .../SonicTriton/src/TritonData.cc             | 36 ++++++++++---------
 3 files changed, 36 insertions(+), 29 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h
index d6e891c10eea2..2c3f7898b6aaa 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonData.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h
@@ -93,7 +93,7 @@ class TritonData {
   class TritonDataEntry {
     public:
       //constructors
-      TritonDataEntry(const ShapeType& dims, bool noBatch, const std::string& name, const std::string& dname))
+      TritonDataEntry(const ShapeType& dims, bool noBatch, const std::string& name, const std::string& dname)
           : fullShape_(dims),
             shape_(fullShape_.begin() + (noBatch ? 0 : 1), fullShape_.end()),
             sizeShape_(0),
@@ -109,8 +109,8 @@ class TritonData {
       friend class TritonClient;
 
       //accessors
-      void createObject(IO** ioptr, const std::string& name, const std::string& dname));
-      void computeSizes(int64_t byteSize);
+      void createObject(IO** ioptr, const std::string& name, const std::string& dname);
+      void computeSizes(int64_t shapeSize, int64_t byteSize);
 
       //members
       ShapeType fullShape_;
@@ -125,8 +125,9 @@ class TritonData {
   void checkShm() {}
   unsigned fullLoc(unsigned loc) const { return loc + (noBatch_ ? 0 : 1); }
   void setBatchSize(unsigned bsize);
+  size_t getEntrySize() const { return std::max(static_cast<size_t>(batchSize_), entries_.size()); }
   void reset();
-  void setResult(Result* result, unsigned entry=0) { entries_[entry].result_ = std::make_shared<Result>(result); }
+  void setResult(Result* result, unsigned entry=0) { entries_[entry].result_ = std::shared_ptr<Result>(result); }
   IO* data(unsigned entry=0) { return entries_[entry].data_.get(); }
   void updateMem(size_t size);
   void computeSizes();
@@ -167,7 +168,7 @@ class TritonData {
   std::string dname_;
   inference::DataType dtype_;
   int64_t byteSize_;
-  std::vector<TritonDataEntry<IO>> entries_;
+  std::vector<TritonDataEntry> entries_;
   size_t totalByteSize_;
   //can be modified in otherwise-const fromServer() method in TritonMemResource::copyOutput():
   //TritonMemResource holds a non-const pointer to an instance of this class
@@ -185,6 +186,12 @@ using TritonOutputMap = std::unordered_map<std::string, TritonOutputData>;
 
 //avoid "explicit specialization after instantiation" error
 template <>
+void TritonInputData::TritonDataEntry::createObject(triton::client::InferInput** ioptr, const std::string& name, const std::string& dname);
+template <>
+void TritonOutputData::TritonDataEntry::createObject(triton::client::InferRequestedOutput** ioptr, const std::string& name, const std::string& dname);
+template <>
+void TritonOutputData::checkShm();
+template <>
 std::string TritonInputData::xput() const;
 template <>
 std::string TritonOutputData::xput() const;
@@ -203,10 +210,6 @@ template <>
 void TritonInputData::reset();
 template <>
 void TritonOutputData::reset();
-template <>
-void TritonInputData::createObject(triton::client::InferInput** ioptr);
-template <>
-void TritonOutputData::createObject(triton::client::InferRequestedOutput** ioptr);
 
 //explicit template instantiation declarations
 extern template class TritonData<triton::client::InferInput>;
diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
index d3cf1dca0d4eb..263a77fb3d531 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -40,8 +40,8 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d
     : SonicClient(params, debugName, "TritonClient"),
       verbose_(params.getUntrackedParameter<bool>("verbose")),
       useSharedMemory_(params.getUntrackedParameter<bool>("useSharedMemory")),
-      compressionAlgo_(getCompressionAlgo(params.getUntrackedParameter<std::string>("compression"))),
-      options_(1,params.getParameter<std::string>("modelName")) {
+      compressionAlgo_(getCompressionAlgo(params.getUntrackedParameter<std::string>("compression"))) {
+  options_.emplace_back(params.getParameter<std::string>("modelName"));
   //get appropriate server for this model
   edm::Service<TritonService> ts;
   const auto& server =
@@ -262,7 +262,7 @@ void TritonClient::evaluate() {
   unsigned nEntries = input_.begin()->second.entries_.size();
   std::vector<std::vector<triton::client::InferInput*>> inputsTriton(nEntries);
   for (auto& inputTriton : inputsTriton) {
-    inputTriton.reserve(inputs_.size());
+    inputTriton.reserve(input_.size());
   }
   for (auto& [iname, input] : input_) {
     for (unsigned i = 0; i < nEntries; ++i){
@@ -273,7 +273,7 @@ void TritonClient::evaluate() {
   //set up output pointers similarly
   std::vector<std::vector<const triton::client::InferRequestedOutput*>> outputsTriton(nEntries);
   for (auto& outputTriton : outputsTriton) {
-    outputTriton.reserve(outputs_.size());
+    outputTriton.reserve(output_.size());
   }
   for (auto& [oname, output] : output_) {
     for (unsigned i = 0; i < nEntries; ++i){
diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
index e2d77d5b9543e..dcbd0b942ee0c 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonData.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -34,7 +34,7 @@ TritonData<IO>::TritonData(const std::string& name,
   //initialize first shape entry
   addEntryImpl(1);
   //one-time computation of some shape info
-  variableDims_ = anyNeg(entries_.front().shape_));
+  variableDims_ = anyNeg(entries_.front().shape_);
   productDims_ = variableDims_ ? -1 : dimProduct(entries_.front().shape_);
   checkShm();
 }
@@ -67,7 +67,7 @@ void TritonInputData::TritonDataEntry::createObject(tc::InferInput** ioptr, cons
 }
 
 template <>
-void TritonOutputData::TritonDataEntry::createObject(tc::InferRequestedOutput** ioptr, const std::string& name, const std::string& dname)) {
+void TritonOutputData::TritonDataEntry::createObject(tc::InferRequestedOutput** ioptr, const std::string& name, const std::string& dname) {
   tc::InferRequestedOutput::Create(ioptr, name);
 }
 
@@ -108,7 +108,7 @@ void TritonData<IO>::setShape(unsigned loc, int64_t val, unsigned entry) {
 
   if (val != entries_[entry].fullShape_[locFull]) {
     if (dims_[locFull] == -1)
-      entries_[entry].fullShape_[entry][locFull] = val;
+      entries_[entry].fullShape_[locFull] = val;
     else
       throw cms::Exception("TritonDataError")
           << name_ << " setShape(): attempt to change value of non-variable shape dimension " << loc;
@@ -138,6 +138,7 @@ void TritonData<IO>::TritonDataEntry::computeSizes(int64_t shapeSize, int64_t by
 
 template <typename IO>
 void TritonData<IO>::computeSizes() {
+  totalByteSize_ = 0;
   for (unsigned i = 0; i < entries_.size(); ++i) {
     entries_[i].computeSizes(sizeShape(i), byteSize_);
     totalByteSize_ += entries_[i].byteSizePerBatch_ * batchSize_;
@@ -177,12 +178,15 @@ void TritonData<IO>::updateMem(size_t size) {
 template <>
 template <typename DT>
 TritonInputContainer<DT> TritonInputData::allocate(bool reserve) {
-  //automatically creates a vector for each batch entry (if batch size known)
-  auto ptr = std::make_shared<TritonInput<DT>>(batchSize_);
-  if (reserve and !anyNeg(shape_)) {
+  //automatically creates a vector for each entry (if batch size or entry size known)
+  auto ptr = std::make_shared<TritonInput<DT>>(getEntrySize());
+  if (reserve) {
     computeSizes();
-    for (auto& vec : *ptr) {
-      vec.reserve(sizeShape_);
+    for (auto& entry : entries_){
+      if (anyNeg(entry.shape_)) continue;
+      for (auto& vec : *ptr) {
+        vec.reserve(entry.sizeShape_);
+      }
     }
   }
   return ptr;
@@ -198,7 +202,7 @@ void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
   const auto& data_in = *ptr;
 
   //check batch size
-  if (entries_.size()==1 and data_in.size() != batchSize_)) {
+  if (entries_.size()==1 and data_in.size() != batchSize_) {
     throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
                                             << " but specified batch size is " << batchSize_;
   }
@@ -247,10 +251,6 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
   if (done_)
     throw cms::Exception("TritonDataError") << name_ << " fromServer() was already called for this event";
 
-  if (!result_) {
-    throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result";
-  }
-
   //check type
   checkType<DT>();
 
@@ -258,14 +258,18 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
   const DT* r1 = reinterpret_cast<const DT*>(r0);
 
   TritonOutput<DT> dataOut;
-  dataOut.reserve(std::max(batchSize_, entries_.size()));
+  dataOut.reserve(getEntrySize());
   unsigned counter = 0;
   for (unsigned i = 0; i < entries_.size(); ++i) {
-    auto& entry = entries_[i];
+    const auto& entry = entries_[i];
+
+    if (!entry.result_) {
+      throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result";
+    }
 
     for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
       auto offset = counter * entry.sizeShape_;
-      dataOut.emplace_back(r1 + offset, r1 + offset + sizeShape_);
+      dataOut.emplace_back(r1 + offset, r1 + offset + entry.sizeShape_);
       ++counter;
     }
   }

From 899836db2321fd6adf571404c728be072ca523a4 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Mon, 18 Apr 2022 18:56:06 -0500
Subject: [PATCH 06/31] propagate to mem resources

---
 .../SonicTriton/interface/TritonData.h        | 22 +++++----
 .../SonicTriton/interface/TritonMemResource.h | 28 +++++------
 .../SonicTriton/src/TritonData.cc             | 32 +++++--------
 .../SonicTriton/src/TritonMemResource.cc      | 48 +++++++++++--------
 4 files changed, 69 insertions(+), 61 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h
index 2c3f7898b6aaa..a077ca4f7ef0e 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonData.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h
@@ -97,7 +97,10 @@ class TritonData {
           : fullShape_(dims),
             shape_(fullShape_.begin() + (noBatch ? 0 : 1), fullShape_.end()),
             sizeShape_(0),
-            byteSizePerBatch_(0) {
+            byteSizePerBatch_(0),
+            totalByteSize_(0),
+            offset_(0),
+            output_(nullptr) {
         //create input or output object
         IO* iotmp;
         createObject(&iotmp, name, dname);
@@ -107,18 +110,25 @@ class TritonData {
     private:
       friend class TritonData<IO>;
       friend class TritonClient;
+      friend class TritonMemResource<IO>;
+      friend class TritonHeapResource<IO>;
+      friend class TritonCpuShmResource<IO>;
+#ifdef TRITON_ENABLE_GPU
+      friend class TritonGpuShmResource<IO>;
+#endif
 
       //accessors
       void createObject(IO** ioptr, const std::string& name, const std::string& dname);
-      void computeSizes(int64_t shapeSize, int64_t byteSize);
+      void computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize);
 
       //members
       ShapeType fullShape_;
       ShapeView shape_;
-      size_t sizeShape_;
-      size_t byteSizePerBatch_;
+      size_t sizeShape_, byteSizePerBatch_, totalByteSize_;
       std::shared_ptr<IO> data_;
       std::shared_ptr<Result> result_;
+      unsigned offset_;
+      const uint8_t* output_;
   };
 
   //private accessors only used internally or by client
@@ -206,10 +216,6 @@ void TritonOutputData::prepare();
 template <>
 template <typename DT>
 TritonOutput<DT> TritonOutputData::fromServer() const;
-template <>
-void TritonInputData::reset();
-template <>
-void TritonOutputData::reset();
 
 //explicit template instantiation declarations
 extern template class TritonData<triton::client::InferInput>;
diff --git a/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h b/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h
index f6cf37a029fb5..830a543360301 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h
@@ -20,9 +20,9 @@ class TritonMemResource {
   size_t size() const { return size_; }
   virtual void close() {}
   //used for input
-  virtual void copyInput(const void* values, size_t offset) {}
+  virtual void copyInput(const void* values, size_t offset, unsigned entry) {}
   //used for output
-  virtual const uint8_t* copyOutput() { return nullptr; }
+  virtual void copyOutput() {}
   virtual void set();
 
 protected:
@@ -39,8 +39,8 @@ class TritonHeapResource : public TritonMemResource<IO> {
 public:
   TritonHeapResource(TritonData<IO>* data, const std::string& name, size_t size);
   ~TritonHeapResource() override {}
-  void copyInput(const void* values, size_t offset) override {}
-  const uint8_t* copyOutput() override { return nullptr; }
+  void copyInput(const void* values, size_t offset, unsigned entry) override {}
+  void copyOutput() override {}
   void set() override {}
 };
 
@@ -50,8 +50,8 @@ class TritonCpuShmResource : public TritonMemResource<IO> {
   TritonCpuShmResource(TritonData<IO>* data, const std::string& name, size_t size);
   ~TritonCpuShmResource() override;
   void close() override;
-  void copyInput(const void* values, size_t offset) override {}
-  const uint8_t* copyOutput() override { return nullptr; }
+  void copyInput(const void* values, size_t offset, unsigned entry) override {}
+  void copyOutput() override {}
 };
 
 using TritonInputHeapResource = TritonHeapResource<triton::client::InferInput>;
@@ -61,13 +61,13 @@ using TritonOutputCpuShmResource = TritonCpuShmResource<triton::client::InferReq
 
 //avoid "explicit specialization after instantiation" error
 template <>
-void TritonInputHeapResource::copyInput(const void* values, size_t offset);
+void TritonInputHeapResource::copyInput(const void* values, size_t offset, unsigned entry);
 template <>
-void TritonInputCpuShmResource::copyInput(const void* values, size_t offset);
+void TritonInputCpuShmResource::copyInput(const void* values, size_t offset, unsigned entry);
 template <>
-const uint8_t* TritonOutputHeapResource::copyOutput();
+void TritonOutputHeapResource::copyOutput();
 template <>
-const uint8_t* TritonOutputCpuShmResource::copyOutput();
+void TritonOutputCpuShmResource::copyOutput();
 
 #ifdef TRITON_ENABLE_GPU
 #include "cuda_runtime_api.h"
@@ -78,8 +78,8 @@ class TritonGpuShmResource : public TritonMemResource<IO> {
   TritonGpuShmResource(TritonData<IO>* data, const std::string& name, size_t size);
   ~TritonGpuShmResource() override;
   void close() override;
-  void copyInput(const void* values, size_t offset) override {}
-  const uint8_t* copyOutput() override { return nullptr; }
+  void copyInput(const void* values, size_t offset, unsigned entry) override {}
+  void copyOutput() override {}
 
 protected:
   int deviceId_;
@@ -91,9 +91,9 @@ using TritonOutputGpuShmResource = TritonGpuShmResource<triton::client::InferReq
 
 //avoid "explicit specialization after instantiation" error
 template <>
-void TritonInputGpuShmResource::copyInput(const void* values, size_t offset);
+void TritonInputGpuShmResource::copyInput(const void* values, size_t offset, unsigned entry);
 template <>
-const uint8_t* TritonOutputGpuShmResource::copyOutput();
+void TritonOutputGpuShmResource::copyOutput();
 #endif
 
 #endif
diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
index dcbd0b942ee0c..0b76f161b5a81 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonData.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -131,17 +131,19 @@ void TritonData<IO>::setBatchSize(unsigned bsize) {
 }
 
 template <typename IO>
-void TritonData<IO>::TritonDataEntry::computeSizes(int64_t shapeSize, int64_t byteSize) {
+void TritonData<IO>::TritonDataEntry::computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize) {  //caches the per-entry sizes
   sizeShape_ = shapeSize;
   byteSizePerBatch_ = byteSize * sizeShape_;
+  totalByteSize_ = byteSizePerBatch_ * batchSize;  //bytes used by this entry for all of its batch items
 }
 
 template <typename IO>
 void TritonData<IO>::computeSizes() {
   totalByteSize_ = 0;
   for (unsigned i = 0; i < entries_.size(); ++i) {
-    entries_[i].computeSizes(sizeShape(i), byteSize_);
-    totalByteSize_ += entries_[i].byteSizePerBatch_ * batchSize_;
+    entries_[i].computeSizes(sizeShape(i), byteSize_, batchSize_);  //refresh this entry's sizes for the current batch size
+    entries_[i].offset_ = totalByteSize_;  //entry starts at the running byte total of all preceding entries
+    totalByteSize_ += entries_[i].totalByteSize_;  //grand total is the sum of the per-entry totals
   }
 }
 
@@ -217,6 +219,7 @@ void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
   computeSizes();
   updateMem(totalByteSize_);
 
+  unsigned offset = 0;
   unsigned counter = 0;
   for (unsigned i = 0; i < entries_.size(); ++i) {
     auto& entry = entries_[i];
@@ -225,7 +228,8 @@ void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
     entry.data_->SetShape(entry.fullShape_);
 
     for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
-      memResource_->copyInput(data_in[counter].data(), counter * entry.byteSizePerBatch_);
+      memResource_->copyInput(data_in[counter].data(), offset, i);  //offset is the running byte position of this item
+      offset += entry.byteSizePerBatch_;  //advance by one item's size (not i0 times it) so consecutive items never overlap
       ++counter;
     }
   }
@@ -254,23 +258,21 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
   //check type
   checkType<DT>();
 
-  const uint8_t* r0 = memResource_->copyOutput();
-  const DT* r1 = reinterpret_cast<const DT*>(r0);
+  memResource_->copyOutput();
 
   TritonOutput<DT> dataOut;
   dataOut.reserve(getEntrySize());
-  unsigned counter = 0;
   for (unsigned i = 0; i < entries_.size(); ++i) {
     const auto& entry = entries_[i];
+    const DT* r1 = reinterpret_cast<const DT*>(entry.output_);
 
     if (!entry.result_) {
       throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result";
     }
 
     for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
-      auto offset = counter * entry.sizeShape_;
+      auto offset = i0 * entry.sizeShape_;
       dataOut.emplace_back(r1 + offset, r1 + offset + entry.sizeShape_);
-      ++counter;
     }
   }
 
@@ -278,16 +280,8 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
   return dataOut;
 }
 
-template <>
-void TritonInputData::reset() {
-  done_ = false;
-  holder_.reset();
-  entries_.clear();
-  totalByteSize_ = 0;
-}
-
-template <>
-void TritonOutputData::reset() {
+template <typename IO>
+void TritonData<IO>::reset() {
   done_ = false;
   holder_.reset();
   entries_.clear();
diff --git a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc
index 29a42cc2edf88..ea5d63cea072d 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc
@@ -17,8 +17,10 @@ TritonMemResource<IO>::TritonMemResource(TritonData<IO>* data, const std::string
 
 template <typename IO>
 void TritonMemResource<IO>::set() {
-  TRITON_THROW_IF_ERROR(data_->data_->SetSharedMemory(name_, data_->totalByteSize_, 0),
-                        "unable to set shared memory (" + name_ + ")");
+  for (auto& entry : data_->entries_){  //one SetSharedMemory call per entry, all against the same region name_
+    TRITON_THROW_IF_ERROR(entry.data_->SetSharedMemory(name_, entry.totalByteSize_, entry.offset_),  //per-entry size and offset
+                          "unable to set shared memory (" + name_ + ")");
+  }
 }
 
 template <typename IO>
@@ -26,23 +28,25 @@ TritonHeapResource<IO>::TritonHeapResource(TritonData<IO>* data, const std::stri
     : TritonMemResource<IO>(data, name, size) {}
 
 template <>
-void TritonInputHeapResource::copyInput(const void* values, size_t offset) {
-  TRITON_THROW_IF_ERROR(data_->data_->AppendRaw(reinterpret_cast<const uint8_t*>(values), data_->byteSizePerBatch_),
+void TritonInputHeapResource::copyInput(const void* values, size_t offset, unsigned entry) {
+  TRITON_THROW_IF_ERROR(data_->entries_[entry].data_->AppendRaw(reinterpret_cast<const uint8_t*>(values), data_->entries_[entry].byteSizePerBatch_),
                         data_->name_ + " toServer(): unable to set data for batch entry " +
-                            (data_->byteSizePerBatch_ ? std::to_string(offset / data_->byteSizePerBatch_) : ""));
+                            (data_->entries_.size() > 1 ? std::to_string(entry) : data_->entries_[entry].byteSizePerBatch_ ? std::to_string(offset / data_->entries_[entry].byteSizePerBatch_) : ""));
 }
 
 template <>
-const uint8_t* TritonOutputHeapResource::copyOutput() {
-  size_t contentByteSize;
-  const uint8_t* values;
-  TRITON_THROW_IF_ERROR(data_->result_->RawData(data_->name_, &values, &contentByteSize),
-                        data_->name_ + " fromServer(): unable to get raw");
+void TritonOutputHeapResource::copyOutput() {
+  size_t contentByteSize = 0;
+  for (auto& entry : data_->entries_) {
+    size_t contentByteSizeEntry;
+    TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry),
+                          data_->name_ + " fromServer(): unable to get raw");
+    contentByteSize += contentByteSizeEntry;
+  }
   if (contentByteSize != data_->totalByteSize_) {
     throw cms::Exception("TritonDataError") << data_->name_ << " fromServer(): unexpected content byte size "
                                             << contentByteSize << " (expected " << data_->totalByteSize_ << ")";
   }
-  return values;
 }
 
 //shared memory helpers based on:
@@ -108,14 +112,16 @@ void TritonCpuShmResource<IO>::close() {
 }
 
 template <>
-void TritonInputCpuShmResource::copyInput(const void* values, size_t offset) {
+void TritonInputCpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
   if (size_ > 0)
-    std::memcpy(addr_ + offset, values, data_->byteSizePerBatch_);
+    std::memcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_);
 }
 
 template <>
-const uint8_t* TritonOutputCpuShmResource::copyOutput() {
-  return addr_;
+void TritonOutputCpuShmResource::copyOutput() {  //stores the result location in each entry instead of returning a pointer
+  for (auto& entry : data_->entries_) {
+    entry.output_ = addr_ + entry.offset_;  //no bytes are copied here: the pointer is addr_ shifted by the entry's offset_
+  }
 }
 
 template class TritonHeapResource<tc::InferInput>;
@@ -151,21 +157,23 @@ void TritonGpuShmResource<IO>::close() {
 }
 
 template <>
-void TritonInputGpuShmResource::copyInput(const void* values, size_t offset) {
+void TritonInputGpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
   cudaCheck(
-      cudaMemcpy(addr_ + offset, values, data_->byteSizePerBatch_, cudaMemcpyHostToDevice),
-      data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->byteSizePerBatch_) + " bytes to GPU");
+      cudaMemcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_, cudaMemcpyHostToDevice),
+      data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->entries_[entry].byteSizePerBatch_) + " bytes to GPU");
 }
 
 template <>
-const uint8_t* TritonOutputGpuShmResource::copyOutput() {
+void TritonOutputGpuShmResource::copyOutput() {
   //copy back from gpu, keep in scope
   auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
   cudaCheck(
       cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
       data_->name_ + " fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) + " bytes from GPU");
   data_->holder_ = ptr;
-  return ptr->data();
+  for (auto& entry : data_->entries_) {
+    entry.output_ = ptr->data() + entry.offset_;
+  }
 }
 
 template class TritonGpuShmResource<tc::InferInput>;

From 8321b4fb2375edc0c6ec65be99c4e52905d4964c Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Tue, 19 Apr 2022 13:28:04 -0500
Subject: [PATCH 07/31] fix off-by-one issues; unit tests now pass

---
 HeterogeneousCore/SonicTriton/src/TritonData.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
index 0b76f161b5a81..93d1e015019e8 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonData.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -32,7 +32,7 @@ TritonData<IO>::TritonData(const std::string& name,
       byteSize_(tco::GetDataTypeByteSize(dtype_)),
       totalByteSize_(0) {
   //initialize first shape entry
-  addEntryImpl(1);
+  addEntryImpl(0);
   //one-time computation of some shape info
   variableDims_ = anyNeg(entries_.front().shape_);
   productDims_ = variableDims_ ? -1 : dimProduct(entries_.front().shape_);
@@ -53,7 +53,7 @@ void TritonData<IO>::addEntry(unsigned entry) {
 
 template <typename IO>
 void TritonData<IO>::addEntryImpl(unsigned entry) {
-  if (entry > entries_.size()) {
+  if (entry >= entries_.size()) {
     entries_.reserve(entry+1);
     for (unsigned i = entries_.size(); i < entry+1; ++i) {
       entries_.emplace_back(dims_, noBatch_, name_, dname_);

From c89b4b3e1ff433beec65371f9285a6fa143b76a7 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Tue, 19 Apr 2022 16:11:25 -0500
Subject: [PATCH 08/31] some fixes for compatibility checks

---
 HeterogeneousCore/SonicTriton/scripts/cmsTriton | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton
index 4ea4b7cd7f64f..271cacc2a46ad 100755
--- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton
+++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton
@@ -423,7 +423,10 @@ driver_docker(){
 }
 
 driver_apptainer(){
-	source ${SANDBOX}/.singularity.d/env/10-docker2singularity.sh && echo $CUDA_DRIVER_VERSION
+	D2S=${SANDBOX}/.singularity.d/env/10-docker2singularity.sh  # env script expected to set CUDA_DRIVER_VERSION
+	if [ -f "$D2S" ]; then
+		source "$D2S" && echo $CUDA_DRIVER_VERSION
+	fi
 }
 
 compat_docker(){
@@ -475,8 +478,12 @@ check_drivers(){
 		if [ "$PWD" == "$TMPDIR" ]; then
 			$DRYRUN $COMPAT_FN
 			COMPAT_SCRIPT_BASE=$(basename $COMPAT_SCRIPT)
-			$DRYRUN sed -i 's/LD_LIBRARY_PATH="${_CUDA_COMPAT_REALLIB}"/&; LD_PRELOAD=""/' $COMPAT_SCRIPT_BASE
-			COMPAT_SCRIPT_MOUNT="$COMPAT_SCRIPT_BASE:$COMPAT_SCRIPT"
+			# newer containers already include this fix, no need to do it twice
+			if ! grep -Fq 'LD_PRELOAD=""' $COMPAT_SCRIPT_BASE; then
+				$DRYRUN sed -i 's/LD_LIBRARY_PATH="${_CUDA_COMPAT_REALLIB}"/&; LD_PRELOAD=""/' $COMPAT_SCRIPT_BASE
+				COMPAT_SCRIPT_PATH=$(readlink -f $TMPDIR/$COMPAT_SCRIPT_BASE)
+				COMPAT_SCRIPT_MOUNT="$COMPAT_SCRIPT_PATH:$COMPAT_SCRIPT"
+			fi
 		fi
 		return 0
 	else

From 5e20b341e1c2089d962cd443b401290d3d82f071 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Tue, 19 Apr 2022 16:21:03 -0500
Subject: [PATCH 09/31] update server image to newest release

---
 HeterogeneousCore/SonicTriton/scripts/cmsTriton | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton
index 271cacc2a46ad..c218f271ac759 100755
--- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton
+++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton
@@ -18,7 +18,7 @@ PARENTPID=""
 BASEPORT=8000
 AUTOPORT=""
 NPORTS=3
-IMAGE=fastml/triton-torchgeo:21.06-py3-geometric
+IMAGE=fastml/triton-torchgeo:22.03-py3-geometric
 SANDBOX=""
 COMPAT_USR=""
 

From 71b4f1e0c929d2dec659381f71cab3815c112abd Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Tue, 19 Apr 2022 17:33:50 -0500
Subject: [PATCH 10/31] add a test for ragged inputs

---
 .../test/TritonIdentityProducer.cc            | 66 +++++++++++++++++++
 .../SonicTriton/test/tritonTest_cfg.py        |  1 +
 2 files changed, 67 insertions(+)
 create mode 100644 HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc

diff --git a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
new file mode 100644
index 0000000000000..02e4fd9a077ba
--- /dev/null
+++ b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
@@ -0,0 +1,66 @@
+#include "HeterogeneousCore/SonicTriton/interface/TritonEDProducer.h"
+
+#include "FWCore/ParameterSet/interface/FileInPath.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/Framework/interface/MakerMacros.h"
+
+#include <sstream>
+#include <string>
+#include <vector>
+#include <map>
+#include <cmath>
+
+class TritonIdentityProducer : public TritonEDProducer<> {
+public:
+  explicit TritonIdentityProducer(edm::ParameterSet const& cfg)
+      : TritonEDProducer<>(cfg),
+        batchSize_(4) { }
+  void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override {
+    //follow Triton QA tests for ragged input
+    std::vector<std::vector<float>> value_lists{
+      {2,2},
+      {4,4,4,4},
+      {1},
+      {3,3,3}
+    };
+
+    client_->setBatchSize(batchSize_);
+    auto& input1 = iInput.at("RAGGED_INPUT");
+    auto data1 = input1.allocate<float>();
+    for (int i = 0; i < batchSize_; ++i) {
+      (*data1)[i] = value_lists[i];
+      input1.setShape(0, (*data1)[i].size(), i);
+    }
+
+    // convert to server format
+    input1.toServer(data1);
+  }
+  void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override {
+    // check the results
+    const auto& output1 = iOutput.at("RAGGED_OUTPUT");
+    // convert from server format
+    const auto& tmp = output1.fromServer<float>();
+    edm::LogInfo msg(debugName_);
+    for (int i = 0; i < batchSize_; ++i){
+        msg << "output " << i << ": ";
+        for(int j = 0; j < output1.shape()[0]; ++j){
+            msg << tmp[i][j] << " ";
+        }
+        msg << "\n";
+    }
+  }
+  ~TritonIdentityProducer() override = default;
+
+  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+    edm::ParameterSetDescription desc;
+    TritonClient::fillPSetDescription(desc);
+    //to ensure distinct cfi names
+    descriptions.addWithDefaultLabel(desc);
+  }
+
+private:
+  int batchSize_;
+};
+
+DEFINE_FWK_MODULE(TritonIdentityProducer);
diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py
index 2829a5e2db743..765a06d8cfd27 100644
--- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py
+++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py
@@ -8,6 +8,7 @@
     "TritonGraphProducer": ["gat_test"],
     "TritonGraphFilter": ["gat_test"],
     "TritonGraphAnalyzer": ["gat_test"],
+    "TritonIdentityProducer": ["ragged_acc_shape"],
 }
 
 # other choices

From 7f00e86fe2971472defee20f789e5d7cb04aa134 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Tue, 19 Apr 2022 17:34:00 -0500
Subject: [PATCH 11/31] fix bug revealed by test

---
 HeterogeneousCore/SonicTriton/src/TritonData.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
index 93d1e015019e8..3382468b52955 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonData.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -286,6 +286,8 @@ void TritonData<IO>::reset() {
   holder_.reset();
   entries_.clear();
   totalByteSize_ = 0;
+  //re-initialize first shape entry
+  addEntryImpl(0);
 }
 
 //explicit template instantiation declarations

From 111d24857538703139373da58de2fa6d1e14b6df Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Wed, 20 Apr 2022 14:46:57 -0500
Subject: [PATCH 12/31] fix off-by-one

---
 HeterogeneousCore/SonicTriton/src/TritonClient.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
index 263a77fb3d531..38ca800bfd4eb 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -196,7 +196,7 @@ void TritonClient::addEntry(unsigned entry) {
   for (auto& element : output_) {
     element.second.addEntryImpl(entry);
   }
-  if (entry>1)
+  if (entry>0)
     setBatchSize(1);
 }
 

From 36a7ae9f3d3c13b5e0a5ff0e3baefbde58091acb Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Wed, 20 Apr 2022 14:47:26 -0500
Subject: [PATCH 13/31] use simpler example, fix output printing

---
 .../SonicTriton/test/TritonIdentityProducer.cc            | 8 ++++----
 HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py      | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
index 02e4fd9a077ba..177dd9bdb48b1 100644
--- a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
+++ b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
@@ -26,7 +26,7 @@ class TritonIdentityProducer : public TritonEDProducer<> {
     };
 
     client_->setBatchSize(batchSize_);
-    auto& input1 = iInput.at("RAGGED_INPUT");
+    auto& input1 = iInput.at("INPUT0");
     auto data1 = input1.allocate<float>();
     for (int i = 0; i < batchSize_; ++i) {
       (*data1)[i] = value_lists[i];
@@ -38,13 +38,13 @@ class TritonIdentityProducer : public TritonEDProducer<> {
   }
   void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override {
     // check the results
-    const auto& output1 = iOutput.at("RAGGED_OUTPUT");
+    const auto& output1 = iOutput.at("OUTPUT0");
     // convert from server format
     const auto& tmp = output1.fromServer<float>();
     edm::LogInfo msg(debugName_);
     for (int i = 0; i < batchSize_; ++i){
-        msg << "output " << i << ": ";
-        for(int j = 0; j < output1.shape()[0]; ++j){
+        msg << "output " << i << " (" << triton_utils::printColl(output1.shape(i)) << "): ";
+        for(int j = 0; j < output1.shape(i)[0]; ++j){
             msg << tmp[i][j] << " ";
         }
         msg << "\n";
diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py
index 765a06d8cfd27..1773e252cfae2 100644
--- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py
+++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py
@@ -8,7 +8,7 @@
     "TritonGraphProducer": ["gat_test"],
     "TritonGraphFilter": ["gat_test"],
     "TritonGraphAnalyzer": ["gat_test"],
-    "TritonIdentityProducer": ["ragged_acc_shape"],
+    "TritonIdentityProducer": ["ragged_io"],
 }
 
 # other choices

From 0125089482f78a999dc0bcebbb234dcb2eea67be Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Wed, 20 Apr 2022 17:41:00 -0500
Subject: [PATCH 14/31] simplify

---
 HeterogeneousCore/SonicTriton/interface/TritonData.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h
index a077ca4f7ef0e..90acf3084e4bf 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonData.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h
@@ -137,7 +137,7 @@ class TritonData {
   void setBatchSize(unsigned bsize);
   size_t getEntrySize() const { return std::max(static_cast<size_t>(batchSize_), entries_.size()); }
   void reset();
-  void setResult(Result* result, unsigned entry=0) { entries_[entry].result_ = std::shared_ptr<Result>(result); }
+  void setResult(Result* result, unsigned entry=0) { entries_[entry].result_.reset(result); }
   IO* data(unsigned entry=0) { return entries_[entry].data_.get(); }
   void updateMem(size_t size);
   void computeSizes();

From e5f84dc08e87980969558bd5ea76e0887470eeb8 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Wed, 20 Apr 2022 18:10:09 -0500
Subject: [PATCH 15/31] fix offset error

---
 HeterogeneousCore/SonicTriton/src/TritonData.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
index 3382468b52955..72caac78befb9 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonData.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -229,7 +229,7 @@ void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
 
     for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
       memResource_->copyInput(data_in[counter].data(), offset, i);
-      offset += i0 * entry.byteSizePerBatch_;
+      offset += entry.byteSizePerBatch_;
       ++counter;
     }
   }

From 0ee76bbc466c11aa63c130f9d8e8bb0db754c775 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Thu, 21 Apr 2022 09:55:10 -0500
Subject: [PATCH 16/31] update test docs, fix model fetching

---
 HeterogeneousCore/SonicTriton/test/README.md      | 9 +++++++--
 HeterogeneousCore/SonicTriton/test/fetch_model.sh | 2 +-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/test/README.md b/HeterogeneousCore/SonicTriton/test/README.md
index 6d48159e512dc..2249a3ebd6ac1 100644
--- a/HeterogeneousCore/SonicTriton/test/README.md
+++ b/HeterogeneousCore/SonicTriton/test/README.md
@@ -1,7 +1,7 @@
 # SONIC TritonClient tests
 
-Test modules `TritonImageProducer` and `TritonGraphProducer` (`TritonGraphFilter`, `TritonGraphAnalyzer`) are available.
-They generate arbitrary inputs for inference (with Inception/DenseNet or Graph Attention Network, respectively) and print the resulting output.
+Test modules `TritonImageProducer`, `TritonIdentityProducer`, and `TritonGraphProducer` (`TritonGraphFilter`, `TritonGraphAnalyzer`) are available.
+They generate arbitrary inputs for inference (with Inception/DenseNet, a simple identity model that allows ragged batching, or Graph Attention Network, respectively) and print the resulting output.
 
 First, the relevant data for the image classification networks should be downloaded:
 ```
@@ -19,6 +19,11 @@ Run the image test:
 cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonImageProducer,TritonImageProducer models=inception_graphdef,densenet_onnx
 ```
 
+Run the identity test with ragged batching:
+```
+cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonIdentityProducer models=ragged_io
+```
+
 Run the graph test:
 ```
 cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonGraphProducer
diff --git a/HeterogeneousCore/SonicTriton/test/fetch_model.sh b/HeterogeneousCore/SonicTriton/test/fetch_model.sh
index 801c9b82310b7..5520e5582e779 100755
--- a/HeterogeneousCore/SonicTriton/test/fetch_model.sh
+++ b/HeterogeneousCore/SonicTriton/test/fetch_model.sh
@@ -3,7 +3,7 @@
 # borrowed from https://github.com/triton-inference-server/server/tree/master/docs/examples
 
 TRITON_REPO="https://github.com/triton-inference-server/server"
-TRITON_VERSION=$(scram tool info triton-inference-server | grep "Version : " | cut -d' ' -f3 | cut -d'-' -f1)
+TRITON_VERSION=$(scram tool info triton-inference-client | grep "Version : " | cut -d' ' -f3 | cut -d'-' -f1)
 
 TEST_DIR=`pwd`
 

From 1ec919043549b498e05aa5432b0d2909c658f0f8 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Thu, 21 Apr 2022 14:26:59 -0500
Subject: [PATCH 17/31] update readme for ragged case

---
 HeterogeneousCore/SonicTriton/README.md       |  34 +++++++++++-------
 .../SonicTriton/batching_diagrams.png         | Bin 0 -> 10530 bytes
 2 files changed, 22 insertions(+), 12 deletions(-)
 create mode 100644 HeterogeneousCore/SonicTriton/batching_diagrams.png

diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md
index 11d970c0fc187..c0418b1517dc7 100644
--- a/HeterogeneousCore/SonicTriton/README.md
+++ b/HeterogeneousCore/SonicTriton/README.md
@@ -9,7 +9,7 @@ Triton supports multiple named inputs and outputs with different types. The allo
 boolean, unsigned integer (8, 16, 32, or 64 bits), integer (8, 16, 32, or 64 bits), floating point (16, 32, or 64 bit), or string.
 
 Triton additionally supports inputs and outputs with multiple dimensions, some of which might be variable (denoted by -1).
-Concrete values for variable dimensions must be specified for each call (event).
+Concrete values for variable dimensions must be specified for each entry (see [Batching](#batching) below).
 
 ## Client
 
@@ -34,6 +34,14 @@ The model information from the server can be printed by enabling `verbose` outpu
 * `useSharedMemory`: enable use of shared memory (see [below](#shared-memory)) with local servers (default: true)
 * `compression`: enable compression of input and output data to reduce bandwidth (using gzip or deflate) (default: none)
 
+### Batching
+
+SonicTriton supports two types of batching, rectangular and ragged, depicted below:
+![batching diagrams](./batching_diagrams.png)  
+In the rectangular case, the inputs for each object in an event have the same shape, so they can be batched together.
+In the ragged case, the inputs for each object in an event do not have the same shape, so they cannot be batched;
+instead, they are grouped together as separate entries, each with its own shape specified explicitly.
+
 The batch size should be set using the client accessor, in order to ensure a consistent value across all inputs:
 * `setBatchSize()`: set a new batch size
   * some models may not support batching
@@ -41,15 +49,17 @@ The batch size should be set using the client accessor, in order to ensure a con
 Useful `TritonData` accessors include:
 * `variableDims()`: return true if any variable dimensions
 * `sizeDims()`: return product of dimensions (-1 if any variable dimensions)
-* `shape()`: return actual shape (list of dimensions)
-* `sizeShape()`: return product of shape dimensions (returns `sizeDims()` if no variable dimensions)
+* `shape(unsigned entry=0)`: return actual shape (list of dimensions) for specified entry
+* `sizeShape(unsigned entry=0)`: return product of shape dimensions (returns `sizeDims()` if no variable dimensions) for specified entry
 * `byteSize()`: return number of bytes for data type
 * `dname()`: return name of data type
 * `batchSize()`: return current batch size
 
 To update the `TritonData` shape in the variable-dimension case:
-* `setShape(const std::vector<int64_t>& newShape)`: update all (variable) dimensions with values provided in `newShape`
-* `setShape(unsigned loc, int64_t val)`: update variable dimension at `loc` with `val`
+* `setShape(const std::vector<int64_t>& newShape, unsigned entry=0)`: update all (variable) dimensions with values provided in `newShape` for specified entry
+* `setShape(unsigned loc, int64_t val, unsigned entry=0)`: update variable dimension at `loc` with `val` for specified entry
+
+### I/O types
 
 There are specific local input and output containers that should be used in producers.
 Here, `T` is a primitive type, and the two aliases listed below are passed to `TritonInputData::toServer()`
@@ -58,7 +68,7 @@ and returned by `TritonOutputData::fromServer()`, respectively:
 * `TritonOutput<T> = std::vector<edm::Span<const T*>>`
 
 The `TritonInputContainer` object should be created using the helper function described below.
-It expects one vector per batch entry (i.e. the size of the outer vector is the batch size).
+It expects one vector per batch entry (i.e. the size of the outer vector is the batch size in the rectangular case, or the number of entries in the ragged case).
 Therefore, it is best to call `TritonClient::setBatchSize()`, if necessary, before calling the helper.
 It will also reserve the expected size of the input in each inner vector (by default),
 if the concrete shape is available (i.e. `setShape()` was already called, if the input has variable dimensions).
@@ -100,11 +110,11 @@ In a SONIC Triton producer, the basic flow should follow this pattern:
     a. access input object(s) from `TritonInputMap`  
     b. allocate input data using `allocate<T>()`  
     c. fill input data  
-    d. set input shape(s) (optional, only if any variable dimensions)  
+    d. set input shape(s) (optional for rectangular case, only if any variable dimensions; required for ragged case)  
     e. convert using `toServer()` function of input object(s)  
 2. `produce()`:  
-    a. access output object(s) from `TritonOutputMap`  
-    b. obtain output data as `TritonOutput<T>` using `fromServer()` function of output object(s) (sets output shape(s) if variable dimensions exist)  
+    a. access output object(s) from `TritonOutputMap` (includes shapes)  
+    b. obtain output data as `TritonOutput<T>` using `fromServer()` function of output object(s)  
     c. fill output products  
 
 ## Services
@@ -116,14 +126,14 @@ The script has two operations (`start` and `stop`) and the following options:
 * `-d`: use Docker instead of Apptainer
 * `-f`: force reuse of (possibly) existing container instance
 * `-g`: use GPU instead of CPU
-* `-i` [name]`: server image name (default: fastml/triton-torchgeo:20.09-py3-geometric)
+* `-i [name]`: server image name (default: fastml/triton-torchgeo:22.03-py3-geometric)
 * `-M [dir]`: model repository (can be given more than once)
 * `-m [dir]`: specific model directory (can be given more than one)
 * `-n [name]`: name of container instance, also used for hidden temporary dir (default: triton_server_instance)
 * `-P [port]`: base port number for services (-1: automatically find an unused port range) (default: 8000)
 * `-p [pid]`: automatically shut down server when process w/ specified PID ends (-1: use parent process PID)
 * `-r [num]`: number of retries when starting container (default: 3)
-* `-s [dir]`: Apptainer sandbox directory (default: /cvmfs/unpacked.cern.ch/registry.hub.docker.com/fastml/triton-torchgeo:20.09-py3-geometric)
+* `-s [dir]`: Apptainer sandbox directory (default: /cvmfs/unpacked.cern.ch/registry.hub.docker.com/fastml/triton-torchgeo:22.03-py3-geometric)
 * `-t [dir]`: non-default hidden temporary dir
 * `-v`: (verbose) start: activate server debugging info; stop: keep server logs
 * `-w [time]`: maximum time to wait for server to start (default: 300 seconds)
@@ -172,4 +182,4 @@ The fallback server has a separate set of options, mostly related to the invocat
 
 ## Examples
 
-Several example producers (running image classification networks or Graph Attention Network) can be found in the [test](./test) directory.
+Several example producers can be found in the [test](./test) directory.
diff --git a/HeterogeneousCore/SonicTriton/batching_diagrams.png b/HeterogeneousCore/SonicTriton/batching_diagrams.png
new file mode 100644
index 0000000000000000000000000000000000000000..ca438ae2a4ca2ab94c5bd14aef4b940804c474be
GIT binary patch
literal 10530
zcmcI~1ymf%*6sv2!QI{61_&A;A-E@lI|B^v?(VJw1WN)02tH_VcemggJh%i2;SIU>
z-1pyK-db<H_13Je?y9a`yK2|&-TV8dqu#2>Vxp0u0RR9@c{wQ%0Pt)Zen_Jr!CO2T
z#gO4A8dEutG63Ms1ONnv0swdLrocS_zzqlh92f%tA{hVxv14}AJ5l%z1QSJBDZpby
z!bm&3hUzG%;{rdM{W-9qy({nwWLJ4*Y2+OwLR4ICD=qg_000TmTV2}~1oohDbapVe
zvW3vNdO1R9Jnb9-0PV0@9lhjyAjYqstk)<qFh(J)jk5-g`>*|LP$dM^P%~aNJ>U$^
zX2eW6lwh3U*TVzzuP$un!Vby=U7J(|ZLVbgcLd2CY;hRN_?TPU&ZJSmRXJEvS+?3Z
z&lMTIsU4@XTwQ&1TxOlJq)M<BiCG^L9hKL*ij938k!{MVzP$A0sv>g;Y}AQ5GLUi5
zlBJOozhOJxR3M}}hE}10lE5YBj)L|1Yx3C$q3Mn&?+;P7Cv@!JNJ@n^qB6CMyIH{X
zCZBLC`Hw?O#Enr%fG<{9#r1n6ZHM{f@TyQ6GC>M329m}cre`|V%8hglH2hy4CqBHE
z4B67WscftsK1<x9J^1*Zv=fp21+38$)A`i+f)6Sy6^OV`gCW0FZG#Xj0NAVr1To*p
z6l2oTtyC1Nd~3CyLdMZaa+c~YW6GtCy8NJr=<*^MhxYv!Ev=0@ZU%#dOP>v{AGaZ$
z{`+p@tm+}ss}me_AcmE)^zv>oQ`7oXey6~a#`3OaDhWROBKeOb0+I)MOh&d9RQMZE
zEGo$YNr!WKbg5c-sPq7KY6)QwGnwJL7oHW)=O8Pmaev~xQA2DXsc28iEN4D<qPBv$
z9{OxsB*Pm0R|HhWQpNmA6A)Qk>Xin<Cx;#gDfHCxwzRsML(+&+xh@|0QZx(pqPWav
z*Ye>x65Gj^9RIpR2w`=cK!R2ssSoU!>g}##B)e+wB$aq&j{t+-h_~V$(!SSgFSExH
zOP!!kCJt{cU*oQ@IX(B1ZdoT*CHlrh;~IE0jhj;`Gc2uYG83$Qb4coN)^a3NPj2^}
z${98`$Q7CP){wUTEknhm^=LEts_|QWllz$o-ksst@N&AMfoZiPR6bg)``m_*A8|#T
zXu)&^;8K6r+(^wy<K~iCPvSh0z}uz8dUqUiY;&acl^R!lzhw2PQ5nJD)>RYFO69RN
zQolN)U24N(B;0)HD2%_-tqsc2z@Wk9Jbo!QBB&+t-jD8L(}ykLu!}#VlFoJdpdxao
zZVA;(j4d!;(<gG55{$)s&aZptMclY7yY2?)dbvuxa2YyJ$q$H4ncOSJyL@r+{lQv1
zzt>qn0oqeA*D03;tSbXs=cp=+Q8^M>-{CTIj0@#b##}74G-glORuJrOZ4lI5D<pYT
z=HT5rgdcyUcs41BafIq3Vtrz<fLiRQ%NtQ}&36@mVXes3|N0VBay(3!w$7vG2f>Uu
zdu9meKI-ljTHPrAl1_;j{!Z%5>M$(j?R|k6mg3R<RRD$c%Yf+IgvELwbK$iLG$(xV
zo8nH#jCgs#eR{B8H4#;t&M<H4$%OiFy2%DL8)u2EqN>Szc!a<)1*^EjV+kfa5_vq;
z;Rm{x42l*2KzlAPC86%Qc$Cv#aj4*4)b2L1kLg*N@3c;udx&i5$Q&>7kxBxY1cPbY
zKRAd$C_C^Tw_q8_Ar^?p<}?$vhi-v*Nm#^O^-Qoy8u43)kRFztJi}E)LA{j1%pFR$
zUjdztM$E60XpNXsDW2%a7@y5UsgyQF_lTyu*Jb;0V>=t_?b8~s3W<?Vpl&{BGxbw4
zo=jlH$Z3y#dwnhGGY9wjz3<=9myZ<_0+yP@x~~gQ@{)@>!EYGGc2yaILh&vQa>ht%
zh${ETxXrV&OhDqJa@$pBVr!Al5(Y8H#LT-IPtaDQGzRS*d;wT7V|FPplU{bWO64Kt
z3H5YeM3OS3li)rP2v*~!NA883EZI-HATAzOK`DGqM`yW_#vZ)DJ_Fk=U8SK>;k!6u
zEpr$yVVYcg(<<J;oBB_%q=gC%Jfk=I9cgd7rrMHhrKKU99|`<{?)UO6hoVL6EO~P3
z>O`Bns#(9=d<*s6cVB*!$>r{?pxBgS(Yu|g?f<IJHNP_uAEv&W3W2>p88dL`R@C$*
zyQA{(spX{Z6z92?4~231VzIp}J3}j}Q8*}Yy!IXb>8h=Dy+Sn>`bpMjWNZf?=VPeX
z`+$K$*S4zDfOdISm|xn8I5Q{lM=9AgbItPu7hy=2i~DlAzwaXNHtr7*a>M$~8rxRl
z^~rh{P#;*Y<rzsniw%PBuV0$lC!b5tYdB8Nel{#=clKJu!TQ$LEEgD5A`=o+2Z$hQ
zO|Q{YakLilN`3FgPtP*VWSw(TxuGT=6hq;sm&*PML!P!VYlcJH1$JZePDY<Nm(-xb
zwA(cTWi@}RR0H)5az9FZ$*junR&rUr3k@S@<)CN>2S{o;+@81H^9hHkGtL(^h50Tk
zh-W%hq@4GHehl-l=asweMC$L|+XWGo&(WkO+gbl8-e%W;jZ~+O&sVKRkHfAE17(-S
znj%?Z=r^F*)EP+)z=+4AX{Nn4NrN^6F}hD6t?L^Bjyku9F>2bO`n|^F^tQUxLxrT1
z<~GZ((_C~*UBU-0tV=sEN<xTJA8(wUwnMMQg8%H#nmhcB^&b{&d<`8iJ`0S-P0Q$U
z`|s}k*?jo)WPBn+MMJ_rWwEuP+4x%DMtud@dT*U6<k+t+9V5*Kn?J|72)}jc-<ek4
z9rovAzmAc=eK9?xyvf(eoZRp$xfzf)x$&d^QhNh|G1Equ8}ZEP)KPWBAg>whSKKJ!
z>&<lahV{Li@LaSu<jp`vsMVG}*3441m*4n_7c%>G5PUSJZRx<Yg3!j;OvDB5uO8m@
zFY*wq&hEmLNY>1EG}OM{%`c^WQvZbQgqkrRdrdztnttuxqLcXw-1{2{clK8d-`DAq
zH>*T$4_D6T@FP$X%52DXDQAig7gy>C*bzERuSnb|*_gI>%1(cRR2-w+yuD?E%JNr2
z0oWM_{lE8A8<_^`RxJfNrNbn5>omCD*TS|izb;0%nyeZg&0|h8n2l@y)wAZq%BZd{
z+KW#p=CyU#aqj5SU?M%s2>X2Kea-Zs#Ck9=lxmoyNdD4L)rMW1iD~;#`<dZ3^gVFU
zp@k91HE51eU}4n5?J<<KCdb~vz3sC3S0{*-yY;oF9V-_d*0aPPMs}7dih3^%W$D?)
z#hJF<zn8<f<O6#*l=U+=B*j?-kSU0o8?t)oKZumW=>O`A`1yg6rhwH2#PS_q-5V{_
z7Atpf*J$dO605>BZhLK*ts^aUyM5N58RZb>A^B+9tDt#q?$ivfMLKFLG9B=60mm27
zmA~hZQjup$if<>w4z-yAnQ8Y3GWhx+oC}x-Tew@#f@Z>}C)HPV*0*KvJx5UIRmrUA
z*nRu(a1pU2e$QR;<grMjo_q(+MR1YtfMCjTlnzpGZQ*Tx>Qr{kry{*bg?p4o)vQNv
z&#~wRRvuyHr(dLxTg!ba3*~P)zWSKt`s=@kl?Jfpp%k?QY%>P)s|=2pRGn;tUZ7IB
zPA%CCfAUnf<04$ruig4<IQ#Uv$+%mDUA+JL>(5z3D$=cu<V0B6^K<+N9xg7_YN$xq
z=GIp751Ec$#tzz*9BKVv6K`cw3Iu(Si9vKX5ecCl2-Wc{vb0c0WV}dWa^8ghtGb=b
zV7wrwRvaNUb;>zDJr|c1k7jtR4q&Zay<kw4mw<h->Kh*)lZgQurO?jon?C>CAnnP5
zUn4*I4EYiqW#9BU9)dr1^5L-SBHxz`HU~?K+uz&XqhWMMGi@`=>NCYU4IuQ5<=GVP
zydw!FR2~ZFLHVJx0DK-K|Nph<G#uKD<_;@l?M>Rc3dz}Frb63X_2f6&if=iey+feM
zm9mYDoR>=-zV%dYD_(ftyeYQ(<;(&_r9$}z_2s~wn?pmhOLr+_{D_n^gkH6J^_&<d
z0VN_S8jYS}&Z6MvozBi3#fA$Pzn_cBq6AZeZs>9&YrmFFXNE&dwHeb^Ta4B?F%82)
zxo#1=OhLq7b-lu_O>W-WrEiy=J@i_>Sz7g^2jj%dS-D<HT@Eo0#IfIu%;F*Ec^Fui
zsX@mlg}$;#Kp@7wowBIW?8qmY*`wd~#C+TJEIea8v>^>Av*r$UxX0gwbH88d=fTE0
z#TOpOmNsrRkNt9w+_lgXP&@+WicjA27r!(mD7s9G*xbsub#$>(MFYiaZ+%}&AQTvJ
zBrvzVxU5R}viz_!zrH<(iZ(}@@<;%6h`(^}DEK9%rPv9+$MCo$7G>dnrK4?iEr~xN
z>~cMhwz+YktTrL6*vH!k`NDcC!6E67rEiTvR@23)ZJ|>ee+FFMvmVUgALJ`LykR%N
zDPq|PrtpX{?hUi}ix`FBz8%rCIn9zEJmcYZtl+!j@{2+h#NCHk0c{*P&A2$ju;C1#
z4Ies5ZK%AdK<JGo*oB%x(`IN>a&LdJT?KH7`_=WBDi39;<Vg&iO5<xHMNC>cWdO&~
z+sV_e(b}C*Pd}>V9HeHXnl+>g;bAs~81ybZa@GMV_v9{+YTI?up?Atz${peWqd-=!
zuPD9DT??;Bm3p3?p=6w3O%S4je?MSEFqVwKTF=PFM6p9c+}yG0782eo3yRTkRzhzY
z%X-Eem@PlG1@^_t>BL0iCGjpk;`Q1>M&B2Xq4)cCTh!bes(p@y(31!I-mudqvisTR
zM$fK_r~$+?26P2}^pP0$VvS2Im2M-VfAaMS&f9$e?Qd=X<iASNQejyMeCBL&VR&{H
zb03`|W@NE){$rZ2f_$yZRy;{zQbfb1HJmax<XfYz53As!+vSKxE91iGqz7xPc}w4{
z^-XfVCydFjgc;oJ$wpQ`w7IVFa;-8}$a7B8i^y=xx^B;yJp8Otn*g_)1c?dqchP>p
ztar;9FsPbic4gzgVKKaaknLX-Fe|SD7@!^v|28#UmHsP<+Iw)`ayajDylL3>Lw;4x
zd{vQVX5MLYW-T$KFsH>=cp|%L|0N}m+%QUvoxG0*$og-^p3VF)Fcv*Bp&Kn@9nU5N
z7*t-!!F7CUFpZi2s)x3V7V-wI2WgX7cByyQ@)>GQ{fTizOOFUa_$0Y0=;4b)kC4a|
zt3zX9q7*6@KHIxN-MS&X%`GuWCF5>SlV#CV<LutkIrZdEj{F;0Wjauw{v+LOMzx*t
zcpyeTCu*+Hhl?^jPOQ~;6Kfh*S(>0ZZ7rg!1>kYanA1G>U+nmE3%cZmexGL@Y>}3L
zR9Jra0o4BN+mhs$VeqJR7D$mBk~KgwBHaUg9{jJ^C2?r6?VE|!4LpvQ@?Fo<GT>R!
z{g+Yyw|W0u`FZ*HrB_Za|Fx#4lDFO*f4<*lK5pF2it2Rb);GORguK)k+uQkDG$cOc
z0oi|VuXPu-efNZ;sMO_7lKGYbZq=A;816^P?E)$2^w#c+<@auBDVRQ_|FCliT@VKY
zLT0womD#h+K}~gp6c|{P1rINI9kES{*<Cvk0e>eJrI#PRC8l_%&{mm``3gOw7+P!3
z@(L>=o%G_3he;Ar3z?4M`7rVsRU{yTC0`*<r>{*r>=rtqq_peRL=7k2WPKg-*Fkrp
zSV?VIzE$R(6lR!B){|^8vK?B=uZfgAPnti6^H{dw$^-B=F8r6V=Ss@XNsQTd><pxK
zZc*`DH2X=zC-LI8*~O{$S!og1H;1KU3*Ofz!}@d-Ij~IiK6b(PL#jsvf=ctnIh6_z
zXn}CEd02dfQjb)o!;iUVMbejcbHkf*Q3+C)X<Wke5yrzkt+>v%CHZNABxTX7Pr70M
zjZK!}&Rh{jCPQrb<)mzWU_Ys32*o1{_XiKx^e@Ws+9s1YFiC@KQ8G@B(wA7r2GY)J
z$7^N-9&4upFp1qD;b)g9Qfg)`*~X<<W7TSs`S3~HFpYc@mFpmWNM@X4NUI8%1Un)W
zFUZoVRJ9?E0&}Z#peUojps5b-C#mWo81GXtw`fbkiG=9b+Gor_0<rxCc)IH>4>|H@
zdOn))su{7aaY1!Yqim({SfKIGj<Tur)BvO?@qqU-zZRYEkY2f`s{Z8n@g#>zqEqmQ
zC}~A8$A{Gla+%T|y&BgLNKNFya>*GYlWm7i@p_bpWN8l6R%+fd|GEe{YLH4zbKlGq
zn8@}&pvyxr@6NWi48k)NUB`*yW30D;bbG#ar?eD1ht06cssCn$;o5e4Dtz%nvq{6;
zV#SR(0}Cnnz~_ju<-#$9uMT%yPq{DU$CFs%Yc}3D^8y8afr?!)Pc*p4cWj30$wk4?
z^yGEs3VLEuJrA36!+9Lp%8zkRc(=B2)=HIM(@>{Zz7MwTX+SXwcQUfRYa{F*6-ar9
zI{oWAt4hokMz`hoeecBAR61<A=D^peE{UjI#&$69GAim%fBi7chD55sG3<bv#Ggk7
zQWT=L6T`scaFw5SM_Pn!yEeUp3!}32E1Nb%<%MNxe))S3aQC20d|i(e*e^r2S00wi
zIG+dXBZBb9-sADkvhGDUI41^8*V?2ZI-Qr;pj$eS{OnyQANlN*(pNKD^-W(?Kt~j@
z?rB5Y3YuAsoKD%s=yF2?k@1e(thebto$&ToOPgclBYu{GF?51kj21-cJC`aQa~}-q
z+?QYAlFJD0>1!gr9%_I|LdS%wSzv!WCF-VWk@JXwN`A!$`^la{uz3Se^oqtV6!PR6
zTT(wNnk1FruH%>uZ<RFy{XX^_&1ybt-M&O^J7l5G5039yYOg2YE%zaw+%zUnuNIom
z+g%iz#|MG%d`;S{oV^~~bhq1gWq*H6$@cGVKU0NH&k8>*%ZtDCAAj7<dpWq@kL7ar
zN{do(gB>`X<T-K>DmtGh(e3FjrE&czqjcyi!hux}7G)~&>`)NxD8ar4)6a*WWhm(k
zM=~ZcG+w2=o9h&ajgY2LhQtm}<-%R11edr$M^5R<n<FIq@CM(AqR!(C%Cy{>KCQ$!
zay&RA^e<X^Ld_8}3J4|K$2b^93<M=bDT9P;54GqUG#7wW+2+F{nW>njIP)#=SVa|o
z6PWU77y9((2zjTSW1{*{1N#$b|GRg|pI1wj*ETuL@nG-Q<U8VNZ?204#t^h;o7XRV
zUhRH?-Msc4E0U*ZQQVh_U%cLrJ5}m};uM`s8L+sx3h;~EULpMCg1jSmwEYqwyi#vl
zgLN(Dlz}5}$C6$I9%&jpbSK4=W06>s_NP=DYA`Kup1;X;=n0e!Av~l`)+iDGLrV1Z
zCu9QKB!oelWmU&+m`vL*KbCOaK6gx6`s(pwj@6~(h$G`S?QT-ME#{<%(vSz7n|Rx9
z_%L0C^udXEX%8>u+)vnAks(p8{?jKI^zAZ!2^$;w*+r3ZyH`^qZ%SNou@q>3E`}ao
zXZLosY&f7oU{f&?y%A4Ad)|ju@7=<{k48IU*{+t*cW0z}`l#5ut2rl^ZOXPn+He|z
zybGFOoJV5H_-5hwm2esROWQLvpK%4NXSfRUFLFLOA75=O%L-SL#~M=??g)Ec)v+e*
z=W(?1F63#w>r1N{y)G%lwIvW6^3r56jl+y+hC_5(+uefLZs?|Rrt(?n&CN}y?K^?A
znvo|m!HS^Jn<s}S8phPoKkl%1WzWXZenlL6hC2Vj)`}I<a?X=(9TGE628qjCVB~&4
zpbp?(a@t^8hB5YNT$eQGg_*K)Pw?H(%{vb0o%}`8<*qO!bZb{HKEjcos<%*<=0fmS
zL-u?0U7FdL{o4c?^#OL40`tc_!XYT?IxJN%G|VnYpmOv#g%{RWitab40%7W$OO^WH
z@IjivYWH&sj`RN*bxvAJhrr%0A+mVpBiC|4D9h3(NqPUds?&X2Z?61gL}b2m&1C+t
zx%NaCJ>n!g343Cjnpd*woWtU7!N<+3(J-N*$ClLm=|4ke3EQs{BU=d4f8oi`pFyOm
z;t*!8P~e+NpXU442g|pFlMUZXE=-au7)mI0RM|t^sj!kbyuQ`e3uySdZ>_!DNvmnd
zv=!NXqifMEVG3%aoE|#b`$H{!`B`-%<k?tAYC+9rk270ipx{A^-lR*D0`)kzt@<>E
z&;Bn;!OreW<LmthndxYnCAr(>Ha7u^7fjnrxLSrn+VTpK!#5LjL!nO7UOLK!UQNiJ
z<>tPXr@2%A<P)}LY6WhkK-k>rGYgJ#8L20c{&EAW(oz(3qOf?IY!9P+a!4ASU08TN
z>rt?jShr18+C10ShyZt7osE-1g7s%=5A%$M?goiusL&~VeUqHrW$r^utvs{Jiv^L|
zZBFa_C^O;JBmO}d0WzVTBewu|y7Qpzjk(IIPi)+k(%#8=?SVI$7V5{X`h<&aps{!b
z8yhvokDDEAxy5^N4^;=YLY7KHD<FSEj#W!Ib5KXO4K5kpXYVLQG3{bVAIBphPDl%6
zH*6({0LWce3S?I*r{&`TpHGoT@D<l=eBfbS%geC@8T)$r=paPXkGX)1s}<8fOvM{2
zePcwe5^7QA;PZ7?d+XA{N^z)0bttZ8f2OE6@SOPk>~OfQ1jHF6``XR>-~w)*`Ae~C
zvCyMnZ1)}dZ8jgRh#YK<9n{Y(G0XC1`f^73UoW;hw>M4hR%@&z<OZG>?yaSzIS$*N
zH1OxQ#zcbLmWA}PmO0H7wVh2Ch(!DAU#3G%4`&-Av)3B>!X;O|xjS&`(LuQ%C`413
z3wwYbMl-G7)Dnl^y`#%M(BUrHIvT5g`547}To1^r2dz?>Efx!G87xLkr5jxf!B*=)
z6%8AXf_j#p)q3FQpAk|b|9gRPAx<ntGEh;WOwnl`A%FA2%<hf^JKfCVx5BC^1x4~!
z?aSG|{>mBq*cin(Wevyrg7uScd01be^y2~{BM3L*9{t7gf`%(H0za-;VxzRv-`vHq
zhR2pGKDGJ%JhdCHf3S(R|IO-CPbsL}a5QT-<MYgR$te14QVX2rx@5Q{(@NY?-_`4w
zxxH1HHo0lOU*8IgvxVb;=RyBUUH>yq_)oCm|Gqk}sH1;YSmg>8a&F;WCsYg<dkF{s
zbrSyt{4c!Jy%Djl?<w>Q(VZ1&bbog0;+}YQxN7gQ8?L2Dkr@>|B$I0*Ho#3og^NW`
zNGKyzHZFG-Vm_P_T3Ji5^VbvfudLlU!FTelBZ|H4BUd!%J#O~A@_qIudF6{plZn)j
zSvtqq$LaFvZUxCrX!Fu$tBfLj8$qactxA4A^QSndB)@%^MFmvorsWW1ngjEV?;Dgx
zRY(d&BtWJfC7v4T4o<0zjHof%<-YfC2wo>Tsd{d__AbbQNK1I+AeeXX@T)i^C&Ul-
znA3pB4TG(_7;(0&uo0te-12i%TMTdBzAUU1C7`B;gk&lDfD)a1$e17f%c{WP`9RaG
zB5X_Pz^>8i&f#)=f-*j)M^=tCiru3?`P--p%{Jdr{N+ib(+@48nqbBjY~|OxBMYzC
zBO}HIl_hpLNQX(%OFnlXS7Y1?JC^WRj-WLd=RhV!`nnzSOBqe)7<}Obzqh|M#wJ0p
ze&UwB#$w}X**Zdy*`o)*p#hZ36nj@OW)L;*8=to&f6&|5Ig6K0O64M+3+5c)?s<Eg
zbAC;@rtv{LX>-|m1ePpfHpQ1`RQP+mx2O{8Stjo_z0Ze2+Gg1my$&Sf#KPb4+VIGq
zGTad4dM57rMJ)#}CHcoSp6n+HkE&A7`}RVjbwWz3+`wx%MFVQLszIGcUPlM^G7^8h
zkrKLR4v7+?p?Y?)@~nxDcBDOTqlzI-r(V#oJ98K8ST%Mm!EpbsX8*Hn!S*a3b$Z&(
z?KP6}4Dmz;E~^dv@dU}-8-2<sFVm@+@!H66xUqF#vl&XQwl$MX#13FYaxNSnRjXdq
zvq0-W#@87RUm@u(o!=0)^r`bTmPmC_J-ui#O4Sghpkei3>FK5&?W>jch-aYjcQSH<
z?H-ot<@zli2`lw5gIINotjoVub-SFBX_fZg4N0@Mi~K)9|39dI&3F6JEqT`Hl=@fc
zwyT>8`NN@#Gs5fN#E)YqB7+CtD0eQo?*xC2e=f3s+|s|gOs<}UzS_@oOJ8^$VJ}h*
zRxRg!bs;Hx@=p5K7A;%!_KZjoJmcK$IJs?BInQ*>n5J$T_{`1vIF)7**D>dOC#BNv
znxq<(=h8pq^UZb&tn#=lEiUi_LK4zZVEfPFLnR!j0L6oDgvtU@xvKp$;DBl$CjuoR
z6_phSKe94^4n#!kaZ|A$K1sdlQu|uUs@6~Givtaxs2}33F{k0Z#dc!4c0?D@o4ym$
zbz5v|PnE|p7K~-B5Ip@w1Byd^LTZ9Q-gof{b<J$_-1bBe%n|=tWZdL9Dnxd%Lc@?3
zvU-5h4XgmWiH}sm>48daR{S{*e*0NAn{3%DUW}~@{cEXd!Ly${ejssI`=?|%l|p2#
z*|Kqm&g4HXq~j@91}8;g!=agR@8ulmc!yH88@LFDr^Nq}fB8cY2n5@aBn+p9-gLbL
zg<ZB4{GxZQkG-knEK)r+mA!Q}s$s4Ix81ocGPG7EUO%QkE-LoP==oPZmN$=Z6g*sE
z%(#c2S<hFcgE9UI{s-RBC8ornMKPn}sMdeNd$OWLQeq+!^{Fb>1gBgOs*6nM=3)&z
z`DTD^E91W-OvGF}pFEKChQc5P)l$BH@^Yqlru#?cq8(^&=P+}D{-H5>Uw-lr#4|4n
zIxor_<I!N@0W#M=DZX3}Jzb}1*m!epq$|5W(U#(yo%erq;v!Q&Q45&Y$Ae3U3BM*3
z`+gSUSLV`A?;p*VP%nuk0;pwWdG}pz8;ov(>@{=GZ}DHbrA~9_eCM-mYSzV$m+EQG
z5L>WxV-#aB9z#0q{f4_ZEEDRUnu3SPS+rsAUZ=pQ&(}TpJAlqFrdbi(O*q^R=NT~R
zX_8$kg*2H;pD@XXKV(3p2k%#29S9d)_*uuEI-W|q)<c{eLLy3@#}hLW!aMYPPv4}k
ze~@hWn=kCZ03RL2j09T&9Mq6;^PP%S5!$TES;(=qqc#@~)(;8w6k!{$CT7l8DQY0Q
zPW~rJazNJicCsY?bzviA5g%JoLn6!*Dc1GNnl_J>e>Rxp=pV#@VDS-C{*G6_NGF^y
zYUECvK#<)(QHoxx_tc!r@BlcGTuge1VVbTt-eKLuzJQYa+~e;j#rfgA%9)q;jKO&H
zV1`{Q9K4L@)blEos2Jo2wdW7!#6qW7t;-^&#n?)_HI8#Z;e4x1^mv-%Y1i6HXX1C#
z93Fm#wx_Q7HB?L8yK{2`XVCIddZ0Ee6^`oEVd>qVxGZWb5*Z)&^5bKTegsodmJ#~G
z9y?tKw#5l%lgZ4cTsP1@QbmG})2eC|<vP{sq7}2dk$sIp2qVHO3yvU3uzNm<N<6{Z
z2q}ubUd^AD$ayDqRr;lH97mPXi`tNgr>pC+0DsOC<O^rTHVw>uDOTUTc(T0ny8+qw
zEI;|%afz<@^SkPr?B6Z<zTqQ3r!=&ePVOqKF;93+y$#1^9h-=zenYk8?p3Guk8Y~8
z6uY+4BNMiasVQeuQXi2Y3-c$o+jrJ(4P=EMVty+#3u6-9>irH0IASPmH_i`O@$jpZ
zM3}hUa3DHwE5dS*Xz=x@^q*H`%9DBYS!49%=$Vzu2KCqowJ={rO(2GYT|<#zi~v!A
zDH`j9Kg&ogQIXJUN!tp`?t5BDR1Ny+M;<3X6s{~SIGkr#`HL6_xpMR8wmi{&J+RQ`
z3FDh7Feu_t^xXq8Nl~eq-3nmsrK?Q&7~RE^`mhgQOPOdJpFA{C-PB{Y(dl}&6S{Vu
zp7S;GSJ!yR@V>{LR?hbso^!q5aRCPmrE8}(zsK7Sede&3O+03AIWx9)T+XXhBJ#Q^
z1YKT+dfqUe`TI;f@v?`l3JxX&aAvlL_V7iKz4r^h(*#OR69TuHkfvz0>gIzgT-&Yc
zd3St>!FqaIsW&NQR76SuO6J@`FMMIxQK3q5^75xqB?iwIT3Ru-crs7Z1o0@(o?o%Q
z&w2_alT!FFjeOAA!L`5>tC&3K!AQ0KBk%qA)VYohf<7M94RI?R?3>JLLt4#{jFi3@
zma|y?y^)F0T&Y8E?dnA=INxPwb58-Z%oJylwY}!6-)4;lV--mw4R@k`L71-o>iv(U
z9Ub9?{zt__pJVtF7Zb~v9Oj8vMVqYr@Ii~6)#j5+V$?{LkY>1(V2_1_Qde+d5*oYt
zm28~{uG49i1b25MwK%oNU(ZyvI#TT(B+8ldUB5U?B;}QXN!&`+zt<+hknS4;FzGPi
z;ezo<WIkwwpD=>wq7B`i1hEg7+VQ+S&XN|A6VoqaqMvNu4PTDFbYcWTaPdBExh}*n
zT^Orm&O4VR+=){JviQ3gN<YtJ((S4A!wv1fhIhZ->pQIX;~9R*C&(Go9+carqL@U;
z_rhN5Bx8A<o5n_}M&;*g7xa%4@+$57j;$eBTAF(ddB?YE>Fn@aYGLU-uTB-XZVxT%
zfPV$T=qj!4Y6f<Nh?qJ<;3ohtke8Pe_=1y{N1f+|2#=u13qejGzX%ZM;6L8;Zvu7>
zW>)52|FZxUy@w}Ufb@?BQxS6qXFISf0PN^!Yh?;{wQ{iMb~H1m3aaUZccOXf1h0z7
zS%DxfZyen0&8+M#-Z*#y_ymPKc?E&&@ShoqfDgPc{oj4xIy;!TnL?Za5@rr25Sq7A
zGBiq7CeC1IFB%^B|FXZX#<Sqv3I6V`32}CTYX!jjaq@8SW4;d2Qi9)SdAbk(GEPMD
zj{&6q8zb<G@bLe`2!5kO8*mxkf0J==R(Ei)1xT1WJD7l7pXQ+qwt+aqCqV;WG|gKF
z4~X+y2P=Em{fYQ;xHQ|}(#jB5Fx>Xw|3Rm~KXmp4wBP^7c`E<0`~O+yX6c7CTt?)d
znv|7Aq#R7$>>&28@=^ddH!Cv{L36MmFRuWQ(~KV?$oWDL!pkYl59HxA0}2cA@t6ph
zngE4*8iF7GeH3|n7gw;oDdg`_cp+v&W&)-!IE8u51>mCy!ml7^f}8@Tyn?)Z@Ina4
V8+zGy1Rn(;FRda~@y0m#e*uV1>N)@b

literal 0
HcmV?d00001


From 4c844ec85c2698ca2b3c809d30d58ad67d533772 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Thu, 2 Jun 2022 21:27:08 -0500
Subject: [PATCH 18/31] handle batch size zero w/ ragged (including test)

---
 HeterogeneousCore/SonicTriton/src/TritonData.cc    |  8 +++++++-
 .../SonicTriton/test/TritonIdentityProducer.cc     | 14 +++++++++++---
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
index 72caac78befb9..23512c91780ce 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonData.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -119,8 +119,14 @@ template <typename IO>
 void TritonData<IO>::setBatchSize(unsigned bsize) {
   batchSize_ = bsize;
   if (!noBatch_) {
+    //zero disables inference in TritonClient: remove all entries
+    if (batchSize_==0)
+      entries_.clear();
     //should only be set to 1 in cases when entries > 1
-    if (batchSize_==1 or entries_.size()==1) {
+    else if (batchSize_==1 or entries_.size()==1) {
+      //in case batch size was previously zero for some reason
+      if (entries_.empty())
+        addEntryImpl(0);
       for (auto& entry : entries_) {
         entry.fullShape_[0] = batchSize_;
       }
diff --git a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
index 177dd9bdb48b1..afc23b684df8c 100644
--- a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
+++ b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
@@ -15,7 +15,9 @@ class TritonIdentityProducer : public TritonEDProducer<> {
 public:
   explicit TritonIdentityProducer(edm::ParameterSet const& cfg)
       : TritonEDProducer<>(cfg),
-        batchSize_(4) { }
+        batchSizes_{1,2,0},
+        batchSize_(0),
+        batchCounter_(0) {}
   void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override {
     //follow Triton QA tests for ragged input
     std::vector<std::vector<float>> value_lists{
@@ -25,6 +27,8 @@ class TritonIdentityProducer : public TritonEDProducer<> {
       {3,3,3}
     };
 
+    batchSize_ = batchSizes_[batchCounter_];
+    batchCounter_ = (batchCounter_+1) % batchSizes_.size();
     client_->setBatchSize(batchSize_);
     auto& input1 = iInput.at("INPUT0");
     auto data1 = input1.allocate<float>();
@@ -34,9 +38,12 @@ class TritonIdentityProducer : public TritonEDProducer<> {
     }
 
     // convert to server format
-    input1.toServer(data1);
+    if (batchSize_>0)
+      input1.toServer(data1);
   }
   void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override {
+    if (batchSize_==0)
+      return;
     // check the results
     const auto& output1 = iOutput.at("OUTPUT0");
     // convert from server format
@@ -60,7 +67,8 @@ class TritonIdentityProducer : public TritonEDProducer<> {
   }
 
 private:
-  int batchSize_;
+  std::vector<int> batchSizes_;
+  int batchSize_, batchCounter_;
 };
 
 DEFINE_FWK_MODULE(TritonIdentityProducer);

From 0ca9b30c1ba26b5eaea897a43bee6fdce0b5bcc7 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Wed, 13 Jul 2022 19:10:08 -0500
Subject: [PATCH 19/31] improved batching interface

---
 .../SonicTriton/interface/TritonClient.h      | 19 +++--
 .../SonicTriton/interface/TritonData.h        | 11 +--
 .../SonicTriton/src/TritonClient.cc           | 71 ++++++++++++-------
 .../SonicTriton/src/TritonData.cc             | 57 ++++++---------
 .../SonicTriton/test/TritonImageProducer.cc   |  2 +-
 5 files changed, 85 insertions(+), 75 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h
index 50c4c5ee83642..8e6bb3518633a 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h
@@ -16,6 +16,8 @@
 #include "grpc_client.h"
 #include "grpc_service.pb.h"
 
+enum class TritonBatchMode { Rectangular = 1, Ragged = 2 };
+
 class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
 public:
   struct ServerSideStats {
@@ -36,13 +38,15 @@ class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
   ~TritonClient() override;
 
   //accessors
-  unsigned batchSize() const { return batchSize_; }
+  unsigned batchSize() const;
+  TritonBatchMode batchMode() const { return batchMode_; }
   bool verbose() const { return verbose_; }
   bool useSharedMemory() const { return useSharedMemory_; }
   void setUseSharedMemory(bool useShm) { useSharedMemory_ = useShm; }
   bool setBatchSize(unsigned bsize);
+  void setBatchMode(TritonBatchMode batchMode);
+  void resetBatchMode();
   void reset() override;
-  bool noBatch() const { return noBatch_; }
   TritonServerType serverType() const { return serverType_; }
 
   //for fillDescriptions
@@ -50,6 +54,8 @@ class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
 
 protected:
   //helpers
+  bool noOuterDim() const { return noOuterDim_; }
+  unsigned outerDim() const { return outerDim_; }
   void getResults(std::vector<triton::client::InferResult*>& results);
   void evaluate() override;
   template <typename F>
@@ -62,9 +68,12 @@ class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
   inference::ModelStatistics getServerSideStatus() const;
 
   //members
-  unsigned maxBatchSize_;
-  unsigned batchSize_;
-  bool noBatch_;
+  unsigned maxOuterDim_;
+  unsigned outerDim_;
+  bool noOuterDim_;
+  unsigned nEntries_;
+  TritonBatchMode batchMode_;
+  bool manualBatchMode_;
   bool verbose_;
   bool useSharedMemory_;
   TritonServerType serverType_;
diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h
index 90acf3084e4bf..d10cc2752460b 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonData.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h
@@ -71,7 +71,6 @@ class TritonData {
   const ShapeView& shape(unsigned entry=0) const { return entries_.at(entry).shape_; }
   int64_t byteSize() const { return byteSize_; }
   const std::string& dname() const { return dname_; }
-  unsigned batchSize() const { return batchSize_; }
 
   //utilities
   bool variableDims() const { return variableDims_; }
@@ -93,9 +92,9 @@ class TritonData {
   class TritonDataEntry {
     public:
       //constructors
-      TritonDataEntry(const ShapeType& dims, bool noBatch, const std::string& name, const std::string& dname)
+      TritonDataEntry(const ShapeType& dims, bool noOuterDim, const std::string& name, const std::string& dname)
           : fullShape_(dims),
-            shape_(fullShape_.begin() + (noBatch ? 0 : 1), fullShape_.end()),
+            shape_(fullShape_.begin() + (noOuterDim ? 0 : 1), fullShape_.end()),
             sizeShape_(0),
             byteSizePerBatch_(0),
             totalByteSize_(0),
@@ -133,9 +132,7 @@ class TritonData {
 
   //private accessors only used internally or by client
   void checkShm() {}
-  unsigned fullLoc(unsigned loc) const { return loc + (noBatch_ ? 0 : 1); }
-  void setBatchSize(unsigned bsize);
-  size_t getEntrySize() const { return std::max(static_cast<size_t>(batchSize_), entries_.size()); }
+  unsigned fullLoc(unsigned loc) const;
   void reset();
   void setResult(Result* result, unsigned entry=0) { entries_[entry].result_.reset(result); }
   IO* data(unsigned entry=0) { return entries_[entry].data_.get(); }
@@ -171,8 +168,6 @@ class TritonData {
   bool useShm_;
   std::string shmName_;
   const ShapeType dims_;
-  bool noBatch_;
-  unsigned batchSize_;
   bool variableDims_;
   int64_t productDims_;
   std::string dname_;
diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
index 38ca800bfd4eb..285b39b14708b 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -38,6 +38,8 @@ namespace {
 
 TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& debugName)
     : SonicClient(params, debugName, "TritonClient"),
+      batchMode_(TritonBatchMode::Rectangular),
+      manualBatchMode_(false),
       verbose_(params.getUntrackedParameter<bool>("verbose")),
       useSharedMemory_(params.getUntrackedParameter<bool>("useSharedMemory")),
       compressionAlgo_(getCompressionAlgo(params.getUntrackedParameter<std::string>("compression"))) {
@@ -71,12 +73,14 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d
   inference::ModelConfig modelConfig(modelConfigResponse.config());
 
   //check batch size limitations (after i/o setup)
-  //triton uses max batch size = 0 to denote a model that does not support batching
-  //but for models that do support batching, a given event may set batch size 0 to indicate no valid input is present
-  //so set the local max to 1 and keep track of "no batch" case
-  maxBatchSize_ = modelConfig.max_batch_size();
-  noBatch_ = maxBatchSize_ == 0;
-  maxBatchSize_ = std::max(1u, maxBatchSize_);
+  //triton uses max batch size = 0 to denote a model that does not support native batching (using the outer dimension)
+  //but for models that do support batching (native or otherwise), a given event may set batch size 0 to indicate no valid input is present
+  //so set the local max to 1 and keep track of "no outer dim" case
+  maxOuterDim_ = modelConfig.max_batch_size();
+  noOuterDim_ = maxOuterDim_ == 0;
+  maxOuterDim_ = std::max(1u, maxOuterDim_);
+  //propagate batch size
+  setBatchSize(1);
 
   //get model info
   inference::ModelMetadataResponse modelMetadata;
@@ -149,15 +153,12 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d
     throw cms::Exception("MissingOutput")
         << "Some requested outputs were not available on the server: " << triton_utils::printColl(s_outputs);
 
-  //propagate batch size to inputs and outputs
-  setBatchSize(1);
-
   //print model info
   std::stringstream model_msg;
   if (verbose_) {
     model_msg << "Model name: " << options_[0].model_name_ << "\n"
               << "Model version: " << options_[0].model_version_ << "\n"
-              << "Model max batch size: " << (noBatch_ ? 0 : maxBatchSize_) << "\n";
+              << "Model max outer dim: " << (noOuterDim_ ? 0 : maxOuterDim_) << "\n";
     edm::LogInfo(fullDebugName_) << model_msg.str() << io_msg.str();
   }
 }
@@ -171,20 +172,38 @@ TritonClient::~TritonClient() {
   output_.clear();
 }
 
+void TritonClient::setBatchMode(TritonBatchMode batchMode) {
+  unsigned oldBatchSize = batchSize();
+  batchMode_ = batchMode;
+  manualBatchMode_ = true;
+  //this allows calling setBatchSize() and setBatchMode() in either order consistently to change back and forth
+  //but changing from ragged to rectangular once multiple entries may cause issues; todo: check this case
+  setBatchSize(oldBatchSize);
+}
+
+void TritonClient::resetBatchMode() {
+  batchMode_ = TritonBatchMode::Rectangular;
+  manualBatchMode_ = false;
+}
+
+unsigned TritonClient::batchSize() const {
+  return batchMode_==TritonBatchMode::Rectangular ? outerDim_ : nEntries_;
+}
+
 bool TritonClient::setBatchSize(unsigned bsize) {
-  if (bsize > maxBatchSize_) {
-    edm::LogWarning(fullDebugName_) << "Requested batch size " << bsize << " exceeds server-specified max batch size "
-                                    << maxBatchSize_ << ". Batch size will remain as" << batchSize_;
-    return false;
-  } else {
-    batchSize_ = bsize;
-    //set for input and output
-    for (auto& element : input_) {
-      element.second.setBatchSize(bsize);
-    }
-    for (auto& element : output_) {
-      element.second.setBatchSize(bsize);
+  if (batchMode_==TritonBatchMode::Rectangular) {
+    if (bsize > maxOuterDim_) {
+      edm::LogWarning(fullDebugName_) << "Requested batch size " << bsize << " exceeds server-specified max batch size "
+                                      << maxOuterDim_ << ". Batch size will remain as " << outerDim_;
+      return false;
+    } else {
+      outerDim_ = bsize;
+      return true;
     }
+  } else {
+    addEntry(bsize);
+    nEntries_ = bsize;
+    outerDim_ = 1;
     return true;
   }
 }
@@ -197,10 +216,12 @@ void TritonClient::addEntry(unsigned entry) {
     element.second.addEntryImpl(entry);
   }
   if (entry>0)
-    setBatchSize(1);
+    batchMode_ = TritonBatchMode::Ragged;
 }
 
 void TritonClient::reset() {
+  if (!manualBatchMode_)
+    batchMode_ = TritonBatchMode::Rectangular;
   for (auto& element : input_) {
     element.second.reset();
   }
@@ -237,7 +258,7 @@ void TritonClient::getResults(std::vector<tc::InferResult*>& results) {
       if (output.variableDims()) {
         std::vector<int64_t> tmp_shape;
         TRITON_THROW_IF_ERROR(result->Shape(oname, &tmp_shape), "getResults(): unable to get output shape for " + oname);
-        if (!noBatch_)
+        if (!noOuterDim_)
           tmp_shape.erase(tmp_shape.begin());
         output.setShape(tmp_shape,i);
       }
@@ -252,7 +273,7 @@ void TritonClient::getResults(std::vector<tc::InferResult*>& results) {
 //default case for sync and pseudo async
 void TritonClient::evaluate() {
   //in case there is nothing to process
-  if (batchSize_ == 0) {
+  if (batchSize() == 0) {
     finish(true);
     return;
   }
diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
index 23512c91780ce..e037b8887a87c 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonData.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -25,8 +25,6 @@ TritonData<IO>::TritonData(const std::string& name,
       //ensure unique name for shared memory region
       shmName_(useShm_ ? pid + "_" + xput() + std::to_string(uid()) : ""),
       dims_(model_info.shape().begin(), model_info.shape().end()),
-      noBatch_(client_->noBatch()),
-      batchSize_(0),
       dname_(model_info.datatype()),
       dtype_(tco::ProtocolStringToDataType(dname_)),
       byteSize_(tco::GetDataTypeByteSize(dtype_)),
@@ -56,7 +54,7 @@ void TritonData<IO>::addEntryImpl(unsigned entry) {
   if (entry >= entries_.size()) {
     entries_.reserve(entry+1);
     for (unsigned i = entries_.size(); i < entry+1; ++i) {
-      entries_.emplace_back(dims_, noBatch_, name_, dname_);
+      entries_.emplace_back(dims_, client_->noOuterDim(), name_, dname_);
     }
   }
 }
@@ -115,27 +113,6 @@ void TritonData<IO>::setShape(unsigned loc, int64_t val, unsigned entry) {
   }
 }
 
-template <typename IO>
-void TritonData<IO>::setBatchSize(unsigned bsize) {
-  batchSize_ = bsize;
-  if (!noBatch_) {
-    //zero disables inference in TritonClient: remove all entries
-    if (batchSize_==0)
-      entries_.clear();
-    //should only be set to 1 in cases when entries > 1
-    else if (batchSize_==1 or entries_.size()==1) {
-      //in case batch size was previously zero for some reason
-      if (entries_.empty())
-        addEntryImpl(0);
-      for (auto& entry : entries_) {
-        entry.fullShape_[0] = batchSize_;
-      }
-    }
-    else
-      throw cms::Exception("TritonDataError") << "attempt to set batch size to " << bsize << " when ragged batching is in use";
-  }
-}
-
 template <typename IO>
 void TritonData<IO>::TritonDataEntry::computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize) {
   sizeShape_ = shapeSize;
@@ -146,8 +123,9 @@ void TritonData<IO>::TritonDataEntry::computeSizes(int64_t shapeSize, int64_t by
 template <typename IO>
 void TritonData<IO>::computeSizes() {
   totalByteSize_ = 0;
+  unsigned outerDim = client_->outerDim();
   for (unsigned i = 0; i < entries_.size(); ++i) {
-    entries_[i].computeSizes(sizeShape(i), byteSize_, batchSize_);
+    entries_[i].computeSizes(sizeShape(i), byteSize_, outerDim);
     entries_[i].offset_ = totalByteSize_;
     totalByteSize_ += entries_[i].totalByteSize_;
   }
@@ -186,8 +164,8 @@ void TritonData<IO>::updateMem(size_t size) {
 template <>
 template <typename DT>
 TritonInputContainer<DT> TritonInputData::allocate(bool reserve) {
-  //automatically creates a vector for each entry (if batch size or entry size known)
-  auto ptr = std::make_shared<TritonInput<DT>>(getEntrySize());
+  //automatically creates a vector for each item (if batch size known)
+  auto ptr = std::make_shared<TritonInput<DT>>(client_->batchSize());
   if (reserve) {
     computeSizes();
     for (auto& entry : entries_){
@@ -210,13 +188,11 @@ void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
   const auto& data_in = *ptr;
 
   //check batch size
-  if (entries_.size()==1 and data_in.size() != batchSize_) {
+  unsigned batchSize = client_->batchSize();
+  unsigned outerDim = client_->outerDim();
+  if (data_in.size() != batchSize) {
     throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
-                                            << " but specified batch size is " << batchSize_;
-  }
-  else if (entries_.size()>1 and data_in.size() != entries_.size()) {
-    throw cms::Exception("TritonDataError") << name_ << " toServer(): input vector has size " << data_in.size()
-                                            << " but specified entries size is " << entries_.size();
+                                            << " but specified batch size is " << batchSize;
   }
 
   //check type
@@ -231,9 +207,11 @@ void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
     auto& entry = entries_[i];
 
     //shape must be specified for variable dims or if batch size changes
+    if (!client_->noOuterDim())
+      entry.fullShape_[0] = outerDim;
     entry.data_->SetShape(entry.fullShape_);
 
-    for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
+    for (unsigned i0 = 0; i0 < outerDim; ++i0) {
       memResource_->copyInput(data_in[counter].data(), offset, i);
       offset += entry.byteSizePerBatch_;
       ++counter;
@@ -266,8 +244,9 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
 
   memResource_->copyOutput();
 
+  unsigned outerDim = client_->outerDim();
   TritonOutput<DT> dataOut;
-  dataOut.reserve(getEntrySize());
+  dataOut.reserve(client_->batchSize());
   for (unsigned i = 0; i < entries_.size(); ++i) {
     const auto& entry = entries_[i];
     const DT* r1 = reinterpret_cast<const DT*>(entry.output_);
@@ -276,7 +255,7 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
       throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result";
     }
 
-    for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
+    for (unsigned i0 = 0; i0 < outerDim; ++i0) {
       auto offset = i0 * entry.sizeShape_;
       dataOut.emplace_back(r1 + offset, r1 + offset + entry.sizeShape_);
     }
@@ -296,6 +275,12 @@ void TritonData<IO>::reset() {
   addEntryImpl(0);
 }
 
+template <typename IO>
+unsigned TritonData<IO>::fullLoc(unsigned loc) const {
+  return loc + (client_->noOuterDim() ? 0 : 1);
+}
+
+
 //explicit template instantiation declarations
 template class TritonData<tc::InferInput>;
 template class TritonData<tc::InferRequestedOutput>;
diff --git a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc
index 084686d56a653..07d5211dfd1c3 100644
--- a/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc
+++ b/HeterogeneousCore/SonicTriton/test/TritonImageProducer.cc
@@ -76,7 +76,7 @@ class TritonImageProducer : public TritonEDProducer<> {
   void findTopN(const TritonOutputData& scores, unsigned n = 5) const {
     const auto& tmp = scores.fromServer<float>();
     auto dim = scores.sizeDims();
-    for (unsigned i0 = 0; i0 < scores.batchSize(); i0++) {
+    for (unsigned i0 = 0; i0 < client_->batchSize(); i0++) {
       //match score to type by index, then put in largest-first map
       std::map<float, std::string, std::greater<float>> score_map;
       for (unsigned i = 0; i < std::min((unsigned)dim, (unsigned)imageList_.size()); ++i) {

From c96bb07f9ad4b2651358117711e08b52d3f562aa Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Thu, 14 Jul 2022 14:58:33 -0500
Subject: [PATCH 20/31] fix nEntries handling

---
 .../SonicTriton/interface/TritonClient.h      |  1 +
 .../SonicTriton/src/TritonClient.cc           | 21 ++++++++++++-------
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h
index 8e6bb3518633a..99ecf48bd3376 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h
@@ -56,6 +56,7 @@ class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
   //helpers
   bool noOuterDim() const { return noOuterDim_; }
   unsigned outerDim() const { return outerDim_; }
+  unsigned nEntries() const;
   void getResults(std::vector<triton::client::InferResult*>& results);
   void evaluate() override;
   template <typename F>
diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
index 285b39b14708b..4195c0d1cafee 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -186,8 +186,12 @@ void TritonClient::resetBatchMode() {
   manualBatchMode_ = false;
 }
 
+unsigned TritonClient::nEntries() const {
+  return !input_.empty() ? input_.begin()->second.entries_.size() : 0;
+}
+
 unsigned TritonClient::batchSize() const {
-  return batchMode_==TritonBatchMode::Rectangular ? outerDim_ : nEntries_;
+  return batchMode_==TritonBatchMode::Rectangular ? outerDim_ : nEntries();
 }
 
 bool TritonClient::setBatchSize(unsigned bsize) {
@@ -202,7 +206,6 @@ bool TritonClient::setBatchSize(unsigned bsize) {
     }
   } else {
     addEntry(bsize);
-    nEntries_ = bsize;
     outerDim_ = 1;
     return true;
   }
@@ -215,8 +218,10 @@ void TritonClient::addEntry(unsigned entry) {
   for (auto& element : output_) {
     element.second.addEntryImpl(entry);
   }
-  if (entry>0)
+  if (entry>0) {
     batchMode_ = TritonBatchMode::Ragged;
+    outerDim_ = 1;
+  }
 }
 
 void TritonClient::reset() {
@@ -280,24 +285,24 @@ void TritonClient::evaluate() {
 
   //set up input pointers for triton (generalized for multi-request ragged batching case)
   //one vector<InferInput*> per request
-  unsigned nEntries = input_.begin()->second.entries_.size();
-  std::vector<std::vector<triton::client::InferInput*>> inputsTriton(nEntries);
+  unsigned nEntriesVal = nEntries();
+  std::vector<std::vector<triton::client::InferInput*>> inputsTriton(nEntriesVal);
   for (auto& inputTriton : inputsTriton) {
     inputTriton.reserve(input_.size());
   }
   for (auto& [iname, input] : input_) {
-    for (unsigned i = 0; i < nEntries; ++i){
+    for (unsigned i = 0; i < nEntriesVal; ++i){
       inputsTriton[i].push_back(input.data(i));
     }
   }
 
   //set up output pointers similarly
-  std::vector<std::vector<const triton::client::InferRequestedOutput*>> outputsTriton(nEntries);
+  std::vector<std::vector<const triton::client::InferRequestedOutput*>> outputsTriton(nEntriesVal);
   for (auto& outputTriton : outputsTriton) {
     outputTriton.reserve(output_.size());
   }
   for (auto& [oname, output] : output_) {
-    for (unsigned i = 0; i < nEntries; ++i){
+    for (unsigned i = 0; i < nEntriesVal; ++i){
       outputsTriton[i].push_back(output.data(i));
     }
   }

From 8b95069ab5950685567b74823945eafde441cf33 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Thu, 14 Jul 2022 16:15:39 -0500
Subject: [PATCH 21/31] handle ragged -> rectangular by removing entries

---
 .../SonicTriton/interface/TritonClient.h       |  1 +
 .../SonicTriton/interface/TritonData.h         |  8 ++++++++
 .../SonicTriton/src/TritonClient.cc            | 18 ++++++++++++++++--
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h
index 99ecf48bd3376..cf2e438db2856 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h
@@ -92,6 +92,7 @@ class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
   //private accessors only used by data
   auto client() { return client_.get(); }
   void addEntry(unsigned entry);
+  void resizeEntries(unsigned entry);
 };
 
 #endif
diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h
index d10cc2752460b..ee573655ddf19 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonData.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h
@@ -105,6 +105,14 @@ class TritonData {
         createObject(&iotmp, name, dname);
         data_.reset(iotmp);
       }
+      //default needed to be able to use std::vector resize()
+      TritonDataEntry()
+          : shape_(fullShape_.begin(), fullShape_.end()),
+            sizeShape_(0),
+            byteSizePerBatch_(0),
+            totalByteSize_(0),
+            offset_(0),
+            output_(nullptr) { }
 
     private:
       friend class TritonData<IO>;
diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
index 4195c0d1cafee..0d2603f32fde8 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -177,7 +177,7 @@ void TritonClient::setBatchMode(TritonBatchMode batchMode) {
   batchMode_ = batchMode;
   manualBatchMode_ = true;
   //this allows calling setBatchSize() and setBatchMode() in either order consistently to change back and forth
-  //but changing from ragged to rectangular once multiple entries may cause issues; todo: check this case
+  //includes handling of change from ragged to rectangular if multiple entries already created
   setBatchSize(oldBatchSize);
 }
 
@@ -202,15 +202,29 @@ bool TritonClient::setBatchSize(unsigned bsize) {
       return false;
     } else {
       outerDim_ = bsize;
+      resizeEntries(1);
       return true;
     }
   } else {
-    addEntry(bsize);
+    resizeEntries(bsize);
     outerDim_ = 1;
     return true;
   }
 }
 
+void TritonClient::resizeEntries(unsigned entry) {
+  if (entry > nEntries())
+    addEntry(entry);
+  else if (entry < nEntries()) {
+    for (auto& element : input_) {
+      element.second.entries_.resize(entry);
+    }
+    for (auto& element : output_) {
+      element.second.entries_.resize(entry);
+    }
+  }
+}
+
 void TritonClient::addEntry(unsigned entry) {
   for (auto& element : input_) {
     element.second.addEntryImpl(entry);

From 733d151b656a03658f2b044cea9dd446fae67213 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Thu, 14 Jul 2022 16:33:03 -0500
Subject: [PATCH 22/31] update batching terminology in docs

---
 HeterogeneousCore/SonicTriton/README.md       |  24 +++++++++++++-----
 .../SonicTriton/batching_diagrams.png         | Bin 10530 -> 10467 bytes
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md
index c0418b1517dc7..759b50676040e 100644
--- a/HeterogeneousCore/SonicTriton/README.md
+++ b/HeterogeneousCore/SonicTriton/README.md
@@ -38,13 +38,19 @@ The model information from the server can be printed by enabling `verbose` outpu
 
 SonicTriton supports two types of batching, rectangular and ragged, depicted below:
 ![batching diagrams](./batching_diagrams.png)  
-In the rectangular case, the inputs for each object in an event have the same shape, so they can be batched together.
-In the ragged case, the inputs for each object in an event do not have the same shape, so they cannot be batched;
-instead, they are grouped together as separate entries, each with its own shape specified explicitly.
-
-The batch size should be set using the client accessor, in order to ensure a consistent value across all inputs:
+In the rectangular case, the inputs for each object in an event have the same shape, so they can be combined into a single entry.
+(In this case, the batch size is specified as the "outer dimension" of the shape.)
+In the ragged case, the inputs for each object in an event do not have the same shape, so they cannot be combined;
+instead, they are represented internally as separate entries, each with its own shape specified explicitly.
+
+The batch size is set and accessed using the client, in order to ensure a consistent value across all inputs.
+The batch mode can also be changed manually, in order to allow optimizing the allocation of entries.
+(If two entries with different shapes are specified, the batch mode will always automatically switch to ragged.)
 * `setBatchSize()`: set a new batch size
   * some models may not support batching
+* `batchSize()`: return current batch size
+* `setBatchMode()`: set the batch mode (`Rectangular` or `Ragged`)
+* `batchMode()`: get the current batch mode
 
 Useful `TritonData` accessors include:
 * `variableDims()`: return true if any variable dimensions
@@ -53,7 +59,6 @@ Useful `TritonData` accessors include:
 * `sizeShape(unsigned entry=0)`: return product of shape dimensions (returns `sizeDims()` if no variable dimensions) for specified entry
 * `byteSize()`: return number of bytes for data type
 * `dname()`: return name of data type
-* `batchSize()`: return current batch size
 
 To update the `TritonData` shape in the variable-dimension case:
 * `setShape(const std::vector<int64_t>& newShape, unsigned entry=0)`: update all (variable) dimensions with values provided in `newShape` for specified entry
@@ -183,3 +188,10 @@ The fallback server has a separate set of options, mostly related to the invocat
 ## Examples
 
 Several example producers can be found in the [test](./test) directory.
+
+## Legend
+
+The SonicTriton documentation uses different terms than Triton itself for certain concepts.
+The SonicTriton:Triton correspondence for those terms is given here:
+* Entry : request
+* Rectangular batching : Triton-supported batching
diff --git a/HeterogeneousCore/SonicTriton/batching_diagrams.png b/HeterogeneousCore/SonicTriton/batching_diagrams.png
index ca438ae2a4ca2ab94c5bd14aef4b940804c474be..6fa2a575ec36d255a74cc62e98f390f477702e54 100644
GIT binary patch
literal 10467
zcmbt)1yCH%x9*Y<B!op1AZ&1g!{RIu+zIZ^!Y=M^0fM``26uN4P8JI;f#4EIfF&Vl
z$XkB*-Fo+5_uhA_-m9&t*_l44yXQ>z>F@h`B2|@Tai6|?3IG6b<>jQ*0RXf;)FF+9
zj=JMT{{|CvqB56LR{{WhUIPGuAppP)>Q>+(0N@S=0FF!n0Kp6ZfW#@MQ%x9E@W@P2
zRtoSpJbt1Nb%pIDr{{_~!~Qw&(uUVje=yzTm83EEF@ZRw7!Y!nL;wIC(??U+O&#h<
z<>caMVQmMea`SeAQ+e4t0RXz83wj1gMc`+5mrTF0WE$ZDz%68()@{e=hJ)k)H(he(
zYES_NR%?kv1-&rQ<>e*iC64IJBmw(o75KS!#_FWm(`_YI{YHeKy>#cjN7n_7m$BYv
zat2SB_B*R&yr_;6E%uZOWnEI<g@WjQr%-V%R>B!$A$#=-cV^u2>Jk=~p`T5>i}ZUL
zt;*=SK^Ia57vQsuJm<F-XWwO{wi+b-SS87OyfCfaA>w%y%Ea|QAQ39Y$!T8NRv&XK
z%jJX1f@UP7eGC<Io=DaV(wcrOLbCe1M|kv7oQSIj4mZaUO4LKd%Ps`4#<WWV2W@fH
z-yuY2=|3d0?<k*{SPj$eB*w@VOD|fX^JgW{j?kx4Vd8=E>#GUSfa>8x5n2$BRLReR
zVju>pvqu|PyaY6}7hy+UtW6d*j}vh#V#>1{BYlg9^gpw`UrDA)+RR+&V!2!;ky?`Q
z=UD5Y4FE4s^-w^h7fsbXUG@o>8Y~bb2V0;9uzKTLAQ;yH=dxDz^RHVjGRDsJgnjxV
zj*_Ytd<vH_&Jd7(DJ3&lQR_q02fP%fXeu1sr4a5YI*c4@{1@m`PsW2nQ;yMI8v&74
z?$d`jbSbc1L)edKIauT_aA>k-Dz-Q3(`PHY3=?#!8yUlWsI%?3Ni42bSnn#gZLG(k
zH>mL0Q;#2qD~@6x#@>O{A5-w?a<lIW)JD>Fn|pee|AY+1mi5mN^cqIC;N(<)qrM58
zw+%m!xXx88ibTFas|f;U;TDD{%y7vP`NwJof;hJ|MR?Hp`X1rCv4*xbbY`3Z^DbZA
z`jo}8Qe%23Phvx{hjGm~#VIEH0|4__Z{Idfd$LZA@jqqM#NyslMYEmAXNxy`rQs_3
zJ?_mZCWJrFXN-!Gh8J-pdw{fh{+xDzFR$xr8ST)~ab@v+w(w^x|7=T_wQl*Rwy%qX
z`oFSMfAvis&zBQz7~V_Q`5-LEHp`X0q+J(`V45wK>}0rsOyKEERhrYx?b4@S2z%n~
zS7E(zX<;n1KIx@8HF)w_ewbj}Iy8F#yT>O@=HyWu9Nif6W6#dJ(o%+@7xRG6@iOmZ
zo+qeerLWbMzR9BeE%TlxZ3y_f!o__2Xk_tve%>dTd)(sdcKbC2LzQ`}X!dJ7A)Yu~
z>CUe?g^R}U-SwRhQuo;f^y%DGH(>=uLxypyx6ECD78S+)lzIQs`)P6esXb6cFZ9sc
zj9u=BQL0hUswKUvx{avXU(15lF7qzt7R$&Ex|T~#4+5L~lnLvKcYXKMl0P<8W{%aq
zs7y##^b|FYtKRy2%E?$QyO<7OI*aCQyRl%?=l2Uwn>{PZxWx=8tI!st%Namr34C*?
zvIi=k;G(jT=fgGX!10#B(g6Ud>*S@xHN94U<o47a>$`mD6FmHd(fz{;<P~;#gZ6SQ
zvPij|Qk*=)RQ<%M@ICQc0kjJ&bq7<{H-V3to#!JDaV#JIdS3QwIgot4@I9ZY<(^oI
zl)5Ng3w=`I5v|iy+KW#u+KD9>I#mk9wDIljmc7b6-#TG<<yCk?Szo_7P$*vu8M$lt
zIG>&9wUvqeU<8Gb5VnR07^R;~xoZ*sb?}Uy^6&X*-}me4vgQjnXruF<$@?W}L~7gb
zj|`z4qxjN-bu57`@+j}ltg(Zo6mj6K1EKJn#^l`O>>ccpjUjV4>c?zg9{Ul?v{`Ru
z#K**<-K~DPs@Va|I!eL|?62IE{H)6`fu?h}=<2`Z8QYa6mYu=8Bt@j{<e@OpK0+9{
zYGk*g{WkU3^_-leb^=Dr%$%6x)<d5VY9|e+ZF*Nl*BjCv$Wjmz>Pov36@}23`1Hs0
z`*gDEqMmZUyhvhsIqT50O!j?&U#Q{l%dUADWA@=*(6$_-!S!t8Xo+F%*uGR;hvq>J
z%!7Ahqf0+*srZV=?ak?88*O&K2=}FYNaN%?paMfR(xswF{Yb#`^8477n{N8lI>l*7
zA%o=v`#uqVL5N;fz(k2tANaD(q>Q-BHT4g~lAfeMieclVcvtJdMVT*s*NgGbVK*#_
zds&OCn|;_4BFG`O5vN!gGsyS%5m-WP5&i0FrK<ibhYs9RZ*zyzxGga#dio_iHo$Af
zBU*r3L_~zzAVp~ljZIa;aL=i3`&e{v)UvxP>5JyI4b$(X0z0hfzTE>&vUvOZFSZ>y
zsgqA26bj97Uu7;!&FjKvRmE4VvIkENzc?$WIya66XfEWM$&RM(S6^Vea;Rr?`44V|
zviF@2yW)AexKt=O$+^2;x0m>NM7d@&o3ITRfSR_~pL)iP5&v;w{oFlov#Xw<6bfp&
zDt?*-(b~h|s!rQ(0_Bo?a2%B;B7i#=wm;dyw1BZqAnPPoN#D4YN7q%!N@*S<@_NHU
zTThm+X{A>7adjGJ`L*{d)nNy<bqXilkz;(G@kfSHjUDW=(t3soZ|%vn6_I*zCT?%E
z186`wg9>|7to2qCWRGhFvbLM@JJz`m1_mNVy7PB6_LHuQFj;X%BCEb=_u8gzyFlWK
zOMH9;q#AcM=2kmE6=~*w_DaX$ec8la!<yH^$+L>v+#?K0ZQm2S03|D1>$cw-2A({%
zskLr-84({G?rZVmW)|mI^3${Jah3`l1vS}Jb2g@`7;Lgca7FHTq77uR?sSivl4_V8
ztR<pixQbPul!Bj5;A(oQc#><vHkl^&(ylLov*@<E+lHCn#J5kKSb^txdSJs_Ptl+Y
zS8MjsuGDKB_3uE9y{tn`iMZD^Ou(w9zOT3@Z=55}mfe+hs)Jv)7<GlGnMnh@s&CYm
z3d|}E^Pk^~rxF>O<>rX0nJ&%LN(N%Sb@OSz!70Udsp4HVQdHYs_O?%nejU5(CF_u8
zR+iDC!~J`X2fK??8;HcyR=}nwQb9^dPxrQq)C>2n#p<f@MOGhn8vDujsYfw;z!e<F
zN+LmyWevo)krn6m)quKR3bACC9A61ZpU+#M7nAUBlL+lym=r(XBsqsPiPx(lY{{`4
z*@m@-r~yfXi(#)jcv5%l=abC-a%wrqEHWP_q3eP?w-L`M8|51yoAt}54lm*Z&l?eW
z#H6n-#TCWmjP{EY#u*qdh&^mvo8R_!%l~94VH}ga8z5-x54F4Rv}RY@^%Bxj_an5c
zHq0kJ8=BNk7L9Py9apZ9ZU5-EWNbx~KXJA^25O{FPkA@Wg(i$G&hRmZfZcYb=Yt3Z
zCk4J-iO?&hz2qmx9xd^sIe~+PiLV<6Ea?b1H}PRzwJO&VOi#Tm%Nj~Mt1}hF`JTIU
z6QNpr+1aZ0?Ta&9Xqc4V@zsw8rX1CJAkEmsUk_P_Drt+J8qBH^XC!;3w^^FMepX>c
z0ChfAnU}vcQUA*_(}NVDOCP2q0X#w5A1(02zDln2B)eJ!G72`O>uZz<O(o9Xxmxva
zNtc!JYNsn^9gn*kf`&vZN>qm`7hGMn)7_FeIioP=r6)l;RlczrMmQinMK^uB{>j&$
zY!&G3UPF88Ico=sCFN`lQHF@dj@P4IX;l(Ye?~R0BMC^-c}`g*Wmp4GO%QXtBxF&<
z_N&L4WCRWD?LZLHhTXhxO03wb@^0e$fwAX{t+<ew&98G3!Io6&&A{Ip$?i{nzC9Ic
zefK1cT3Ta|LDq02#%UQ6lPkKUsboSLM68sGYmUFHy8^~|U(pDC_DBOhNQz#H2;;<h
z`%sd>Og=&d*5Zp1H?6cI4gRS4|6>TOWUakQ!ck(1%NfD*INI=HjPO0M>K%FPy;pXD
zD<&Mdj8D*z$f$8Jm*J<@lhL*P#&Z4&2i?+~$X^`T=)b=7V`~j*9ZAcrrnWMsF=TL$
zu}!dbR$DkA1U$y66krSO=#{$4DJVUi(u0g$8H<m_cDdX%zlgcojHTyBEnnR>y=c^*
zL~L?vy%^qL%)dBT{<ipuQ@fyj_@vdVZe})u10p@4Shl!eK0_{<K+b=bT1F2P$?u%k
zWnj%Oa&}P*Qiqui_se3RCk9^<ST>RNjGkUPhqZZ)GIklsX7?<KdqubkPaSoLFyt2b
zc?1}x4gQoqd6&22uYwbgb?LNJew?*XE<{UQc9t5xy;Ef8#Jxaqpb0UHO@1e;z6>5z
z%V?OK9b~UOKT9_cPM2pGled)mt)c4&VIN0By*wZNr@$|kUPXOd;+%Id_Sx^6M4ut9
zK=UWav+K{qZc|B#F;1CQPv1tu$96uJjC|l^S<7)t^ZrKKv!$$}C{rz;M_s$a5h0$_
z&2=#EOo`4i75m(fDq(#CxcP&2#%92GX~CNj89I%0jVU&c5fWx|&fMa(ym{7;)<+=u
z%@ZM{>1XoZPx{<;oO4|LC_$dX&p-7>l*Ghef4kxJo)W|Khp{syB5m~16)R^_?&+s}
z`KiBM`kb-B3z5oh5^oh6MHNAj80-D)JFo6JngMlIazD{|0`zgxZVE15nEHO?Hw{uO
z4cR8z0?A;QJBtZF+P}qdla|PMCprj~XKq?w@#59V3-4DGynR1*B0HIueIqwmMlZt7
zf^6Es&MJ5Oz<F9+EiD}>=Pi0iIBW->%qp33UGm#(S_!CQB9duL#1-fJlZrnU%g3)W
zd_<Lt4bT{AOvAl}gy|#>x_<s;ef=?rQ7CWo;2NLVNEk%L<iR*JNWDI39esi;FLrm=
zvHg8ysmtN|WR80z){%-YUyhz5x3Sq+5;)y&D_j%Iyd-$>C$q{&#e+k3sRVq4e5q2k
z|5_FaQ@T;7g${Z#+q5FKH&cZ+>S6@E7A3u381Go~9-5Me&6rF?NBqm=^Pf>_Jtx0U
z%GdvNq;nUU&sLW0dQ)?+#R5U23ILORRgj%CKF9Q5ksp!KxGxV_G`4u08)`2d_NoPO
z7qT%`*u%(T5C6F*=4w-R>NW)PqUF>6mpLr1V-6n#*HVc9SxjRTt9kvRGnvU{rt!nE
z!c#bz@QET!-bgR$rVbCk>b35Kx{XC>ZircU-C%IR*^C{^TBhXz*ttBEEaBZ!pr?a}
z&-j0ezBtW!YNMd;6rZ`tRl1v6tyaZKE%KH73Y{;8;Wd;lu2gX2dY%ZA<CIC0NCY<7
zxGdzb$jp%mc61AclH$%lfZ<n(Toyg9pG{1)JRYx=Yb^@=Jc5&sJf7i4;)(dGm-@^G
zT1;WME<bC9{z@=E(6jv=JSjEwpYzsB72n*c9I`MZDV<qVfZp->x5fW1i3Qa@<L|Sb
z$-4C&|AynYq%QJf={p%rfSQ`!(NZ|y!M{HCID_(g9k-R}a^FEZ+YeugpF;N|E}f9F
zjkcs|g5<3lD+Wx0Rdr<r8012<Y-_Q)CdY1d{NMWYix68nel*if<pouqoizPJ67A*M
z|5HA0IZIOfF#~cMvYkajFnGxHT)EQd54zu1-ZauSXca3hHk{RADU~OtpF0(KK{P}V
zaNvyB2E{b62nifrO<4g`D0|sgPo(hT>{Ho()(Qr;dZFKaz#a?Y%r{z^17l2PT-qAh
z<&H5Qhty^8=l;$*o+_{mYC(h{O_?7HcAyiYRlJVX)HDBsjvr1iURz~N`nb%jnw6Rw
zTVzwwZO|+Qy;^VKV$zm$BsPX+1cBx#BrARs$70-+p$5bOiLfOVL4~uLY(IH}lz;&Y
z0+afC5mFm(6LE_$>B7s^DMaLx$-Wb?C)>qa`Vht;z%gA^)Oo!mLJO9whj=vDVl;jt
zEs@MSTlnyRy7HI-2htHX80}^huyy48u?PiIeiqMaT{Y1JCh9F?DCtowB~4&_0sDkq
z<+L*+gjz&5(D+e-^xwv>rx_V8126dXK6XH@Om8H0s>K%l4hhACUu;|H3oG7j>nHut
zd9bwR11@HupVGZycCg`k@J%|&y^RW%^+~#yhjul%*W(uDzm0$?KSgJqUUr@cG!->H
zaacW;O*lMms~E0hE#o*dSG;aEY04W{JxJQzA^2Hw(8{sbDi*~M*U`*hYtH_oZ)zRR
zDCAmGl&L*dT%~=@aHrskyw%j#vt~)NemwcDjPLvtl&g9W3s1ly=xwBZwH;|sarbhH
zECZDqvTV%ap$k$=Zw^y$iBV%!M)VrSI=7a$OSlad6x^Sk8bO3X4a=IB-I97g(w7~g
zR_7T-=CaUtz0>5be421<CwX0))$f9X;1eY^oa~Nf6YfVc%`MFiq#QBmjR{!nUSIE0
zR${}lw|CBiOR2JLiT1*vey^~xGq^GQkki%g(?@2N^!GyK7mLdTjCAI~7|Nd@4iLSc
zoCYPCk{?1KW27YYe5i4!_z7~tDgNm5H8KF-ev#RdEN9&bHACMqK}IJ_+E;uUv+=Ix
zbyM979d~O)aR<H7DMK1DB6UywY>dl3HR}WxX#mV{{#uaYjcjd#NDk(yRWmJWT&R>S
zz!tzCxI54Y1`z@Z>-};^j}dCs$k%hmuE=G%7{#QPsW7N;5O+2y&bPdfKup4uJ{#DN
zl6<bw1}g3_r0$fx*+k;zEB$Zlf2&5qvP*};W`OChXb*F^#S`s?LZQD1nWW0gTWfh9
zUcL*6>2|1FEB9g0^z1J8zlg8D@pHw3k-m}Laq+wb8MPob7MRv@ufDq6JlbeUiFYP&
zKFWY)QrhhZ?ql8_uh9(#v6}t(p?mFko!DnfUuN>N?k;%G%~e?xl@jtckf(PVERa9T
zX3Bq0OONBC)mm5~7&qKjq|>qO=w4zH)jtJwmYdU|zkt}fJ48f}3#v=Ps_<-6af>V?
z#>1xH8L$Egtld>XC>(Z3M!0=wdD1kPMp@~>YJm9#d=sYQ)?UI?b+pI$P)?~le87!D
zXfhkRoQMEoyt>tua1@C96${Cm_}#R)O~hFfhiN`%95y2~WS$nAVPse;6tMh!>A{7P
zAf6zhzVde~HflI=;@hEy<o}574mv!=)|zg$lYA6!U!`3a7Lg&9aj&X!D>n>ZP)2ZM
zzK#m^WLqCo73%fFH<nu_+;%hSWP!)Am&#c78E=Xdvqo*3yuGL^`_dp(9sEZQr9HUk
zbO>Y9E~=A(+~Uy*6+XKsnqX?|-6CsUgs<}O%s!4kdFIWo9eUHv`2_T$rH+!5-C?mS
zG37Z+wXf0piu2&ln(2OkwLSL}P^|j^$jQL9G+^S|XoD@x>Uk*wPb%3idsS8Uk!n}8
zeKU#ynM14<=@R5x3kw_5RM)sGn3-{qO*5q?qDF?9pJX&OX_(xPhZ7sBoBU1}_xd!+
zk@Q1P46*4aa(fbL%I#nttF6(rQ0)Om(UBKA13NvJmqGy!ugHLDwFm2APB89e%YEHQ
z)z)wo;PEWBLHF89lZ3ul2;rg<1LL~yyY5T*spFU2%M2BIKx23NDAyZH#kv^0ORW^O
zZw^h8Hc$+5X%&~eJ%ldx;1)$Li7D17z19d+@xEk@4H<HEbroW###hocafT#2ogJ$>
zT5ywLL}URdMSTmFZcFOFFPMM_7)*m3OA45Gp4W9~lN<UiGTPqd8aB>yoFVaSgb+O;
z2CX6O2x|AgO-_{voL>VS&$&S7Gu~)nPB|LzobS8_lsai@Q8nLdQ(cLlv}I&5O{r<s
zm0F6vRAaBBO7Fb&O=J0@j8<E99tQSg{e05`S)(1N)))or1Szp=MW3@QUc@l7^mn*c
z!34e+{e1LntKEvGkkzHG^+7FAJup_6NU{I=fv(Fo<JOJyjYd%r8YXw6b`o#LqG-CS
z22rBD@_Oy!zEga;VVdL-N@(bL4tGxa3)hO9$~T}Ngi>0xU+aQ${zR3dRDxm(WFWj>
zz*$Q-u{W=m+n)7h)JPzUa+*FCbt#KHv*qI4Y;kV5B%PuNZ%wrlS^l59%RKLICrU4}
z*)tQgKRJEQVBUzA|K|RSY=`KR!HSQrA4qs@AgT$Eo;%`vy|$zaFxp{eTBrYYrK>-R
zS+TkuR3<kP5+lcW6(DSo&Bd;IRWQ?BoS>w6J=>wh1>z}1T+*`J3+T!#M2sIU(;za`
z*4_1#O8h%9y<)Zf9P9IDE85Xx=zk&%+xS)_)G@OaD`5h3Z&N8Fl<9X(X=s216kTz)
zIqoJ!Lh#h>dciLkTAsz(^|s@0wDD?UogbGX;^CnIswGB;$C1jHkxNP>#S%|&zG1T?
zy+;pLJmSvfe)DD5m>Yckz?U_~J{oSHTuoo}-h9zG-m*^WK_0lF*G^d&w@t`{T~1Tq
zPjN9f8MCvj+E;dWF{1*M8Ipun<yBbd**aL6wDkc+_tUq%IR6&b0Qx>h5to(Jmem33
zib>5!GH+PltKI8k9}Qu!(gM*EK&^a+wMv=Egy2WBLS5YD|2X>#`<BSRpK-oyMBlZ{
zIVk?PjsJ^<v8707&RzZyYg|_(JZ#z@+Xg7mVf!x1b)5vk=D&rtvMIqA@UDjsj-Wro
zV&qVHm&;5B%&sMA4R<nZ=>NJR*MaDthdN5OG@)yxNL$cxAxi3ICa%t{T%39Tn65Na
z(ZPa~+D=zfoyNbLw(%BqL`JCdt+nfBPq4xib-`vU`>dm_zR77C-?TC!3`N=`!X!lm
zIX>dYu~Z7nK=fb7<3vI&ddo4Z%BVDCr6ZjXo;`VSZGB(v?=2DSqL2KtD$jqHm$uk&
zua@O9y>o6Fjr`@HwP!;HysSTStf65?12QowDm6~CfAT-k13fZSH#3SFeBm`6&%iP#
zixLKIZ`02ywl))TO5c?8iTGD|&>9*Nn)P@dbhgHXMOC<{2wVg*(Z2Y2nF*1R?nEAO
zHCos8f@#c*oWAkXoN~1?uY0+ZxvvklnR|j-nL1a>)|Wjr`Pbug($x;0)bE@z$}pIO
zD$V}zvL|yl1_-&}4Z_nV`_e3;k4wHF5vbZYP|Tk12mF7|^Qd+5|Ap@Ni>D4hH*qOw
z8cLiS3*--Z=Ib%k<Jv5~^S=9h?goP@Ps%PLVp%SwqREr+O0be-TfJLcx&_w(7vL}d
zU(oyDyMLwkCB|M!XQ>iU<E?5C>`s9*AU|TRti^N)(~&jvQ~B|>)=yk4`QK{pZo@)M
zJMS6=MwQ!?;}XM$*gV>XePh#P{5~<684I@5QbaF44YyeftwZtKv+Mbi5kcrRC^k8o
zkXRTSlMyJA^<;KpEht%fXb>`V{pDR>JXQO!&<NoBD$CjW9dmeB=ZBakpW12p1IGMy
zlk~lh(NZt`PRc^t;#~^xjpMvm^34yXH<<kPP|^=0`P$o-QHWNY?hb4to|tdKiP*xf
z=@*(b`dAN$y!)-7xUwK^WB$PQxU(KGgIRQD4l%GK+)w@o7cXk7!6osmG%Ii_)0LDZ
zB78c$^n+_!3@l_^O2brvRgwaeofjikZ)?A}X+m-I5ZBX3pUX^Xp!xVn^CylOK11CE
z#C=V+B-YjZ2?-YLvhA_gt(<q(*;jXW3I)F{*E<kx1<4LjMbXg0Ya&xbl6A~+NcAT7
z9Qb@ghsq@JZq;R?F+~tIi<56saz8zso}cDbE$=<|iJOtjZ!Y9{zcZgcXk?;k;MB6D
zXU);@(OdUIqFnssr0>&&vW4QF_2XXQe{tA8vb>xXMw`rE3@Y)r>UpwEafX|*nZ#6Z
zB_$A^*slx1j}N@Qe`J)1Z?Q4$q??4W^W-z0Og(@)rA+x#HQuT<?Ujnmo>@o6k1O1L
z_X>J_L~b=9#AK_cMx*`}uTP58K(EQ7;^!JK<(1v<4O>J=q8&^!K`DS9-KA)9Qlmjo
z&hpX6l!j{fpnu$!*6taI=bgStJryF3;mTVC9sV2-Q!eV&Q>2rer|W8=uOuP9Xh5w%
z7xJRkp!c>})HNeQr8p^FSx>)?I{aXhH%~kG3gNBg@BaVA?)wg38(fRteH3j?S}-}I
zyo-K-j^uwfYIe~B?>O#bH&FNM^e4XMU(`|NE6JL}uOIh)r!y~N3YZtQ<!E}ZyxvSR
z7^e^S-$(J$qNuCeA|2xK3g>^&XW|Buzh;-)ffGef6P0u-@5)+PZP8oP=15#_G{~=a
zuOQsjer}T^Z2P}d>~|l4;eS;54sB2vaD)V(GW<DsgnqFCg(QVvZR2kxaJ_zF_6T{=
z)4&`ZE#x=nHh2g`La_1GGVCc6kI2WKsceXqRu9{XXal_?1&G6D$gtVX*?nKRa{q-P
zr@nnCr4xwNhhrm=dHoFaP+^X&Giuv365l>-T4$bx>#2i00k2{diKHjmkF5ni+j*g#
zzJ7NE3YYYgCqunm=OS&ror$V+C@J1vCw*3LWWd?CWUhsdNmKF>4VUkJr#)L)k^icl
zJYd-#X`0fjbcX)(Z?Re(xo`5qpge<u%m4Dc|E6kkFQe3#*K|WVzvcF3yVUn63tGV2
zeT_N2ALH5f=KLq~#KQD`9W*QZ=&=9zEBd!3IVQIBPWi8aZ)T?A?H980@6UD?>i<ct
zC>)JUiD1CNS7|LIe1O<0iK@T6av)Igr!kp7l^JVeQ6N7(;@p3-t)~3xSej=ny_wB1
zipyzK=4eI-r?bg*EcQQW1gXdGJ<F=~z?gc$wbjA)q&-pte$VVvxh=1{(hMZ_vmWZ^
zsCdbe59v!byMJ%uk%Tn29<H<;YrYX|wHaN^S#Mrk;C|!>dY9gy+{L?Q$8er9O?h2W
zlH+sl@{3Z<S07U^q*$;givny}O+#?#t5XTu()qK$#>4<GIUPh;w=d5zr+`N*TF27(
zl^W?Qa4B0!;V8!1SmDY8xy#*LHcNpDGkl`9NeNdm`-gl>L{d4ndV7HG#2IZ9B6#z&
zbWv=@XJks;T?^B|{2GliK1D@of;s_3dytUGbf2le-KDI0C!)OHO~>UV$fCcv!*R{e
z1Y(eY^uVBz+|xjQn^Y7=AdGM5-m{o~HrP(vY4)UXUosuA-GDEPN738(q7#O*_(2P`
z{ot8<h{-Z?QOEsk2Sa#LaQbEPW^S?U@K@(1;BR5uXrk(9d(wFNRJ$Qo;8(J`?0n*|
zPF%s|mp?Pty@s0a-NT@3A4BB!i^V>2xh7{@VxZqzJEV>rBM9p)N`BTJTaymT9CRmd
zqvhXtzwQB_F1W3ZL*EZtCfDeoJVzXEjtRFZFI0tJW<^L19r#l|8?+)qIz{ZZzoLW9
zilxFs#_7nT$}DeA!l2D4{_k=u$K;%>y@Eosa7*br{s^*>rsPwx^cie9&9&kc7>4ga
z@%;*lo=3|0x7$QqP(xV2_Otzb&x)i)$i{y~ad9|(D#=0(08yqI^8DgkzdFbu!2=hh
zUjQ~Wo1$6NVm>P>lcU0k+zhh4Rmt4L<wq&}Md@;KazS`kgt-u?%*Xu^r8v*sY4L%y
z^P7g2ocrz)zp#nR?SMb8Tdr!4E4A^@N+V;$H{i6!D@jFbX#DBFFNX|Q0}{1OM)O2#
zCfH6+HX^K3JKS_?&j0TAHSc!b>D-a`*)x>|IMC-){|vasyDv%2xeroWO0A(vG;On&
z7xr_(m?I3FivN2`+)Jyq*yq4j^}E%7`E@c!(chk9!e0?6WyoWT(j)E)*k1A%%^-#=
zCQto$xnAf4oh3xNw_KJpQACz5O+Sb~U!5I@%ohMtsJ)1r(M+Y7P&XBckK-KQ^vhAH
ziGEKhrXwX#?Ye6!sPSc(b8cb9+ZH1Be?kN?SIXDjsGK^rs%Gum;xOPQxF!<CV=vn{
zGHFTD7$0-I(z!24t@fXsz7g%LJ>`vCw?&pieeA-rr{$+(fsVt^_^0PA2PgB-sxHmH
zN@F1u43LtGHp)MKS#@rn8ay>4ZkZ46e7rp$iD4eCCeydvx&t@bR58+NDGa%$P%vbB
z{?HAEQ3_)=orQl?oUGr<sBM;_@Cd~~OuC^9n&wPNs=>oog!Ll~uQ4R_qBY(r4QlH>
zG9*mX>f%97gXhkf$PUM7xWrJQ;LEqD)VLTENT8Nng&K8IAMchc7E?U^Dh%Un2=6CH
z_b`>j`Ifd<NW+ly!cU9LmEc-2QqFO&Yg`8Cj&;lC54*_2oM|5<1Rhnmk{ggKjck7u
z0j3zOY>!kE7G~B-%bqqB@>(uVhYH92A4MV0%Apz|IKyxCNqG@$jIZQ9YR-}<)ktkt
z%R4~LpZWW#liO1<!_n?9;FvF;Gx(9DXo}kIclLZnIzU)$g+NR>Jiv@}q{hndjmXrT
zSgJca1$m^ieB<H@I418iO7&IA+?oGU1|^lr=HXf6p+`wJW?z_|{imI%4=fxs8>u~~
zq6w#%8FQX&d*;HpD18RvO+z`Q1QuOxo71YgYr7lUXi1u@Gl=p0Yz7O}KDW*nZ6(_(
zI!1g=|BIegPR9~J&o71ggoEBqTGtH*b%P6<yTDN=02i2xiw(@r#>u0}$tTFgBM9bW
z1M>=k!3!pZSO4N*?+CND@c!=|iXXjvXo2jX7R&`L99`_8ZUCs0lby9W)Xmz_fx`)A
zLGiw+A7w=KV1&99l(SZcyGl5^JHV_REF~Ph0NnfnUR?ZO7Ss=h#pjE%rTuqXRToE?
zyE)tiAP#dhgHx$W$xywuHgkcxcvEq5vV-p~rn6Dz#Q$!t4R>)xwF*Glv2n8V;)(_9
zyhW8WK9r+AZ4;FICje6aMG&|JIZ-ir2*QYojvmT~>tB2vT{ImX?EvEDE{<kUw}*Zx
zL2cnKs7_F!9-2zk(G%{X>S*oY_U&_A4a%GO-@TRKZctS0q5n;r-2c|*p$oUof9$8y
zKXL!R`^+zz&ZB$;|94ADN`g|3=I-`z2RC^sfV;ajOc25ix8Q?w!Pxk?1i09E_$~O^
zpl}E`8xNGv3=Zak!z|39f3RD(|Fsr*2Uj<!gE{=)YvD5E<pc9@@v?DogL&C_EO>d?
l1R$LJY!EoF0EEwi7tCV@E!K%eeKZ6B$V)3r)k>HK{V%aspJo66

literal 10530
zcmcI~1ymf%*6sv2!QI{61_&A;A-E@lI|B^v?(VJw1WN)02tH_VcemggJh%i2;SIU>
z-1pyK-db<H_13Je?y9a`yK2|&-TV8dqu#2>Vxp0u0RR9@c{wQ%0Pt)Zen_Jr!CO2T
z#gO4A8dEutG63Ms1ONnv0swdLrocS_zzqlh92f%tA{hVxv14}AJ5l%z1QSJBDZpby
z!bm&3hUzG%;{rdM{W-9qy({nwWLJ4*Y2+OwLR4ICD=qg_000TmTV2}~1oohDbapVe
zvW3vNdO1R9Jnb9-0PV0@9lhjyAjYqstk)<qFh(J)jk5-g`>*|LP$dM^P%~aNJ>U$^
zX2eW6lwh3U*TVzzuP$un!Vby=U7J(|ZLVbgcLd2CY;hRN_?TPU&ZJSmRXJEvS+?3Z
z&lMTIsU4@XTwQ&1TxOlJq)M<BiCG^L9hKL*ij938k!{MVzP$A0sv>g;Y}AQ5GLUi5
zlBJOozhOJxR3M}}hE}10lE5YBj)L|1Yx3C$q3Mn&?+;P7Cv@!JNJ@n^qB6CMyIH{X
zCZBLC`Hw?O#Enr%fG<{9#r1n6ZHM{f@TyQ6GC>M329m}cre`|V%8hglH2hy4CqBHE
z4B67WscftsK1<x9J^1*Zv=fp21+38$)A`i+f)6Sy6^OV`gCW0FZG#Xj0NAVr1To*p
z6l2oTtyC1Nd~3CyLdMZaa+c~YW6GtCy8NJr=<*^MhxYv!Ev=0@ZU%#dOP>v{AGaZ$
z{`+p@tm+}ss}me_AcmE)^zv>oQ`7oXey6~a#`3OaDhWROBKeOb0+I)MOh&d9RQMZE
zEGo$YNr!WKbg5c-sPq7KY6)QwGnwJL7oHW)=O8Pmaev~xQA2DXsc28iEN4D<qPBv$
z9{OxsB*Pm0R|HhWQpNmA6A)Qk>Xin<Cx;#gDfHCxwzRsML(+&+xh@|0QZx(pqPWav
z*Ye>x65Gj^9RIpR2w`=cK!R2ssSoU!>g}##B)e+wB$aq&j{t+-h_~V$(!SSgFSExH
zOP!!kCJt{cU*oQ@IX(B1ZdoT*CHlrh;~IE0jhj;`Gc2uYG83$Qb4coN)^a3NPj2^}
z${98`$Q7CP){wUTEknhm^=LEts_|QWllz$o-ksst@N&AMfoZiPR6bg)``m_*A8|#T
zXu)&^;8K6r+(^wy<K~iCPvSh0z}uz8dUqUiY;&acl^R!lzhw2PQ5nJD)>RYFO69RN
zQolN)U24N(B;0)HD2%_-tqsc2z@Wk9Jbo!QBB&+t-jD8L(}ykLu!}#VlFoJdpdxao
zZVA;(j4d!;(<gG55{$)s&aZptMclY7yY2?)dbvuxa2YyJ$q$H4ncOSJyL@r+{lQv1
zzt>qn0oqeA*D03;tSbXs=cp=+Q8^M>-{CTIj0@#b##}74G-glORuJrOZ4lI5D<pYT
z=HT5rgdcyUcs41BafIq3Vtrz<fLiRQ%NtQ}&36@mVXes3|N0VBay(3!w$7vG2f>Uu
zdu9meKI-ljTHPrAl1_;j{!Z%5>M$(j?R|k6mg3R<RRD$c%Yf+IgvELwbK$iLG$(xV
zo8nH#jCgs#eR{B8H4#;t&M<H4$%OiFy2%DL8)u2EqN>Szc!a<)1*^EjV+kfa5_vq;
z;Rm{x42l*2KzlAPC86%Qc$Cv#aj4*4)b2L1kLg*N@3c;udx&i5$Q&>7kxBxY1cPbY
zKRAd$C_C^Tw_q8_Ar^?p<}?$vhi-v*Nm#^O^-Qoy8u43)kRFztJi}E)LA{j1%pFR$
zUjdztM$E60XpNXsDW2%a7@y5UsgyQF_lTyu*Jb;0V>=t_?b8~s3W<?Vpl&{BGxbw4
zo=jlH$Z3y#dwnhGGY9wjz3<=9myZ<_0+yP@x~~gQ@{)@>!EYGGc2yaILh&vQa>ht%
zh${ETxXrV&OhDqJa@$pBVr!Al5(Y8H#LT-IPtaDQGzRS*d;wT7V|FPplU{bWO64Kt
z3H5YeM3OS3li)rP2v*~!NA883EZI-HATAzOK`DGqM`yW_#vZ)DJ_Fk=U8SK>;k!6u
zEpr$yVVYcg(<<J;oBB_%q=gC%Jfk=I9cgd7rrMHhrKKU99|`<{?)UO6hoVL6EO~P3
z>O`Bns#(9=d<*s6cVB*!$>r{?pxBgS(Yu|g?f<IJHNP_uAEv&W3W2>p88dL`R@C$*
zyQA{(spX{Z6z92?4~231VzIp}J3}j}Q8*}Yy!IXb>8h=Dy+Sn>`bpMjWNZf?=VPeX
z`+$K$*S4zDfOdISm|xn8I5Q{lM=9AgbItPu7hy=2i~DlAzwaXNHtr7*a>M$~8rxRl
z^~rh{P#;*Y<rzsniw%PBuV0$lC!b5tYdB8Nel{#=clKJu!TQ$LEEgD5A`=o+2Z$hQ
zO|Q{YakLilN`3FgPtP*VWSw(TxuGT=6hq;sm&*PML!P!VYlcJH1$JZePDY<Nm(-xb
zwA(cTWi@}RR0H)5az9FZ$*junR&rUr3k@S@<)CN>2S{o;+@81H^9hHkGtL(^h50Tk
zh-W%hq@4GHehl-l=asweMC$L|+XWGo&(WkO+gbl8-e%W;jZ~+O&sVKRkHfAE17(-S
znj%?Z=r^F*)EP+)z=+4AX{Nn4NrN^6F}hD6t?L^Bjyku9F>2bO`n|^F^tQUxLxrT1
z<~GZ((_C~*UBU-0tV=sEN<xTJA8(wUwnMMQg8%H#nmhcB^&b{&d<`8iJ`0S-P0Q$U
z`|s}k*?jo)WPBn+MMJ_rWwEuP+4x%DMtud@dT*U6<k+t+9V5*Kn?J|72)}jc-<ek4
z9rovAzmAc=eK9?xyvf(eoZRp$xfzf)x$&d^QhNh|G1Equ8}ZEP)KPWBAg>whSKKJ!
z>&<lahV{Li@LaSu<jp`vsMVG}*3441m*4n_7c%>G5PUSJZRx<Yg3!j;OvDB5uO8m@
zFY*wq&hEmLNY>1EG}OM{%`c^WQvZbQgqkrRdrdztnttuxqLcXw-1{2{clK8d-`DAq
zH>*T$4_D6T@FP$X%52DXDQAig7gy>C*bzERuSnb|*_gI>%1(cRR2-w+yuD?E%JNr2
z0oWM_{lE8A8<_^`RxJfNrNbn5>omCD*TS|izb;0%nyeZg&0|h8n2l@y)wAZq%BZd{
z+KW#p=CyU#aqj5SU?M%s2>X2Kea-Zs#Ck9=lxmoyNdD4L)rMW1iD~;#`<dZ3^gVFU
zp@k91HE51eU}4n5?J<<KCdb~vz3sC3S0{*-yY;oF9V-_d*0aPPMs}7dih3^%W$D?)
z#hJF<zn8<f<O6#*l=U+=B*j?-kSU0o8?t)oKZumW=>O`A`1yg6rhwH2#PS_q-5V{_
z7Atpf*J$dO605>BZhLK*ts^aUyM5N58RZb>A^B+9tDt#q?$ivfMLKFLG9B=60mm27
zmA~hZQjup$if<>w4z-yAnQ8Y3GWhx+oC}x-Tew@#f@Z>}C)HPV*0*KvJx5UIRmrUA
z*nRu(a1pU2e$QR;<grMjo_q(+MR1YtfMCjTlnzpGZQ*Tx>Qr{kry{*bg?p4o)vQNv
z&#~wRRvuyHr(dLxTg!ba3*~P)zWSKt`s=@kl?Jfpp%k?QY%>P)s|=2pRGn;tUZ7IB
zPA%CCfAUnf<04$ruig4<IQ#Uv$+%mDUA+JL>(5z3D$=cu<V0B6^K<+N9xg7_YN$xq
z=GIp751Ec$#tzz*9BKVv6K`cw3Iu(Si9vKX5ecCl2-Wc{vb0c0WV}dWa^8ghtGb=b
zV7wrwRvaNUb;>zDJr|c1k7jtR4q&Zay<kw4mw<h->Kh*)lZgQurO?jon?C>CAnnP5
zUn4*I4EYiqW#9BU9)dr1^5L-SBHxz`HU~?K+uz&XqhWMMGi@`=>NCYU4IuQ5<=GVP
zydw!FR2~ZFLHVJx0DK-K|Nph<G#uKD<_;@l?M>Rc3dz}Frb63X_2f6&if=iey+feM
zm9mYDoR>=-zV%dYD_(ftyeYQ(<;(&_r9$}z_2s~wn?pmhOLr+_{D_n^gkH6J^_&<d
z0VN_S8jYS}&Z6MvozBi3#fA$Pzn_cBq6AZeZs>9&YrmFFXNE&dwHeb^Ta4B?F%82)
zxo#1=OhLq7b-lu_O>W-WrEiy=J@i_>Sz7g^2jj%dS-D<HT@Eo0#IfIu%;F*Ec^Fui
zsX@mlg}$;#Kp@7wowBIW?8qmY*`wd~#C+TJEIea8v>^>Av*r$UxX0gwbH88d=fTE0
z#TOpOmNsrRkNt9w+_lgXP&@+WicjA27r!(mD7s9G*xbsub#$>(MFYiaZ+%}&AQTvJ
zBrvzVxU5R}viz_!zrH<(iZ(}@@<;%6h`(^}DEK9%rPv9+$MCo$7G>dnrK4?iEr~xN
z>~cMhwz+YktTrL6*vH!k`NDcC!6E67rEiTvR@23)ZJ|>ee+FFMvmVUgALJ`LykR%N
zDPq|PrtpX{?hUi}ix`FBz8%rCIn9zEJmcYZtl+!j@{2+h#NCHk0c{*P&A2$ju;C1#
z4Ies5ZK%AdK<JGo*oB%x(`IN>a&LdJT?KH7`_=WBDi39;<Vg&iO5<xHMNC>cWdO&~
z+sV_e(b}C*Pd}>V9HeHXnl+>g;bAs~81ybZa@GMV_v9{+YTI?up?Atz${peWqd-=!
zuPD9DT??;Bm3p3?p=6w3O%S4je?MSEFqVwKTF=PFM6p9c+}yG0782eo3yRTkRzhzY
z%X-Eem@PlG1@^_t>BL0iCGjpk;`Q1>M&B2Xq4)cCTh!bes(p@y(31!I-mudqvisTR
zM$fK_r~$+?26P2}^pP0$VvS2Im2M-VfAaMS&f9$e?Qd=X<iASNQejyMeCBL&VR&{H
zb03`|W@NE){$rZ2f_$yZRy;{zQbfb1HJmax<XfYz53As!+vSKxE91iGqz7xPc}w4{
z^-XfVCydFjgc;oJ$wpQ`w7IVFa;-8}$a7B8i^y=xx^B;yJp8Otn*g_)1c?dqchP>p
ztar;9FsPbic4gzgVKKaaknLX-Fe|SD7@!^v|28#UmHsP<+Iw)`ayajDylL3>Lw;4x
zd{vQVX5MLYW-T$KFsH>=cp|%L|0N}m+%QUvoxG0*$og-^p3VF)Fcv*Bp&Kn@9nU5N
z7*t-!!F7CUFpZi2s)x3V7V-wI2WgX7cByyQ@)>GQ{fTizOOFUa_$0Y0=;4b)kC4a|
zt3zX9q7*6@KHIxN-MS&X%`GuWCF5>SlV#CV<LutkIrZdEj{F;0Wjauw{v+LOMzx*t
zcpyeTCu*+Hhl?^jPOQ~;6Kfh*S(>0ZZ7rg!1>kYanA1G>U+nmE3%cZmexGL@Y>}3L
zR9Jra0o4BN+mhs$VeqJR7D$mBk~KgwBHaUg9{jJ^C2?r6?VE|!4LpvQ@?Fo<GT>R!
z{g+Yyw|W0u`FZ*HrB_Za|Fx#4lDFO*f4<*lK5pF2it2Rb);GORguK)k+uQkDG$cOc
z0oi|VuXPu-efNZ;sMO_7lKGYbZq=A;816^P?E)$2^w#c+<@auBDVRQ_|FCliT@VKY
zLT0womD#h+K}~gp6c|{P1rINI9kES{*<Cvk0e>eJrI#PRC8l_%&{mm``3gOw7+P!3
z@(L>=o%G_3he;Ar3z?4M`7rVsRU{yTC0`*<r>{*r>=rtqq_peRL=7k2WPKg-*Fkrp
zSV?VIzE$R(6lR!B){|^8vK?B=uZfgAPnti6^H{dw$^-B=F8r6V=Ss@XNsQTd><pxK
zZc*`DH2X=zC-LI8*~O{$S!og1H;1KU3*Ofz!}@d-Ij~IiK6b(PL#jsvf=ctnIh6_z
zXn}CEd02dfQjb)o!;iUVMbejcbHkf*Q3+C)X<Wke5yrzkt+>v%CHZNABxTX7Pr70M
zjZK!}&Rh{jCPQrb<)mzWU_Ys32*o1{_XiKx^e@Ws+9s1YFiC@KQ8G@B(wA7r2GY)J
z$7^N-9&4upFp1qD;b)g9Qfg)`*~X<<W7TSs`S3~HFpYc@mFpmWNM@X4NUI8%1Un)W
zFUZoVRJ9?E0&}Z#peUojps5b-C#mWo81GXtw`fbkiG=9b+Gor_0<rxCc)IH>4>|H@
zdOn))su{7aaY1!Yqim({SfKIGj<Tur)BvO?@qqU-zZRYEkY2f`s{Z8n@g#>zqEqmQ
zC}~A8$A{Gla+%T|y&BgLNKNFya>*GYlWm7i@p_bpWN8l6R%+fd|GEe{YLH4zbKlGq
zn8@}&pvyxr@6NWi48k)NUB`*yW30D;bbG#ar?eD1ht06cssCn$;o5e4Dtz%nvq{6;
zV#SR(0}Cnnz~_ju<-#$9uMT%yPq{DU$CFs%Yc}3D^8y8afr?!)Pc*p4cWj30$wk4?
z^yGEs3VLEuJrA36!+9Lp%8zkRc(=B2)=HIM(@>{Zz7MwTX+SXwcQUfRYa{F*6-ar9
zI{oWAt4hokMz`hoeecBAR61<A=D^peE{UjI#&$69GAim%fBi7chD55sG3<bv#Ggk7
zQWT=L6T`scaFw5SM_Pn!yEeUp3!}32E1Nb%<%MNxe))S3aQC20d|i(e*e^r2S00wi
zIG+dXBZBb9-sADkvhGDUI41^8*V?2ZI-Qr;pj$eS{OnyQANlN*(pNKD^-W(?Kt~j@
z?rB5Y3YuAsoKD%s=yF2?k@1e(thebto$&ToOPgclBYu{GF?51kj21-cJC`aQa~}-q
z+?QYAlFJD0>1!gr9%_I|LdS%wSzv!WCF-VWk@JXwN`A!$`^la{uz3Se^oqtV6!PR6
zTT(wNnk1FruH%>uZ<RFy{XX^_&1ybt-M&O^J7l5G5039yYOg2YE%zaw+%zUnuNIom
z+g%iz#|MG%d`;S{oV^~~bhq1gWq*H6$@cGVKU0NH&k8>*%ZtDCAAj7<dpWq@kL7ar
zN{do(gB>`X<T-K>DmtGh(e3FjrE&czqjcyi!hux}7G)~&>`)NxD8ar4)6a*WWhm(k
zM=~ZcG+w2=o9h&ajgY2LhQtm}<-%R11edr$M^5R<n<FIq@CM(AqR!(C%Cy{>KCQ$!
zay&RA^e<X^Ld_8}3J4|K$2b^93<M=bDT9P;54GqUG#7wW+2+F{nW>njIP)#=SVa|o
z6PWU77y9((2zjTSW1{*{1N#$b|GRg|pI1wj*ETuL@nG-Q<U8VNZ?204#t^h;o7XRV
zUhRH?-Msc4E0U*ZQQVh_U%cLrJ5}m};uM`s8L+sx3h;~EULpMCg1jSmwEYqwyi#vl
zgLN(Dlz}5}$C6$I9%&jpbSK4=W06>s_NP=DYA`Kup1;X;=n0e!Av~l`)+iDGLrV1Z
zCu9QKB!oelWmU&+m`vL*KbCOaK6gx6`s(pwj@6~(h$G`S?QT-ME#{<%(vSz7n|Rx9
z_%L0C^udXEX%8>u+)vnAks(p8{?jKI^zAZ!2^$;w*+r3ZyH`^qZ%SNou@q>3E`}ao
zXZLosY&f7oU{f&?y%A4Ad)|ju@7=<{k48IU*{+t*cW0z}`l#5ut2rl^ZOXPn+He|z
zybGFOoJV5H_-5hwm2esROWQLvpK%4NXSfRUFLFLOA75=O%L-SL#~M=??g)Ec)v+e*
z=W(?1F63#w>r1N{y)G%lwIvW6^3r56jl+y+hC_5(+uefLZs?|Rrt(?n&CN}y?K^?A
znvo|m!HS^Jn<s}S8phPoKkl%1WzWXZenlL6hC2Vj)`}I<a?X=(9TGE628qjCVB~&4
zpbp?(a@t^8hB5YNT$eQGg_*K)Pw?H(%{vb0o%}`8<*qO!bZb{HKEjcos<%*<=0fmS
zL-u?0U7FdL{o4c?^#OL40`tc_!XYT?IxJN%G|VnYpmOv#g%{RWitab40%7W$OO^WH
z@IjivYWH&sj`RN*bxvAJhrr%0A+mVpBiC|4D9h3(NqPUds?&X2Z?61gL}b2m&1C+t
zx%NaCJ>n!g343Cjnpd*woWtU7!N<+3(J-N*$ClLm=|4ke3EQs{BU=d4f8oi`pFyOm
z;t*!8P~e+NpXU442g|pFlMUZXE=-au7)mI0RM|t^sj!kbyuQ`e3uySdZ>_!DNvmnd
zv=!NXqifMEVG3%aoE|#b`$H{!`B`-%<k?tAYC+9rk270ipx{A^-lR*D0`)kzt@<>E
z&;Bn;!OreW<LmthndxYnCAr(>Ha7u^7fjnrxLSrn+VTpK!#5LjL!nO7UOLK!UQNiJ
z<>tPXr@2%A<P)}LY6WhkK-k>rGYgJ#8L20c{&EAW(oz(3qOf?IY!9P+a!4ASU08TN
z>rt?jShr18+C10ShyZt7osE-1g7s%=5A%$M?goiusL&~VeUqHrW$r^utvs{Jiv^L|
zZBFa_C^O;JBmO}d0WzVTBewu|y7Qpzjk(IIPi)+k(%#8=?SVI$7V5{X`h<&aps{!b
z8yhvokDDEAxy5^N4^;=YLY7KHD<FSEj#W!Ib5KXO4K5kpXYVLQG3{bVAIBphPDl%6
zH*6({0LWce3S?I*r{&`TpHGoT@D<l=eBfbS%geC@8T)$r=paPXkGX)1s}<8fOvM{2
zePcwe5^7QA;PZ7?d+XA{N^z)0bttZ8f2OE6@SOPk>~OfQ1jHF6``XR>-~w)*`Ae~C
zvCyMnZ1)}dZ8jgRh#YK<9n{Y(G0XC1`f^73UoW;hw>M4hR%@&z<OZG>?yaSzIS$*N
zH1OxQ#zcbLmWA}PmO0H7wVh2Ch(!DAU#3G%4`&-Av)3B>!X;O|xjS&`(LuQ%C`413
z3wwYbMl-G7)Dnl^y`#%M(BUrHIvT5g`547}To1^r2dz?>Efx!G87xLkr5jxf!B*=)
z6%8AXf_j#p)q3FQpAk|b|9gRPAx<ntGEh;WOwnl`A%FA2%<hf^JKfCVx5BC^1x4~!
z?aSG|{>mBq*cin(Wevyrg7uScd01be^y2~{BM3L*9{t7gf`%(H0za-;VxzRv-`vHq
zhR2pGKDGJ%JhdCHf3S(R|IO-CPbsL}a5QT-<MYgR$te14QVX2rx@5Q{(@NY?-_`4w
zxxH1HHo0lOU*8IgvxVb;=RyBUUH>yq_)oCm|Gqk}sH1;YSmg>8a&F;WCsYg<dkF{s
zbrSyt{4c!Jy%Djl?<w>Q(VZ1&bbog0;+}YQxN7gQ8?L2Dkr@>|B$I0*Ho#3og^NW`
zNGKyzHZFG-Vm_P_T3Ji5^VbvfudLlU!FTelBZ|H4BUd!%J#O~A@_qIudF6{plZn)j
zSvtqq$LaFvZUxCrX!Fu$tBfLj8$qactxA4A^QSndB)@%^MFmvorsWW1ngjEV?;Dgx
zRY(d&BtWJfC7v4T4o<0zjHof%<-YfC2wo>Tsd{d__AbbQNK1I+AeeXX@T)i^C&Ul-
znA3pB4TG(_7;(0&uo0te-12i%TMTdBzAUU1C7`B;gk&lDfD)a1$e17f%c{WP`9RaG
zB5X_Pz^>8i&f#)=f-*j)M^=tCiru3?`P--p%{Jdr{N+ib(+@48nqbBjY~|OxBMYzC
zBO}HIl_hpLNQX(%OFnlXS7Y1?JC^WRj-WLd=RhV!`nnzSOBqe)7<}Obzqh|M#wJ0p
ze&UwB#$w}X**Zdy*`o)*p#hZ36nj@OW)L;*8=to&f6&|5Ig6K0O64M+3+5c)?s<Eg
zbAC;@rtv{LX>-|m1ePpfHpQ1`RQP+mx2O{8Stjo_z0Ze2+Gg1my$&Sf#KPb4+VIGq
zGTad4dM57rMJ)#}CHcoSp6n+HkE&A7`}RVjbwWz3+`wx%MFVQLszIGcUPlM^G7^8h
zkrKLR4v7+?p?Y?)@~nxDcBDOTqlzI-r(V#oJ98K8ST%Mm!EpbsX8*Hn!S*a3b$Z&(
z?KP6}4Dmz;E~^dv@dU}-8-2<sFVm@+@!H66xUqF#vl&XQwl$MX#13FYaxNSnRjXdq
zvq0-W#@87RUm@u(o!=0)^r`bTmPmC_J-ui#O4Sghpkei3>FK5&?W>jch-aYjcQSH<
z?H-ot<@zli2`lw5gIINotjoVub-SFBX_fZg4N0@Mi~K)9|39dI&3F6JEqT`Hl=@fc
zwyT>8`NN@#Gs5fN#E)YqB7+CtD0eQo?*xC2e=f3s+|s|gOs<}UzS_@oOJ8^$VJ}h*
zRxRg!bs;Hx@=p5K7A;%!_KZjoJmcK$IJs?BInQ*>n5J$T_{`1vIF)7**D>dOC#BNv
znxq<(=h8pq^UZb&tn#=lEiUi_LK4zZVEfPFLnR!j0L6oDgvtU@xvKp$;DBl$CjuoR
z6_phSKe94^4n#!kaZ|A$K1sdlQu|uUs@6~Givtaxs2}33F{k0Z#dc!4c0?D@o4ym$
zbz5v|PnE|p7K~-B5Ip@w1Byd^LTZ9Q-gof{b<J$_-1bBe%n|=tWZdL9Dnxd%Lc@?3
zvU-5h4XgmWiH}sm>48daR{S{*e*0NAn{3%DUW}~@{cEXd!Ly${ejssI`=?|%l|p2#
z*|Kqm&g4HXq~j@91}8;g!=agR@8ulmc!yH88@LFDr^Nq}fB8cY2n5@aBn+p9-gLbL
zg<ZB4{GxZQkG-knEK)r+mA!Q}s$s4Ix81ocGPG7EUO%QkE-LoP==oPZmN$=Z6g*sE
z%(#c2S<hFcgE9UI{s-RBC8ornMKPn}sMdeNd$OWLQeq+!^{Fb>1gBgOs*6nM=3)&z
z`DTD^E91W-OvGF}pFEKChQc5P)l$BH@^Yqlru#?cq8(^&=P+}D{-H5>Uw-lr#4|4n
zIxor_<I!N@0W#M=DZX3}Jzb}1*m!epq$|5W(U#(yo%erq;v!Q&Q45&Y$Ae3U3BM*3
z`+gSUSLV`A?;p*VP%nuk0;pwWdG}pz8;ov(>@{=GZ}DHbrA~9_eCM-mYSzV$m+EQG
z5L>WxV-#aB9z#0q{f4_ZEEDRUnu3SPS+rsAUZ=pQ&(}TpJAlqFrdbi(O*q^R=NT~R
zX_8$kg*2H;pD@XXKV(3p2k%#29S9d)_*uuEI-W|q)<c{eLLy3@#}hLW!aMYPPv4}k
ze~@hWn=kCZ03RL2j09T&9Mq6;^PP%S5!$TES;(=qqc#@~)(;8w6k!{$CT7l8DQY0Q
zPW~rJazNJicCsY?bzviA5g%JoLn6!*Dc1GNnl_J>e>Rxp=pV#@VDS-C{*G6_NGF^y
zYUECvK#<)(QHoxx_tc!r@BlcGTuge1VVbTt-eKLuzJQYa+~e;j#rfgA%9)q;jKO&H
zV1`{Q9K4L@)blEos2Jo2wdW7!#6qW7t;-^&#n?)_HI8#Z;e4x1^mv-%Y1i6HXX1C#
z93Fm#wx_Q7HB?L8yK{2`XVCIddZ0Ee6^`oEVd>qVxGZWb5*Z)&^5bKTegsodmJ#~G
z9y?tKw#5l%lgZ4cTsP1@QbmG})2eC|<vP{sq7}2dk$sIp2qVHO3yvU3uzNm<N<6{Z
z2q}ubUd^AD$ayDqRr;lH97mPXi`tNgr>pC+0DsOC<O^rTHVw>uDOTUTc(T0ny8+qw
zEI;|%afz<@^SkPr?B6Z<zTqQ3r!=&ePVOqKF;93+y$#1^9h-=zenYk8?p3Guk8Y~8
z6uY+4BNMiasVQeuQXi2Y3-c$o+jrJ(4P=EMVty+#3u6-9>irH0IASPmH_i`O@$jpZ
zM3}hUa3DHwE5dS*Xz=x@^q*H`%9DBYS!49%=$Vzu2KCqowJ={rO(2GYT|<#zi~v!A
zDH`j9Kg&ogQIXJUN!tp`?t5BDR1Ny+M;<3X6s{~SIGkr#`HL6_xpMR8wmi{&J+RQ`
z3FDh7Feu_t^xXq8Nl~eq-3nmsrK?Q&7~RE^`mhgQOPOdJpFA{C-PB{Y(dl}&6S{Vu
zp7S;GSJ!yR@V>{LR?hbso^!q5aRCPmrE8}(zsK7Sede&3O+03AIWx9)T+XXhBJ#Q^
z1YKT+dfqUe`TI;f@v?`l3JxX&aAvlL_V7iKz4r^h(*#OR69TuHkfvz0>gIzgT-&Yc
zd3St>!FqaIsW&NQR76SuO6J@`FMMIxQK3q5^75xqB?iwIT3Ru-crs7Z1o0@(o?o%Q
z&w2_alT!FFjeOAA!L`5>tC&3K!AQ0KBk%qA)VYohf<7M94RI?R?3>JLLt4#{jFi3@
zma|y?y^)F0T&Y8E?dnA=INxPwb58-Z%oJylwY}!6-)4;lV--mw4R@k`L71-o>iv(U
z9Ub9?{zt__pJVtF7Zb~v9Oj8vMVqYr@Ii~6)#j5+V$?{LkY>1(V2_1_Qde+d5*oYt
zm28~{uG49i1b25MwK%oNU(ZyvI#TT(B+8ldUB5U?B;}QXN!&`+zt<+hknS4;FzGPi
z;ezo<WIkwwpD=>wq7B`i1hEg7+VQ+S&XN|A6VoqaqMvNu4PTDFbYcWTaPdBExh}*n
zT^Orm&O4VR+=){JviQ3gN<YtJ((S4A!wv1fhIhZ->pQIX;~9R*C&(Go9+carqL@U;
z_rhN5Bx8A<o5n_}M&;*g7xa%4@+$57j;$eBTAF(ddB?YE>Fn@aYGLU-uTB-XZVxT%
zfPV$T=qj!4Y6f<Nh?qJ<;3ohtke8Pe_=1y{N1f+|2#=u13qejGzX%ZM;6L8;Zvu7>
zW>)52|FZxUy@w}Ufb@?BQxS6qXFISf0PN^!Yh?;{wQ{iMb~H1m3aaUZccOXf1h0z7
zS%DxfZyen0&8+M#-Z*#y_ymPKc?E&&@ShoqfDgPc{oj4xIy;!TnL?Za5@rr25Sq7A
zGBiq7CeC1IFB%^B|FXZX#<Sqv3I6V`32}CTYX!jjaq@8SW4;d2Qi9)SdAbk(GEPMD
zj{&6q8zb<G@bLe`2!5kO8*mxkf0J==R(Ei)1xT1WJD7l7pXQ+qwt+aqCqV;WG|gKF
z4~X+y2P=Em{fYQ;xHQ|}(#jB5Fx>Xw|3Rm~KXmp4wBP^7c`E<0`~O+yX6c7CTt?)d
znv|7Aq#R7$>>&28@=^ddH!Cv{L36MmFRuWQ(~KV?$oWDL!pkYl59HxA0}2cA@t6ph
zngE4*8iF7GeH3|n7gw;oDdg`_cp+v&W&)-!IE8u51>mCy!ml7^f}8@Tyn?)Z@Ina4
V8+zGy1Rn(;FRda~@y0m#e*uV1>N)@b


From d5a708d0dbb92731fd576f1a5042f11ff03059c5 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Sun, 24 Jul 2022 19:04:25 -0500
Subject: [PATCH 23/31] try to handle empty batches and size zero inputs
 automatically

---
 .../SonicTriton/interface/TritonData.h          |  3 ++-
 .../SonicTriton/src/TritonClient.cc             |  3 +++
 HeterogeneousCore/SonicTriton/src/TritonData.cc |  6 ++++--
 .../SonicTriton/src/TritonMemResource.cc        |  7 ++++---
 .../SonicTriton/test/TritonIdentityProducer.cc  | 17 ++++++-----------
 5 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h
index ee573655ddf19..de9c5afab8662 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonData.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h
@@ -159,7 +159,8 @@ class TritonData {
     return std::any_of(vec.begin(), vec.end(), [](int64_t i) { return i < 0; });
   }
   int64_t dimProduct(const ShapeView& vec) const {
-    return std::accumulate(vec.begin(), vec.end(), 1, std::multiplies<int64_t>());
+    //product of dims, accumulated in int64_t (an int seed would narrow every step); negative dims count as 0 to avoid overflows
+    return std::accumulate(vec.begin(), vec.end(), int64_t{1}, [](int64_t dim1, int64_t dim2){ return dim1*std::max<int64_t>(0,dim2); });
   }
   //generates a unique id number for each instance of the class
   unsigned uid() const {
diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
index 0d2603f32fde8..0612181dfb436 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -293,6 +293,9 @@ void TritonClient::getResults(std::vector<tc::InferResult*>& results) {
 void TritonClient::evaluate() {
   //in case there is nothing to process
   if (batchSize() == 0) {
+    //call getResults on an empty vector
+    std::vector<tc::InferResult*> empty_results;
+    getResults(empty_results);
     finish(true);
     return;
   }
diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
index e037b8887a87c..879ea0949fcc5 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonData.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -212,7 +212,9 @@ void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
     entry.data_->SetShape(entry.fullShape_);
 
     for (unsigned i0 = 0; i0 < outerDim; ++i0) {
-      memResource_->copyInput(data_in[counter].data(), offset, i);
+      //avoid copying empty input
+      if (entry.byteSizePerBatch_>0)
+        memResource_->copyInput(data_in[counter].data(), offset, i);
       offset += entry.byteSizePerBatch_;
       ++counter;
     }
@@ -251,7 +253,7 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
     const auto& entry = entries_[i];
     const DT* r1 = reinterpret_cast<const DT*>(entry.output_);
 
-    if (!entry.result_) {
+    if (entry.totalByteSize_>0 and !entry.result_) {
       throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result";
     }
 
diff --git a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc
index ea5d63cea072d..9d5d4d431cecd 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc
@@ -38,9 +38,10 @@ template <>
 void TritonOutputHeapResource::copyOutput() {
   size_t contentByteSize = 0;
   for (auto& entry : data_->entries_) {
-    size_t contentByteSizeEntry;
-    TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry),
-                          data_->name_ + " fromServer(): unable to get raw");
+    size_t contentByteSizeEntry(0);
+    if (entry.totalByteSize_>0)
+      TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry),
+                            data_->name_ + " fromServer(): unable to get raw");
     contentByteSize += contentByteSizeEntry;
   }
   if (contentByteSize != data_->totalByteSize_) {
diff --git a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
index afc23b684df8c..e6ed3b70fa4e2 100644
--- a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
+++ b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
@@ -16,7 +16,6 @@ class TritonIdentityProducer : public TritonEDProducer<> {
   explicit TritonIdentityProducer(edm::ParameterSet const& cfg)
       : TritonEDProducer<>(cfg),
         batchSizes_{1,2,0},
-        batchSize_(0),
         batchCounter_(0) {}
   void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override {
     //follow Triton QA tests for ragged input
@@ -27,29 +26,25 @@ class TritonIdentityProducer : public TritonEDProducer<> {
       {3,3,3}
     };
 
-    batchSize_ = batchSizes_[batchCounter_];
+    client_->setBatchSize(batchSizes_[batchCounter_]);
     batchCounter_ = (batchCounter_+1) % batchSizes_.size();
-    client_->setBatchSize(batchSize_);
     auto& input1 = iInput.at("INPUT0");
     auto data1 = input1.allocate<float>();
-    for (int i = 0; i < batchSize_; ++i) {
+    for (unsigned i = 0; i < client_->batchSize(); ++i) {
       (*data1)[i] = value_lists[i];
       input1.setShape(0, (*data1)[i].size(), i);
     }
 
     // convert to server format
-    if (batchSize_>0)
-      input1.toServer(data1);
+    input1.toServer(data1);
   }
   void produce(edm::Event& iEvent, edm::EventSetup const& iSetup, Output const& iOutput) override {
-    if (batchSize_==0)
-      return;
     // check the results
     const auto& output1 = iOutput.at("OUTPUT0");
     // convert from server format
     const auto& tmp = output1.fromServer<float>();
     edm::LogInfo msg(debugName_);
-    for (int i = 0; i < batchSize_; ++i){
+    for (unsigned i = 0; i < client_->batchSize(); ++i){
         msg << "output " << i << " (" << triton_utils::printColl(output1.shape(i)) << "): ";
         for(int j = 0; j < output1.shape(i)[0]; ++j){
             msg << tmp[i][j] << " ";
@@ -67,8 +62,8 @@ class TritonIdentityProducer : public TritonEDProducer<> {
   }
 
 private:
-  std::vector<int> batchSizes_;
-  int batchSize_, batchCounter_;
+  std::vector<unsigned> batchSizes_;
+  unsigned batchCounter_;
 };
 
 DEFINE_FWK_MODULE(TritonIdentityProducer);

From a6352219f4c8b5d99052798303efa49736f31140 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Thu, 30 Jun 2022 09:16:49 -0500
Subject: [PATCH 24/31] correct size check

---
 HeterogeneousCore/SonicTriton/interface/TritonMemResource.h | 3 +++
 HeterogeneousCore/SonicTriton/src/TritonMemResource.cc      | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h b/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h
index 830a543360301..9ccd27fd0c0cf 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h
@@ -52,6 +52,9 @@ class TritonCpuShmResource : public TritonMemResource<IO> {
   void close() override;
   void copyInput(const void* values, size_t offset, unsigned entry) override {}
   void copyOutput() override {}
+
+protected:
+  size_t sizeOrig_;
 };
 
 using TritonInputHeapResource = TritonHeapResource<triton::client::InferInput>;
diff --git a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc
index 9d5d4d431cecd..762a9d443db85 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc
@@ -56,7 +56,7 @@ void TritonOutputHeapResource::copyOutput() {
 
 template <typename IO>
 TritonCpuShmResource<IO>::TritonCpuShmResource(TritonData<IO>* data, const std::string& name, size_t size)
-    : TritonMemResource<IO>(data, name, size) {
+    : TritonMemResource<IO>(data, name, size), sizeOrig_(size) {
   //mmap of size zero is required to fail by POSIX, but still need to have some shared memory region available for Triton
   this->size_ = std::max<size_t>(this->size_, 1);
 
@@ -114,7 +114,7 @@ void TritonCpuShmResource<IO>::close() {
 
 template <>
 void TritonInputCpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
-  if (size_ > 0)
+  if (sizeOrig_ > 0)
     std::memcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_);
 }
 

From 453924071c3042ef34a6631d38ff1be639c3961d Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Mon, 8 Aug 2022 15:58:35 -0500
Subject: [PATCH 25/31] update server version

---
 HeterogeneousCore/SonicTriton/README.md         | 4 ++--
 HeterogeneousCore/SonicTriton/scripts/cmsTriton | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md
index 759b50676040e..9888266eaeddd 100644
--- a/HeterogeneousCore/SonicTriton/README.md
+++ b/HeterogeneousCore/SonicTriton/README.md
@@ -131,14 +131,14 @@ The script has two operations (`start` and `stop`) and the following options:
 * `-d`: use Docker instead of Apptainer
 * `-f`: force reuse of (possibly) existing container instance
 * `-g`: use GPU instead of CPU
-* `-i` [name]`: server image name (default: fastml/triton-torchgeo:22.03-py3-geometric)
+* `-i [name]`: server image name (default: fastml/triton-torchgeo:22.07-py3-geometric)
 * `-M [dir]`: model repository (can be given more than once)
 * `-m [dir]`: specific model directory (can be given more than one)
 * `-n [name]`: name of container instance, also used for hidden temporary dir (default: triton_server_instance)
 * `-P [port]`: base port number for services (-1: automatically find an unused port range) (default: 8000)
 * `-p [pid]`: automatically shut down server when process w/ specified PID ends (-1: use parent process PID)
 * `-r [num]`: number of retries when starting container (default: 3)
-* `-s [dir]`: Apptainer sandbox directory (default: /cvmfs/unpacked.cern.ch/registry.hub.docker.com/fastml/triton-torchgeo:22.03-py3-geometric)
+* `-s [dir]`: Apptainer sandbox directory (default: /cvmfs/unpacked.cern.ch/registry.hub.docker.com/fastml/triton-torchgeo:22.07-py3-geometric)
 * `-t [dir]`: non-default hidden temporary dir
 * `-v`: (verbose) start: activate server debugging info; stop: keep server logs
 * `-w [time]`: maximum time to wait for server to start (default: 300 seconds)
diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton
index c218f271ac759..acd4873828cdf 100755
--- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton
+++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton
@@ -18,7 +18,7 @@ PARENTPID=""
 BASEPORT=8000
 AUTOPORT=""
 NPORTS=3
-IMAGE=fastml/triton-torchgeo:22.03-py3-geometric
+IMAGE=fastml/triton-torchgeo:22.07-py3-geometric
 SANDBOX=""
 COMPAT_USR=""
 

From e77801f6c739baad1dea0b7eaea55ebeb3b5a621 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Wed, 31 Aug 2022 12:39:58 -0500
Subject: [PATCH 26/31] fix counting bugs for new batching interface

---
 HeterogeneousCore/SonicTriton/src/TritonClient.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
index 0612181dfb436..ab18b92e3dd29 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -202,7 +202,8 @@ bool TritonClient::setBatchSize(unsigned bsize) {
       return false;
     } else {
       outerDim_ = bsize;
-      resizeEntries(1);
+      //take min to allow resizing to 0
+      resizeEntries(std::min(outerDim_,1u));
       return true;
     }
   } else {
@@ -214,7 +215,8 @@ bool TritonClient::setBatchSize(unsigned bsize) {
 
 void TritonClient::resizeEntries(unsigned entry) {
   if (entry > nEntries())
-    addEntry(entry);
+    //addEntry(entry) extends the vector to size entry+1
+    addEntry(entry-1);
   else if (entry < nEntries()) {
     for (auto& element : input_) {
       element.second.entries_.resize(entry);

From 4b12f67756d4c9ad0c4f66a4b70079121cbda834 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Fri, 17 Feb 2023 12:54:23 -0600
Subject: [PATCH 27/31] only create shared_ptr once (avoid double free)

---
 HeterogeneousCore/SonicTriton/interface/TritonData.h |  2 +-
 HeterogeneousCore/SonicTriton/src/TritonClient.cc    | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h
index de9c5afab8662..23a3b8f57500f 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonData.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h
@@ -142,7 +142,7 @@ class TritonData {
   void checkShm() {}
   unsigned fullLoc(unsigned loc) const;
   void reset();
-  void setResult(Result* result, unsigned entry=0) { entries_[entry].result_.reset(result); }
+  void setResult(std::shared_ptr<Result> result, unsigned entry=0) { entries_[entry].result_ = result; }
   IO* data(unsigned entry=0) { return entries_[entry].data_.get(); }
   void updateMem(size_t size);
   void computeSizes();
diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
index ab18b92e3dd29..2910caa5f6356 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -272,9 +272,9 @@ bool TritonClient::handle_exception(F&& call) {
 }
 
 void TritonClient::getResults(std::vector<tc::InferResult*>& results) {
-  for (auto& [oname, output] : output_) {
-    for (unsigned i = 0; i < results.size(); ++i) {
-      auto result = results[i];
+  for (unsigned i = 0; i < results.size(); ++i) {
+    std::shared_ptr<tc::InferResult> result(results[i]);
+    for (auto& [oname, output] : output_) {
       //set shape here before output becomes const
       if (output.variableDims()) {
         std::vector<int64_t> tmp_shape;
@@ -285,9 +285,9 @@ void TritonClient::getResults(std::vector<tc::InferResult*>& results) {
       }
       //extend lifetime
       output.setResult(result,i);
+      //compute size after getting all result entries
+      if(i==results.size()-1) output.computeSizes();
     }
-    //compute size after getting all result entries
-    output.computeSizes();
   }
 }
 

From 2342a44fd30a8f4100e3ae3a2809ef89935803f6 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Sat, 18 Feb 2023 12:18:19 -0600
Subject: [PATCH 28/31] code format

---
 .../SonicTriton/interface/TritonData.h        | 111 ++++++++++--------
 .../SonicTriton/src/TritonClient.cc           |  32 +++--
 .../SonicTriton/src/TritonData.cc             |  26 ++--
 .../SonicTriton/src/TritonMemResource.cc      |  18 +--
 .../test/TritonIdentityProducer.cc            |  25 ++--
 5 files changed, 109 insertions(+), 103 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonData.h b/HeterogeneousCore/SonicTriton/interface/TritonData.h
index 23a3b8f57500f..a6703811b6257 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonData.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonData.h
@@ -55,8 +55,8 @@ class TritonData {
   TritonData(const std::string& name, const TensorMetadata& model_info, TritonClient* client, const std::string& pid);
 
   //some members can be modified
-  void setShape(const ShapeType& newShape, unsigned entry=0);
-  void setShape(unsigned loc, int64_t val, unsigned entry=0);
+  void setShape(const ShapeType& newShape, unsigned entry = 0);
+  void setShape(unsigned loc, int64_t val, unsigned entry = 0);
 
   //io accessors
   template <typename DT>
@@ -68,7 +68,7 @@ class TritonData {
   TritonOutput<DT> fromServer() const;
 
   //const accessors
-  const ShapeView& shape(unsigned entry=0) const { return entries_.at(entry).shape_; }
+  const ShapeView& shape(unsigned entry = 0) const { return entries_.at(entry).shape_; }
   int64_t byteSize() const { return byteSize_; }
   const std::string& dname() const { return dname_; }
 
@@ -76,7 +76,9 @@ class TritonData {
   bool variableDims() const { return variableDims_; }
   int64_t sizeDims() const { return productDims_; }
   //default to dims if shape isn't filled
-  int64_t sizeShape(unsigned entry=0) const { return variableDims_ ? dimProduct(entries_.at(entry).shape_) : sizeDims(); }
+  int64_t sizeShape(unsigned entry = 0) const {
+    return variableDims_ ? dimProduct(entries_.at(entry).shape_) : sizeDims();
+  }
 
 private:
   friend class TritonClient;
@@ -90,60 +92,60 @@ class TritonData {
   //group together all relevant information for a single request
   //helpful for organizing multi-request ragged batching case
   class TritonDataEntry {
-    public:
-      //constructors
-      TritonDataEntry(const ShapeType& dims, bool noOuterDim, const std::string& name, const std::string& dname)
-          : fullShape_(dims),
-            shape_(fullShape_.begin() + (noOuterDim ? 0 : 1), fullShape_.end()),
-            sizeShape_(0),
-            byteSizePerBatch_(0),
-            totalByteSize_(0),
-            offset_(0),
-            output_(nullptr) {
-        //create input or output object
-        IO* iotmp;
-        createObject(&iotmp, name, dname);
-        data_.reset(iotmp);
-      }
-      //default needed to be able to use std::vector resize()
-      TritonDataEntry()
-          : shape_(fullShape_.begin(), fullShape_.end()),
-            sizeShape_(0),
-            byteSizePerBatch_(0),
-            totalByteSize_(0),
-            offset_(0),
-            output_(nullptr) { }
-
-    private:
-      friend class TritonData<IO>;
-      friend class TritonClient;
-      friend class TritonMemResource<IO>;
-      friend class TritonHeapResource<IO>;
-      friend class TritonCpuShmResource<IO>;
+  public:
+    //constructors
+    TritonDataEntry(const ShapeType& dims, bool noOuterDim, const std::string& name, const std::string& dname)
+        : fullShape_(dims),
+          shape_(fullShape_.begin() + (noOuterDim ? 0 : 1), fullShape_.end()),
+          sizeShape_(0),
+          byteSizePerBatch_(0),
+          totalByteSize_(0),
+          offset_(0),
+          output_(nullptr) {
+      //create input or output object
+      IO* iotmp;
+      createObject(&iotmp, name, dname);
+      data_.reset(iotmp);
+    }
+    //default needed to be able to use std::vector resize()
+    TritonDataEntry()
+        : shape_(fullShape_.begin(), fullShape_.end()),
+          sizeShape_(0),
+          byteSizePerBatch_(0),
+          totalByteSize_(0),
+          offset_(0),
+          output_(nullptr) {}
+
+  private:
+    friend class TritonData<IO>;
+    friend class TritonClient;
+    friend class TritonMemResource<IO>;
+    friend class TritonHeapResource<IO>;
+    friend class TritonCpuShmResource<IO>;
 #ifdef TRITON_ENABLE_GPU
-      friend class TritonGpuShmResource<IO>;
+    friend class TritonGpuShmResource<IO>;
 #endif
 
-      //accessors
-      void createObject(IO** ioptr, const std::string& name, const std::string& dname);
-      void computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize);
-
-      //members
-      ShapeType fullShape_;
-      ShapeView shape_;
-      size_t sizeShape_, byteSizePerBatch_, totalByteSize_;
-      std::shared_ptr<IO> data_;
-      std::shared_ptr<Result> result_;
-      unsigned offset_;
-      const uint8_t* output_;
+    //accessors
+    void createObject(IO** ioptr, const std::string& name, const std::string& dname);
+    void computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize);
+
+    //members
+    ShapeType fullShape_;
+    ShapeView shape_;
+    size_t sizeShape_, byteSizePerBatch_, totalByteSize_;
+    std::shared_ptr<IO> data_;
+    std::shared_ptr<Result> result_;
+    unsigned offset_;
+    const uint8_t* output_;
   };
 
   //private accessors only used internally or by client
   void checkShm() {}
   unsigned fullLoc(unsigned loc) const;
   void reset();
-  void setResult(std::shared_ptr<Result> result, unsigned entry=0) { entries_[entry].result_ = result; }
-  IO* data(unsigned entry=0) { return entries_[entry].data_.get(); }
+  void setResult(std::shared_ptr<Result> result, unsigned entry = 0) { entries_[entry].result_ = result; }
+  IO* data(unsigned entry = 0) { return entries_[entry].data_.get(); }
   void updateMem(size_t size);
   void computeSizes();
   triton::client::InferenceServerGrpcClient* client();
@@ -160,7 +162,8 @@ class TritonData {
   }
   int64_t dimProduct(const ShapeView& vec) const {
     //lambda treats negative dimensions as 0 to avoid overflows
-    return std::accumulate(vec.begin(), vec.end(), 1, [](int64_t dim1, int64_t dim2){ return dim1*std::max(0l,dim2); });
+    return std::accumulate(
+        vec.begin(), vec.end(), 1, [](int64_t dim1, int64_t dim2) { return dim1 * std::max(0l, dim2); });
   }
   //generates a unique id number for each instance of the class
   unsigned uid() const {
@@ -200,9 +203,13 @@ using TritonOutputMap = std::unordered_map<std::string, TritonOutputData>;
 
 //avoid "explicit specialization after instantiation" error
 template <>
-void TritonInputData::TritonDataEntry::createObject(triton::client::InferInput** ioptr, const std::string& name, const std::string& dname);
+void TritonInputData::TritonDataEntry::createObject(triton::client::InferInput** ioptr,
+                                                    const std::string& name,
+                                                    const std::string& dname);
 template <>
-void TritonOutputData::TritonDataEntry::createObject(triton::client::InferRequestedOutput** ioptr, const std::string& name, const std::string& dname);
+void TritonOutputData::TritonDataEntry::createObject(triton::client::InferRequestedOutput** ioptr,
+                                                     const std::string& name,
+                                                     const std::string& dname);
 template <>
 void TritonOutputData::checkShm();
 template <>
diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
index 2910caa5f6356..4d91dd6cebd8b 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -186,16 +186,12 @@ void TritonClient::resetBatchMode() {
   manualBatchMode_ = false;
 }
 
-unsigned TritonClient::nEntries() const {
-  return !input_.empty() ? input_.begin()->second.entries_.size() : 0;
-}
+unsigned TritonClient::nEntries() const { return !input_.empty() ? input_.begin()->second.entries_.size() : 0; }
 
-unsigned TritonClient::batchSize() const {
-  return batchMode_==TritonBatchMode::Rectangular ? outerDim_ : nEntries();
-}
+unsigned TritonClient::batchSize() const { return batchMode_ == TritonBatchMode::Rectangular ? outerDim_ : nEntries(); }
 
 bool TritonClient::setBatchSize(unsigned bsize) {
-  if (batchMode_==TritonBatchMode::Rectangular) {
+  if (batchMode_ == TritonBatchMode::Rectangular) {
     if (bsize > maxOuterDim_) {
       edm::LogWarning(fullDebugName_) << "Requested batch size " << bsize << " exceeds server-specified max batch size "
                                       << maxOuterDim_ << ". Batch size will remain as " << outerDim_;
@@ -203,7 +199,7 @@ bool TritonClient::setBatchSize(unsigned bsize) {
     } else {
       outerDim_ = bsize;
       //take min to allow resizing to 0
-      resizeEntries(std::min(outerDim_,1u));
+      resizeEntries(std::min(outerDim_, 1u));
       return true;
     }
   } else {
@@ -216,7 +212,7 @@ bool TritonClient::setBatchSize(unsigned bsize) {
 void TritonClient::resizeEntries(unsigned entry) {
   if (entry > nEntries())
     //addEntry(entry) extends the vector to size entry+1
-    addEntry(entry-1);
+    addEntry(entry - 1);
   else if (entry < nEntries()) {
     for (auto& element : input_) {
       element.second.entries_.resize(entry);
@@ -234,7 +230,7 @@ void TritonClient::addEntry(unsigned entry) {
   for (auto& element : output_) {
     element.second.addEntryImpl(entry);
   }
-  if (entry>0) {
+  if (entry > 0) {
     batchMode_ = TritonBatchMode::Ragged;
     outerDim_ = 1;
   }
@@ -278,15 +274,17 @@ void TritonClient::getResults(std::vector<tc::InferResult*>& results) {
       //set shape here before output becomes const
       if (output.variableDims()) {
         std::vector<int64_t> tmp_shape;
-        TRITON_THROW_IF_ERROR(result->Shape(oname, &tmp_shape), "getResults(): unable to get output shape for " + oname);
+        TRITON_THROW_IF_ERROR(result->Shape(oname, &tmp_shape),
+                              "getResults(): unable to get output shape for " + oname);
         if (!noOuterDim_)
           tmp_shape.erase(tmp_shape.begin());
-        output.setShape(tmp_shape,i);
+        output.setShape(tmp_shape, i);
       }
       //extend lifetime
-      output.setResult(result,i);
+      output.setResult(result, i);
       //compute size after getting all result entries
-      if(i==results.size()-1) output.computeSizes();
+      if (i == results.size() - 1)
+        output.computeSizes();
     }
   }
 }
@@ -310,7 +308,7 @@ void TritonClient::evaluate() {
     inputTriton.reserve(input_.size());
   }
   for (auto& [iname, input] : input_) {
-    for (unsigned i = 0; i < nEntriesVal; ++i){
+    for (unsigned i = 0; i < nEntriesVal; ++i) {
       inputsTriton[i].push_back(input.data(i));
     }
   }
@@ -321,7 +319,7 @@ void TritonClient::evaluate() {
     outputTriton.reserve(output_.size());
   }
   for (auto& [oname, output] : output_) {
-    for (unsigned i = 0; i < nEntriesVal; ++i){
+    for (unsigned i = 0; i < nEntriesVal; ++i) {
       outputsTriton[i].push_back(output.data(i));
     }
   }
@@ -351,7 +349,7 @@ void TritonClient::evaluate() {
           client_->AsyncInferMulti(
               [start_status, this](std::vector<tc::InferResult*> results) {
                 //check results
-                for (auto ptr : results){
+                for (auto ptr : results) {
                   auto success = handle_exception(
                       [&]() { TRITON_THROW_IF_ERROR(ptr->RequestStatus(), "evaluate(): unable to get result(s)"); });
                   if (!success)
diff --git a/HeterogeneousCore/SonicTriton/src/TritonData.cc b/HeterogeneousCore/SonicTriton/src/TritonData.cc
index 879ea0949fcc5..d8fc506d6e99a 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonData.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonData.cc
@@ -52,20 +52,24 @@ void TritonData<IO>::addEntry(unsigned entry) {
 template <typename IO>
 void TritonData<IO>::addEntryImpl(unsigned entry) {
   if (entry >= entries_.size()) {
-    entries_.reserve(entry+1);
-    for (unsigned i = entries_.size(); i < entry+1; ++i) {
+    entries_.reserve(entry + 1);
+    for (unsigned i = entries_.size(); i < entry + 1; ++i) {
       entries_.emplace_back(dims_, client_->noOuterDim(), name_, dname_);
     }
   }
 }
 
 template <>
-void TritonInputData::TritonDataEntry::createObject(tc::InferInput** ioptr, const std::string& name, const std::string& dname) {
+void TritonInputData::TritonDataEntry::createObject(tc::InferInput** ioptr,
+                                                    const std::string& name,
+                                                    const std::string& dname) {
   tc::InferInput::Create(ioptr, name, fullShape_, dname);
 }
 
 template <>
-void TritonOutputData::TritonDataEntry::createObject(tc::InferRequestedOutput** ioptr, const std::string& name, const std::string& dname) {
+void TritonOutputData::TritonDataEntry::createObject(tc::InferRequestedOutput** ioptr,
+                                                     const std::string& name,
+                                                     const std::string& dname) {
   tc::InferRequestedOutput::Create(ioptr, name);
 }
 
@@ -101,8 +105,8 @@ void TritonData<IO>::setShape(unsigned loc, int64_t val, unsigned entry) {
 
   //check boundary
   if (locFull >= entries_[entry].fullShape_.size())
-    throw cms::Exception("TritonDataError")
-        << name_ << " setShape(): dimension " << locFull << " out of bounds (" << entries_[entry].fullShape_.size() << ")";
+    throw cms::Exception("TritonDataError") << name_ << " setShape(): dimension " << locFull << " out of bounds ("
+                                            << entries_[entry].fullShape_.size() << ")";
 
   if (val != entries_[entry].fullShape_[locFull]) {
     if (dims_[locFull] == -1)
@@ -168,8 +172,9 @@ TritonInputContainer<DT> TritonInputData::allocate(bool reserve) {
   auto ptr = std::make_shared<TritonInput<DT>>(client_->batchSize());
   if (reserve) {
     computeSizes();
-    for (auto& entry : entries_){
-      if (anyNeg(entry.shape_)) continue;
+    for (auto& entry : entries_) {
+      if (anyNeg(entry.shape_))
+        continue;
       for (auto& vec : *ptr) {
         vec.reserve(entry.sizeShape_);
       }
@@ -213,7 +218,7 @@ void TritonInputData::toServer(TritonInputContainer<DT> ptr) {
 
     for (unsigned i0 = 0; i0 < outerDim; ++i0) {
       //avoid copying empty input
-      if (entry.byteSizePerBatch_>0)
+      if (entry.byteSizePerBatch_ > 0)
         memResource_->copyInput(data_in[counter].data(), offset, i);
       offset += entry.byteSizePerBatch_;
       ++counter;
@@ -253,7 +258,7 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
     const auto& entry = entries_[i];
     const DT* r1 = reinterpret_cast<const DT*>(entry.output_);
 
-    if (entry.totalByteSize_>0 and !entry.result_) {
+    if (entry.totalByteSize_ > 0 and !entry.result_) {
       throw cms::Exception("TritonDataError") << name_ << " fromServer(): missing result";
     }
 
@@ -282,7 +287,6 @@ unsigned TritonData<IO>::fullLoc(unsigned loc) const {
   return loc + (client_->noOuterDim() ? 0 : 1);
 }
 
-
 //explicit template instantiation declarations
 template class TritonData<tc::InferInput>;
 template class TritonData<tc::InferRequestedOutput>;
diff --git a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc
index 762a9d443db85..0f9e70c7df12a 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc
@@ -17,7 +17,7 @@ TritonMemResource<IO>::TritonMemResource(TritonData<IO>* data, const std::string
 
 template <typename IO>
 void TritonMemResource<IO>::set() {
-  for (auto& entry : data_->entries_){
+  for (auto& entry : data_->entries_) {
     TRITON_THROW_IF_ERROR(entry.data_->SetSharedMemory(name_, entry.totalByteSize_, entry.offset_),
                           "unable to set shared memory (" + name_ + ")");
   }
@@ -29,9 +29,13 @@ TritonHeapResource<IO>::TritonHeapResource(TritonData<IO>* data, const std::stri
 
 template <>
 void TritonInputHeapResource::copyInput(const void* values, size_t offset, unsigned entry) {
-  TRITON_THROW_IF_ERROR(data_->entries_[entry].data_->AppendRaw(reinterpret_cast<const uint8_t*>(values), data_->entries_[entry].byteSizePerBatch_),
+  TRITON_THROW_IF_ERROR(data_->entries_[entry].data_->AppendRaw(reinterpret_cast<const uint8_t*>(values),
+                                                                data_->entries_[entry].byteSizePerBatch_),
                         data_->name_ + " toServer(): unable to set data for batch entry " +
-                            (data_->entries_.size() > 1 ? std::to_string(entry) : data_->entries_[entry].byteSizePerBatch_ ? std::to_string(offset / data_->entries_[entry].byteSizePerBatch_) : ""));
+                            (data_->entries_.size() > 1 ? std::to_string(entry)
+                             : data_->entries_[entry].byteSizePerBatch_
+                                 ? std::to_string(offset / data_->entries_[entry].byteSizePerBatch_)
+                                 : ""));
 }
 
 template <>
@@ -39,7 +43,7 @@ void TritonOutputHeapResource::copyOutput() {
   size_t contentByteSize = 0;
   for (auto& entry : data_->entries_) {
     size_t contentByteSizeEntry(0);
-    if (entry.totalByteSize_>0)
+    if (entry.totalByteSize_ > 0)
       TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry),
                             data_->name_ + " fromServer(): unable to get raw");
     contentByteSize += contentByteSizeEntry;
@@ -159,9 +163,9 @@ void TritonGpuShmResource<IO>::close() {
 
 template <>
 void TritonInputGpuShmResource::copyInput(const void* values, size_t offset, unsigned entry) {
-  cudaCheck(
-      cudaMemcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_, cudaMemcpyHostToDevice),
-      data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->entries_[entry].byteSizePerBatch_) + " bytes to GPU");
+  cudaCheck(cudaMemcpy(addr_ + offset, values, data_->entries_[entry].byteSizePerBatch_, cudaMemcpyHostToDevice),
+            data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->entries_[entry].byteSizePerBatch_) +
+                " bytes to GPU");
 }
 
 template <>
diff --git a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
index e6ed3b70fa4e2..8a947f021c654 100644
--- a/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
+++ b/HeterogeneousCore/SonicTriton/test/TritonIdentityProducer.cc
@@ -14,20 +14,13 @@
 class TritonIdentityProducer : public TritonEDProducer<> {
 public:
   explicit TritonIdentityProducer(edm::ParameterSet const& cfg)
-      : TritonEDProducer<>(cfg),
-        batchSizes_{1,2,0},
-        batchCounter_(0) {}
+      : TritonEDProducer<>(cfg), batchSizes_{1, 2, 0}, batchCounter_(0) {}
   void acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, Input& iInput) override {
     //follow Triton QA tests for ragged input
-    std::vector<std::vector<float>> value_lists{
-      {2,2},
-      {4,4,4,4},
-      {1},
-      {3,3,3}
-    };
+    std::vector<std::vector<float>> value_lists{{2, 2}, {4, 4, 4, 4}, {1}, {3, 3, 3}};
 
     client_->setBatchSize(batchSizes_[batchCounter_]);
-    batchCounter_ = (batchCounter_+1) % batchSizes_.size();
+    batchCounter_ = (batchCounter_ + 1) % batchSizes_.size();
     auto& input1 = iInput.at("INPUT0");
     auto data1 = input1.allocate<float>();
     for (unsigned i = 0; i < client_->batchSize(); ++i) {
@@ -44,12 +37,12 @@ class TritonIdentityProducer : public TritonEDProducer<> {
     // convert from server format
     const auto& tmp = output1.fromServer<float>();
     edm::LogInfo msg(debugName_);
-    for (unsigned i = 0; i < client_->batchSize(); ++i){
-        msg << "output " << i << " (" << triton_utils::printColl(output1.shape(i)) << "): ";
-        for(int j = 0; j < output1.shape(i)[0]; ++j){
-            msg << tmp[i][j] << " ";
-        }
-        msg << "\n";
+    for (unsigned i = 0; i < client_->batchSize(); ++i) {
+      msg << "output " << i << " (" << triton_utils::printColl(output1.shape(i)) << "): ";
+      for (int j = 0; j < output1.shape(i)[0]; ++j) {
+        msg << tmp[i][j] << " ";
+      }
+      msg << "\n";
     }
   }
   ~TritonIdentityProducer() override = default;

From b658adc0da648372ced9b12a6c0a6b41fb1061f1 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Mon, 27 Feb 2023 11:36:49 -0600
Subject: [PATCH 29/31] move image file

---
 HeterogeneousCore/SonicTriton/README.md             |   2 +-
 .../SonicTriton/{ => doc}/batching_diagrams.png     | Bin
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename HeterogeneousCore/SonicTriton/{ => doc}/batching_diagrams.png (100%)

diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md
index 9888266eaeddd..314b5d4d15986 100644
--- a/HeterogeneousCore/SonicTriton/README.md
+++ b/HeterogeneousCore/SonicTriton/README.md
@@ -37,7 +37,7 @@ The model information from the server can be printed by enabling `verbose` outpu
 ### Batching
 
 SonicTriton supports two types of batching, rectangular and ragged, depicted below:
-![batching diagrams](./batching_diagrams.png)  
+![batching diagrams](./doc/batching_diagrams.png)  
 In the rectangular case, the inputs for each object in an event have the same shape, so they can be combined into a single entry.
 (In this case, the batch size is specified as the "outer dimension" of the shape.)
 In the ragged case, the inputs for each object in an event do not have the same shape, so they cannot be combined;
diff --git a/HeterogeneousCore/SonicTriton/batching_diagrams.png b/HeterogeneousCore/SonicTriton/doc/batching_diagrams.png
similarity index 100%
rename from HeterogeneousCore/SonicTriton/batching_diagrams.png
rename to HeterogeneousCore/SonicTriton/doc/batching_diagrams.png

From b088dfe4b60cac6cc65f70fed23722017a418a97 Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Tue, 28 Feb 2023 10:19:04 -0600
Subject: [PATCH 30/31] improve memory handling

---
 .../SonicTriton/interface/TritonClient.h      |  2 +-
 .../SonicTriton/src/TritonClient.cc           | 31 +++++++++++++------
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h
index cf2e438db2856..833d329417d18 100644
--- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h
+++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h
@@ -57,7 +57,7 @@ class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
   bool noOuterDim() const { return noOuterDim_; }
   unsigned outerDim() const { return outerDim_; }
   unsigned nEntries() const;
-  void getResults(std::vector<triton::client::InferResult*>& results);
+  void getResults(const std::vector<std::shared_ptr<triton::client::InferResult>>& results);
   void evaluate() override;
   template <typename F>
   bool handle_exception(F&& call);
diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
index 4d91dd6cebd8b..0b1da3b6edfb7 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -31,6 +31,15 @@ namespace {
       throw cms::Exception("GrpcCompression")
           << "Unknown compression algorithm requested: " << name << " (choices: none, deflate, gzip)";
   }
+
+  //take ownership of every raw result pointer by wrapping it in a shared_ptr (order is preserved)
+  std::vector<std::shared_ptr<tc::InferResult>> convertToShared(const std::vector<tc::InferResult*>& tmp) {
+    std::vector<std::shared_ptr<tc::InferResult>> owned;
+    owned.reserve(tmp.size());
+    for (tc::InferResult* raw : tmp)
+      owned.emplace_back(raw);
+    return owned;
+  }
 }  // namespace
 
 //based on https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/examples/simple_grpc_async_infer_client.cc
@@ -267,9 +276,9 @@ bool TritonClient::handle_exception(F&& call) {
   }
 }
 
-void TritonClient::getResults(std::vector<tc::InferResult*>& results) {
+void TritonClient::getResults(const std::vector<std::shared_ptr<tc::InferResult>>& results) {
   for (unsigned i = 0; i < results.size(); ++i) {
-    std::shared_ptr<tc::InferResult> result(results[i]);
+    const auto& result = results[i];
     for (auto& [oname, output] : output_) {
       //set shape here before output becomes const
       if (output.variableDims()) {
@@ -294,8 +303,8 @@ void TritonClient::evaluate() {
   //in case there is nothing to process
   if (batchSize() == 0) {
     //call getResults on an empty vector
-    std::vector<tc::InferResult*> empty_results;
-    getResults(empty_results);
+    std::vector<std::shared_ptr<tc::InferResult>> empty_results;
+    getResults(std::move(empty_results));
     finish(true);
     return;
   }
@@ -347,7 +356,9 @@ void TritonClient::evaluate() {
     success = handle_exception([&]() {
       TRITON_THROW_IF_ERROR(
           client_->AsyncInferMulti(
-              [start_status, this](std::vector<tc::InferResult*> results) {
+              [start_status, this](std::vector<tc::InferResult*> resultsTmp) {
+                //immediately convert to shared_ptr
+                const auto& results = convertToShared(resultsTmp);
                 //check results
                 for (auto ptr : results) {
                   auto success = handle_exception(
@@ -367,7 +378,7 @@ void TritonClient::evaluate() {
                 }
 
                 //check result
-                auto success = handle_exception([&]() { getResults(results); });
+                auto success = handle_exception([&]() { getResults(std::move(results)); });
                 if (!success)
                   return;
 
@@ -385,12 +396,14 @@ void TritonClient::evaluate() {
       return;
   } else {
     //blocking call
-    std::vector<tc::InferResult*> results;
+    std::vector<tc::InferResult*> resultsTmp;
     success = handle_exception([&]() {
       TRITON_THROW_IF_ERROR(
-          client_->InferMulti(&results, options_, inputsTriton, outputsTriton, headers_, compressionAlgo_),
+          client_->InferMulti(&resultsTmp, options_, inputsTriton, outputsTriton, headers_, compressionAlgo_),
           "evaluate(): unable to run and/or get result");
     });
+    //immediately convert to shared_ptr
+    const auto& results = convertToShared(resultsTmp);
     if (!success)
       return;
 
@@ -404,7 +417,7 @@ void TritonClient::evaluate() {
       reportServerSideStats(stats);
     }
 
-    success = handle_exception([&]() { getResults(results); });
+    success = handle_exception([&]() { getResults(std::move(results)); });
     if (!success)
       return;
 

From 570a966bba73d860bc55be433e54d60718b9237b Mon Sep 17 00:00:00 2001
From: Kevin Pedro <kpedro88@gmail.com>
Date: Wed, 1 Mar 2023 10:15:35 -0600
Subject: [PATCH 31/31] remove unnecessary moves

---
 HeterogeneousCore/SonicTriton/src/TritonClient.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
index 0b1da3b6edfb7..c57a8355d07a1 100644
--- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc
+++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc
@@ -304,7 +304,7 @@ void TritonClient::evaluate() {
   if (batchSize() == 0) {
     //call getResults on an empty vector
     std::vector<std::shared_ptr<tc::InferResult>> empty_results;
-    getResults(std::move(empty_results));
+    getResults(empty_results);
     finish(true);
     return;
   }
@@ -378,7 +378,7 @@ void TritonClient::evaluate() {
                 }
 
                 //check result
-                auto success = handle_exception([&]() { getResults(std::move(results)); });
+                auto success = handle_exception([&]() { getResults(results); });
                 if (!success)
                   return;
 
@@ -417,7 +417,7 @@ void TritonClient::evaluate() {
       reportServerSideStats(stats);
     }
 
-    success = handle_exception([&]() { getResults(std::move(results)); });
+    success = handle_exception([&]() { getResults(results); });
     if (!success)
       return;