From 25934f615ca905b9586b5fa781f4b78129e94f4b Mon Sep 17 00:00:00 2001 From: Kyle McGill <101670481+nv-kmcgill53@users.noreply.github.com> Date: Fri, 8 Mar 2024 16:03:57 -0800 Subject: [PATCH] Revert "Add support for models with dynamic shapes (#69)" (#70) This reverts commit a68ca46bc88a324982a0f35e9785d92a283466bf. Co-authored-by: kyle --- README.md | 49 +++++++++++-------------------------------- src/openvino.cc | 44 +++++++++++++++++--------------------- src/openvino_utils.cc | 43 ++++++++++++++++++------------------- src/openvino_utils.h | 4 ++-- 4 files changed, 53 insertions(+), 87 deletions(-) diff --git a/README.md b/README.md index e93dbe4..56a775d 100644 --- a/README.md +++ b/README.md @@ -137,9 +137,7 @@ and is provided, then `dynamic_batching` will be enabled with default settings. -## Examples of the "config.pbtxt" file sections depending on the use case - -### Latency mode +### Examples of the "config.pbtxt" files depending on the use case Latency mode with low concurrency on the client side. Recommended for performance optimization with low number of parallel clients. ``` @@ -159,8 +157,6 @@ parameters: [ ] ``` -### Throughput mode - Throughput mode with high concurrency on the client side. Recommended for throughput optimization with high number of parallel clients. Number of streams should be lower or equal to number of parallel clients and lower of equal to the number of CPU cores. For example, with ~20 clients on the host with 12 CPU cores, the config could be like: @@ -181,14 +177,19 @@ parameters: [ ] ``` -### Loading non default model format - When loading model with the non default format of Intermediate Representation and the name model.xml, use and extra parameter "default_model_filename". For example, using TensorFlow saved_model format use: ``` default_model_filename: "model.saved_model" +parameters: [ +{ + key: "PERFORMANCE_HINT" + value: { + string_value: "LATENCY" + } +} +] ``` - and copy the model to the subfolder called "model.saved_model" ``` model_repository/ @@ -200,38 +201,12 @@ model_repository/ └── config.pbtxt ``` -Other allowed values are `model.pdmodel` or `model.onnx`. -### Reshaping models -Following section shows how to use OpenVINO dynamic shapes. `-1` denotes dimension accepting any value on input. In this case -while model originally accepted input with layout `NCHW` and shape `(1,3,224,224)`, now it accepts any batch size and resolution. -*Note*: If the model is originally exported with dynamic shapes, there is no need to manually specify dynamic shapes in config. +## Known Issues -``` -input [ - { - name: "input" - data_type: TYPE_FP32 - dims: [ -1, 3, -1, -1] - } -] -output [ - { - name: "output" - data_type: TYPE_FP32 - dims: [ -1, 1001] - } -] -parameters: { -key: "RESHAPE_IO_LAYERS" -value: { -string_value:"yes" -} -} -``` +* Models with dynamic shape are not supported in this backend now. -## Known Issues +* As of now, the Openvino backend does not support variable shaped tensors. However, the dynamic batch sizes in the model are supported. See `SKIP_OV_DYNAMIC_BATCHSIZE` and `ENABLE_BATCH_PADDING` parameters for more details. * Models with the scalar on the input (shape without any dimension are not supported) -* Reshaping using [dimension ranges](https://docs.openvino.ai/2023.3/ovms_docs_dynamic_shape_dynamic_model.html) is not supported. 
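For reference, the settings shown in the restored README sections above can be combined into a single `config.pbtxt`, as sketched below. This is an illustrative sketch only and not part of the patch: the model name, tensor names, `max_batch_size`, and data type are hypothetical, and the shapes follow the NCHW `(1,3,224,224)` / 1001-class example used elsewhere in the README; `backend: "openvino"`, `NUM_STREAMS`, and `PERFORMANCE_HINT` correspond to the options documented above.

```
name: "resnet"          # hypothetical model name
backend: "openvino"
max_batch_size: 8       # example value; batch dimension is excluded from dims
input [
  {
    name: "input"       # hypothetical tensor name
    data_type: TYPE_FP32
    dims: [ 3, 224, 224 ]   # static dims only; -1 is not supported by this backend
  }
]
output [
  {
    name: "output"      # hypothetical tensor name
    data_type: TYPE_FP32
    dims: [ 1001 ]
  }
]
parameters: [
{
   key: "NUM_STREAMS"
   value: {
   string_value: "1"
   }
},
{
   key: "PERFORMANCE_HINT"
   value: {
   string_value: "LATENCY"
   }
}
]
```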
diff --git a/src/openvino.cc b/src/openvino.cc
index 54e4c59..99d4d1a 100644
--- a/src/openvino.cc
+++ b/src/openvino.cc
@@ -554,32 +554,24 @@ ModelState::ValidateInputs(const size_t expected_input_cnt)
     }
 
     ov::Shape input_shape;
-    ov::PartialShape partial_input_shape;
     RETURN_IF_OPENVINO_ASSIGN_ERROR(
-        partial_input_shape,
-        model_inputs[model_inputs_name_to_index[io_name]].get_partial_shape(),
+        input_shape,
+        model_inputs[model_inputs_name_to_index[io_name]].get_shape(),
         ("retrieving original shapes from input " + io_name).c_str());
+
     if (reshape_io_layers_) {
       int index = (MaxBatchSize() != 0) ? 1 : 0;
       for (const auto dim : dims) {
-        if (dim > 0) {
-          partial_input_shape[index++] = ov::Dimension(dim);
-        } else if (dim == -1) {
-          partial_input_shape[index++] = ov::Dimension::dynamic();
-        } else {
-          return TRITONSERVER_ErrorNew(
-              TRITONSERVER_ERROR_INTERNAL,
-              std::string("openvino backend does dimensions values other than "
-                          "-1 or positive integers"));
-        }
+        input_shape[index++] = dim;
       }
       RETURN_IF_OPENVINO_ERROR(
-          ppp.input(io_name).tensor().set_shape(partial_input_shape),
+          ppp.input(io_name).tensor().set_shape(input_shape),
           std::string("setting shape for " + io_name).c_str());
     } else {
       RETURN_IF_ERROR(CompareDimsSupported(
-          Name(), io_name, partial_input_shape, dims, MaxBatchSize(),
-          false /* compare_exact */));
+          Name(), io_name,
+          std::vector<int64_t>(input_shape.begin(), input_shape.end()), dims,
+          MaxBatchSize(), false /* compare_exact */));
     }
 
     if (MaxBatchSize()) {
@@ -653,14 +645,15 @@ ModelState::ValidateOutputs()
     } else {
       RETURN_IF_ERROR(ParseShape(io, "dims", &dims));
     }
-    ov::PartialShape output_shape;
+    ov::Shape output_shape;
     RETURN_IF_OPENVINO_ASSIGN_ERROR(
         output_shape,
-        model_outputs[model_outputs_name_to_index[io_name]].get_partial_shape(),
+        model_outputs[model_outputs_name_to_index[io_name]].get_shape(),
         ("retrieving original shapes from output " + io_name).c_str());
     RETURN_IF_ERROR(CompareDimsSupported(
-        Name(), io_name, output_shape, dims, MaxBatchSize(),
-        true /* compare_exact */));
+        Name(), io_name,
+        std::vector<int64_t>(output_shape.begin(), output_shape.end()), dims,
+        MaxBatchSize(), true /* compare_exact */));
   }
 
   // Model preprocessing
@@ -819,9 +812,9 @@ ModelState::AutoCompleteInputOrOutput(
       "data_type",
       OpenVINOElementToModelConfigDataType(ov_io.get_element_type())));
   // Find shape
-  ov::PartialShape io_shape;
+  ov::Shape io_shape;
   RETURN_IF_OPENVINO_ASSIGN_ERROR(
-      io_shape, ov_io.get_partial_shape(),
+      io_shape, ov_io.get_shape(),
       ("retrieving original shapes from" + std::string(io_json_obj_name) +
        " " + io_name)
           .c_str());
@@ -829,8 +822,7 @@ ModelState::AutoCompleteInputOrOutput(
   triton::common::TritonJson::Value dims(
       ModelConfig(), triton::common::TritonJson::ValueType::ARRAY);
   for (size_t i = (MaxBatchSize() > 0) ? 1 : 0; i < io_shape.size(); i++) {
-    RETURN_IF_ERROR(dims.AppendInt(
-        io_shape.is_static() ? io_shape[i].get_length() : -1));
+    RETURN_IF_ERROR(dims.AppendInt(io_shape[i]));
   }
   RETURN_IF_ERROR(io_json.Add("dims", std::move(dims)));
   // Add individual input/output to new input/output
@@ -850,6 +842,7 @@ ModelState::AutoCompleteInputOrOutput(
             "': " + io_json_obj_name + " already specified")
             .c_str());
   }
+
   return nullptr;  // success
 }
 
@@ -938,7 +931,7 @@ ModelInstanceState::ModelInstanceState(
     throw triton::backend::BackendModelInstanceException(TRITONSERVER_ErrorNew(
         TRITONSERVER_ERROR_INVALID_ARG,
         (std::string("unable to load model '") + model_state_->Name() +
-         "', Triton openVINO backend supports only CPU device")
+         "', openVINO backend supports only CPU device")
             .c_str()));
   }
 
@@ -1370,6 +1363,7 @@ TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
   const char* cname;
   RETURN_IF_ERROR(TRITONBACKEND_BackendName(backend, &cname));
   std::string name(cname);
+
   LOG_MESSAGE(
       TRITONSERVER_LOG_INFO,
       (std::string("TRITONBACKEND_Initialize: ") + name).c_str());
diff --git a/src/openvino_utils.cc b/src/openvino_utils.cc
index c5bc1a1..6795e0e 100644
--- a/src/openvino_utils.cc
+++ b/src/openvino_utils.cc
@@ -195,25 +195,24 @@ OpenVINOElementToModelConfigDataType(const ov::element::Type& data_type)
   return "TYPE_INVALID";
 }
 
-static bool
-doesMatch(const ov::Dimension& ov_dim, int64_t config_dim)
-{
-  if (ov_dim.is_static()) {
-    return ov_dim.get_length() == config_dim;
-  }
-  if (!ov_dim.get_interval().has_upper_bound()) {
-    return true;
-  }
-  return (config_dim < ov_dim.get_max_length()) &&
-         (config_dim > ov_dim.get_min_length());
-}
-
 TRITONSERVER_Error*
 CompareDimsSupported(
     const std::string& model_name, const std::string& tensor_name,
-    const ov::PartialShape& model_shape, const std::vector<int64_t>& dims,
+    const std::vector<int64_t>& model_shape, const std::vector<int64_t>& dims,
     const int max_batch_size, const bool compare_exact)
 {
+  // TODO: OpenVINO backend does not support the dynamic shapes as of now.
+  // We can use RESIZE_BILINEAR preProcess in InputInfo to support dynamic
+  // shapes in future.
+  for (const auto& dim : dims) {
+    RETURN_ERROR_IF_TRUE(
+        (dim == -1), TRITONSERVER_ERROR_INVALID_ARG,
+        std::string("model '") + model_name + "', tensor '" + tensor_name +
+            "': provides -1 dim (shape " + ShapeToString(dims) +
+            "), openvino "
+            "currently does not support dynamic shapes.");
+  }
+
   // If the model configuration expects batching support in the model,
   // then the openvino first dimension will be reshaped hence should not
   // be compared.
@@ -233,8 +232,9 @@ CompareDimsSupported(
     bool succ = (model_shape.size() == (size_t)full_dims.size());
     if (succ) {
       for (size_t i = 0; i < full_dims.size(); ++i) {
-        if (compare_exact || ((i != 0) && (full_dims[i] != -1))) {
-          succ &= doesMatch(model_shape[i], full_dims[i]);
+        const int64_t model_dim = model_shape[i];
+        if (compare_exact || (i != 0)) {
+          succ &= (model_dim == full_dims[i]);
         }
       }
     }
@@ -256,7 +256,8 @@ CompareDimsSupported(
     bool succ = (model_shape.size() == dims.size());
     if (succ) {
       for (size_t i = 0; i < dims.size(); ++i) {
-        succ &= doesMatch(model_shape[i], dims[i]);
+        const int64_t model_dim = model_shape[i];
+        succ &= (model_dim == dims[i]);
       }
     }
 
@@ -288,13 +289,9 @@ ReadParameter(
 }
 
 std::vector<int64_t>
-ConvertToSignedShape(const ov::PartialShape& shape)
+ConvertToSignedShape(const std::vector<size_t> shape)
 {
-  std::vector<int64_t> out;
-  for (const auto& dim : shape) {
-    out.emplace_back(dim.is_static() ? dim.get_length() : -1);
-  }
-  return out;
+  return std::vector<int64_t>{shape.begin(), shape.end()};
 }
 
 }}}  // namespace triton::backend::openvino
diff --git a/src/openvino_utils.h b/src/openvino_utils.h
index 2fbaadb..38565a9 100644
--- a/src/openvino_utils.h
+++ b/src/openvino_utils.h
@@ -92,13 +92,13 @@ std::string OpenVINOElementToModelConfigDataType(
 TRITONSERVER_Error* CompareDimsSupported(
     const std::string& model_name, const std::string& tensor_name,
-    const ov::PartialShape& model_shape, const std::vector<int64_t>& dims,
+    const std::vector<int64_t>& model_shape, const std::vector<int64_t>& dims,
     const int max_batch_size, const bool compare_exact);
 
 TRITONSERVER_Error* ReadParameter(
     triton::common::TritonJson::Value& params, const std::string& key,
     std::string* param);
 
-std::vector<int64_t> ConvertToSignedShape(const ov::PartialShape& shape);
+std::vector<int64_t> ConvertToSignedShape(const std::vector<size_t> shape);
 
 }}}  // namespace triton::backend::openvino
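After this revert, `CompareDimsSupported` again rejects any `-1` entry in the configured dims and compares the model's static shape against the configuration element by element, so shapes in `config.pbtxt` must be fully specified. The fragment below is an illustrative sketch (hypothetical tensor name and sizes), not part of the patch:

```
input [
  {
    name: "input"            # hypothetical tensor name
    data_type: TYPE_FP32
    # Accepted after this revert: fully static dims that match the model.
    dims: [ 3, 224, 224 ]
    # Rejected after this revert: dynamic dims such as
    #   dims: [ -1, 3, -1, -1 ]
    # fail validation with "openvino currently does not support dynamic shapes."
  }
]
```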